def test_run_ml_with_multi_label_sequence_in_crossval(test_output_dirs: OutputFolderForTests) -> None:
    """
    Run the toy multi-label sequence model end to end through MLRunner, using 2 cross validation
    splits and a single epoch.

    The model predicts at several sequence positions (target indices 1, 2 and 3). The run is
    intended to also cover aggregation of cross validation results; that step is not visible in
    this test and presumably happens inside MLRunner (confirm there). Nothing is checked after the
    run: the test passes if `MLRunner.run()` completes without raising.

    :param test_output_dirs: Test fixture whose `root_dir` receives all outputs of the run.
    """
    logging_to_stdout()
    model_config = ToyMultiLabelSequenceModel(should_validate=False)

    # Sanity checks on the prediction targets before starting the (comparatively slow) run.
    assert model_config.get_target_indices() == [1, 2, 3]
    expected_targets = ["Seq_pos 01", "Seq_pos 02", "Seq_pos 03"]
    indices = model_config.get_target_indices()
    # Truthiness check first, so that the len() call below is made on a non-empty list.
    assert indices
    assert len(indices) == len(expected_targets)

    # Redirect outputs to the test folder and use the in-memory toy dataset.
    model_config.set_output_to(test_output_dirs.root_dir)
    model_config.dataset_data_frame = _get_multi_label_sequence_dataframe()
    model_config.pre_process_dataset_dataframe()

    # Smallest possible cross validation job: one epoch, two splits.
    model_config.num_epochs = 1
    model_config.number_of_cross_validation_splits = 2

    azure_settings = get_default_azure_config()
    azure_settings.train = True
    MLRunner(model_config, azure_settings).run()
def test_get_class_weights_dataset(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that the training dataset created from the toy multi-label sequence data reports the
    expected number of samples per class label.

    No model is trained here: the test only builds the dataset splits, creates the torch datasets,
    and checks `get_class_counts()` on the training dataset. Going by the test name, these counts
    are presumably the basis for class weights (confirm in the dataset implementation).

    :param test_output_dirs: Test fixture whose `root_dir` is used as the output folder of the config.
    """
    dataset_contents = _get_multi_label_sequence_dataframe()
    config = ToyMultiLabelSequenceModel(should_validate=False)

    assert config.get_target_indices() == [1, 2, 3]
    expected_prediction_targets = ["Seq_pos 01", "Seq_pos 02", "Seq_pos 03"]
    target_indices = config.get_target_indices()
    # Explicit narrowing instead of a `type: ignore` comment, matching the sibling cross validation
    # test. This cannot fail after the equality check above; it is there for the type checker only.
    assert target_indices
    assert len(target_indices) == len(expected_prediction_targets)

    # Use the in-memory toy dataset and write any outputs to the test folder.
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = dataset_contents
    config.pre_process_dataset_dataframe()

    splits = config.get_dataset_splits()
    train_dataset = config.create_torch_datasets(splits)[ModelExecutionMode.TRAIN]
    class_counts = train_dataset.get_class_counts()
    # Keys are the label values, values are how often each label occurs in the training split.
    assert class_counts == {0.0: 9, 1.0: 2}