예제 #1
0
def test_run_ml_with_multi_label_sequence_in_crossval(test_output_dirs: OutputFolderForTests) -> None:
    """
    Run the toy multi-label sequence model through MLRunner for a single epoch
    with two cross validation splits.

    Apart from the sanity checks on the target indices, the test makes no assertions
    on the results: it passes when the run completes without raising.

    :param test_output_dirs: Supplies the root directory that the model configuration
        is pointed at via set_output_to.
    """
    logging_to_stdout()
    model_config = ToyMultiLabelSequenceModel(should_validate=False)

    # Sanity checks: the toy model predicts at three sequence positions.
    assert config_targets_match(model_config) if False else model_config.get_target_indices() == [1, 2, 3]
    target_names = ["Seq_pos 01", "Seq_pos 02", "Seq_pos 03"]
    indices = model_config.get_target_indices()
    assert indices
    assert len(indices) == len(target_names)

    # Point the model at the test folder and feed it the in-memory dataset.
    model_config.set_output_to(test_output_dirs.root_dir)
    model_config.dataset_data_frame = _get_multi_label_sequence_dataframe()
    model_config.pre_process_dataset_dataframe()

    # Keep the run as short as possible: one epoch, two cross validation splits.
    model_config.num_epochs = 1
    model_config.number_of_cross_validation_splits = 2

    run_settings = get_default_azure_config()
    run_settings.train = True
    runner = MLRunner(model_config, run_settings)
    runner.run()
예제 #2
0
def test_get_class_weights_dataset(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that the training dataset created for the toy multi-label sequence model
    reports the expected class counts.

    The model is given the multi-label sequence dataframe, the dataset splits are
    computed, and the torch dataset for the training split is queried via
    get_class_counts. No training is run.

    :param test_output_dirs: Supplies the root directory that the model configuration
        is pointed at via set_output_to.
    """
    dataset_contents = _get_multi_label_sequence_dataframe()
    config = ToyMultiLabelSequenceModel(should_validate=False)
    assert config.get_target_indices() == [1, 2, 3]
    expected_prediction_targets = ["Seq_pos 01", "Seq_pos 02", "Seq_pos 03"]
    # Narrow the result with an assert instead of silencing the type checker,
    # matching test_run_ml_with_multi_label_sequence_in_crossval.
    # NOTE(review): presumably get_target_indices returns an Optional list - confirm.
    target_indices = config.get_target_indices()
    assert target_indices
    assert len(target_indices) == len(expected_prediction_targets)
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = dataset_contents
    config.pre_process_dataset_dataframe()
    splits = config.get_dataset_splits()
    train_dataset = config.create_torch_datasets(splits)[
        ModelExecutionMode.TRAIN]
    class_counts = train_dataset.get_class_counts()
    # Expected label counts for the training split of the toy dataframe.
    assert class_counts == {0.0: 9, 1.0: 2}