def test_run_ml_with_multi_label_sequence_model(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Runs training and testing via MLRunner for a sequence model that predicts at
    several sequence positions, then validates the per-epoch training metrics file.
    """
    logging_to_stdout()
    config = ToyMultiLabelSequenceModel(should_validate=False)
    assert config.get_target_indices() == [1, 2, 3]
    # One metrics hue is expected per prediction target position.
    expected_prediction_targets = [f"{SEQUENCE_POSITION_HUE_NAME_PREFIX} {position}"
                                   for position in ("01", "02", "03")]
    target_indices = config.get_target_indices()
    assert target_indices is not None
    assert len(target_indices) == len(expected_prediction_targets)
    metrics_dict = SequenceMetricsDict.create_from_config(config)
    assert metrics_dict.get_hue_names(include_default=False) == expected_prediction_targets
    config.set_output_to(test_output_dirs.root_dir)
    # Point the config at a fake dataset directory so that config validation passes.
    config.local_dataset = Path(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_multi_label_sequence_dataframe()
    config.pre_process_dataset_dataframe()
    config.num_epochs = 1
    config.max_batch_grad_cam = 1
    azure_config = get_default_azure_config()
    azure_config.train = True
    MLRunner(config, azure_config).run()
    # The metrics file should have one entry per epoch per subject per prediction target,
    # for all the 3 prediction targets.
    metrics_file = config.outputs_folder / "Train" / METRICS_FILE_NAME
    assert metrics_file.exists()
    metrics = pd.read_csv(metrics_file)
    for required_column in (LoggingColumns.Patient.value,
                            LoggingColumns.Epoch.value,
                            LoggingColumns.Hue.value):
        assert required_column in metrics
    assert metrics[LoggingColumns.Hue.value].unique().tolist() == expected_prediction_targets
    # Group sizes per (subject, epoch): the dataset fixture gives most subjects rows for
    # all 3 target positions, one subject only 2 — presumably a shorter sequence; see
    # _get_multi_label_sequence_dataframe.
    expected_group_sizes = [3, 2, 3, 3]
    grouped_by_subject = metrics.groupby(
        by=[LoggingColumns.Patient.value, LoggingColumns.Epoch.value])
    for index, (_, rows) in enumerate(grouped_by_subject):
        assert len(rows) == expected_group_sizes[index]
    # Adding the hue to the group key must make every group a single row.
    grouped_by_subject_and_target = metrics.groupby(
        by=[LoggingColumns.Patient.value,
            LoggingColumns.Epoch.value,
            LoggingColumns.Hue.value])
    for _, rows in grouped_by_subject_and_target:
        assert len(rows) == 1
def test_get_hue_name_from_target_index() -> None:
    """
    Tests round-tripping between a sequence target index and its metrics hue name,
    and that malformed hue names are rejected.
    """
    target_index = 7
    hue = SequenceMetricsDict.get_hue_name_from_target_index(target_index)
    assert hue == "Seq_pos 07"
    assert SequenceMetricsDict.get_target_index_from_hue_name(hue) == target_index
    # Both a wrong prefix and a non-numeric position must raise.
    for malformed_name in ("foo 07", "Seq_pos ab"):
        with pytest.raises(ValueError):
            SequenceMetricsDict.get_target_index_from_hue_name(malformed_name)