def test_cross_validation_with_imputation():
    """Train a small model grid on imputed cross-validation folds.

    Loads ``bdata.2009.mhci.public.1.txt`` restricted to three HLA-A
    alleles, builds cross-validation folds (``n_folds=3``) with a MICE
    imputer for two of those alleles, checks that every fold only holds
    data for its own allele, then trains a grid of models over the folds
    and asserts that the mean test AUC exceeds 0.6.
    """
    # NOTE(review): another function with this same name appears later in
    # this file; if both live in one module the later one shadows this one
    # -- confirm which definition is intended to run.
    mice = fancyimpute.MICE(
        n_imputations=2,
        n_burn_in=1,
        n_nearest_columns=25)
    csv_path = get_path("data_kim2014", "bdata.2009.mhci.public.1.txt")
    all_data = mhcflurry.dataset.Dataset.from_csv(csv_path)
    dataset = all_data.get_alleles(["HLA-A0201", "HLA-A0202", "HLA-A0301"])

    cv_folds = cross_validation_folds(
        dataset,
        n_folds=3,
        imputer=mice,
        drop_similar_peptides=True,
        alleles=["HLA-A0201", "HLA-A0202"])

    # Only the two requested alleles should show up; 6 matches
    # n_folds (3) times the number of requested alleles (2).
    eq_({entry.allele for entry in cv_folds}, {"HLA-A0201", "HLA-A0202"})
    eq_(len(cv_folds), 6)

    # Every split of a fold must be restricted to that fold's allele.
    for entry in cv_folds:
        eq_(entry.train.unique_alleles(), {entry.allele})
        eq_(entry.imputed_train.unique_alleles(), {entry.allele})
        eq_(entry.test.unique_alleles(), {entry.allele})

    model_grid = HYPERPARAMETER_DEFAULTS.models_grid(
        activation=["tanh", "relu"],
        layer_sizes=[[4]],
        embedding_output_dim=[8],
        n_training_epochs=[3])
    print(model_grid)

    results = train_across_models_and_folds(cv_folds, model_grid)
    print(results)
    mean_test_auc = results.test_auc.mean()
    assert mean_test_auc > 0.6
def test_cross_validation_with_imputation():
    """Check imputed cross-validation folds and train models across them.

    The data set read from ``bdata.2009.mhci.public.1.txt`` is narrowed to
    three HLA-A alleles. Folds are then generated for two of them using a
    MICE imputer, validated for allele purity, and used to train a small
    hyperparameter grid. The test passes when the mean ``test_auc`` of the
    resulting frame is greater than 0.6.
    """
    # NOTE(review): an earlier function with this same name appears above
    # in this file; if both live in one module this definition replaces
    # the earlier one -- confirm the duplication is intended.
    imputer_options = dict(n_imputations=2, n_burn_in=1, n_nearest_columns=25)
    mice_imputer = fancyimpute.MICE(**imputer_options)

    data_file = get_path("data_kim2014", "bdata.2009.mhci.public.1.txt")
    loaded = mhcflurry.dataset.Dataset.from_csv(data_file)
    training_set = loaded.get_alleles(["HLA-A0201", "HLA-A0202", "HLA-A0301"])

    fold_options = dict(
        n_folds=3,
        imputer=mice_imputer,
        drop_similar_peptides=True,
        alleles=["HLA-A0201", "HLA-A0202"],
    )
    fold_list = cross_validation_folds(training_set, **fold_options)

    alleles_seen = set(item.allele for item in fold_list)
    eq_(alleles_seen, {"HLA-A0201", "HLA-A0202"})
    eq_(len(fold_list), 6)

    # Each of the three per-fold data sets may only contain the fold's
    # own allele.
    for item in fold_list:
        for split_name in ("train", "imputed_train", "test"):
            eq_(getattr(item, split_name).unique_alleles(), {item.allele})

    grid_options = dict(
        activation=["tanh", "relu"],
        layer_sizes=[[4]],
        embedding_output_dim=[8],
        n_training_epochs=[3],
    )
    candidate_models = HYPERPARAMETER_DEFAULTS.models_grid(**grid_options)
    print(candidate_models)

    scores = train_across_models_and_folds(fold_list, candidate_models)
    print(scores)
    assert scores.test_auc.mean() > 0.6
def test_imputation():
    """Verify the cross-validation folds produced with a MICE imputer.

    Reads ``bdata.2009.mhci.public.1.txt``, keeps three HLA-A alleles, and
    requests folds (``n_folds=3``) for two of them. The test asserts that
    exactly those two alleles appear among the folds, that six folds are
    returned, and that the train, imputed-train and test data of each fold
    contain only that fold's allele.
    """
    # NOTE(review): another function with this same name appears later in
    # this file; if both live in one module the later one shadows this one
    # -- confirm which definition is intended to run.
    mice = fancyimpute.MICE(
        n_imputations=2, n_burn_in=1, n_nearest_columns=25)

    source_csv = get_path("data_kim2014", "bdata.2009.mhci.public.1.txt")
    complete_dataset = mhcflurry.dataset.Dataset.from_csv(source_csv)
    subset = complete_dataset.get_alleles(
        ["HLA-A0201", "HLA-A0202", "HLA-A0301"])

    splits = cross_validation_folds(
        subset,
        n_folds=3,
        imputer=mice,
        drop_similar_peptides=True,
        alleles=["HLA-A0201", "HLA-A0202"])

    observed_alleles = {split.allele for split in splits}
    eq_(observed_alleles, {"HLA-A0201", "HLA-A0202"})
    eq_(len(splits), 6)

    # No split may leak data from an allele other than its own.
    for split in splits:
        eq_(split.train.unique_alleles(), {split.allele})
        eq_(split.imputed_train.unique_alleles(), {split.allele})
        eq_(split.test.unique_alleles(), {split.allele})
def test_imputation():
    """Check allele bookkeeping of MICE-imputed cross-validation folds.

    A three-allele subset of ``bdata.2009.mhci.public.1.txt`` is passed to
    ``cross_validation_folds`` together with a MICE imputer and a request
    for two alleles. The returned folds must cover exactly those two
    alleles, number six in total, and carry only their own allele in the
    ``train``, ``imputed_train`` and ``test`` data sets.
    """
    # NOTE(review): an earlier function with this same name appears above
    # in this file; if both live in one module this definition replaces
    # the earlier one -- confirm the duplication is intended.
    mice_imputer = fancyimpute.MICE(
        n_imputations=2,
        n_burn_in=1,
        n_nearest_columns=25,
    )
    kim2014_path = get_path("data_kim2014", "bdata.2009.mhci.public.1.txt")
    three_allele_data = mhcflurry.dataset.Dataset.from_csv(
        kim2014_path).get_alleles(["HLA-A0201", "HLA-A0202", "HLA-A0301"])

    fold_options = dict(
        n_folds=3,
        imputer=mice_imputer,
        drop_similar_peptides=True,
        alleles=["HLA-A0201", "HLA-A0202"],
    )
    generated = cross_validation_folds(three_allele_data, **fold_options)

    eq_(set(f.allele for f in generated), {"HLA-A0201", "HLA-A0202"})
    eq_(len(generated), 6)

    # Walk the three data sets of every fold in a fixed order and confirm
    # each one is limited to the fold's allele.
    for f in generated:
        for part in ("train", "imputed_train", "test"):
            eq_(getattr(f, part).unique_alleles(), {f.allele})