Example #1
0
def test_dataset_intersection():
    """Intersect a three-peptide dataset with a one-peptide dataset.

    Only "SIINFEKL" is present in both inputs. The expected result carries
    the value from the dataset `intersection` was called on (10.0), not the
    value from the dataset passed as the argument (30.0).
    """
    allele = "H-2-Kb"
    full_measurements = {
        allele: {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }
    overlapping_measurements = {allele: {"SIINFEKL": 30.0}}
    expected_measurements = {allele: {"SIINFEKL": 10.0}}

    full = Dataset.from_nested_dictionary(full_measurements)
    overlapping = Dataset.from_nested_dictionary(overlapping_measurements)
    actual = full.intersection(overlapping)
    expected = Dataset.from_nested_dictionary(expected_measurements)
    eq_(actual, expected)
Example #2
0
def test_dataset_difference():
    """Subtract a one-peptide dataset from a three-peptide dataset.

    The dataset passed to `difference` holds only "SIINFEKL", so the
    expected result is the two remaining peptides with their original
    values.
    """
    allele = "H-2-Kb"
    full_measurements = {
        allele: {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }
    removed_measurements = {allele: {"SIINFEKL": 10.0}}
    expected_measurements = {
        allele: {
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }

    full = Dataset.from_nested_dictionary(full_measurements)
    removed = Dataset.from_nested_dictionary(removed_measurements)
    actual = full.difference(removed)
    expected = Dataset.from_nested_dictionary(expected_measurements)
    eq_(actual, expected)
Example #3
0
def test_dataset_random_split():
    """Split a three-entry dataset with `random_split(n=2)`.

    The first returned part must have length 2 and the second length 1.
    """
    measurements = {
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }
    dataset = Dataset.from_nested_dictionary(measurements)

    first_part, second_part = dataset.random_split(n=2)

    assert len(first_part) == 2
    assert len(second_part) == 1
Example #4
0
def test_create_imputed_datasets_two_alleles():
    """Impute missing values across two alleles with partly shared peptides.

    Each allele starts with two of the three peptides. After
    `impute_missing_values`, both alleles must still be present and every
    per-allele group must contain all three peptides.
    """
    poly_a = "A" * 9
    poly_c = "C" * 9
    poly_s = "S" * 9
    first_allele = "HLA-A*02:01"
    second_allele = "HLA-A*02:05"

    dataset = Dataset.from_nested_dictionary({
        first_allele: {
            poly_a: 20.0,
            poly_c: 40000.0,
        },
        second_allele: {
            poly_s: 500.0,
            poly_a: 25.0,
        },
    })

    imputer = MICE(n_imputations=25)
    imputed = dataset.impute_missing_values(imputer)

    eq_(imputed.unique_alleles(), {first_allele, second_allele})

    all_peptides = {poly_a, poly_c, poly_s}
    # The allele name from each group is not needed; only the peptide set
    # of every group is compared.
    for _, per_allele in imputed.groupby_allele():
        eq_(set(per_allele.peptides), all_peptides)
Example #5
0
def test_create_imputed_datasets_two_alleles():
    """Impute missing values across two alleles with partly shared peptides.

    Each allele starts with two of the three peptides. After
    `impute_missing_values`, both alleles must still be present and every
    per-allele group must contain all three peptides.
    """
    poly_a = "A" * 9
    poly_c = "C" * 9
    poly_s = "S" * 9
    first_allele = "HLA-A*02:01"
    second_allele = "HLA-A*02:05"

    dataset = Dataset.from_nested_dictionary({
        first_allele: {
            poly_a: 20.0,
            poly_c: 40000.0,
        },
        second_allele: {
            poly_s: 500.0,
            poly_a: 25.0,
        },
    })

    imputer = MICE(n_imputations=25)
    imputed = dataset.impute_missing_values(imputer)

    eq_(imputed.unique_alleles(), {first_allele, second_allele})

    all_peptides = {poly_a, poly_c, poly_s}
    # The allele name from each group is not needed; only the peptide set
    # of every group is compared.
    for _, per_allele in imputed.groupby_allele():
        eq_(set(per_allele.peptides), all_peptides)
Example #6
0
def test_dataset_cross_validation():
    """Run two cross-validation folds with "HLA-A*02:01" as the test allele.

    In every fold the training set must contain both alleles, the test set
    must contain only the test allele with exactly one entry, and the
    iterator must yield exactly two folds.
    """
    held_out_allele = "HLA-A*02:01"
    dataset = Dataset.from_nested_dictionary({
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
        held_out_allele: {
            "ASASAS": 1.0,
            "CCC": 0.0,
        },
    })

    folds = dataset.cross_validation_iterator(
        test_allele=held_out_allele,
        n_folds=2,
    )

    # Stays 0 if the iterator yields nothing, so the final count check
    # still fails in that case.
    n_folds_seen = 0
    for n_folds_seen, (train_part, test_part) in enumerate(folds, start=1):
        assert train_part.unique_alleles() == {"H-2-Kb", "HLA-A*02:01"}
        assert test_part.unique_alleles() == {"HLA-A*02:01"}
        assert len(test_part) == 1
    assert n_folds_seen == 2