def test_dataset_intersection():
    """Check that ``Dataset.intersection`` keeps only the shared entry.

    The one peptide present in both inputs has a different value in each
    (10.0 vs. 30.0); the expected result carries the value from the dataset
    on which ``intersection`` was called.
    """
    full = Dataset.from_nested_dictionary({
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    })
    other = Dataset.from_nested_dictionary({"H-2-Kb": {"SIINFEKL": 30.0}})

    shared = full.intersection(other)

    expected = Dataset.from_nested_dictionary({"H-2-Kb": {"SIINFEKL": 10.0}})
    eq_(shared, expected)
def test_dataset_difference():
    """Check that ``Dataset.difference`` drops the entry found in both inputs.

    Of the three H-2-Kb peptides in the first dataset, the one that also
    appears in the second dataset is expected to be absent from the result.
    """
    full = Dataset.from_nested_dictionary({
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    })
    to_remove = Dataset.from_nested_dictionary({"H-2-Kb": {"SIINFEKL": 10.0}})

    remaining = full.difference(to_remove)

    expected = Dataset.from_nested_dictionary({
        "H-2-Kb": {
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    })
    eq_(remaining, expected)
def test_dataset_random_split():
    """Check the sizes of the two parts returned by ``Dataset.random_split``.

    For a three-entry dataset split with ``n=2``, the first part is expected
    to hold two entries and the second part one.
    """
    measurements = {
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }
    dataset = Dataset.from_nested_dictionary(measurements)

    first_part, second_part = dataset.random_split(n=2)

    assert len(first_part) == 2
    assert len(second_part) == 1
def test_create_imputed_datasets_two_alleles():
    """Check that imputation fills in every peptide for both alleles.

    The input has two alleles with partially overlapping peptide sets. After
    ``impute_missing_values`` the result is expected to still contain exactly
    those two alleles, and each allele is expected to cover the union of all
    three peptides.
    """
    poly_a = "A" * 9
    poly_c = "C" * 9
    poly_s = "S" * 9

    dataset = Dataset.from_nested_dictionary({
        "HLA-A*02:01": {
            poly_a: 20.0,
            poly_c: 40000.0,
        },
        "HLA-A*02:05": {
            poly_s: 500.0,
            poly_a: 25.0,
        },
    })

    imputed_dataset = dataset.impute_missing_values(MICE(n_imputations=25))

    eq_(imputed_dataset.unique_alleles(), {"HLA-A*02:01", "HLA-A*02:05"})

    expected_peptides = {poly_a, poly_c, poly_s}
    # The allele name is not needed here; only the per-allele peptide set
    # is checked.
    for _, allele_data in imputed_dataset.groupby_allele():
        eq_(set(allele_data.peptides), expected_peptides)
def test_dataset_cross_validation():
    """Check the folds yielded by ``Dataset.cross_validation_iterator``.

    With ``test_allele="HLA-A*02:01"`` and ``n_folds=2``, exactly two
    (train, test) pairs are expected. Each test set is expected to hold a
    single entry of the test allele only, and each training set is expected
    to span both alleles.
    """
    measurements = {
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
        "HLA-A*02:01": {
            "ASASAS": 1.0,
            "CCC": 0.0,
        },
    }
    dataset = Dataset.from_nested_dictionary(measurements)

    folds = dataset.cross_validation_iterator(
        test_allele="HLA-A*02:01",
        n_folds=2)

    # Stays 0 if the iterator yields nothing, so the final assertion
    # still fails cleanly in that case.
    folds_seen = 0
    for folds_seen, (train_dataset, test_dataset) in enumerate(folds, start=1):
        assert train_dataset.unique_alleles() == {"H-2-Kb", "HLA-A*02:01"}
        assert test_dataset.unique_alleles() == {"HLA-A*02:01"}
        assert len(test_dataset) == 1
    assert folds_seen == 2