Python Dataset.from_nested_dictionary Examples

Programming Language: Python

Namespace/Package Name: mhcflurry.dataset

Class/Type: Dataset

Method/Function: from_nested_dictionary

Examples at hotexamples.com: 6

Python Dataset.from_nested_dictionary - 6 examples found. These are the top-rated real-world Python examples of mhcflurry.dataset.Dataset.from_nested_dictionary, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

from_nested_dictionary(5)

from_csv(3)

Dataset(1)

create_empty(1)

from_single_allele_dictionary(1)

Example #1

Show file

File: test_dataset.py Project: vreuter/mhcflurry

def test_dataset_intersection():
    """Check that ``intersection`` keeps only the shared allele/peptide entry.

    The two inputs share the "SIINFEKL" peptide under "H-2-Kb" but with
    different values (10.0 vs 30.0); the expected result carries the value
    of the dataset on which ``intersection`` is called.
    """
    full_measurements = {
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }
    overlapping_measurements = {"H-2-Kb": {"SIINFEKL": 30.0}}
    expected_measurements = {"H-2-Kb": {"SIINFEKL": 10.0}}

    larger = Dataset.from_nested_dictionary(full_measurements)
    smaller = Dataset.from_nested_dictionary(overlapping_measurements)
    common = larger.intersection(smaller)
    eq_(common, Dataset.from_nested_dictionary(expected_measurements))

Example #2

Show file

File: test_dataset.py Project: vreuter/mhcflurry

def test_dataset_difference():
    """Check that ``difference`` drops the entry present in the other dataset.

    The second dataset holds only the "SIINFEKL" entry of "H-2-Kb", so the
    expected result is the first dataset's two remaining peptides.
    """
    full_measurements = {
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }
    removed_measurements = {"H-2-Kb": {"SIINFEKL": 10.0}}
    remaining_measurements = {
        "H-2-Kb": {
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }

    minuend = Dataset.from_nested_dictionary(full_measurements)
    subtrahend = Dataset.from_nested_dictionary(removed_measurements)
    leftover = minuend.difference(subtrahend)
    eq_(leftover, Dataset.from_nested_dictionary(remaining_measurements))

Example #3

Show file

File: test_dataset.py Project: vreuter/mhcflurry

def test_dataset_random_split():
    """Check that ``random_split(n=2)`` on three entries yields sizes 2 and 1."""
    measurements = {
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
    }
    three_entry_dataset = Dataset.from_nested_dictionary(measurements)

    first_part, second_part = three_entry_dataset.random_split(n=2)
    assert len(first_part) == 2
    assert len(second_part) == 1

Example #4

Show file

def test_create_imputed_datasets_two_alleles():
    """Check imputation over two alleles with partially overlapping peptides.

    After ``impute_missing_values`` both alleles must still be present and
    each allele's group must contain all three peptides.
    """
    poly_a = "A" * 9
    poly_c = "C" * 9
    poly_s = "S" * 9
    measurements = {
        "HLA-A*02:01": {
            poly_a: 20.0,
            poly_c: 40000.0,
        },
        "HLA-A*02:05": {
            poly_s: 500.0,
            poly_a: 25.0,
        },
    }
    sparse_dataset = Dataset.from_nested_dictionary(measurements)

    imputer = MICE(n_imputations=25)
    imputed_dataset = sparse_dataset.impute_missing_values(imputer)

    eq_(imputed_dataset.unique_alleles(), {"HLA-A*02:01", "HLA-A*02:05"})
    all_peptides = {poly_a, poly_c, poly_s}
    # The allele name is not needed; only each group's peptides are checked.
    for _allele, per_allele in imputed_dataset.groupby_allele():
        eq_(set(per_allele.peptides), all_peptides)

Example #5

Show file

File: test_imputation.py Project: giancarlok/mhcflurry

def test_create_imputed_datasets_two_alleles():
    """Impute a two-allele dataset and verify every allele covers all peptides.

    The input gives each allele two of the three peptides; the imputed
    result is expected to keep both alleles and list all three peptides
    for each of them.
    """
    alanines, cysteines, serines = "A" * 9, "C" * 9, "S" * 9
    observed = Dataset.from_nested_dictionary({
        "HLA-A*02:01": {alanines: 20.0, cysteines: 40000.0},
        "HLA-A*02:05": {serines: 500.0, alanines: 25.0},
    })

    completed = observed.impute_missing_values(MICE(n_imputations=25))

    expected_alleles = {"HLA-A*02:01", "HLA-A*02:05"}
    eq_(completed.unique_alleles(), expected_alleles)

    expected_peptides = {alanines, cysteines, serines}
    for _name, group in completed.groupby_allele():
        eq_(set(group.peptides), expected_peptides)

Example #6

Show file

File: test_dataset.py Project: vreuter/mhcflurry

def test_dataset_cross_validation():
    """Check the folds produced by ``cross_validation_iterator``.

    With ``test_allele="HLA-A*02:01"`` and ``n_folds=2``, exactly two folds
    are expected. In each fold the training set spans both alleles, while
    the test set holds a single entry of the test allele only.
    """
    measurements = {
        "H-2-Kb": {
            "SIINFEKL": 10.0,
            "FEKLSIIN": 20000.0,
            "SIFEKLIN": 50000.0,
        },
        "HLA-A*02:01": {
            "ASASAS": 1.0,
            "CCC": 0.0,
        },
    }
    dataset = Dataset.from_nested_dictionary(measurements)

    held_out_allele = "HLA-A*02:01"
    expected_train_alleles = {"H-2-Kb", "HLA-A*02:01"}
    expected_test_alleles = {"HLA-A*02:01"}

    folds = dataset.cross_validation_iterator(
        test_allele=held_out_allele, n_folds=2)
    folds_seen = 0
    for training_part, held_out_part in folds:
        assert training_part.unique_alleles() == expected_train_alleles
        assert held_out_part.unique_alleles() == expected_test_alleles
        assert len(held_out_part) == 1
        folds_seen += 1
    assert folds_seen == 2