Example #1
0
def main(args_list=None):
    """Annotate a peptide dataset with NetMHCIIpan predictions and save a CSV.

    The dataset is built from the peptide text files in ``args.hits_txt``
    (followed by contig assembly) when that option is non-empty, otherwise
    it is read from ``args.dataset_csv``. For every allele group two columns
    are added, ``netmhciipan_ic50`` and ``netmhciipan_percentile_rank``, and
    the rows accumulated so far are written to ``args.output_csv``.

    Parameters
    ----------
    args_list : list of str, optional
        Command-line arguments to parse. When omitted (``None``) the
        process arguments ``sys.argv[1:]`` are used. An explicit empty
        list is parsed as given rather than replaced by ``sys.argv``.
    """
    # Compare against None so that main([]) really means "no arguments"
    # instead of silently picking up whatever is in sys.argv.
    if args_list is None:
        args_list = sys.argv[1:]
    args = parse_args(parser, args_list)
    print(args)

    if args.hits_txt:
        dataset = Dataset.from_peptides_text_files(args.hits_txt)
        dataset = dataset.assemble_contigs()
    else:
        dataset = Dataset.from_csv(args.dataset_csv)

    dataframes = []

    for allele, allele_dataset in dataset.groupby_allele():
        print("-- %s" % allele)
        df = allele_dataset.to_dataframe()
        netmhciipan = NetMHCIIpan([allele])

        # pad peptides with alanines in case they're shorter than 9mer
        peptides = ensure_peptide_lengths(df.peptide)
        n_peptides = len(peptides)
        binding_predictions = netmhciipan.predict_peptides(peptides)
        ic50_pred = np.array([x.affinity for x in binding_predictions])
        percentile_rank_pred = np.array(
            [x.percentile_rank for x in binding_predictions])

        # One prediction per input peptide is required for the column
        # assignments below to line up with the dataframe rows.
        assert len(ic50_pred) == n_peptides
        assert len(percentile_rank_pred) == n_peptides
        df["netmhciipan_ic50"] = ic50_pred
        df["netmhciipan_percentile_rank"] = percentile_rank_pred
        dataframes.append(df)

        # The output file is rewritten after every allele, so the CSV on
        # disk always contains all alleles processed so far.
        combined_df = pd.concat(dataframes)
        combined_df.to_csv(args.output_csv, index=False)
Example #2
0
def test_netmhcii_pan_multiple_alleles():
    """Predict 15- and 16-mers of one sequence for three alleles at once."""
    allele_names = (
        "HLA-DPA1*01:05-DPB1*100:01",
        "HLA-DQA1*05:11-DQB1*03:02",
        "HLA-DRB1*01:01",
    )
    alleles = [normalize_allele_name(name) for name in allele_names]
    ii_pan_predictor = NetMHCIIpan(alleles=alleles, epitope_lengths=[15, 16])
    epitope_collection = ii_pan_predictor.predict(
        fasta_dictionary={"TP53-001": "SQAMDDLMLSPDDIEQWFTED"})

    # Both requested lengths must show up, and nothing else.
    seen_lengths = set(epitope.length for epitope in epitope_collection)
    eq_(seen_lengths, {15, 16})

    # Every requested allele must be reported under its original spelling.
    seen_alleles = set(epitope.allele for epitope in epitope_collection)
    eq_(seen_alleles, set(allele_names))

    # The sequence has 21 residues, so each allele contributes
    # (21-15+1) fifteen-mers and (21-16+1) sixteen-mers:
    # 3 * (7 + 6) = 39 entries in total.
    n_epitopes = len(epitope_collection)
    assert n_epitopes == 39, \
        "Expected 39 epitopes from %s" % (epitope_collection,)
Example #3
0
def test_netmhcii_pan_multiple_alleles():
    """Scan one sequence for 15- and 16-mers against three alleles."""
    allele_names = (
        "HLA-DPA1*01:05-DPB1*100:01",
        "HLA-DQA1*05:11-DQB1*03:02",
        "HLA-DRA1*01:01-DRB1*01:01",
    )
    alleles = [normalize_allele_name(name) for name in allele_names]
    ii_pan_predictor = NetMHCIIpan(alleles=alleles)
    binding_predictions = ii_pan_predictor.predict_subsequences(
        sequence_dict={"TP53-001": "SQAMDDLMLSPDDIEQWFTED"},
        peptide_lengths=[15, 16])

    # Only the two requested peptide lengths may appear.
    seen_lengths = set(pred.length for pred in binding_predictions)
    eq_(seen_lengths, {15, 16})

    # Each allele must come back under the same name it was requested with.
    seen_alleles = set(pred.allele for pred in binding_predictions)
    eq_(seen_alleles, set(allele_names))

    # The sequence has 21 residues, so each allele contributes
    # (21-15+1) fifteen-mers and (21-16+1) sixteen-mers:
    # 3 * (7 + 6) = 39 entries in total.
    n_predictions = len(binding_predictions)
    assert n_predictions == 39, \
        "Expected 39 epitopes from %s" % (binding_predictions,)
Example #4
0
def test_netmhcii_pan_mouse():
    """Scan two sequences for 15- and 16-mers against the H2-IAb allele."""
    sequences = {
        "SMAD4-001": "PAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGT",
        "TP53-001": "SQAMDDLMLSPDDIEQWFTED",
    }
    ii_pan_predictor = NetMHCIIpan(alleles=[normalize_allele_name("H2-IAb")])
    binding_predictions = ii_pan_predictor.predict_subsequences(
        sequence_dict=sequences, peptide_lengths=[15, 16])

    # Only the two requested peptide lengths may appear.
    seen_lengths = set(pred.length for pred in binding_predictions)
    eq_(seen_lengths, {15, 16})

    # The allele is expected back in its "H-2-IAb" spelling.
    seen_alleles = set(pred.allele for pred in binding_predictions)
    eq_(seen_alleles, {"H-2-IAb"})

    # SMAD4-001 has 34 residues: (34-15+1) + (34-16+1) = 20 + 19 peptides.
    # TP53-001 has 21 residues:  (21-15+1) + (21-16+1) = 7 + 6 peptides.
    # Together that is 52 entries.
    n_predictions = len(binding_predictions)
    assert n_predictions == 52, \
        "Expected 52 epitopes from %s" % (binding_predictions,)
Example #5
0
def test_netmhcii_pan_mouse():
    """Predict 15- and 16-mers of two sequences for the H2-IAb allele."""
    sequences = {
        "SMAD4-001": "PAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGT",
        "TP53-001": "SQAMDDLMLSPDDIEQWFTED",
    }
    ii_pan_predictor = NetMHCIIpan(
        alleles=[normalize_allele_name("H2-IAb")],
        epitope_lengths=[15, 16])
    epitope_collection = ii_pan_predictor.predict(fasta_dictionary=sequences)

    # Only the two requested epitope lengths may appear.
    seen_lengths = set(epitope.length for epitope in epitope_collection)
    eq_(seen_lengths, {15, 16})

    # The allele is expected back in its "H-2-IAb" spelling.
    seen_alleles = set(epitope.allele for epitope in epitope_collection)
    eq_(seen_alleles, {"H-2-IAb"})

    # SMAD4-001 has 34 residues: (34-15+1) + (34-16+1) = 20 + 19 peptides.
    # TP53-001 has 21 residues:  (21-15+1) + (21-16+1) = 7 + 6 peptides.
    # Together that is 52 entries.
    n_epitopes = len(epitope_collection)
    assert n_epitopes == 52, \
        "Expected 52 epitopes from %s" % (epitope_collection,)
            start=100018900,
            ref='C',
            alt='T',
            ensembl=ensembl_grch37),
    Variant(contig=11,
            start=32861682,
            ref='G',
            alt='A',
            ensembl=ensembl_grch37)
])

# Two alleles, written in two different notations.
alleles = [
    "HLA-DPA1*01:05/DPB1*100:01",
    "DRB10102",
]

# Epitope lengths passed to the predictor.
epitope_lengths = [15, 16]

# Module-level predictor built from the alleles and lengths above.
mhc_model = NetMHCIIpan(
    alleles=alleles,
    epitope_lengths=epitope_lengths)


def test_netmhcii_pan_epitopes():
    epitope_predictions = predict_epitopes_from_variants(
        mhc_model=mhc_model,
        variants=variants,
        transcript_expression_dict=None,
        only_novel_epitopes=True)

    # expect (15 + 16 mutant peptides) * (2 alleles) * 2 variants =
    # 124 total epitope predictions
    eq_(len(epitope_predictions), 124)
    unique_alleles = {
        epitope_prediction.allele
        for epitope_prediction in epitope_predictions
Example #7
0
        alt='T',
        ensembl=ensembl_grch37),
    Variant(
        contig=11,
        start=32861682,
        ref='G',
        alt='A',
        ensembl=ensembl_grch37)])

# Two alleles, written in two different notations.
alleles = ["HLA-DPA1*01:05/DPB1*100:01", "DRB10102"]

# Module-level predictor; 15 and 16 are its default peptide lengths.
mhc_model = NetMHCIIpan(alleles=alleles, default_peptide_lengths=[15, 16])

def test_netmhcii_pan_epitopes():
    epitope_predictions = TopiaryPredictor(
        mhc_model=mhc_model,
        only_novel_epitopes=True).predict_from_variants(variants=variants)

    # expect (15 + 16 mutant peptides) * (2 alleles) * 2 variants =
    # 124 total epitope predictions
    eq_(len(epitope_predictions), 124)
    unique_alleles = set(epitope_predictions.allele)
    assert len(unique_alleles) == 2, \
        "Expected 2 unique alleles, got %s" % (unique_alleles,)
    unique_lengths = set(epitope_predictions.peptide_length)
    assert unique_lengths == {15, 16}, \
def test_class2_8mer_fails():
    """Predicting an 8-residue peptide must raise ValueError."""
    predictor = NetMHCIIpan(alleles=["HLA-DRB1*01:01"])
    eight_mer = "A" * 8
    with assert_raises(ValueError):
        predictor.predict_peptides([eight_mer])
def test_class2_9mer_success():
    """Predicting a single 9-residue peptide yields exactly one result."""
    predictor = NetMHCIIpan(alleles=["HLA-DRB1*01:01"])
    nine_mer = "A" * 9
    results = predictor.predict_peptides([nine_mer])
    eq_(len(results), 1)