def main(args_list=None):
    """Annotate a peptide dataset with NetMHCIIpan predictions and save it.

    The dataset is built from the peptide text files in ``args.hits_txt``
    (followed by ``assemble_contigs()``) when any were given; otherwise it
    is loaded from ``args.dataset_csv``. For every allele group the peptides
    are scored with NetMHCIIpan, the predicted affinity and percentile rank
    are stored in the ``netmhciipan_ic50`` and
    ``netmhciipan_percentile_rank`` columns, and the per-allele frames are
    concatenated and written to ``args.output_csv`` without the index.

    Parameters
    ----------
    args_list : list of str, optional
        Command-line arguments to parse. ``None`` (the default) means
        ``sys.argv[1:]``; an explicitly passed list is parsed as given,
        even when it is empty.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of peptides
        submitted for an allele, or if the dataset yields no allele groups.
    """
    # Test for None specifically so that an explicit empty list is not
    # silently replaced by the interpreter's own command line.
    if args_list is None:
        args_list = sys.argv[1:]
    args = parse_args(parser, args_list)
    print(args)

    if args.hits_txt:
        dataset = Dataset.from_peptides_text_files(args.hits_txt)
        dataset = dataset.assemble_contigs()
    else:
        dataset = Dataset.from_csv(args.dataset_csv)

    dataframes = []
    for allele, allele_dataset in dataset.groupby_allele():
        print("-- %s" % allele)
        df = allele_dataset.to_dataframe()
        netmhciipan = NetMHCIIpan([allele])
        # pad peptides with alanines in case they're shorter than 9mer
        peptides = ensure_peptide_lengths(df.peptide)
        n_peptides = len(peptides)
        binding_predictions = netmhciipan.predict_peptides(peptides)
        ic50_pred = np.array([x.affinity for x in binding_predictions])
        percentile_rank_pred = np.array(
            [x.percentile_rank for x in binding_predictions])
        # Explicit check instead of `assert`, which disappears under -O and
        # would let misaligned predictions be written next to the peptides.
        if not (len(ic50_pred) == len(percentile_rank_pred) == n_peptides):
            raise ValueError(
                "Expected %d predictions for allele %s but got %d" % (
                    n_peptides, allele, len(ic50_pred)))
        df["netmhciipan_ic50"] = ic50_pred
        df["netmhciipan_percentile_rank"] = percentile_rank_pred
        dataframes.append(df)

    # pd.concat refuses an empty list with a generic message; fail with the
    # same exception type but say what actually went wrong.
    if not dataframes:
        raise ValueError(
            "Dataset contains no allele groups; nothing to write to %s" % (
                args.output_csv,))
    combined_df = pd.concat(dataframes)
    combined_df.to_csv(args.output_csv, index=False)
def test_netmhcii_pan_multiple_alleles():
    """Predict 15- and 16-mers of one protein for three class II alleles.

    Verifies that the returned collection contains only the requested
    peptide lengths and alleles, and one entry per allele and window.
    """
    allele_names = (
        "HLA-DPA1*01:05-DPB1*100:01",
        "HLA-DQA1*05:11-DQB1*03:02",
        "HLA-DRB1*01:01",
    )
    predictor = NetMHCIIpan(
        alleles=[normalize_allele_name(name) for name in allele_names],
        epitope_lengths=[15, 16])
    proteins = {"TP53-001": "SQAMDDLMLSPDDIEQWFTED"}
    epitopes = predictor.predict(fasta_dictionary=proteins)

    eq_(set(epitope.length for epitope in epitopes), {15, 16})
    eq_(set(epitope.allele for epitope in epitopes), set(allele_names))

    # The protein has 21 residues, so every allele contributes
    # (21-15+1) + (21-16+1) = 13 windows; with 3 alleles that is 39 entries.
    n_epitopes = len(epitopes)
    assert n_epitopes == 39, \
        "Expected 39 epitopes from %s" % (epitopes,)
def test_netmhcii_pan_multiple_alleles():
    """Predict 15- and 16-mer subsequences for three class II alleles.

    Verifies that the predictions cover only the requested peptide lengths
    and alleles, and contain one entry per allele and window.
    """
    allele_names = (
        "HLA-DPA1*01:05-DPB1*100:01",
        "HLA-DQA1*05:11-DQB1*03:02",
        "HLA-DRA1*01:01-DRB1*01:01",
    )
    predictor = NetMHCIIpan(
        alleles=[normalize_allele_name(name) for name in allele_names])
    proteins = {"TP53-001": "SQAMDDLMLSPDDIEQWFTED"}
    predictions = predictor.predict_subsequences(
        sequence_dict=proteins,
        peptide_lengths=[15, 16])

    eq_(set(pred.length for pred in predictions), {15, 16})
    eq_(set(pred.allele for pred in predictions), set(allele_names))

    # The protein has 21 residues, so every allele contributes
    # (21-15+1) + (21-16+1) = 13 windows; with 3 alleles that is 39 entries.
    n_predictions = len(predictions)
    assert n_predictions == 39, \
        "Expected 39 epitopes from %s" % (predictions,)
def test_netmhcii_pan_mouse():
    """Predict 15- and 16-mer subsequences of two proteins for a mouse allele.

    Verifies the reported peptide lengths, that the allele is reported as
    "H-2-IAb", and the total number of predictions.
    """
    mouse_allele = normalize_allele_name("H2-IAb")
    predictor = NetMHCIIpan(alleles=[mouse_allele])
    proteins = {
        "SMAD4-001": "PAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGT",
        "TP53-001": "SQAMDDLMLSPDDIEQWFTED",
    }
    predictions = predictor.predict_subsequences(
        sequence_dict=proteins,
        peptide_lengths=[15, 16])

    eq_(set(pred.length for pred in predictions), {15, 16})
    eq_(set(pred.allele for pred in predictions), {"H-2-IAb"})

    # SMAD4-001 has 34 residues and TP53-001 has 21, so the windows add up to
    # (34-15+1) + (34-16+1) + (21-15+1) + (21-16+1) = 52 entries.
    n_predictions = len(predictions)
    assert n_predictions == 52, \
        "Expected 52 epitopes from %s" % (predictions,)
def test_netmhcii_pan_mouse():
    """Predict 15- and 16-mers of two proteins for a single mouse allele.

    Verifies the reported peptide lengths, that the allele is reported as
    "H-2-IAb", and the total number of epitopes.
    """
    mouse_allele = normalize_allele_name("H2-IAb")
    predictor = NetMHCIIpan(
        alleles=[mouse_allele],
        epitope_lengths=[15, 16])
    proteins = {
        "SMAD4-001": "PAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGT",
        "TP53-001": "SQAMDDLMLSPDDIEQWFTED",
    }
    epitopes = predictor.predict(fasta_dictionary=proteins)

    eq_(set(epitope.length for epitope in epitopes), {15, 16})
    eq_(set(epitope.allele for epitope in epitopes), {"H-2-IAb"})

    # SMAD4-001 has 34 residues and TP53-001 has 21, so the windows add up to
    # (34-15+1) + (34-16+1) + (21-15+1) + (21-16+1) = 52 entries.
    n_epitopes = len(epitopes)
    assert n_epitopes == 52, \
        "Expected 52 epitopes from %s" % (epitopes,)
start=100018900, ref='C', alt='T', ensembl=ensembl_grch37), Variant(contig=11, start=32861682, ref='G', alt='A', ensembl=ensembl_grch37) ]) alleles = ["HLA-DPA1*01:05/DPB1*100:01", "DRB10102"] epitope_lengths = [15, 16] mhc_model = NetMHCIIpan(alleles=alleles, epitope_lengths=epitope_lengths) def test_netmhcii_pan_epitopes(): epitope_predictions = predict_epitopes_from_variants( mhc_model=mhc_model, variants=variants, transcript_expression_dict=None, only_novel_epitopes=True) # expect (15 + 16 mutant peptides) * (2 alleles) * 2 variants = # 124 total epitope predictions eq_(len(epitope_predictions), 124) unique_alleles = { epitope_prediction.allele for epitope_prediction in epitope_predictions
alt='T', ensembl=ensembl_grch37), Variant( contig=11, start=32861682, ref='G', alt='A', ensembl=ensembl_grch37)]) alleles = [ "HLA-DPA1*01:05/DPB1*100:01", "DRB10102" ] mhc_model = NetMHCIIpan( alleles=alleles, default_peptide_lengths=[15, 16]) def test_netmhcii_pan_epitopes(): epitope_predictions = TopiaryPredictor( mhc_model=mhc_model, only_novel_epitopes=True).predict_from_variants(variants=variants) # expect (15 + 16 mutant peptides) * (2 alleles) * 2 variants = # 124 total epitope predictions eq_(len(epitope_predictions), 124) unique_alleles = set(epitope_predictions.allele) assert len(unique_alleles) == 2, \ "Expected 2 unique alleles, got %s" % (unique_alleles,) unique_lengths = set(epitope_predictions.peptide_length) assert unique_lengths == {15, 16}, \
def test_class2_8mer_fails():
    """Predicting an 8-residue peptide must raise ValueError."""
    predictor = NetMHCIIpan(alleles=["HLA-DRB1*01:01"])
    eight_mer = "A" * 8
    with assert_raises(ValueError):
        predictor.predict_peptides([eight_mer])
def test_class2_9mer_success():
    """Predicting a single 9-residue peptide yields exactly one prediction."""
    predictor = NetMHCIIpan(alleles=["HLA-DRB1*01:01"])
    nine_mer = "A" * 9
    results = predictor.predict_peptides([nine_mer])
    eq_(len(results), 1)