def test_mixmhc2pred_allele(self):
    """Run MixMHC2pred on a genotype without usable alleles and expect no result."""
    mutation = get_mutation(
        mutated_xmer="TNENLDLQELVEKLEKN",
        wild_type_xmer="TNENLDLQNLVEKLEKN",
    )
    # this is a MHC II genotype which results in no available alleles for MixMHC2pred
    allele_names = [
        "HLA-DRB1*14:54",
        "HLA-DRB1*14:54",
        "HLA-DQA1*01:04",
        "HLA-DQA1*01:04",
        "HLA-DQB1*05:03",
        "HLA-DQB1*05:03",
        "HLA-DPB1*02:01",
        "HLA-DPB1*02:01",
    ]
    mhc2_genotype = MhcFactory.build_mhc2_alleles(allele_names, self.hla_database)

    # the transformed alleles are only logged, they are not asserted on
    transformed = self.mixmhc2pred.transform_hla_ii_alleles_for_prediction(mhc2_genotype)
    logger.info(transformed)

    peptide, rank, allele = self.mixmhc2pred.run(
        mutation=mutation,
        mhc=mhc2_genotype,
        uniprot=self.uniprot,
    )
    logger.info(peptide)

    self.assertIsNone(peptide)
    self.assertIsNone(rank)
    self.assertIsNone(allele)
def test_parse_mhc1_hemizygous_alleles(self):
    """A single allele for each of A, B and C yields three hemizygous entries."""
    allele_names = ["HLA-A*01:01", "HLA-B*07:02", "HLA-C*01:02"]
    parsed = MhcFactory.build_mhc1_alleles(allele_names, self.hla_database)

    self.assertEqual(3, len(parsed))
    for entry in parsed:
        self.assertEqual(Zygosity.HEMIZYGOUS, entry.zygosity)
        self.assertEqual(1, len(entry.alleles))
def get_hla_one_test(hla_database):
    """Build the human MHC I test genotype (two distinct alleles for each of A, B and C)."""
    allele_names = [
        "HLA-A*24:02",
        "HLA-A*02:01",
        "HLA-B*15:01",
        "HLA-B*44:02",
        "HLA-C*07:02",
        "HLA-C*05:01",
    ]
    return MhcFactory.build_mhc1_alleles(allele_names, hla_database)
def get_h2_one_test(h2_database):
    """Build the mouse MHC I test genotype (each of H2Kd, H2Dd and H2Ld given twice)."""
    allele_names = [
        "H2Kd",
        "H2Kd",
        "H2Dd",
        "H2Dd",
        "H2Ld",
        "H2Ld",
    ]
    return MhcFactory.build_mhc1_alleles(allele_names, h2_database)
def test_parse_mhc1_non_existing_allele_does_not_fail_mouse(self):
    """Parsing mouse MHC I alleles still returns three entries when one allele is unknown."""
    allele_names = [
        "H2Kd",
        "H2Kz",  # this one does not exist
        "H2Dd",
        "H2Ld",
    ]
    parsed = MhcFactory.build_mhc1_alleles(allele_names, self.h2_database)
    self.assertEqual(3, len(parsed))
def test_parse_mhc2_non_existing_allele_does_not_fail_mouse(self):
    """Parsing mouse MHC II alleles still returns two entries when one allele is unknown."""
    allele_names = [
        "H2Ad",
        "H2Az",  # this one does not exist
        "H2Ed",
        "H2Ed",
    ]
    parsed = MhcFactory.build_mhc2_alleles(allele_names, self.h2_database)
    self.assertEqual(2, len(parsed))
def test_parse_mhc1_hemizygous_alleles_mouse(self):
    """A single allele for each mouse MHC I gene yields three hemizygous entries."""
    allele_names = ["H2Kd", "H2Dd", "H2Ld"]
    parsed = MhcFactory.build_mhc1_alleles(allele_names, self.h2_database)

    self.assertEqual(3, len(parsed))
    for entry in parsed:
        self.assertEqual(Zygosity.HEMIZYGOUS, entry.zygosity)
        self.assertEqual(1, len(entry.alleles))
def test_parse_mhc1_non_existing_allele_does_not_fail(self):
    """Parsing human MHC I alleles still returns three entries when one allele is unknown."""
    allele_names = [
        "HLA-A*01:01",
        "HLA-A*01:01",
        "HLA-B*999:01",  # this one does not exist
        "HLA-B*07:02",
        "HLA-C*01:02",
        "HLA-C*01:02",
    ]
    parsed = MhcFactory.build_mhc1_alleles(allele_names, self.hla_database)
    self.assertEqual(3, len(parsed))
def test_parse_mhc2_non_existing_allele_does_not_fail(self):
    """Parsing human MHC II alleles still returns three entries when one allele is unknown."""
    allele_names = [
        "HLA-DRB1*999:01",  # this one does not exist
        "HLA-DPA1*01:03",
        "HLA-DPB1*01:01",
        "HLA-DQA1*01:01",
        "HLA-DQB1*02:01",
    ]
    parsed = MhcFactory.build_mhc2_alleles(allele_names, self.hla_database)
    self.assertEqual(3, len(parsed))
def get_hla_two_test(hla_database):
    """Build the human MHC II test genotype (two distinct alleles per listed gene)."""
    allele_names = [
        "HLA-DRB1*04:02",
        "HLA-DRB1*08:01",
        "HLA-DQA1*03:01",
        "HLA-DQA1*04:01",
        "HLA-DQB1*03:02",
        "HLA-DQB1*04:02",
        "HLA-DPA1*01:03",
        "HLA-DPA1*02:01",
        "HLA-DPB1*13:01",
        "HLA-DPB1*04:01",
    ]
    return MhcFactory.build_mhc2_alleles(allele_names, hla_database)
def patient(self) -> Patient:
    """Generate a random patient whose MHC genotype passes model validation.

    Random MHC I and MHC II alleles are drawn and a ``Patient`` is built and
    validated; whenever a ``NeofoxDataValidationException`` is raised the whole
    draw is repeated.

    :return: the first randomly generated patient that validates
    """
    while True:
        dr_isoforms = self.random_elements(
            self.get_hla_ii_alleles_by_gene(Mhc2Name.DR), unique=True, length=2)
        dp_isoforms = self.random_elements(
            self.get_hla_ii_alleles_by_gene(Mhc2Name.DP), unique=True, length=2)
        dq_isoforms = self.random_elements(
            self.get_hla_ii_alleles_by_gene(Mhc2Name.DQ), unique=True, length=2)

        # NOTE: for some reason some DP alleles are malformed and cause a validation
        # error, most do not. Hence the draw is retried until a valid combination of
        # HLA alleles comes up; the root cause is still to be clarified.
        try:
            identifier = self.generator.unique.uuid4()
            tumor_type = self.random_elements(self.available_tumor_types, length=1)[0]

            # by setting unique=True we enforce that all patients are heterozygous
            mhc1_names = (
                self.random_elements(self.get_hla_i_alleles_by_gene(Mhc1Name.A), unique=True, length=2)
                + self.random_elements(self.get_hla_i_alleles_by_gene(Mhc1Name.B), unique=True, length=2)
                + self.random_elements(self.get_hla_i_alleles_by_gene(Mhc1Name.C), unique=True, length=2)
            )
            mhc1 = MhcFactory.build_mhc1_alleles(mhc1_names, self.hla_database)

            # DP and DQ contribute alpha then beta chains, DR only its beta chains
            mhc2_names = []
            for isoforms in (dp_isoforms, dq_isoforms):
                mhc2_names.extend(i.alpha_chain.name for i in isoforms)
                mhc2_names.extend(i.beta_chain.name for i in isoforms)
            mhc2_names.extend(i.beta_chain.name for i in dr_isoforms)
            mhc2 = MhcFactory.build_mhc2_alleles(mhc2_names, self.hla_database)

            candidate = Patient(
                identifier=identifier,
                is_rna_available=True,
                tumor_type=tumor_type,
                mhc1=mhc1,
                mhc2=mhc2,
            )
            ModelValidator.validate_patient(candidate)
        except NeofoxDataValidationException:
            continue
        return candidate
def test_patient_with_non_existing_allele_does_not_crash(self):
    """Run NeoFox on patients whose first MHC I allele was swapped for "HLA-A*99:99".

    The test has no assertions: it passes as long as nothing raises.
    """
    neoantigens, patients, patient_id = self._get_test_data()

    for patient in patients:
        # sets one MHC I allele to a non existing allele
        replacement = MhcFactory.build_mhc1_alleles(
            ["HLA-A*99:99"], mhc_database=self.hla_database)
        patient.mhc1[0].alleles[0] = replacement[0].alleles[0]

    NeoFox(
        neoantigens=neoantigens,
        patient_id=patient_id,
        patients=patients,
        num_cpus=1,
    ).get_annotations()
def test_mixmhcpred_not_supported_allele(self):
    """
    this is a combination of neoepitope and HLA alleles from Balachandran

    MixMHCpred is run on a genotype given as short allele names
    ("A02:01", "B44:02", "C05:17", "C05:01") and the best peptide, score,
    rank and allele are checked.
    NOTE(review): judging by the test name at least one of these alleles is
    presumably unsupported by MixMHCpred - confirm which one.
    """
    mutation = get_mutation(mutated_xmer="SIYGGLVLI", wild_type_xmer="PIYGGLVLI")
    mhc1 = MhcFactory.build_mhc1_alleles(
        ["A02:01", "B44:02", "C05:17", "C05:01"], self.hla_database)

    best_peptide, best_rank, best_allele, best_score = self.mixmhcpred.run(
        mutation=mutation,
        mhc=mhc1,
        uniprot=self.uniprot,
    )

    self.assertEqual('SIYGGLVLI', best_peptide)
    # the score is a float computed by the predictor: compare with a tolerance
    # instead of exact equality so the test does not hinge on the last bits
    self.assertAlmostEqual(0.15829400000000002, best_score)
    self.assertEqual(1, best_rank)
    self.assertEqual('HLA-A*02:01', best_allele)
def test_parse_mhc2_hemizygous_alleles_mouse(self):
    """A single allele for each of H2Ad and H2Ed yields two hemizygous MHC II entries.

    Every entry must have one gene holding one hemizygous allele and exactly
    one isoform; the isoforms are further checked by ``_assert_isoforms``.
    """
    mhc2s = MhcFactory.build_mhc2_alleles(
        [
            "H2Ad",
            "H2Ed",
        ],
        self.h2_database
    )
    self.assertEqual(2, len(mhc2s))
    for mhc2 in mhc2s:
        self.assertEqual(1, len(mhc2.genes))
        for gene in mhc2.genes:
            self.assertEqual(1, len(gene.alleles))
            self.assertEqual(Zygosity.HEMIZYGOUS, gene.zygosity)
        # one isoform is expected for every entry (the former conditional on
        # Mhc2Name.DR evaluated to 1 in both branches)
        self.assertEqual(1, len(mhc2.isoforms))
        self._assert_isoforms(mhc2)
def test_parse_mhc2_homozygous_alleles_mouse(self):
    """Each of H2Ad and H2Ed given twice yields two homozygous MHC II entries."""
    allele_names = ["H2Ad", "H2Ad", "H2Ed", "H2Ed"]
    parsed = MhcFactory.build_mhc2_alleles(allele_names, self.h2_database)

    self.assertEqual(2, len(parsed))
    for entry in parsed:
        self.assertEqual(1, len(entry.genes))
        for gene in entry.genes:
            # the duplicated allele is expected to be stored only once
            self.assertEqual(1, len(gene.alleles))
            self.assertEqual(Zygosity.HOMOZYGOUS, gene.zygosity)
        self.assertEqual(1, len(entry.isoforms))
        self._assert_isoforms(entry)
def test_parse_mhc2_hemizygous_alleles(self):
    """A single allele per human MHC II gene yields three hemizygous entries.

    DR is expected to hold one gene and the other entries two; every gene must
    hold one hemizygous allele and every entry exactly one isoform. The
    isoforms are further checked by ``_assert_isoforms``.
    """
    mhc2s = MhcFactory.build_mhc2_alleles(
        [
            "HLA-DRB1*01:01",
            "HLA-DPA1*01:03",
            "HLA-DPB1*02:01",
            "HLA-DQA1*01:01",
            "HLA-DQB1*02:01",
        ],
        self.hla_database
    )
    self.assertEqual(3, len(mhc2s))
    for mhc2 in mhc2s:
        self.assertEqual(1 if mhc2.name == Mhc2Name.DR else 2, len(mhc2.genes))
        for gene in mhc2.genes:
            self.assertEqual(1, len(gene.alleles))
            self.assertEqual(Zygosity.HEMIZYGOUS, gene.zygosity)
        # one isoform is expected for every entry (the former conditional on
        # Mhc2Name.DR evaluated to 1 in both branches)
        self.assertEqual(1, len(mhc2.isoforms))
        self._assert_isoforms(mhc2)
def test_parse_mhc2_hetero_and_homozygous_alleles(self):
    """Parse a human MHC II genotype mixing duplicated and distinct alleles.

    Three entries are expected; DR must hold one gene and one isoform, every
    other entry two genes and two isoforms.
    """
    allele_names = [
        "HLA-DRB1*01:01",
        "HLA-DRB1*01:01",
        "HLA-DPA1*01:03",
        "HLA-DPA1*01:03",
        "HLA-DPB1*02:01",
        "HLA-DPB1*02:02",
        "HLA-DQA1*01:01",
        "HLA-DQA1*01:01",
        "HLA-DQB1*02:01",
        "HLA-DQB1*02:02",
    ]
    parsed = MhcFactory.build_mhc2_alleles(allele_names, self.hla_database)

    self.assertEqual(3, len(parsed))
    for entry in parsed:
        self.assertEqual(1 if entry.name == Mhc2Name.DR else 2, len(entry.genes))
        self.assertEqual(1 if entry.name == Mhc2Name.DR else 2, len(entry.isoforms))
def test_neoantigen_no_wt_failing(self):
    """Annotate a neoantigen without wild type xmer and check annotations are returned."""
    patient_identifier = "12345"
    mutation = Mutation(mutated_xmer="SPSFPLEPDDEVFTAIAKAMEEMVEDS")
    neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)

    mhc1 = MhcFactory.build_mhc1_alleles(
        [
            "HLA-A*02:24",
            "HLA-A*36:04",
            "HLA-B*58:25",
            "HLA-B*35:102",
            "HLA-C*02:30",
            "HLA-C*07:139",
        ],
        mhc_database=self.references.get_mhc_database(),
    )
    patient = Patient(identifier=patient_identifier, mhc1=mhc1)

    neofox = NeoFox(neoantigens=[neoantigen], patients=[patient], num_cpus=1)
    annotations = neofox.get_annotations()

    # it does not crash even though there are no best 9mers
    self.assertIsNotNone(annotations)
def test_neoantigen_failing(self):
    """Annotate one neoantigen with both xmers and check annotations are returned."""
    patient_identifier = "12345"
    mutation = Mutation(
        wild_type_xmer="ARPDMFCLFHGKRYFPGESWHPYLEPQ",
        mutated_xmer="ARPDMFCLFHGKRHFPGESWHPYLEPQ",
    )
    neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)

    mhc1 = MhcFactory.build_mhc1_alleles(
        [
            "HLA-A*03:01",
            "HLA-A*29:02",
            "HLA-B*07:02",
            "HLA-B*44:03",
            "HLA-C*07:02",
            "HLA-C*16:01",
        ],
        mhc_database=self.references.get_mhc_database(),
    )
    patient = Patient(identifier=patient_identifier, mhc1=mhc1)

    neofox = NeoFox(neoantigens=[neoantigen], patients=[patient], num_cpus=1)
    annotations = neofox.get_annotations()

    # it does not crash even though there are no best 9mers
    self.assertIsNotNone(annotations)
def test_parse_mhc1_loss_alleles(self):
    """An empty allele list must be parsed into an empty MHC I result."""
    no_alleles = []
    parsed = MhcFactory.build_mhc1_alleles(no_alleles, self.hla_database)
    self.assertEqual(0, len(parsed))
def get_h2_two_test(h2_database):
    """Build the mouse MHC II test genotype (each of H2Ad and H2Ed given twice)."""
    allele_names = [
        "H2Ad",
        "H2Ad",
        "H2Ed",
        "H2Ed",
    ]
    return MhcFactory.build_mhc2_alleles(allele_names, h2_database)
def test_phbr1(self):
    """Check the PHBR-I value for three MHC I genotypes.

    The same mutated xmer is scored against the default test genotype, a
    genotype with one duplicated HLA-C allele and a genotype with a single
    HLA-C allele; the last one is expected to give no PHBR-I at all.
    """
    best_multiple = BestAndMultipleBinder(
        runner=self.runner,
        configuration=self.configuration,
        mhc_parser=self.mhc_parser,
        blastp_runner=self.proteome_blastp_runner,
    )
    netmhcpan = NetMhcPanPredictor(
        runner=self.runner,
        configuration=self.configuration,
        mhc_parser=self.mhc_parser,
        blastp_runner=self.proteome_blastp_runner,
    )
    mutation = get_mutation(
        mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET",
        wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET",
    )

    def phbr_for(mhc1_alleles):
        # NOTE(review): predictions are always run against self.test_mhc_one,
        # whatever genotype is scored; only the epitope extraction and the
        # PHBR computation use mhc1_alleles - confirm this is intended
        predictions = netmhcpan.mhc_prediction(
            self.test_mhc_one, self.available_alleles_mhc1, mutation.mutated_xmer)
        filtered = netmhcpan.remove_peptides_in_proteome(
            predictions=predictions, uniprot=self.uniprot)
        best_per_allele = BestAndMultipleBinder.extract_best_epitope_per_alelle(
            filtered, mhc1_alleles)
        return best_multiple.calculate_phbr_i(best_per_allele, mhc1_alleles)

    # all alleles = heterozygous
    phbr_i = phbr_for(self.test_mhc_one)
    self.assertIsNotNone(phbr_i)
    self.assertAlmostEqual(1.359324592015038, phbr_i)

    # one homozygous allele present
    homozygous_c = MhcFactory.build_mhc1_alleles(
        [
            "HLA-A*24:02",
            "HLA-A*02:01",
            "HLA-B*15:01",
            "HLA-B*44:02",
            "HLA-C*05:01",
            "HLA-C*05:01",
        ],
        self.hla_database,
    )
    phbr_i = phbr_for(homozygous_c)
    self.assertIsNotNone(phbr_i)
    self.assertAlmostEqual(1.0036998409510969, phbr_i)

    # no info for one allele
    single_c = MhcFactory.build_mhc1_alleles(
        [
            "HLA-A*24:02",
            "HLA-A*02:01",
            "HLA-B*15:01",
            "HLA-B*44:02",
            "HLA-C*05:01",
        ],
        self.hla_database,
    )
    phbr_i = phbr_for(single_c)
    self.assertIsNone(phbr_i)
def test_phbr2(self):
    """Check the PHBR-II value for three MHC II genotypes.

    The same mutated xmer is scored against the default test genotype, a
    genotype with a duplicated HLA-DPB1 allele and a genotype with a single
    HLA-DPB1 allele; the last two are expected to give no PHBR-II at all.
    """
    best_multiple = BestAndMultipleBinderMhcII(
        runner=self.runner,
        configuration=self.configuration,
        mhc_parser=self.mhc_parser,
        blastp_runner=self.proteome_blastp_runner,
    )
    netmhc2pan = NetMhcIIPanPredictor(
        runner=self.runner,
        configuration=self.configuration,
        mhc_parser=self.mhc_parser,
        blastp_runner=self.proteome_blastp_runner,
    )
    mutation = get_mutation(
        mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET",
        wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET",
    )

    def filtered_predictions_for(mhc2):
        # combinations -> available isoforms -> predictions -> proteome filter
        combinations = netmhc2pan.generate_mhc2_alelle_combinations(mhc2)
        available = best_multiple._get_only_available_combinations(
            combinations, self.available_alleles_mhc2)
        raw_predictions = netmhc2pan.mhc2_prediction(available, mutation.mutated_xmer)
        return netmhc2pan.remove_peptides_in_proteome(
            predictions=raw_predictions, uniprot=self.uniprot)

    def logged_phbr_for(mhc2):
        # extracts the best epitopes, logs them and their count, returns the PHBR-II
        best_epitopes = best_multiple.extract_best_epitope_per_mhc2_alelle(
            filtered_predictions_for(mhc2), mhc2)
        logger.info(best_epitopes)
        logger.info(len(best_epitopes))
        return best_multiple.calculate_phbr_ii(best_epitopes)

    # all alleles = heterozygous
    filtered_predictions = filtered_predictions_for(self.test_mhc_two)
    logger.info(filtered_predictions)
    logger.info(self.test_mhc_two)
    best_epitopes_per_allele = best_multiple.extract_best_epitope_per_mhc2_alelle(
        predictions=filtered_predictions, mhc_isoforms=self.test_mhc_two)
    phbr_ii = best_multiple.calculate_phbr_ii(best_epitopes_per_allele)
    self.assertIsNotNone(phbr_ii)
    self.assertAlmostEqual(8.895757526065129, phbr_ii)

    # no info for one allele
    duplicated_dpb1 = MhcFactory.build_mhc2_alleles(
        [
            "HLA-DRB1*04:02",
            "HLA-DRB1*08:01",
            "HLA-DQA1*03:01",
            "HLA-DQA1*04:01",
            "HLA-DQB1*03:02",
            "HLA-DQB1*04:02",
            "HLA-DPA1*01:03",
            "HLA-DPA1*02:01",
            "HLA-DPB1*13:01",
            "HLA-DPB1*13:01",
        ],
        self.hla_database,
    )
    phbr_ii = logged_phbr_for(duplicated_dpb1)
    self.assertIsNone(phbr_ii)

    # one allele present
    single_dpb1 = MhcFactory.build_mhc2_alleles(
        [
            "HLA-DRB1*04:02",
            "HLA-DRB1*08:01",
            "HLA-DQA1*03:01",
            "HLA-DQA1*04:01",
            "HLA-DQB1*03:02",
            "HLA-DQB1*04:02",
            "HLA-DPA1*01:03",
            "HLA-DPA1*02:01",
            "HLA-DPB1*13:01",
        ],
        self.hla_database,
    )
    phbr_ii = logged_phbr_for(single_dpb1)
    self.assertIsNone(phbr_ii)
def test_parse_mhc2_loss_mouse(self):
    """An empty allele list must be parsed into an empty mouse MHC II result."""
    no_alleles = []
    parsed = MhcFactory.build_mhc2_alleles(no_alleles, self.h2_database)
    self.assertEqual(0, len(parsed))