def test_mixmhc2pred_allele(self):
    """Check that MixMHC2pred returns no best peptide, rank or allele for this genotype."""
    mutation = get_mutation(
        mutated_xmer="TNENLDLQELVEKLEKN",
        wild_type_xmer="TNENLDLQNLVEKLEKN",
    )
    # this is a MHC II genotype which results in no available alleles for MixMHC2pred
    genotype_names = [
        "HLA-DRB1*14:54",
        "HLA-DRB1*14:54",
        "HLA-DQA1*01:04",
        "HLA-DQA1*01:04",
        "HLA-DQB1*05:03",
        "HLA-DQB1*05:03",
        "HLA-DPB1*02:01",
        "HLA-DPB1*02:01",
    ]
    mhc2_genotype = MhcFactory.build_mhc2_alleles(genotype_names, self.hla_database)

    # Logged only for inspection; the transformed alleles are not asserted on.
    transformed = self.mixmhc2pred.transform_hla_ii_alleles_for_prediction(mhc2_genotype)
    logger.info(transformed)

    best_peptide, best_rank, best_allele = self.mixmhc2pred.run(
        mutation=mutation,
        mhc=mhc2_genotype,
        uniprot=self.uniprot,
    )
    logger.info(best_peptide)

    for result in (best_peptide, best_rank, best_allele):
        self.assertIsNone(result)
def test_parse_mhc2_non_existing_allele_does_not_fail_mouse(self):
    """Building H2 MHC II alleles with an unknown allele name still returns two entries."""
    allele_names = [
        "H2Ad",
        "H2Az",  # this one does not exist
        "H2Ed",
        "H2Ed",
    ]
    parsed = MhcFactory.build_mhc2_alleles(allele_names, self.h2_database)
    self.assertEqual(2, len(parsed))
def test_parse_mhc2_non_existing_allele_does_not_fail(self):
    """Building HLA MHC II alleles with an unknown allele name still returns three entries."""
    allele_names = [
        "HLA-DRB1*999:01",  # this one does not exist
        "HLA-DPA1*01:03",
        "HLA-DPB1*01:01",
        "HLA-DQA1*01:01",
        "HLA-DQB1*02:01",
    ]
    parsed = MhcFactory.build_mhc2_alleles(allele_names, self.hla_database)
    self.assertEqual(3, len(parsed))
def get_hla_two_test(hla_database):
    """Return the fixed HLA class II test genotype built against ``hla_database``.

    The genotype lists two distinct alleles for each of DRB1, DQA1, DQB1,
    DPA1 and DPB1, passed to ``MhcFactory.build_mhc2_alleles`` in that order.
    """
    drb1 = ["HLA-DRB1*04:02", "HLA-DRB1*08:01"]
    dqa1 = ["HLA-DQA1*03:01", "HLA-DQA1*04:01"]
    dqb1 = ["HLA-DQB1*03:02", "HLA-DQB1*04:02"]
    dpa1 = ["HLA-DPA1*01:03", "HLA-DPA1*02:01"]
    dpb1 = ["HLA-DPB1*13:01", "HLA-DPB1*04:01"]
    allele_names = drb1 + dqa1 + dqb1 + dpa1 + dpb1
    return MhcFactory.build_mhc2_alleles(allele_names, hla_database)
def test_parse_mhc2_hemizygous_alleles_mouse(self):
    """One copy each of two H2 class II alleles parses as two hemizygous entries.

    Each parsed entry is expected to hold a single gene with a single allele,
    hemizygous zygosity and exactly one isoform.
    """
    mhc2s = MhcFactory.build_mhc2_alleles(
        [
            "H2Ad",
            "H2Ed",
        ],
        self.h2_database
    )
    self.assertEqual(2, len(mhc2s))
    for mhc2 in mhc2s:
        self.assertEqual(1, len(mhc2.genes))
        for gene in mhc2.genes:
            self.assertEqual(1, len(gene.alleles))
            self.assertEqual(Zygosity.HEMIZYGOUS, gene.zygosity)
        # The expected isoform count is 1 for every entry; the former
        # ``1 if ... DR else 1`` conditional had two identical branches.
        self.assertEqual(1, len(mhc2.isoforms))
        self._assert_isoforms(mhc2)
def test_parse_mhc2_homozygous_alleles_mouse(self):
    """Two copies each of two H2 class II alleles parse as two homozygous entries."""
    # Each allele name is listed twice: H2Ad, H2Ad, H2Ed, H2Ed.
    allele_names = ["H2Ad", "H2Ad", "H2Ed", "H2Ed"]
    parsed = MhcFactory.build_mhc2_alleles(allele_names, self.h2_database)

    self.assertEqual(2, len(parsed))
    for entry in parsed:
        genes = entry.genes
        self.assertEqual(1, len(genes))
        for gene in genes:
            self.assertEqual(1, len(gene.alleles))
            self.assertEqual(Zygosity.HOMOZYGOUS, gene.zygosity)
        self.assertEqual(1, len(entry.isoforms))
        self._assert_isoforms(entry)
def test_parse_mhc2_hemizygous_alleles(self):
    """A single copy of each HLA class II allele parses as three hemizygous entries.

    DR is expected to carry one gene and every other entry two genes; every
    gene holds one hemizygous allele and every entry exactly one isoform.
    """
    mhc2s = MhcFactory.build_mhc2_alleles(
        [
            "HLA-DRB1*01:01",
            "HLA-DPA1*01:03",
            "HLA-DPB1*02:01",
            "HLA-DQA1*01:01",
            "HLA-DQB1*02:01",
        ],
        self.hla_database
    )
    self.assertEqual(3, len(mhc2s))
    for mhc2 in mhc2s:
        self.assertEqual(1 if mhc2.name == Mhc2Name.DR else 2, len(mhc2.genes))
        for gene in mhc2.genes:
            self.assertEqual(1, len(gene.alleles))
            self.assertEqual(Zygosity.HEMIZYGOUS, gene.zygosity)
        # The expected isoform count is 1 for every entry; the former
        # ``1 if ... DR else 1`` conditional had two identical branches.
        self.assertEqual(1, len(mhc2.isoforms))
        self._assert_isoforms(mhc2)
def test_parse_mhc2_hetero_and_homozygous_alleles(self):
    """A genotype mixing repeated and distinct allele pairs parses as three entries.

    DR is expected to have one gene and one isoform; every other entry is
    expected to have two genes and two isoforms.
    """
    allele_names = [
        "HLA-DRB1*01:01",
        "HLA-DRB1*01:01",
        "HLA-DPA1*01:03",
        "HLA-DPA1*01:03",
        "HLA-DPB1*02:01",
        "HLA-DPB1*02:02",
        "HLA-DQA1*01:01",
        "HLA-DQA1*01:01",
        "HLA-DQB1*02:01",
        "HLA-DQB1*02:02",
    ]
    parsed = MhcFactory.build_mhc2_alleles(allele_names, self.hla_database)

    self.assertEqual(3, len(parsed))
    for entry in parsed:
        if entry.name == Mhc2Name.DR:
            expected_genes = 1
        else:
            expected_genes = 2
        self.assertEqual(expected_genes, len(entry.genes))

        if entry.name == Mhc2Name.DR:
            expected_isoforms = 1
        else:
            expected_isoforms = 2
        self.assertEqual(expected_isoforms, len(entry.isoforms))
def patient(self) -> Patient:
    """Generate a random patient that passes ``ModelValidator.validate_patient``.

    Alleles are drawn at random on every attempt; an attempt that raises
    ``NeofoxDataValidationException`` is discarded and a new one is started.
    """
    while True:
        dr_isoforms = self.random_elements(self.get_hla_ii_alleles_by_gene(Mhc2Name.DR), unique=True, length=2)
        dp_isoforms = self.random_elements(self.get_hla_ii_alleles_by_gene(Mhc2Name.DP), unique=True, length=2)
        dq_isoforms = self.random_elements(self.get_hla_ii_alleles_by_gene(Mhc2Name.DQ), unique=True, length=2)

        # NOTE: for some reason some DP alleles are malformed and cause a validation error, most do not.
        # Hence the retry until a valid combination of HLA alleles is obtained.
        try:
            identifier = self.generator.unique.uuid4()
            tumor_type = self.random_elements(self.available_tumor_types, length=1)[0]

            # by setting unique=True we enforce that all patients are heterozygous
            mhc1_names = (
                self.random_elements(self.get_hla_i_alleles_by_gene(Mhc1Name.A), unique=True, length=2)
                + self.random_elements(self.get_hla_i_alleles_by_gene(Mhc1Name.B), unique=True, length=2)
                + self.random_elements(self.get_hla_i_alleles_by_gene(Mhc1Name.C), unique=True, length=2)
            )
            mhc1 = MhcFactory.build_mhc1_alleles(mhc1_names, self.hla_database)

            # DP and DQ contribute alpha then beta chain names; DR contributes beta chains only.
            mhc2_names = []
            for isoforms in (dp_isoforms, dq_isoforms):
                mhc2_names.extend([isoform.alpha_chain.name for isoform in isoforms])
                mhc2_names.extend([isoform.beta_chain.name for isoform in isoforms])
            mhc2_names.extend([isoform.beta_chain.name for isoform in dr_isoforms])
            mhc2 = MhcFactory.build_mhc2_alleles(mhc2_names, self.hla_database)

            candidate = Patient(
                identifier=identifier,
                is_rna_available=True,
                tumor_type=tumor_type,
                mhc1=mhc1,
                mhc2=mhc2,
            )
            ModelValidator.validate_patient(candidate)
        except NeofoxDataValidationException:
            continue
        return candidate
def get_h2_two_test(h2_database):
    """Return the fixed H2 class II test genotype built against ``h2_database``."""
    # Each allele name is listed twice: H2Ad, H2Ad, H2Ed, H2Ed.
    allele_names = [name for name in ("H2Ad", "H2Ed") for _ in range(2)]
    return MhcFactory.build_mhc2_alleles(allele_names, h2_database)
def test_phbr2(self):
    """Check PHBR-II for the default test genotype and for two DPB1 variants.

    The default genotype (``self.test_mhc_two``) must yield a PHBR-II close
    to the pinned reference value; the two genotypes that list DPB1*13:01
    twice or only once must yield ``None``.
    """
    shared_kwargs = dict(
        runner=self.runner,
        configuration=self.configuration,
        mhc_parser=self.mhc_parser,
        blastp_runner=self.proteome_blastp_runner,
    )
    best_multiple = BestAndMultipleBinderMhcII(**shared_kwargs)
    netmhc2pan = NetMhcIIPanPredictor(**shared_kwargs)
    mutation = get_mutation(
        mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET",
        wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET",
    )

    def proteome_filtered_predictions(mhc2):
        # Shared pipeline: allele combinations -> available ones -> predictions
        # -> predictions with peptides found in the proteome removed.
        combinations = netmhc2pan.generate_mhc2_alelle_combinations(mhc2)
        available_isoforms = best_multiple._get_only_available_combinations(
            combinations, self.available_alleles_mhc2)
        raw_predictions = netmhc2pan.mhc2_prediction(
            available_isoforms, mutation.mutated_xmer)
        return netmhc2pan.remove_peptides_in_proteome(
            predictions=raw_predictions, uniprot=self.uniprot)

    # all alleles = heterozygous
    filtered_predictions = proteome_filtered_predictions(self.test_mhc_two)
    logger.info(filtered_predictions)
    logger.info(self.test_mhc_two)
    best_per_allele = best_multiple.extract_best_epitope_per_mhc2_alelle(
        predictions=filtered_predictions, mhc_isoforms=self.test_mhc_two)
    phbr_ii = best_multiple.calculate_phbr_ii(best_per_allele)
    self.assertIsNotNone(phbr_ii)
    self.assertAlmostEqual(8.895757526065129, phbr_ii)

    # The remaining two genotypes differ only in their DPB1 entries.
    common_names = [
        "HLA-DRB1*04:02",
        "HLA-DRB1*08:01",
        "HLA-DQA1*03:01",
        "HLA-DQA1*04:01",
        "HLA-DQB1*03:02",
        "HLA-DQB1*04:02",
        "HLA-DPA1*01:03",
        "HLA-DPA1*02:01",
    ]
    dpb1_variants = (
        # no info for one allele (DPB1*13:01 listed twice)
        ["HLA-DPB1*13:01", "HLA-DPB1*13:01"],
        # one allele present (DPB1*13:01 listed once)
        ["HLA-DPB1*13:01"],
    )
    for dpb1_names in dpb1_variants:
        mhc2_alleles = MhcFactory.build_mhc2_alleles(
            common_names + dpb1_names, self.hla_database)
        filtered_predictions = proteome_filtered_predictions(mhc2_alleles)
        best_per_allele = best_multiple.extract_best_epitope_per_mhc2_alelle(
            filtered_predictions, mhc2_alleles)
        logger.info(best_per_allele)
        logger.info(len(best_per_allele))
        phbr_ii = best_multiple.calculate_phbr_ii(best_per_allele)
        self.assertIsNone(phbr_ii)
def test_parse_mhc2_loss_mouse(self):
    """Building H2 MHC II alleles from an empty allele list returns no entries."""
    no_alleles = []
    parsed = MhcFactory.build_mhc2_alleles(no_alleles, self.h2_database)
    self.assertEqual(0, len(parsed))