def build_mhc1_alleles(alleles: List[str], mhc_database: MhcDatabase) -> List[Mhc1]:
    """Build the MHC I models for every MHC I gene listed in the database.

    The non-empty entries of ``alleles`` are parsed with the parser obtained
    for ``mhc_database`` and validated as MHC I genes. For each gene in
    ``mhc_database.mhc1_genes`` the matching parsed alleles are gathered and
    their zygosity is computed; a homozygous gene keeps a single allele.

    :param alleles: raw allele names; empty strings are ignored
    :param mhc_database: database providing the parser and the MHC I genes
    :return: one ``Mhc1`` per gene, excluding genes with zygosity ``LOSS``
    :raises NeofoxDataValidationException: when an ``AssertionError`` is
        raised while parsing, validating or assembling the models
    """
    isoforms = []
    try:
        mhc_parser = MhcParser.get_mhc_parser(mhc_database)
        # NOTE: during the pandas parsing of empty columns empty lists become a list with one empty string
        parsed_alleles = [
            mhc_parser.parse_mhc_allele(raw_allele)
            for raw_allele in alleles
            if raw_allele != ""
        ]
        for parsed_allele in parsed_alleles:
            ModelValidator.validate_mhc1_gene(parsed_allele)

        # TODO: do we need to validate genes anymore? add test creating MhcAllele with bad gene
        #  and see what happens
        for mhc1_gene in mhc_database.mhc1_genes:
            gene_alleles = [
                parsed_allele
                for parsed_allele in parsed_alleles
                if parsed_allele.gene == mhc1_gene.name
            ]
            zygosity = MhcFactory._get_zygosity_from_alleles(gene_alleles)
            if zygosity == Zygosity.HOMOZYGOUS:
                # we don't want repeated instances of the same allele
                gene_alleles = [gene_alleles[0]]
            isoforms.append(
                Mhc1(name=mhc1_gene, zygosity=zygosity, alleles=gene_alleles))
    except AssertionError as e:
        # chain explicitly so the traceback shows the failed assertion as the direct cause
        raise NeofoxDataValidationException(e) from e

    return [isoform for isoform in isoforms if isoform.zygosity != Zygosity.LOSS]
def __init__(self, generator, mhc1_alleles, mhc2_alleles, hla_database: HlaDatabase):
    """Initialise the provider and store the available alleles, isoforms and tumor types.

    :param generator: forwarded unchanged to ``Provider.__init__``
    :param mhc1_alleles: passed to ``self.load_mhc1_alleles``
    :param mhc2_alleles: passed to ``self.load_mhc2_isoforms``
    :param hla_database: kept on the instance and used to obtain the MHC parser
    """
    Provider.__init__(self, generator)

    # the database and parser are set before the load_* calls below
    self.hla_database = hla_database
    self.mhc_parser = MhcParser.get_mhc_parser(hla_database)

    # gets available alleles from netmhcpan and netmhc2pan
    self.available_mhc1_alleles = self.load_mhc1_alleles(mhc1_alleles)
    self.available_mhc2_isoforms = self.load_mhc2_isoforms(mhc2_alleles)

    # gets available tumor types
    expression_annotator = ExpressionAnnotator()
    self.available_tumor_types = expression_annotator.cohort_indices.keys()
def setUp(self):
    """Load the ORGANISM_MUS_MUSCULUS references and build the shared test fixtures."""
    loaded = integration_test_tools.load_references(organism=ORGANISM_MUS_MUSCULUS)
    references, self.configuration = loaded

    self.runner = Runner()
    self.available_alleles = references.get_available_alleles()

    # the MHC database is requested separately for each fixture
    database_for_mhc_one = references.get_mhc_database()
    self.test_mhc_one = integration_test_tools.get_h2_one_test(database_for_mhc_one)

    database_for_mhc_two = references.get_mhc_database()
    self.test_mhc_two = integration_test_tools.get_h2_two_test(database_for_mhc_two)

    database_for_parser = references.get_mhc_database()
    self.mhc_parser = MhcParser.get_mhc_parser(database_for_parser)

    self.proteome_blastp_runner = BlastpRunner(
        runner=self.runner,
        configuration=self.configuration,
        database=references.get_proteome_database(),
    )
def load_mhc2_alleles(self, available_alleles: List[str]):
    """Return the names of the entries that parse as MHC II isoforms.

    Every entry of ``available_alleles`` is passed to ``parse_mhc2_isoform``
    of the parser obtained for ``self.hla_database``. Entries for which the
    parser raises ``AssertionError`` are skipped; the ``name`` of each
    successfully parsed result is collected in input order.

    :param available_alleles: raw isoform identifiers to parse
    :return: list with the ``name`` of each entry that was parsed; an empty
        list when there is nothing to parse
    """
    if not available_alleles:
        # nothing to parse, so no parser needs to be built either
        return []

    # get_mhc_parser receives the same database for every entry: build the parser once
    mhc_parser = MhcParser.get_mhc_parser(self.hla_database)

    mhc_alleles = []
    for raw_isoform in available_alleles:
        try:
            parsed_isoform = mhc_parser.parse_mhc2_isoform(raw_isoform)
        except AssertionError:
            # best effort: entries rejected by the parser are left out
            continue
        mhc_alleles.append(parsed_isoform.name)
    return mhc_alleles
def setUp(self):
    """Load the default references and create the predictors and fixtures used by the tests."""
    loaded = integration_test_tools.load_references()
    self.references, self.configuration = loaded
    self.runner = Runner()

    # both predictors are created with the same runner, configuration and parser
    predictor_arguments = dict(
        runner=self.runner,
        configuration=self.configuration,
        mhc_parser=MhcParser.get_mhc_parser(self.references.get_mhc_database()),
    )
    self.mixmhcpred = MixMHCpred(**predictor_arguments)
    self.mixmhc2pred = MixMhc2Pred(**predictor_arguments)

    self.hla_database = self.references.get_mhc_database()
    self.test_mhc_one = integration_test_tools.get_hla_one_test(self.hla_database)
    self.test_mhc_two = integration_test_tools.get_hla_two_test(self.hla_database)

    uniprot_pickle = self.references.uniprot_pickle
    self.uniprot = Uniprot(uniprot_pickle)
def build_mhc2_alleles(alleles: List[str], mhc_database: MhcDatabase) -> List[Mhc2]:
    """Build the MHC II models for every MHC II molecule listed in the database.

    The non-empty entries of ``alleles`` are parsed with the parser obtained
    for ``mhc_database`` and validated as MHC II genes. For each molecule in
    ``mhc_database.mhc2_molecules`` its genes are looked up in
    ``GENES_BY_MOLECULE``; for each of those genes the matching parsed
    alleles are gathered and their zygosity is computed (a homozygous gene
    keeps a single allele). The isoforms of the molecule are then derived
    from its genes.

    :param alleles: raw allele names; empty strings are ignored
    :param mhc_database: database providing the parser and the MHC II molecules
    :return: one ``Mhc2`` per molecule, excluding molecules where any gene has
        zygosity ``LOSS``
    :raises NeofoxDataValidationException: when an ``AssertionError`` is
        raised while parsing, validating or assembling the models
    """
    mhc2s = []
    try:
        mhc_parser = MhcParser.get_mhc_parser(mhc_database)
        # NOTE: during the pandas parsing of empty columns empty lists become a list with one empty string
        parsed_alleles = [
            mhc_parser.parse_mhc_allele(raw_allele)
            for raw_allele in alleles
            if raw_allele != ""
        ]
        for parsed_allele in parsed_alleles:
            ModelValidator.validate_mhc2_gene(parsed_allele)

        # TODO: do we need to validate genes anymore? add test creating MhcAllele with bad gene
        #  and see what happens
        for mhc2_isoform_name in mhc_database.mhc2_molecules:
            mhc2_isoform_genes = GENES_BY_MOLECULE.get(mhc2_isoform_name)
            # gene names are collected once per molecule rather than once per parsed allele
            isoform_gene_names = [gene.name for gene in mhc2_isoform_genes]
            isoform_alleles = [
                parsed_allele
                for parsed_allele in parsed_alleles
                if parsed_allele.gene in isoform_gene_names
            ]

            genes = []
            for gene in mhc2_isoform_genes:
                gene_alleles = [
                    isoform_allele
                    for isoform_allele in isoform_alleles
                    if isoform_allele.gene == gene.name
                ]
                zygosity = MhcFactory._get_zygosity_from_alleles(gene_alleles)
                if zygosity == Zygosity.HOMOZYGOUS:
                    # we don't want repeated instances of the same allele
                    gene_alleles = [gene_alleles[0]]
                genes.append(
                    Mhc2Gene(name=gene, zygosity=zygosity, alleles=gene_alleles))

            isoforms = MhcFactory._get_mhc2_isoforms(mhc2_isoform_name, genes)
            mhc2s.append(
                Mhc2(name=mhc2_isoform_name, genes=genes, isoforms=isoforms))
    except AssertionError as e:
        # chain explicitly so the traceback shows the failed assertion as the direct cause
        raise NeofoxDataValidationException(e) from e

    return [
        mhc2
        for mhc2 in mhc2s
        if all(gene.zygosity != Zygosity.LOSS for gene in mhc2.genes)
    ]
def __init__(
    self,
    references: ReferenceFolder,
    configuration: DependenciesConfiguration,
    tcell_predictor: TcellPrediction,
    self_similarity: SelfSimilarityCalculator,
    affinity_threshold=neofox.AFFINITY_THRESHOLD_DEFAULT,
):
    """class to annotate neoantigens

    :param references: source of the proteome, IEDB and MHC databases, the
        available alleles, the organism, the uniprot pickle and the
        resources versions
    :param configuration: stored and handed to the BLASTP runners, the neoag
        calculator and Hex
    :param tcell_predictor: stored as is
    :param self_similarity: stored as is
    :param affinity_threshold: handed to every calculator that accepts one
    """
    runner = Runner()
    self.runner = runner
    self.configuration = configuration

    # values taken straight from the references and the arguments
    self.proteome_db = references.proteome_db
    self.available_alleles = references.get_available_alleles()
    self.tcell_predictor = tcell_predictor
    self.self_similarity = self_similarity
    self.organism = references.organism

    # NOTE: this one loads a big file, but it is faster loading it multiple times than passing it around
    self.uniprot = Uniprot(references.uniprot_pickle)

    # initialise proteome and IEDB BLASTP runners
    proteome_blastp_runner = BlastpRunner(
        runner=runner,
        configuration=configuration,
        database=references.get_proteome_database(),
    )
    self.proteome_blastp_runner = proteome_blastp_runner
    iedb_blastp_runner = BlastpRunner(
        runner=runner,
        configuration=configuration,
        database=references.get_iedb_database(),
    )
    self.iedb_blastp_runner = iedb_blastp_runner

    # NOTE: these resources do not read any file thus can be initialised fast
    self.dissimilarity_calculator = DissimilarityCalculator(
        proteome_blastp_runner=proteome_blastp_runner,
        affinity_threshold=affinity_threshold,
    )
    self.neoantigen_fitness_calculator = NeoantigenFitnessCalculator(
        iedb_blastp_runner=iedb_blastp_runner,
    )
    self.neoag_calculator = NeoagCalculator(
        runner=runner,
        configuration=configuration,
        affinity_threshold=affinity_threshold,
    )
    self.differential_binding = DifferentialBinding(affinity_threshold=affinity_threshold)
    self.priority_score_calculator = PriorityScore()
    self.iedb_immunogenicity = IEDBimmunogenicity(affinity_threshold=affinity_threshold)
    self.amplitude = Amplitude()
    self.hex = Hex(runner=runner, configuration=configuration, references=references)

    # the parser is obtained for the MHC database of these references
    mhc_database = references.get_mhc_database()
    self.mhc_database = mhc_database
    self.mhc_parser = MhcParser.get_mhc_parser(mhc_database)
    self.resources_versions = references.get_resources_versions()
def setUp(self):
    """Load the default references and build the allele, parser and BLASTP fixtures."""
    loaded = integration_test_tools.load_references()
    references, self.configuration = loaded
    self.runner = Runner()

    # the available alleles are requested once per MHC class
    alleles_for_mhc1 = references.get_available_alleles()
    self.available_alleles_mhc1 = alleles_for_mhc1.get_available_mhc_i()
    alleles_for_mhc2 = references.get_available_alleles()
    self.available_alleles_mhc2 = alleles_for_mhc2.get_available_mhc_ii()

    self.hla_database = references.get_mhc_database()
    self.mhc_parser = MhcParser.get_mhc_parser(self.hla_database)
    self.test_mhc_one = integration_test_tools.get_hla_one_test(self.hla_database)
    self.test_mhc_two = integration_test_tools.get_hla_two_test(self.hla_database)

    self.uniprot = Uniprot(references.uniprot_pickle)
    self.proteome_blastp_runner = BlastpRunner(
        runner=self.runner,
        configuration=self.configuration,
        database=references.get_proteome_database(),
    )
def setUp(self) -> None:
    """Create the MHC parser under test from a ``FakeHlaDatabase`` instance."""
    fake_database = FakeHlaDatabase()
    self.mhc_parser = MhcParser.get_mhc_parser(fake_database)