Exemple #1
0
    def build_mhc1_alleles(alleles: List[str],
                           mhc_database: MhcDatabase) -> List[Mhc1]:
        """
        Build one Mhc1 per MHC I gene of the database from a list of allele names.

        Empty strings in ``alleles`` are ignored. Every other entry is parsed with
        the parser obtained for ``mhc_database`` and passed to
        ``ModelValidator.validate_mhc1_gene``. The parsed alleles are then grouped
        by gene following ``mhc_database.mhc1_genes`` and a zygosity is computed
        for every gene.

        :param alleles: allele names, possibly containing empty strings
        :param mhc_database: provides the parser and the list of MHC I genes
        :return: the Mhc1 objects whose zygosity is not ``Zygosity.LOSS``
        :raises NeofoxDataValidationException: if an AssertionError is raised while
            parsing, validating or grouping the alleles; the AssertionError is kept
            as the direct cause
        """
        isoforms = []
        try:
            mhc_parser = MhcParser.get_mhc_parser(mhc_database)
            # NOTE: during the pandas parsing of empty columns empty lists become a list with one empty string
            parsed_alleles = [
                mhc_parser.parse_mhc_allele(a) for a in alleles if a != ""
            ]
            for a in parsed_alleles:
                ModelValidator.validate_mhc1_gene(a)

            # do we need to validate genes anymore? add test creating MhcAllele with bad gene and see what happens
            for mhc1_gene in mhc_database.mhc1_genes:
                gene_alleles = [
                    a for a in parsed_alleles if a.gene == mhc1_gene.name
                ]
                zygosity = MhcFactory._get_zygosity_from_alleles(gene_alleles)
                if zygosity == Zygosity.HOMOZYGOUS:
                    # we don't want repeated instances of the same allele
                    gene_alleles = [gene_alleles[0]]
                isoforms.append(
                    Mhc1(name=mhc1_gene,
                         zygosity=zygosity,
                         alleles=gene_alleles))
        except AssertionError as e:
            # explicit chaining reports the assertion as the direct cause instead of
            # as an error that happened "during handling" of another one
            raise NeofoxDataValidationException(e) from e
        return [i for i in isoforms if i.zygosity != Zygosity.LOSS]
 def __init__(self, generator, mhc1_alleles, mhc2_alleles, hla_database: HlaDatabase):
     """
     Initialise the provider and cache the resources it draws values from.

     :param generator: forwarded unchanged to ``Provider.__init__``
     :param mhc1_alleles: input handed to ``self.load_mhc1_alleles``
     :param mhc2_alleles: input handed to ``self.load_mhc2_isoforms``
     :param hla_database: database stored on the instance and used to obtain the MHC parser
     """
     Provider.__init__(self, generator)

     # the database and its parser are stored before anything is loaded
     self.hla_database = hla_database
     self.mhc_parser = MhcParser.get_mhc_parser(self.hla_database)

     # gets available alleles from netmhcpan and netmhc2pan
     loaded_mhc1_alleles = self.load_mhc1_alleles(mhc1_alleles)
     self.available_mhc1_alleles = loaded_mhc1_alleles
     loaded_mhc2_isoforms = self.load_mhc2_isoforms(mhc2_alleles)
     self.available_mhc2_isoforms = loaded_mhc2_isoforms

     # gets available tumor types
     expression_annotator = ExpressionAnnotator()
     self.available_tumor_types = expression_annotator.cohort_indices.keys()
Exemple #3
0
 def setUp(self):
     """Load the references for ORGANISM_MUS_MUSCULUS and build the objects shared by the tests."""
     references, self.configuration = integration_test_tools.load_references(
         organism=ORGANISM_MUS_MUSCULUS)
     self.runner = Runner()
     self.available_alleles = references.get_available_alleles()

     # every consumer below requests the MHC database from the references on its own
     self.test_mhc_one = integration_test_tools.get_h2_one_test(
         references.get_mhc_database())
     self.test_mhc_two = integration_test_tools.get_h2_two_test(
         references.get_mhc_database())
     self.mhc_parser = MhcParser.get_mhc_parser(
         references.get_mhc_database())

     # BLASTP runner against the proteome database of the loaded references
     proteome_database = references.get_proteome_database()
     self.proteome_blastp_runner = BlastpRunner(
         runner=self.runner,
         configuration=self.configuration,
         database=proteome_database)
 def load_mhc2_alleles(self, available_alleles: List[str]):
     """
     Return the names of the MHC II isoforms that can be parsed.

     Every entry is parsed with ``parse_mhc2_isoform``; entries for which the
     parser raises an AssertionError are skipped.

     :param available_alleles: isoform names to parse
     :return: list with the ``name`` of every successfully parsed isoform, in input order
     """
     # the parser is obtained from the database alone, so it is built once
     # instead of once per allele
     # NOTE(review): assumes get_mhc_parser has no side effects and does not itself
     # raise AssertionError (it now runs outside the try) - confirm
     mhc_parser = MhcParser.get_mhc_parser(self.hla_database)
     mhc_alleles = []
     for a in available_alleles:
         try:
             parsed_allele = mhc_parser.parse_mhc2_isoform(a)
         except AssertionError:
             # entries rejected by the parser are left out on purpose
             continue
         mhc_alleles.append(parsed_allele.name)
     return mhc_alleles
Exemple #5
0
 def setUp(self):
     """Load the default references and build the predictors and fixtures shared by the tests."""
     self.references, self.configuration = integration_test_tools.load_references()
     self.runner = Runner()

     # both predictors are created with the same runner, configuration and parser
     mhc_parser = MhcParser.get_mhc_parser(self.references.get_mhc_database())
     predictor_arguments = dict(
         runner=self.runner,
         configuration=self.configuration,
         mhc_parser=mhc_parser,
     )
     self.mixmhcpred = MixMHCpred(**predictor_arguments)
     self.mixmhc2pred = MixMhc2Pred(**predictor_arguments)

     # test MHC fixtures are derived from the database kept on the test case
     self.hla_database = self.references.get_mhc_database()
     self.test_mhc_one = integration_test_tools.get_hla_one_test(
         self.hla_database)
     self.test_mhc_two = integration_test_tools.get_hla_two_test(
         self.hla_database)

     self.uniprot = Uniprot(self.references.uniprot_pickle)
Exemple #6
0
    def build_mhc2_alleles(alleles: List[str],
                           mhc_database: MhcDatabase) -> List[Mhc2]:
        """
        Build one Mhc2 per MHC II molecule of the database from a list of allele names.

        Empty strings in ``alleles`` are ignored. Every other entry is parsed with
        the parser obtained for ``mhc_database`` and passed to
        ``ModelValidator.validate_mhc2_gene``. For each molecule in
        ``mhc_database.mhc2_molecules`` the genes listed in ``GENES_BY_MOLECULE``
        are turned into ``Mhc2Gene`` objects with their zygosity and alleles, and
        the isoforms are derived from those genes.

        :param alleles: allele names, possibly containing empty strings
        :param mhc_database: provides the parser and the list of MHC II molecules
        :return: the Mhc2 objects for which no gene has zygosity ``Zygosity.LOSS``
        :raises NeofoxDataValidationException: if an AssertionError is raised while
            parsing, validating or grouping the alleles; the AssertionError is kept
            as the direct cause
        """
        mhc2s = []
        try:
            mhc_parser = MhcParser.get_mhc_parser(mhc_database)
            # NOTE: during the pandas parsing of empty columns empty lists become a list with one empty string
            parsed_alleles = [
                mhc_parser.parse_mhc_allele(a) for a in alleles if a != ""
            ]
            for a in parsed_alleles:
                ModelValidator.validate_mhc2_gene(a)

            # do we need to validate genes anymore? add test creating MhcAllele with bad gene and see what happens
            for mhc2_isoform_name in mhc_database.mhc2_molecules:
                mhc2_isoform_genes = GENES_BY_MOLECULE.get(mhc2_isoform_name)
                # gene names of this molecule, computed once per molecule rather
                # than once per parsed allele
                isoform_gene_names = [g.name for g in mhc2_isoform_genes]
                isoform_alleles = [
                    a for a in parsed_alleles if a.gene in isoform_gene_names
                ]
                genes = []
                for gene_name in mhc2_isoform_genes:
                    gene_alleles = [
                        a for a in isoform_alleles if a.gene == gene_name.name
                    ]
                    zygosity = MhcFactory._get_zygosity_from_alleles(
                        gene_alleles)
                    if zygosity == Zygosity.HOMOZYGOUS:
                        # we don't want repeated instances of the same allele
                        gene_alleles = [gene_alleles[0]]
                    genes.append(
                        Mhc2Gene(name=gene_name,
                                 zygosity=zygosity,
                                 alleles=gene_alleles))
                isoforms = MhcFactory._get_mhc2_isoforms(
                    mhc2_isoform_name, genes)
                mhc2s.append(
                    Mhc2(name=mhc2_isoform_name,
                         genes=genes,
                         isoforms=isoforms))
        except AssertionError as e:
            # explicit chaining reports the assertion as the direct cause instead of
            # as an error that happened "during handling" of another one
            raise NeofoxDataValidationException(e) from e
        # a molecule is kept only when none of its genes is lost
        return [
            m for m in mhc2s
            if all(g.zygosity != Zygosity.LOSS for g in m.genes)
        ]
    def __init__(
        self,
        references: ReferenceFolder,
        configuration: DependenciesConfiguration,
        tcell_predictor: TcellPrediction,
        self_similarity: SelfSimilarityCalculator,
        affinity_threshold=neofox.AFFINITY_THRESHOLD_DEFAULT,
    ):
        """
        Set up every resource used to annotate neoantigens.

        :param references: source of the databases, available alleles, organism,
            uniprot pickle and resource versions
        :param configuration: dependencies configuration shared with the tool wrappers
        :param tcell_predictor: stored as ``self.tcell_predictor``
        :param self_similarity: stored as ``self.self_similarity``
        :param affinity_threshold: threshold forwarded to the calculators that accept one
        """
        self.runner = Runner()
        self.configuration = configuration
        self.proteome_db = references.proteome_db
        self.available_alleles = references.get_available_alleles()
        self.tcell_predictor = tcell_predictor
        self.self_similarity = self_similarity
        self.organism = references.organism

        # NOTE: this one loads a big file, but it is faster loading it multiple times than passing it around
        self.uniprot = Uniprot(references.uniprot_pickle)

        # initialise proteome and IEDB BLASTP runners; they only differ in the database
        blastp_arguments = dict(runner=self.runner, configuration=configuration)
        self.proteome_blastp_runner = BlastpRunner(
            **blastp_arguments, database=references.get_proteome_database())
        self.iedb_blastp_runner = BlastpRunner(
            **blastp_arguments, database=references.get_iedb_database())

        # NOTE: these resources do not read any file thus can be initialised fast
        self.dissimilarity_calculator = DissimilarityCalculator(
            proteome_blastp_runner=self.proteome_blastp_runner,
            affinity_threshold=affinity_threshold,
        )
        self.neoantigen_fitness_calculator = NeoantigenFitnessCalculator(
            iedb_blastp_runner=self.iedb_blastp_runner)
        self.neoag_calculator = NeoagCalculator(
            runner=self.runner,
            configuration=configuration,
            affinity_threshold=affinity_threshold,
        )
        self.differential_binding = DifferentialBinding(
            affinity_threshold=affinity_threshold)
        self.priority_score_calculator = PriorityScore()
        self.iedb_immunogenicity = IEDBimmunogenicity(
            affinity_threshold=affinity_threshold)
        self.amplitude = Amplitude()
        self.hex = Hex(
            runner=self.runner,
            configuration=configuration,
            references=references,
        )

        # the MHC parser is obtained from the MHC database of the references
        self.mhc_database = references.get_mhc_database()
        self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database)

        self.resources_versions = references.get_resources_versions()
Exemple #8
0
 def setUp(self):
     """Load the default references and build the alleles, parser and runners shared by the tests."""
     references, self.configuration = integration_test_tools.load_references()
     self.runner = Runner()

     # MHC I and MHC II alleles are each read from a fresh get_available_alleles() call
     mhc1_source = references.get_available_alleles()
     self.available_alleles_mhc1 = mhc1_source.get_available_mhc_i()
     mhc2_source = references.get_available_alleles()
     self.available_alleles_mhc2 = mhc2_source.get_available_mhc_ii()

     # the parser and the test MHC fixtures all come from the same database object
     self.hla_database = references.get_mhc_database()
     self.mhc_parser = MhcParser.get_mhc_parser(self.hla_database)
     self.test_mhc_one = integration_test_tools.get_hla_one_test(self.hla_database)
     self.test_mhc_two = integration_test_tools.get_hla_two_test(self.hla_database)

     self.uniprot = Uniprot(references.uniprot_pickle)

     # BLASTP runner against the proteome database of the loaded references
     proteome_database = references.get_proteome_database()
     self.proteome_blastp_runner = BlastpRunner(
         runner=self.runner,
         configuration=self.configuration,
         database=proteome_database)
Exemple #9
0
 def setUp(self) -> None:
     """Create the MHC parser used by the tests from a FakeHlaDatabase instance."""
     fake_database = FakeHlaDatabase()
     self.mhc_parser = MhcParser.get_mhc_parser(fake_database)