コード例 #1
0
    def parse_mhc_allele(self,
                         allele: str,
                         pattern=H2_ALLELE_PATTERN) -> MhcAllele:
        """Parse an H2 allele string into a normalized MhcAllele.

        Input matching H2_NETMHCPAN_ALLELE_PATTERN is first rewritten to the
        "H2{gene}{protein}" form; the result is then matched against
        ``pattern``. A warning is logged (no exception is raised) when the
        parsed allele is not found in ``self.mhc_database``.

        :param allele: the allele name to parse
        :param pattern: object exposing ``match`` (e.g. a compiled regular
            expression) whose groups 1 and 2 capture the gene and the protein;
            defaults to H2_ALLELE_PATTERN
        :return: an MhcAllele with gene, protein, name and full_name set
        :raises NeofoxDataValidationException: if the allele is empty or does
            not match ``pattern``
        """
        netmhcpan_match = H2_NETMHCPAN_ALLELE_PATTERN.match(allele)
        if netmhcpan_match:
            # this ensures that netmhcpan output is normalized
            allele = "H2{gene}{protein}".format(
                gene=netmhcpan_match.group(1),
                protein=netmhcpan_match.group(2))

        # honours the pattern provided by the caller instead of silently
        # ignoring it in favour of the default
        match = pattern.match(allele)
        if match is None:
            if allele != "":
                message = "Allele does not match H2 allele pattern {}".format(
                    allele)
            else:
                message = "Please check the format of provided alleles. An empty allele is provided"
            raise NeofoxDataValidationException(message)

        gene = match.group(1)
        protein = match.group(2)

        # controls for existence in the H2 database and warns the user
        mhc_allele = MhcAllele(gene=gene, protein=protein)
        if not self.mhc_database.exists(mhc_allele):
            logger.warning(
                "Allele {} does not exist in the H2 database".format(allele))

        # builds a normalized representation of the allele
        name = "{gene}{protein}".format(gene=gene, protein=protein)

        # full name is the same as name in this case as the pattern does not allow variability
        mhc_allele.name = name
        mhc_allele.full_name = name
        return mhc_allele
コード例 #2
0
    def parse_mhc_allele(self, allele: str) -> MhcAllele:
        """Parse an HLA allele string into a normalized MhcAllele.

        The allele is first tried against HLA_ALLELE_PATTERN_WITHOUT_SEPARATOR
        and, failing that, against HLA_ALLELE_PATTERN. A warning is logged (no
        exception is raised) when the parsed allele is not found in
        ``self.mhc_database``.

        :param allele: the allele name to parse
        :return: an MhcAllele with gene, group, protein, name and full_name set
        :raises NeofoxDataValidationException: if the allele is empty or
            matches neither pattern
        """
        match = HLA_ALLELE_PATTERN_WITHOUT_SEPARATOR.match(allele)
        if match is None:
            # not in the separator-less form: gene, group and protein must
            # come from the regular pattern
            match = HLA_ALLELE_PATTERN.match(allele)
            if match is None:
                if allele != "":
                    message = "Allele does not match HLA allele pattern {}".format(
                        allele)
                else:
                    message = "Please check the format of provided alleles. An empty allele is provided"
                raise NeofoxDataValidationException(message)
            gene, group, protein = match.group(1, 2, 3)
        else:
            # without a separator the group/protein boundary is ambiguous
            gene, group, protein = match.group(1, 2, 3)
            candidate = MhcAllele(gene=gene, group=group, protein=protein)
            if not self.mhc_database.exists(candidate):
                # the split as matched is unknown to the database: move the
                # last character of the group to the front of the protein
                group, protein = group[:-1], group[-1:] + protein

        # warns the user when the resulting allele is not in the database
        mhc_allele = MhcAllele(gene=gene, group=group, protein=protein)
        if not self.mhc_database.exists(mhc_allele):
            logger.warning(
                "Allele {} does not exist in the HLA database".format(allele))

        # normalized representation of the allele
        name = "HLA-{gene}*{serotype}:{protein}".format(gene=gene,
                                                        serotype=group,
                                                        protein=protein)

        # full name appends the optional fields captured in groups 4, 5 and 6
        # in order; each one is only read when the previous one is non-empty
        suffixes = []
        for group_index, separator in ((4, ":"), (5, ":"), (6, "")):
            field = match.group(group_index)
            if not field:
                break
            suffixes.append(separator + field)

        mhc_allele.name = name
        mhc_allele.full_name = name + "".join(suffixes)
        return mhc_allele