Beispiel #1
0
def test_mouse_from_human():
    """Check that get_mouse_id('P15056') returns 'P28028'."""
    human_id = 'P15056'
    expected_mouse_id = 'P28028'
    mouse_id = uniprot_client.get_mouse_id(human_id)
    assert mouse_id == expected_mouse_id
Beispiel #2
0
    def map_to_human_ref(self, prot_id, prot_ns, residue, position,
                         do_methionine_offset=True,
                         do_orthology_mapping=True,
                         do_isoform_mapping=True):
        """Check a site on a protein for validity and look for a mapping.

        The site is first verified against the Uniprot sequence. If it is
        not valid there, mappings are attempted in this order, returning at
        the first hit: the curated site map, a signal peptide offset, and
        then (only if PhosphoSite data is available and the protein is
        human) alternative isoforms, mouse and rat orthologs, and finally a
        methionine off-by-one offset.

        Results for valid sites, curated mappings and sites for which no
        mapping was found are stored in ``self._cache`` under the key
        ``(up_id, residue, position)``. Error results are never cached.

        Parameters
        ----------
        prot_id : str
            A Uniprot ID or HGNC gene symbol for the protein.
        prot_ns : str
            One of 'uniprot' or 'hgnc' indicating the type of ID given.
        residue : str
            Residue to map on the protein to check for validity and map.
        position : str
            Position of the residue to check for validity and map.
        do_methionine_offset : boolean
            Whether to check for off-by-one errors in site position (possibly)
            attributable to site numbering from mature proteins after
            cleavage of the initial methionine. If True, checks the reference
            sequence for a known modification at 1 site position greater
            than the given one; if there exists such a site, creates the
            mapping. Default is True.
        do_orthology_mapping : boolean
            Whether to check sequence positions for known modification sites
            in mouse or rat sequences (based on PhosphoSitePlus data). If a
            mouse/rat site is found that is linked to a site in the human
            reference sequence, a mapping is created. Default is True.
        do_isoform_mapping : boolean
            Whether to check sequence positions for known modifications
            in other human isoforms of the protein (based on PhosphoSitePlus
            data). If a site is found that is linked to a site in the human
            reference sequence, a mapping is created. Default is True.

        Returns
        -------
        MappedSite
            The MappedSite object gives information on results of mapping the
            site. See :py:class:`protmapper.api.MappedSite` documentation for
            details.

        Raises
        ------
        ValueError
            If `prot_id` is None or `prot_ns` is not 'uniprot' or 'hgnc'.
        """
        # Check the protein ID and namespace
        if prot_id is None:
            raise ValueError("prot_id must not be None.")
        if prot_ns not in ('uniprot', 'hgnc'):
            raise ValueError("prot_ns must be either 'uniprot' or 'hgnc' (for "
                             "HGNC symbols)")
        # Get Uniprot ID and gene name
        up_id = _get_uniprot_id(prot_id, prot_ns)
        # If an HGNC ID was given and the uniprot entry is not found, flag
        # as error
        if up_id is None:
            assert prot_ns == 'hgnc' and prot_id is not None
            return MappedSite(None, None, residue, position,
                              gene_name=prot_id, error_code='NO_UNIPROT_ID')
        # Make sure the sites are proper amino acids/positions. Only the
        # raised exception matters here; the returned values are not used
        # and the site is handled with the residue/position as given.
        try:
            _validate_site(residue, position)
        except InvalidSiteException as ex:
            return MappedSite(up_id, None, residue, position,
                              error_code='INVALID_SITE',
                              description=str(ex))
        site_key = (up_id, residue, position)
        # Check the cache before doing any further lookups so that a cache
        # hit costs nothing beyond the validation above
        cached_site = self._cache.get(site_key)
        if cached_site is not None:
            return cached_site
        # If not cached, continue
        # Get the gene name from Uniprot
        gene_name = uniprot_client.get_gene_name(up_id, web_fallback=False)
        # Look up the residue/position in uniprot
        error_code = None
        site_valid = None
        try:
            site_valid = uniprot_client.verify_location(up_id, residue,
                                                        position)
        except HTTPError as ex:
            # The exception may not carry a response object, so read the
            # status code defensively rather than failing inside the handler
            response = getattr(ex, 'response', None)
            if getattr(response, 'status_code', None) == 404:
                error_code = 'UNIPROT_HTTP_NOT_FOUND'
            else:
                error_code = 'UNIPROT_HTTP_OTHER'
        except Exception as ex:
            error_code = 'UNIPROT_OTHER'
            logger.error(ex)
        if error_code:
            # Set error_code; valid will set to None, not True/False.
            # Lookup failures are deliberately not cached.
            return MappedSite(up_id, None, residue, position,
                              error_code=error_code)
        # It's a valid site
        if site_valid:
            mapped_site = MappedSite(up_id, True, residue, position,
                                     description='VALID',
                                     gene_name=gene_name)
            self._cache[site_key] = mapped_site
            return mapped_site
        # If it's not a valid site, check the site map first
        curated_site = self.site_map.get(site_key, None)
        # Manually mapped in the site map
        if curated_site is not None:
            mapped_res, mapped_pos, description = curated_site
            mapped_site = MappedSite(up_id, False, residue, position,
                                     mapped_id=up_id,
                                     mapped_res=mapped_res,
                                     mapped_pos=mapped_pos,
                                     description=description,
                                     gene_name=gene_name)
            self._cache[site_key] = mapped_site
            return mapped_site

        # There is no manual mapping, next we try to see if UniProt
        # reports a signal peptide that could be responsible for the position
        # being shifted
        signal_peptide = uniprot_client.get_signal_peptide(up_id, False)
        # If there is valid signal peptide information from UniProt
        if signal_peptide and signal_peptide.begin == 1 and \
                signal_peptide.end is not None:
            # Shift the given position by the length of the signal peptide
            offset_pos = str(int(position) + signal_peptide.end)
            # Check to see if the offset position is known to be phosphorylated
            mapped_site = self.get_psp_mapping(
                                up_id, up_id, gene_name, residue, position,
                                offset_pos, 'SIGNAL_PEPTIDE_REMOVED')
            if mapped_site:
                return mapped_site
        # ...there's no manually curated site or signal peptide, so do mapping
        # via PhosphoSite if the data is available:
        human_prot = uniprot_client.is_human(up_id)
        if phosphosite_client.has_data():
            # First, look for other entries in phosphosite for this protein
            # where this sequence position is legit (i.e., other isoforms)
            if do_isoform_mapping and up_id and human_prot:
                mapped_site = self.get_psp_mapping(
                        up_id, up_id, gene_name, residue, position, position,
                        'INFERRED_ALTERNATIVE_ISOFORM')
                if mapped_site:
                    return mapped_site
            # Try looking for rat or mouse sites
            if do_orthology_mapping and up_id and human_prot:
                # Get the mouse ID for this protein
                # NOTE(review): the mouse/rat ID is passed on without a None
                # check -- confirm get_psp_mapping tolerates a missing ID
                up_mouse = uniprot_client.get_mouse_id(up_id)
                # Query the same position on the mouse entry
                mapped_site = self.get_psp_mapping(
                                    up_id, up_mouse, gene_name, residue,
                                    position, position, 'INFERRED_MOUSE_SITE')
                if mapped_site:
                    return mapped_site
                # Try the rat sequence
                up_rat = uniprot_client.get_rat_id(up_id)
                mapped_site = self.get_psp_mapping(
                                    up_id, up_rat, gene_name, residue, position,
                                    position, 'INFERRED_RAT_SITE')
                if mapped_site:
                    return mapped_site
            # Check for methionine offset (off by one)
            if do_methionine_offset and up_id and human_prot:
                offset_pos = str(int(position) + 1)
                mapped_site = self.get_psp_mapping(
                                    up_id, up_id, gene_name, residue, position,
                                    offset_pos, 'INFERRED_METHIONINE_CLEAVAGE')
                if mapped_site:
                    return mapped_site
        # If we've gotten here, the entry is 1) not in the site map, and
        # 2) we either don't have PSP data or no mapping was found using PSP
        mapped_site = MappedSite(up_id, False, residue, position,
                                 description='NO_MAPPING_FOUND',
                                 gene_name=gene_name)
        self._cache[site_key] = mapped_site
        return mapped_site