Esempio n. 1
0
    def _check_agent_mod(self, agent, mods, do_methionine_offset=True,
                         do_orthology_mapping=True,
                         do_isoform_mapping=True):
        """Check an agent for invalid sites and look for mappings.

        Each modification site is first verified against the Uniprot
        sequence. Sites that fail verification are then resolved, in order
        of precedence, through: another human isoform, the mouse sequence,
        the rat sequence, a methionine (off-by-one) offset, and finally the
        curated site map. The first lookup that succeeds determines the
        mapping.

        As a side effect, the per-site occurrence count (``self._sitecount``)
        is incremented for every modification that carries both a residue and
        a position, and the outcome of each lookup is stored in
        ``self._cache`` so that repeated sites do not trigger new lookups.

        Parameters
        ----------
        agent : :py:class:`indra.statements.Agent`
            Agent to check for invalid modification sites.
        mods : list of :py:class:`indra.statements.ModCondition`
            Modifications to check for validity and map.
        do_methionine_offset : boolean
            Whether to check for off-by-one errors in site position (possibly)
            attributable to site numbering from mature proteins after
            cleavage of the initial methionine. If True, checks the reference
            sequence for a known modification at 1 site position greater
            than the given one; if there exists such a site, creates the
            mapping. Default is True.
        do_orthology_mapping : boolean
            Whether to check sequence positions for known modification sites
            in mouse or rat sequences (based on PhosphoSitePlus data). If a
            mouse/rat site is found that is linked to a site in the human
            reference sequence, a mapping is created. Default is True.
        do_isoform_mapping : boolean
            Whether to check sequence positions for known modifications
            in other human isoforms of the protein (based on PhosphoSitePlus
            data). If a site is found that is linked to a site in the human
            reference sequence, a mapping is created. Default is True.

        Returns
        -------
        list
            A list of invalid sites, where each entry in the list has two
            elements: ((gene_name, residue, position), mapped_site).  If the
            invalid position was not found in the site map, mapped_site is
            None; otherwise it is a tuple consisting of (residue, position,
            comment). Valid sites and modifications lacking a residue or a
            position are not included. An empty list is returned if no
            Uniprot ID can be determined for the agent.
        """
        invalid_sites = []
        up_id = _get_uniprot_id(agent)
        # If the uniprot entry is not found, let it pass
        if not up_id:
            logger.debug("No uniprot ID for %s", agent.name)
            return []  # Same effect as valid sites
        # Sentinel to tell "never looked up" apart from a cached None, which
        # records a site that is invalid and has no known mapping.
        not_cached = object()
        for old_mod in mods:
            # If no site information for this residue, skip
            if old_mod.position is None or old_mod.residue is None:
                continue
            site_key = (agent.name, old_mod.residue, old_mod.position)
            # Increase our count for this site
            self._sitecount[site_key] = self._sitecount.get(site_key, 0) + 1
            # First, check the cache to potentially avoid a costly sequence
            # lookup
            cached_site = self._cache.get(site_key, not_cached)
            if cached_site is not not_cached:
                if cached_site != 'VALID':
                    invalid_sites.append((site_key, cached_site))
                continue
            # Not cached: look up the residue/position in uniprot
            site_valid = uniprot_client.verify_location(up_id,
                                                        old_mod.residue,
                                                        old_mod.position)
            # If the site is found in Uniprot, it is valid and we are done
            if site_valid:
                self._cache[site_key] = 'VALID'
                continue
            # The inferred mappings use the identifiers stored directly on
            # the agent. These are kept in their own names so that `up_id`,
            # which is needed to verify the remaining modifications, is not
            # overwritten.
            agent_up_id = agent.db_refs.get('UP')
            hgnc_id = agent.db_refs.get('HGNC')
            if not hgnc_id:
                logger.debug("No HGNC ID for %s, only curated sites will be "
                             "mapped", agent.name)
            can_infer = agent_up_id and hgnc_id
            # NOTE: The following lookups can only be performed if the
            # Phosphosite Data is available.
            if phosphosite_client.has_data():
                # First, look for other entries in phosphosite for this protein
                # where this sequence position is legit (i.e., other isoforms)
                if do_isoform_mapping and can_infer:
                    human_pos = phosphosite_client.map_to_human_site(
                        agent_up_id, old_mod.residue, old_mod.position)
                    if human_pos:
                        mapped_site = (old_mod.residue, human_pos,
                                       'INFERRED_ALTERNATIVE_ISOFORM')
                        self._cache[site_key] = mapped_site
                        invalid_sites.append((site_key, mapped_site))
                        continue
                # Try looking for rat or mouse sites
                if do_orthology_mapping and can_infer:
                    # Try the mouse sequence first
                    up_mouse = uniprot_client.get_mouse_id(agent_up_id)
                    human_pos = phosphosite_client.map_to_human_site(
                        up_mouse, old_mod.residue, old_mod.position)
                    if human_pos:
                        mapped_site = (old_mod.residue, human_pos,
                                       'INFERRED_MOUSE_SITE')
                        self._cache[site_key] = mapped_site
                        invalid_sites.append((site_key, mapped_site))
                        continue
                    # Try the rat sequence
                    up_rat = uniprot_client.get_rat_id(agent_up_id)
                    human_pos = phosphosite_client.map_to_human_site(
                        up_rat, old_mod.residue, old_mod.position)
                    if human_pos:
                        mapped_site = (old_mod.residue, human_pos,
                                       'INFERRED_RAT_SITE')
                        self._cache[site_key] = mapped_site
                        invalid_sites.append((site_key, mapped_site))
                        continue
                # Check for methionine offset (off by one)
                if do_methionine_offset and can_infer:
                    try:
                        offset_pos = str(int(old_mod.position) + 1)
                    except ValueError:
                        # A position that is not an integer is logged and
                        # the modification is skipped: it is neither
                        # reported as invalid nor cached.
                        logger.warning("Invalid position: %s",
                                       old_mod.position)
                        continue
                    human_pos = phosphosite_client.map_to_human_site(
                        agent_up_id, old_mod.residue, offset_pos)
                    # If it's valid at the offset position, create the mapping
                    # and continue
                    if human_pos:
                        mapped_site = (old_mod.residue, human_pos,
                                       'INFERRED_METHIONINE_CLEAVAGE')
                        self._cache[site_key] = mapped_site
                        invalid_sites.append((site_key, mapped_site))
                        continue
            # Now check the site map. The result is a manually curated
            # mapping, or None if the site map has no entry for this site;
            # either way it is cached and reported as invalid.
            mapped_site = self.site_map.get(site_key, None)
            self._cache[site_key] = mapped_site
            invalid_sites.append((site_key, mapped_site))
        return invalid_sites
def test_rat_from_human():
    """Check that get_rat_id returns 'P11345' for the ID 'P04049'."""
    human_id = 'P04049'
    expected_rat_id = 'P11345'
    rat_id = uniprot_client.get_rat_id(human_id)
    assert rat_id == expected_rat_id
Esempio n. 3
0
def test_rat_from_human():
    """Verify the rat ID looked up from the ID 'P04049' is 'P11345'."""
    looked_up = uniprot_client.get_rat_id('P04049')
    # The lookup must resolve to exactly this rat identifier.
    assert 'P11345' == looked_up