Ejemplo n.º 1
0
def test_no_site_in_human_ref():
    """Querying Q01105 S9 should yield a mapping onto isoform Q01105-2."""
    mapping = map_to_human_site('Q01105', 'S', '9')
    assert isinstance(mapping, PspMapping)
    # The mapped ID is expected to be the isoform, not the bare accession
    assert mapping.mapped_id == 'Q01105-2'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '9'
    assert mapping.motif == 'SAPAAKVSKKELNSN'
    assert mapping.respos == 7
Ejemplo n.º 2
0
def test_h2afx_s139():
    """Querying P16104 S139 should map back onto the same ID and position."""
    mapping = map_to_human_site('P16104', 'S', '139')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'P16104'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '139'
    # Expected motif string and index of the site residue within it
    assert mapping.motif == 'GKKATQASQEY'
    assert mapping.respos == 7
Ejemplo n.º 3
0
def test_mapping_from_mouse_isoform():
    """Querying isoform ID Q8CI51-3 at S105 should map to Q96HC4 S214."""
    mapping = map_to_human_site('Q8CI51-3', 'S', '105')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'Q96HC4'  # Human ref seq
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '214'
    assert mapping.motif == 'PTVTSVCSETSQELA'
    assert mapping.respos == 7
Ejemplo n.º 4
0
def test_mapping_from_human_ref_iso_id():
    """Querying P29353-1 at Y349 should map to P29353 Y349 (suffix dropped)."""
    shc1_iso_id = 'P29353-1'  # SHC1
    mapping = map_to_human_site(shc1_iso_id, 'Y', '349')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'P29353'  # Human ref seq
    assert mapping.mapped_res == 'Y'
    assert mapping.mapped_pos == '349'
    assert mapping.motif == 'EEPPDHQYYNDFPGK'
    assert mapping.respos == 7
Ejemplo n.º 5
0
def test_map_mouse_to_human():
    """Querying Q61337 at S112 should map to Q92934 S75."""
    mapping = map_to_human_site('Q61337', 'S', '112')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'Q92934'  # Human ref seq
    assert mapping.mapped_res == 'S'
    # The mapped position differs from the queried one
    assert mapping.mapped_pos == '75'
    assert mapping.motif == 'EIRSRHSSYPAGTED'
    assert mapping.respos == 7
Ejemplo n.º 6
0
def test_isoform_mapping_from_mouse():
    """Querying P98083 at Y239 should map to P29353 Y349."""
    mouse_shc1_id = 'P98083'  # Mouse SHC1
    mapping = map_to_human_site(mouse_shc1_id, 'Y', '239')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'P29353'  # Human ref seq
    assert mapping.mapped_res == 'Y'
    assert mapping.mapped_pos == '349'
    assert mapping.motif == 'EEPPDHQYYNDFPGK'
    assert mapping.respos == 7
Ejemplo n.º 7
0
def test_smpd1_s508():
    """Querying P17405 S508 should still return a mapping at S508."""
    # The site is invalid, but PSP doesn't know that, so a mapping to the
    # same ID and position is expected here
    mapping = map_to_human_site('P17405', 'S', '508')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'P17405'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '508'
    assert mapping.motif == 'DGNYSGSSHVVLDHE'
    assert mapping.respos == 7
Ejemplo n.º 8
0
def test_motif_processing():
    """Check motif and residue index for a site near the sequence start.

    Site motifs with prepended underscores must have the residue position
    assigned accordingly; for P68431 T3 the expected motif is shorter than
    in the other tests and the expected residue index is 2.
    """
    mapping = map_to_human_site('P68431', 'T', '3')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'P68431'
    assert mapping.mapped_res == 'T'
    assert mapping.mapped_pos == '3'
    assert mapping.motif == 'ARTKQTARKS'
    assert mapping.respos == 2
Ejemplo n.º 9
0
def test_explicit_ref_isoforms():
    """Check four sites whose mapped position differs from the queried one.

    In every case the mapped ID and residue are expected to equal the
    queried ID and residue; only the position changes.
    """
    # (UniProt ID, residue, queried position, expected mapped position)
    cases = (
        ('Q9Y2K2', 'S', '551', '493'),
        ('Q14155', 'S', '672', '694'),
        ('O15027', 'T', '220', '415'),
        ('Q16555', 'S', '627', '522'),
    )
    for up_id, residue, query_pos, expected_pos in cases:
        mapping = map_to_human_site(up_id, residue, query_pos)
        assert mapping.mapped_id == up_id
        assert mapping.mapped_res == residue
        assert mapping.mapped_pos == expected_pos
Ejemplo n.º 10
0
def test_ref_seq_not_found():
    """Querying P10636 S202 should map to P10636 S519."""
    mapping = map_to_human_site('P10636', 'S', '202')
    assert mapping.mapped_id == 'P10636'
    assert mapping.mapped_res == 'S'
    # The mapped position differs from the queried one
    assert mapping.mapped_pos == '519'
Ejemplo n.º 11
0
def test_set_s9():
    """Querying Q01105 S9 should map to isoform Q01105-2 at S9."""
    mapping = map_to_human_site('Q01105', 'S', '9')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'Q01105-2'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '9'
Ejemplo n.º 12
0
    def get_psp_mapping(self, orig_id, query_id, gene_name, res, pos,
                        query_pos, mapping_code):
        """
        Wrapper around Phosphosite queries that performs peptide remapping.

        The function queries the phosphosite_client with `query_id`, `res`
        and `query_pos` for a corresponding mapped site, and then checks the
        result against Uniprot:

        1. If the phosphosite_client returns no mapping, None is returned.
        2. If no Uniprot mnemonic can be found for the mapped ID (PSP
           sometimes returns a non-Uniprot ID such as NP_001184222), a
           MappedSite with error code `PSP_MAPPED_ID_NOT_UP` is returned.
        3. If verifying the mapped site against Uniprot raises, a MappedSite
           with error code `UNIPROT_HTTP_NOT_FOUND`, `UNIPROT_HTTP_OTHER` or
           `UNIPROT_OTHER` is returned.
        4. If the mapped site is valid, a MappedSite carrying the mapped
           ID/residue/position is returned with `mapping_code` as its
           description.
        5. Otherwise the site is remapped using the motif (peptide) and
           residue index reported by PhosphositePlus; on success the result
           is described as `REMAPPED_FROM_PSP_SEQUENCE`, on failure None is
           returned.

        Only the results of steps 4 and 5 are stored in `self._cache`, under
        the key `(orig_id, res, pos)`; None and error results are not cached.

        NOTE(review): this method never assigns `ISOFORM_SPECIFIC_SITE`
        itself; if that code is needed for sites that exist only on a human
        isoform, it has to be applied elsewhere -- confirm against callers.

        Parameters
        ----------
        orig_id : str
            Original Uniprot ID of the protein to be mapped.
        query_id : str
            Uniprot ID of the protein being queried for sites. This may differ
            from `orig_id` if the orthologous mouse or rat protein is being
            checked for sites.
        gene_name : str
            Gene name of the protein.
        res : str
            Residue of the site to be mapped.
        pos : str
            Position of the site to be mapped.
        query_pos : str
            Position being queried for a mapping. This differs from `pos`
            when off-by-one (methionine) errors are being checked.
        mapping_code : str
            Mapping code to apply in case of a successful mapping, e.g.
            `INFERRED_ALTERNATIVE_ISOFORM`, `INFERRED_MOUSE_SITE`, etc.

        Returns
        -------
        MappedSite or None
            MappedSite object containing the mapping (or an error code), or
            None indicating that no mapping was found.
        """
        pspmapping = phosphosite_client.map_to_human_site(query_id, res,
                                                          query_pos)
        # If no mapping, return None
        if pspmapping is None:
            return None

        # PSP sometimes returns a non-UP ID like NP_001184222 which we want
        # to control for here; we do that by looking up the mnemonic
        if not uniprot_client.get_mnemonic(pspmapping.mapped_id,
                                           web_fallback=False):
            return MappedSite(orig_id, None, res, pos,
                              error_code='PSP_MAPPED_ID_NOT_UP')

        # At this point the ID is supposed to be valid UP, so check whether
        # the site mapped from PSP is valid in the Uniprot sequence. Both
        # names are bound up front so that every path below is well defined.
        site_valid = False
        error_code = None
        try:
            site_valid = uniprot_client.verify_location(
                pspmapping.mapped_id, pspmapping.mapped_res,
                pspmapping.mapped_pos)
        except HTTPError as ex:
            # The exception is not guaranteed to carry a response object;
            # dereferencing it blindly would raise AttributeError from inside
            # this handler and bypass the error-code path. An identity-safe
            # lookup is used rather than a truth test because a response
            # object may evaluate as false for error statuses.
            response = getattr(ex, 'response', None)
            status_code = getattr(response, 'status_code', None)
            if status_code == 404:
                error_code = 'UNIPROT_HTTP_NOT_FOUND'
            else:
                error_code = 'UNIPROT_HTTP_OTHER'
        except Exception as ex:
            error_code = 'UNIPROT_OTHER'
            logger.error(ex)
        if error_code:
            # Set error_code; valid is set to None, not True/False
            return MappedSite(orig_id, None, res, pos,
                              error_code=error_code)

        if site_valid:
            # The mapped site is valid, so report it with the caller's code
            mapped_site = MappedSite(orig_id, False, res, pos,
                                     mapped_id=pspmapping.mapped_id,
                                     mapped_res=pspmapping.mapped_res,
                                     mapped_pos=pspmapping.mapped_pos,
                                     description=mapping_code,
                                     gene_name=gene_name)
        else:
            # If mapped site is invalid, attempt to re-map based on the seq
            updated_pos = ProtMapper.map_peptide(orig_id, pspmapping.motif,
                                                 pspmapping.respos)
            # If the re-mapping fails, we give up
            if updated_pos is None:
                return None
            # Otherwise, we update to the mapped position, switching from
            # the index returned by map_peptide to a 1-indexed string
            mapped_site = MappedSite(orig_id, False, res, pos,
                                     mapped_id=pspmapping.mapped_id,
                                     mapped_res=pspmapping.mapped_res,
                                     mapped_pos=str(updated_pos + 1),
                                     description='REMAPPED_FROM_PSP_SEQUENCE',
                                     gene_name=gene_name)
        self._cache[(orig_id, res, pos)] = mapped_site
        return mapped_site