def test_no_site_in_human_ref():
    """Q01105 S9 is expected to resolve to the isoform entry Q01105-2."""
    mapping = map_to_human_site('Q01105', 'S', '9')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'Q01105-2'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '9'
    assert mapping.motif == 'SAPAAKVSKKELNSN'
    assert mapping.respos == 7
def test_h2afx_s139():
    """P16104 S139 is expected to map onto itself with a short motif."""
    mapping = map_to_human_site('P16104', 'S', '139')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'P16104'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '139'
    assert mapping.motif == 'GKKATQASQEY'
    assert mapping.respos == 7
def test_mapping_from_mouse_isoform():
    """A site on isoform ID Q8CI51-3 is expected to map to Q96HC4 S214."""
    isoform_id = 'Q8CI51-3'
    mapping = map_to_human_site(isoform_id, 'S', '105')
    assert isinstance(mapping, PspMapping)
    # The mapped ID should be the human reference sequence
    assert mapping.mapped_id == 'Q96HC4'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '214'
    assert mapping.motif == 'PTVTSVCSETSQELA'
    assert mapping.respos == 7
def test_mapping_from_human_ref_iso_id():
    """An explicit '-1' isoform ID is expected to map to the bare ID."""
    shc1_iso_id = 'P29353-1'  # SHC1
    mapping = map_to_human_site(shc1_iso_id, 'Y', '349')
    assert isinstance(mapping, PspMapping)
    # The mapped ID should be the human reference sequence
    assert mapping.mapped_id == 'P29353'
    assert mapping.mapped_res == 'Y'
    assert mapping.mapped_pos == '349'
    assert mapping.motif == 'EEPPDHQYYNDFPGK'
    assert mapping.respos == 7
def test_map_mouse_to_human():
    """Mouse Q61337 S112 is expected to map to human Q92934 S75."""
    mouse_id = 'Q61337'
    mapping = map_to_human_site(mouse_id, 'S', '112')
    assert isinstance(mapping, PspMapping)
    # The mapped ID should be the human reference sequence
    assert mapping.mapped_id == 'Q92934'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '75'
    assert mapping.motif == 'EIRSRHSSYPAGTED'
    assert mapping.respos == 7
def test_isoform_mapping_from_mouse():
    """Mouse SHC1 (P98083) Y239 is expected to map to human P29353 Y349."""
    mouse_shc1_id = 'P98083'  # Mouse SHC1
    mapping = map_to_human_site(mouse_shc1_id, 'Y', '239')
    assert isinstance(mapping, PspMapping)
    # The mapped ID should be the human reference sequence
    assert mapping.mapped_id == 'P29353'
    assert mapping.mapped_res == 'Y'
    assert mapping.mapped_pos == '349'
    assert mapping.motif == 'EEPPDHQYYNDFPGK'
    assert mapping.respos == 7
def test_smpd1_s508():
    """P17405 S508 is expected to come back unchanged from the lookup."""
    # The site is invalid, but PSP doesn't know that
    mapping = map_to_human_site('P17405', 'S', '508')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'P17405'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '508'
    assert mapping.motif == 'DGNYSGSSHVVLDHE'
    assert mapping.respos == 7
def test_motif_processing():
    """Check the motif and residue offset reported for P68431 T3."""
    # Make sure that site motifs with prepended underscores have the residue
    # position assigned accordingly
    mapping = map_to_human_site('P68431', 'T', '3')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'P68431'
    assert mapping.mapped_res == 'T'
    assert mapping.mapped_pos == '3'
    assert mapping.motif == 'ARTKQTARKS'
    assert mapping.respos == 2
def test_explicit_ref_isoforms():
    """Check position remapping for several proteins, one query each.

    In every case the mapped ID and residue are expected to equal the
    queried ones; only the position is expected to change.
    """
    # (queried ID, residue, queried position, expected mapped position)
    cases = (
        ('Q9Y2K2', 'S', '551', '493'),
        ('Q14155', 'S', '672', '694'),
        ('O15027', 'T', '220', '415'),
        ('Q16555', 'S', '627', '522'),
    )
    for up_id, residue, query_pos, expected_pos in cases:
        mapping = map_to_human_site(up_id, residue, query_pos)
        assert mapping.mapped_id == up_id
        assert mapping.mapped_res == residue
        assert mapping.mapped_pos == expected_pos
def test_ref_seq_not_found():
    """P10636 S202 is expected to map to position 519 on the same ID."""
    mapping = map_to_human_site('P10636', 'S', '202')
    assert mapping.mapped_id == 'P10636'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '519'
def test_set_s9():
    """Q01105 S9 is expected to map to S9 on the isoform ID Q01105-2."""
    mapping = map_to_human_site('Q01105', 'S', '9')
    assert isinstance(mapping, PspMapping)
    assert mapping.mapped_id == 'Q01105-2'
    assert mapping.mapped_res == 'S'
    assert mapping.mapped_pos == '9'
def get_psp_mapping(self, orig_id, query_id, gene_name, res, pos, query_pos,
                    mapping_code):
    """Query PhosphoSitePlus for a site and reconcile it with Uniprot.

    The site `(query_id, res, query_pos)` is looked up through
    `phosphosite_client.map_to_human_site`. The result is then checked
    against Uniprot in three steps:

    1. If the mapped ID has no Uniprot mnemonic (looked up without web
       fallback), a `MappedSite` carrying the error code
       `PSP_MAPPED_ID_NOT_UP` is returned.
    2. The mapped residue/position is verified with
       `uniprot_client.verify_location`. If that call raises, a
       `MappedSite` carrying `UNIPROT_HTTP_NOT_FOUND` (HTTP 404),
       `UNIPROT_HTTP_OTHER` (any other `HTTPError`) or `UNIPROT_OTHER`
       (any other exception, which is also logged) is returned.
    3. If the location is valid, the mapping is reported with
       `mapping_code` as its description. Otherwise the site's motif is
       re-aligned with `ProtMapper.map_peptide` against `orig_id`, and
       the resulting position is reported with the description
       `REMAPPED_FROM_PSP_SEQUENCE`.

    Only results from step 3 are stored in `self._cache` (keyed by
    `(orig_id, res, pos)`); `None` results and error results are
    returned without being cached.

    Note that this method never assigns `ISOFORM_SPECIFIC_SITE`; if that
    code is used, it is presumably applied by the caller.

    Parameters
    ----------
    orig_id : str
        Original Uniprot ID of the protein to be mapped.
    query_id : str
        Uniprot ID actually sent to PhosphoSitePlus. May differ from
        `orig_id`, e.g. when an orthologous protein is being checked.
    gene_name : str
        Gene name, passed through to the resulting `MappedSite`.
    res : str
        Residue of the site to be mapped.
    pos : str
        Position of the site to be mapped.
    query_pos : str
        Position sent to PhosphoSitePlus. May differ from `pos`, e.g.
        when off-by-one (methionine) errors are being checked.
    mapping_code : str
        Description applied when the PhosphoSitePlus position is valid
        in Uniprot as-is, e.g. `INFERRED_ALTERNATIVE_ISOFORM` or
        `INFERRED_MOUSE_SITE`.

    Returns
    -------
    MappedSite or None
        The mapping (or an error-coded `MappedSite`), or None when
        PhosphoSitePlus has no mapping or the peptide re-alignment
        fails.
    """
    psp_hit = phosphosite_client.map_to_human_site(query_id, res, query_pos)
    if psp_hit is None:
        # PhosphoSitePlus knows nothing about this site
        return None

    psp_pos = psp_hit.mapped_pos

    # PSP sometimes returns a non-UP ID like NP_001184222; an ID without a
    # mnemonic is treated as not being a Uniprot ID.
    has_mnemonic = uniprot_client.get_mnemonic(psp_hit.mapped_id,
                                               web_fallback=False)
    if not has_mnemonic:
        return MappedSite(orig_id, None, res, pos,
                          error_code='PSP_MAPPED_ID_NOT_UP')

    # From here on the mapped ID is assumed to be a valid Uniprot ID
    try:
        location_ok = uniprot_client.verify_location(psp_hit.mapped_id,
                                                     psp_hit.mapped_res,
                                                     psp_hit.mapped_pos)
    except HTTPError as http_err:
        if http_err.response.status_code == 404:
            failure_code = 'UNIPROT_HTTP_NOT_FOUND'
        else:
            failure_code = 'UNIPROT_HTTP_OTHER'
    except Exception as other_err:
        failure_code = 'UNIPROT_OTHER'
        logger.error(other_err)
    else:
        failure_code = None

    if failure_code:
        # The second argument (presumably the validity flag) is None here
        # rather than True/False because the site could not be checked.
        return MappedSite(orig_id, None, res, pos, error_code=failure_code)

    if location_ok:
        # The PSP position is valid in Uniprot as-is
        final_pos = psp_pos
        final_description = mapping_code
    else:
        # The PSP position is not valid; try to locate the motif in the
        # sequence for orig_id instead.
        peptide_pos = ProtMapper.map_peptide(orig_id, psp_hit.motif,
                                             psp_hit.respos)
        if peptide_pos is None:
            # Re-alignment failed, so there is nothing to report
            return None
        # The returned index is treated as 0-based; switch to 1-indexed
        final_pos = str(peptide_pos + 1)
        final_description = 'REMAPPED_FROM_PSP_SEQUENCE'

    result = MappedSite(orig_id, False, res, pos,
                        mapped_id=psp_hit.mapped_id,
                        mapped_res=psp_hit.mapped_res,
                        mapped_pos=final_pos,
                        description=final_description,
                        gene_name=gene_name)
    self._cache[(orig_id, res, pos)] = result
    return result