def get_short_name(item, entity='all'):
    """Return the recommended short name of the protein, if there is one.

    The short name is read from
    ``item['uniprot']['entry']['protein']['recommendedName']['shortName']``.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Evidence or None
        ``None`` when the recommended name has no ``'shortName'``. Otherwise
        an ``Evidence`` whose value is the short name, carrying the
        references resolved from its ``'@evidence'`` attribute (if any)
        plus a ``UniProtKB`` reference to the entry accession.

    Raises
    ------
    ValueError
        If an evidence number does not match the ``'@key'`` of the entry
        evidence found at that position.
    """
    from ._add_reference_to_evidence import _add_reference_to_evidence

    entry = item['uniprot']['entry']

    # An entry with a single accession may store it as a bare string;
    # indexing that with [0] would yield only its first character.
    accessions = entry['accession']
    accession = accessions[0] if isinstance(accessions, list) else accessions

    recommended_name = entry['protein']['recommendedName']
    if 'shortName' not in recommended_name:
        return None

    short_name = recommended_name['shortName']
    evidence = Evidence()

    if isinstance(short_name, str):
        # A plain string carries no attributes, hence no evidence keys.
        evidence.value = short_name
    else:
        evidence.value = short_name['#text']
        if '@evidence' in short_name:
            # A lone evidence element may be a mapping instead of a list.
            evidence_table = entry['evidence']
            if isinstance(evidence_table, dict):
                evidence_table = [evidence_table]
            # split() (no argument) tolerates repeated whitespace.
            for evidence_key in short_name['@evidence'].split():
                # Evidence keys are 1-based positions in the evidence table.
                evidence_in_db = evidence_table[int(evidence_key) - 1]
                if evidence_in_db['@key'] != evidence_key:
                    raise ValueError(
                        'Evidence number does not match evidence @key')
                _add_reference_to_evidence(evidence, evidence_in_db)

    evidence.add_reference({'database': 'UniProtKB', 'id': accession})
    return evidence
def get_name(item, entity='all'):
    """Return the recommended full name of the protein.

    The name is read from
    ``item['uniprot']['entry']['protein']['recommendedName']['fullName']``,
    which may be a plain string or a mapping with ``'#text'`` and an
    optional ``'@evidence'`` attribute.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Evidence
        Evidence whose value is the full name (left unset if the mapping
        has no ``'#text'``), carrying the references resolved from
        ``'@evidence'`` plus a ``UniProtKB`` reference to the accession.

    Raises
    ------
    ValueError
        If an evidence number does not match the ``'@key'`` of the entry
        evidence found at that position.
    """
    from ._add_reference_to_evidence import _add_reference_to_evidence

    entry = item['uniprot']['entry']
    evidence = Evidence()
    full_name = entry['protein']['recommendedName']['fullName']

    if isinstance(full_name, str):
        evidence.value = full_name
    elif isinstance(full_name, dict):
        # isinstance(..., dict) accepts OrderedDict as well as plain dict;
        # an exact OrderedDict type test silently skipped plain dicts.
        if '#text' in full_name:
            evidence.value = full_name['#text']
        if '@evidence' in full_name:
            # A lone evidence element may be a mapping instead of a list.
            evidence_table = entry['evidence']
            if isinstance(evidence_table, dict):
                evidence_table = [evidence_table]
            for evidence_key in full_name['@evidence'].split():
                # Evidence keys are 1-based positions in the evidence table.
                evidence_in_db = evidence_table[int(evidence_key) - 1]
                if evidence_in_db['@key'] != evidence_key:
                    raise ValueError(
                        'Evidence number does not match evidence @key')
                _add_reference_to_evidence(evidence, evidence_in_db)

    # An entry with a single accession may store it as a bare string;
    # indexing that with [0] would yield only its first character.
    accessions = entry['accession']
    accession = accessions[0] if isinstance(accessions, list) else accessions

    evidence.add_reference({'database': 'UniProtKB', 'id': accession})
    return evidence
def get_alternative_names(item, entity='all'):
    """Collect the short name and every alternative name of the protein.

    The result starts with the value of ``get_short_name(item)`` (when it
    is not ``None``) and continues with one ``Evidence`` per ``fullName``
    / ``shortName`` found under
    ``item['uniprot']['entry']['protein']['alternativeName']``.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Forwarded to ``get_uniprot``.

    Returns
    -------
    list of Evidence
        Possibly empty. Every alternative-name evidence carries the first
        reference of the object returned by ``get_uniprot``.

    Raises
    ------
    ValueError
        If ``alternativeName`` is neither a mapping nor a list, if one of
        its elements is not a mapping, if an element has a key other than
        ``'fullName'`` / ``'shortName'``, or if an evidence number does not
        match the ``'@key'`` of the entry evidence at that position.
    """
    from ._add_reference_to_evidence import _add_reference_to_evidence
    from .get_short_name import get_short_name
    from .get_uniprot import get_uniprot

    output = []

    uniprot = get_uniprot(item, entity=entity)
    ref_uniprot = uniprot.references[0]

    short_name = get_short_name(item)
    if short_name is not None:
        output.append(short_name)

    entry = item['uniprot']['entry']
    protein = entry['protein']
    if 'alternativeName' not in protein:
        return output

    alternative_names = protein['alternativeName']
    # A single alternative name arrives as one mapping; wrap it. Using
    # isinstance(..., dict) accepts both OrderedDict and plain dict.
    if isinstance(alternative_names, dict):
        alternative_names = [alternative_names]
    if not isinstance(alternative_names, list):
        raise ValueError("alternativeName is not a list")

    for alternative in alternative_names:
        if not isinstance(alternative, dict):
            raise ValueError("Uknown alternativeName")

        for key, value in alternative.items():
            if key not in ('fullName', 'shortName'):
                raise ValueError("Uknown alternative name type")

            # The same kind of name may be repeated inside one alternative
            # name, in which case the value is a list of names.
            names = value if isinstance(value, list) else [value]

            for name in names:
                evidence = Evidence()
                if isinstance(name, str):
                    # A plain string carries no evidence attribute.
                    evidence.value = name
                else:
                    evidence.value = name['#text']
                    if '@evidence' in name:
                        # A lone evidence element may be a mapping.
                        evidence_table = entry['evidence']
                        if isinstance(evidence_table, dict):
                            evidence_table = [evidence_table]
                        for evidence_key in name['@evidence'].split():
                            # Keys are 1-based positions in the table.
                            evidence_in_db = evidence_table[
                                int(evidence_key) - 1]
                            if evidence_in_db['@key'] != evidence_key:
                                raise ValueError(
                                    'Evidence number does not match '
                                    'evidence @key')
                            _add_reference_to_evidence(
                                evidence, evidence_in_db)
                evidence.add_reference(ref_uniprot)
                output.append(evidence)

    return output
def get_dbreference(item, entity='all', dbname=None):
    """Collect the cross-references to one database from a UniProt entry.

    Every element of ``item['uniprot']['entry']['dbReference']`` whose
    ``'@type'`` equals `dbname` becomes one ``Evidence`` whose value is the
    element's ``'@id'``.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.
    dbname : str, optional
        Value of ``'@type'`` to select. Supported values are the keys of
        the lookup table below.

    Returns
    -------
    Evidence, list of Evidence, or None
        ``None`` when nothing matches, the single ``Evidence`` when exactly
        one cross-reference matches, and a list when several do.

    Raises
    ------
    ValueError
        If a cross-reference matches `dbname` but `dbname` is not a
        supported database, or if an evidence number does not match the
        ``'@key'`` of the entry evidence found at that position.
    """
    from ._add_reference_to_evidence import _add_reference_to_evidence

    # '@type' in the entry -> database label stored in the reference.
    # Only 'SMR' is stored under a different label.
    database_labels = {
        'ChEMBL': 'ChEMBL',
        'EC': 'EC',
        'DIP': 'DIP',
        'ELM': 'ELM',
        'IntAct': 'IntAct',
        'BindingDB': 'BindingDB',
        'BioGRID': 'BioGRID',
        'iPTMnet': 'iPTMnet',
        'MINT': 'MINT',
        'PhosphoSitePlus': 'PhosphoSitePlus',
        'ProDom': 'ProDom',
        'ProteinModelPortal': 'ProteinModelPortal',
        'STRING': 'STRING',
        'SMR': 'SwissModel',
    }

    entry = item['uniprot']['entry']

    # An entry with a single accession may store it as a bare string;
    # indexing that with [0] would yield only its first character.
    accessions = entry['accession']
    uniprot = accessions[0] if isinstance(accessions, list) else accessions

    # A single cross-reference may be one mapping instead of a list, and
    # an entry without cross-references simply has nothing to report.
    db_references = entry.get('dbReference', [])
    if isinstance(db_references, dict):
        db_references = [db_references]

    output = []
    for db in db_references:
        if db['@type'] != dbname:
            continue

        # Raised only once a match is found, as the if/elif chain did.
        if dbname not in database_labels:
            raise ValueError('Database name not recognized')

        accession = db['@id']
        evidence = Evidence()
        evidence.value = accession
        evidence.add_reference(
            {'database': database_labels[dbname], 'id': accession})

        if '@evidence' in db:
            # A lone evidence element may be a mapping instead of a list.
            evidence_table = entry['evidence']
            if isinstance(evidence_table, dict):
                evidence_table = [evidence_table]
            for evidence_key in db['@evidence'].split():
                # Evidence keys are 1-based positions in the evidence table.
                evidence_in_db = evidence_table[int(evidence_key) - 1]
                if evidence_in_db['@key'] != evidence_key:
                    raise ValueError(
                        'Evidence number does not match evidence @key')
                _add_reference_to_evidence(evidence, evidence_in_db)

        evidence.add_reference({'database': 'UniProtKB', 'id': uniprot})
        output.append(evidence)

    # Return shape depends on the number of matches (see Returns).
    if len(output) > 1:
        return output
    if len(output) == 1:
        return output[0]
    return None
def get_organism_scientific_name(item, entity='all'):
    """Return the scientific name of the source organism.

    The name is the ``'#text'`` of the element of
    ``item['uniprot']['entry']['organism']['name']`` whose ``'@type'`` is
    ``'scientific'``. If several elements qualify, the last one wins.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Evidence or None
        ``None`` when the entry has no ``'organism'`` or no scientific
        name. Otherwise an ``Evidence`` with an ``NCBI_Taxonomy`` reference
        (id taken from the organism's ``dbReference``) and a ``UniProtKB``
        reference to the entry accession.

    Raises
    ------
    ValueError
        If the organism's ``dbReference`` is not of type
        ``'NCBI Taxonomy'``.
    """
    output = None
    entry = item['uniprot']['entry']

    # An entry with a single accession may store it as a bare string;
    # indexing that with [0] would yield only its first character.
    accessions = entry['accession']
    accession = accessions[0] if isinstance(accessions, list) else accessions

    if 'organism' not in entry:
        return output

    organism = entry['organism']

    # An organism with only one name stores it as a single mapping rather
    # than a list; wrap it so the scientific name is still found.
    names = organism['name']
    if isinstance(names, dict):
        names = [names]
    if not isinstance(names, list):
        return output

    for name in names:
        if name['@type'] != 'scientific':
            continue

        evidence = Evidence()
        evidence.value = name['#text']

        dbref_type = organism['dbReference']['@type']
        dbref_id = organism['dbReference']['@id']
        if dbref_type == 'NCBI Taxonomy':
            evidence.add_reference(
                {'database': 'NCBI_Taxonomy', 'id': dbref_id})
        else:
            raise ValueError('Unknown reference in database')

        evidence.add_reference({'database': 'UniProtKB', 'id': accession})
        output = evidence

    return output
def get_uniprot_id(item, entity='all'):
    """Return the UniProt accession of the entry as an Evidence.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``.
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Evidence
        Evidence whose value is the accession: the first element when
        ``entry['accession']`` is a list, the stored object otherwise.
        It carries a ``UniProtKB`` reference to that same accession.
    """
    result = Evidence()

    stored = item['uniprot']['entry']['accession']
    # Several accessions come as a list; a lone accession is stored as is.
    primary = stored[0] if type(stored) is list else stored

    result.value = primary
    result.add_reference({'database': 'UniProtKB', 'id': primary})
    return result
def get_canonical_sequence(item, entity='all'):
    """Return the sequence stored in the entry as an Evidence.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Evidence
        Evidence whose value is ``entry['sequence']['#text']``, carrying a
        ``UniProtKB`` reference to the entry accession.
    """
    entry = item['uniprot']['entry']
    seq = entry['sequence']['#text']

    # An entry with a single accession may store it as a bare string;
    # indexing that with [0] would yield only its first character.
    accessions = entry['accession']
    accession = accessions[0] if isinstance(accessions, list) else accessions

    evidence = Evidence()
    evidence.value = seq
    evidence.add_reference({'database': 'UniProtKB', 'id': accession})
    return evidence
def get_uniprot_entry_name(item, entity='all'):
    """Return the entry name stored in the UniProt entry as an Evidence.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Evidence
        Evidence whose value is ``entry['name']``, carrying a ``UniProtKB``
        reference to the entry accession.
    """
    entry = item['uniprot']['entry']

    evidence = Evidence()
    evidence.value = entry['name']

    # An entry with a single accession may store it as a bare string;
    # indexing that with [0] would yield only its first character.
    accessions = entry['accession']
    accession = accessions[0] if isinstance(accessions, list) else accessions

    evidence.add_reference({'database': 'UniProtKB', 'id': accession})
    return evidence
def get_host(item, entity='all'):
    """Return the host organism of the entry, if one is declared.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Evidence or None
        ``None`` when the entry has no ``'organismHost'``. Otherwise an
        ``Evidence`` whose value is the host name, with an NCBI Taxonomy
        reference (id from the host's ``dbReference``) and a UniProtKB
        reference to the entry accession.
    """
    entry = item['uniprot']['entry']
    if 'organismHost' not in entry:
        return None

    # NOTE(review): this assumes exactly one host with exactly one name.
    # If the entry declares several hosts, or a host has several names,
    # these would presumably be lists and the lookups below would fail --
    # confirm against real entries and decide on the return shape.
    organism_host = entry['organismHost']
    host = organism_host['name']['#text']
    ncbi_taxonomy = organism_host['dbReference']['@id']

    # An entry with a single accession may store it as a bare string;
    # indexing that with [0] would yield only its first character.
    accessions = entry['accession']
    accession = accessions[0] if isinstance(accessions, list) else accessions

    evidence = Evidence()
    evidence.value = host
    evidence.add_NCBI_Taxonomy(id=ncbi_taxonomy)
    evidence.add_UniProtKB(id=accession)
    return evidence
def get_pdbs(item, entity='all'):
    """Collect the PDB cross-references of a UniProt entry.

    Every element of ``item['uniprot']['entry']['dbReference']`` whose
    ``'@type'`` is ``'PDB'`` becomes one ``Evidence`` whose value is the
    element's ``'@id'``.

    Parameters
    ----------
    item : mapping
        Nested mapping holding a UniProt entry under ``['uniprot']['entry']``
        (presumably xmltodict-parsed UniProt XML -- confirm against caller).
    entity : str, default 'all'
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    list of Evidence
        One evidence per PDB cross-reference, each carrying a ``PDB``
        reference to its own id and a ``UniProtKB`` reference to the entry
        accession. Empty when there is no PDB cross-reference.
    """
    entry = item['uniprot']['entry']

    # An entry with a single accession may store it as a bare string;
    # indexing that with [0] would yield only its first character.
    accessions = entry['accession']
    uniprot = accessions[0] if isinstance(accessions, list) else accessions

    # A single cross-reference may be one mapping instead of a list, and
    # an entry without cross-references simply has no PDB ids.
    db_references = entry.get('dbReference', [])
    if isinstance(db_references, dict):
        db_references = [db_references]

    output = []
    for db in db_references:
        if db['@type'] != 'PDB':
            continue
        accession = db['@id']
        evidence = Evidence()
        evidence.value = accession
        evidence.add_reference({'database': 'PDB', 'id': accession})
        evidence.add_reference({'database': 'UniProtKB', 'id': uniprot})
        output.append(evidence)
    return output