def get_pubmed_id_for_pubchem_id(pubchem_id):
    """
    Take a PubChem id and get the PMIDs for articles on PubMed (via PubChem)
    which include this entity.

    :param pubchem_id: a string PubChem CID
    :returns: a list of strings like '12345[uid]' (PMIDs tagged for eUtils
        queries), or None on bad input, request failure, or a 'Fault' reply
    """
    if not isinstance(pubchem_id, str):
        return None
    requests = CacheControlHelper()
    url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/' + str(pubchem_id) + '/xrefs/PubMedID/JSON'
    try:
        r = requests.get(url, timeout=10)
    except requests.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryPubChem for URL: ' + url, file=sys.stderr)
        return None
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryPubChem for URL: %s' % (e, url), file=sys.stderr)
        return None
    if r is None:
        return None
    # bug fix: the body was previously parsed with r.json() up to three times;
    # decode the JSON once and reuse it
    body = r.json()
    if 'Fault' in body.keys():
        return None
    return [str(x) + '[uid]' for x in body['InformationList']['Information'][0]['PubMedID']]
def query_oxo(uid):
    """
    Send a curie id to EMBL-EBI OXO to convert it to a CUI.

    :param uid: a curie identifier (any type; coerced to str)
    :returns: the HTTP response object on status 200, otherwise None
        (timeouts, connection errors, sqlite cache errors, non-200 status)
    """
    helper = CacheControlHelper()
    url_str = 'https://www.ebi.ac.uk/spot/oxo/api/mappings?fromId=' + str(uid)
    try:
        res = helper.get(url_str, headers={'accept': 'application/json'}, timeout=120)
    except helper.exceptions.Timeout:
        print('HTTP timeout in SemMedInterface.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  ## take a timeout because NCBI rate-limits connections
        return None
    except helper.exceptions.ConnectionError:
        print('HTTP connection error in SemMedInterface.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  ## take a timeout because NCBI rate-limits connections
        return None
    except sqlite3.OperationalError:
        print('Error reading sqlite cache; URL: ' + url_str, file=sys.stderr)
        return None
    if res.status_code != 200:
        print('HTTP response status code: ' + str(res.status_code) + ' for URL:\n' + url_str, file=sys.stderr)
        return None
    return res
def get_reactome_names(id):
    '''
    Takes a reactome id then returns a string containing all synonyms listed on
    reactome separated by the delimiter |

    However, if it finds a MeSH term in the list it will return the search term
    as a mesh term search, e.g. something like '(IGF1R)[MeSH Terms]'.
    This can be input into the google function as a non-mesh term and will
    search as a mesh term. This is so that we do not need to handle the output
    of this function any differently; it can all be input as non-mesh terms.

    Parameters:
        id - a string containing the reactome id (curie 'REACT:R-HSA-...' or
             bare 'R-HSA-...')
    Output:
        search - a string containing all synonyms of the reactome id, or a mesh
                 term formatted for the google distance function; None on a
                 non-200 HTTP response
    '''
    # We want the actual reactome name R-HSA..., not the curie REACT:R-HSA...
    if "REACT:" in id:
        id = ":".join(id.split(":")[1:])
    url = 'https://reactome.org/ContentService/data/query/' + id + '/name'  # hardcoded url for reactome names
    requests = CacheControlHelper()
    r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})  # sends get request that returns a string
    if r.status_code != 200:
        print('HTTP response status code: ' + str(r.status_code) + ' for URL:\n' + url, file=sys.stderr)
        return None
    # perf fix: the pattern was previously recompiled (twice) on every
    # iteration, and the result string was built with quadratic '+='
    paren_regex = re.compile("[()]")
    terms = []
    for name in r.text.split('\n'):
        if len(name) == 0:
            continue  # skip blank lines at beginning and end of response
        paren_parts = paren_regex.split(name)  # splits on either "(" or ")"
        if len(paren_parts) > 1:  # name contains parentheses
            for n in paren_parts:
                if len(n) > 0:  # drop blanks generated by split
                    if QueryNCBIeUtils.is_mesh_term(n):
                        terms.append(n + '[MeSH Terms]')
                    else:
                        terms.append(n)
        elif len(name.split('ecNumber')) > 1:  # name carries an EC number
            ec_parts = name.split('ecNumber')  # computed once, was split 3x
            if QueryNCBIeUtils.is_mesh_term(ec_parts[0]):
                terms.append(ec_parts[0] + '[MeSH Terms]')
            else:
                terms.append(ec_parts[0])
            # removes trailing "/" and formats as EC search term
            terms.append(ec_parts[1][:-1] + '[EC/RN Number]')
        else:
            if QueryNCBIeUtils.is_mesh_term(name):
                terms.append(name + '[MeSH Terms]')
            else:
                terms.append(name)
    return '|'.join(terms)
def get_cui(chemical_substance_id):
    """
    Look up the UMLS CUI for a chemical substance via MyChem's
    drugcentral.xref.umlscui field.

    :param chemical_substance_id: a ChEMBL id, with or without the 'ChEMBL:'
        curie prefix
    :returns: the CUI value from MyChem, or None on any failure or if the
        drugcentral cross-reference is absent
    """
    # normalize a 'ChEMBL:'-style curie prefix to the bare 'CHEMBL' form
    if chemical_substance_id[:7].upper() == "CHEMBL:":
        chemical_substance_id = "CHEMBL" + chemical_substance_id[7:]
    helper = CacheControlHelper()
    handler = 'chem/' + chemical_substance_id + '?fields=drugcentral.xref.umlscui'
    url = QueryMyChem.API_BASE_URL + '/' + handler
    try:
        res = helper.get(url, timeout=QueryMyChem.TIMEOUT_SEC)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryMyChem for URL: ' + url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryMyChem for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        # print(url, file=sys.stderr)
        # print('Status code ' + str(res.status_code) + ' for url: ' + url, file=sys.stderr)
        return None
    id_json = res.json()
    # guard-clause walk of the nested reply; any missing level means no CUI
    if 'drugcentral' not in id_json.keys():
        return None
    if 'xref' not in id_json['drugcentral'].keys():
        return None
    if 'umlscui' not in id_json['drugcentral']['xref'].keys():
        return None
    return id_json['drugcentral']['xref']['umlscui']
def send_query_get(handler, url_suffix):
    """
    Issue a GET against the ChEMBL REST API.

    :param handler: API endpoint path fragment
    :param url_suffix: pre-built query string (without the leading '?')
    :returns: the decoded JSON body on HTTP 200, otherwise None
    """
    helper = CacheControlHelper()
    url = QueryChEMBL.API_BASE_URL + '/' + handler + '?' + url_suffix
    # print(url)
    try:
        res = helper.get(url, timeout=QueryChEMBL.TIMEOUT_SEC)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryChEMBL for URL: ' + url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryChEMBL for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code == 200:
        return res.json()
    print(url, file=sys.stderr)
    print('Status code ' + str(res.status_code) + ' for url: ' + url, file=sys.stderr)
    return None
def send_query_get(entity, url_suffix):
    """
    GET helper for the Pharos API.

    :param entity: the Pharos entity path fragment
    :param url_suffix: the remainder of the URL after the entity
    :returns: the response object on HTTP 200, None on HTTP 404 or any
        request failure; any other status code raises AssertionError
    """
    requests = CacheControlHelper()
    url_str = QueryPharos.API_BASE_URL + "/" + entity + url_suffix
    # print(url_str)
    try:
        res = requests.get(url_str)
    except requests.exceptions.Timeout:
        print(url_str, file=sys.stderr)
        # bug fix: messages previously named QueryMiRBase (copy-paste)
        print('Timeout in QueryPharos for URL: ' + url_str, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url_str, file=sys.stderr)
        # bug fix: this previously referenced the undefined name `url`,
        # which would raise NameError inside the exception handler
        print('%s received in QueryPharos for URL: %s' % (e, url_str), file=sys.stderr)
        return None
    status_code = res.status_code
    # print("Status code="+str(status_code))
    # NOTE(review): assert is stripped under `python -O`; kept as-is to
    # preserve the original behavior for unexpected status codes
    assert status_code in [200, 404]
    if status_code == 404:
        res = None
    return res
def send_query_post(handler, params, retmax=1000):
    """
    POST a query to an NCBI eUtils endpoint.

    Note: mutates `params` in place by setting 'retmax' and 'retmode'.

    :param handler: eUtils endpoint name
    :param params: dict of form fields to post
    :param retmax: maximum number of records to request (default 1000)
    :returns: the response object on HTTP 200, otherwise None (also None on
        timeout or connection error, after a one-second back-off)
    """
    helper = CacheControlHelper()
    url_str = QueryNCBIeUtils.API_BASE_URL + '/' + handler
    params['retmax'] = str(retmax)
    params['retmode'] = 'json'
    # print(url_str)
    try:
        res = helper.post(url_str, data=params, timeout=QueryNCBIeUtils.TIMEOUT_SEC)
    except helper.exceptions.Timeout:
        print('HTTP timeout in QueryNCBIeUtils.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  ## take a timeout because NCBI rate-limits connections
        return None
    except helper.exceptions.ConnectionError:
        print('HTTP connection error in QueryNCBIeUtils.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  ## take a timeout because NCBI rate-limits connections
        return None
    if res.status_code == 200:
        return res
    print('HTTP response status code: ' + str(res.status_code) + ' for URL:\n' + url_str, file=sys.stderr)
    return None
def __access_api(handler, url_suffix, params=None, return_raw=False):
    """
    Call the MyGene API.

    :param handler: API endpoint path fragment
    :param url_suffix: optional pre-built query string (appended after '?')
    :param params: optional dict of query parameters
    :param return_raw: when True return the raw response text, else JSON
    :returns: response text or decoded JSON on HTTP 200, otherwise None
    """
    helper = CacheControlHelper()
    if url_suffix:
        url = QueryMyGeneExtended.API_BASE_URL + '/' + handler + '?' + url_suffix
    else:
        url = QueryMyGeneExtended.API_BASE_URL + '/' + handler
    headers = {'user-agent': "mygene.py/%s python-requests/%s" % ("1.0.0", "1.0.0"),
               'Accept': 'application/json'}
    try:
        res = helper.get(url, params=params, timeout=QueryMyGeneExtended.TIMEOUT_SEC, headers=headers)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryMyGeneExtended for URL: ' + url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryMyGeneExtended for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print(url, file=sys.stderr)
        print('Status code ' + str(res.status_code) + ' for url: ' + url, file=sys.stderr)
        return None
    return res.text if return_raw else res.json()
def send_query_get(handler, url_suffix):
    """
    GET helper for the EBI OLS API.

    :param handler: API endpoint path fragment
    :param url_suffix: the remainder of the URL path
    :returns: the response object on HTTP 200, otherwise None. Timeouts and
        connection errors back off one second before returning None.
    """
    requests = CacheControlHelper()
    url_str = QueryEBIOLS.API_BASE_URL + '/' + handler + "/" + url_suffix
    # print(url_str)
    try:
        res = requests.get(url_str, timeout=QueryEBIOLS.TIMEOUT_SEC)
    except requests.exceptions.Timeout:
        # bug fix: diagnostics previously named QueryNCBIeUtils.py (copy-paste)
        print('HTTP timeout in QueryEBIOLS.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  # take a timeout because NCBI rate-limits connections
        return None
    except requests.exceptions.ConnectionError:
        print('HTTP connection error in QueryEBIOLS.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  # take a timeout because NCBI rate-limits connections
        return None
    except BaseException as e:
        print(url_str, file=sys.stderr)
        print('%s received in QueryEBIOLS for URL: %s' % (e, url_str), file=sys.stderr)
        return None
    status_code = res.status_code
    if status_code != 200:
        print('HTTP response status code: ' + str(status_code) + ' for URL:\n' + url_str, file=sys.stderr)
        res = None
    return res
def send_query_get(handler, url_suffix):
    """
    GET helper for the miRGate API.

    :param handler: API endpoint path fragment
    :param url_suffix: the remainder of the URL path
    :returns: the response object, or None on timeout, any other exception,
        a non-200 status, or an empty response body
    """
    helper = CacheControlHelper()
    url_str = QueryMiRGate.API_BASE_URL + "/" + handler + "/" + url_suffix
    # print(url_str)
    try:
        res = helper.get(url_str, timeout=QueryMiRGate.TIMEOUT_SEC)
    except helper.exceptions.Timeout:
        print(url_str, file=sys.stderr)
        print("Timeout in QueryMiRGate for URL: " + url_str, file=sys.stderr)
        return None
    except BaseException as e:
        print(url_str, file=sys.stderr)
        print('%s received in QueryMiRGate for URL: %s' % (e, url_str), file=sys.stderr)
        return None
    if res.status_code != 200:
        print(url_str, file=sys.stderr)
        print("Status code " + str(res.status_code) + " for url: " + url_str, file=sys.stderr)
        return None
    if len(res.content) == 0:
        print(url_str, file=sys.stderr)
        print("Empty response from URL!", file=sys.stderr)
        return None
    return res
def __access_api(handler):
    """
    GET an EBI OLS endpoint and return the raw response text.

    :param handler: API endpoint path fragment
    :returns: the response body text on HTTP 200, otherwise None
    """
    helper = CacheControlHelper()
    url = QueryEBIOLSExtended.API_BASE_URL + '/' + handler
    # print(url)
    try:
        res = helper.get(url, timeout=QueryEBIOLSExtended.TIMEOUT_SEC)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryEBIOLSExtended for URL: ' + url, file=sys.stderr)
        return None
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryEBIOLSExtended for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print(url, file=sys.stderr)
        print('Status code ' + str(res.status_code) + ' for url: ' + url, file=sys.stderr)
        return None
    return res.text
def __access_api(url):
    """
    Fetch an HMDB record as XML ('.xml' is appended to the given URL).

    :param url: base record URL (without extension)
    :returns: the response body text on HTTP 200, otherwise None
    """
    helper = CacheControlHelper()
    url = url + '.xml'
    try:
        res = helper.get(url, timeout=QueryHMDB.TIMEOUT_SEC)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryHMDB for URL: ' + url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryHMDB for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print(url, file=sys.stderr)
        print('Status code ' + str(res.status_code) + ' for url: ' + url, file=sys.stderr)
        return None
    return res.text
def __retrieve_entries_from_url():
    """
    Download the metabolites source file and save it to
    GenerateMetabolitesTSV.FILE_NAME.

    :returns: True on success, False on any failure
    """
    # network request
    requests = CacheControlHelper()
    try:
        res = requests.get(GenerateMetabolitesTSV.URL)
    except requests.exceptions.Timeout:
        print(GenerateMetabolitesTSV.URL, file=sys.stderr)
        print("Timeout for URL: " + GenerateMetabolitesTSV.URL, file=sys.stderr)
        return False
    except BaseException as e:
        print(GenerateMetabolitesTSV.URL, file=sys.stderr)
        print('%s received in GenerateMetabolitesTSV for URL: %s' % (e, GenerateMetabolitesTSV.URL), file=sys.stderr)
        # bug fix: this branch previously returned None, inconsistent with
        # the function's True/False contract (None is falsy, but callers
        # testing `result is False` would misbehave)
        return False
    status_code = res.status_code
    if status_code != 200:
        print(GenerateMetabolitesTSV.URL, file=sys.stderr)
        print('Status code ' + str(status_code) + ' for url: ' + GenerateMetabolitesTSV.URL, file=sys.stderr)
        return False
    # save content to file in 1 KiB chunks
    with open(GenerateMetabolitesTSV.FILE_NAME, 'wb') as fd:
        for chunk in res.iter_content(1024):
            fd.write(chunk)
    return True
def __access_api(handler):
    """
    GET a Uniprot endpoint and return the raw response text.

    :param handler: path fragment appended to the Uniprot base URL
    :returns: the response body text on HTTP 200, otherwise None
    """
    api_base_url = 'http://www.uniprot.org'
    url = api_base_url + '/' + handler
    # print(url)
    contact = "*****@*****.**"
    header = {'User-Agent': 'Python %s' % contact}
    helper = CacheControlHelper()
    try:
        res = helper.get(url, timeout=QueryUniprot.TIMEOUT_SEC, headers=header)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryUniprot for URL: ' + url, file=sys.stderr)
        return None
    except helper.exceptions.ChunkedEncodingError:
        print(url, file=sys.stderr)
        print('ChunkedEncodingError for URL: ' + url, file=sys.stderr)
        return None
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryUniprot for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print(url, file=sys.stderr)
        print('Status code ' + str(res.status_code) + ' for url: ' + url, file=sys.stderr)
        return None
    return res.text
def get_request_with_cookie(url):
    """
    GET an OMIM API URL after first establishing an apiKey session cookie.

    SECURITY NOTE(review): the API key is hard-coded in source; it should be
    moved to configuration or an environment variable.

    :param url: the full OMIM API URL to fetch
    :returns: the decoded JSON body on HTTP 200, otherwise None; raises
        AssertionError if the session-cookie request itself fails
    """
    API_KEY = '1YCxuN7PRHyrpuZnO7F5gQ'
    API_BASE_URL = 'https://api.omim.org/api'
    session_data = {'apiKey': API_KEY, 'format': 'json'}
    helper = CacheControlHelper()
    session_resp = helper.sess.post(API_BASE_URL + "/apiKey", data=session_data)
    assert 200 == session_resp.status_code
    cookie = session_resp.cookies
    try:
        res = helper.get(url, cookies=cookie)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print("Timeout in QueryOMIM for URL: " + url, file=sys.stderr)
        return None
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryOMIM for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print("Status code " + str(res.status_code) + " for URL: " + url, file=sys.stderr)
        return None
    return res.json()
def send_query_get(self, omim_handler, url_suffix):
    """
    GET an OMIM API endpoint, authenticating with the session cookie
    established at construction time (self.cookie).

    :param omim_handler: OMIM API endpoint name
    :param url_suffix: pre-built query string (without the leading '?')
    :returns: the response object on HTTP 200, otherwise None
    """
    helper = CacheControlHelper()
    url = "{api_base_url}/{omim_handler}?{url_suffix}&format=json".format(
        api_base_url=QueryOMIM.API_BASE_URL,
        omim_handler=omim_handler,
        url_suffix=url_suffix)
    # print(url)
    try:
        res = helper.get(url, cookies=self.cookie)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print("Timeout in QueryOMIM for URL: " + url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryOMIM for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print("Status code " + str(res.status_code) + " for URL: " + url, file=sys.stderr)
        return None
    return res
def __init__(self):
    """Establish an OMIM API session and store its cookie on self.cookie.

    Raises AssertionError if the apiKey handshake does not return HTTP 200.
    """
    helper = CacheControlHelper()
    session_data = {'apiKey': QueryOMIMExtended.API_KEY, 'format': 'json'}
    url = QueryOMIMExtended.API_BASE_URL + "/apiKey"
    response = helper.post(url, data=session_data)
    assert 200 == response.status_code
    self.cookie = response.cookies
def send_query_get(handler, st):
    """
    GET a UMLS API endpoint, authenticating with a single-use service ticket.

    :param handler: endpoint path plus query string fragment
    :param st: a single-use service ticket string
    :returns: the response object on HTTP 200, otherwise None. Timeouts and
        connection errors back off one second before returning None.
    """
    url_str = QueryUMLS.API_BASE_URL + '/' + handler + '&ticket=' + st
    requests = CacheControlHelper()
    try:
        res = requests.get(url_str, headers={'accept': 'application/json'}, timeout=QueryUMLS.TIMEOUT_SEC)
    except requests.exceptions.Timeout:
        # bug fix: diagnostics previously named QueryNCBIeUtils.py (copy-paste)
        print('HTTP timeout in QueryUMLS.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  ## take a timeout because NCBI rate-limits connections
        return None
    except requests.exceptions.ConnectionError:
        print('HTTP connection error in QueryUMLS.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  ## take a timeout because NCBI rate-limits connections
        return None
    status_code = res.status_code
    if status_code != 200:
        print('HTTP response status code: ' + str(status_code) + ' for URL:\n' + url_str, file=sys.stderr)
        res = None
    return res
def get_single_ticket(tgt):
    """
    Exchange a UMLS ticket-granting ticket for a single-use service ticket.

    :param tgt: the ticket-granting-ticket URL to POST against
    :returns: the service ticket string from the response body
    """
    form_fields = {'service': "http://umlsks.nlm.nih.gov"}
    form_headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain",
        "User-Agent": "python",
    }
    helper = CacheControlHelper()
    response = helper.post(tgt, data=form_fields, headers=form_headers)
    return response.text
def uniprot_id_to_reactome_pathways(uniprot_id):
    """returns a ``set`` of reactome IDs of pathways associated with a given
    string uniprot ID

    :param uniprot_id: a ``str`` uniprot ID, like ``"P68871"``
    :returns: a ``set`` of string Reactome IDs, or None on request failure
        or non-200 status
    """
    payload = {
        'from': 'ACC',
        'to': 'REACTOME_ID',
        'format': 'tab',
        'query': uniprot_id
    }
    contact = "*****@*****.**"
    header = {'User-Agent': 'Python %s' % contact}
    helper = CacheControlHelper()
    url = QueryUniprot.API_BASE_URL
    try:
        res = helper.post(QueryUniprot.API_BASE_URL, data=payload, headers=header)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryUniprot for URL: ' + QueryUniprot.API_BASE_URL, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryUniprot for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print(QueryUniprot.API_BASE_URL, file=sys.stderr)
        print('Status code ' + str(res.status_code) + ' for url: ' + QueryUniprot.API_BASE_URL, file=sys.stderr)
        return None
    # assert 200 == res.status_code
    # second tab-separated column holds the mapped Reactome id; the header
    # row's value is the literal "To" and is skipped
    mapped_ids = set()
    for line in res.text.splitlines():
        target = line.split("\t")[1]
        if target != "To":
            mapped_ids.add(target)
    return mapped_ids
def get_ticket_gen():
    """
    Obtain a UMLS ticket-granting ticket (TGT) URL using the configured
    API key.

    :returns: the TGT URL string extracted from the HTML form's action
        attribute of the auth response
    """
    # params = {'username': self.username,'password': self.password}
    form_fields = {'apikey': QueryUMLS.api_key}
    form_headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain",
        "User-Agent": "python",
    }
    helper = CacheControlHelper()
    auth_resp = helper.post(QueryUMLS.Ticket_URL + QueryUMLS.auth_endpoint, data=form_fields, headers=form_headers)
    parsed = fromstring(auth_resp.text)
    ## extract the entire URL needed from the HTML form (action attribute) returned -
    ## looks similar to https://utslogin.nlm.nih.gov/cas/v1/tickets/TGT-36471-...-cas
    ## we make a POST call to this URL in the getst method
    return parsed.xpath('//form/@action')[0]
def send_query_get(handler, url_suffix, retmax=1000, retry_flag=True):
    """
    GET an NCBI eUtils endpoint, retrying once on HTTP 429 (rate limit).

    :param handler: eUtils endpoint name
    :param url_suffix: pre-built query string (without the leading '?')
    :param retmax: maximum number of records to request (default 1000)
    :param retry_flag: when True, retry once after a one-second sleep on 429
    :returns: the response object on HTTP 200 (possibly after the retry),
        otherwise None
    """
    requests = CacheControlHelper()
    url_str = QueryNCBIeUtils.API_BASE_URL + '/' + handler + '?' + url_suffix + '&retmode=json&retmax=' + str(retmax)
    # print(url_str)
    try:
        res = requests.get(url_str,
                           headers={'accept': 'application/json', 'User-Agent': 'Mozilla/5.0'},
                           timeout=QueryNCBIeUtils.TIMEOUT_SEC)
    except requests.exceptions.Timeout:
        print('HTTP timeout in QueryNCBIeUtils.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  ## take a timeout because NCBI rate-limits connections
        return None
    except requests.exceptions.ConnectionError:
        print('HTTP connection error in QueryNCBIeUtils.py; URL: ' + url_str, file=sys.stderr)
        time.sleep(1)  ## take a timeout because NCBI rate-limits connections
        return None
    except BaseException as e:
        print(url_str, file=sys.stderr)
        # bug fix: this message previously named QueryMiRGate (copy-paste)
        print('%s received in QueryNCBIeUtils for URL: %s' % (e, url_str), file=sys.stderr)
        return None
    status_code = res.status_code
    if status_code != 200:
        if status_code == 429 and retry_flag:
            # rate-limited: back off and retry exactly once
            time.sleep(1)
            res = QueryNCBIeUtils.send_query_get(handler, url_suffix, retmax, False)
        else:
            print('HTTP response status code: ' + str(status_code) + ' for URL:\n' + url_str, file=sys.stderr)
            res = None
    return res
def get_request(url):
    """
    GET an arbitrary URL (120 s timeout) and return the decoded JSON body.

    :param url: the full URL to fetch
    :returns: the decoded JSON on HTTP 200, otherwise None
    """
    helper = CacheControlHelper()
    try:
        res = helper.get(url, timeout=120)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout for URL: ' + url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print(url, file=sys.stderr)
        print('Status code ' + str(res.status_code) + ' for url: ' + url, file=sys.stderr)
        return None
    return res.json()
def get_uniprot_names(id):
    """
    Takes a uniprot id then return a string containing all synonyms listed on
    uniprot separated by the delimiter |

    :param id: a string containing the uniprot id (curie 'UniProtKB:P176...'
        or bare 'P176...')
    :returns: a '|'-separated string of synonyms (MeSH terms tagged with
        '[MeSH Terms]'), or None on HTTP error or an empty response
    """
    # We want the actual uniprot name P176..., not the curie UniProtKB:P176...
    if "UniProtKB:" in id:
        id = ":".join(id.split(":")[1:])
    # hardcoded url for uniprot data
    url = 'https://www.uniprot.org/uniprot/?query=id:' + id + '&sort=score&columns=entry name,protein names,genes&format=tab'
    requests = CacheControlHelper()
    r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})  # send get request
    if r.status_code != 200:  # checks for error
        print('HTTP response status code: ' + str(r.status_code) + ' for URL:\n' + url, file=sys.stderr)
        return None
    if r.content.decode('utf-8') == '':
        return None
    df = pandas.read_csv(StringIO(r.content.decode('utf-8')), sep='\t')
    search = df.loc[0, 'Entry name']  # initializes search term variable
    # perf/idiom fix: hoist the bracket-splitting regex (was compiled inline)
    # and use isinstance instead of `type(...) == str`
    bracket_regex = re.compile(r"[()\[\]]")
    protein_names = df.loc[0, 'Protein names']
    if isinstance(protein_names, str):  # checks for protein section
        for name in bracket_regex.split(protein_names):
            if len(name) > 1:
                if QueryNCBIeUtils.is_mesh_term(name):
                    search += '|' + name + '[MeSH Terms]'
                else:
                    search += '|' + name
    gene_names = df.loc[0, 'Gene names']
    if isinstance(gene_names, str):
        for name in gene_names.split(' '):
            if len(name) > 1:
                if QueryNCBIeUtils.is_mesh_term(name):
                    search += '|' + name + '[MeSH Terms]'
                else:
                    search += '|' + name
    return search
def send_query_get(handler, url_suffix):
    """
    GET helper for the Disease Ontology API.

    :param handler: API endpoint path fragment
    :param url_suffix: the remainder of the URL path
    :returns: the response object on HTTP 200, otherwise None
    """
    helper = CacheControlHelper()
    url = QueryDisont.API_BASE_URL + "/" + handler + "/" + url_suffix
    # print(url_str)
    try:
        res = helper.get(url, timeout=QueryDisont.TIMEOUT_SEC)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QueryDisont for URL: ' + url, file=sys.stderr)
        return None
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryDisont for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print('Status code ' + str(res.status_code) + ' for url: ' + url, file=sys.stderr)
        return None
    return res
def send_query_get(handler, url_suffix):
    """
    GET helper for the miRBase API.

    :param handler: API endpoint path fragment
    :param url_suffix: pre-built query string (without the leading '?')
    :returns: the response object; None on timeout or other request failure;
        raises AssertionError on a non-200 status
    """
    requests = CacheControlHelper()
    url_str = QueryMiRBase.API_BASE_URL + "/" + handler + "?" + url_suffix
    # print(url_str)
    try:
        res = requests.get(url_str)
    except requests.exceptions.Timeout:
        print(url_str, file=sys.stderr)
        print('Timeout in QueryMiRBase for URL: ' + url_str, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url_str, file=sys.stderr)
        # bug fix: this previously referenced the undefined name `url`,
        # which would raise NameError inside the exception handler
        print('%s received in QueryMiRBase for URL: %s' % (e, url_str), file=sys.stderr)
        return None
    status_code = res.status_code
    # NOTE(review): assert is stripped under `python -O`; kept as-is to
    # preserve the original behavior for unexpected status codes
    assert status_code == 200
    return res
def __access_api(url, params=None, headers=None):
    """
    GET a SciGraph endpoint and return the decoded JSON body.

    Note: the `headers` parameter is accepted but not forwarded (preserved
    from the original signature for caller compatibility).

    :param url: the full URL to fetch
    :param params: optional dict of query parameters
    :returns: the decoded JSON on HTTP 200, otherwise None
    """
    # print(url)
    helper = CacheControlHelper()
    try:
        res = helper.get(url, params=params, timeout=QuerySciGraph.TIMEOUT_SEC)
    except helper.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout in QuerySciGraph for URL: ' + url, file=sys.stderr)
        return None
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QuerySciGraph for URL: %s' % (e, url), file=sys.stderr)
        return None
    if res.status_code != 200:
        print(url, file=sys.stderr)
        print('Status code ' + str(res.status_code) + ' for url: ' + res.url, file=sys.stderr)
        return None
    return res.json()
def send_query(query):
    """
    GET the given URL (120 s timeout) and return the response body text.

    :param query: the full URL to fetch
    :returns: the response body as text
    """
    helper = CacheControlHelper()
    response = helper.get(query, timeout=120)
    return response.text
def query_mesh_id_to_uniprot_ids_desc(mesh_id):
    """
    Query DisGeNET for genes associated with a MeSH disease id, ranked by
    score descending.

    :param mesh_id: a string MeSH disease identifier (interpolated into the
        DisGeNET query — assumed to be a trusted, well-formed id)
    :returns: a dict mapping uniprot protein ids to gene symbols (at most
        MAX_GENES_FOR_DISEASE genes, MAX_PROTS_FOR_GENE proteins per gene);
        an empty dict on any request failure
    """
    seq = ("""DEFINE
    c0='/data/gene_disease_summary',
    c1='/data/diseases',
    c2='/data/genes',
    c4='/data/sources'
ON
    'http://www.disgenet.org/web/DisGeNET'
SELECT
    c1 (diseaseId, name, diseaseClassName, STY, MESH, OMIM, type),
    c2 (geneId, symbol, uniprotId, description, pantherName),
    c0 (score, EI, Npmids, Nsnps)
FROM
    c0
WHERE
    (
        c1.MESH = '""" + mesh_id + """'
    AND
        c4 = 'ALL'
    )
ORDER BY
    c0.score DESC""")
    # bug fix: `binary_data` was referenced below but its assignment had been
    # commented out, causing a NameError at runtime
    binary_data = seq.encode('utf-8')
    url_str = QueryDisGeNet.SPARQL_ENDPOINT_URL
    requests = CacheControlHelper()
    try:
        res = requests.post(url_str, data=binary_data, timeout=QueryDisGeNet.TIMEOUT_SEC)
    except requests.exceptions.Timeout:
        # bug fix: was `print(url_str, sys.stderr)` — missing the file=
        # keyword, so the repr of sys.stderr was printed to stdout
        print(url_str, file=sys.stderr)
        print('Timeout in QueryDisGeNet for URL: ' + url_str, file=sys.stderr)
        return dict()
    except BaseException as e:
        print(url_str, file=sys.stderr)
        print('%s received in QueryDisGeNet for URL: %s' % (e, url_str), file=sys.stderr)
        # bug fix: was `return None`, inconsistent with every other failure
        # path, which returns an empty dict
        return dict()
    if res.status_code != 200:
        print(url_str, file=sys.stderr)  # bug fix: file= keyword was missing here too
        print('Status code ' + str(res.status_code) + ' for url: ' + url_str, file=sys.stderr)
        return dict()
    if len(res.content) == 0:
        print(url_str, file=sys.stderr)
        print('Empty response from URL!', file=sys.stderr)
        return dict()
    ret_data_df = pandas.read_csv(io.StringIO(res.content.decode('utf-8')),
                                  sep='\t').head(QueryDisGeNet.MAX_GENES_FOR_DISEASE)
    uniprot_ids_list = ret_data_df['c2.uniprotId'].tolist()
    gene_names_list = ret_data_df['c2.symbol'].tolist()
    ret_dict = dict(list(zip(uniprot_ids_list, gene_names_list)))
    # expand compound keys ('P1;P2' and dotted ids) and drop non-string keys
    for prot in ret_dict.copy().keys():
        if type(prot) == str and prot != "null":
            if '.' in prot or ';' in prot:
                gene = ret_dict[prot]
                del ret_dict[prot]
                # NOTE(review): str.replace returns a new string that is
                # discarded here, so this line is a no-op; the intent (strip
                # isoform suffixes?) is unclear, so behavior is left as-is
                prot.replace('.', '')
                prots_to_add = prot.split(';')
                if len(prots_to_add) > QueryDisGeNet.MAX_PROTS_FOR_GENE:
                    prots_to_add = prots_to_add[0:QueryDisGeNet.MAX_PROTS_FOR_GENE]
                dict_add = dict()
                for prot_name in prots_to_add:
                    if type(prot_name) == str and prot_name != "null":
                        dict_add[prot_name] = gene
                ret_dict.update(dict_add)
        else:
            ## this is a math.nan
            del ret_dict[prot]
    return ret_dict