Exemple #1
0
    def send_query_get(handler, url_suffix):
        """
        Send a GET request built from ``QueryEBIOLS.API_BASE_URL`` and return the response.

        The URL is ``<API_BASE_URL>/<handler>/<url_suffix>`` and is fetched
        through a ``CacheControlHelper`` instance with a timeout of
        ``QueryEBIOLS.TIMEOUT_SEC``.

        :param handler: path segment appended directly after the base URL
        :param url_suffix: path segment appended after ``handler``
        :returns: the response object when the HTTP status is 200; ``None`` on
            timeout, connection error, any other ``Exception``, or a non-200
            status (every failure is reported on stderr)
        """

        requests = CacheControlHelper()
        url_str = QueryEBIOLS.API_BASE_URL + '/' + handler + "/" + url_suffix
        try:
            res = requests.get(url_str, timeout=QueryEBIOLS.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print('HTTP timeout in QueryEBIOLS; URL: ' + url_str, file=sys.stderr)
            time.sleep(1)  # short pause before the caller can issue another request
            return None
        except requests.exceptions.ConnectionError:
            print('HTTP connection error in QueryEBIOLS; URL: ' + url_str, file=sys.stderr)
            time.sleep(1)  # short pause before the caller can issue another request
            return None
        except Exception as e:
            # Exception rather than BaseException: KeyboardInterrupt and
            # SystemExit must propagate instead of being turned into None.
            print(url_str, file=sys.stderr)
            print('%s received in QueryEBIOLS for URL: %s' % (e, url_str), file=sys.stderr)
            return None

        status_code = res.status_code
        if status_code != 200:
            print('HTTP response status code: ' + str(status_code) + ' for URL:\n' + url_str, file=sys.stderr)
            res = None
        return res
    def query_oxo(uid):
        """
        Send a curie id to the EMBL-EBI OxO mappings endpoint (used to convert it to a CUI).

        :param uid: identifier to look up; it is converted with ``str()`` and
            appended as the ``fromId`` query value
        :returns: the response object when the HTTP status is 200, otherwise
            ``None``. Timeouts, connection errors, ``sqlite3.OperationalError``
            and non-200 statuses are reported on stderr.
        """
        url_str = 'https://www.ebi.ac.uk/spot/oxo/api/mappings?fromId=' + str(uid)
        session = CacheControlHelper()

        try:
            response = session.get(url_str, headers={'accept': 'application/json'}, timeout=120)
        except session.exceptions.Timeout:
            print('HTTP timeout in SemMedInterface.py; URL: ' + url_str, file=sys.stderr)
            time.sleep(1)  # pause for one second before giving up on this request
            return None
        except session.exceptions.ConnectionError:
            print('HTTP connection error in SemMedInterface.py; URL: ' + url_str, file=sys.stderr)
            time.sleep(1)  # pause for one second before giving up on this request
            return None
        except sqlite3.OperationalError:
            print('Error reading sqlite cache; URL: ' + url_str, file=sys.stderr)
            return None

        # Success path first; anything else is logged and dropped.
        if response.status_code == 200:
            return response
        print('HTTP response status code: ' + str(response.status_code) + ' for URL:\n' + url_str, file=sys.stderr)
        return None
Exemple #3
0
 def send_query_get(self, omim_handler, url_suffix):
     """
     Issue a GET request against ``QueryOMIM.API_BASE_URL`` using ``self.cookie``.

     The URL has the form ``<base>/<omim_handler>?<url_suffix>&format=json``.

     :param omim_handler: path segment placed after the base URL
     :param url_suffix: query string placed after the ``?``
     :returns: the response object when the HTTP status is 200, otherwise
         ``None`` (failures are reported on stderr). A ``KeyboardInterrupt``
         during the request exits the process with status 0.
     """
     session = CacheControlHelper()
     url_parts = {
         'api_base_url': QueryOMIM.API_BASE_URL,
         'omim_handler': omim_handler,
         'url_suffix': url_suffix,
     }
     url = "{api_base_url}/{omim_handler}?{url_suffix}&format=json".format(**url_parts)
     try:
         response = session.get(url, cookies=self.cookie)
     except session.exceptions.Timeout:
         print(url, file=sys.stderr)
         print("Timeout in QueryOMIM for URL: " + url, file=sys.stderr)
         return None
     except KeyboardInterrupt:
         sys.exit(0)
     except BaseException as e:
         print(url, file=sys.stderr)
         print('%s received in QueryOMIM for URL: %s' % (e, url),
               file=sys.stderr)
         return None

     if response.status_code == 200:
         return response
     print("Status code " + str(response.status_code) + " for URL: " + url,
           file=sys.stderr)
     return None
Exemple #4
0
    def get_pubmed_id_for_pubchem_id(pubchem_id):
        """
        Return the PubMed ids that PubChem cross-references for a compound.

        :param pubchem_id: PubChem compound id as a string; any other type
            yields ``None`` without a request being made
        :returns: list of strings of the form ``'<pmid>[uid]'``, or ``None``
            when the request fails, the body is not JSON, the JSON contains a
            ``Fault`` entry, or the expected
            ``InformationList -> Information[0] -> PubMedID`` path is absent
        """
        if not isinstance(pubchem_id, str):
            return None

        requests = CacheControlHelper()
        url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/' + pubchem_id + '/xrefs/PubMedID/JSON'
        try:
            r = requests.get(url, timeout=10)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryPubChem for URL: ' + url, file=sys.stderr)
            return None
        except Exception as e:
            # Exception rather than BaseException so Ctrl-C / SystemExit propagate.
            print(url, file=sys.stderr)
            print('%s received in QueryPubChem for URL: %s' % (e, url), file=sys.stderr)
            return None
        if r is None:
            return None

        # Parse the body once; a non-JSON body (e.g. an HTML error page) is
        # treated like any other failed lookup instead of raising.
        try:
            payload = r.json()
        except ValueError:
            print('Non-JSON response in QueryPubChem for URL: ' + url, file=sys.stderr)
            return None
        if not isinstance(payload, dict) or 'Fault' in payload:
            return None

        # A compound without cross-references may lack part of this path.
        try:
            pubmed_ids = payload['InformationList']['Information'][0]['PubMedID']
        except (KeyError, IndexError, TypeError):
            return None
        return [str(x) + '[uid]' for x in pubmed_ids]
Exemple #5
0
    def send_query_get(handler, url_suffix):
        """
        GET ``<QueryMiRGate.API_BASE_URL>/<handler>/<url_suffix>`` and return the response.

        :param handler: path segment appended after the base URL
        :param url_suffix: path segment appended after ``handler``
        :returns: the response object when the status is 200 and the body is
            non-empty; ``None`` on timeout, any other exception, a non-200
            status, or an empty body (each reported on stderr)
        """

        session = CacheControlHelper()
        url_str = "/".join([QueryMiRGate.API_BASE_URL, handler, url_suffix])
        try:
            response = session.get(url_str, timeout=QueryMiRGate.TIMEOUT_SEC)
        except session.exceptions.Timeout:
            print(url_str, file=sys.stderr)
            print("Timeout in QueryMiRGate for URL: " + url_str, file=sys.stderr)
            return None
        except BaseException as e:
            print(url_str, file=sys.stderr)
            print('%s received in QueryMiRGate for URL: %s' % (e, url_str), file=sys.stderr)
            return None

        if response.status_code != 200:
            print(url_str, file=sys.stderr)
            print("Status code " + str(response.status_code) + " for url: " + url_str, file=sys.stderr)
            return None

        # A 200 with a body is the only successful outcome.
        if len(response.content) > 0:
            return response
        print(url_str, file=sys.stderr)
        print("Empty response from URL!", file=sys.stderr)
        return None
Exemple #6
0
    def get_cui(chemical_substance_id):
        """
        Fetch the ``drugcentral.xref.umlscui`` field for a chemical substance.

        A leading ``CHEMBL:`` prefix (case-insensitive) is rewritten to
        ``CHEMBL`` before the id is placed in the request path.

        :param chemical_substance_id: identifier of the chemical substance
        :returns: the value stored under ``drugcentral -> xref -> umlscui`` in
            the JSON reply, or ``None`` when the request fails, the status is
            not 200, or any of those keys is missing. A ``KeyboardInterrupt``
            during the request exits the process with status 0.
        """
        if chemical_substance_id[:7].upper() == "CHEMBL:":
            chemical_substance_id = "CHEMBL" + chemical_substance_id[7:]
        handler = 'chem/' + chemical_substance_id + '?fields=drugcentral.xref.umlscui'

        session = CacheControlHelper()
        url = QueryMyChem.API_BASE_URL + '/' + handler

        try:
            response = session.get(url, timeout=QueryMyChem.TIMEOUT_SEC)
        except session.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryMyChem for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryMyChem for URL: %s' % (e, url),
                  file=sys.stderr)
            return None

        # Non-200 replies are dropped without logging.
        if response.status_code != 200:
            return None

        # Walk the nested mapping one level at a time; stop at the first
        # missing key.
        node = response.json()
        for key in ('drugcentral', 'xref', 'umlscui'):
            if key not in node.keys():
                return None
            node = node[key]
        return node
Exemple #7
0
    def __retrieve_entries_from_url():
        """
        Download ``GenerateMetabolitesTSV.URL`` into ``GenerateMetabolitesTSV.FILE_NAME``.

        :returns: ``True`` when the file was written; ``False`` on timeout, on
            any other ``Exception`` raised by the request, or on a non-200
            status (each failure is reported on stderr)
        """

        #   network request
        requests = CacheControlHelper()
        try:
            res = requests.get(GenerateMetabolitesTSV.URL)
        except requests.exceptions.Timeout:
            print(GenerateMetabolitesTSV.URL, file=sys.stderr)
            print("Timeout for URL: " + GenerateMetabolitesTSV.URL,
                  file=sys.stderr)
            return False
        except Exception as e:
            # Exception rather than BaseException so Ctrl-C / SystemExit propagate.
            print(GenerateMetabolitesTSV.URL, file=sys.stderr)
            print('%s received in GenerateMetabolitesTSV for URL: %s' %
                  (e, GenerateMetabolitesTSV.URL),
                  file=sys.stderr)
            # Same failure value as the other error paths (was None).
            return False
        status_code = res.status_code
        if status_code != 200:
            print(GenerateMetabolitesTSV.URL, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' +
                  GenerateMetabolitesTSV.URL,
                  file=sys.stderr)
            return False

        #   save content to file, 1024 bytes at a time
        with open(GenerateMetabolitesTSV.FILE_NAME, 'wb') as fd:
            for chunk in res.iter_content(1024):
                fd.write(chunk)

        return True
Exemple #8
0
    def __access_api(handler):
        """
        GET ``<QueryEBIOLSExtended.API_BASE_URL>/<handler>`` and return the body text.

        :param handler: path appended after the base URL
        :returns: ``res.text`` when the HTTP status is 200; ``None`` on
            timeout, any other exception, or a non-200 status (each reported
            on stderr)
        """

        session = CacheControlHelper()
        url = '/'.join([QueryEBIOLSExtended.API_BASE_URL, handler])
        try:
            response = session.get(url, timeout=QueryEBIOLSExtended.TIMEOUT_SEC)
        except session.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryEBIOLSExtended for URL: ' + url,
                  file=sys.stderr)
            return None
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryEBIOLSExtended for URL: %s' % (e, url),
                  file=sys.stderr)
            return None

        if response.status_code == 200:
            return response.text

        print(url, file=sys.stderr)
        print('Status code ' + str(response.status_code) + ' for url: ' + url,
              file=sys.stderr)
        return None
Exemple #9
0
    def send_query_get(entity, url_suffix):
        """
        GET ``<QueryPharos.API_BASE_URL>/<entity><url_suffix>`` and return the response.

        :param entity: path segment appended after the base URL
        :param url_suffix: text appended directly after ``entity`` (no
            separator is inserted)
        :returns: the response object when the HTTP status is 200; ``None``
            for a 404, for any other status (reported on stderr), on timeout,
            or on any other ``Exception``. A ``KeyboardInterrupt`` during the
            request exits the process with status 0.
        """
        requests = CacheControlHelper()
        url_str = QueryPharos.API_BASE_URL + "/" + entity + url_suffix

        try:
            res = requests.get(url_str)
        except requests.exceptions.Timeout:
            print(url_str, file=sys.stderr)
            print('Timeout in QueryPharos for URL: ' + url_str,
                  file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except Exception as e:
            # The original formatted an undefined name (`url`) here, so the
            # handler itself raised NameError instead of returning None.
            print(url_str, file=sys.stderr)
            print('%s received in QueryPharos for URL: %s' % (e, url_str),
                  file=sys.stderr)
            return None

        status_code = res.status_code
        if status_code == 404:
            # "Not found" is an expected outcome and is not logged.
            return None
        if status_code != 200:
            # Explicit check instead of `assert`, which is stripped under -O.
            print(url_str, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url_str,
                  file=sys.stderr)
            return None
        return res
Exemple #10
0
    def __access_api(url):
        """
        GET ``url + '.xml'`` and return the body text.

        :param url: address without the ``.xml`` extension
        :returns: ``res.text`` when the HTTP status is 200; ``None`` on
            timeout, any other exception, or a non-200 status (each reported
            on stderr). A ``KeyboardInterrupt`` during the request exits the
            process with status 0.
        """

        session = CacheControlHelper()
        url += '.xml'

        try:
            response = session.get(url, timeout=QueryHMDB.TIMEOUT_SEC)
        except session.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryHMDB for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryHMDB for URL: %s' % (e, url),
                  file=sys.stderr)
            return None

        if response.status_code == 200:
            return response.text

        print(url, file=sys.stderr)
        print('Status code ' + str(response.status_code) + ' for url: ' + url,
              file=sys.stderr)
        return None
Exemple #11
0
    def __access_api(handler):
        """
        GET ``http://www.uniprot.org/<handler>`` and return the body text.

        The request carries a ``User-Agent`` header built from a contact
        address and uses ``QueryUniprot.TIMEOUT_SEC`` as its timeout.

        :param handler: path appended after the site root
        :returns: ``res.text`` when the HTTP status is 200; ``None`` on
            timeout, chunked-encoding error, any other exception, or a
            non-200 status (each reported on stderr)
        """

        url = 'http://www.uniprot.org' + '/' + handler
        contact = "*****@*****.**"
        request_headers = {'User-Agent': 'Python %s' % contact}

        session = CacheControlHelper()
        try:
            response = session.get(url,
                                   timeout=QueryUniprot.TIMEOUT_SEC,
                                   headers=request_headers)
        except session.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryUniprot for URL: ' + url, file=sys.stderr)
            return None
        except session.exceptions.ChunkedEncodingError:
            print(url, file=sys.stderr)
            print('ChunkedEncodingError for URL: ' + url, file=sys.stderr)
            return None
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryUniprot for URL: %s' % (e, url),
                  file=sys.stderr)
            return None

        if response.status_code == 200:
            return response.text

        print(url, file=sys.stderr)
        print('Status code ' + str(response.status_code) + ' for url: ' + url,
              file=sys.stderr)
        return None
    def __access_api(handler, url_suffix, params=None, return_raw=False):
        """
        GET ``<QueryMyGeneExtended.API_BASE_URL>/<handler>[?<url_suffix>]``.

        :param handler: path appended after the base URL
        :param url_suffix: query string; appended after a ``?`` only when truthy
        :param params: forwarded as the ``params`` argument of the GET call
        :param return_raw: when true return the body text, otherwise the
            decoded JSON
        :returns: ``res.text`` or ``res.json()`` when the HTTP status is 200;
            ``None`` on timeout, any other exception, or a non-200 status
            (each reported on stderr). A ``KeyboardInterrupt`` during the
            request exits the process with status 0.
        """

        session = CacheControlHelper()
        url = QueryMyGeneExtended.API_BASE_URL + '/' + handler
        if url_suffix:
            url = url + '?' + url_suffix
        request_headers = {
            'user-agent': "mygene.py/%s python-requests/%s" % ("1.0.0", "1.0.0"),
            'Accept': 'application/json',
        }
        try:
            response = session.get(url, params=params, timeout=QueryMyGeneExtended.TIMEOUT_SEC,
                                   headers=request_headers)
        except session.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryMyGeneExtended for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryMyGeneExtended for URL: %s' % (e, url), file=sys.stderr)
            return None

        if response.status_code != 200:
            print(url, file=sys.stderr)
            print('Status code ' + str(response.status_code) + ' for url: ' + url, file=sys.stderr)
            return None

        return response.text if return_raw else response.json()
Exemple #13
0
    def get_reactome_names(id):
        '''
        Build a ``|``-separated search string from the names Reactome lists for an id.

        Each line of the reply is handled in one of three ways:

        * lines containing parentheses are split on ``(`` / ``)`` and every
          non-empty piece becomes a term;
        * lines containing ``ecNumber`` contribute the text before it as a
          term and the text after it (minus its last character) tagged with
          ``[EC/RN Number]``;
        * any other non-empty line becomes a single term.

        A term that ``QueryNCBIeUtils.is_mesh_term`` accepts gets the suffix
        ``[MeSH Terms]``, so the result can be passed on as an ordinary
        (non-MeSH) search string while still searching those terms as MeSH.

        Parameters:
            id - a string containing the reactome id; a ``REACT:`` curie
                 prefix is stripped first

        Output:
            search - the joined search string, or None when the HTTP status
                     is not 200 (the status is reported on stderr)
        '''
        # We want the actual reactome name R-HSA..., not the curie REACT:R-HSA...
        if "REACT:" in id:
            id = id.split(":", 1)[1]
        url = 'https://reactome.org/ContentService/data/query/' + id + '/name'
        session = CacheControlHelper()
        r = session.get(url, headers={'User-Agent': 'Mozilla/5.0'})
        if r.status_code != 200:
            print('HTTP response status code: ' + str(r.status_code) +
                  ' for URL:\n' + url,
                  file=sys.stderr)
            return None

        paren_splitter = re.compile("[()]")
        terms = []

        def add_term(text):
            # Tag the term when it is a MeSH term, otherwise keep it verbatim.
            if QueryNCBIeUtils.is_mesh_term(text):
                terms.append(text + '[MeSH Terms]')
            else:
                terms.append(text)

        for name in r.text.split('\n'):
            if len(name) == 0:
                # blank lines at the beginning and end of the response
                continue

            pieces = paren_splitter.split(name)
            if len(pieces) > 1:
                # the line contains parentheses; drop blanks made by the split
                for piece in pieces:
                    if len(piece) > 0:
                        add_term(piece)
                continue

            ec_parts = name.split('ecNumber')
            if len(ec_parts) > 1:
                add_term(ec_parts[0])
                # drop the trailing "/" and format as an EC search term
                terms.append(ec_parts[1][:-1] + '[EC/RN Number]')
            else:
                add_term(name)

        return '|'.join(terms)
def get_request_with_cookie(url):
    """
    Open an OMIM API session, then GET ``url`` with the session cookies.

    The session is opened by POSTing the API key to ``<API_BASE_URL>/apiKey``;
    the cookies from that reply are sent with the GET request.

    :param url: address to fetch once the session cookie has been obtained
    :returns: the decoded JSON body when both requests return status 200;
        ``None`` when either status is not 200, on timeout, or on any other
        ``Exception`` raised by the GET (each reported on stderr)
    """
    # NOTE(review): hard-coded API credential; consider loading it from
    # configuration or the environment instead of source control.
    API_KEY = '1YCxuN7PRHyrpuZnO7F5gQ'
    API_BASE_URL = 'https://api.omim.org/api'

    session_data = {'apiKey': API_KEY,
                    'format': 'json'}

    requests = CacheControlHelper()
    session_url = API_BASE_URL + "/apiKey"
    r = requests.sess.post(session_url, data=session_data)
    if r.status_code != 200:
        # Explicit check instead of `assert`, which is stripped under -O and
        # would let a failed login continue with unusable cookies.
        print("Status code " + str(r.status_code) + " for URL: " + session_url, file=sys.stderr)
        return None
    cookie = r.cookies

    try:
        res = requests.get(url, cookies=cookie)
    except requests.exceptions.Timeout:
        print(url, file=sys.stderr)
        print("Timeout in QueryOMIM for URL: " + url, file=sys.stderr)
        return None
    except Exception as e:
        # Exception rather than BaseException so Ctrl-C / SystemExit propagate.
        print(url, file=sys.stderr)
        print('%s received in QueryOMIM for URL: %s' % (e, url), file=sys.stderr)
        return None
    status_code = res.status_code
    if status_code != 200:
        print("Status code " + str(status_code) + " for URL: " + url, file=sys.stderr)
        return None
    return res.json()
Exemple #15
0
    def send_query_get(handler, url_suffix):
        """
        GET ``<QueryChEMBL.API_BASE_URL>/<handler>?<url_suffix>`` and decode the JSON reply.

        :param handler: path appended after the base URL
        :param url_suffix: query string appended after the ``?``
        :returns: ``res.json()`` when the HTTP status is 200; ``None`` on
            timeout, any other exception, or a non-200 status (each reported
            on stderr). A ``KeyboardInterrupt`` during the request exits the
            process with status 0.
        """

        session = CacheControlHelper()
        url = QueryChEMBL.API_BASE_URL + '/' + handler
        url = url + '?' + url_suffix
        try:
            response = session.get(url, timeout=QueryChEMBL.TIMEOUT_SEC)
        except session.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryChEMBL for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryChEMBL for URL: %s' % (e, url),
                  file=sys.stderr)
            return None

        if response.status_code == 200:
            return response.json()

        print(url, file=sys.stderr)
        print('Status code ' + str(response.status_code) + ' for url: ' + url,
              file=sys.stderr)
        return None
Exemple #16
0
 def send_query_get(handler, st):
     """
     GET ``<QueryUMLS.API_BASE_URL>/<handler>&ticket=<st>`` and return the response.

     :param handler: path (and leading query string) appended after the base
         URL; the ticket is appended with ``&``
     :param st: ticket value appended as the ``ticket`` query parameter
     :returns: the response object when the HTTP status is 200; ``None`` on
         timeout, connection error, or a non-200 status (each reported on
         stderr)
     """
     url_str = QueryUMLS.API_BASE_URL + '/' + handler + '&ticket=' + st
     requests = CacheControlHelper()
     try:
         res = requests.get(url_str,
                            headers={'accept': 'application/json'},
                            timeout=QueryUMLS.TIMEOUT_SEC)
     except requests.exceptions.Timeout:
         # Message now names this class; it previously blamed QueryNCBIeUtils.py.
         print('HTTP timeout in QueryUMLS; URL: ' + url_str,
               file=sys.stderr)
         time.sleep(1)  # short pause before the caller can issue another request
         return None
     except requests.exceptions.ConnectionError:
         print('HTTP connection error in QueryUMLS; URL: ' + url_str,
               file=sys.stderr)
         time.sleep(1)  # short pause before the caller can issue another request
         return None
     status_code = res.status_code
     if status_code != 200:
         print('HTTP response status code: ' + str(status_code) +
               ' for URL:\n' + url_str,
               file=sys.stderr)
         res = None
     return res
Exemple #17
0
    def send_query_get(handler, url_suffix, retmax=1000, retry_flag=True):
        """
        GET a ``QueryNCBIeUtils.API_BASE_URL`` endpoint and return the response.

        The URL is
        ``<base>/<handler>?<url_suffix>&retmode=json&retmax=<retmax>``.

        :param handler: path appended after the base URL
        :param url_suffix: query string appended after the ``?``
        :param retmax: value sent as the ``retmax`` query parameter
        :param retry_flag: when true, a 429 reply triggers one retry after a
            one-second pause (the retry itself is made with
            ``retry_flag=False``)
        :returns: the response object when the HTTP status is 200; ``None`` on
            timeout, connection error, any other ``Exception``, or a non-200
            status that is not retried (each reported on stderr)
        """

        requests = CacheControlHelper()
        url_str = QueryNCBIeUtils.API_BASE_URL + '/' + handler + '?' + url_suffix + '&retmode=json&retmax=' + str(
            retmax)
        try:
            res = requests.get(url_str,
                               headers={
                                   'accept': 'application/json',
                                   'User-Agent': 'Mozilla/5.0'
                               },
                               timeout=QueryNCBIeUtils.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print('HTTP timeout in QueryNCBIeUtils.py; URL: ' + url_str,
                  file=sys.stderr)
            time.sleep(
                1)  ## take a timeout because NCBI rate-limits connections
            return None
        except requests.exceptions.ConnectionError:
            print('HTTP connection error in QueryNCBIeUtils.py; URL: ' +
                  url_str,
                  file=sys.stderr)
            time.sleep(
                1)  ## take a timeout because NCBI rate-limits connections
            return None
        except Exception as e:
            # Exception rather than BaseException so Ctrl-C / SystemExit
            # propagate; the message previously named QueryMiRGate.
            print(url_str, file=sys.stderr)
            print('%s received in QueryNCBIeUtils for URL: %s' % (e, url_str),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            if status_code == 429 and retry_flag:
                # 429: wait a second and try exactly once more.
                time.sleep(1)
                res = QueryNCBIeUtils.send_query_get(handler, url_suffix,
                                                     retmax, False)
            else:
                print('HTTP response status code: ' + str(status_code) +
                      ' for URL:\n' + url_str,
                      file=sys.stderr)
                res = None
        return res
def get_request(url):
    """
    GET ``url`` with a 120-second timeout and return the decoded JSON body.

    :param url: address to fetch
    :returns: ``res.json()`` when the HTTP status is 200; ``None`` on timeout,
        any other exception, or a non-200 status (each reported on stderr).
        A ``KeyboardInterrupt`` during the request exits the process with
        status 0.
    """
    session = CacheControlHelper()
    try:
        response = session.get(url, timeout=120)
    except session.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout for URL: ' + url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received for URL: %s' % (e, url), file=sys.stderr)
        return None

    if response.status_code == 200:
        return response.json()

    print(url, file=sys.stderr)
    print('Status code ' + str(response.status_code) + ' for url: ' + url, file=sys.stderr)
    return None
Exemple #19
0
 def get_uniprot_names(id):
     """
     Build a ``|``-separated search string from the names UniProt lists for an id.

     The tab-separated reply is read with pandas. The result starts with the
     ``Entry name`` of the first row, followed by the pieces of
     ``Protein names`` (split on parentheses and square brackets) and the
     space-separated ``Gene names``. Pieces of one character or less are
     skipped, and pieces accepted by ``QueryNCBIeUtils.is_mesh_term`` get the
     suffix ``[MeSH Terms]``.

     :param id: a string containing the uniprot id; a ``UniProtKB:`` curie
         prefix is stripped first
     :returns: the search string, or ``None`` when the HTTP status is not 200
         (reported on stderr), the body is empty, or the table has no rows
     """
     # We want the actual uniprot name P176..., not the curie UniProtKB:P176...
     if "UniProtKB:" in id:
         id = ":".join(id.split(":")[1:])
     url = 'https://www.uniprot.org/uniprot/?query=id:' + id + '&sort=score&columns=entry name,protein names,genes&format=tab'  # hardcoded url for uniprot data
     requests = CacheControlHelper()
     r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
     if r.status_code != 200:
         print('HTTP response status code: ' + str(r.status_code) +
               ' for URL:\n' + url,
               file=sys.stderr)
         return None

     # Decode once and reuse the text for both the emptiness check and parsing.
     body = r.content.decode('utf-8')
     if body == '':
         return None
     df = pandas.read_csv(StringIO(body), sep='\t')
     if df.empty:
         # Header-only reply: there is no row 0 to read names from.
         return None

     search = df.loc[0, 'Entry name']

     # Collect candidates in the original order: protein names, then gene
     # names. A missing cell is read back as NaN (a float), hence the
     # isinstance checks.
     candidates = []
     protein_names = df.loc[0, 'Protein names']
     if isinstance(protein_names, str):
         # Raw string: "\[" in a normal literal is an invalid escape sequence.
         candidates.extend(re.split(r"[()\[\]]", protein_names))
     gene_names = df.loc[0, 'Gene names']
     if isinstance(gene_names, str):
         candidates.extend(gene_names.split(' '))

     for name in candidates:
         if len(name) > 1:
             if QueryNCBIeUtils.is_mesh_term(name):
                 search += '|' + name + '[MeSH Terms]'
             else:
                 search += '|' + name
     return search
Exemple #20
0
    def send_query_get(handler, url_suffix):
        """
        GET ``<QueryDisont.API_BASE_URL>/<handler>/<url_suffix>`` and return the response.

        :param handler: path segment appended after the base URL
        :param url_suffix: path segment appended after ``handler``
        :returns: the response object when the HTTP status is 200; ``None`` on
            timeout, any other exception, or a non-200 status (each reported
            on stderr)
        """

        session = CacheControlHelper()
        url = "/".join([QueryDisont.API_BASE_URL, handler, url_suffix])
        try:
            response = session.get(url, timeout=QueryDisont.TIMEOUT_SEC)
        except session.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryDisont for URL: ' + url, file=sys.stderr)
            return None
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryDisont for URL: %s' % (e, url),
                  file=sys.stderr)
            return None

        if response.status_code == 200:
            return response

        print('Status code ' + str(response.status_code) + ' for url: ' + url,
              file=sys.stderr)
        return None
Exemple #21
0
    def send_query_get(handler, url_suffix):
        """
        GET ``<QueryMiRBase.API_BASE_URL>/<handler>?<url_suffix>`` and return the response.

        :param handler: path appended after the base URL
        :param url_suffix: query string appended after the ``?``
        :returns: the response object when the HTTP status is 200; ``None`` on
            timeout, any other ``Exception``, or a non-200 status (each
            reported on stderr). A ``KeyboardInterrupt`` during the request
            exits the process with status 0.
        """

        requests = CacheControlHelper()
        url_str = QueryMiRBase.API_BASE_URL + "/" + handler + "?" + url_suffix

        try:
            res = requests.get(url_str)
        except requests.exceptions.Timeout:
            print(url_str, file=sys.stderr)
            print('Timeout in QueryMiRBase for URL: ' + url_str,
                  file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except Exception as e:
            # The original formatted an undefined name (`url`) here, so the
            # handler itself raised NameError instead of returning None.
            print(url_str, file=sys.stderr)
            print('%s received in QueryMiRBase for URL: %s' % (e, url_str),
                  file=sys.stderr)
            return None

        status_code = res.status_code
        if status_code != 200:
            # Explicit check instead of `assert`, which is stripped under -O.
            print(url_str, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url_str,
                  file=sys.stderr)
            return None
        return res
Exemple #22
0
    def __access_api(url, params=None, headers=None):
        """
        GET ``url`` with a timeout of ``QuerySciGraph.TIMEOUT_SEC`` and decode the JSON reply.

        :param url: address to fetch
        :param params: forwarded as the ``params`` argument of the GET call
        :param headers: optional request headers; forwarded only when not
            ``None``
        :returns: ``res.json()`` when the HTTP status is 200; ``None`` on
            timeout, any other ``Exception``, or a non-200 status (each
            reported on stderr)
        """
        requests = CacheControlHelper()
        request_kwargs = {'params': params, 'timeout': QuerySciGraph.TIMEOUT_SEC}
        if headers is not None:
            # The parameter used to be accepted and then silently dropped.
            request_kwargs['headers'] = headers
        try:
            res = requests.get(url, **request_kwargs)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QuerySciGraph for URL: ' + url, file=sys.stderr)
            return None
        except Exception as e:
            # Exception rather than BaseException so Ctrl-C / SystemExit propagate.
            print(url, file=sys.stderr)
            print('%s received in QuerySciGraph for URL: %s' % (e, url),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(url, file=sys.stderr)
            # res.url is logged so the message shows the address actually
            # requested, as reported by the response object.
            print('Status code ' + str(status_code) + ' for url: ' + res.url,
                  file=sys.stderr)
            return None

        return res.json()
Exemple #23
0
def send_query(query):
    """
    GET ``query`` with a 120-second timeout and return the body text.

    No exception handling or status check is performed here; errors raised
    by the request propagate to the caller.

    :param query: address to fetch
    :returns: the ``text`` attribute of the response
    """
    response = CacheControlHelper().get(query, timeout=120)
    return response.text