Example #1
    def get_pubmed_id_for_pubchem_id(pubchem_id):
        """
        This takes a PubChem id and then gets the PMIDs for articles on PubMed from PubChem which include this entity.
        """
        if not isinstance(pubchem_id, str):
            return None

        requests = CacheControlHelper()
        url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/' + str(pubchem_id) + '/xrefs/PubMedID/JSON'
        try:
            r = requests.get(url, timeout=10)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryPubChem for URL: ' + url, file=sys.stderr)
            return None
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryPubChem for URL: %s' % (e, url), file=sys.stderr)
            return None
        if r is not None:
            if 'Fault' in r.json().keys():
                return None
            else:
                ans = [str(x) + '[uid]' for x in r.json()['InformationList']['Information'][0]['PubMedID']]
                return ans
        else:
            return None
    def query_oxo(uid):
        """
        This takes a curie id and send that id to EMBL-EBI OXO to convert to cui
        """
        url_str = 'https://www.ebi.ac.uk/spot/oxo/api/mappings?fromId=' + str(uid)
        requests = CacheControlHelper()

        try:
            res = requests.get(url_str, headers={'accept': 'application/json'}, timeout=120)
        except requests.exceptions.Timeout:
            print('HTTP timeout in SemMedInterface.py; URL: ' + url_str, file=sys.stderr)
            time.sleep(1)  ## take a timeout because NCBI rate-limits connections
            return None
        except requests.exceptions.ConnectionError:
            print('HTTP connection error in SemMedInterface.py; URL: ' + url_str, file=sys.stderr)
            time.sleep(1)  ## take a timeout because NCBI rate-limits connections
            return None
        except sqlite3.OperationalError:
            print('Error reading sqlite cache; URL: ' + url_str, file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print('HTTP response status code: ' + str(status_code) + ' for URL:\n' + url_str, file=sys.stderr)
            res = None
        return res
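The two helpers above wrap simple REST GET calls. Below is a minimal, self-contained sketch of the same PubChem PUG REST and EMBL-EBI OxO requests using the standard requests library; CacheControlHelper is a caching wrapper from the surrounding codebase and is not required for the calls themselves, and the sample ids mentioned in comments are illustrative only.

    # Minimal sketch with plain requests; endpoints and JSON paths mirror the code above.
    import requests

    def pubmed_ids_for_cid(cid):
        """Return the PMIDs PubChem links to a compound, formatted as 'NNNN[uid]'."""
        url = ('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/'
               + str(cid) + '/xrefs/PubMedID/JSON')
        r = requests.get(url, timeout=10)
        data = r.json()
        if 'Fault' in data:  # PubChem reports errors in a 'Fault' object
            return None
        info = data['InformationList']['Information'][0]
        return [str(pmid) + '[uid]' for pmid in info.get('PubMedID', [])]

    def oxo_mappings(curie):
        """Return the raw OxO mappings JSON for a CURIE (e.g. 'DOID:8398', illustrative)."""
        url = 'https://www.ebi.ac.uk/spot/oxo/api/mappings?fromId=' + str(curie)
        r = requests.get(url, headers={'accept': 'application/json'}, timeout=120)
        return r.json() if r.status_code == 200 else None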
Example #3
    def get_reactome_names(id):
        '''
        Takes a Reactome id and returns a string containing all synonyms listed on Reactome, separated by the delimiter |.
        However, if it finds a MeSH term in the list it will return that term formatted as a MeSH term search,
        e.g. it will return something like '(IGF1R)[MeSH Terms]'.

        This can be passed to the Google distance function as a non-MeSH term and it will still search as a MeSH term,
        so the output of this function does not need to be handled any differently: it can all be input as non-MeSH terms.

        Parameters:
            id - a string containing the Reactome id

        Output:
            search - a string containing all synonyms of the Reactome id, or a MeSH term formatted for the Google distance function
        '''
        # We want the actual reactome name R-HSA..., not the curie REACT:R-HSA...
        if "REACT:" in id:
            id = ":".join(id.split(":")[1:])
        url = 'https://reactome.org/ContentService/data/query/' + id + '/name'  # hardcoded url for reactome names
        requests = CacheControlHelper()
        r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})  # sends a GET request that returns a string
        if r.status_code != 200:
            print('HTTP response status code: ' + str(r.status_code) +
                  ' for URL:\n' + url,
                  file=sys.stderr)
            return None
        nameList = r.text.split('\n')  # splits returned string by line
        search = ''  # initializes search term variable
        for name in nameList:
            if len(name) > 0:  # removes blank lines at beginning and end of response
                if len(re.compile("[()]").split(name)) > 1:  # check for parentheses
                    for n in re.compile("[()]").split(name):  # splits on either "(" or ")"
                        if len(n) > 0:  # removes blanks generated by split
                            if QueryNCBIeUtils.is_mesh_term(n):  # check for mesh term
                                search += '|' + n + '[MeSH Terms]'
                            else:
                                search += '|' + n
                elif len(name.split('ecNumber')) > 1:  # checks for ec number
                    if QueryNCBIeUtils.is_mesh_term(name.split('ecNumber')[0]):
                        search += '|' + name.split('ecNumber')[0] + '[MeSH Terms]'
                    else:
                        search += '|' + name.split('ecNumber')[0]
                    # removes trailing "/" and formats as ec search term
                    search += '|' + name.split('ecNumber')[1][:-1] + '[EC/RN Number]'
                else:
                    if QueryNCBIeUtils.is_mesh_term(name):
                        search += '|' + name + '[MeSH Terms]'
                    else:
                        search += '|' + name
        search = search[1:]  # removes leading |
        return search
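For context, the Reactome ContentService call that get_reactome_names builds can be reproduced directly. The sketch below uses plain requests and an illustrative Reactome id, and only shows the raw name list before the function folds it into a '|'-delimited search string.

    # Hedged sketch: fetch the raw name list for a Reactome id (id is illustrative).
    import requests

    reactome_id = 'R-HSA-109582'
    url = 'https://reactome.org/ContentService/data/query/' + reactome_id + '/name'
    r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)
    if r.status_code == 200:
        names = [n for n in r.text.split('\n') if n]  # one synonym per line
        print(names)
    # get_reactome_names would then join these (plus any MeSH/EC annotations it
    # detects) into a single '|'-delimited search string.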
Example #4
    def get_cui(chemical_substance_id):
        if chemical_substance_id[:7].upper() == "CHEMBL:":
            chemical_substance_id = "CHEMBL" + chemical_substance_id[7:]
        handler = 'chem/' + chemical_substance_id + '?fields=drugcentral.xref.umlscui'

        requests = CacheControlHelper()
        url = QueryMyChem.API_BASE_URL + '/' + handler

        try:
            res = requests.get(url, timeout=QueryMyChem.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryMyChem for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryMyChem for URL: %s' % (e, url),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            # print(url, file=sys.stderr)
            # print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr)
            return None
        id_json = res.json()
        res = None
        if 'drugcentral' in id_json.keys():
            if 'xref' in id_json['drugcentral'].keys():
                if 'umlscui' in id_json['drugcentral']['xref'].keys():
                    res = id_json['drugcentral']['xref']['umlscui']
        return res
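The handler above targets MyChem.info's drugcentral.xref.umlscui field. A sketch of the equivalent direct request follows; it assumes QueryMyChem.API_BASE_URL points at the public MyChem.info v1 endpoint (the constant is defined outside this snippet), uses an illustrative ChEMBL id, and mirrors the nested-dict lookup done above.

    # Hedged sketch of the request get_cui builds (base URL is an assumption).
    import requests

    chembl_id = 'CHEMBL25'  # illustrative id
    url = 'http://mychem.info/v1/chem/' + chembl_id + '?fields=drugcentral.xref.umlscui'
    res = requests.get(url, timeout=120)
    if res.status_code == 200:
        doc = res.json()
        cui = doc.get('drugcentral', {}).get('xref', {}).get('umlscui')
        print(cui)  # a UMLS CUI string such as 'C0...', or None if not present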
Example #5
    def send_query_get(handler, url_suffix):

        requests = CacheControlHelper()
        url = QueryChEMBL.API_BASE_URL + '/' + handler + '?' + url_suffix
        #        print(url)
        try:
            res = requests.get(url, timeout=QueryChEMBL.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryChEMBL for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryChEMBL for URL: %s' % (e, url),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(url, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url,
                  file=sys.stderr)
            return None
        return res.json()
Example #6
    def send_query_get(entity, url_suffix):
        requests = CacheControlHelper()
        url_str = QueryPharos.API_BASE_URL + "/" + entity + url_suffix
        #print(url_str)

        try:
            res = requests.get(url_str)
        except requests.exceptions.Timeout:
            print(url_str, file=sys.stderr)
            print('Timeout in QueryPharos for URL: ' + url_str,
                  file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url_str, file=sys.stderr)
            print('%s received in QueryPharos for URL: %s' % (e, url_str),
                  file=sys.stderr)
            return None

        status_code = res.status_code
        #print("Status code="+str(status_code))
        assert status_code in [200, 404]
        if status_code == 404:
            res = None
        return res
Example #7
    def send_query_post(handler, params, retmax=1000):

        requests = CacheControlHelper()
        url_str = QueryNCBIeUtils.API_BASE_URL + '/' + handler
        params['retmax'] = str(retmax)
        params['retmode'] = 'json'
        #        print(url_str)
        try:
            res = requests.post(url_str,
                                data=params,
                                timeout=QueryNCBIeUtils.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print('HTTP timeout in QueryNCBIeUtils.py; URL: ' + url_str,
                  file=sys.stderr)
            time.sleep(1)  ## take a timeout because NCBI rate-limits connections
            return None
        except requests.exceptions.ConnectionError:
            print('HTTP connection error in QueryNCBIeUtils.py; URL: ' +
                  url_str,
                  file=sys.stderr)
            time.sleep(1)  ## take a timeout because NCBI rate-limits connections
            return None
        status_code = res.status_code
        if status_code != 200:
            print('HTTP response status code: ' + str(status_code) +
                  ' for URL:\n' + url_str,
                  file=sys.stderr)
            res = None
        return res
    def __access_api(handler, url_suffix, params=None, return_raw=False):

        requests = CacheControlHelper()
        if url_suffix:
            url = QueryMyGeneExtended.API_BASE_URL + '/' + handler + '?' + url_suffix
        else:
            url = QueryMyGeneExtended.API_BASE_URL + '/' + handler
        headers = {'user-agent': "mygene.py/%s python-requests/%s" % ("1.0.0", "1.0.0"), 'Accept': 'application/json'}
        try:
            res = requests.get(url, params=params, timeout=QueryMyGeneExtended.TIMEOUT_SEC, headers=headers)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryMyGeneExtended for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryMyGeneExtended for URL: %s' % (e, url), file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(url, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr)
            return None
        if return_raw:
            return res.text
        else:
            return res.json()
Example #9
    def send_query_get(handler, url_suffix):

        requests = CacheControlHelper()
        url_str = QueryEBIOLS.API_BASE_URL + '/' + handler + "/" + url_suffix
#        print(url_str)
        try:
            res = requests.get(url_str, timeout=QueryEBIOLS.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print('HTTP timeout in QueryEBIOLS.py; URL: ' + url_str, file=sys.stderr)
            time.sleep(1)  # take a timeout because NCBI rate-limits connections
            return None
        except requests.exceptions.ConnectionError:
            print('HTTP connection error in QueryEBIOLS.py; URL: ' + url_str, file=sys.stderr)
            time.sleep(1)  # take a timeout because NCBI rate-limits connections
            return None
        except BaseException as e:
            print(url_str, file=sys.stderr)
            print('%s received in QueryEBIOLS for URL: %s' % (e, url_str), file=sys.stderr)
            return None

        status_code = res.status_code
        if status_code != 200:
            print('HTTP response status code: ' + str(status_code) + ' for URL:\n' + url_str, file=sys.stderr)
            res = None
        return res
Example #10
    def send_query_get(handler, url_suffix):

        requests = CacheControlHelper()
        url_str = QueryMiRGate.API_BASE_URL + "/" + handler + "/" + url_suffix
#        print(url_str)
        try:
            res = requests.get(url_str, timeout=QueryMiRGate.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print(url_str, file=sys.stderr)
            print("Timeout in QueryMiRGate for URL: " + url_str, file=sys.stderr)
            return None
        except BaseException as e:
            print(url_str, file=sys.stderr)
            print('%s received in QueryMiRGate for URL: %s' % (e, url_str), file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(url_str, file=sys.stderr)
            print("Status code " + str(status_code) + " for url: " + url_str, file=sys.stderr)
            return None
        if len(res.content) == 0:
            print(url_str, file=sys.stderr)
            print("Empty response from URL!", file=sys.stderr)
            res = None
        return res
Example #11
    def __access_api(handler):

        requests = CacheControlHelper()
        url = QueryEBIOLSExtended.API_BASE_URL + '/' + handler
        # print(url)
        try:
            res = requests.get(url, timeout=QueryEBIOLSExtended.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryEBIOLSExtended for URL: ' + url,
                  file=sys.stderr)
            return None
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryEBIOLSExtended for URL: %s' % (e, url),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(url, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url,
                  file=sys.stderr)
            return None

        return res.text
Example #12
    def __access_api(url):

        requests = CacheControlHelper()
        url = url + '.xml'

        try:
            res = requests.get(url, timeout=QueryHMDB.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryHMDB for URL: ' + url, file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryHMDB for URL: %s' % (e, url),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(url, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url,
                  file=sys.stderr)
            return None
        return res.text
Example #13
    def __retrieve_entries_from_url():

        #   network request
        requests = CacheControlHelper()
        try:
            res = requests.get(GenerateMetabolitesTSV.URL)
        except requests.exceptions.Timeout:
            print(GenerateMetabolitesTSV.URL, file=sys.stderr)
            print("Timeout for URL: " + GenerateMetabolitesTSV.URL,
                  file=sys.stderr)
            return False
        except BaseException as e:
            print(GenerateMetabolitesTSV.URL, file=sys.stderr)
            print('%s received in GenerateMetabolitesTSV for URL: %s' %
                  (e, GenerateMetabolitesTSV.URL),
                  file=sys.stderr)
            return False
        status_code = res.status_code
        if status_code != 200:
            print(GenerateMetabolitesTSV.URL, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' +
                  GenerateMetabolitesTSV.URL,
                  file=sys.stderr)
            return False

        #   save content to file
        with open(GenerateMetabolitesTSV.FILE_NAME, 'wb') as fd:
            for chunk in res.iter_content(1024):
                fd.write(chunk)

        return True
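The helper above writes the response to disk in 1024-byte chunks. Below is a minimal sketch of the same download-to-file pattern with plain requests; it adds stream=True so chunks are written as they arrive rather than after the whole body is buffered (whether CacheControlHelper streams is not shown here), and the URL and file name are placeholders.

    # Minimal streaming-download sketch; URL and file name are placeholders.
    import requests

    url = 'https://example.org/metabolites.tsv'
    with requests.get(url, stream=True, timeout=120) as res:
        res.raise_for_status()
        with open('metabolites.tsv', 'wb') as fd:
            for chunk in res.iter_content(chunk_size=1024):
                fd.write(chunk)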
Example #14
    def __access_api(handler):

        api_base_url = 'http://www.uniprot.org'
        url = api_base_url + '/' + handler
        #print(url)
        contact = "*****@*****.**"
        header = {'User-Agent': 'Python %s' % contact}

        requests = CacheControlHelper()
        try:
            res = requests.get(url,
                               timeout=QueryUniprot.TIMEOUT_SEC,
                               headers=header)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryUniprot for URL: ' + url, file=sys.stderr)
            return None
        except requests.exceptions.ChunkedEncodingError:
            print(url, file=sys.stderr)
            print('ChunkedEncodingError for URL: ' + url, file=sys.stderr)
            return None
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryUniprot for URL: %s' % (e, url),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(url, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url,
                  file=sys.stderr)
            return None
        return res.text
def get_request_with_cookie(url):
    API_KEY = '1YCxuN7PRHyrpuZnO7F5gQ'
    API_BASE_URL = 'https://api.omim.org/api'

    session_data = {'apiKey': API_KEY,
                    'format': 'json'}

    requests = CacheControlHelper()
    r = requests.sess.post(API_BASE_URL + "/apiKey", data=session_data)
    assert 200 == r.status_code
    cookie = r.cookies

    try:
        res = requests.get(url, cookies=cookie)
    except requests.exceptions.Timeout:
        print(url, file=sys.stderr)
        print("Timeout in QueryOMIM for URL: " + url, file=sys.stderr)
        return None
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received in QueryOMIM for URL: %s' % (e, url), file=sys.stderr)
        return None
    status_code = res.status_code
    if status_code != 200:
        print("Status code " + str(status_code) + " for URL: " + url, file=sys.stderr)
        return None
    return res.json()
Example #16
 def send_query_get(self, omim_handler, url_suffix):
     requests = CacheControlHelper()
     url = "{api_base_url}/{omim_handler}?{url_suffix}&format=json".format(
         api_base_url=QueryOMIM.API_BASE_URL,
         omim_handler=omim_handler,
         url_suffix=url_suffix)
     #        print(url)
     try:
         res = requests.get(url, cookies=self.cookie)
     except requests.exceptions.Timeout:
         print(url, file=sys.stderr)
         print("Timeout in QueryOMIM for URL: " + url, file=sys.stderr)
         return None
     except KeyboardInterrupt:
         sys.exit(0)
     except BaseException as e:
         print(url, file=sys.stderr)
         print('%s received in QueryOMIM for URL: %s' % (e, url),
               file=sys.stderr)
         return None
     status_code = res.status_code
     if status_code != 200:
         print("Status code " + str(status_code) + " for URL: " + url,
               file=sys.stderr)
         return None
     return res
 def __init__(self):
     requests = CacheControlHelper()
     url = QueryOMIMExtended.API_BASE_URL + "/apiKey"
     session_data = {'apiKey': QueryOMIMExtended.API_KEY, 'format': 'json'}
     r = requests.post(url, data=session_data)
     assert 200 == r.status_code
     self.cookie = r.cookies
Example #18
 def send_query_get(handler, st):
     url_str = QueryUMLS.API_BASE_URL + '/' + handler + '&ticket=' + st
     requests = CacheControlHelper()
     try:
         res = requests.get(url_str,
                            headers={'accept': 'application/json'},
                            timeout=QueryUMLS.TIMEOUT_SEC)
     except requests.exceptions.Timeout:
         print('HTTP timeout in QueryUMLS.py; URL: ' + url_str,
               file=sys.stderr)
         time.sleep(1)  ## take a timeout because NCBI rate-limits connections
         return None
     except requests.exceptions.ConnectionError:
         print('HTTP connection error in QueryUMLS.py; URL: ' + url_str,
               file=sys.stderr)
         time.sleep(1)  ## take a timeout because NCBI rate-limits connections
         return None
     status_code = res.status_code
     if status_code != 200:
         print('HTTP response status code: ' + str(status_code) +
               ' for URL:\n' + url_str,
               file=sys.stderr)
         res = None
     return res
Example #19
 def get_single_ticket(tgt):
     params = {'service': "http://umlsks.nlm.nih.gov"}
     h = {
         "Content-type": "application/x-www-form-urlencoded",
         "Accept": "text/plain",
         "User-Agent": "python"
     }
     requests = CacheControlHelper()
     r = requests.post(tgt, data=params, headers=h)
     st = r.text
     return st
Example #20
    def uniprot_id_to_reactome_pathways(uniprot_id):
        """returns a ``set`` of reactome IDs of pathways associated with a given string uniprot ID

        :param uniprot_id: a ``str`` uniprot ID, like ``"P68871"``
        :returns: a ``set`` of string Reactome IDs
        """

        payload = {
            'from': 'ACC',
            'to': 'REACTOME_ID',
            'format': 'tab',
            'query': uniprot_id
        }
        contact = "*****@*****.**"
        header = {'User-Agent': 'Python %s' % contact}

        requests = CacheControlHelper()

        try:
            url = QueryUniprot.API_BASE_URL
            res = requests.post(QueryUniprot.API_BASE_URL,
                                data=payload,
                                headers=header)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryUniprot for URL: ' +
                  QueryUniprot.API_BASE_URL,
                  file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryUniprot for URL: %s' % (e, url),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(QueryUniprot.API_BASE_URL, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' +
                  QueryUniprot.API_BASE_URL,
                  file=sys.stderr)
            return None


#        assert 200 == res.status_code
        res_set = set()
        for line in res.text.splitlines():
            field_str = line.split("\t")[1]
            if field_str != "To":
                res_set.add(field_str)
        return res_set
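The mapping service returns tab-separated text with a 'From'/'To' header row, which the loop above skips by testing field_str != "To". The short illustration below runs that same parsing over hypothetical response text shaped like UniProt's 'tab' output.

    # Illustration of the tab-separated parsing above; sample_text is hypothetical.
    sample_text = "From\tTo\nP68871\tR-HSA-1237044\nP68871\tR-HSA-2168880\n"

    res_set = set()
    for line in sample_text.splitlines():
        field_str = line.split("\t")[1]
        if field_str != "To":  # skip the header row
            res_set.add(field_str)

    print(res_set)  # {'R-HSA-1237044', 'R-HSA-2168880'}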
Example #21
 def get_ticket_gen():
     # params = {'username': self.username,'password': self.password}
     params = {'apikey': QueryUMLS.api_key}
     h = {
         "Content-type": "application/x-www-form-urlencoded",
         "Accept": "text/plain",
         "User-Agent": "python"
     }
     requests = CacheControlHelper()
     r = requests.post(QueryUMLS.Ticket_URL + QueryUMLS.auth_endpoint,
                       data=params,
                       headers=h)
     response = fromstring(r.text)
     ## extract the entire URL needed from the HTML form (action attribute) returned - looks similar to https://utslogin.nlm.nih.gov/cas/v1/tickets/TGT-36471-aYqNLN2rFIJPXKzxwdTNC5ZT7z3B3cTAKfSc5ndHQcUxeaDOLN-cas
     ## we make a POST call to this URL in the getst method
     tgt = response.xpath('//form/@action')[0]
     return tgt
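get_ticket_gen and get_single_ticket (Example #19) together implement the UMLS CAS-style two-step authentication: the API key is first exchanged for a ticket-granting ticket (TGT) URL, and each subsequent request exchanges the TGT for a single-use service ticket. The self-contained sketch below reproduces that flow with plain requests and lxml; the auth endpoint is an assumption based on the utslogin.nlm.nih.gov URL mentioned in the comment above, and UMLS_API_KEY is a placeholder.

    # Sketch of the two-step CAS flow; AUTH_URL is assumed, the key is a placeholder.
    import requests
    from lxml.html import fromstring

    UMLS_API_KEY = 'YOUR-UMLS-API-KEY'
    AUTH_URL = 'https://utslogin.nlm.nih.gov/cas/v1/api-key'
    HEADERS = {'Content-type': 'application/x-www-form-urlencoded',
               'Accept': 'text/plain', 'User-Agent': 'python'}

    # Step 1: exchange the API key for a ticket-granting ticket (TGT) URL.
    r = requests.post(AUTH_URL, data={'apikey': UMLS_API_KEY}, headers=HEADERS)
    tgt = fromstring(r.text).xpath('//form/@action')[0]

    # Step 2: exchange the TGT for a single-use service ticket (ST).
    r = requests.post(tgt, data={'service': 'http://umlsks.nlm.nih.gov'}, headers=HEADERS)
    service_ticket = r.text
    # The ticket is then appended as '&ticket=' to each REST call, as in Example #18.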
Example #22
    def send_query_get(handler, url_suffix, retmax=1000, retry_flag=True):

        requests = CacheControlHelper()
        url_str = QueryNCBIeUtils.API_BASE_URL + '/' + handler + '?' + url_suffix + '&retmode=json&retmax=' + str(
            retmax)
        #        print(url_str)
        try:
            res = requests.get(url_str,
                               headers={
                                   'accept': 'application/json',
                                   'User-Agent': 'Mozilla/5.0'
                               },
                               timeout=QueryNCBIeUtils.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print('HTTP timeout in QueryNCBIeUtils.py; URL: ' + url_str,
                  file=sys.stderr)
            time.sleep(1)  ## take a timeout because NCBI rate-limits connections
            return None
        except requests.exceptions.ConnectionError:
            print('HTTP connection error in QueryNCBIeUtils.py; URL: ' +
                  url_str,
                  file=sys.stderr)
            time.sleep(1)  ## take a timeout because NCBI rate-limits connections
            return None
        except BaseException as e:
            print(url_str, file=sys.stderr)
            print('%s received in QueryNCBIeUtils for URL: %s' % (e, url_str),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            if status_code == 429 and retry_flag:
                time.sleep(1)
                res = QueryNCBIeUtils.send_query_get(handler, url_suffix,
                                                     retmax, False)
            else:
                print('HTTP response status code: ' + str(status_code) +
                      ' for URL:\n' + url_str,
                      file=sys.stderr)
                res = None
        return res
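send_query_get above retries once on HTTP 429, the status NCBI eUtils returns when rate-limited. A hedged, self-contained sketch of the same GET-plus-single-retry pattern with plain requests follows; it assumes the base URL is NCBI's public eUtils endpoint (the API_BASE_URL constant is not shown in this snippet) and uses an illustrative query term.

    # Sketch of the GET-with-one-retry-on-429 pattern (base URL is an assumption).
    import time
    import requests

    base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils'
    url = base + '/esearch.fcgi?db=pubmed&term=asthma&retmode=json&retmax=1000'
    headers = {'accept': 'application/json', 'User-Agent': 'Mozilla/5.0'}

    res = requests.get(url, headers=headers, timeout=120)
    if res.status_code == 429:  # rate-limited: wait a second and retry once
        time.sleep(1)
        res = requests.get(url, headers=headers, timeout=120)
    if res.status_code == 200:
        print(res.json()['esearchresult']['count'])  # number of matching PMIDs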
def get_request(url):
    requests = CacheControlHelper()
    try:
        res = requests.get(url, timeout=120)
    except requests.exceptions.Timeout:
        print(url, file=sys.stderr)
        print('Timeout for URL: ' + url, file=sys.stderr)
        return None
    except KeyboardInterrupt:
        sys.exit(0)
    except BaseException as e:
        print(url, file=sys.stderr)
        print('%s received for URL: %s' % (e, url), file=sys.stderr)
        return None
    status_code = res.status_code
    if status_code != 200:
        print(url, file=sys.stderr)
        print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr)
        return None
    return res.json()
Example #24
 def get_uniprot_names(id):
     """
     Takes a uniprot id then return a string containing all synonyms listed on uniprot seperated by the deliminator |
     :param id: a string containing the uniprot id
     :returns: a string containing all synonyms uniprot lists for
     """
     # We want the actual uniprot name P176..., not the curie UniProtKB:P176...
     if "UniProtKB:" in id:
         id = ":".join(id.split(":")[1:])
     url = 'https://www.uniprot.org/uniprot/?query=id:' + id + '&sort=score&columns=entry name,protein names,genes&format=tab'  # hardcoded url for uniprot data
     requests = CacheControlHelper()
     r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})  # send a GET request
     if r.status_code != 200:  # checks for error
         print('HTTP response status code: ' + str(r.status_code) +
               ' for URL:\n' + url,
               file=sys.stderr)
         return None
     if r.content.decode('utf-8') == '':
         return None
     df = pandas.read_csv(StringIO(r.content.decode('utf-8')), sep='\t')
     search = df.loc[0, 'Entry name']  # initializes search term variable
     if type(df.loc[0, 'Protein names']) == str:
         for name in re.compile(r"[()\[\]]").split(df.loc[0, 'Protein names']):  # checks the protein names section
             if len(name) > 1:
                 if QueryNCBIeUtils.is_mesh_term(name):
                     search += '|' + name + '[MeSH Terms]'
                 else:
                     search += '|' + name
     if type(df.loc[0, 'Gene names']) == str:
         for name in df.loc[0, 'Gene names'].split(' '):
             if len(name) > 1:
                 if QueryNCBIeUtils.is_mesh_term(name):
                     search += '|' + name + '[MeSH Terms]'
                 else:
                     search += '|' + name
     return search
Example #25
    def send_query_get(handler, url_suffix):

        requests = CacheControlHelper()
        url = QueryDisont.API_BASE_URL + "/" + handler + "/" + url_suffix
        #        print(url_str)
        try:
            res = requests.get(url, timeout=QueryDisont.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QueryDisont for URL: ' + url, file=sys.stderr)
            return None
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QueryDisont for URL: %s' % (e, url),
                  file=sys.stderr)
            return None

        status_code = res.status_code
        if status_code != 200:
            print('Status code ' + str(status_code) + ' for url: ' + url,
                  file=sys.stderr)
            return None
        return res
Example #26
    def send_query_get(handler, url_suffix):

        requests = CacheControlHelper()
        url_str = QueryMiRBase.API_BASE_URL + "/" + handler + "?" + url_suffix
        #        print(url_str)

        try:
            res = requests.get(url_str)
        except requests.exceptions.Timeout:
            print(url_str, file=sys.stderr)
            print('Timeout in QueryMiRBase for URL: ' + url_str,
                  file=sys.stderr)
            return None
        except KeyboardInterrupt:
            sys.exit(0)
        except BaseException as e:
            print(url_str, file=sys.stderr)
            print('%s received in QueryMiRBase for URL: %s' % (e, url_str),
                  file=sys.stderr)
            return None

        status_code = res.status_code
        assert status_code == 200
        return res
Example #27
    def __access_api(url, params=None, headers=None):
        #        print(url)
        requests = CacheControlHelper()
        try:
            res = requests.get(url,
                               params=params,
                               timeout=QuerySciGraph.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print(url, file=sys.stderr)
            print('Timeout in QuerySciGraph for URL: ' + url, file=sys.stderr)
            return None
        except BaseException as e:
            print(url, file=sys.stderr)
            print('%s received in QuerySciGraph for URL: %s' % (e, url),
                  file=sys.stderr)
            return None
        status_code = res.status_code
        if status_code != 200:
            print(url, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + res.url,
                  file=sys.stderr)
            return None

        return res.json()
Example #28
def send_query(query):
    requests = CacheControlHelper()
    res = requests.get(query, timeout=120)
    return res.text
Example #29
    def query_mesh_id_to_uniprot_ids_desc(mesh_id):
        ent = 'disease'
        id = 'mesh'
        STR = "c1.MESH = '"
        intfield = mesh_id
        seq = ( """
        DEFINE
        c0='/data/gene_disease_summary',
	c1='/data/diseases',
	c2='/data/genes',
	c4='/data/sources'
        ON
           'http://www.disgenet.org/web/DisGeNET'
        SELECT
        c1 (diseaseId, name, diseaseClassName, STY, MESH, OMIM, type ),
	c2 (geneId, symbol,   uniprotId, description, pantherName ),
	c0 (score, EI, Npmids, Nsnps)

        FROM
            c0
        WHERE
            (
                """ + STR +  mesh_id+"""'
            AND
                c4 = 'ALL'
            )
        ORDER BY
            c0.score DESC""")

        binary_data = seq.encode('utf-8')
        url_str = QueryDisGeNet.SPARQL_ENDPOINT_URL
        requests = CacheControlHelper()

        try:
            res = requests.post(url_str, data=binary_data, timeout=QueryDisGeNet.TIMEOUT_SEC)
        except requests.exceptions.Timeout:
            print(url_str, file=sys.stderr)
            print('Timeout in QueryDisGeNet for URL: ' + url_str, file=sys.stderr)
            return dict()
        except BaseException as e:
            print(url_str, file=sys.stderr)
            print('%s received in QueryDisGeNet for URL: %s' % (e, url_str), file=sys.stderr)
            return None

        status_code = res.status_code

        if status_code != 200:
            print(url_str, file=sys.stderr)
            print('Status code ' + str(status_code) + ' for url: ' + url_str, file=sys.stderr)
            return dict()

        if len(res.content) == 0:
            print(url_str, file=sys.stderr)
            print('Empty response from URL!', file=sys.stderr)
            return dict()

        ret_data_df = pandas.read_csv(io.StringIO(res.content.decode('utf-8')), sep='\t').head(QueryDisGeNet.MAX_GENES_FOR_DISEASE)
        uniprot_ids_list = ret_data_df['c2.uniprotId'].tolist()
        gene_names_list = ret_data_df['c2.symbol'].tolist()
        ret_dict = dict(list(zip(uniprot_ids_list, gene_names_list)))
        for prot in ret_dict.copy().keys():
            if type(prot) == str and prot != "null":
                if '.' in prot or ';' in prot:
                    gene = ret_dict[prot]
                    del ret_dict[prot]
                    prot = prot.replace('.', '')
                    prots_to_add = prot.split(';')
                    if len(prots_to_add) > QueryDisGeNet.MAX_PROTS_FOR_GENE:
                        prots_to_add = prots_to_add[0:QueryDisGeNet.MAX_PROTS_FOR_GENE]
                    dict_add = dict()
                    for prot_name in prots_to_add:
                        if type(prot_name) == str and prot_name != "null":
                            dict_add[prot_name] = gene
                    ret_dict.update(dict_add)
            else:  ## this is a math.nan
                del ret_dict[prot]
        return ret_dict
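For reference, a hedged usage example of the function above: the MeSH id is illustrative, the QueryDisGeNet class is assumed to be importable from the surrounding codebase, and the exact contents of the returned dict depend on DisGeNET's data at query time.

    # Hedged usage sketch; 'D003924' (type 2 diabetes mellitus) is an illustrative MeSH id.
    ret_dict = QueryDisGeNet.query_mesh_id_to_uniprot_ids_desc('D003924')
    for uniprot_id, gene_symbol in ret_dict.items():
        print(uniprot_id, gene_symbol)  # each entry maps a UniProt accession to its gene symbol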