def __init__(self, model):
    """Initialise query/pagination state for *model* and open an HTTP session."""
    settings = Settings.Instance()
    self.model = model
    self.logger = logging.getLogger(__name__)
    self.base_url = settings.NEW_CLIENT_URL + '/' + model.name.lower()
    # Slicing / pagination bookkeeping.
    self.start = 0
    self.stop = None
    self.count = None
    self.searchable = model.searchable
    self.limit = settings.MAX_LIMIT
    self.max_url_size = settings.MAX_URL_SIZE
    self.timeout = settings.NEW_CLIENT_TIMEOUT
    self.current_offset = 0
    self.current_index = 0
    self.current_chunk = None
    self.current_page = 0
    # Resources without a collection name cannot be listed or fetched in bulk.
    self.allows_list = bool(model.collection_name)
    self.allows_multiple = bool(model.collection_name)
    if model.collection_name:
        self.collection_name = model.collection_name.lower()
    self.api_total_count = None
    self.filters = []
    self.frmt = 'json'
    self.ordering = []
    self.session = None
    self._get_session()
def get_one(self, chembl_id=None, **kwargs):
    """Fetch a single compound by ChEMBL id, InChI key, or SMILES string.

    Falls back to POST when the identifier contains URL-unsafe characters.
    Returns None when no recognised identifier is supplied.
    """
    frmt = kwargs.get('frmt', 'json')
    use_async = kwargs.get('use_async', False)
    prop = kwargs.get('prop', None)
    # A plain ChEMBL id is handled by the generic parent implementation.
    if chembl_id:
        return super(CompoundResource, self).get_one(
            chembl_id=chembl_id, frmt=frmt, use_async=use_async, prop=prop)
    # Structure-based lookup: prefer the InChI key over SMILES.
    key = next((k for k in ('stdinchikey', 'smiles') if k in kwargs), None)
    if key is None:
        self.logger.warning('No identifier given.')
        return None
    root = Settings.Instance().webservice_root_url
    unsafe = any(ch in kwargs[key] for ch in self.url_unsafe_characters)
    if unsafe:
        # Structure cannot be embedded in a URL; send it as POST data.
        url = '{0}/{1}/{2}'.format(root, self.name, key)
        return self._get_one(url, use_async, frmt, 'post', {key: kwargs[key]})
    url = '{0}/{1}/{2}/{3}.{4}'.format(root, self.name, key, kwargs[key], frmt)
    return self._get_one(url, use_async, frmt, 'get', None)
def get_single_image(self, chembl_id, use_async, **kwargs):
    """Retrieve a rendered structure image for *chembl_id*.

    Keyword options: ``size`` (px, default 500), ``engine`` (default
    'rdkit'), ``ignoreCoords`` (bool). Returns the raw image bytes on
    success, the HTTP status code on a non-OK response, a grequests
    request object when *use_async* is set, or None on failure.

    FIX: failures were silently swallowed (``except Exception: return
    None``); the exception is now logged so callers can diagnose errors,
    while the best-effort None return is preserved.
    """
    try:
        size = kwargs.get('size', 500)
        engine = kwargs.get('engine', 'rdkit')
        ignore_coords = kwargs.get('ignoreCoords', False)
        query = '?engine={0}&dimensions={1}'.format(engine, size)
        if ignore_coords:
            query += '&ignoreCoords=1'
        if not chembl_id:
            return None
        url = '{0}/{1}/{2}/image{3}'.format(
            Settings.Instance().webservice_root_url, self.name, chembl_id, query)
        with self._get_session() as session:
            if use_async and grequests:
                return grequests.get(url, session=session)
            res = session.get(url, timeout=Settings.Instance().TIMEOUT)
            if not res.ok:
                self.logger.warning(
                    'Error when retrieving url: {0}, status code: {1}, msg: {2}'
                    .format(res.url, res.status_code, res.text))
                return res.status_code
            self.logger.info(res.url)
            return res.content
    except Exception:
        # Best-effort: keep returning None, but record why it failed.
        self.logger.exception('Failed to retrieve image for %s', chembl_id)
        return None
#-----------------------------------------------------------------------------------------------------------------------
def get_one(self, chembl_id, frmt='json', asink=False, prop=None):
    """Fetch one record (optionally a single property *prop*) for *chembl_id*.

    Returns None when no id is given.
    """
    if not chembl_id:
        return None
    root = Settings.Instance().webservice_root_url
    if prop:
        url = '{0}/{1}/{2}/{3}.{4}'.format(root, self.name, chembl_id, prop, frmt)
    else:
        url = '{0}/{1}/{2}.{3}'.format(root, self.name, chembl_id, frmt)
    return self._get_one(url, asink, frmt)
def get_all(self, frmt='json'):
    """Request the full listing for this resource in the given format."""
    settings = Settings.Instance()
    url = '{0}/{1}.{2}'.format(settings.webservice_root_url, self.name, frmt)
    session = self._get_session()
    return self._process_request(url, session, frmt, timeout=settings.TIMEOUT)
def bioactivities(self, chembl_id, frmt='json'):
    """Fetch the bioactivity records associated with *chembl_id*."""
    settings = Settings.Instance()
    url = '{0}/{1}/{2}/bioactivities.{3}'.format(
        settings.webservice_root_url, self.name, chembl_id, frmt)
    session = self._get_session()
    return self._process_request(url, session, frmt, timeout=settings.TIMEOUT)
def get_all(self, frmt='json'):
    """Request the full listing for this resource in the given format."""
    settings = Settings.Instance()
    url = '%s/%s.%s' % (settings.webservice_root_url, self.name, frmt)
    with self._get_session() as session:
        return self._process_request(url, session, frmt, timeout=settings.TIMEOUT)
def bioactivities(self, chembl_id, frmt='json'):
    """Fetch the bioactivity records associated with *chembl_id*."""
    settings = Settings.Instance()
    url = '%s/%s/%s/bioactivities.%s' % (
        settings.webservice_root_url, self.name, chembl_id, frmt)
    with self._get_session() as session:
        return self._process_request(url, session, frmt, timeout=settings.TIMEOUT)
def __repr__(self):
    """Show up to REPR_OUTPUT_SIZE elements, marking any truncation."""
    if not self.query.allows_multiple:
        return '{0} resource'.format(self.model.name)
    max_items = Settings.Instance().REPR_OUTPUT_SIZE
    data = list(self._clone()[:max_items])
    length = len(self)
    if length > max_items:
        # Replace the last shown element with a truncation marker.
        data[-1] = "...(remaining elements truncated)..."
    else:
        data = data[:length]
    return repr(data)
def get_service(self):
    """Query the webservice ``/status/`` endpoint.

    Returns the 'service' payload on success, False when the response
    lacks a 'service' key, and None on HTTP error or exception.

    BUG FIX: ``timeout=`` was previously passed as an argument to
    ``str.format()`` (where it was silently ignored) instead of to
    ``requests.get()``, so the status request had no timeout at all.
    """
    try:
        url = '{0}/status/'.format(Settings.Instance().webservice_root_url)
        res = requests.get(url, timeout=Settings.Instance().TIMEOUT)
        if not res.ok:
            self.logger.warning('Error when retrieving url: {0}, status code: {1}, msg: {2}'.format(
                res.url, res.status_code, res.text))
            return None
        self.logger.info(res.url)
        self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
        js = res.json()
        if 'service' not in js:
            return False
        return js['service']
    except Exception:
        return None
def _get_session(self):
    """Lazily build and return the (possibly cached) requests session.

    The session is created once, configured with retrying pooled
    adapters, optional proxies, and — when CACHING is enabled — an
    sqlite-backed requests_cache store in the user's home directory.
    """
    settings = Settings.Instance()
    if self.session is None:
        retry = Retry(
            total=settings.TOTAL_RETRIES,
            backoff_factor=settings.BACKOFF_FACTOR,
            status_forcelist=list(range(400, 421)) + list(range(500, 505)))
        pool = settings.CONCURRENT_SIZE
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=pool, pool_maxsize=pool,
            pool_block=True, max_retries=retry)
        if settings.CACHING:
            cache_file = os.path.join(os.path.expanduser('~'), settings.CACHE_NAME)
            self.session = requests_cache.CachedSession(
                cache_file, backend='sqlite',
                expire_after=settings.CACHE_EXPIRE,
                fast_save=settings.FAST_SAVE,
                allowable_methods=('GET', 'POST'))
        else:
            self.session = requests.Session()
        if settings.PROXIES:
            self.session.proxies = settings.PROXIES
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
    return self.session
def _get_async(self, kname, keys, frmt='json', prop=None, retry=0):
    """Fire one async request per key via grequests and map the results.

    Best-effort: any failure yields an empty list rather than raising.
    """
    try:
        pending = (
            self.get_one(**{'frmt': frmt, kname: key, 'asink': True, 'prop': prop})
            for key in keys
        )
        pool_size = min(Settings.Instance().CONCURRENT_SIZE, len(keys))
        return grequests.map(pending, size=pool_size)
    except Exception:
        return []
def _get(self, kname, keys, frmt='json', prop=None):
    """Dispatch a lookup for one key or a list of keys.

    Lists above ASYNC_TRESHOLD go through the async path when grequests
    is available; smaller lists are fetched synchronously.
    """
    if not isinstance(keys, list):
        return self.get_one(**{'frmt': frmt, kname: keys, 'prop': prop})
    if len(keys) > Settings.Instance().ASYNC_TRESHOLD and grequests:
        return self.get_async(kname, keys, frmt, prop)
    return self.get_sync(kname, keys, frmt, prop)
def _get_session(self):
    """Return the session matching the CACHING setting, creating it lazily.

    FIX: the adapter/proxy configuration was duplicated verbatim for the
    cached and plain branches; it is now shared via ``_configure_session``.
    Behaviour is unchanged.
    """
    s = Settings.Instance()
    if s.CACHING:
        if not self.cached_session:
            self.cached_session = requests_cache.CachedSession(
                s.CACHE_NAME, backend='sqlite',
                fast_save=s.FAST_SAVE,
                allowable_methods=('GET', 'POST'))
            self._configure_session(self.cached_session, s)
        return self.cached_session
    if not self.session:
        self.session = requests.Session()
        self._configure_session(self.session, s)
    return self.session

def _configure_session(self, session, s):
    """Mount pooled retrying adapters and optional proxies onto *session*."""
    adapter = requests.adapters.HTTPAdapter(
        pool_connections=s.CONCURRENT_SIZE,
        pool_maxsize=s.CONCURRENT_SIZE,
        max_retries=3, pool_block=True)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    if s.PROXIES:
        session.proxies = s.PROXIES
def _get_one(self, url, asink, frmt, method='get', data=None):
    """Issue a single request, async via grequests when requested."""
    with self._get_session() as session:
        if asink and grequests:
            if method == 'get':
                return grequests.get(url, session=session)
            return grequests.post(
                url, session=session, data=data,
                headers={'Accept': self.content_types[frmt]})
        return self._process_request(
            url, session, frmt,
            timeout=Settings.Instance().TIMEOUT,
            method=method, data=data)
def __init__(self, cachePath, useCache):
    """Set up cache paths and load (or rebuild) ChEMBL target-activity data.

    FIX: the ``self.__cachePath = cachePath`` assignment appeared to be
    commented out even though every subsequent statement reads
    ``self.__cachePath``; restored so the constructor does not raise
    AttributeError.
    """
    self.__cachePath = cachePath
    self.__dirName = "ChEMBL-target-activity"
    super(ChEMBLTargetActivityProvider, self).__init__(self.__cachePath, [self.__dirName])
    self.__dirPath = os.path.join(self.__cachePath, self.__dirName)
    self.__mU = MarshalUtil(workPath=self.__cachePath)
    # Pin the ChEMBL release this provider was built against.
    baseVersion = 28
    self.__version = baseVersion
    logger.info("ChEMBL API MAX_LIMIT %r", Settings.Instance().MAX_LIMIT)  # pylint: disable=no-member
    self.__aD, self.__allIdD = self.__reload(self.__dirPath, useCache)
def _get_method(self, struct, **kwargs):
    """Run a similarity (when 'simscore' is given) or substructure search.

    Structures containing URL-unsafe characters are sent as POST data;
    otherwise the structure is embedded directly in the GET URL.
    """
    frmt = kwargs.get('frmt', 'json')
    root = Settings.Instance().webservice_root_url
    timeout = Settings.Instance().TIMEOUT
    with self._get_session() as session:
        unsafe = any(ch in struct for ch in self.url_unsafe_characters)
        if unsafe:
            data = {'smiles': struct}
            if 'simscore' in kwargs:
                data['simscore'] = kwargs['simscore']
                url = '{0}/{1}/similarity'.format(root, self.name)
            else:
                url = '{0}/{1}/substructure'.format(root, self.name)
            return self._process_request(
                url, session, frmt, timeout=timeout, method='post', data=data)
        if 'simscore' in kwargs:
            url = '{0}/{1}/similarity/{2}/{3}.{4}'.format(
                root, self.name, struct, kwargs['simscore'], frmt)
        else:
            url = '{0}/{1}/substructure/{2}.{3}'.format(
                root, self.name, struct, frmt)
        return self._process_request(url, session, frmt, timeout=timeout)
def __setstate__(self, state):
    """Restore pickled state, warning when client versions differ.

    BUG FIX: the state dict was inspected but never applied — without
    ``self.__dict__.update(state)`` unpickled objects came back empty and
    the ``self.logger`` access below would raise AttributeError. The state
    is restored first so the logger exists for the warning.
    """
    self.__dict__.update(state)
    msg = None
    pickled_version = state.get(
        Settings.Instance().CLIENT_VERSION_PICKLE_KEY)
    if pickled_version:
        current_version = __version__
        if current_version != pickled_version:
            msg = (
                "Pickled queryset version {0} does not match the current version {1}"
                .format(pickled_version, current_version))
    else:
        msg = "Pickled queryset version is not specified."
    if msg:
        self.logger.warning(msg)
def get_one(self, chembl_id=None, **kwargs):
    """Fetch a single target by ChEMBL id or by UniProt accession.

    BUG FIX: the original bound a local named ``async`` and forwarded
    ``async=async`` — a syntax error on Python 3.7+ where ``async`` is a
    reserved keyword. Renamed to ``asink`` (the spelling used elsewhere
    in this client); the legacy ``'async'`` kwargs key is still honoured
    for backward compatibility.
    """
    frmt = kwargs.get('frmt', 'json')
    asink = kwargs.get('asink', kwargs.get('async', False))
    prop = kwargs.get('prop', None)
    if chembl_id:
        return super(TargetResource, self).get_one(
            chembl_id=chembl_id, frmt=frmt, asink=asink, prop=prop)
    if 'uniprot' not in kwargs:
        self.logger.warning('No identifier given')
        return None
    key = 'uniprot'
    url = '%s/%s/%s/%s.%s' % (Settings.Instance().webservice_root_url,
                              self.name, key, kwargs[key], frmt)
    return self._get_one(url, asink, frmt)
def call_spore_function(self, definition, *method_args, **method_kw):
    """
    Handles the actual call to the resource and define for you some
    additional headers and behaviour depending on the spore definition
    that was given.

    :param method_definition: Definition of the method we are defining the
                              function.
    :param service_description: SPORE description of the service. Could be
                                useful to get top-level information, such
                                as the base url of the service.

    BUG FIX: the loop below previously popped from ``method_kw`` while
    iterating ``method_kw.keys()``, which raises RuntimeError on Python 3
    ("dictionary changed size during iteration"); it now iterates a
    snapshot of the keys.
    """
    # For each param passed to the method, match if it's needed in the
    # path, and replace it there if needed.
    path = definition.path
    for kw in list(method_kw.keys()):
        key = ':%s' % kw
        if key in path and kw != 'data':
            path = path.replace(key, method_kw.pop(kw))
    if path.startswith('/'):
        path = path[1:]
    url = urljoin(self.description.base_url, path)
    if len(method_args) == 1:
        method_kw['data'] = method_args[0]
    elif 'data' not in method_kw:
        # No payload argument: serialise the remaining kwargs as the body.
        resp = self.session.request(definition.method, url,
                                    data=json.dumps(method_kw))
        return decode_response(resp, definition)
    # make the actual query to the resource
    resp = self.session.request(definition.method, url, **method_kw)
    # Throttle uncached responses to stay within the server's hourly rate.
    if (not hasattr(resp, 'from_cache') or not resp.from_cache) and (
            self.official or Settings.Instance().RESPECT_RATE_LIMIT):
        hourly_rate = int(resp.headers.get('x-hourlyratelimit-limit', 3600))
        freq_s = 3600 / float(hourly_rate)
        time.sleep(freq_s)
    return decode_response(resp, definition)
def _search(self, query, method_name):
    """Return Elasticsearch completion-suggester ids for *query*, or None.

    BUG FIX: a bare ``except: pass`` swallowed everything including
    SystemExit/KeyboardInterrupt; the handler is now narrowed to the
    JSON-decoding and lookup errors that a malformed response can raise,
    and both failure paths return None explicitly (matching the previous
    implicit behaviour).
    """
    url = '{0}/{1}/_search'.format(Settings.Instance().ELASTIC_URL, method_name)
    payload = {
        "size": 0,
        "suggest": {
            "autocomplete": {
                "prefix": query,
                "completion": {"field": "_metadata.es_completion"},
            }
        },
    }
    res = self.session.post(url, data=json.dumps(payload))
    if not res.ok:
        return None
    try:
        return [hit['_id']
                for hit in res.json()['suggest']['autocomplete'][0]['options']]
    except (ValueError, KeyError, IndexError, TypeError):
        return None
def testFetchActivityData(self):
    """Fetch activity data for two targets and export it as JSON."""
    try:
        logger.info("MAX_LIMIT %r", Settings.Instance().MAX_LIMIT)  # pylint: disable=no-member
        provider = ChEMBLTargetProvider(cachePath=self.__cachePath, useCache=True)
        self.assertTrue(provider.testCache())
        # P43088|CHEMBL1987|9606
        # P08243|uniprotId|CHEMBL3120|chemblId|9606|taxId
        targetIds = ["CHEMBL1987", "CHEMBL3120"]
        targetD = provider.getActivityData(targetIds)
        exportPath = os.path.join(self.__cachePath, "ChEMBL-targets",
                                  "chembl-target-activity.json")
        ok = self.__mU.doExport(exportPath, targetD, fmt="json", indent=3)
        self.assertTrue(ok)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def _get_session(self):
    """Lazily build and return the HTTP session with retries and overrides.

    BUG FIX: ``range(400, 421) + range(500, 505)`` is Python 2 idiom —
    on Python 3 ``range`` objects cannot be concatenated and this raised
    TypeError; both ranges are now materialised with ``list()`` (matching
    the other ``_get_session`` implementation in this codebase).
    """
    s = Settings.Instance()
    if not self.session:
        retry = Retry(total=s.TOTAL_RETRIES, backoff_factor=s.BACKOFF_FACTOR,
                      status_forcelist=(list(range(400, 421)) +
                                        list(range(500, 505))))
        size = s.CONCURRENT_SIZE
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=size, pool_maxsize=size,
            pool_block=True, max_retries=retry)
        self.session = requests_cache.CachedSession(
            s.CACHE_NAME, backend='sqlite', fast_save=s.FAST_SAVE,
            allowable_methods=('GET', 'POST')) if s.CACHING else requests.Session()
        if s.PROXIES:
            self.session.proxies = s.PROXIES
        # Some proxies/servers only allow POST; this header requests GET semantics.
        self.session.headers.update({'X_HTTP_METHOD_OVERRIDE': 'get'})
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
    return self.session
import argparse from chembl_webresource_client.new_client import new_client from chembl_webresource_client.settings import Settings Settings.Instance().CACHING = False def open_file(filename): with open(filename) as f: return f.readline().split()[0] def get_smiles(res): """ Get a list of SMILES from function results """ smiles = set() for smi in res: try: smiles.add('{}\t{}'.format( smi['molecule_structures']['canonical_smiles'], smi['molecule_chembl_id'])) except TypeError: continue return smiles def sim_search(smiles, tanimoto): """ Return compounds which are within a Tanimoto range of the SMILES input
def _get_one(self, url, asink, frmt, method='get', data=None):
    """Issue a single request, async via grequests when *asink* is set.

    BUG FIX: the parameter was named ``async``, which is a reserved
    keyword on Python 3.7+ and made this a syntax error; renamed to
    ``asink`` consistently with the other resource implementations.
    """
    session = self._get_session()
    if asink:
        if method == 'get':
            return grequests.get(url, session=session)
        return grequests.post(
            url, session=session, data=data,
            headers={'Accept': self.content_types[frmt]})
    return self._process_request(url, session, frmt,
                                 timeout=Settings.Instance().TIMEOUT,
                                 method=method, data=data)
#-----------------------------------------------------------------------------------------------------------------------
def get_one(self, chembl_id, frmt='json', asink=False, prop=None):
    """Fetch one record (optionally property *prop*) for *chembl_id*.

    BUG FIX: same ``async`` → ``asink`` rename as above.
    """
    if chembl_id:
        if not prop:
            url = '%s/%s/%s.%s' % (Settings.Instance().webservice_root_url,
                                   self.name, chembl_id, frmt)
        else:
            url = '%s/%s/%s/%s.%s' % (Settings.Instance().webservice_root_url,
                                      self.name, chembl_id, prop, frmt)
        return self._get_one(url, asink, frmt)
def __init__(self):
    """Configure the UniChem client and open its HTTP session."""
    self.logger = logging.getLogger(__name__)
    self.base_url = Settings.Instance().UNICHEM_URL
    self.timeout = Settings.Instance().NEW_CLIENT_TIMEOUT
    # Open the session last, once all attributes are in place.
    self.session = None
    self._get_session()
import datetime import logging import os.path import time from chembl_webresource_client.new_client import new_client from chembl_webresource_client.settings import Settings from rcsb.utils.multiproc.MultiProcUtil import MultiProcUtil from rcsb.utils.io.FileUtil import FileUtil from rcsb.utils.io.MarshalUtil import MarshalUtil from rcsb.utils.io.StashableBase import StashableBase from rcsb.utils.io.UrlRequestUtil import UrlRequestUtil Settings.Instance().TIMEOUT = 10 # pylint: disable=no-member Settings.Instance().MAX_LIMIT = 50 # pylint: disable=no-member Settings.MAX_LIMIT = 50 logger = logging.getLogger(__name__) class ChEMBLTargetActivityWorker(object): """A skeleton worker class that implements the interface expected by the multiprocessing module for fetching ChEMBL activity data -- """ def __init__(self, **kwargs): _ = kwargs def fetchActivity(self, dataList, procName, optionsD, workingDir):
from __future__ import annotations import os from dataclasses import dataclass from pathlib import Path from typing import Optional from chembl_webresource_client.settings import Settings as ChemblSettings from pocketutils.core.dot_dict import NestedDotDict instance = ChemblSettings.Instance() IN_CLI = "IS_IN_CI" in os.environ @dataclass(frozen=True, repr=True, unsafe_hash=True) class Settings: """""" is_testing: bool taxon: int min_pchembl: float min_confidence_score: int min_phase: int cache_path: Path n_retries: int fast_save: bool timeout_sec: int @classmethod def load(cls, data: NestedDotDict) -> Settings: # 117571
def __getstate__(self):
    """Snapshot instance state for pickling, tagged with the client version."""
    state = dict(self.__dict__)
    state[Settings.Instance().CLIENT_VERSION_PICKLE_KEY] = __version__
    return state
for method, definition in [ (m, d) for (m, d) in client.description.methods.items() if (m.startswith('POST_') or m.startswith('GET_')) and m.endswith('_detail') ]: searchable = False if method.replace('dispatch_detail', 'get_search') in keys: searchable = True name = definition['resource_name'] collection_name = definition['collection_name'] formats = [ format for format in definition['formats'] if format not in ('jsonp', 'html') ] default_format = definition['default_format'].split('/')[-1] if not name: continue model = Model(name, collection_name, formats, searchable) qs = QuerySet(model=model) if default_format not in ('xml', 'svg+xml'): qs.set_format(default_format) setattr(client, name, qs) return client #----------------------------------------------------------------------------------------------------------------------- new_client = client_from_url(Settings.Instance().NEW_CLIENT_URL + '/spore') #-----------------------------------------------------------------------------------------------------------------------