# Shared imports for the methods below. handle_http_error, _get_session,
# _prepare_url_params and the pagination attributes (start, stop, limit,
# current_index, ...) are assumed to come from the enclosing client class.
import mimetypes
from urllib.parse import quote
from xml.dom.minidom import parseString


def get_page(self):
    if not self.allows_list:
        return
    if (self.stop is not None and self.current_index >= self.stop) or \
            (self.api_total_count and self.current_index >= self.api_total_count):
        return []
    if not self.current_chunk or self.current_page != int(self.current_index / self.limit):
        self.current_page = int(self.current_index / self.limit)
        data = self._prepare_url_params()
        with self._get_session() as session:
            res = session.post(self.base_url + '.' + self.frmt, data=data, timeout=self.timeout)
        self.logger.info(res.url)
        self.logger.info(data)
        self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
        if not res.ok:
            handle_http_error(res)
        if self.frmt == 'json':
            json_data = res.json()
            self.current_chunk = json_data[self.collection_name]
            self.api_total_count = json_data['page_meta']['total_count']
        elif self.frmt in ('mol', 'sdf'):
            # res.text was encoded to bytes, so the record separator must be
            # a bytes literal as well.
            sdf_data = res.text.encode('utf-8')
            self.current_chunk = sdf_data.split(b'$$$$\n')
            # The SDF stream omits molecules without structures, so fetch the
            # JSON listing too and pad the chunk with None placeholders.
            with self._get_session() as session:
                res = session.post(self.base_url + '.json', data=data, timeout=self.timeout)
            self.logger.info(res.url)
            self.logger.info(data)
            self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
            if not res.ok:
                handle_http_error(res)
            json_data = res.json()
            self.api_total_count = json_data['page_meta']['total_count']
            aux_data = json_data[self.collection_name]
            for idx, mol in enumerate(aux_data):
                if not mol['molecule_structures']:
                    self.logger.info((idx, mol['molecule_chembl_id']))
                    self.current_chunk.insert(idx, None)
            self.logger.info(aux_data)
            self.logger.info(self.current_chunk)
        else:
            xml = parseString(res.text.encode('utf-8'))
            self.current_chunk = [e.toxml() for e in xml.getElementsByTagName(self.collection_name)[0].childNodes]
            page_meta = xml.getElementsByTagName('page_meta')[0]
            self.api_total_count = int(page_meta.getElementsByTagName('total_count')[0].childNodes[0].data)
    start = self.start
    return self.current_chunk[:(self.stop - start) - self.current_index] if self.stop is not None else self.current_chunk
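# A standalone sketch (not part of the client) of the SDF/JSON alignment
# performed in the mol/sdf branch above: molecules without structures yield
# no SDF block, so None placeholders are inserted at their JSON positions to
# keep both listings index-aligned. Names here are illustrative only.
def align_sdf_chunk(sdf_blocks, json_records):
    chunk = list(sdf_blocks)
    for idx, mol in enumerate(json_records):
        if not mol['molecule_structures']:
            chunk.insert(idx, None)
    return chunk

# Example: the middle record has no structure, so it becomes None.
assert align_sdf_chunk(
    ['mol-A', 'mol-C'],
    [{'molecule_structures': {'molfile': '...'}},
     {'molecule_structures': None},
     {'molecule_structures': {'molfile': '...'}}],
) == ['mol-A', None, 'mol-C']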
def get_page(self):
    if not self.allows_list:
        return
    if (self.stop is not None and self.current_index >= self.stop) or \
            (self.api_total_count and self.current_index >= self.api_total_count):
        return []
    if not self.current_chunk or self.current_page != int(self.current_index / self.limit):
        self.current_page = int(self.current_index / self.limit)
        data = self._prepare_url_params()
        with self._get_session() as session:
            res = session.post(self.base_url + '.' + self.frmt, data=data, timeout=self.timeout)
        self.logger.info(res.url)
        self.logger.info(data)
        self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
        if not res.ok:
            handle_http_error(res)
        if self.frmt == 'json':
            json_data = res.json()
            self.current_chunk = json_data[self.collection_name]
            self.api_total_count = json_data['page_meta']['total_count']
        else:
            xml = parseString(res.text)
            self.current_chunk = [e.toxml() for e in xml.getElementsByTagName(self.collection_name)[0].childNodes]
            page_meta = xml.getElementsByTagName('page_meta')[0]
            self.api_total_count = int(page_meta.getElementsByTagName('total_count')[0].childNodes[0].data)
    start = self.start
    return self.current_chunk[:(self.stop - start) - self.current_index] if self.stop is not None else self.current_chunk
def _get_results(self, url):
    with self._get_session() as session:
        res = session.get(url, timeout=self.timeout)
    self.logger.info(res.url)
    self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
    if not res.ok:
        handle_http_error(res)
    return res.json()
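# Minimal standalone sketch of what _get_results does, using plain requests
# (the session/caching wrapper and handle_http_error live in the enclosing
# module; the ChEMBL URL in the commented call is only illustrative).
import requests

def get_results(url, timeout=10):
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()  # stands in for handle_http_error(res)
    return res.json()

# data = get_results('https://www.ebi.ac.uk/chembl/api/data/molecule.json?limit=1')
# data['page_meta']['total_count'] would then give the server-side count.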
def get_page(self):
    if not self.allows_list:
        return
    if (self.stop is not None and self.current_index >= self.stop) or \
            (self.api_total_count and self.current_index >= self.api_total_count):
        return []
    if not self.current_chunk or self.current_page != int(self.current_index / self.limit):
        self.current_page = int(self.current_index / self.limit)
        data = self._prepare_url_params()
        with self._get_session() as session:
            res = session.post(self.base_url + '.' + self.frmt, json=data, timeout=self.timeout)
        self.logger.info(res.url)
        self.logger.info(data)
        self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
        if not res.ok:
            handle_http_error(res)
        if self.frmt == 'json':
            json_data = res.json()
            self.current_chunk = json_data[self.collection_name]
            self.api_total_count = json_data['page_meta']['total_count']
        elif self.frmt in ('mol', 'sdf'):
            sdf_data = res.text.encode('utf-8')
            self.current_chunk = sdf_data.split(b'$$$$\n')
            with self._get_session() as session:
                res = session.post(self.base_url + '.json', json=data, timeout=self.timeout)
            self.logger.info(res.url)
            self.logger.info(data)
            self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
            if not res.ok:
                handle_http_error(res)
            json_data = res.json()
            self.api_total_count = json_data['page_meta']['total_count']
            aux_data = json_data[self.collection_name]
            for idx, mol in enumerate(aux_data):
                if not mol['molecule_structures']:
                    self.logger.info((idx, mol['molecule_chembl_id']))
                    self.current_chunk.insert(idx, None)
            self.logger.info(aux_data)
            self.logger.info(self.current_chunk)
        else:
            xml = parseString(res.text.encode('utf-8'))
            self.current_chunk = [e.toxml() for e in xml.getElementsByTagName(self.collection_name)[0].childNodes]
            page_meta = xml.getElementsByTagName('page_meta')[0]
            self.api_total_count = int(page_meta.getElementsByTagName('total_count')[0].childNodes[0].data)
    start = self.start
    return self.current_chunk[:(self.stop - start) - self.current_index] if \
        self.stop is not None else self.current_chunk
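# Standalone sketch of the paging bookkeeping used by get_page above: a new
# page is fetched only when the index crosses into page index // limit, and
# when `stop` is set the chunk is cut with chunk[:(stop - start) - index].
# `paged` is a hypothetical driver written for illustration, not a method of
# the real class.
def paged(records, start=0, stop=None, limit=20):
    index = start
    end = len(records) if stop is None else min(stop, len(records))
    while index < end:
        page = index // limit
        chunk = records[page * limit:(page + 1) * limit]
        if stop is not None:
            chunk = chunk[:(stop - start) - index]
        yield from chunk
        index += len(chunk)

# 50 records with stop=45: two full pages of 20, then a 5-record slice.
assert list(paged(list(range(50)), stop=45)) == list(range(45))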
def _get_by_ids(self, ids):
    if self.frmt in ('mol', 'sdf'):
        headers = {'Accept': 'chemical/x-mdl-molfile'}
    else:
        headers = {'Accept': mimetypes.types_map['.' + self.frmt]}
    self.logger.info('headers:')
    self.logger.info(headers)
    if not isinstance(ids, (list, tuple)):
        url = self.base_url + '/' + quote(str(ids))
        if self.frmt in ('mol', 'sdf'):
            url += '.sdf'
        if len(url) > self.max_url_size:
            raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
        with self._get_session() as session:
            res = session.get(url, headers=headers, timeout=self.timeout)
        self.logger.info(res.url)
        self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
        if not res.ok:
            handle_http_error(res)
        if self.frmt == 'json':
            return res.json()
        elif self.frmt in ('svg+xml', 'xml', 'html', 'svg', 'txt', 'mol', 'sdf'):
            return res.text
        return res.content
    if not self.allows_multiple:
        self.logger.error("This resource doesn't accept multiple ids.")
        return
    ret = []
    url = self.base_url + '/set/'
    if len(url) > self.max_url_size:
        raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
    for id in ids:
        if url.endswith('/'):
            url += quote(str(id))
            if len(url) > self.max_url_size:
                raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
        else:
            old_url = url
            url += ';' + quote(str(id))
            if len(url) > self.max_url_size - 12:  # Allow for format specifier to be appended
                with self._get_session() as session:
                    if self.frmt in ('mol', 'sdf'):
                        old_url += '?format=' + self.frmt
                    res = session.get(old_url, headers=headers, timeout=self.timeout)
                self.logger.info(res.url)
                self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
                if not res.ok:
                    handle_http_error(res)
                self._gather_results(res, ret)
                # Start a fresh batch with the id that did not fit; without
                # this reset the URL keeps growing and ids are re-requested.
                url = self.base_url + '/set/' + quote(str(id))
    with self._get_session() as session:
        if self.frmt in ('mol', 'sdf'):
            url += '?format=' + self.frmt
        res = session.get(url, headers=headers, timeout=self.timeout)
    self.logger.info(res.url)
    self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
    if res.ok:
        self._gather_results(res, ret)
    else:
        handle_http_error(res)
    return ret
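# Quick illustration of the Accept header selection above: most formats map
# through the stdlib mimetypes table, while 'mol'/'sdf' get the hand-picked
# 'chemical/x-mdl-molfile' type that mimetypes does not know about.
import mimetypes
print(mimetypes.types_map['.json'])  # typically 'application/json'
print(mimetypes.types_map['.xml'])   # typically 'text/xml'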
def _get_by_ids(self, ids):
    if self.frmt in ('mol', 'sdf'):
        headers = {'Accept': 'chemical/x-mdl-molfile'}
    else:
        headers = {'Accept': mimetypes.types_map['.' + self.frmt]}
    self.logger.info('headers:')
    self.logger.info(headers)
    if not isinstance(ids, (list, tuple)):
        url = self.base_url + '/' + quote(str(ids))
        if self.frmt in ('mol', 'sdf'):
            url += '.sdf'
        if len(url) > self.max_url_size:
            raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
        with self._get_session() as session:
            res = session.get(url, headers=headers, timeout=self.timeout)
        self.logger.info(res.url)
        self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
        if not res.ok:
            handle_http_error(res)
        if self.frmt == 'json':
            return res.json()
        elif self.frmt in ('xml', 'html', 'svg', 'txt', 'mol', 'sdf'):
            return res.text
        return res.content
    if not self.allows_multiple:
        self.logger.error("This resource doesn't accept multiple ids.")
        return
    ret = []
    url = self.base_url + '/set/'
    if len(url) > self.max_url_size:
        raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
    for id in ids:
        if url.endswith('/'):
            url += quote(str(id))
            if len(url) > self.max_url_size:
                raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
        else:
            old_url = url
            url += ';' + quote(str(id))
            if len(url) > self.max_url_size:
                with self._get_session() as session:
                    res = session.get(old_url, headers=headers, timeout=self.timeout)
                self.logger.info(res.url)
                self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
                if not res.ok:
                    handle_http_error(res)
                self._gather_results(res, ret)
                # Start a fresh batch with the id that did not fit; without
                # this reset the URL keeps growing and ids are re-requested.
                url = self.base_url + '/set/' + quote(str(id))
    with self._get_session() as session:
        res = session.get(url, headers=headers, timeout=self.timeout)
    self.logger.info(res.url)
    self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
    if res.ok:
        self._gather_results(res, ret)
    else:
        handle_http_error(res)
    return ret
def _get_by_ids(self, ids):
    headers = {'Accept': mimetypes.types_map['.' + self.frmt]}
    if not isinstance(ids, (list, tuple)):
        url = self.base_url + '/' + quote(str(ids))
        if len(url) > self.max_url_size:
            raise Exception('URL %s is longer than allowed %s characters' % (url, self.max_url_size))
        res = self._get_session().get(url, headers=headers, timeout=self.timeout)
        self.logger.info(res.url)
        self.logger.info('From cache: %s' % (res.from_cache if hasattr(res, 'from_cache') else False))
        if not res.ok:
            handle_http_error(res)
        if self.frmt == 'json':
            return res.json()
        elif self.frmt in ('xml', 'html', 'svg', 'txt'):
            return res.text
        return res.content
    if not self.allows_multiple:
        self.logger.error("This resource doesn't accept multiple ids.")
        return
    ret = []
    url = self.base_url + '/set/'
    if len(url) > self.max_url_size:
        raise Exception('URL %s is longer than allowed %s characters' % (url, self.max_url_size))
    for id in ids:
        if url.endswith('/'):
            url += quote(str(id))
            if len(url) > self.max_url_size:
                raise Exception('URL %s is longer than allowed %s characters' % (url, self.max_url_size))
        else:
            old_url = url
            url += ';' + quote(str(id))
            if len(url) > self.max_url_size:
                res = self._get_session().get(old_url, headers=headers, timeout=self.timeout)
                self.logger.info(res.url)
                self.logger.info('From cache: %s' % (res.from_cache if hasattr(res, 'from_cache') else False))
                if not res.ok:
                    handle_http_error(res)
                self._gather_results(res, ret)
                # Start a fresh batch with the id that did not fit; without
                # this reset the URL keeps growing and ids are re-requested.
                url = self.base_url + '/set/' + quote(str(id))
    res = self._get_session().get(url, headers=headers, timeout=self.timeout)
    self.logger.info(res.url)
    self.logger.info('From cache: %s' % (res.from_cache if hasattr(res, 'from_cache') else False))
    if res.ok:
        self._gather_results(res, ret)
    else:
        handle_http_error(res)
    return ret
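# Standalone sketch of the '/set/' URL batching used by the _get_by_ids
# variants above: ids are joined with ';' and a batch is flushed whenever the
# next id would push the URL past max_url_size. batch_set_urls is a
# hypothetical helper written only to make the splitting rule visible.
from urllib.parse import quote

def batch_set_urls(base_url, ids, max_url_size):
    batches = []
    url = base_url + '/set/'
    for one_id in ids:
        if url.endswith('/'):
            url += quote(str(one_id))
        else:
            candidate = url + ';' + quote(str(one_id))
            if len(candidate) > max_url_size:
                batches.append(url)  # flush the full batch
                url = base_url + '/set/' + quote(str(one_id))  # start a new one
            else:
                url = candidate
    batches.append(url)  # final, possibly partial batch
    return batches

# Example: a small limit forces the ids onto more than one request URL.
urls = batch_set_urls('https://example.org/molecule', ['CHEMBL25', 'CHEMBL1', 'CHEMBL2'], 50)
assert all(len(u) <= 50 for u in urls)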