Code Example #1
 def get_page(self):
     if not self.allows_list:
         return
     if (self.stop is not None and self.current_index >= self.stop) or \
        (self.api_total_count and self.current_index >= self.api_total_count):
         return []
     if not self.current_chunk or self.current_page != int(
             self.current_index / self.limit):
         self.current_page = int(self.current_index / self.limit)
         data = self._prepare_url_params()
         with self._get_session() as session:
             res = session.post(self.base_url + '.' + self.frmt,
                                data=data,
                                timeout=self.timeout)
         self.logger.info(res.url)
         self.logger.info(data)
         self.logger.info('From cache: {0}'.format(
             res.from_cache if hasattr(res, 'from_cache') else False))
         if not res.ok:
             handle_http_error(res)
         if self.frmt == 'json':
             json_data = res.json()
             self.current_chunk = json_data[self.collection_name]
             self.api_total_count = json_data['page_meta']['total_count']
         elif self.frmt in ('mol', 'sdf'):
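             # mol/sdf responses carry no paging metadata, so the same
             # payload is re-posted below as JSON to read total_count and
             # to flag molecules whose structures are absent from the SDF.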
             sdf_data = res.text.encode('utf-8')
         self.current_chunk = sdf_data.split(b'$$$$\n')
             with self._get_session() as session:
                 res = session.post(self.base_url + '.json',
                                    data=data,
                                    timeout=self.timeout)
             self.logger.info(res.url)
             self.logger.info(data)
             self.logger.info('From cache: {0}'.format(
                 res.from_cache if hasattr(res, 'from_cache') else False))
             if not res.ok:
                 handle_http_error(res)
             json_data = res.json()
             self.api_total_count = json_data['page_meta']['total_count']
             aux_data = json_data[self.collection_name]
             for idx, mol in enumerate(aux_data):
                 if not mol['molecule_structures']:
                     self.logger.info((idx, mol['molecule_chembl_id']))
                     self.current_chunk.insert(idx, None)
             self.logger.info(aux_data)
             self.logger.info(self.current_chunk)
         else:
             xml = parseString(res.text.encode('utf-8'))
             self.current_chunk = [
                 e.toxml() for e in xml.getElementsByTagName(
                     self.collection_name)[0].childNodes
             ]
             page_meta = xml.getElementsByTagName('page_meta')[0]
             self.api_total_count = int(
                 page_meta.getElementsByTagName('total_count')
                 [0].childNodes[0].data)
     start = self.start
     return self.current_chunk[:(
         self.stop - start
     ) - self.current_index] if self.stop is not None else self.current_chunk
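
The get_page() variants shown here are easiest to follow next to a driver loop. The sketch below is illustrative and not part of the source: it assumes the same object exposes current_index and that _prepare_url_params() derives the request offset from current_page, which the arithmetic above implies.

 def iter_records(query):
     # Hypothetical pagination driver (not in the source). Ask get_page()
     # for the chunk covering current_index, then advance the index so the
     # next call computes a new current_page and fetches the next chunk.
     while True:
         page = query.get_page()
         if not page:
             return
         for record in page:
             yield record
         query.current_index += len(page)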
Code Example #2
 def get_page(self):
     if not self.allows_list:
         return
     if (self.stop is not None and self.current_index >= self.stop) or \
        (self.api_total_count and self.current_index >= self.api_total_count):
         return []
     if not self.current_chunk or self.current_page != int(self.current_index / self.limit):
         self.current_page = int(self.current_index / self.limit)
         data = self._prepare_url_params()
         with self._get_session() as session:
             res = session.post(self.base_url + '.' + self.frmt, data=data, timeout=self.timeout)
         self.logger.info(res.url)
         self.logger.info(data)
         self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
         if not res.ok:
             handle_http_error(res)
         if self.frmt == 'json':
             json_data = res.json()
             self.current_chunk = json_data[self.collection_name]
             self.api_total_count = json_data['page_meta']['total_count']
         else:
             xml = parseString(res.text)
             self.current_chunk = [e.toxml() for e in xml.getElementsByTagName(self.collection_name)[0].childNodes]
             page_meta = xml.getElementsByTagName('page_meta')[0]
             self.api_total_count = int(page_meta.getElementsByTagName('total_count')[0].childNodes[0].data)
     start = self.start
     return self.current_chunk[:(self.stop - start) - self.current_index] if self.stop is not None else self.current_chunk
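
None of the snippets show _prepare_url_params(). A minimal sketch of the payload it plausibly posts, assuming offset/limit paging; the limit/offset key names are an assumption, and the real method presumably also serialises the query's filters.

 def _prepare_url_params(self):
     # Assumed payload shape (names are illustrative): get_page() derives
     # current_page from current_index and self.limit, so an offset/limit
     # pair is the natural thing to post alongside any filter parameters.
     return {
         'limit': self.limit,
         'offset': self.current_page * self.limit,
     }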
Code Example #3
 def _get_results(self, url):
     with self._get_session() as session:
         res = session.get(url, timeout=self.timeout)
         self.logger.info(res.url)
         self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
         if not res.ok:
             handle_http_error(res)
         return res.json()
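
_get_session() is used as a context manager throughout but never defined. A stand-in sketch, assuming a plain requests.Session; the hasattr(res, 'from_cache') guards above suggest the real session may be a caching one (e.g. requests-cache), whose responses carry a from_cache flag.

 import contextlib
 import requests

 @contextlib.contextmanager
 def _get_session(self):
     # Illustrative stand-in (an assumption, not the source implementation).
     # A caching session such as requests_cache.CachedSession would also
     # work and would explain the from_cache checks; a plain Session never
     # sets that attribute, hence the hasattr guards in the snippets.
     session = requests.Session()
     try:
         yield session
     finally:
         session.close()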
Code Example #4
 def get_page(self):
     if not self.allows_list:
         return
     if (self.stop is not None and self.current_index >= self.stop) or \
        (self.api_total_count and self.current_index >= self.api_total_count):
         return []
     if not self.current_chunk or self.current_page != int(self.current_index / self.limit):
         self.current_page = int(self.current_index / self.limit)
         data = self._prepare_url_params()
         with self._get_session() as session:
             res = session.post(self.base_url + '.' + self.frmt, json=data, timeout=self.timeout)
         self.logger.info(res.url)
         self.logger.info(data)
         self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
         if not res.ok:
             handle_http_error(res)
         if self.frmt == 'json':
             json_data = res.json()
             self.current_chunk = json_data[self.collection_name]
             self.api_total_count = json_data['page_meta']['total_count']
         elif self.frmt in ('mol', 'sdf'):
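             # No paging metadata comes back with mol/sdf, so the query is
             # re-posted as JSON below to obtain total_count and to mark
             # molecules with missing structures as None in the chunk.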
             sdf_data = res.text.encode('utf-8')
             self.current_chunk = sdf_data.split(b'$$$$\n')
             with self._get_session() as session:
                 res = session.post(self.base_url + '.json', json=data, timeout=self.timeout)
             self.logger.info(res.url)
             self.logger.info(data)
             self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
             if not res.ok:
                 handle_http_error(res)
             json_data = res.json()
             self.api_total_count = json_data['page_meta']['total_count']
             aux_data = json_data[self.collection_name]
             for idx, mol in enumerate(aux_data):
                 if not mol['molecule_structures']:
                     self.logger.info((idx, mol['molecule_chembl_id']))
                     self.current_chunk.insert(idx, None)
             self.logger.info(aux_data)
             self.logger.info(self.current_chunk)
         else:
             xml = parseString(res.text.encode('utf-8'))
             self.current_chunk = [e.toxml() for e in xml.getElementsByTagName(self.collection_name)[0].childNodes]
             page_meta = xml.getElementsByTagName('page_meta')[0]
             self.api_total_count = int(page_meta.getElementsByTagName('total_count')[0].childNodes[0].data)
     start = self.start
     return self.current_chunk[:(self.stop - start) - self.current_index] if \
         self.stop is not None else self.current_chunk
Code Example #5
 def _get_by_ids(self, ids):
     if self.frmt in ('mol', 'sdf'):
         headers = {'Accept': 'chemical/x-mdl-molfile'}
     else:
         headers = {'Accept': mimetypes.types_map['.'+self.frmt]}
     self.logger.info('headers:')
     self.logger.info(headers)
     if not isinstance(ids, (list, tuple)):
         url = self.base_url + '/' + quote(str(ids))
         if self.frmt in ('mol', 'sdf'):
             url += '.sdf'
         if len(url) > self.max_url_size:
             raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
         with self._get_session() as session:
             res = session.get(url, headers=headers, timeout=self.timeout)
         self.logger.info(res.url)
         self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
         if not res.ok:
             handle_http_error(res)
         if self.frmt == 'json':
             return res.json()
         elif self.frmt in ('svg+xml', 'xml', 'html', 'svg', 'txt', 'mol', 'sdf'):
             return res.text
         return res.content
     if not self.allows_multiple:
         self.logger.error("This resource doesn't accept multiple ids.")
         return
     ret = []
     url = self.base_url + '/set/'
     if len(url) > self.max_url_size:
         raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
     for id in ids:
         if url.endswith('/'):
             url += quote(str(id))
             if len(url) > self.max_url_size:
                 raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
         else:
             old_url = url
             url += ';' + quote(str(id))
             if len(url) > self.max_url_size - 12: # Allow for format specifier to be appended
                 with self._get_session() as session:
                     if self.frmt in ('mol', 'sdf'):
                         old_url += '?format=' + self.frmt
                     res = session.get(old_url, headers=headers, timeout=self.timeout)
                 self.logger.info(res.url)
                 self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
                 if not res.ok:
                     handle_http_error(res)
                 self._gather_results(res, ret)
                 # Start a fresh batch from the current id so ids already
                 # fetched via old_url are not requested again.
                 url = self.base_url + '/set/' + quote(str(id))
     with self._get_session() as session:
         if self.frmt in ('mol', 'sdf'):
             url += '?format=' + self.frmt
         res = session.get(url, headers=headers, timeout=self.timeout)
     self.logger.info(res.url)
     self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
     if res.ok:
         self._gather_results(res, ret)
     else:
         handle_http_error(res)
     return ret
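
The batched branch relies on a _gather_results(res, ret) helper that the snippets never define. A sketch consistent with the parsing done in get_page(); the per-format handling here is an assumption.

 def _gather_results(self, res, ret):
     # Illustrative sketch (not the source). Mirrors get_page()'s parsing:
     # JSON set responses carry the records under self.collection_name,
     # and molfile responses join records with the SDF '$$$$' delimiter.
     if self.frmt == 'json':
         ret.extend(res.json()[self.collection_name])
     elif self.frmt in ('mol', 'sdf'):
         ret.extend(res.text.split('$$$$\n'))
     else:
         ret.append(res.text)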
Code Example #6
 def get_page(self):
     if not self.allows_list:
         return
     if (self.stop is not None and self.current_index >= self.stop) or \
        (self.api_total_count and self.current_index >= self.api_total_count):
         return []
     if not self.current_chunk or self.current_page != int(
             self.current_index / self.limit):
         self.current_page = int(self.current_index / self.limit)
         data = self._prepare_url_params()
         with self._get_session() as session:
             res = session.post(self.base_url + '.' + self.frmt,
                                data=data,
                                timeout=self.timeout)
         self.logger.info(res.url)
         self.logger.info(data)
         self.logger.info('From cache: {0}'.format(
             res.from_cache if hasattr(res, 'from_cache') else False))
         if not res.ok:
             handle_http_error(res)
         if self.frmt == 'json':
             json_data = res.json()
             self.current_chunk = json_data[self.collection_name]
             self.api_total_count = json_data['page_meta']['total_count']
         else:
             xml = parseString(res.text)
             self.current_chunk = [
                 e.toxml() for e in xml.getElementsByTagName(
                     self.collection_name)[0].childNodes
             ]
             page_meta = xml.getElementsByTagName('page_meta')[0]
             self.api_total_count = int(
                 page_meta.getElementsByTagName('total_count')
                 [0].childNodes[0].data)
     start = self.start
     return self.current_chunk[:(
         self.stop - start
     ) - self.current_index] if self.stop is not None else self.current_chunk
Code Example #7
 def _get_by_ids(self, ids):
     if self.frmt in ('mol', 'sdf'):
         headers = {'Accept': 'chemical/x-mdl-molfile'}
     else:
         headers = {'Accept': mimetypes.types_map['.'+self.frmt]}
     self.logger.info('headers:')
     self.logger.info(headers)
     if not isinstance(ids, (list, tuple)):
         url = self.base_url + '/' + quote(str(ids))
         if self.frmt in ('mol', 'sdf'):
             url += '.sdf'
         if len(url) > self.max_url_size:
             raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
         with self._get_session() as session:
             res = session.get(url, headers=headers, timeout=self.timeout)
         self.logger.info(res.url)
         self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
         if not res.ok:
             handle_http_error(res)
         if self.frmt == 'json':
             return res.json()
         elif self.frmt in ('xml', 'html', 'svg', 'txt', 'mol', 'sdf'):
             return res.text
         return res.content
     if not self.allows_multiple:
         self.logger.error("This resource doesn't accept multiple ids.")
         return
     ret = []
     url = self.base_url + '/set/'
     if len(url) > self.max_url_size:
         raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
     for id in ids:
         if url.endswith('/'):
             url += quote(str(id))
             if len(url) > self.max_url_size:
                 raise Exception('URL {0} is longer than allowed {1} characters'.format(url, self.max_url_size))
         else:
             old_url = url
             url += ';' + quote(str(id))
             if len(url) > self.max_url_size:
                 with self._get_session() as session:
                     res = session.get(old_url, headers=headers, timeout=self.timeout)
                 self.logger.info(res.url)
                 self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
                 if not res.ok:
                     handle_http_error(res)
                 self._gather_results(res, ret)
                 # Reset the batch to the current id so already-fetched
                 # ids are not requested a second time.
                 url = self.base_url + '/set/' + quote(str(id))
     with self._get_session() as session:
         res = session.get(url, headers=headers, timeout=self.timeout)
     self.logger.info(res.url)
     self.logger.info('From cache: {0}'.format(res.from_cache if hasattr(res, 'from_cache') else False))
     if res.ok:
         self._gather_results(res, ret)
     else:
         handle_http_error(res)
     return ret
Code Example #8
 def _get_by_ids(self, ids):
     headers = {'Accept': mimetypes.types_map['.' + self.frmt]}
     if not isinstance(ids, (list, tuple)):
         url = self.base_url + '/' + quote(str(ids))
         if len(url) > self.max_url_size:
             raise Exception('URL %s is longer than allowed %s characters' %
                             (url, self.max_url_size))
         res = self._get_session().get(url,
                                       headers=headers,
                                       timeout=self.timeout)
         self.logger.info(res.url)
         self.logger.info(
             'From cache: %s' %
             (res.from_cache if hasattr(res, 'from_cache') else False))
         if not res.ok:
             handle_http_error(res)
         if self.frmt == 'json':
             return res.json()
         elif self.frmt in ('xml', 'html', 'svg', 'txt'):
             return res.text
         return res.content
     if not self.allows_multiple:
         self.logger.error("This resource doesn't accept multiple ids.")
         return
     ret = []
     url = self.base_url + '/set/'
     if len(url) > self.max_url_size:
         raise Exception('URL %s is longer than allowed %s characters' %
                         (url, self.max_url_size))
     for id in ids:
         if url.endswith('/'):
             url += quote(str(id))
             if len(url) > self.max_url_size:
                 raise Exception(
                     'URL %s is longer than allowed %s characters' %
                     (url, self.max_url_size))
         else:
             old_url = url
             url += ';' + quote(str(id))
             if len(url) > self.max_url_size:
                 res = self._get_session().get(old_url,
                                               headers=headers,
                                               timeout=self.timeout)
                 self.logger.info(res.url)
                 self.logger.info('From cache: %s' %
                                  (res.from_cache if hasattr(
                                      res, 'from_cache') else False))
                 if not res.ok:
                     handle_http_error(res)
                 self._gather_results(res, ret)
                 # Begin a new batch with the current id; otherwise the
                 # final request would repeat every id fetched above.
                 url = self.base_url + '/set/' + quote(str(id))
     res = self._get_session().get(url,
                                   headers=headers,
                                   timeout=self.timeout)
     self.logger.info(res.url)
     self.logger.info(
         'From cache: %s' %
         (res.from_cache if hasattr(res, 'from_cache') else False))
     if res.ok:
         self._gather_results(res, ret)
     else:
         handle_http_error(res)
     return ret
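
Finally, every snippet defers failures to handle_http_error(res), which is also not shown. The smallest plausible stand-in raises on any non-2xx status:

 def handle_http_error(res):
     # Minimal stand-in: surface a non-2xx response as an exception via
     # requests' built-in raise_for_status(). The real helper may log or
     # raise a library-specific error instead.
     res.raise_for_status()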