def _run(self, stream_epoch):
    stream_epochs = self.split(stream_epoch, self._splitting_const)
    self.logger.debug('Split stream epochs: {}.'.format(
        self.stream_epochs))

    # issue a request for each split stream epoch
    for stream_epoch in stream_epochs:
        request_handler = GranularFdsnRequestHandler(
            self._url, stream_epoch, query_params=self.query_params)

        last_chunk = None
        try:
            with open(self.path_tempfile, 'rb') as ifd:
                # read the last full MiniSEED record already written
                ifd.seek(-self.MSEED_RECORD_SIZE, 2)
                last_chunk = ifd.read(self.MSEED_RECORD_SIZE)
        except (OSError, IOError, ValueError):
            # tempfile missing or shorter than a single record
            pass

        self.logger.debug(
            'Downloading (url={}, stream_epoch={}) ...'.format(
                request_handler.url, request_handler.stream_epochs))
        try:
            with open(self.path_tempfile, 'ab') as ofd:
                for chunk in stream_request(request_handler.post(),
                                            chunk_size=self.CHUNK_SIZE,
                                            method='raw'):
                    # skip the record duplicated at the boundary of two
                    # adjacent stream epochs
                    if last_chunk is not None and last_chunk == chunk:
                        continue
                    self._size += len(chunk)
                    ofd.write(chunk)
        except RequestsError as err:
            if err.response.status_code == 413:
                self.logger.info(
                    'Download failed (url={}, stream_epoch={}).'.format(
                        request_handler.url,
                        request_handler.stream_epochs))
                # request entity too large: split the epoch even further
                self._run(stream_epoch)
            else:
                return self._handle_error(err)

        if stream_epoch in self.stream_epochs:
            self.logger.debug(
                'Download (url={}, stream_epoch={}) finished.'.format(
                    request_handler.url, request_handler.stream_epochs))

        if stream_epoch.endtime == self.stream_epochs[-1].endtime:
            return Result.ok(data=self.path_tempfile, length=self._size)
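# NOTE(editor): The last-chunk comparison above deduplicates the MiniSEED
# record that endpoints may return twice at the boundary of two adjacent
# stream epochs. A minimal, self-contained sketch of the same technique;
# dedup_boundary_record and record_size are hypothetical names, not part
# of this codebase:
def dedup_boundary_record(path, chunks, record_size=512):
    """
    Append equally sized records to ``path``, skipping records identical
    to the file's current last record.
    """
    last_record = None
    try:
        with open(path, 'rb') as ifd:
            # position record_size bytes before the end of the file
            ifd.seek(-record_size, 2)
            last_record = ifd.read(record_size)
    except (OSError, ValueError):
        # file missing or shorter than a single record
        pass

    written = 0
    with open(path, 'ab') as ofd:
        for chunk in chunks:
            if last_record is not None and chunk == last_record:
                continue
            written += ofd.write(chunk)
    return written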
def _request(self):
    """
    Process a federated fdsnws-station text request.
    """
    routes = demux_routes(self._route())
    pool_size = (len(routes) if len(routes) < self.POOL_SIZE
                 else self.POOL_SIZE)
    self.logger.debug('Init worker pool (size={}).'.format(pool_size))
    self._pool = mp.pool.ThreadPool(processes=pool_size)
    # NOTE(damb): With pleasure I'd like to define the parameter
    # maxtasksperchild=self.MAX_TASKS_PER_CHILD. However, using this
    # parameter seems to lead to processes being terminated unexpectedly.
    # Hence, some tasks never return a *ready* result.

    for route in routes:
        self.logger.debug(
            'Creating DownloadTask for {!r} ...'.format(route))

        t = RawDownloadTask(
            GranularFdsnRequestHandler(
                route.url, route.streams[0],
                query_params=self.query_params))
        result = self._pool.apply_async(t)
        self._results.append(result)
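# NOTE(editor): pool.apply_async(t) requires the task object to be
# callable. A minimal sketch of the pattern used above; DemoTask and the
# URL are placeholders, the real RawDownloadTask wraps a request handler:
import multiprocessing.pool

class DemoTask(object):

    def __init__(self, url):
        self._url = url

    def __call__(self):
        # a real task performs the download here and returns a Result
        return 'downloaded {}'.format(self._url)

urls = ['http://eida.example.org/fdsnws/station/1/query']
# never spawn more workers than there are tasks
demo_pool = multiprocessing.pool.ThreadPool(
    processes=min(len(urls), 5))
demo_results = [demo_pool.apply_async(DemoTask(url)) for url in urls]
demo_pool.close()
demo_pool.join()
print([r.get() for r in demo_results])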
def _request(self):
    """
    Process a federated fdsnws-station text request.
    """
    routes = flatten_routes(self._route())
    pool_size = (len(routes) if len(routes) < self.POOL_SIZE
                 else self.POOL_SIZE)
    self.logger.debug('Init worker pool (size={}).'.format(pool_size))
    self._pool = mp.pool.ThreadPool(processes=pool_size)

    for route in routes:
        self.logger.debug(
            'Creating DownloadTask for {!r} ...'.format(route))

        t = StationTextDownloadTask(
            GranularFdsnRequestHandler(
                route.url, route.streams[0],
                query_params=self.query_params))
        result = self._pool.apply_async(t)
        self._results.append(result)

    self._pool.close()
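# NOTE(editor): Both _request() variants access route.streams[0] only,
# i.e. demux_routes()/flatten_routes() are expected to expand every route
# into one route per stream epoch beforehand. A hedged sketch of such an
# expansion; the Route stand-in is hypothetical and the real
# implementation may differ:
import collections

Route = collections.namedtuple('Route', ['url', 'streams'])

def demux_routes(routes):
    """Expand routes such that each returned route carries exactly one
    stream epoch."""
    return [Route(url=route.url, streams=[stream])
            for route in routes
            for stream in route.streams]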
def request(self, pool, tasks, query_params=None, **kwargs):
    """
    Issue granular endpoint requests.
    """
    assert hasattr(self, '_routes'), 'Missing routes.'

    # avoid a mutable default argument
    if query_params is None:
        query_params = {}

    default_task = self._get_task_by_kw(tasks, 'default')

    retval = []
    for route in self._routes:
        self.logger.debug('Creating {!r} for {!r} ...'.format(
            default_task, route))

        ctx = Context()
        self._ctx.append(ctx)

        t = default_task(
            GranularFdsnRequestHandler(
                route.url, route.streams[0],
                query_params=query_params),
            context=ctx, **kwargs)

        result = pool.apply_async(t)
        retval.append(result)

    return retval
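# NOTE(editor): The AsyncResult objects returned by request() are meant
# to be polled by the caller. A minimal, self-contained sketch of such a
# consumer loop; _demo_task and the URLs are placeholders:
import multiprocessing.pool
import time

def _demo_task(url):
    return 'payload from {}'.format(url)

def consume(results):
    """Poll AsyncResults until all of them are ready."""
    while results:
        for result in [r for r in results if r.ready()]:
            print(result.get())
            results.remove(result)
        time.sleep(0.1)

poll_pool = multiprocessing.pool.ThreadPool(processes=2)
poll_results = [poll_pool.apply_async(_demo_task, (u,))
                for u in ('http://a.example.org',
                          'http://b.example.org')]
poll_pool.close()
consume(poll_results)
poll_pool.join()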
def _run(self, stream_epoch):
    stream_epochs = self.split(stream_epoch, self._splitting_const)
    self.logger.debug('Split stream epochs: {}.'.format(
        self.stream_epochs))

    # issue a request for each split stream epoch
    for stream_epoch in stream_epochs:
        request_handler = GranularFdsnRequestHandler(
            self._url, stream_epoch, query_params=self.query_params)

        req = (request_handler.get()
               if self._http_method == 'GET'
               else request_handler.post())

        self.logger.debug(
            'Downloading (url={}, stream_epochs={}, method={!r}) '
            'to tempfile {!r}...'.format(request_handler.url,
                                         request_handler.stream_epochs,
                                         self._http_method,
                                         self.path_tempfile))

        # keep the finally clause safe if an unexpected exception
        # propagates
        code = None
        try:
            with open(self.path_tempfile, 'ab') as ofd:
                with raw_request(req, logger=self.logger) as ifd:

                    if self._last_obj is None:
                        ofd.write(self.JSON_LIST_START)
                        self._size += 1

                    for obj in ijson.items(ifd, 'item'):
                        # NOTE(damb): A python object has to be created
                        # since else we cannot compare objects. (JSON is
                        # unordered.)
                        if (self._last_obj is not None and
                                self._last_obj == obj):
                            continue

                        if self._last_obj is not None:
                            ofd.write(self.JSON_LIST_SEP)
                            self._size += 1

                        self._last_obj = obj
                        # convert back to bytearray
                        obj = json.dumps(obj).encode('utf-8')

                        self._size += len(obj)
                        ofd.write(obj)

        except RequestsError as err:
            code = (None if err.response is None
                    else err.response.status_code)
            if code == 413:
                self.logger.info(
                    'Download failed (url={}, stream_epoch={}).'.format(
                        request_handler.url,
                        request_handler.stream_epochs))
                self.update_cretry_budget(self.url, code)
                self._run(stream_epoch)
            else:
                return self._handle_error(err)
        else:
            code = 200
        finally:
            if code is not None:
                self.update_cretry_budget(self.url, code)

        if stream_epoch in self.stream_epochs:
            self.logger.debug(
                'Download (url={}, stream_epoch={}) finished.'.format(
                    request_handler.url, request_handler.stream_epochs))

        if stream_epoch.endtime == self.stream_epochs[-1].endtime:
            with open(self.path_tempfile, 'ab') as ofd:
                ofd.write(self.JSON_LIST_END)
                self._size += 1
            return Result.ok(data=self.path_tempfile, length=self._size,
                             extras={'type_task': self._TYPE})
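# NOTE(editor): The ijson-based loop above concatenates JSON arrays from
# several endpoint responses into a single array while dropping objects
# duplicated at the boundaries. A minimal sketch of the same technique;
# merge_json_arrays is a hypothetical name and ijson is a third-party
# package:
import io
import json

import ijson

def merge_json_arrays(sources):
    """Merge byte streams, each containing a JSON array, into one
    array."""
    out = io.BytesIO()
    out.write(b'[')
    last_obj = None
    for src in sources:
        for obj in ijson.items(src, 'item'):
            # compare parsed objects; raw JSON text is unreliable since
            # key order is not guaranteed
            if last_obj is not None and obj == last_obj:
                continue
            if last_obj is not None:
                out.write(b',')
            last_obj = obj
            out.write(json.dumps(obj).encode('utf-8'))
    out.write(b']')
    return out.getvalue()

# e.g. two arrays sharing their boundary object:
print(merge_json_arrays([io.BytesIO(b'[{"a": 1}, {"b": 2}]'),
                         io.BytesIO(b'[{"b": 2}, {"c": 3}]')]))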
def _run(self, stream_epoch):
    stream_epochs = self.split(stream_epoch, self._splitting_const)
    self.logger.debug('Split stream epochs: {}.'.format(
        self.stream_epochs))

    # issue a request for each split stream epoch
    for stream_epoch in stream_epochs:
        request_handler = GranularFdsnRequestHandler(
            self._url, stream_epoch, query_params=self.query_params)

        last_chunk = None
        try:
            with open(self.path_tempfile, 'rb') as ifd:
                ifd.seek(-self.MSEED_RECORD_SIZE, 2)
                last_chunk = ifd.read(self.MSEED_RECORD_SIZE)
        except (OSError, IOError, ValueError):
            pass

        req = (request_handler.get()
               if self._http_method == 'GET'
               else request_handler.post())

        self.logger.debug(
            'Downloading (url={}, stream_epochs={}, method={!r}) '
            'to tempfile {!r}...'.format(request_handler.url,
                                         request_handler.stream_epochs,
                                         self._http_method,
                                         self.path_tempfile))

        # keep the finally clause safe if an unexpected exception
        # propagates
        code = None
        try:
            with open(self.path_tempfile, 'ab') as ofd:
                for chunk in stream_request(req,
                                            chunk_size=self.CHUNK_SIZE,
                                            method='raw',
                                            logger=self.logger):
                    # skip the record duplicated at the boundary of two
                    # adjacent stream epochs
                    if last_chunk is not None and last_chunk == chunk:
                        continue
                    self._size += len(chunk)
                    ofd.write(chunk)
        except RequestsError as err:
            code = (None if err.response is None
                    else err.response.status_code)
            if code == 413:
                self.logger.info(
                    'Download failed (url={}, stream_epoch={}).'.format(
                        request_handler.url,
                        request_handler.stream_epochs))
                self.update_cretry_budget(self.url, code)
                self._run(stream_epoch)
            else:
                return self._handle_error(err)
        else:
            code = 200
        finally:
            if code is not None:
                self.update_cretry_budget(self.url, code)

        if stream_epoch in self.stream_epochs:
            self.logger.debug(
                'Download (url={}, stream_epoch={}) finished.'.format(
                    request_handler.url, request_handler.stream_epochs))

        if stream_epoch.endtime == self.stream_epochs[-1].endtime:
            return Result.ok(data=self.path_tempfile, length=self._size,
                             extras={'type_task': self._TYPE})
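# NOTE(editor): On HTTP 413 (Request Entity Too Large) the task halves
# the requested time window and retries recursively. A hedged sketch of
# such a splitting helper; split_epoch and the StreamEpoch stand-in are
# hypothetical, the real self.split() additionally records the pieces on
# the task:
import collections
import datetime

StreamEpoch = collections.namedtuple('StreamEpoch',
                                     ['starttime', 'endtime'])

def split_epoch(epoch, num=2):
    """Split a stream epoch into ``num`` contiguous pieces."""
    step = (epoch.endtime - epoch.starttime) / num
    return [StreamEpoch(starttime=epoch.starttime + i * step,
                        endtime=epoch.starttime + (i + 1) * step)
            for i in range(num)]

print(split_epoch(StreamEpoch(datetime.datetime(2020, 1, 1),
                              datetime.datetime(2020, 1, 3))))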
def _run(self):
    """
    Combine `StationXML <http://www.fdsn.org/xml/station/>`_
    :code:`<Network></Network>` information.
    """
    self.logger.info('Executing task {!r} ...'.format(self))
    self._pool = ThreadPool(processes=self._num_workers)

    for route in self._routes:
        self.logger.debug(
            'Creating DownloadTask for route {!r} ...'.format(route))
        ctx = Context()
        self._ctx.append(ctx)

        t = RawDownloadTask(
            GranularFdsnRequestHandler(
                route.url, route.streams[0],
                query_params=self.query_params),
            decode_unicode=True,
            context=ctx,
            keep_tempfiles=self._keep_tempfiles,
            http_method=self._http_method)

        # apply DownloadTask asynchronously to the worker pool
        result = self._pool.apply_async(t)
        self._results.append(result)

    self._pool.close()

    # fetch ready results
    while True:
        ready = []
        for result in self._results:
            if result.ready():
                _result = result.get()
                if _result.status_code == 200:
                    if self._level in ('channel', 'response'):
                        # merge <Channel></Channel> elements into
                        # <Station></Station> from the correct
                        # <Network></Network> epoch element
                        for _net_element in self._extract_net_elements(
                                _result.data):
                            # find the correct <Network></Network> epoch
                            # element
                            net_element, known = \
                                self._emerge_net_element(
                                    _net_element,
                                    exclude_tags=[
                                        '{}{}'.format(
                                            ns, self.STATION_TAG)
                                        for ns in
                                        settings.STATIONXML_NAMESPACES])
                            if not known:
                                continue

                            # append/merge station elements
                            for sta_element in \
                                    self._emerge_sta_elements(
                                        _net_element):
                                self._merge_sta_element(
                                    net_element, sta_element)

                    elif self._level == 'station':
                        # append <Station></Station> elements to the
                        # corresponding <Network></Network> epoch
                        for _net_element in self._extract_net_elements(
                                _result.data):
                            net_element, known = \
                                self._emerge_net_element(
                                    _net_element,
                                    exclude_tags=[
                                        '{}{}'.format(
                                            ns, self.STATION_TAG)
                                        for ns in
                                        settings.STATIONXML_NAMESPACES])
                            if not known:
                                continue

                            # append station elements
                            # NOTE(damb): <Station></Station> elements
                            # defined by multiple EIDA nodes are simply
                            # appended; no merging is performed
                            for sta_element in \
                                    self._emerge_sta_elements(
                                        _net_element):
                                net_element.append(sta_element)

                    elif self._level == 'network':
                        for net_element in self._extract_net_elements(
                                _result.data):
                            _, _ = self._emerge_net_element(net_element)

                    self._clean(_result)
                    self._sizes.append(_result.length)
                else:
                    self._handle_error(_result)
                    self._sizes.append(0)

                ready.append(result)

        for result in ready:
            self._results.remove(result)

        if not self._results:
            break

        if self._has_inactive_ctx():
            self.logger.debug('{}: Closing ...'.format(self.name))
            self._terminate()
            raise self.MissingContextLock

    self._pool.join()

    if not sum(self._sizes):
        self.logger.warning(
            'Task {!r} terminates with no valid result.'.format(self))
        return Result.nocontent(extras={'type_task': self._TYPE})

    _length = 0
    # dump xml tree for <Network></Network> epochs to temporary file
    self.path_tempfile = get_temp_filepath()
    self.logger.debug('{}: tempfile={!r}'.format(
        self, self.path_tempfile))
    with open(self.path_tempfile, 'wb') as ofd:
        for net_element in self._network_elements:
            s = etree.tostring(net_element)
            _length += len(s)
            ofd.write(s)

            if self._has_inactive_ctx():
                raise self.MissingContextLock

    self.logger.info(
        ('Task {!r} successfully finished '
         '(total bytes processed: {}, after processing: {}).').format(
            self, sum(self._sizes), _length))

    return Result.ok(data=self.path_tempfile, length=_length,
                     extras={'type_task': self._TYPE})
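# NOTE(editor): A reduced sketch of the station-level merge performed
# above: <Station> elements of a second <Network> document are appended
# to an already known <Network> element. merge_stations is a hypothetical
# name and, for brevity, the StationXML namespaces handled by the real
# code are omitted:
from lxml import etree

def merge_stations(known_net, other_net, station_tag='Station'):
    """Append all <Station> children of other_net to known_net."""
    # materialize the iterator first: append() moves elements between
    # trees, which would otherwise disturb the traversal
    for sta in list(other_net.iter(station_tag)):
        known_net.append(sta)
    return known_net

known = etree.fromstring(
    '<Network code="CH"><Station code="DAVOX"/></Network>')
other = etree.fromstring(
    '<Network code="CH"><Station code="HASLI"/></Network>')
print(etree.tostring(merge_stations(known, other)))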