Example #1
    def _run(self, stream_epoch):

        stream_epochs = self.split(stream_epoch, self._splitting_const)
        self.logger.debug('Split stream epochs: {}.'.format(
            self.stream_epochs))

        # make a request for each stream epoch
        for stream_epoch in stream_epochs:
            request_handler = GranularFdsnRequestHandler(
                self._url, stream_epoch, query_params=self.query_params)

            last_chunk = None
            try:
                with open(self.path_tempfile, 'rb') as ifd:
                    ifd.seek(-self.MSEED_RECORD_SIZE, 2)
                    last_chunk = ifd.read(self.MSEED_RECORD_SIZE)
            except (OSError, IOError, ValueError):
                # tempfile missing or shorter than a single record
                pass

            self.logger.debug(
                'Downloading (url={}, stream_epoch={}) ...'.format(
                    request_handler.url, request_handler.stream_epochs))
            try:
                with open(self.path_tempfile, 'ab') as ofd:
                    for chunk in stream_request(request_handler.post(),
                                                chunk_size=self.CHUNK_SIZE,
                                                method='raw'):
                        if last_chunk is not None and last_chunk == chunk:
                            continue
                        self._size += len(chunk)
                        ofd.write(chunk)

            except RequestsError as err:
                if err.response.status_code == 413:
                    self.logger.info(
                        'Download failed (url={}, stream_epoch={}).'.format(
                            request_handler.url,
                            request_handler.stream_epochs))
                    self._run(stream_epoch)
                else:
                    return self._handle_error(err)

            if stream_epoch in self.stream_epochs:
                self.logger.debug(
                    'Download (url={}, stream_epoch={}) finished.'.format(
                        request_handler.url, request_handler.stream_epochs))

            if stream_epoch.endtime == self.stream_epochs[-1].endtime:
                return Result.ok(data=self.path_tempfile, length=self._size)
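
The key trick in Example #1 is the overlap guard: before each partial download the task reads the trailing MSEED_RECORD_SIZE bytes already on disk and skips any response chunk identical to them, so an overlapping split request does not duplicate the final miniSEED record. A minimal standalone sketch of the same idea, assuming plain requests and an illustrative URL, path, and record size:

import requests

MSEED_RECORD_SIZE = 512  # assumed fixed miniSEED record length
CHUNK_SIZE = 512

def append_deduplicated(url, path):
    """Append a streamed response to *path*, skipping chunks that
    repeat the last record already written (hypothetical helper)."""
    last_chunk = None
    try:
        with open(path, 'rb') as ifd:
            ifd.seek(-MSEED_RECORD_SIZE, 2)  # 2 == os.SEEK_END
            last_chunk = ifd.read(MSEED_RECORD_SIZE)
    except (OSError, ValueError):
        pass  # file missing or shorter than a single record

    with requests.get(url, stream=True) as resp, open(path, 'ab') as ofd:
        resp.raise_for_status()
        for chunk in resp.iter_content(chunk_size=CHUNK_SIZE):
            if last_chunk is not None and chunk == last_chunk:
                continue  # drop the overlapping record
            ofd.write(chunk)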
Example #2
    def _request(self):
        """
        Process a federated fdsnws-station text request
        """
        routes = demux_routes(self._route())

        pool_size = (len(routes)
                     if len(routes) < self.POOL_SIZE else self.POOL_SIZE)

        self.logger.debug('Init worker pool (size={}).'.format(pool_size))
        self._pool = mp.pool.ThreadPool(processes=pool_size)
        # NOTE(damb): Ideally, the parameter
        # maxtasksperchild=self.MAX_TASKS_PER_CHILD would be set here.
        # However, using this parameter seems to lead to processes being
        # terminated unexpectedly, such that some tasks never return a
        # *ready* result.

        for route in routes:
            self.logger.debug(
                'Creating DownloadTask for {!r} ...'.format(route))
            t = RawDownloadTask(
                GranularFdsnRequestHandler(route.url,
                                           route.streams[0],
                                           query_params=self.query_params))
            result = self._pool.apply_async(t)
            self._results.append(result)
Example #3
    def _request(self):
        """
        Process a federated fdsnws-station text request
        """
        routes = flatten_routes(self._route())

        pool_size = (len(routes) if
                     len(routes) < self.POOL_SIZE else self.POOL_SIZE)

        self.logger.debug('Init worker pool (size={}).'.format(pool_size))
        self._pool = mp.pool.ThreadPool(processes=pool_size)

        for route in routes:
            self.logger.debug(
                'Creating DownloadTask for {!r} ...'.format(
                    route))
            t = StationTextDownloadTask(
                GranularFdsnRequestHandler(
                    route.url,
                    route.streams[0],
                    query_params=self.query_params))
            result = self._pool.apply_async(t)
            self._results.append(result)

        self._pool.close()
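
Examples #2 and #3 share the same fan-out pattern: size a multiprocessing.pool.ThreadPool to min(len(routes), POOL_SIZE), submit one callable task per route with apply_async, and keep the returned AsyncResult handles for later polling. Note that only Example #3 closes the pool immediately after submission. A self-contained sketch of the pattern, with an invented task class and route list:

import multiprocessing.pool as mp_pool

POOL_SIZE = 5  # assumed upper bound on concurrent workers

class DownloadTask:
    """Hypothetical callable task; the pool invokes it like a function."""
    def __init__(self, url):
        self.url = url

    def __call__(self):
        # ... perform the request here; return a result object ...
        return 'downloaded {}'.format(self.url)

routes = ['http://node-a.example.org', 'http://node-b.example.org']
pool = mp_pool.ThreadPool(processes=min(len(routes), POOL_SIZE))
results = [pool.apply_async(DownloadTask(url)) for url in routes]
pool.close()  # no further tasks will be submitted
pool.join()   # wait for the workers to drain

for result in results:
    print(result.get())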
Example #4
    def request(self, pool, tasks, query_params=None, **kwargs):
        """
        Issue granular endpoint requests.
        """
        # NOTE: None instead of {} avoids the mutable default argument pitfall
        query_params = query_params or {}

        assert hasattr(self, '_routes'), 'Missing routes.'

        default_task = self._get_task_by_kw(tasks, 'default')

        retval = []
        for route in self._routes:
            self.logger.debug('Creating {!r} for {!r} ...'.format(
                default_task, route))
            ctx = Context()
            self._ctx.append(ctx)
            t = default_task(GranularFdsnRequestHandler(
                route.url, route.streams[0], query_params=query_params),
                             context=ctx,
                             **kwargs)
            result = pool.apply_async(t)
            retval.append(result)

        return retval
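
The method returns the raw AsyncResult handles, leaving it to the caller to drain them. A minimal consumption loop under the same assumption, polling with ready() the way Example #7 below does at larger scale (the drain helper is invented):

import time

def drain(results, poll_interval=0.1):
    """Collect values from a list of AsyncResult handles (sketch)."""
    collected = []
    while results:
        done = [r for r in results if r.ready()]
        for r in done:
            collected.append(r.get())  # get() re-raises task exceptions
            results.remove(r)
        if results:
            time.sleep(poll_interval)
    return collected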
Example #5
    def _run(self, stream_epoch):

        stream_epochs = self.split(stream_epoch, self._splitting_const)
        self.logger.debug('Split stream epochs: {}.'.format(
            self.stream_epochs))

        # make a request for each stream epoch
        for stream_epoch in stream_epochs:
            request_handler = GranularFdsnRequestHandler(
                self._url, stream_epoch, query_params=self.query_params)

            req = (request_handler.get()
                   if self._http_method == 'GET' else request_handler.post())

            self.logger.debug(
                'Downloading (url={}, stream_epochs={}, method={!r}) '
                'to tempfile {!r}...'.format(request_handler.url,
                                             request_handler.stream_epochs,
                                             self._http_method,
                                             self.path_tempfile))
            code = None
            try:
                with open(self.path_tempfile, 'ab') as ofd:
                    with raw_request(req, logger=self.logger) as ifd:

                        if self._last_obj is None:
                            ofd.write(self.JSON_LIST_START)
                            self._size += 1

                        for obj in ijson.items(ifd, 'item'):
                            # NOTE(damb): A Python object has to be created,
                            # because otherwise the objects could not be
                            # compared (JSON object members are unordered).

                            if (self._last_obj is not None
                                    and self._last_obj == obj):
                                continue

                            if self._last_obj is not None:
                                ofd.write(self.JSON_LIST_SEP)
                                self._size += 1

                            self._last_obj = obj
                            # serialize the object back to bytes
                            obj = json.dumps(obj).encode('utf-8')

                            self._size += len(obj)
                            ofd.write(obj)

            except RequestsError as err:
                code = (None
                        if err.response is None else err.response.status_code)
                if code == 413:
                    self.logger.info(
                        'Download failed (url={}, stream_epoch={}).'.format(
                            request_handler.url,
                            request_handler.stream_epochs))

                    self.update_cretry_budget(self.url, code)
                    self._run(stream_epoch)
                else:
                    return self._handle_error(err)
            else:
                code = 200
            finally:
                if code is not None:
                    self.update_cretry_budget(self.url, code)

            if stream_epoch in self.stream_epochs:
                self.logger.debug(
                    'Download (url={}, stream_epoch={}) finished.'.format(
                        request_handler.url, request_handler.stream_epochs))

            if stream_epoch.endtime == self.stream_epochs[-1].endtime:

                with open(self.path_tempfile, 'ab') as ofd:
                    ofd.write(self.JSON_LIST_END)
                    self._size += 1

                return Result.ok(data=self.path_tempfile,
                                 length=self._size,
                                 extras={'type_task': self._TYPE})
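
This variant assembles a single on-disk JSON array from several partial responses: it writes [ exactly once, streams objects out of each response with ijson.items(ifd, 'item'), skips an object equal to the last one written (split epochs may overlap at the boundary), and emits the , separators itself. A reduced sketch of that merge step over in-memory buffers; ijson is the same third-party streaming parser used above:

import io
import json
import ijson

def merge_json_arrays(sources, ofd):
    """Stream several JSON arrays into *ofd* as one array, dropping
    an object that repeats the previous one (sketch)."""
    last_obj = None
    ofd.write(b'[')
    for src in sources:
        for obj in ijson.items(src, 'item'):
            if last_obj is not None and obj == last_obj:
                continue  # duplicate at a split boundary
            if last_obj is not None:
                ofd.write(b',')
            last_obj = obj
            ofd.write(json.dumps(obj).encode('utf-8'))
    ofd.write(b']')

out = io.BytesIO()
merge_json_arrays([io.BytesIO(b'[{"a": 1}, {"a": 2}]'),
                   io.BytesIO(b'[{"a": 2}, {"a": 3}]')], out)
print(out.getvalue())  # b'[{"a": 1},{"a": 2},{"a": 3}]'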
Example #6
    def _run(self, stream_epoch):

        stream_epochs = self.split(stream_epoch, self._splitting_const)
        self.logger.debug('Split stream epochs: {}.'.format(
            self.stream_epochs))

        # make a request for each stream epoch
        for stream_epoch in stream_epochs:

            request_handler = GranularFdsnRequestHandler(
                self._url, stream_epoch, query_params=self.query_params)

            last_chunk = None
            try:
                with open(self.path_tempfile, 'rb') as ifd:
                    ifd.seek(-self.MSEED_RECORD_SIZE, 2)
                    last_chunk = ifd.read(self.MSEED_RECORD_SIZE)
            except (OSError, IOError, ValueError):
                # tempfile missing or shorter than a single record
                pass

            req = (request_handler.get()
                   if self._http_method == 'GET' else request_handler.post())

            self.logger.debug(
                'Downloading (url={}, stream_epochs={}, method={!r}) '
                'to tempfile {!r}...'.format(request_handler.url,
                                             request_handler.stream_epochs,
                                             self._http_method,
                                             self.path_tempfile))

            code = None
            try:
                with open(self.path_tempfile, 'ab') as ofd:
                    for chunk in stream_request(req,
                                                chunk_size=self.CHUNK_SIZE,
                                                method='raw',
                                                logger=self.logger):
                        if last_chunk is not None and last_chunk == chunk:
                            continue
                        self._size += len(chunk)
                        ofd.write(chunk)

            except RequestsError as err:
                code = (None
                        if err.response is None else err.response.status_code)
                if code == 413:
                    self.logger.info(
                        'Download failed (url={}, stream_epoch={}).'.format(
                            request_handler.url,
                            request_handler.stream_epochs))
                    self.update_cretry_budget(self.url, code)
                    self._run(stream_epoch)
                else:
                    return self._handle_error(err)
            else:
                code = 200
            finally:
                if code is not None:
                    self.update_cretry_budget(self.url, code)

            if stream_epoch in self.stream_epochs:
                self.logger.debug(
                    'Download (url={}, stream_epoch={}) finished.'.format(
                        request_handler.url, request_handler.stream_epochs))

            if stream_epoch.endtime == self.stream_epochs[-1].endtime:
                return Result.ok(data=self.path_tempfile,
                                 length=self._size,
                                 extras={'type_task': self._TYPE})
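
Compared with Example #1, this version also reports every final status code to update_cretry_budget, i.e. it maintains a per-endpoint error budget that other parts of the federator can consult before retrying. A toy tracker in the same spirit; the window size, threshold, and class name are invented:

from collections import defaultdict, deque

class CRetryBudget:
    """Track the recent error ratio per URL (illustrative only)."""

    def __init__(self, window=20, threshold=0.5):
        self._threshold = threshold
        self._codes = defaultdict(lambda: deque(maxlen=window))

    def update(self, url, code):
        self._codes[url].append(code)

    def permits_retry(self, url):
        codes = self._codes[url]
        if not codes:
            return True
        errors = sum(1 for c in codes if c >= 400)
        return errors / len(codes) < self._threshold

budget = CRetryBudget()
budget.update('http://node-a.example.org', 413)
print(budget.permits_retry('http://node-a.example.org'))  # False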
Example #7
    def _run(self):
        """
        Combine `StationXML <http://www.fdsn.org/xml/station/>`_
        :code:`<Network></Network>` information.
        """
        self.logger.info('Executing task {!r} ...'.format(self))
        self._pool = ThreadPool(processes=self._num_workers)

        for route in self._routes:
            self.logger.debug(
                'Creating DownloadTask for route {!r} ...'.format(route))
            ctx = Context()
            self._ctx.append(ctx)

            t = RawDownloadTask(GranularFdsnRequestHandler(
                route.url, route.streams[0], query_params=self.query_params),
                                decode_unicode=True,
                                context=ctx,
                                keep_tempfiles=self._keep_tempfiles,
                                http_method=self._http_method)

            # apply DownloadTask asynchronously to the worker pool
            result = self._pool.apply_async(t)

            self._results.append(result)

        self._pool.close()

        # fetch results as soon as they are ready
        while True:
            ready = []
            for result in self._results:
                if result.ready():
                    _result = result.get()
                    if _result.status_code == 200:
                        if self._level in ('channel', 'response'):
                            # merge <Channel></Channel> elements into
                            # <Station></Station> from the correct
                            # <Network></Network> epoch element
                            for _net_element in self._extract_net_elements(
                                    _result.data):

                                # find the correct <Network></Network> epoch
                                # element
                                net_element, known = self._emerge_net_element(
                                    _net_element,
                                    exclude_tags=[
                                        '{}{}'.format(ns, self.STATION_TAG) for
                                        ns in settings.STATIONXML_NAMESPACES
                                    ])

                                if not known:
                                    continue

                                # append/merge station elements
                                for sta_element in \
                                        self._emerge_sta_elements(
                                            _net_element):
                                    self._merge_sta_element(
                                        net_element, sta_element)

                        elif self._level == 'station':
                            # append <Station></Station> elements to the
                            # corresponding <Network></Network> epoch
                            for _net_element in self._extract_net_elements(
                                    _result.data):

                                net_element, known = self._emerge_net_element(
                                    _net_element,
                                    exclude_tags=[
                                        '{}{}'.format(ns, self.STATION_TAG) for
                                        ns in settings.STATIONXML_NAMESPACES
                                    ])

                                if not known:
                                    continue

                                # append station elements
                                # NOTE(damb): <Station></Station> elements
                                # defined by multiple EIDA nodes are simply
                                # appended; no merging is performed
                                for sta_element in \
                                        self._emerge_sta_elements(
                                            _net_element):
                                    net_element.append(sta_element)

                        elif self._level == 'network':
                            for net_element in self._extract_net_elements(
                                    _result.data):
                                _, _ = self._emerge_net_element(net_element)

                        self._clean(_result)
                        self._sizes.append(_result.length)

                    else:
                        self._handle_error(_result)
                        self._sizes.append(0)

                    ready.append(result)

            for result in ready:
                self._results.remove(result)

            if not self._results:
                break

            if self._has_inactive_ctx():
                self.logger.debug('{}: Closing ...'.format(self.name))
                self._terminate()
                raise self.MissingContextLock

        self._pool.join()

        if not sum(self._sizes):
            self.logger.warning(
                'Task {!r} terminates with no valid result.'.format(self))
            return Result.nocontent(extras={'type_task': self._TYPE})

        _length = 0
        # dump xml tree for <Network></Network> epochs to temporary file
        self.path_tempfile = get_temp_filepath()
        self.logger.debug('{}: tempfile={!r}'.format(self, self.path_tempfile))
        with open(self.path_tempfile, 'wb') as ofd:
            for net_element in self._network_elements:
                s = etree.tostring(net_element)
                _length += len(s)
                ofd.write(s)

        if self._has_inactive_ctx():
            raise self.MissingContextLock

        self.logger.info(
            ('Task {!r} successfully finished '
             '(total bytes processed: {}, after processing: {}).').format(
                 self, sum(self._sizes), _length))

        return Result.ok(data=self.path_tempfile,
                         length=_length,
                         extras={'type_task': self._TYPE})
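
The merging stage of Example #7 boils down to grouping <Network> epochs across responses and grafting the <Station> children of later duplicates into the first occurrence before serializing with etree.tostring. A toy version of that grafting step with lxml; the namespace is the FDSN StationXML one, but the matching rule (network code plus startDate) is a simplification of what _emerge_net_element does:

from lxml import etree

NS = 'http://www.fdsn.org/xml/station/1'

def merge_networks(trees):
    """Group <Network> elements by (code, startDate) and append the
    <Station> children of duplicates to the first occurrence."""
    merged = {}
    for tree in trees:
        for net in tree.iter('{%s}Network' % NS):
            key = (net.get('code'), net.get('startDate'))
            if key not in merged:
                merged[key] = net
            else:
                for sta in net.findall('{%s}Station' % NS):
                    merged[key].append(sta)
    return list(merged.values())

a = etree.fromstring(
    '<Network xmlns="{}" code="CH"><Station code="AAA"/></Network>'
    .format(NS))
b = etree.fromstring(
    '<Network xmlns="{}" code="CH"><Station code="BBB"/></Network>'
    .format(NS))
for net in merge_networks([a, b]):
    print(etree.tostring(net))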