Example #1
0
    def _run(self, stream_epoch):
        """Download miniSEED data for *stream_epoch* via granular requests.

        The epoch is split into smaller epochs and each sub-epoch is
        downloaded and appended to ``self.path_tempfile``. On HTTP 413
        (payload too large) the failing sub-epoch is recursively split
        and retried.

        :param stream_epoch: stream epoch to download
        :returns: ``Result.ok`` with the tempfile path and accumulated
            size once the last sub-epoch was processed, or the value of
            ``self._handle_error`` on a non-413 request error.
        """
        stream_epochs = self.split(stream_epoch, self._splitting_const)
        self.logger.debug('Split stream epochs: {}.'.format(
            self.stream_epochs))

        # make a request for each stream epoch resulting from the split
        for stream_epoch in stream_epochs:
            request_handler = GranularFdsnRequestHandler(
                self._url, stream_epoch, query_params=self.query_params)

            # Re-read the trailing miniSEED record already written so a
            # record duplicated at a retry boundary can be skipped below.
            last_chunk = None
            try:
                with open(self.path_tempfile, 'rb') as ifd:
                    ifd.seek(-self.MSEED_RECORD_SIZE, 2)
                    last_chunk = ifd.read(self.MSEED_RECORD_SIZE)
            except (OSError, IOError, ValueError):
                # best effort: tempfile missing or shorter than one record
                pass

            self.logger.debug(
                'Downloading (url={}, stream_epoch={}) ...'.format(
                    request_handler.url, request_handler.stream_epochs))
            try:
                with open(self.path_tempfile, 'ab') as ofd:
                    for chunk in stream_request(request_handler.post(),
                                                chunk_size=self.CHUNK_SIZE,
                                                method='raw'):
                        # skip a record we already have at the file's tail
                        if last_chunk is not None and last_chunk == chunk:
                            continue
                        self._size += len(chunk)
                        ofd.write(chunk)

            except RequestsError as err:
                if err.response.status_code == 413:
                    self.logger.info(
                        'Download failed (url={}, stream_epoch={}).'.format(
                            request_handler.url,
                            request_handler.stream_epochs))
                    # retry with a finer split of the failed sub-epoch;
                    # data/size are accumulated via the tempfile and
                    # self._size, so the return value is not needed here
                    self._run(stream_epoch)
                else:
                    return self._handle_error(err)

            if stream_epoch in self.stream_epochs:
                self.logger.debug(
                    'Download (url={}, stream_epoch={}) finished.'.format(
                        request_handler.url, request_handler.stream_epochs))

            # all sub-epochs processed once the last endtime is reached
            if stream_epoch.endtime == self.stream_epochs[-1].endtime:
                return Result.ok(data=self.path_tempfile, length=self._size)
Example #2
0
    def _run(self, stream_epoch):
        """Download JSON data for *stream_epoch* via granular requests.

        The epoch is split into smaller epochs; the JSON objects streamed
        for each sub-epoch are de-duplicated against the last object
        written and appended to ``self.path_tempfile`` as one JSON list.
        On HTTP 413 the failing sub-epoch is recursively split and
        retried. The connection-retry budget is updated once per request
        with the observed status code.

        :param stream_epoch: stream epoch to download
        :returns: ``Result.ok`` with the tempfile path, accumulated size
            and task type once the last sub-epoch was processed, or the
            value of ``self._handle_error`` on a non-413 request error.
        """
        stream_epochs = self.split(stream_epoch, self._splitting_const)
        self.logger.debug('Split stream epochs: {}.'.format(
            self.stream_epochs))

        # make a request for each stream epoch resulting from the split
        for stream_epoch in stream_epochs:
            request_handler = GranularFdsnRequestHandler(
                self._url, stream_epoch, query_params=self.query_params)

            req = (request_handler.get()
                   if self._http_method == 'GET' else request_handler.post())

            self.logger.debug(
                'Downloading (url={}, stream_epochs={}, method={!r}) '
                'to tempfile {!r}...'.format(request_handler.url,
                                             request_handler.stream_epochs,
                                             self._http_method,
                                             self.path_tempfile))
            # NOTE: bind 'code' before the try block; otherwise an
            # exception other than RequestsError (e.g. from ijson) would
            # raise NameError in 'finally' and mask the original error.
            code = None
            try:
                with open(self.path_tempfile, 'ab') as ofd:
                    with raw_request(req, logger=self.logger) as ifd:

                        # first object ever written -> open the JSON list
                        if self._last_obj is None:
                            ofd.write(self.JSON_LIST_START)
                            self._size += 1

                        for obj in ijson.items(ifd, 'item'):
                            # NOTE(damb): A python object has to be created
                            # since else we cannot compare objects. (JSON is
                            # unordered.)

                            # skip an object duplicated at a retry boundary
                            if (self._last_obj is not None
                                    and self._last_obj == obj):
                                continue

                            if self._last_obj is not None:
                                ofd.write(self.JSON_LIST_SEP)
                                self._size += 1

                            self._last_obj = obj
                            # convert back to bytearray
                            obj = json.dumps(obj).encode('utf-8')

                            self._size += len(obj)
                            ofd.write(obj)

            except RequestsError as err:
                code = (None
                        if err.response is None else err.response.status_code)
                if code == 413:
                    self.logger.info(
                        'Download failed (url={}, stream_epoch={}).'.format(
                            request_handler.url,
                            request_handler.stream_epochs))

                    self.update_cretry_budget(self.url, code)
                    # clear 'code' so 'finally' does not update the retry
                    # budget a second time for the same 413 response
                    code = None
                    self._run(stream_epoch)
                else:
                    return self._handle_error(err)
            else:
                code = 200
            finally:
                if code is not None:
                    self.update_cretry_budget(self.url, code)

            if stream_epoch in self.stream_epochs:
                self.logger.debug(
                    'Download (url={}, stream_epoch={}) finished.'.format(
                        request_handler.url, request_handler.stream_epochs))

            # all sub-epochs processed -> close the JSON list and return
            if stream_epoch.endtime == self.stream_epochs[-1].endtime:

                with open(self.path_tempfile, 'ab') as ofd:
                    ofd.write(self.JSON_LIST_END)
                    self._size += 1

                return Result.ok(data=self.path_tempfile,
                                 length=self._size,
                                 extras={'type_task': self._TYPE})
Example #3
0
    def _run(self, stream_epoch):
        """Download miniSEED data for *stream_epoch* via granular requests.

        The epoch is split into smaller epochs; each sub-epoch is
        streamed and appended to ``self.path_tempfile``, skipping a
        record duplicated at a retry boundary. On HTTP 413 the failing
        sub-epoch is recursively split and retried. The connection-retry
        budget is updated once per request with the observed status code.

        :param stream_epoch: stream epoch to download
        :returns: ``Result.ok`` with the tempfile path, accumulated size
            and task type once the last sub-epoch was processed, or the
            value of ``self._handle_error`` on a non-413 request error.
        """
        stream_epochs = self.split(stream_epoch, self._splitting_const)
        self.logger.debug('Split stream epochs: {}.'.format(
            self.stream_epochs))

        # make a request for each stream epoch resulting from the split
        for stream_epoch in stream_epochs:

            request_handler = GranularFdsnRequestHandler(
                self._url, stream_epoch, query_params=self.query_params)

            # Re-read the trailing miniSEED record already written so a
            # record duplicated at a retry boundary can be skipped below.
            last_chunk = None
            try:
                with open(self.path_tempfile, 'rb') as ifd:
                    ifd.seek(-self.MSEED_RECORD_SIZE, 2)
                    last_chunk = ifd.read(self.MSEED_RECORD_SIZE)
            except (OSError, IOError, ValueError):
                # best effort: tempfile missing or shorter than one record
                pass

            req = (request_handler.get()
                   if self._http_method == 'GET' else request_handler.post())

            self.logger.debug(
                'Downloading (url={}, stream_epochs={}, method={!r}) '
                'to tempfile {!r}...'.format(request_handler.url,
                                             request_handler.stream_epochs,
                                             self._http_method,
                                             self.path_tempfile))

            # NOTE: bind 'code' before the try block; otherwise an
            # exception other than RequestsError would raise NameError in
            # 'finally' and mask the original error.
            code = None
            try:
                with open(self.path_tempfile, 'ab') as ofd:
                    # 'chunk_size' was misspelled 'hunk_size'; cf. the
                    # stream_request call signature used elsewhere
                    for chunk in stream_request(req,
                                                chunk_size=self.CHUNK_SIZE,
                                                method='raw',
                                                logger=self.logger):
                        if last_chunk is not None and last_chunk == chunk:
                            continue
                        self._size += len(chunk)
                        ofd.write(chunk)

            except RequestsError as err:
                code = (None
                        if err.response is None else err.response.status_code)
                if code == 413:
                    self.logger.info(
                        'Download failed (url={}, stream_epoch={}).'.format(
                            request_handler.url,
                            request_handler.stream_epochs))
                    self.update_cretry_budget(self.url, code)
                    # clear 'code' so 'finally' does not update the retry
                    # budget a second time for the same 413 response
                    code = None
                    self._run(stream_epoch)
                else:
                    return self._handle_error(err)
            else:
                code = 200
            finally:
                if code is not None:
                    self.update_cretry_budget(self.url, code)

            if stream_epoch in self.stream_epochs:
                self.logger.debug(
                    'Download (url={}, stream_epoch={}) finished.'.format(
                        request_handler.url, request_handler.stream_epochs))

            # all sub-epochs processed once the last endtime is reached
            if stream_epoch.endtime == self.stream_epochs[-1].endtime:
                return Result.ok(data=self.path_tempfile,
                                 length=self._size,
                                 extras={'type_task': self._TYPE})