Example #1
0
 def run(self):
     """Dummy worker: perform no requests and report completion at once.

     A single ``NETWORK_DONE`` message is queued on ``self.progress_queue``
     with ``ret`` set to ``True`` plus the current request counters and
     the value returned by ``get_rusage()``.
     """
     summary = {
         "ret": True,
         "processed": self.n_requests,
         "retried": self.n_retried,
         "consumed": self.n_consumed,
         "rusage": get_rusage(),
     }
     self.progress_queue.put((ProgressQueueMsg.NETWORK_DONE, summary))
 def run(self):
     """Signal network completion immediately (no-op implementation).

     Nothing is requested; the method only posts the final
     ``NETWORK_DONE`` message, flagged as successful, together with the
     instance counters and a ``get_rusage()`` snapshot.
     """
     done_message = (
         ProgressQueueMsg.NETWORK_DONE,
         {
             "ret": True,
             "processed": self.n_requests,
             "retried": self.n_retried,
             "consumed": self.n_consumed,
             "rusage": get_rusage(),
         },
     )
     self.progress_queue.put(done_message)
Example #3
0
    def run(self, dry_run=False):
        """Execute the request loop, or just count it when ``dry_run``.

        With ``dry_run`` true, ``self.perform_requests(True)`` is exhausted
        and the number of yielded items is returned; nothing else happens.

        Otherwise a thread pool and an HTTP session (connection pool sized
        to ``self.concurrency``) are created and ``self.perform_requests()``
        is consumed. Each yielded value that is not ``True`` counts as one
        sent response. A ``NETWORK_PROGRESS`` message is queued whenever
        more than ``REPORT_INTERVAL`` has passed since the previous report,
        and a closing ``NETWORK_DONE`` message carries the last yielded
        value (``None`` when nothing was yielded) under ``ret``.
        """
        if dry_run:
            return sum(1 for _ in self.perform_requests(True))

        self._executor = ThreadPoolExecutor(self.concurrency)
        self.session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=self.concurrency, pool_maxsize=self.concurrency)
        for scheme in ('http://', 'https://'):
            self.session.mount(scheme, adapter)

        started_at = time()
        last_report = time()
        n_responses = 0
        outcome = None
        for outcome in self.perform_requests():
            if outcome is True:
                continue

            n_responses += 1
            self.ui.info('{} responses sent | time elapsed {}s'.format(
                n_responses, time() - started_at))

            if time() - last_report > REPORT_INTERVAL:
                progress = {
                    "processed": self.n_requests,
                    "retried": self.n_retried,
                    "consumed": self.n_consumed,
                    "rusage": get_rusage(),
                }
                self.progress_queue.put(
                    (ProgressQueueMsg.NETWORK_PROGRESS, progress))
                last_report = time()

        final = {
            "ret": outcome,
            "processed": self.n_requests,
            "retried": self.n_retried,
            "consumed": self.n_consumed,
            "rusage": get_rusage(),
        }
        self.progress_queue.put((ProgressQueueMsg.NETWORK_DONE, final))
Example #4
0
    def run(self):
        """Drive all network requests and publish progress messages.

        Creates the worker thread pool and an HTTP session whose adapter
        pool is sized to ``self.concurrency`` and whose certificate
        verification follows ``self.verify_ssl``, then consumes
        ``self.perform_requests()``. Every yielded value other than
        ``True`` is counted and logged as a sent response. Once more than
        ``REPORT_INTERVAL`` has elapsed since the last report a
        ``NETWORK_PROGRESS`` message is queued. Finally ``NETWORK_DONE``
        is queued with the last yielded value (``None`` if there was none)
        as ``ret``.
        """
        self._executor = ThreadPoolExecutor(self.concurrency)
        self.session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=self.concurrency, pool_maxsize=self.concurrency)
        for prefix in ('http://', 'https://'):
            self.session.mount(prefix, adapter)
        self.session.verify = self.verify_ssl

        started = time()
        last_report = time()
        sent = 0
        last_result = None
        for last_result in self.perform_requests():
            if last_result is True:
                continue

            sent += 1
            elapsed = time() - started
            self.ui.info(
                '{} responses sent | time elapsed {}s'.format(sent, elapsed))

            if time() - last_report > REPORT_INTERVAL:
                progress_msg = (
                    ProgressQueueMsg.NETWORK_PROGRESS,
                    {
                        "processed": self.n_requests,
                        "retried": self.n_retried,
                        "consumed": self.n_consumed,
                        "rusage": get_rusage(),
                    },
                )
                self.progress_queue.put(progress_msg)
                last_report = time()

        done_msg = (
            ProgressQueueMsg.NETWORK_DONE,
            {
                "ret": last_result,
                "processed": self.n_requests,
                "retried": self.n_retried,
                "consumed": self.n_consumed,
                "rusage": get_rusage(),
            },
        )
        self.progress_queue.put(done_msg)
Example #5
0
    def _shove(self, args, serialized_dialect, queue):
        """Read batches with ``BatchGenerator`` and push them onto ``queue``.

        Installs ``self.exit_fast`` as the SIGINT/SIGTERM handler, registers
        the deserialized CSV dialect under the name ``dataset_dialect`` and
        then iterates ``BatchGenerator(*args)``. Every truthy batch is put
        on ``queue``; the put is retried with a one second timeout for as
        long as the queue is full. Statistics are posted to
        ``self.progress_queue`` as ``SHOVEL_PROGRESS`` (when more than
        ``REPORT_INTERVAL`` has passed since the previous report),
        ``SHOVEL_DONE``, ``SHOVEL_CSV_ERROR`` or ``SHOVEL_ERROR``.

        ``self.shovel_status`` is updated along the way: ``b"R"`` while
        iterating the generator, ``b"P"`` while putting a batch, ``b"D"``
        on completion, ``b"C"`` on a CSV error and ``b"E"`` on any other
        error.

        :param args: positional arguments forwarded to ``BatchGenerator``;
            ``args[4]`` is used as the UI/logging object.
        :param serialized_dialect: object whose ``to_dialect()`` returns
            the CSV dialect to register.
        :param queue: destination queue for the produced batches.
        :raises csv.Error: re-raised after ``SHOVEL_CSV_ERROR`` was posted.
        :raises Exception: any other error is re-raised after
            ``SHOVEL_ERROR`` was posted.
        """
        dialect = serialized_dialect.to_dialect()
        signal.signal(signal.SIGINT, self.exit_fast)
        signal.signal(signal.SIGTERM, self.exit_fast)
        t2 = time()
        last_report = time()
        _ui = args[4]
        _ui.info('Shovel process started')
        csv.register_dialect('dataset_dialect', dialect)
        batch_generator = BatchGenerator(*args)
        # Bound before the ``try`` so the error handlers can refer to it
        # even when the generator fails before yielding a first batch.
        batch = None
        try:
            n = 0
            self.shovel_status.value = b"R"
            for batch in batch_generator:
                if batch:
                    _ui.debug('queueing batch {}'.format(batch.id))
                    self.shovel_status.value = b"P"
                    while True:
                        try:
                            queue.put(batch, timeout=1)
                            break
                        except Full:
                            # Still full after the timeout: retry unless an
                            # abort was requested in the meantime.
                            _ui.debug('put timed out')
                            if self.abort_flag.value:
                                _ui.info('shoveling abort requested')
                                self.exit_fast(None, None)
                                break
                    n += 1
                if self.abort_flag.value:
                    _ui.info('shoveling abort requested')
                    self.exit_fast(None, None)
                    break

                if time() - last_report > REPORT_INTERVAL:
                    self.progress_queue.put((
                        ProgressQueueMsg.SHOVEL_PROGRESS, {
                            "produced": n,
                            "read": batch_generator.n_read,
                            "skipped": batch_generator.n_skipped,
                            "rusage": get_rusage()
                        }))
                    last_report = time()

                self.shovel_status.value = b"R"

            self.shovel_status.value = b"D"
            _ui.info('shoveling complete | total time elapsed {}s'
                     ''.format(time() - t2))
            self.progress_queue.put((ProgressQueueMsg.SHOVEL_DONE,
                                     {
                                         "produced": n,
                                         "read": batch_generator.n_read,
                                         "skipped": batch_generator.n_skipped,
                                         "rusage": get_rusage()
                                     }))
        except csv.Error as e:
            self.shovel_status.value = b"C"
            self.progress_queue.put((
                ProgressQueueMsg.SHOVEL_CSV_ERROR,
                {
                    # Ship the batch without its row data.
                    "batch": batch._replace(data=[]) if batch else [],
                    "error": str(e),
                    "produced": n,
                    "read": batch_generator.n_read,
                    "skipped": batch_generator.n_skipped,
                    "rusage": get_rusage(),
                },
            ))
            raise
        except Exception as e:
            self.shovel_status.value = b"E"
            self.progress_queue.put((
                ProgressQueueMsg.SHOVEL_ERROR,
                {
                    # ``_replace`` only accepts keyword arguments; the
                    # positional form raised TypeError here and hid the
                    # original error.
                    "batch": batch._replace(data=[]) if batch else [],
                    "error": str(e),
                    "produced": n,
                    "read": batch_generator.n_read,
                    "skipped": batch_generator.n_skipped,
                    "rusage": get_rusage(),
                },
            ))
            raise
        finally:
            # Compare by value: identity of string literals is an
            # implementation detail.
            if os.name == 'nt':
                _ui.close()
Example #6
0
    def process_response(self):
        """Consume the writer queue until a sentinel or abort, then exit.

        Installs ``self.exit_fast`` as the SIGINT/SIGTERM handler and then
        polls ``self.writer_queue`` with a one second timeout. Messages are
        dispatched on their ``WriterQueueMsg`` type:

        * ``CTX_ERROR`` / ``CTX_WARNING`` -- stored through ``self.ctx``.
        * ``SENTINEL`` -- ends the loop.
        * ``RESPONSE`` -- the request is unpacked and formatted with
          ``self.response_handlers`` and the batch is checkpointed. A
          response that cannot be unpacked is handed to
          ``self.deque_failed_batch`` instead.

        ``WRITER_PROGRESS`` is posted to ``self.progress_queue`` when more
        than ``REPORT_INTERVAL`` has passed since the previous report, and
        ``WRITER_DONE`` is always posted on the way out. The method never
        returns: after closing its resources it calls ``sys.exit`` with
        status 0 on success and 1 otherwise (abort or error).
        """
        signal.signal(signal.SIGINT, self.exit_fast)
        signal.signal(signal.SIGTERM, self.exit_fast)

        self._ui.debug('Writer Process started - {}'
                       ''.format(multiprocessing.current_process().name))

        rows_done = sum(rows for _, rows in self.ctx.db['checkpoints'])

        success = False
        processed = 0
        written = 0
        idle_cycles = 0
        last_report = time()
        # Bound up front so the error handler can always report on it, even
        # when the failure happens before the first RESPONSE message.
        batch = None

        try:
            while True:
                if self.abort_flag.value or self.local_abort_flag:
                    self._ui.debug('abort requested')
                    break
                try:
                    # Status is b"I" after more than two empty polls in a
                    # row, b"G" otherwise.
                    if idle_cycles > 2:
                        self.writer_status.value = b"I"
                    else:
                        self.writer_status.value = b"G"
                    msg, args = self.writer_queue.get(timeout=1)
                except queue.Empty:
                    idle_cycles += 1
                    continue
                idle_cycles = 0
                self.writer_status.value = b"W"

                if msg == WriterQueueMsg.CTX_ERROR:
                    # pred_name is a message if ERROR or WARNING
                    self._ui.debug('Writer ERROR')
                    self.ctx.save_error(args["batch"], error=args["error"])
                    continue
                elif msg == WriterQueueMsg.CTX_WARNING:
                    self._ui.debug('Writer WARNING')
                    self.ctx.save_warning(args["batch"], error=args["error"])
                    continue
                elif msg == WriterQueueMsg.SENTINEL:
                    self._ui.debug('Writer received SENTINEL')
                    break
                elif msg == WriterQueueMsg.RESPONSE:
                    processed += 1
                    batch = args["batch"]

                    unpack_data, format_data = self.response_handlers
                    try:
                        data, exec_time, elapsed_seconds = \
                            unpack_data(args['request'])
                        debug_msg = ('successful response {}-{}: exec time '
                                     '{:.0f}msec | round-trip: {:.0f}msec')
                        self._ui.debug(
                            debug_msg.format(batch.id, batch.rows, exec_time,
                                             elapsed_seconds * 1000))
                    except Exception as e:
                        self._ui.warning('{} response parse error: {} -- retry'
                                         ''.format(batch.id, e))
                        self.deque_failed_batch(batch)
                        continue

                    # NOTE(review): the handlers below presumably rely on
                    # ``_ui.fatal`` not returning; otherwise
                    # ``written_fields``/``comb`` are unbound afterwards --
                    # confirm.
                    try:
                        written_fields, comb = format_data(
                            data, batch,
                            pred_name=self.ctx.pred_name,
                            pred_threshold_name=self.ctx.pred_threshold_name,
                            pred_decision_name=self.ctx.pred_decision_name,
                            keep_cols=self.ctx.keep_cols,
                            skip_row_id=self.ctx.skip_row_id,
                            fast_mode=self.ctx.fast_mode,
                            delimiter=self.ctx.dialect.delimiter,
                            max_prediction_explanations=(
                                self.ctx.max_prediction_explanations
                            )
                            )
                    except UnexpectedKeptColumnCount:
                        self._ui.fatal('Unexpected number of kept columns ' +
                                       'retrieved. This can happen in ' +
                                       '--fast mode with --keep_cols where ' +
                                       'some cells contain quoted delimiters')
                    except NoPredictionThresholdInResult:
                        self._ui.fatal('No predictionThreshold returned from '
                                       'API. --pred_threshold should be used '
                                       'only for binary classification '
                                       'predictions')
                    except Exception as e:
                        self._ui.fatal(e)

                    self.ctx.checkpoint_batch(batch, written_fields, comb)
                    written += 1
                    rows_done += batch.rows

                    if time() - last_report > REPORT_INTERVAL:
                        self.progress_queue.put((
                            ProgressQueueMsg.WRITER_PROGRESS, {
                                "processed": processed,
                                "written": written,
                                "rows": rows_done,
                                "rusage": get_rusage()
                            }))
                        last_report = time()
                else:
                    self._ui.error('Unknown Writer Queue msg: "{}", args={}'
                                   ''.format(msg, args))

            self._ui.debug('---Writer Exiting---')

            # An abort requested locally counts as a failed run.
            success = not self.local_abort_flag

        except Exception as e:
            # Note this won't catch SystemExit which is raised by
            # sigterm_handler because it's based on BaseException
            batch_id = batch.id if batch is not None else None
            self._ui.error('Writer Process error: batch={}, error={}'
                           ''.format(batch_id, e))

        finally:
            self.writer_status.value = b"D"
            self.progress_queue.put((ProgressQueueMsg.WRITER_DONE, {
                "ret": success,
                "processed": processed,
                "written": written,
                "rows": rows_done,
                "rusage": get_rusage()
            }))
            for o in [self.ctx, self.queue, self.writer_queue, self.deque,
                      self._ui]:
                if hasattr(o, 'close'):
                    # On Windows the Queue doesn't have a close attr
                    o.close()
            if success:
                sys.exit(0)
            else:
                sys.exit(1)
Example #7
0
    def process_response(self):
        """Consume the writer queue until a sentinel or abort, then exit.

        Installs ``self.exit_fast`` as the SIGINT/SIGTERM handler and then
        polls ``self.writer_queue`` with a one second timeout. Messages are
        dispatched on their ``WriterQueueMsg`` type:

        * ``CTX_ERROR`` / ``CTX_WARNING`` -- stored through ``self.ctx``.
        * ``SENTINEL`` -- ends the loop.
        * ``RESPONSE`` -- the request is unpacked with
          ``self.unpack_request_object`` (a ``False`` result skips the
          message), formatted with ``self.format_result_data`` and the
          batch is checkpointed.

        ``WRITER_PROGRESS`` is posted to ``self.progress_queue`` when more
        than ``REPORT_INTERVAL`` has passed since the previous report, and
        ``WRITER_DONE`` is always posted on the way out. The method never
        returns: after closing its resources it calls ``sys.exit`` with
        status 0 on success and 1 otherwise (abort or error).
        """
        signal.signal(signal.SIGINT, self.exit_fast)
        signal.signal(signal.SIGTERM, self.exit_fast)
        self._ui.debug('Writer Process started - {}'
                       ''.format(multiprocessing.current_process().name))

        # Resume the row counter from the checkpoints already stored.
        rows_done = sum(rows for _, rows in self.ctx.db['checkpoints'])

        success = False
        processed = 0
        written = 0
        idle_cycles = 0
        last_report = time()
        # Bound up front so the error handler can always report on it, even
        # when the failure happens before the first RESPONSE message.
        batch = None

        try:
            while True:
                if self.abort_flag.value or self.local_abort_flag:
                    self._ui.debug('abort requested')
                    break
                try:
                    # Status is b"I" after more than two empty polls in a
                    # row, b"G" otherwise.
                    if idle_cycles > 2:
                        self.writer_status.value = b"I"
                    else:
                        self.writer_status.value = b"G"
                    msg, args = self.writer_queue.get(timeout=1)
                except queue.Empty:
                    idle_cycles += 1
                    continue
                idle_cycles = 0
                self.writer_status.value = b"W"

                if msg == WriterQueueMsg.CTX_ERROR:
                    # pred_name is a message if ERROR or WARNING
                    self._ui.debug('Writer ERROR')
                    self.ctx.save_error(args["batch"], error=args["error"])
                    continue
                elif msg == WriterQueueMsg.CTX_WARNING:
                    self._ui.debug('Writer WARNING')
                    self.ctx.save_warning(args["batch"], error=args["error"])
                    continue
                elif msg == WriterQueueMsg.SENTINEL:
                    self._ui.debug('Writer received SENTINEL')
                    break
                elif msg == WriterQueueMsg.RESPONSE:
                    processed += 1
                    batch = args["batch"]
                    result = self.unpack_request_object(args["request"], batch)
                    if result is False:  # unpack_request_object failed
                        continue

                    (written_fields,
                     comb) = self.format_result_data(result, batch)

                    self.ctx.checkpoint_batch(batch, written_fields, comb)
                    written += 1
                    rows_done += batch.rows

                    if time() - last_report > REPORT_INTERVAL:
                        self.progress_queue.put(
                            (ProgressQueueMsg.WRITER_PROGRESS, {
                                "processed": processed,
                                "written": written,
                                "rows": rows_done,
                                "rusage": get_rusage()
                            }))
                        last_report = time()
                else:
                    self._ui.error('Unknown Writer Queue msg: "{}", args={}'
                                   ''.format(msg, args))

            self._ui.debug('---Writer Exiting---')

            # An abort requested locally counts as a failed run.
            success = not self.local_abort_flag

        except Exception as e:
            # Note this won't catch SystemExit which is raised by
            # sigterm_handler because it's based on BaseException
            batch_id = batch.id if batch is not None else None
            self._ui.error('Writer Process error: batch={}, error={}'
                           ''.format(batch_id, e))

        finally:
            self.writer_status.value = b"D"
            self.progress_queue.put((ProgressQueueMsg.WRITER_DONE, {
                "ret": success,
                "processed": processed,
                "written": written,
                "rows": rows_done,
                "rusage": get_rusage()
            }))
            for o in [
                    self.ctx, self.queue, self.writer_queue, self.deque,
                    self._ui
            ]:
                if hasattr(o, 'close'):
                    # On Windows the Queue doesn't have a close attr
                    o.close()
            if success:
                sys.exit(0)
            else:
                sys.exit(1)
Example #8
0
    def _shove(self, args, serialized_dialect, queue):
        """Read batches with ``BatchGenerator`` and push them onto ``queue``.

        Installs ``self.exit_fast`` as the SIGINT/SIGTERM handler, registers
        the deserialized CSV dialect under the name ``dataset_dialect`` and
        then iterates ``BatchGenerator(*args)``. Every truthy batch is put
        on ``queue``; the put is retried with a one second timeout for as
        long as the queue is full. Statistics are posted to
        ``self.progress_queue`` as ``SHOVEL_PROGRESS`` (when more than
        ``REPORT_INTERVAL`` has passed since the previous report),
        ``SHOVEL_DONE``, ``SHOVEL_CSV_ERROR`` or ``SHOVEL_ERROR``.

        ``self.shovel_status`` is updated along the way: ``b"R"`` while
        iterating the generator, ``b"P"`` while putting a batch, ``b"D"``
        on completion, ``b"C"`` on a CSV error and ``b"E"`` on any other
        error.

        :param args: positional arguments forwarded to ``BatchGenerator``;
            ``args[4]`` is used as the UI/logging object.
        :param serialized_dialect: object whose ``to_dialect()`` returns
            the CSV dialect to register.
        :param queue: destination queue for the produced batches.
        :raises csv.Error: re-raised after ``SHOVEL_CSV_ERROR`` was posted.
        :raises Exception: any other error is re-raised after
            ``SHOVEL_ERROR`` was posted.
        """
        dialect = serialized_dialect.to_dialect()
        signal.signal(signal.SIGINT, self.exit_fast)
        signal.signal(signal.SIGTERM, self.exit_fast)
        t2 = time()
        last_report = time()
        _ui = args[4]
        _ui.info('Shovel process started')
        csv.register_dialect('dataset_dialect', dialect)
        batch_generator = BatchGenerator(*args)
        # The generator can fail before it yields anything; without this
        # binding the error handlers would die on an unbound ``batch`` and
        # never post their message.
        batch = None
        try:
            n = 0
            self.shovel_status.value = b"R"
            for batch in batch_generator:
                if batch:
                    _ui.debug('queueing batch {}'.format(batch.id))
                    self.shovel_status.value = b"P"
                    while True:
                        try:
                            queue.put(batch, timeout=1)
                            break
                        except Full:
                            # Still full after the timeout: retry unless an
                            # abort was requested in the meantime.
                            _ui.debug('put timed out')
                            if self.abort_flag.value:
                                _ui.info('shoveling abort requested')
                                self.exit_fast(None, None)
                                break
                    n += 1
                if self.abort_flag.value:
                    _ui.info('shoveling abort requested')
                    self.exit_fast(None, None)
                    break

                if time() - last_report > REPORT_INTERVAL:
                    self.progress_queue.put((
                        ProgressQueueMsg.SHOVEL_PROGRESS, {
                            "produced": n,
                            "read": batch_generator.n_read,
                            "skipped": batch_generator.n_skipped,
                            "rusage": get_rusage()
                        }))
                    last_report = time()

                self.shovel_status.value = b"R"

            self.shovel_status.value = b"D"
            _ui.info('shoveling complete | total time elapsed {}s'
                     ''.format(time() - t2))
            self.progress_queue.put((ProgressQueueMsg.SHOVEL_DONE,
                                     {
                                         "produced": n,
                                         "read": batch_generator.n_read,
                                         "skipped": batch_generator.n_skipped,
                                         "rusage": get_rusage()
                                     }))
        except csv.Error as e:
            self.shovel_status.value = b"C"
            self.progress_queue.put((
                ProgressQueueMsg.SHOVEL_CSV_ERROR,
                {
                    # Ship the batch without its row data; an empty list
                    # stands in when no batch is available.
                    "batch": batch._replace(data=[]) if batch else [],
                    "error": str(e),
                    "produced": n,
                    "read": batch_generator.n_read,
                    "skipped": batch_generator.n_skipped,
                    "rusage": get_rusage()
                },
            ))
            raise
        except Exception as e:
            self.shovel_status.value = b"E"
            self.progress_queue.put((
                ProgressQueueMsg.SHOVEL_ERROR,
                {
                    # ``_replace`` only accepts keyword arguments; the
                    # positional form raised TypeError here and hid the
                    # original error.
                    "batch": batch._replace(data=[]) if batch else [],
                    "error": str(e),
                    "produced": n,
                    "read": batch_generator.n_read,
                    "skipped": batch_generator.n_skipped,
                    "rusage": get_rusage()
                },
            ))
            raise
        finally:
            # Compare by value: identity of string literals is an
            # implementation detail.
            if os.name == 'nt':
                _ui.close()
Example #9
0
    def process_response(self):
        """Consume the writer queue until a sentinel or abort, then exit.

        Installs ``self.exit_fast`` as the SIGINT/SIGTERM handler and then
        polls ``self.writer_queue`` with a one second timeout. Messages are
        dispatched on their ``WriterQueueMsg`` type:

        * ``CTX_ERROR`` / ``CTX_WARNING`` -- stored through ``self.ctx``.
        * ``SENTINEL`` -- ends the loop.
        * ``RESPONSE`` -- the request is unpacked and formatted with
          ``self.response_handlers`` and the batch is checkpointed. A
          response that cannot be unpacked is handed to
          ``self.deque_failed_batch`` instead.

        ``WRITER_PROGRESS`` is posted to ``self.progress_queue`` when more
        than ``REPORT_INTERVAL`` has passed since the previous report, and
        ``WRITER_DONE`` is always posted on the way out. The method never
        returns: after closing its resources it calls ``sys.exit`` with
        status 0 on success and 1 otherwise (abort or error).
        """
        signal.signal(signal.SIGINT, self.exit_fast)
        signal.signal(signal.SIGTERM, self.exit_fast)

        self._ui.debug('Writer Process started - {}'
                       ''.format(multiprocessing.current_process().name))

        rows_done = sum(rows for _, rows in self.ctx.db['checkpoints'])

        success = False
        processed = 0
        written = 0
        idle_cycles = 0
        last_report = time()
        # Bound up front so the error handler can always report on it, even
        # when the failure happens before the first RESPONSE message.
        batch = None

        try:
            while True:
                if self.abort_flag.value or self.local_abort_flag:
                    self._ui.debug('abort requested')
                    break
                try:
                    # Status is b"I" after more than two empty polls in a
                    # row, b"G" otherwise.
                    if idle_cycles > 2:
                        self.writer_status.value = b"I"
                    else:
                        self.writer_status.value = b"G"
                    msg, args = self.writer_queue.get(timeout=1)
                except queue.Empty:
                    idle_cycles += 1
                    continue
                idle_cycles = 0
                self.writer_status.value = b"W"

                if msg == WriterQueueMsg.CTX_ERROR:
                    # pred_name is a message if ERROR or WARNING
                    self._ui.debug('Writer ERROR')
                    self.ctx.save_error(args["batch"], error=args["error"])
                    continue
                elif msg == WriterQueueMsg.CTX_WARNING:
                    self._ui.debug('Writer WARNING')
                    self.ctx.save_warning(args["batch"], error=args["error"])
                    continue
                elif msg == WriterQueueMsg.SENTINEL:
                    self._ui.debug('Writer received SENTINEL')
                    break
                elif msg == WriterQueueMsg.RESPONSE:
                    processed += 1
                    batch = args["batch"]

                    unpack_data, format_data = self.response_handlers
                    try:
                        data, exec_time, elapsed_seconds = \
                            unpack_data(args['request'])
                        debug_msg = ('successful response {}-{}: exec time '
                                     '{:.0f}msec | round-trip: {:.0f}msec')
                        self._ui.debug(
                            debug_msg.format(batch.id, batch.rows, exec_time,
                                             elapsed_seconds * 1000))
                    except Exception as e:
                        self._ui.warning('{} response parse error: {} -- retry'
                                         ''.format(batch.id, e))
                        self.deque_failed_batch(batch)
                        continue

                    # NOTE(review): the handler below presumably relies on
                    # ``_ui.fatal`` not returning; otherwise
                    # ``written_fields``/``comb`` are unbound afterwards --
                    # confirm.
                    try:
                        written_fields, comb = format_data(
                            data, batch,
                            pred_name=self.ctx.pred_name,
                            keep_cols=self.ctx.keep_cols,
                            skip_row_id=self.ctx.skip_row_id,
                            fast_mode=self.ctx.fast_mode,
                            delimiter=self.ctx.dialect.delimiter)
                    except Exception as e:
                        self._ui.fatal(e)

                    self.ctx.checkpoint_batch(batch, written_fields, comb)
                    written += 1
                    rows_done += batch.rows

                    if time() - last_report > REPORT_INTERVAL:
                        self.progress_queue.put((
                            ProgressQueueMsg.WRITER_PROGRESS, {
                                "processed": processed,
                                "written": written,
                                "rows": rows_done,
                                "rusage": get_rusage()
                            }))
                        last_report = time()
                else:
                    self._ui.error('Unknown Writer Queue msg: "{}", args={}'
                                   ''.format(msg, args))

            self._ui.debug('---Writer Exiting---')

            # An abort requested locally counts as a failed run.
            success = not self.local_abort_flag

        except Exception as e:
            # Note this won't catch SystemExit which is raised by
            # sigterm_handler because it's based on BaseException
            batch_id = batch.id if batch is not None else None
            self._ui.error('Writer Process error: batch={}, error={}'
                           ''.format(batch_id, e))

        finally:
            self.writer_status.value = b"D"
            self.progress_queue.put((ProgressQueueMsg.WRITER_DONE, {
                "ret": success,
                "processed": processed,
                "written": written,
                "rows": rows_done,
                "rusage": get_rusage()
            }))
            for o in [self.ctx, self.queue, self.writer_queue, self.deque,
                      self._ui]:
                if hasattr(o, 'close'):
                    # On Windows the Queue doesn't have a close attr
                    o.close()
            if success:
                sys.exit(0)
            else:
                sys.exit(1)