Beispiel #1
0
def purge(self):
    """Purge TripUpdate and RealTimeUpdate rows for every contributor.

    A failure while purging one table for one contributor is logged (with
    traceback) and does not prevent the remaining purges from running.
    Aggregated per-table purge counts are reported to new_relic at the end.
    """
    contributors = Contributor.query.all()
    deleted_counts = {}
    # (process name, model class used as the accumulator key, row-deletion callable)
    purge_specs = (
        ("trip_update_purge", TripUpdate, _delete_trip_update_rows),
        ("real_time_update_purge", RealTimeUpdate, _delete_real_time_update_rows),
    )
    for contributor in contributors:
        for process_name, model_cls, delete_rows_func in purge_specs:
            try:
                new_deleted_count = _purge(process_name, contributor,
                                           delete_rows_func)
                _accumulate(deleted_counts, {model_cls: new_deleted_count})
            except Exception:
                logger = logging.LoggerAdapter(
                    logging.getLogger("purge"),
                    extra={"contributor": contributor.id})
                # logger.exception keeps the traceback, which logger.error dropped
                logger.exception(
                    "failed to purge {} for contributor '{}'".format(
                        model_cls.__name__, contributor.id))
    # use .get(): a key is absent when every purge for that table failed
    # (or there are no contributors); previously this raised KeyError
    new_relic.record_custom_parameter("real_time_update_purge_count",
                                      deleted_counts.get(RealTimeUpdate, 0))
    new_relic.record_custom_parameter("trip_update_purge_count",
                                      deleted_counts.get(TripUpdate, 0))
Beispiel #2
0
def _purge(process_name: str, contributor: Contributor,
           delete_rows_func: Callable[[Contributor], Dict]) -> Dict:
    """Run one purge operation for a contributor under a distributed lock.

    Acquires a redis lock named after the process and contributor, runs
    *delete_rows_func*, logs the timing and pushes count/duration metrics
    to new_relic.

    :param process_name: label of the purge operation (used for lock/log/metric names)
    :param contributor: contributor whose rows are purged
    :param delete_rows_func: callable performing the actual row deletion
    :return: the count structure returned by *delete_rows_func*, or an empty
             dict when the lock is already held by another process
    """
    logger = logging.LoggerAdapter(logging.getLogger(process_name),
                                   extra={"contributor": contributor.id})
    logger.debug("preparing '{}' for contributor '{}'".format(
        process_name, contributor.id))

    lock_name = make_kirin_lock_name(process_name, contributor.id)
    with get_lock(logger, lock_name,
                  app.config["REDIS_LOCK_TIMEOUT_PURGE"]) as locked:
        # bail out early when another worker holds the lock
        if not locked:
            logger.warning(
                "operation '{}' for contributor '{}' is already in progress".
                format(process_name, contributor.id))
            return {}

        started_at = datetime.datetime.utcnow()
        logger.info("executing '{}' for contributor '{}'".format(
            process_name, contributor.id))

        purge_count = delete_rows_func(contributor)
        elapsed = (datetime.datetime.utcnow() - started_at).total_seconds()
        logger.warning(
            "operation '{}' for contributor '{}' is finished: purged {} row(s) in {} s"
            .format(process_name, contributor.id, purge_count, elapsed))
        # report both metrics, keyed by "<contributor>:<process>_<metric>"
        for metric_suffix, metric_value in (("count", purge_count),
                                            ("duration", elapsed)):
            new_relic.record_custom_parameter(
                "{}:{}_{}".format(contributor.id, process_name, metric_suffix),
                metric_value)
        return purge_count
Beispiel #3
0
def make_rt_update(raw_data: Any, contributor_id: str, status: str = "pending") -> RealTimeUpdate:
    """
    Create a RealTimeUpdate object for the query and persist it.

    :param raw_data: raw feed content; stored as its string representation
    :param contributor_id: id of the contributor owning the feed
    :param status: initial status of the row (defaults to "pending")
    :return: the persisted RealTimeUpdate
    """
    rt_update = model.RealTimeUpdate(str(raw_data), contributor_id=contributor_id, status=status)

    db_commit(rt_update)
    # record the id AFTER commit: the db-generated primary key is not
    # populated until the row is persisted (it was previously recorded
    # before the commit, while still unset)
    new_relic.record_custom_parameter("real_time_update_id", rt_update.id)
    return rt_update
Beispiel #4
0
def make_rt_update(raw_data, connector_type, contributor_id, status="OK"):
    """
    Create a RealTimeUpdate object for the query and persist it.

    :param raw_data: raw feed content, stored as-is on the row
    :param connector_type: type of the connector that produced the feed
    :param contributor_id: id of the contributor owning the feed
    :param status: initial status of the row (defaults to "OK")
    :return: the persisted RealTimeUpdate
    """
    rt_update = model.RealTimeUpdate(
        raw_data, connector_type=connector_type, contributor_id=contributor_id, status=status
    )

    model.db.session.add(rt_update)
    model.db.session.commit()
    # record the id AFTER commit: the db-generated primary key is not
    # populated until the row is persisted (it was previously recorded
    # before the commit, while still unset)
    new_relic.record_custom_parameter("real_time_update_id", rt_update.id)
    return rt_update
Beispiel #5
0
def wrap_build(builder, input_raw):
    """
    Function wrapping the processing of realtime information of an external feed
    This manages errors/logger/newrelic
    :param builder: the KirinModelBuilder to be called (must inherit from abstract_builder.AbstractKirinModelBuilder)
    :param input_raw: the feed to process
    """
    contributor = builder.contributor
    start_datetime = datetime.datetime.utcnow()
    # rt_update stays None until the raw feed is persisted; the except/finally
    # blocks use that to tell how far processing got
    rt_update = None
    log_dict = {"contributor": contributor.id}
    record_custom_parameter("contributor", contributor.id)
    status = "OK"
    log_dict.update({"input_feed_size": sys.getsizeof(input_raw)})

    try:
        # create a raw rt_update obj, save the raw_input into the db
        rt_update, rtu_log_dict = builder.build_rt_update(input_raw)
        log_dict.update(rtu_log_dict)

        # raw_input is interpreted
        trip_updates, tu_log_dict = builder.build_trip_updates(rt_update)
        log_dict.update(tu_log_dict)

        # finally confront to previously existing information (base_schedule, previous real-time)
        _, handler_log_dict = handle(builder, rt_update, trip_updates)
        log_dict.update(handler_log_dict)

    except Exception as e:
        # "warning" means the feed itself was at fault (Kirin behaved
        # correctly); anything else is a processing failure
        status = "warning" if is_only_warning_exception(e) else "failure"
        allow_reprocess = is_reprocess_allowed(e)

        if rt_update is not None:
            # prefer the structured error carried by a KirinException,
            # otherwise fall back to the exception text
            error = e.data["error"] if (isinstance(e, KirinException)
                                        and "error" in e.data) else str(e)
            set_rtu_status_ko(rt_update,
                              error,
                              is_reprocess_same_data_allowed=allow_reprocess)
            db_commit(rt_update)
        else:
            # rt_update is not built, make sure reprocess is allowed
            allow_reprocess_same_data(contributor.id)

        log_dict.update({"exc_summary": str(e), "reason": str(e)})

        record_custom_parameter(
            "reason", str(e))  # using __str__() here to have complete details
        # Re-raise all exceptions (as flask manages exceptions for output)
        # try/except mechanism must wrap calls to this wrap_build() function (auto. for flask)
        # See CONTRIBUTING.md for newrelic's error filtering
        raise

    finally:
        # resolve a still-"pending" rt_update to its terminal status before
        # emitting metrics/logs (this runs on success AND on re-raise)
        if rt_update is not None and rt_update.status == "pending":
            rt_update.status = "OK" if (status == "OK") else "KO"
            db_commit(rt_update)
        log_dict.update({
            "duration":
            (datetime.datetime.utcnow() - start_datetime).total_seconds()
        })
        record_call("kirin_feed_process_info", status, **log_dict)
        # log level mirrors the outcome status
        if status == "OK":
            logging.getLogger(__name__).info(status, extra=log_dict)
        elif status == "warning":
            logging.getLogger(__name__).warning(status, extra=log_dict)
        else:
            logging.getLogger(__name__).error(status, extra=log_dict)
Beispiel #6
0
def wrap_build(builder, input_raw):
    """
    Function wrapping the processing of realtime information of an external feed
    This manages errors/logger/newrelic
    :param builder: the KirinModelBuilder to be called
    :param input_raw: the feed to process
    :raises: re-raises any exception from the build/handle steps after
             recording its status, so upper layers (flask) can handle it
    """
    contributor = builder.contributor
    start_datetime = datetime.utcnow()
    # rt_update stays None until the raw feed is persisted; the except block
    # uses that to tell how far processing got
    rt_update = None
    log_dict = {"contributor": contributor.id}
    status = "OK"

    try:
        # create a raw rt_update obj, save the raw_input into the db
        rt_update, rtu_log_dict = builder.build_rt_update(input_raw)
        log_dict.update(rtu_log_dict)

        # raw_input is interpreted
        trip_updates, tu_log_dict = builder.build_trip_updates(rt_update)
        log_dict.update(tu_log_dict)

        # finally confront to previously existing information (base_schedule, previous real-time)
        _, handler_log_dict = core.handle(rt_update, trip_updates,
                                          contributor.id,
                                          builder.is_new_complete)
        log_dict.update(handler_log_dict)

    except Exception as e:
        status = "failure"
        allow_reprocess = True
        if is_invalid_input_exception(e):
            status = "warning"  # Kirin did his job correctly if the input is invalid and rejected
            allow_reprocess = False  # reprocess is useless if input is invalid

        if rt_update is not None:
            # fix: `e.message` does not exist on Python 3; use six.text_type(e)
            # so the error text works on both Python 2 and 3
            error = e.data["error"] if (isinstance(e, KirinException)
                                        and "error" in e.data) else six.text_type(e)
            set_rtu_status_ko(rt_update,
                              error,
                              is_reprocess_same_data_allowed=allow_reprocess)
            model.db.session.add(rt_update)
            model.db.session.commit()
        else:
            # rt_update is not built, make sure reprocess is allowed
            allow_reprocess_same_data(contributor.id)

        # stringify the exception so structured log fields are plain text
        # (previously the raw exception object was stored under "reason")
        log_dict.update({"exc_summary": six.text_type(e),
                         "reason": six.text_type(e)})

        record_custom_parameter(
            "reason", six.text_type(e))  # stringified to have complete details
        raise  # filters later for APM (auto.)

    finally:
        log_dict.update(
            {"duration": (datetime.utcnow() - start_datetime).total_seconds()})
        record_call(status, **log_dict)
        # log level mirrors the outcome status
        if status == "OK":
            logging.getLogger(__name__).info(status, extra=log_dict)
        elif status == "warning":
            logging.getLogger(__name__).warning(status, extra=log_dict)
        else:
            logging.getLogger(__name__).error(status, extra=log_dict)
Beispiel #7
0
    def _on_request(self, message):
        """Handle a LOAD_REALTIME task: build and publish a full GTFS-RT feed.

        Decodes the protobuf task carried by *message*, builds the complete
        feed for the requested contributors and period, then publishes it on
        the queue named in the task. Timing and status are always recorded
        in the ``finally`` block, even when processing fails.

        :param message: broker message whose ``payload`` holds a serialized Task
        :return: dict of log/monitoring info (None when the request is
                 rejected early as invalid/irrelevant)
        """
        log = logging.getLogger(__name__)
        status = "OK"
        log_dict = {}
        start_datetime = datetime.utcnow()
        # fix: defined before the try so the finally block can always reference
        # it (a failure before the assignment inside the try previously caused
        # a NameError while logging in finally)
        task = None
        try:
            task = task_pb2.Task()
            try:
                # `body` is a string, but we need binary type for
                # `ParseFromString()` to work.  It seems to work.
                body = bytes(message.payload, encoding="utf-8")
                task.ParseFromString(body)
            except DecodeError as e:
                log.warning("invalid protobuf: {}".format(str(e)))
                return

            log.info("Getting a full feed publication request",
                     extra={"task": task})
            # only LOAD_REALTIME tasks with a payload are handled here
            if task.action != task_pb2.LOAD_REALTIME or not task.load_realtime:
                return
            begin_date = None
            end_date = None
            if hasattr(task.load_realtime,
                       "begin_date") and task.load_realtime.begin_date:
                begin_date = str_to_date(task.load_realtime.begin_date)

            if hasattr(task.load_realtime,
                       "end_date") and task.load_realtime.end_date:
                end_date = str_to_date(task.load_realtime.end_date)
            feed = convert_to_gtfsrt(
                TripUpdate.find_by_contributor_period(
                    task.load_realtime.contributors, begin_date, end_date),
                gtfs_realtime_pb2.FeedHeader.FULL_DATASET,
            )

            feed_str = feed.SerializeToString()
            log_dict.update({
                "contributors": task.load_realtime.contributors,
                "routing_key": task.load_realtime.queue_name,
                "output_trip_update_count": len(feed.entity),
                "output_feed_size": sys.getsizeof(feed_str),
            })
            log.info(
                "Starting of full feed publication {}, {}".format(
                    len(feed_str), task),
                extra={
                    "size": len(feed_str),
                    "task": task
                },
            )
            record_custom_parameter("contributors",
                                    task.load_realtime.contributors)
            # http://docs.celeryproject.org/projects/kombu/en/latest/userguide/producers.html#bypassing-routing-by-using-the-anon-exchange
            self.producer.publish(
                feed_str,
                routing_key=task.load_realtime.queue_name,
                retry=True,
                retry_policy={
                    "interval_start": 0,  # First retry immediately,
                    "interval_step": 2,  # then increase by 2s for every retry.
                    "interval_max":
                    10,  # but don't exceed 10s between retries.
                    "max_retries":
                    self.max_retries,  # give up after 10 (by default) tries.
                },
            )
        except Exception as e:
            status = "failure"
            log_dict.update({"reason": str(e)})
            record_custom_parameter("reason", str(e))
        finally:
            duration = (datetime.utcnow() - start_datetime).total_seconds()
            log_dict.update({"duration": duration})
            record_call("kirin_reload_info", status, **log_dict)
            # NOTE: `task` may be None here when decoding never happened
            log.info("End of full feed publication",
                     extra={
                         "duration": duration,
                         "task": task
                     })
            # release the scoped SQLAlchemy session for this worker
            db.session.remove()
        return log_dict