def purge(self):
    """Purge obsolete TripUpdate and RealTimeUpdate rows for every contributor.

    Each purge is attempted independently: a failure for one contributor (or
    one model) is logged and does not prevent the remaining purges. Aggregated
    deletion counts are reported to NewRelic at the end.
    """
    contributors = Contributor.query.all()
    deleted_counts = {}
    for contributor in contributors:
        # One adapter per contributor, shared by both error paths below.
        logger = logging.LoggerAdapter(
            logging.getLogger("purge"), extra={"contributor": contributor.id}
        )
        try:
            new_deleted_count = _purge("trip_update_purge", contributor, _delete_trip_update_rows)
            _accumulate(deleted_counts, {TripUpdate: new_deleted_count})
        except Exception:
            logger.error("failed to purge TripUpdate for contributor '{}'".format(contributor.id))
        try:
            new_deleted_count = _purge(
                "real_time_update_purge", contributor, _delete_real_time_update_rows
            )
            _accumulate(deleted_counts, {RealTimeUpdate: new_deleted_count})
        except Exception:
            logger.error("failed to purge RealTimeUpdate for contributor '{}'".format(contributor.id))

    # Use .get() with a default: the keys are absent when there are no
    # contributors at all, or when every purge attempt failed — direct
    # indexing raised a KeyError in those cases.
    new_relic.record_custom_parameter(
        "real_time_update_purge_count", deleted_counts.get(RealTimeUpdate, 0)
    )
    new_relic.record_custom_parameter(
        "trip_update_purge_count", deleted_counts.get(TripUpdate, 0)
    )
def _purge(process_name: str, contributor: Contributor, delete_rows_func: Callable[[Contributor], Dict]) -> Dict:
    """Run one purge operation for a contributor, guarded by a distributed lock.

    :param process_name: name of the purge process (used for lock name, logs
        and NewRelic parameter names)
    :param contributor: contributor whose rows are purged
    :param delete_rows_func: callable doing the actual row deletion; its
        return value is forwarded as the purge count
    :return: the purge count from delete_rows_func, or {} when the lock is
        already held by another worker
    """
    logger = logging.LoggerAdapter(
        logging.getLogger(process_name), extra={"contributor": contributor.id}
    )
    logger.debug("preparing '{}' for contributor '{}'".format(process_name, contributor.id))

    lock_id = make_kirin_lock_name(process_name, contributor.id)
    timeout = app.config["REDIS_LOCK_TIMEOUT_PURGE"]
    with get_lock(logger, lock_id, timeout) as locked:
        # Another worker is already purging this contributor: bail out.
        if not locked:
            logger.warning(
                "operation '{}' for contributor '{}' is already in progress".format(
                    process_name, contributor.id
                )
            )
            return {}

        started_at = datetime.datetime.utcnow()
        logger.info("executing '{}' for contributor '{}'".format(process_name, contributor.id))
        purged = delete_rows_func(contributor)
        elapsed = (datetime.datetime.utcnow() - started_at).total_seconds()

        logger.warning(
            "operation '{}' for contributor '{}' is finished: purged {} row(s) in {} s".format(
                process_name, contributor.id, purged, elapsed
            )
        )
        new_relic.record_custom_parameter("{}:{}_count".format(contributor.id, process_name), purged)
        new_relic.record_custom_parameter("{}:{}_duration".format(contributor.id, process_name), elapsed)
        return purged
def make_rt_update(raw_data: Any, contributor_id: str, status: str = "pending") -> RealTimeUpdate:
    """
    Create a RealTimeUpdate object for the query and persist it

    :param raw_data: raw feed content; stored as its string representation
    :param contributor_id: id of the contributor the feed belongs to
    :param status: initial status of the RealTimeUpdate
    :return: the persisted RealTimeUpdate
    """
    rt_update = model.RealTimeUpdate(str(raw_data), contributor_id=contributor_id, status=status)
    db_commit(rt_update)
    # Record the id only AFTER the commit: the autoincrement primary key is
    # assigned by the database on flush, so it was still None when the
    # original code recorded it before db_commit().
    new_relic.record_custom_parameter("real_time_update_id", rt_update.id)
    return rt_update
def make_rt_update(raw_data, connector_type, contributor_id, status="OK"):
    """
    Create a RealTimeUpdate object for the query and persist it

    :param raw_data: raw feed content
    :param connector_type: type of the connector that produced the feed
    :param contributor_id: id of the contributor the feed belongs to
    :param status: initial status of the RealTimeUpdate
    :return: the persisted RealTimeUpdate
    """
    rt_update = model.RealTimeUpdate(
        raw_data, connector_type=connector_type, contributor_id=contributor_id, status=status
    )
    model.db.session.add(rt_update)
    model.db.session.commit()
    # Record the id only AFTER the commit: before session.add()/commit() the
    # autoincrement primary key is still None, so the original recorded None.
    new_relic.record_custom_parameter("real_time_update_id", rt_update.id)
    return rt_update
def wrap_build(builder, input_raw):
    """
    Function wrapping the processing of realtime information of an external feed
    This manages errors/logger/newrelic

    :param builder: the KirinModelBuilder to be called (must inherit from
        abstract_builder.AbstractKirinModelBuilder)
    :param input_raw: the feed to process
    """
    contributor = builder.contributor
    start_datetime = datetime.datetime.utcnow()
    rt_update = None
    log_dict = {"contributor": contributor.id}
    record_custom_parameter("contributor", contributor.id)
    status = "OK"
    log_dict.update({"input_feed_size": sys.getsizeof(input_raw)})
    try:
        # create a raw rt_update obj, save the raw_input into the db
        rt_update, rtu_log_dict = builder.build_rt_update(input_raw)
        log_dict.update(rtu_log_dict)
        # raw_input is interpreted
        trip_updates, tu_log_dict = builder.build_trip_updates(rt_update)
        log_dict.update(tu_log_dict)
        # finally confront to previously existing information (base_schedule, previous real-time)
        _, handler_log_dict = handle(builder, rt_update, trip_updates)
        log_dict.update(handler_log_dict)
    except Exception as e:
        # "warning" means the feed itself is at fault (Kirin behaved
        # correctly); any other exception is a processing "failure".
        status = "warning" if is_only_warning_exception(e) else "failure"
        allow_reprocess = is_reprocess_allowed(e)

        if rt_update is not None:
            # Prefer the structured error from KirinException payloads,
            # fall back to the exception's string form.
            error = e.data["error"] if (isinstance(e, KirinException) and "error" in e.data) else str(e)
            set_rtu_status_ko(rt_update, error, is_reprocess_same_data_allowed=allow_reprocess)
            db_commit(rt_update)
        else:
            # rt_update is not built, make sure reprocess is allowed
            allow_reprocess_same_data(contributor.id)

        log_dict.update({"exc_summary": str(e), "reason": str(e)})
        record_custom_parameter("reason", str(e))  # using __str__() here to have complete details

        # Re-raise all exceptions (as flask manages exceptions for output)
        # try/except mechanism must wrap calls to this wrap_build() function (auto. for flask)
        # See CONTRIBUTING.md for newrelic's error filtering
        raise
    finally:
        # A rt_update still "pending" here was built but never given a final
        # status by the except-branch: derive it from the overall outcome.
        if rt_update is not None and rt_update.status == "pending":
            rt_update.status = "OK" if (status == "OK") else "KO"
            db_commit(rt_update)

        log_dict.update({"duration": (datetime.datetime.utcnow() - start_datetime).total_seconds()})
        record_call("kirin_feed_process_info", status, **log_dict)
        # Log level mirrors the outcome so monitoring can filter on severity.
        if status == "OK":
            logging.getLogger(__name__).info(status, extra=log_dict)
        elif status == "warning":
            logging.getLogger(__name__).warning(status, extra=log_dict)
        else:
            logging.getLogger(__name__).error(status, extra=log_dict)
def wrap_build(builder, input_raw):
    """
    Function wrapping the processing of realtime information of an external feed
    This manages errors/logger/newrelic

    :param builder: the KirinModelBuilder to be called
    :param input_raw: the feed to process
    """
    contributor = builder.contributor
    start_datetime = datetime.utcnow()
    rt_update = None
    log_dict = {"contributor": contributor.id}
    status = "OK"
    try:
        # create a raw rt_update obj, save the raw_input into the db
        rt_update, rtu_log_dict = builder.build_rt_update(input_raw)
        log_dict.update(rtu_log_dict)
        # raw_input is interpreted
        trip_updates, tu_log_dict = builder.build_trip_updates(rt_update)
        log_dict.update(tu_log_dict)
        # finally confront to previously existing information (base_schedule, previous real-time)
        _, handler_log_dict = core.handle(rt_update, trip_updates, contributor.id, builder.is_new_complete)
        log_dict.update(handler_log_dict)
    except Exception as e:
        status = "failure"
        allow_reprocess = True
        if is_invalid_input_exception(e):
            status = "warning"  # Kirin did his job correctly if the input is invalid and rejected
            allow_reprocess = False  # reprocess is useless if input is invalid

        if rt_update is not None:
            # Fix: exceptions have no `.message` attribute on Python 3 (it was
            # already deprecated since 2.6), so the fallback itself raised an
            # AttributeError; use the exception's string form instead.
            error = e.data["error"] if (isinstance(e, KirinException) and "error" in e.data) else str(e)
            set_rtu_status_ko(rt_update, error, is_reprocess_same_data_allowed=allow_reprocess)
            model.db.session.add(rt_update)
            model.db.session.commit()
        else:
            # rt_update is not built, make sure reprocess is allowed
            allow_reprocess_same_data(contributor.id)

        # Stringify the exception (not the object itself), as the comment
        # below intends and as the newer wrap_build variant does.
        log_dict.update({"exc_summary": six.text_type(e), "reason": six.text_type(e)})
        record_custom_parameter("reason", six.text_type(e))  # using __str__() here to have complete details
        raise  # filters later for APM (auto.)
    finally:
        log_dict.update({"duration": (datetime.utcnow() - start_datetime).total_seconds()})
        record_call(status, **log_dict)
        if status == "OK":
            logging.getLogger(__name__).info(status, extra=log_dict)
        elif status == "warning":
            logging.getLogger(__name__).warning(status, extra=log_dict)
        else:
            logging.getLogger(__name__).error(status, extra=log_dict)
def _on_request(self, message):
    """Handle a LOAD_REALTIME task: build the full GTFS-RT feed and publish it.

    Decodes the protobuf task from the incoming message, collects the
    TripUpdates of the requested contributors/period, serializes them as a
    FULL_DATASET GTFS-RT feed and publishes it on the task's queue.

    NOTE(review): the `finally` block always runs (and its `return log_dict`
    overrides the early `return`s above), so the "End of full feed
    publication" log and the record_call() are emitted even for ignored or
    invalid tasks — confirm this is intended.
    """
    log = logging.getLogger(__name__)
    status = "OK"
    log_dict = {}
    start_datetime = datetime.utcnow()
    try:
        task = task_pb2.Task()
        try:
            # `body` is a string, but we need binary type for
            # `ParseFromString()` to work. It seems to work.
            body = bytes(message.payload, encoding="utf-8")
            task.ParseFromString(body)
        except DecodeError as e:
            # Undecodable payload: log and ignore the message.
            log.warning("invalid protobuf: {}".format(str(e)))
            return
        log.info("Getting a full feed publication request", extra={"task": task})
        # Only LOAD_REALTIME tasks with a payload are handled here.
        if task.action != task_pb2.LOAD_REALTIME or not task.load_realtime:
            return
        # Optional period restriction carried by the task.
        begin_date = None
        end_date = None
        if hasattr(task.load_realtime, "begin_date") and task.load_realtime.begin_date:
            begin_date = str_to_date(task.load_realtime.begin_date)
        if hasattr(task.load_realtime, "end_date") and task.load_realtime.end_date:
            end_date = str_to_date(task.load_realtime.end_date)
        feed = convert_to_gtfsrt(
            TripUpdate.find_by_contributor_period(
                task.load_realtime.contributors, begin_date, end_date),
            gtfs_realtime_pb2.FeedHeader.FULL_DATASET,
        )
        feed_str = feed.SerializeToString()
        log_dict.update({
            "contributors": task.load_realtime.contributors,
            "routing_key": task.load_realtime.queue_name,
            "output_trip_update_count": len(feed.entity),
            "output_feed_size": sys.getsizeof(feed_str),
        })
        log.info(
            "Starting of full feed publication {}, {}".format(
                len(feed_str), task),
            extra={
                "size": len(feed_str),
                "task": task
            },
        )
        record_custom_parameter("contributors", task.load_realtime.contributors)
        # http://docs.celeryproject.org/projects/kombu/en/latest/userguide/producers.html#bypassing-routing-by-using-the-anon-exchange
        self.producer.publish(
            feed_str,
            routing_key=task.load_realtime.queue_name,
            retry=True,
            retry_policy={
                "interval_start": 0,  # First retry immediately,
                "interval_step": 2,  # then increase by 2s for every retry.
                "interval_max": 10,  # but don't exceed 10s between retries.
                "max_retries": self.max_retries,  # give up after 10 (by default) tries.
            },
        )
    except Exception as e:
        # Any failure (DB, serialization, publish) is reported, not raised:
        # the message must not crash the consumer.
        status = "failure"
        log_dict.update({"reason": str(e)})
        record_custom_parameter("reason", str(e))
    finally:
        duration = (datetime.utcnow() - start_datetime).total_seconds()
        log_dict.update({"duration": duration})
        record_call("kirin_reload_info", status, **log_dict)
        log.info("End of full feed publication", extra={
            "duration": duration,
            "task": task
        })
        # Release the scoped SQLAlchemy session owned by this worker.
        db.session.remove()
        return log_dict