def wait_for_trigger(self, now: typing.Optional[datetime.datetime] = None):
    """Block until the timer's next trigger time is reached.

    Args:
        now: Reference time for computing the remaining interval. Defaults
            to the current time *at call time*. (The previous default of
            ``datetime.datetime.now()`` was evaluated once when the function
            was defined, freezing the reference clock for every later call —
            the classic mutable/evaluated-once default-argument pitfall.)

    Returns:
        True if there was time left and we actually slept; None otherwise.
    """
    if now is None:
        now = datetime.datetime.now()
    iv = self.time_left(now=now)
    sec = iv.total_seconds()
    if sec > 0:
        logger.info(f"Waiting for {human_delta(iv)}")
        sleep(sec)
        return True
def receive_event_with_backoff(self, timeout=100, backoff=1000) -> typing.Optional[Task]:
    """Receive a single event from the queue, backing off when it is empty.

    Args:
        timeout: Maximum time to block waiting for an event.
        backoff: Time to sleep before returning when nothing was received.

    Returns:
        The deserialized Task, or None when no event arrived or the
        payload could not be deserialized (the failure is logged).
    """
    raw = self.receive_event(timeout)
    if not raw:
        # Nothing on the queue: pause before the caller retries.
        sleep(backoff)
        return None
    parsed: typing.Optional[Task] = deserialize_task(raw)
    if not parsed:
        logger.error("Could not deserialize task")
        logger.error(f" + Task bytes:{raw}")
    return parsed
def _receive_event_with_backoff(self, timeout_sec=300, backoff_sec=60) -> typing.Optional[Task]:
    """Poll queue for the Message with passed timeout.

    Args:
        timeout_sec: Maximum time to block waiting from queue for message,
            event or callback (seconds);
        backoff_sec: Time for Thread to sleep before next try to receive
            the Message from queue.

    Returns:
        The deserialized Task, or None when the poll returned nothing or
        deserialization failed.
    """
    raw = self._receive_event(timeout=timeout_sec)
    if raw:
        task: typing.Optional[Task] = deserialize_task(raw)
        if not task:
            logger.error(f"Could not deserialize task\n Task bytes:{str(raw)}")
        return task
    # Empty poll: log and back off so we do not hammer the queue.
    logger.info(
        f"No bytes in the message after polling with timeout - {timeout_sec} seconds. "
        f"Sleeping for backoff - {backoff_sec} seconds."
    )
    sleep(backoff_sec)
    return None
def run(self):
    """Run the scheduler loop until a keyboard interrupt.

    If startup checks failed (``good_to_go`` is False), emit a visible
    countdown so the operator notices, then terminate. Otherwise loop:
    wait for the prediction timer to trigger, refresh model info and
    perform one prediction step; log and continue on any other exception.
    """
    if not self.good_to_go:
        # Startup failed: count down loudly (one log line per second)
        # before exiting so the failure is hard to miss in the logs.
        sleep_time = 20
        logger.error(" ### ### Latigo could not be started!")
        logger.error(" Please see previous error messages for clues as to why.")
        logger.error("")
        logger.error(f" Will pause for {sleep_time} seconds before terminating.")
        for i in range(sleep_time):
            logger.error(sleep_time - i)
            sleep(1)
        return
    logger.info(f"Starting {self.__class__.__name__}")
    logger.info(f"Prediction step: {self.continuous_prediction_timer}")
    done = False
    while not done:
        try:
            start = datetime.datetime.now()
            if self.continuous_prediction_timer.wait_for_trigger(now=start):
                self.update_model_info()
                self.perform_prediction_step()
            interval = datetime.datetime.now() - start
            logger.info(f"Spent {interval}")
            if interval > datetime.timedelta(seconds=1):
                # NOTE(review): `interval` is a datetime.timedelta, not a
                # number of seconds. This relies on the project's custom
                # `sleep` (from latigo.utils) accepting a timedelta —
                # confirm; time.sleep would raise TypeError here.
                sleep(interval)
        except KeyboardInterrupt:
            logger.info("Keyboard abort triggered, shutting down")
            done = True
        except Exception as e:
            # Broad catch is deliberate: the scheduler must survive any
            # single failed iteration and keep running.
            logger.error("-----------------------------------")
            logger.error(f"Error occurred in scheduler: {e}")
            traceback.print_exc()
            logger.error("")
            logger.error("-----------------------------------")
    logger.info(f"Stopping {self.__class__.__name__}")
def run(self):
    """Run the executor loop: fetch tasks and drive the prediction pipeline.

    For each fetched task: fetch sensor data, execute the prediction and
    store the result. Any exception in an iteration is logged and the loop
    continues after a short pause. Skips processing entirely when no task
    queue is configured.
    """
    if not self.task_queue:
        logger.info(f"Skipping processing in {self.__class__.__name__}")
        return
    logger.info(f"Starting processing in {self.__class__.__name__}")
    done = False
    iteration_number = 0
    error_number = 0
    while not done:
        iteration_number += 1
        try:
            task = self._fetch_task()
            if task:
                logger.info(
                    f"Processing task for '{task.project_name}.{task.model_name}' "
                    f"starting {task.from_time} lasting {task.to_time - task.from_time}"
                )
                sensor_data = self._fetch_sensor_data(task)
                if sensor_data and sensor_data.ok():
                    prediction_data = self._execute_prediction(task, sensor_data)
                    if prediction_data and prediction_data.ok():
                        self._store_prediction_data(task, prediction_data)
                        self.idle_count(True)
                    else:
                        # Guard the .data access: prediction_data may be
                        # None/falsy here, which previously raised
                        # AttributeError instead of logging the warning.
                        logger.warning(
                            f"Skipping store due to bad prediction: "
                            f"{prediction_data.data if prediction_data else 'empty'}"
                        )
                else:
                    # Fixed typo in log message ("prediciton").
                    logger.warning(f"Skipping prediction due to bad data: {sensor_data}")
            else:
                logger.warning("No task")
                self.idle_count(False)
                sleep(1)
        except Exception as e:
            # Broad catch is deliberate: one bad task must not kill the loop.
            error_number += 1
            logger.error("-----------------------------------")
            logger.error(f"Error occurred in executor: {e}")
            traceback.print_exc()
            logger.error("")
            logger.error("-----------------------------------")
            sleep(1)
    logger.info(f"Stopping processing in {self.__class__.__name__}")
def wait_for_trigger(self, now: typing.Optional[datetime.datetime] = None):
    """Sleep for whatever time remains until the next trigger.

    Args:
        now: Reference time for the remaining-interval calculation.
            Defaults to the current time *at call time*. (The previous
            default of ``datetime.datetime.now()`` was evaluated once at
            definition time, so every call without an explicit ``now``
            reused a stale timestamp.)
    """
    if now is None:
        now = datetime.datetime.now()
    iv = self.time_left(now=now)
    # A zero timedelta is falsy, so we only sleep when time actually remains.
    if iv:
        sleep(iv.total_seconds())
def run(self):
    """Run the executor's main loop with timing instrumentation.

    If startup checks failed, pause for five minutes then terminate.
    Otherwise loop: fetch a task, fetch its sensor data, execute the
    prediction (for the project's latest revision) and store result plus
    metadata, logging the elapsed time after each stage. The loop exits
    on its own after ``restart_interval_sec`` so the process can be
    restarted fresh by the supervisor.
    """
    if not self.good_to_go:
        # Startup failed: pause before terminating so the operator can
        # read the earlier error messages, then let the process exit.
        sleep_time = 60 * 5
        logger.error("")
        logger.error(" ### ### Latigo could not be started!")
        logger.error(
            f" Will pause for {human_delta(datetime.timedelta(seconds=sleep_time))} before terminating."
        )
        logger.error(" Please see previous error messages for clues.")
        logger.error("")
        sleep(sleep_time)
        return
    if self.task_queue:
        # Refraining from excessive logging
        # logger.info("Executor started processing")
        done = False
        iteration_number = 0
        error_number = 0
        executor_start = datetime.datetime.now()
        while not done:
            iteration_number += 1
            try:
                # logger.info("Fetching task...")
                # All per-stage intervals below are measured from this point.
                self.task_fetch_start = datetime.datetime.now()
                task = self._fetch_task()
                if task:
                    task_fetch_interval = datetime.datetime.now() - self.task_fetch_start
                    logger.info(
                        f"[Prediction_task_info] {self.make_prediction_task_info(task)}. "
                        f"Task fetched after {human_delta(task_fetch_interval)}"
                    )
                    revision = self.model_info_provider.get_project_latest_revisions(
                        task.project_name)
                    sensor_data = self._fetch_sensor_data(task)
                    data_fetch_interval = datetime.datetime.now() - self.task_fetch_start
                    logger.info(
                        f"Got data after {human_delta(data_fetch_interval)}"
                    )
                    self._log_task_execution_time(
                        label="fetched sensor data", chars_to_append="")
                    if sensor_data and sensor_data.ok():
                        prediction_data = None
                        try:
                            prediction_data = self._execute_prediction(
                                task, sensor_data, revision)
                            prediction_execution_interval = (
                                datetime.datetime.now() - self.task_fetch_start)
                            logger.info(
                                f"Prediction completed after {human_delta(prediction_execution_interval)}"
                            )
                        except InsufficientDataAfterRowFilteringError as e:
                            # Expected, recoverable condition: too few rows
                            # survived filtering — skip this prediction only.
                            logger.warning(
                                "[Skipping the prediction 'InsufficientDataAfterRowFilteringError']: "
                                f"{self.make_prediction_task_info(task)}. "
                                f"Error: {e}"
                            )
                        self._log_task_execution_time(
                            label="Got the predictions", chars_to_append="")
                        if prediction_data and prediction_data.ok():
                            self._store_prediction_data_and_metadata(
                                task, prediction_data)
                            prediction_storage_interval = (
                                datetime.datetime.now() - self.task_fetch_start)
                            logger.info(
                                f"Prediction stored after {human_delta(prediction_storage_interval)}"
                            )
                            self.idle_count(True)
                        else:
                            # prediction_data may be None here (e.g. the
                            # row-filtering error above), hence the guard.
                            logger.warning(
                                f"Skipping store due to bad prediction: "
                                f"{prediction_data.data if prediction_data else 'empty'}"
                            )
                    else:
                        logger.warning(
                            f"Skipping prediction due to bad data: {sensor_data}"
                        )
                else:
                    logger.warning(f"No task")
                    self.idle_count(False)
                    sleep(1)
                self._log_task_execution_time(task)
            except EXCEPTIONS_TO_HANDLE as e:
                error_number += 1
                logger.error("-----------------------------------")
                logger.error(f"Error occurred in executor: {type(e)} {e}")
                traceback.print_exc()
                logger.error("-----------------------------------")
                sleep(1)
                self._log_task_execution_time()
            # Self-terminate after restart_interval_sec (when > 0) so the
            # supervisor restarts us with a clean slate.
            executor_interval = datetime.datetime.now() - executor_start
            if 0 < self.restart_interval_sec < executor_interval.total_seconds():
                # NOTE(review): "teraputic" is a typo for "therapeutic" in
                # this runtime log string — left untouched here.
                logger.info("Terminating executor for teraputic restart")
                done = True
        # logger.info("Executor stopped processing")
    else:
        logger.error("No task queue")
import distutils.util
import os
import socket
import sys
import threading

from latigo.log import setup_logging
from latigo import __version__ as latigo_version

# Logging must be configured before the remaining latigo imports run.
logger = setup_logging("latigo.app.scheduler")

from latigo.utils import load_configs, sleep
from latigo.scheduler import Scheduler

# Use os.environ.get() so a missing LATIGO_SCHEDULER_CONFIG_FILE falls back
# to the bundled default config; subscripting raised KeyError at startup
# when the variable was unset (the trailing `or None` shows a fallback was
# intended for the empty-string case).
config, err = load_configs(
    "../deploy/scheduler_config.yaml",
    os.environ.get("LATIGO_SCHEDULER_CONFIG_FILE") or None,
)
if not config:
    # Pause before exiting so a crash-looping container does not hammer
    # the supervisor, then terminate with a failure status.
    logger.error(f"Could not load configuration for scheduler: {err}")
    sleep(60 * 5)
    sys.exit(1)

# Name the main thread after the instance so log lines identify this process.
threading.current_thread().name = config.get("scheduler", {}).get(
    "instance_name", f"latigo-scheduler-{latigo_version}-{socket.getfqdn()}")

logger.info("Configuring Latigo Scheduler")
scheduler = Scheduler(config)
scheduler.print_summary()
logger.info("Running Latigo Scheduler")
scheduler.run()
logger.info("Stopping Latigo Scheduler")
from latigo import __version__ as latigo_version logger = setup_logging("latigo.app.executor") import multiprocessing_logging multiprocessing_logging.install_mp_handler() from latigo.utils import load_configs, sleep from latigo.executor import PredictionExecutor config, err = load_configs("../deploy/executor_config.yaml", os.environ["LATIGO_EXECUTOR_CONFIG_FILE"] or None) if not config: logger.error(f"Could not load configuration for executor: {err}") sleep(60 * 5) sys.exit(1) instance_count = int(config.get("executor", {}).get("instance_count", 1)) instance_name = config.get("executor", {}).get( "instance_name", f"latigo-executor-{latigo_version}-{socket.getfqdn()}") threading.current_thread().name = instance_name def wrap_executor(executor): executor.run() if __name__ == "__main__": instances = [] first: bool = True