Example #1
 def wait_for_trigger(self, now: typing.Optional[datetime.datetime] = None):
     # Evaluate "now" per call; a datetime.datetime.now() default argument
     # would be frozen at function definition time.
     now = now or datetime.datetime.now()
     iv = self.time_left(now=now)
     sec = iv.total_seconds()
     if sec > 0:
         logger.info(f"Waiting for {human_delta(iv)}")
         sleep(sec)
     return True
Example #2
 def receive_event_with_backoff(self,
                                timeout=100,
                                backoff=1000) -> typing.Optional[Task]:
     task_bytes = self.receive_event(timeout)
     task: typing.Optional[Task] = None
     if not task_bytes:
         # Nothing received within the timeout; back off before the next poll.
         sleep(backoff)
     else:
         task = deserialize_task(task_bytes)
         if not task:
             # Only report a failure when bytes were actually received but
             # could not be deserialized.
             logger.error("Could not deserialize task")
             logger.error(f" + Task bytes:{task_bytes}")
     return task
Example #3
    def _receive_event_with_backoff(self, timeout_sec=300, backoff_sec=60) -> typing.Optional[Task]:
        """Poll queue for the Message with passed timeout.

        Args:
            timeout_sec: Maximum time to block waiting from queue for message, event or callback (seconds);
            backoff_sec: Time for Thread to sleep before next try to receive the Message from queue.
        """
        task: typing.Optional[Task] = None
        task_bytes = self._receive_event(timeout=timeout_sec)

        if not task_bytes:
            logger.info(
                f"No message bytes received after polling with a timeout of {timeout_sec} seconds. "
                f"Sleeping for {backoff_sec} seconds before the next poll."
            )
            sleep(backoff_sec)
        else:
            task = deserialize_task(task_bytes)
            if not task:
                logger.error(f"Could not deserialize task\n Task bytes:{str(task_bytes)}")

        return task
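A backoff-based receive like the one above is normally driven from a worker loop. The following is a minimal illustrative sketch only; the "worker" object (assumed to expose the method above) and the caller-supplied "process_task" callable are hypothetical names, not part of latigo:

def poll_forever(worker, process_task) -> None:
    # Illustrative driver loop; not part of latigo.
    while True:
        task = worker._receive_event_with_backoff(timeout_sec=300, backoff_sec=60)
        if task is None:
            # Either the queue was empty (the method already slept for the
            # backoff) or the payload could not be deserialized; poll again.
            continue
        process_task(task)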
Example #4
 def run(self):
     if not self.good_to_go:
         sleep_time = 20
         logger.error(" ### ### Latigo could not be started!")
         logger.error(
             "         Please see previous error messages for clues as to why."
         )
         logger.error("")
         logger.error(
             f"         Will pause for {sleep_time} seconds before terminating."
         )
         for i in range(sleep_time):
             logger.error(sleep_time - i)
             sleep(1)
         return
     logger.info(f"Starting {self.__class__.__name__}")
     logger.info(f"Prediction step: {self.continuous_prediction_timer}")
     done = False
     while not done:
         try:
             start = datetime.datetime.now()
             if self.continuous_prediction_timer.wait_for_trigger(
                     now=start):
                 self.update_model_info()
                 self.perform_prediction_step()
             interval = datetime.datetime.now() - start
             logger.info(f"Spent {interval}")
             if interval > datetime.timedelta(seconds=1):
                 # sleep() expects seconds, not a timedelta
                 sleep(interval.total_seconds())
         except KeyboardInterrupt:
             logger.info("Keyboard abort triggered, shutting down")
             done = True
         except Exception as e:
             logger.error("-----------------------------------")
             logger.error(f"Error occurred in scheduler: {e}")
             traceback.print_exc()
             logger.error("")
             logger.error("-----------------------------------")
     logger.info(f"Stopping {self.__class__.__name__}")
Example #5
 def run(self):
     if self.task_queue:
         logger.info(f"Starting processing in {self.__class__.__name__}")
         done = False
         iteration_number = 0
         error_number = 0
         while not done:
             iteration_number += 1
             try:
                 task = self._fetch_task()
                 if task:
                     logger.info(f"Processing task for '{task.project_name}.{task.model_name}' starting {task.from_time} lasting {task.to_time - task.from_time}")
                     sensor_data = self._fetch_sensor_data(task)
                     if sensor_data and sensor_data.ok():
                         prediction_data = self._execute_prediction(task, sensor_data)
                         if prediction_data and prediction_data.ok():
                             self._store_prediction_data(task, prediction_data)
                             self.idle_count(True)
                         else:
                             logger.warning(f"Skipping store due to bad prediction: {prediction_data.data}")
                     else:
                         logger.warning(f"Skipping prediciton due to bad data: {sensor_data}")
                 else:
                     logger.warning(f"No task")
                     self.idle_count(False)
                     sleep(1)
             except Exception as e:
                 error_number += 1
                 logger.error("-----------------------------------")
                 logger.error(f"Error occurred in executor: {e}")
                 traceback.print_exc()
                 logger.error("")
                 logger.error("-----------------------------------")
                 sleep(1)
         logger.info(f"Stopping processing in {self.__class__.__name__}")
     else:
         logger.info(f"Skipping processing in {self.__class__.__name__}")
Example #6
 def wait_for_trigger(self,
                      now: typing.Optional[datetime.datetime] = None):
     # Evaluate "now" per call rather than freezing it as a default value.
     now = now or datetime.datetime.now()
     iv = self.time_left(now=now)
     # Only sleep for positive intervals; a negative value means we are late.
     sec = iv.total_seconds() if iv else 0
     if sec > 0:
         sleep(sec)
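The time_left() helper that both wait_for_trigger variants rely on is not shown in these examples. As a rough sketch only, and not latigo's actual implementation, a fixed-interval timer could provide it like this:

import datetime
import typing

from latigo.utils import sleep  # the helper these examples exercise

class IntervalTimer:
    """Illustrative sketch of a fixed-interval trigger; an assumption, not latigo's own class."""

    def __init__(self, interval: datetime.timedelta):
        self.interval = interval
        self.last_trigger = datetime.datetime.now()

    def time_left(self, now: datetime.datetime) -> datetime.timedelta:
        # Remaining time until the next scheduled trigger; may be <= 0 when running late.
        return (self.last_trigger + self.interval) - now

    def wait_for_trigger(self, now: typing.Optional[datetime.datetime] = None) -> None:
        now = now or datetime.datetime.now()
        sec = self.time_left(now=now).total_seconds()
        if sec > 0:
            sleep(sec)
        self.last_trigger = datetime.datetime.now()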
Example #7
    def run(self):
        if not self.good_to_go:
            sleep_time = 60 * 5
            logger.error("")
            logger.error(" ### ### Latigo could not be started!")
            logger.error(
                f"         Will pause for {human_delta(datetime.timedelta(seconds=sleep_time))} before terminating."
            )
            logger.error(
                "         Please see previous error messages for clues.")
            logger.error("")
            sleep(sleep_time)
            return
        if self.task_queue:
            # Refraining from excessive logging
            # logger.info("Executor started processing")
            done = False
            iteration_number = 0
            error_number = 0
            executor_start = datetime.datetime.now()
            while not done:
                iteration_number += 1
                try:
                    # logger.info("Fetching task...")
                    self.task_fetch_start = datetime.datetime.now()
                    task = self._fetch_task()
                    if task:
                        task_fetch_interval = datetime.datetime.now(
                        ) - self.task_fetch_start
                        logger.info(
                            f"[Prediction_task_info] {self.make_prediction_task_info(task)}. "
                            f"Task fetched after {human_delta(task_fetch_interval)}"
                        )
                        revision = self.model_info_provider.get_project_latest_revisions(
                            task.project_name)

                        sensor_data = self._fetch_sensor_data(task)
                        data_fetch_interval = datetime.datetime.now(
                        ) - self.task_fetch_start
                        logger.info(
                            f"Got data after {human_delta(data_fetch_interval)}"
                        )

                        self._log_task_execution_time(
                            label="fetched sensor data", chars_to_append="")
                        if sensor_data and sensor_data.ok():
                            prediction_data = None
                            try:
                                prediction_data = self._execute_prediction(
                                    task, sensor_data, revision)

                                prediction_execution_interval = (
                                    datetime.datetime.now() -
                                    self.task_fetch_start)
                                logger.info(
                                    f"Prediction completed after {human_delta(prediction_execution_interval)}"
                                )
                            except InsufficientDataAfterRowFilteringError as e:
                                logger.warning(
                                    "[Skipping the prediction 'InsufficientDataAfterRowFilteringError']: "
                                    f"{self.make_prediction_task_info(task)}. Error: {e}"
                                )
                            self._log_task_execution_time(
                                label="Got the predictions",
                                chars_to_append="")

                            if prediction_data and prediction_data.ok():
                                self._store_prediction_data_and_metadata(
                                    task, prediction_data)
                                prediction_storage_interval = (
                                    datetime.datetime.now() -
                                    self.task_fetch_start)
                                logger.info(
                                    f"Prediction stored after {human_delta(prediction_storage_interval)}"
                                )
                                self.idle_count(True)
                            else:
                                logger.warning(
                                    f"Skipping store due to bad prediction: "
                                    f"{prediction_data.data if prediction_data else 'empty'}"
                                )
                        else:
                            logger.warning(
                                f"Skipping prediction due to bad data: {sensor_data}"
                            )
                    else:
                        logger.warning(f"No task")
                        self.idle_count(False)
                        sleep(1)

                    self._log_task_execution_time(task)

                except EXCEPTIONS_TO_HANDLE as e:
                    error_number += 1
                    logger.error("-----------------------------------")
                    logger.error(f"Error occurred in executor: {type(e)} {e}")
                    traceback.print_exc()
                    logger.error("-----------------------------------")
                    sleep(1)
                    self._log_task_execution_time()

                # Once per iteration, check whether the executor has been
                # running long enough to warrant a therapeutic restart.
                executor_interval = datetime.datetime.now() - executor_start
                if 0 < self.restart_interval_sec < executor_interval.total_seconds():
                    logger.info("Terminating executor for therapeutic restart")
                    done = True
            # logger.info("Executor stopped processing")
        else:
            logger.error("No task queue")
Example #8
import distutils.util
import os
import socket
import sys
import threading
from latigo.log import setup_logging
from latigo import __version__ as latigo_version

logger = setup_logging("latigo.app.scheduler")
from latigo.utils import load_configs, sleep
from latigo.scheduler import Scheduler

config, err = load_configs(
    "../deploy/scheduler_config.yaml",
    os.environ["LATIGO_SCHEDULER_CONFIG_FILE"] or None,
)
if not config:
    logger.error(f"Could not load configuration for scheduler: {err}")
    sleep(60 * 5)
    sys.exit(1)

threading.current_thread().name = config.get("scheduler", {}).get(
    "instance_name", f"latigo-scheduler-{latigo_version}-{socket.getfqdn()}")

logger.info("Configuring Latigo Scheduler")
scheduler = Scheduler(config)
scheduler.print_summary()
logger.info("Running Latigo Scheduler")
scheduler.run()
logger.info("Stopping Latigo Scheduler")
Example #9
import os
import socket
import sys
import threading

from latigo.log import setup_logging
from latigo import __version__ as latigo_version

logger = setup_logging("latigo.app.executor")

import multiprocessing_logging

multiprocessing_logging.install_mp_handler()

from latigo.utils import load_configs, sleep
from latigo.executor import PredictionExecutor

config, err = load_configs("../deploy/executor_config.yaml",
                           os.environ["LATIGO_EXECUTOR_CONFIG_FILE"] or None)
if not config:
    logger.error(f"Could not load configuration for executor: {err}")
    sleep(60 * 5)
    sys.exit(1)

instance_count = int(config.get("executor", {}).get("instance_count", 1))
instance_name = config.get("executor", {}).get(
    "instance_name", f"latigo-executor-{latigo_version}-{socket.getfqdn()}")
threading.current_thread().name = instance_name


def wrap_executor(executor):
    executor.run()


if __name__ == "__main__":
    instances = []
    first: bool = True