Example #1
0
class ComponentManager:
    """
    Base class for decisionengine components such as Sources and Channels
    """

    def __init__(self, name):
        """
        :type name: :obj:`str`
        :arg name: Name of source corresponding to this source manager
        """
        self.name = name
        self.state = ProcessingState()
        # Shared integer so the log level can be read/changed across processes.
        self.loglevel = multiprocessing.Value("i", logging.WARNING)

    def get_state_value(self):
        """Return the numeric value of the current processing state."""
        return self.state.get_state_value()

    def get_state(self):
        """Return the current processing state object."""
        return self.state.get()

    def get_state_name(self):
        """Return the symbolic name of the current processing state."""
        current = self.get_state()
        return current.name

    def set_loglevel_value(self, log_level):
        """Set the shared log level from a logging-module level name (e.g. "DEBUG")."""
        # The logging module exposes each level name as an integer attribute,
        # so the string-to-int conversion is a plain getattr.
        numeric_level = getattr(logging, log_level)
        with self.loglevel.get_lock():
            self.loglevel.value = numeric_level

    def get_loglevel(self):
        """Return the shared integer log level."""
        with self.loglevel.get_lock():
            level = self.loglevel.value
        return level

    def take_offline(self):
        """Move this component to the OFFLINE state."""
        self.state.set(State.OFFLINE)
Example #2
0
class TaskManager:
    """
    Task manager
    """
    def __init__(self, name, workers, dataspace, expected_products, exchange,
                 broker_url, queue_info):
        """
        :type name: :obj:`str`
        :arg name: Name of channel corresponding to this task manager
        :type workers: :obj:`dict`
        :arg workers: worker objects keyed by role; must contain the keys
            "sources", "transforms", "logic_engine", and "publishers"
        :arg dataspace: backing store handed to this channel's DataBlock
        :arg expected_products: product names handed to the SourceProductCache
            (presumably the products that must arrive before a decision
            cycle can run -- confirm against SourceProductCache)
        :arg exchange: message exchange on which the source queues are
            declared (see run_cycles)
        :type broker_url: :obj:`str`
        :arg broker_url: URL of the message broker to connect to
        :arg queue_info: iterable of (queue name, routing key) pairs this
            channel listens on
        """
        self.name = name
        self.state = ProcessingState()
        # Shared integer so the log level can be changed across processes.
        self.loglevel = multiprocessing.Value("i", logging.WARNING)

        self.id = str(uuid.uuid4()).upper()
        self.data_block_t0 = datablock.DataBlock(dataspace, name, self.id,
                                                 1)  # my current data block
        self.logger = structlog.getLogger(CHANNELLOGGERNAME)
        self.logger = self.logger.bind(module=__name__.split(".")[-1],
                                       channel=self.name)

        # The DE owns the sources
        self.source_workers = workers["sources"]
        self.transform_workers = workers["transforms"]
        self.logic_engine = workers["logic_engine"]
        self.publisher_workers = workers["publishers"]

        self.exchange = exchange
        self.broker_url = broker_url
        self.connection = Connection(self.broker_url)

        self.source_product_cache = SourceProductCache(expected_products,
                                                       self.logger)
        self.queue_info = queue_info
        # The routing key is the second element of each queue_info entry.
        self.routing_keys = [info[1] for info in self.queue_info]

    def get_state_value(self):
        """Return the numeric value of this channel's processing state."""
        value = self.state.get_state_value()
        return value

    def get_state(self):
        """Return the current processing state object."""
        current = self.state.get()
        return current

    def get_state_name(self):
        """Return the symbolic name of the current processing state."""
        state = self.get_state()
        return state.name

    def set_loglevel_value(self, log_level):
        """Assumes log_level is a string corresponding to the supported logging-module levels."""
        with self.loglevel.get_lock():
            # Convert from string to int form using technique
            # suggested by logging module
            self.loglevel.value = getattr(logging, log_level)

    def get_loglevel(self):
        with self.loglevel.get_lock():
            return self.loglevel.value

    def take_offline(self):
        """Request shutdown of this task manager.

        Sets the state to SHUTTINGDOWN; the run_cycles loop polls
        state.should_stop() and is presumed to exit on this state.
        """
        self.state.set(State.SHUTTINGDOWN)

    def run_cycle(self, messages):
        """
        Process one batch of source messages, then run a decision cycle.

        :type messages: :obj:`dict`
        :arg messages: message bodies keyed by queue/source identifier; each
            body carries "source_module", "class_name", and "data" entries

        A State.SHUTDOWN payload from any source takes the whole channel
        offline.  If the source-product cache reports the batch incomplete
        (update() returns None), the method returns without running a
        decision cycle.
        """
        # The key of each entry is unused here; only the bodies matter.
        for msg_body in messages.values():
            module_spec = msg_body["source_module"]
            module_name = msg_body["class_name"]
            data = msg_body["data"]
            # NOTE(review): asserts are stripped under 'python -O'; senders
            # are trusted to never deliver an empty payload.
            assert data
            if data is State.SHUTDOWN:
                self.logger.info(
                    f"Channel {self.name} has received shutdown flag from source {module_spec} (class {module_name})"
                )
                self.take_offline()
                return

            assert isinstance(data, dict)
            self.logger.debug(f"Data received from {module_name}: {data}")

            # update() returns None while the cache is still waiting on
            # other products -- presumably until all expected products arrive.
            data_to_process = self.source_product_cache.update(data)
            if data_to_process is None:
                return

            header = datablock.Header(self.data_block_t0.taskmanager_id,
                                      create_time=time.time(),
                                      creator=module_spec)
            self.logger.info(f"Source {module_name} header done")

            try:
                self.data_block_put(data_to_process, header,
                                    self.data_block_t0)
            except Exception:  # pragma: no cover
                self.logger.exception(
                    "Exception inserting data into the data block.")
                self.logger.error(
                    f"Could not insert data from the following message\n{msg_body}"
                )
                return

            self.logger.info(f"Source {module_name} data block put done")

        try:
            self.decision_cycle()
            with self.state.lock:
                if not self.state.should_stop():
                    # If we are signaled to stop, don't override that state;
                    # otherwise the last decision_cycle completed without error.
                    self.state.set(State.STEADY)
                    CHANNEL_STATE_GAUGE.labels(self.name).set(
                        self.get_state_value())
        except Exception:  # pragma: no cover
            self.logger.exception("Exception in the task manager main loop")
            # Typo fixed: "occured" -> "occurred".
            self.logger.error(
                "Error occurred. Task manager %s exits with state %s", self.id,
                self.get_state_name())

    def run_cycles(self):
        """Main loop: listen for source messages and run decision cycles
        until the state says to stop, then shut the publishers down."""
        self.logger.setLevel(self.loglevel.value)
        self.logger.info(f"Starting task manager {self.id}")

        queues = []
        for queue_name, key in self.queue_info:
            self.logger.debug(
                f"Creating queue {queue_name} with routing key {key}")
            queue = Queue(queue_name,
                          exchange=self.exchange,
                          routing_key=key,
                          auto_delete=True)
            queues.append(queue)

        with LatestMessages(queues, self.broker_url) as messages:
            self.state.set(State.ACTIVE)
            self.logger.debug(f"Channel {self.name} is listening for events")
            while not self.state.should_stop():
                batch = messages.consume()
                if batch:
                    self.run_cycle(batch)
            self.logger.info(
                f"Task manager {self.id} received stop signal and is shutting down"
            )

        # Stop signal observed: report SHUTTINGDOWN, close out the
        # publishers, then report OFFLINE.
        self.state.set(State.SHUTTINGDOWN)
        CHANNEL_STATE_GAUGE.labels(self.name).set(self.get_state_value())
        self.logger.debug(
            "Shutting down. Will call shutdown on all publishers")
        for worker in self.publisher_workers.values():
            worker.module_instance.shutdown()
        self.state.set(State.OFFLINE)
        CHANNEL_STATE_GAUGE.labels(self.name).set(self.get_state_value())

    def get_produces(self):
        """Map each source and transform worker name to the product keys it produces."""
        # FIXME: What happens if a transform and source have the same name?
        produces = {
            name: list(worker.module_instance._produces)
            for name, worker in self.source_workers.items()
        }
        produces.update(
            (name, list(worker.module_instance._produces))
            for name, worker in self.transform_workers.items())
        return produces

    def get_consumes(self):
        """Map each transform and publisher worker name to the product keys it consumes."""
        # FIXME: What happens if a transform and publisher have the same name?
        consumes = {
            name: list(worker.module_instance._consumes)
            for name, worker in self.transform_workers.items()
        }
        consumes.update(
            (name, list(worker.module_instance._consumes))
            for name, worker in self.publisher_workers.items())
        return consumes

    def data_block_put(self, data, header, data_block):
        """
        Insert every product in *data* into the data block.

        :type data: :obj:`dict`
        :arg data: key, value pairs
        :type header: :obj:`~datablock.Header`
        :arg header: data header
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        # Non-dict payloads are logged and dropped rather than raised on.
        if not isinstance(data, dict):
            self.logger.error(
                f"data_block put expecting {dict} type, got {type(data)}")
            return
        self.logger.debug(f"data_block_put {data}")
        with data_block.lock:
            # One metadata object is shared by all products of this cycle.
            metadata = datablock.Metadata(
                data_block.taskmanager_id,
                state="END_CYCLE",
                generation_id=data_block.generation_id)
            for product_name, product in data.items():
                data_block.put(product_name, product, header,
                               metadata=metadata)

    def decision_cycle(self):
        """
        Run one decision cycle on a duplicate of the current data block:
        transforms, then the logic engine, then the publishers.
        """
        working_block = self.data_block_t0.duplicate()
        self.logger.debug(f"Duplicated block {self.data_block_t0}")

        try:
            self.run_transforms(working_block)
        except Exception:  # pragma: no cover
            # 'take_offline' has already been called inside run_transform,
            # so only log here.
            self.logger.exception("Error in decision cycle(transforms) ")

        actions = None
        try:
            actions = self.run_logic_engine(working_block)
            self.logger.info("ran all logic engines")
        except Exception:  # pragma: no cover
            self.logger.exception("Error in decision cycle(logic engine) ")
            self.take_offline()

        # No actions (no logic engine, or it failed): nothing to publish.
        if actions is None:
            return

        try:
            self.run_publishers(actions, working_block)
        except Exception:  # pragma: no cover
            self.logger.exception("Error in decision cycle(publishers) ")
            self.take_offline()

    def run_transforms(self, data_block=None):
        """
        Run every transform worker (in the main process) against *data_block*.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block; a falsy value is a no-op
        """
        self.logger.info("run_transforms")
        self.logger.debug(f"run_transforms: data block {data_block}")
        if not data_block:
            return

        for transform_name, worker in self.transform_workers.items():
            self.logger.info(f"starting transform {transform_name}")
            self.run_transform(worker, data_block)
        self.logger.info("all transforms finished")

    def run_transform(self, worker, data_block):
        """
        Run a single transform worker and put its output into the data block.

        :type worker: :obj:`~Worker`
        :arg worker: Transform worker
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        consume_keys = list(worker.module_instance._consumes.keys())
        self.logger.info("transform: %s expected keys: %s provided keys: %s",
                         worker.name, consume_keys, list(data_block.keys()))
        self.logger.info("Run transform %s", worker.name)
        try:
            # Time the whole transform-and-store step for the histogram.
            with TRANSFORM_RUN_HISTOGRAM.labels(self.name, worker.name).time():
                transformed = worker.module_instance.transform(data_block)
                self.logger.debug(f"transform returned {transformed}")
                header = datablock.Header(data_block.taskmanager_id,
                                          create_time=time.time(),
                                          creator=worker.name)
                self.data_block_put(transformed, header, data_block)
                self.logger.info("transform put data")
                TRANSFORM_RUN_GAUGE.labels(self.name,
                                           worker.name).set_to_current_time()
        except Exception:  # pragma: no cover
            # A failing transform takes the whole channel offline.
            self.logger.exception(f"exception from transform {worker.name} ")
            self.take_offline()

    def run_logic_engine(self, data_block):
        """
        Evaluate the logic engine against *data_block* and return its actions.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block; must be truthy
        :return: the actions produced, or None when no logic engine is
            configured
        :raises RuntimeError: if *data_block* is falsy
        """
        if not data_block:
            raise RuntimeError(
                "Cannot run logic engine on data block that is 'None'.")

        if self.logic_engine is None:
            self.logger.info("No logic engine to run")
            return None

        engine = self.logic_engine
        try:
            actions = new_facts = None
            with LOGICENGINE_RUN_HISTOGRAM.labels(self.name,
                                                  engine.name).time():
                self.logger.info("Run logic engine %s", engine.name)
                self.logger.debug("Run logic engine %s %s", engine.name,
                                  data_block)
                actions, new_facts = engine.module_instance.evaluate(
                    data_block)
                self.logger.info("Run logic engine %s done", engine.name)
                LOGICENGINE_RUN_GAUGE.labels(
                    self.name, engine.name).set_to_current_time()
                self.logger.info(
                    "Logic engine %s generated newfacts: %s",
                    engine.name,
                    new_facts.to_dict(orient="records"),
                )
                self.logger.info("Logic engine %s generated actions: %s",
                                 engine.name, actions)

            # Store the new facts back into the data block for downstream use.
            facts_product = {"de_logicengine_facts": new_facts}
            header = datablock.Header(data_block.taskmanager_id,
                                      create_time=time.time(),
                                      creator="logicengine")
            self.data_block_put(facts_product, header, data_block)
            return actions
        except Exception:  # pragma: no cover
            self.logger.exception("Unexpected logic engine error!")
            raise

    def run_publishers(self, actions, data_block):
        """
        Run (in the main process) every publisher named in *actions*.

        :arg actions: mapping whose values are lists of publisher names
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block; a falsy value is a no-op
        """
        if not data_block:
            return
        try:
            for action_list in actions.values():
                for action in action_list:
                    worker = self.publisher_workers[action]
                    name = worker.name
                    self.logger.info(f"Run publisher {name}")
                    self.logger.debug(f"Run publisher {name} {data_block}")
                    try:
                        with PUBLISHER_RUN_HISTOGRAM.labels(self.name,
                                                            name).time():
                            worker.module_instance.publish(data_block)
                            PUBLISHER_RUN_GAUGE.labels(
                                self.name, name).set_to_current_time()
                    except KeyError as e:
                        # During shutdown a failing publish is expected and
                        # only logged; otherwise it propagates.
                        if not self.state.should_stop():
                            raise  # pragma: no cover
                        self.logger.warning(
                            f"TaskManager stopping, ignore exception {name} publish() call: {e}"
                        )
        except Exception:  # pragma: no cover
            self.logger.exception("Unexpected error!")
            raise