Example No. 1
def test_Header_constructor(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])

    assert header.data["taskmanager_id"] == my_tm["taskmanager_id"]

    createTime = 1.0
    expirationTime = 3.0
    scheduleTime = 5.0
    creator = "creator"
    schema = 1
    header = datablock.Header(
        my_tm["taskmanager_id"],
        create_time=createTime,
        expiration_time=expirationTime,
        scheduled_create_time=scheduleTime,
        creator=creator,
        schema_id=schema,
    )
    assert header.data["taskmanager_id"] == my_tm["taskmanager_id"]
    assert header.data["create_time"] == createTime
    assert header.data["expiration_time"] == expirationTime
    assert header.data["scheduled_create_time"] == scheduleTime
    assert header.data["creator"] == creator
    assert header.data["schema_id"] == schema
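For orientation, here is a minimal standalone sketch of the same constructor call outside the test fixture; it assumes datablock is decisionengine's decisionengine.framework.dataspace.datablock module, and the taskmanager id below is a made-up placeholder.

# Standalone sketch; the module path is assumed and the id is a placeholder.
from decisionengine.framework.dataspace import datablock

header = datablock.Header("placeholder-taskmanager-id",
                          create_time=1.0,
                          expiration_time=3.0,
                          scheduled_create_time=5.0,
                          creator="creator",
                          schema_id=1)
print(header.data["creator"])    # "creator"
print(header.data["schema_id"])  # 1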
Example No. 2
    def test_Header_constructor(self):
        dataproduct = self.data["dataproduct"][0]

        header = datablock.Header(dataproduct["taskmanager_id"])
        self.assertEqual(header.data["taskmanager_id"],
                         dataproduct["taskmanager_id"])

        createTime = 1.0
        expirationTime = 3.0
        scheduleTime = 5.0
        creator = "creator"
        schema = 1
        header = datablock.Header(dataproduct["taskmanager_id"],
                                  create_time=createTime,
                                  expiration_time=expirationTime,
                                  scheduled_create_time=scheduleTime,
                                  creator=creator,
                                  schema_id=schema)
        self.assertEqual(header.data["taskmanager_id"],
                         dataproduct["taskmanager_id"])
        self.assertEqual(header.data["create_time"], createTime)
        self.assertEqual(header.data["expiration_time"], expirationTime)
        self.assertEqual(header.data["scheduled_create_time"], scheduleTime)
        self.assertEqual(header.data["creator"], creator)
        self.assertEqual(header.data["schema_id"], schema)
Example No. 3
    def test_DataBlock_get_metadata(self):
        dataproduct = self.data["dataproduct"][0]
        header = datablock.Header(dataproduct["taskmanager_id"])
        metadata = datablock.Metadata(dataproduct["taskmanager_id"], generation_id=int(dataproduct["generation_id"]))
        self.datablock.put(dataproduct["key"], dataproduct["value"], header, metadata)

        self.assertEqual(metadata, self.datablock.get_metadata(dataproduct["key"]))
Example No. 4
def test_DataBlock_to_str(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples

    expected = {
        "taskmanager_id": my_tm["taskmanager_id"],
        "generation_id": dataspace.get_last_generation_id(
            my_tm["name"], my_tm["taskmanager_id"]),
        "sequence_id": len(dataspace.get_dataproducts(my_tm["sequence_id"])) + 1,
        "keys": ["example_test_key"],
        "dataproducts": {"example_test_key": "example_test_value"},
    }

    header = datablock.Header(my_tm["taskmanager_id"])

    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])
    dblock.put("example_test_key", "example_test_value", header)

    result = ast.literal_eval(str(dblock))
    assert result == expected
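The comparison above relies on str(dblock) producing a plain Python dict literal, which ast.literal_eval can parse back safely (it accepts literals only, unlike eval). A small illustration with an ordinary dict string, independent of the DataBlock API:

import ast

text = "{'dataproducts': {'example_test_key': 'example_test_value'}}"
parsed = ast.literal_eval(text)  # parses literals only; no code execution
assert parsed["dataproducts"]["example_test_key"] == "example_test_value"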
Example No. 5
def test_DataBlock_key_management(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    metadata = datablock.Metadata(
        my_tm["taskmanager_id"],
        generation_id=dataspace.get_last_generation_id(
            my_tm["name"], my_tm["taskmanager_id"]),
    )
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    # test with automatic metadata and string value
    dblock.put("example_test_key", "example_test_value", header)

    assert "example_test_key" in dblock.keys()
    assert "example_test_key" in dblock

    assert dblock.get("example_test_key") == "example_test_value"

    # Test product-retriever interface
    retriever = datablock.ProductRetriever("example_test_key", None, None)
    assert retriever(dblock) == "example_test_value"
    assert (
        str(retriever) ==
        "Product retriever for {'name': 'example_test_key', 'type': None, 'creator': None}"
    )

    # test new key with manual metadata and dict value
    newDict = {"subKey": "newValue"}
    dblock.put("new_example_test_key", newDict, header, metadata)
    assert dblock["new_example_test_key"] == newDict
Example No. 6
    def run_transform(self, worker, data_block):
        """
        Run a transform

        :type worker: :obj:`~Worker`
        :arg worker: Transform worker
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        consume_keys = list(worker.module_instance._consumes.keys())

        self.logger.info("transform: %s expected keys: %s provided keys: %s",
                         worker.name, consume_keys, list(data_block.keys()))
        self.logger.info("Run transform %s", worker.name)
        try:
            with TRANSFORM_RUN_HISTOGRAM.labels(self.name, worker.name).time():
                data = worker.module_instance.transform(data_block)
                self.logger.debug(f"transform returned {data}")
                header = datablock.Header(data_block.taskmanager_id,
                                          create_time=time.time(),
                                          creator=worker.name)
                self.data_block_put(data, header, data_block)
                self.logger.info("transform put data")
                TRANSFORM_RUN_GAUGE.labels(self.name,
                                           worker.name).set_to_current_time()
        except Exception:  # pragma: no cover
            self.logger.exception(f"exception from transform {worker.name} ")
            self.take_offline()
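Several examples call self.data_block_put(...) without showing it. A hypothetical minimal version, for illustration only and not the library's implementation, would attach the same header to every product in the returned dict and write each one into the block:

# Hypothetical helper sketch; the real decisionengine helper may differ.
def data_block_put(data, header, data_block):
    if not isinstance(data, dict):
        raise TypeError(f"expected a dict of products, got {type(data)}")
    for key, product in data.items():
        data_block.put(key, product, header)  # same header for every product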
Example No. 7
def test_DataBlock_key_management(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    assert "example_test_key" in dblock.keys()
    assert "example_test_key" in dblock

    assert dblock.get("example_test_key") == "example_test_value"

    # Test product-retriever interface
    retriever = datablock.ProductRetriever("example_test_key", None, None)
    assert retriever(dblock) == "example_test_value"
    assert (
        str(retriever) ==
        "Product retriever for {'name': 'example_test_key', 'type': None, 'creator': None}"
    )

    # FIXME: The following behavior should be disallowed for data-integrity reasons!
    #        i.e. replacing a product name with a different value.
    newDict = {"subKey": "newValue"}
    dblock.put("example_test_key", newDict, header)
    assert dblock["example_test_key"] == newDict
Example No. 8
    def test_DataBlock_key_management(self):
        dataproduct = self.data["dataproduct"][0]
        header = datablock.Header(dataproduct["taskmanager_id"])

        self.datablock.put(dataproduct["key"], dataproduct["value"], header)

        self.assertIn(dataproduct["key"], self.datablock.keys())
        self.assertIn(dataproduct["key"], self.datablock)

        self.assertEqual(self.datablock.get(dataproduct["key"]),
                         dataproduct["value"])

        # Test product-retriever interface
        retriever = datablock.ProductRetriever(dataproduct["key"], None, None)
        assert retriever(self.datablock) == dataproduct["value"]
        assert str(
            retriever
        ) == "Product retriever for {'name': 'test_key1', 'type': None, 'creator': None}"

        # FIXME: The following behavior should be disallowed for data-integrity reasons!
        #        i.e. replacing a product name with a different value.
        newDict = {"subKey": "newValue"}
        self.datablock.put(dataproduct["key"], newDict, header)
        self.assertEqual(self.datablock[dataproduct["key"]], newDict)

        with self.assertRaises(KeyError):
            self.datablock["invalidKey"]
Example No. 9
    def test_DataBlock_get_taskmanagers(self):
        taskmanager = self.data["taskmanager"][0]
        dataproduct = self.data["dataproduct"][0]
        header = datablock.Header(dataproduct["taskmanager_id"])
        self.datablock.put(dataproduct["key"], dataproduct["value"], header)
        tms = self.dataspace.get_taskmanagers()
        self.assertEqual(taskmanager["taskmanager_id"], tms[0]["taskmanager_id"])
Example No. 10
    def run_transform(self, transform, data_block):
        """
        Run a transform

        :type transform: :obj:`~Worker`
        :arg transform: transform Worker
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        consume_keys = list(transform.worker._consumes.keys())

        logging.getLogger().info(
            'transform: %s expected keys: %s provided keys: %s',
            transform.name, consume_keys, list(data_block.keys()))
        logging.getLogger().info('run transform %s', transform.name)
        try:
            data = transform.worker.transform(data_block)
            logging.getLogger().debug(f'transform returned {data}')
            header = datablock.Header(data_block.taskmanager_id,
                                      create_time=time.time(),
                                      creator=transform.name)
            self.data_block_put(data, header, data_block)
            logging.getLogger().info('transform put data')
        except Exception:  # pragma: no cover
            logging.getLogger().exception(
                f'exception from transform {transform.name} ')
            self.take_offline(data_block)
Example No. 11
    def test_DataBlock_get_taskmanager(self):
        taskmanager = self.data["taskmanager"][0]
        dataproduct = self.data["dataproduct"][0]
        header = datablock.Header(dataproduct["taskmanager_id"])
        self.datablock.put(dataproduct["key"], dataproduct["value"], header)

        tid = self.datablock.get_taskmanager(taskmanager["name"])["taskmanager_id"]
        self.assertEqual(taskmanager["taskmanager_id"], tid)
Example No. 12
def test_DataBlock_get_header(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    assert header == dblock.get_header("example_test_key")
Example No. 13
def test_DataBlock_no_key_by_name(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    with pytest.raises(KeyError):
        dblock["no_such_key_exists"]
Example No. 14
            def run_cycle(body, message):
                module_spec = body["source_module"]
                module_name = body["class_name"]
                data = body["data"]
                assert data
                self.logger.debug(f"Data received from {module_name}: {data}")

                if not self.sources_have_run_once:
                    self.source_product_cache.update(**data)
                    missing_products = self.expected_source_products - set(
                        self.source_product_cache.keys())
                    if missing_products:
                        self.logger.info(
                            f"Waiting on more data (missing {missing_products})"
                        )
                        message.ack()
                        return
                    self.logger.info("All sources have executed at least once")
                    data = self.source_product_cache
                    self.sources_have_run_once = True

                header = datablock.Header(self.data_block_t0.taskmanager_id,
                                          create_time=time.time(),
                                          creator=module_spec)
                self.logger.info(f"Source {module_name} header done")

                try:
                    self.data_block_put(data, header, self.data_block_t0)
                except Exception:  # pragma: no cover
                    self.logger.exception(
                        "Exception inserting data into the data block.")
                    self.logger.error(
                        f"Could not insert data from the following message\n{body}"
                    )
                    message.ack()
                    return

                self.logger.info(f"Source {module_name} data block put done")

                try:
                    self.decision_cycle()
                    with self.state.lock:
                        if not self.state.should_stop():
                            # If we are signaled to stop, don't override that state
                            # otherwise the last decision_cycle completed without error
                            self.state.set(State.STEADY)
                            CHANNEL_STATE_GAUGE.labels(self.name).set(
                                self.get_state_value())
                except Exception:  # pragma: no cover
                    self.logger.exception(
                        "Exception in the task manager main loop")
                    self.logger.error(
                        "Error occured. Task manager %s exits with state %s",
                        self.id, self.get_state_name())
                message.ack()
Example No. 15
    def test_DataBlock_to_str(self):
        dataproduct = self.data["dataproduct"][0]
        header = datablock.Header(dataproduct["taskmanager_id"])
        self.datablock.put(dataproduct["key"], dataproduct["value"], header)

        result = str(self.datablock)
        self.assertEqual(
            result,
            "{'taskamanger_id': '1', 'generation_id': 1, 'sequence_id': 2, "
            "'keys': ['%s'], 'dataproducts': {'%s': '%s'}}" %
            (dataproduct["key"], dataproduct["key"], dataproduct["value"]))
Example No. 16
def test_DataBlock_mark_expired(dataspace):  # noqa: F811
    # mark_expired is just a stub in this case
    # failure in a real implementation should raise an exception
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    assert dblock.mark_expired(1) is None
Example No. 17
def test_DataBlock_get_dataproducts(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    products = dblock.get_dataproducts()
    assert len(products) == 1
    assert products[0]["key"] == "example_test_key"
    assert products[0]["value"] == "example_test_value"
Example No. 18
    def run_cycle(self, messages):
        for name, msg_body in messages.items():
            module_spec = msg_body["source_module"]
            module_name = msg_body["class_name"]
            data = msg_body["data"]
            assert data
            if data is State.SHUTDOWN:
                self.logger.info(
                    f"Channel {self.name} has received shutdown flag from source {module_spec} (class {module_name})"
                )
                self.take_offline()
                return

            assert isinstance(data, dict)
            self.logger.debug(f"Data received from {module_name}: {data}")

            data_to_process = self.source_product_cache.update(data)
            if data_to_process is None:
                return

            header = datablock.Header(self.data_block_t0.taskmanager_id,
                                      create_time=time.time(),
                                      creator=module_spec)
            self.logger.info(f"Source {module_name} header done")

            try:
                self.data_block_put(data_to_process, header,
                                    self.data_block_t0)
            except Exception:  # pragma: no cover
                self.logger.exception(
                    "Exception inserting data into the data block.")
                self.logger.error(
                    f"Could not insert data from the following message\n{msg_body}"
                )
                return

            self.logger.info(f"Source {module_name} data block put done")

        try:
            self.decision_cycle()
            with self.state.lock:
                if not self.state.should_stop():
                    # If we are signaled to stop, don't override that state
                    # otherwise the last decision_cycle completed without error
                    self.state.set(State.STEADY)
                    CHANNEL_STATE_GAUGE.labels(self.name).set(
                        self.get_state_value())
        except Exception:  # pragma: no cover
            self.logger.exception("Exception in the task manager main loop")
            self.logger.error(
                "Error occured. Task manager %s exits with state %s", self.id,
                self.get_state_name())
Example No. 19
    def run_logic_engine(self, data_block=None):
        """
        Run Logic Engine.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        le_list = []
        if not data_block:
            return

        try:
            for le in self.channel.le_s:
                logging.getLogger().info('run logic engine %s',
                                         self.channel.le_s[le].name)
                logging.getLogger().debug('run logic engine %s %s',
                                          self.channel.le_s[le].name,
                                          data_block)
                rc = self.channel.le_s[le].worker.evaluate(data_block)
                le_list.append(rc)
                logging.getLogger().info('run logic engine %s done',
                                         self.channel.le_s[le].name)
                logging.getLogger().info(
                    'logic engine %s generated newfacts: %s',
                    self.channel.le_s[le].name,
                    rc['newfacts'].to_dict(orient='records'))
                logging.getLogger().info(
                    'logic engine %s generated actions: %s',
                    self.channel.le_s[le].name, rc['actions'])

            # Add new facts to the datablock
            # Add empty dataframe if nothing is available
            if le_list:
                all_facts = pandas.concat([i['newfacts'] for i in le_list],
                                          ignore_index=True)
            else:
                logging.getLogger().info(
                    'Logic engine(s) did not return any new facts')
                all_facts = pandas.DataFrame()

            data = {'de_logicengine_facts': all_facts}
            t = time.time()
            header = datablock.Header(data_block.taskmanager_id,
                                      create_time=t,
                                      creator='logicengine')
            self.data_block_put(data, header, data_block)
        except Exception:  # pragma: no cover
            logging.getLogger().exception("Unexpected error!")
            raise
        else:
            return le_list
Example No. 20
    def run_transform(self, transform, data_block):
        """
        Run a transform

        :type transform: :obj:`~Worker`
        :arg transform: transform Worker
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        data_to = self.channel.task_manager.get('data_TO', TRANSFORMS_TO)
        consume_keys = transform.worker.consumes()

        logging.getLogger().info(
            'transform: %s expected keys: %s provided keys: %s',
            transform.name, consume_keys, list(data_block.keys()))
        loop_counter = 0
        while True:
            # Check if data is ready
            if set(consume_keys) <= set(data_block.keys()):
                # data is ready -  may run transform()
                logging.getLogger().info('run transform %s', transform.name)
                try:
                    with data_block.lock:
                        data = transform.worker.transform(data_block)
                    logging.getLogger().debug('transform returned %s', data)
                    t = time.time()
                    header = datablock.Header(data_block.taskmanager_id,
                                              create_time=t,
                                              creator=transform.name)
                    self.data_block_put(data, header, data_block)
                    logging.getLogger().info('transform put data')
                except Exception as e:
                    log_exception(
                        logging.getLogger(),
                        'exception from transform {} : {}'.format(
                            transform.name, e))
                    self.offline_task_manager(data_block)
                break
            else:
                s = transform.stop_running.wait(1)
                if s:
                    logging.getLogger().info(
                        'received stop_running signal for %s', transform.name)
                    break
                loop_counter += 1
                if loop_counter == data_to:
                    logging.getLogger().info(
                        'transform %s did not get consumes data in %s seconds. Exiting',
                        transform.name, data_to)
                    break
        transform.data_updated.set()
Example No. 21
    def test_DataBlock_duplicate(self):
        dataproduct = self.data["dataproduct"][0]
        header = datablock.Header(dataproduct["taskmanager_id"])
        metadata = datablock.Metadata(dataproduct["taskmanager_id"], generation_id=int(dataproduct["generation_id"]))
        self.datablock.put(dataproduct["key"], dataproduct["value"], header, metadata)

        dblock = self.datablock.duplicate()

        self.assertEqual(dblock.taskmanager_id, self.datablock.taskmanager_id)
        self.assertEqual(dblock.generation_id + 1, self.datablock.generation_id)
        self.assertEqual(dblock.sequence_id, self.datablock.sequence_id)
        self.assertEqual(dblock._keys, self.datablock._keys)
        for key in self.datablock._keys:
            self.assertEqual(dblock[key], self.datablock[key])
Example No. 22
    def run_logic_engine(self, data_block=None):
        """
        Run Logic Engine.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        le_list = []
        if not data_block:
            return

        try:
            for le in self.channel.le_s:
                self.logger.info("run logic engine %s",
                                 self.channel.le_s[le].name)
                self.logger.debug("run logic engine %s %s",
                                  self.channel.le_s[le].name, data_block)
                rc = self.channel.le_s[le].worker.evaluate(data_block)
                le_list.append(rc)
                self.logger.info("run logic engine %s done",
                                 self.channel.le_s[le].name)
                self.logger.info(
                    "logic engine %s generated newfacts: %s",
                    self.channel.le_s[le].name,
                    rc["newfacts"].to_dict(orient="records"),
                )
                self.logger.info("logic engine %s generated actions: %s",
                                 self.channel.le_s[le].name, rc["actions"])

            # Add new facts to the datablock
            # Add empty dataframe if nothing is available
            if le_list:
                all_facts = pd.concat([i["newfacts"] for i in le_list],
                                      ignore_index=True)
            else:
                self.logger.info(
                    "Logic engine(s) did not return any new facts")
                all_facts = pd.DataFrame()

            data = {"de_logicengine_facts": all_facts}
            t = time.time()
            header = datablock.Header(data_block.taskmanager_id,
                                      create_time=t,
                                      creator="logicengine")
            self.data_block_put(data, header, data_block)
        except Exception:  # pragma: no cover
            self.logger.exception("Unexpected error!")
            raise
        else:
            return le_list
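The empty-list fallback above exists because pandas will not concatenate zero frames; a standalone illustration of the failure mode being guarded against:

import pandas as pd

frames = []  # no logic-engine results this cycle
try:
    pd.concat(frames, ignore_index=True)
except ValueError as err:
    print(err)  # "No objects to concatenate"

# the guarded form used in run_logic_engine above
all_facts = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()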
Example No. 23
def test_DataBlock_key_management_change_name(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    # FIXME: The following behavior should be disallowed for data-integrity reasons!
    #        i.e. replacing a product name from datablock.ProductRetriever with a
    #             different value.
    newDict = {"subKey": "newValue"}
    dblock.put("example_test_key", newDict, header)
    assert dblock["example_test_key"] == newDict
Example No. 24
def test_DataBlock_is_expired_with_key(dataspace):  # noqa: F811
    """This test just validates the method/function exists.
    The stub within our default code should be replaced
    by a class inheriting from it.
    That class should have more rational return types.
    """
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    assert dblock.is_expired(key="example_test_key") is None
Example No. 25
def test_DataBlock_get_metadata(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    metadata = datablock.Metadata(
        my_tm["taskmanager_id"],
        generation_id=dataspace.get_last_generation_id(
            my_tm["name"], my_tm["taskmanager_id"]),
    )
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header, metadata)

    assert metadata == dblock.get_metadata("example_test_key")
Example No. 26
    def run_transform(self, transform, data_block):
        """
        Run a transform

        :type transform: :obj:`~Worker`
        :arg transform: transform Worker
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        data_to = self.channel.task_manager.get('data_TO', _TRANSFORMS_TO)
        consume_keys = list(transform.worker._consumes.keys())

        logging.getLogger().info(
            'transform: %s expected keys: %s provided keys: %s',
            transform.name, consume_keys, list(data_block.keys()))
        loop_counter = 0
        while not self.state.should_stop():
            # Check if data is ready
            if set(consume_keys) <= set(data_block.keys()):
                # data is ready -  may run transform()
                logging.getLogger().info('run transform %s', transform.name)
                try:
                    with data_block.lock:
                        data = transform.worker.transform(data_block)
                    logging.getLogger().debug(f'transform returned {data}')
                    t = time.time()
                    header = datablock.Header(data_block.taskmanager_id,
                                              create_time=t,
                                              creator=transform.name)
                    self.data_block_put(data, header, data_block)
                    logging.getLogger().info('transform put data')
                except Exception:  # pragma: no cover
                    logging.getLogger().exception(
                        f'exception from transform {transform.name} ')
                    self.take_offline(data_block)
                break
            s = transform.stop_running.wait(1)
            if s:
                logging.getLogger().info(
                    f'received stop_running signal for {transform.name}')
                break
            loop_counter += 1
            if loop_counter == data_to:
                logging.getLogger().info(
                    f'transform {transform.name} did not get consumes data '
                    f'in {data_to} seconds. Exiting')
                break
        transform.data_updated.set()
Example No. 27
    def run_source(self, src):
        """
        Get the data from source
        and put it into the data block

        :type src: :obj:`~Worker`
        :arg src: source Worker
        """

        # If task manager is in offline state, do not keep executing sources.
        while not self.state.should_stop():
            try:
                logging.getLogger().info(f'Src {src.name} calling acquire')
                data = src.worker.acquire()
                Module.verify_products(src.worker, data)
                logging.getLogger().info(f'Src {src.name} acquire returned')
                logging.getLogger().info(f'Src {src.name} filling header')
                if data:
                    t = time.time()
                    header = datablock.Header(
                        self.data_block_t0.taskmanager_id,
                        create_time=t,
                        creator=src.module)
                    logging.getLogger().info(f'Src {src.name} header done')
                    self.data_block_put(data, header, self.data_block_t0)
                    logging.getLogger().info(
                        f'Src {src.name} data block put done')
                else:
                    logging.getLogger().warning(
                        f'Src {src.name} acquire returned no data')
                src.run_counter += 1
                src.data_updated.set()
                logging.getLogger().info(
                    f'Src {src.name} {src.module} finished cycle')
            except Exception:
                logging.getLogger().exception(
                    f'Exception running source {src.name} ')
                self.take_offline(self.data_block_t0)
            if src.schedule > 0:
                s = src.stop_running.wait(src.schedule)
                if s:
                    logging.getLogger().info(
                        f'received stop_running signal for {src.name}')
                    break
            else:
                logging.getLogger().info(f'source {src.name} runs only once')
                break
        logging.getLogger().info(f'stopped {src.name}')
Example No. 28
    def run_transform(self, transform, data_block):
        """
        Run a transform

        :type transform: :obj:`~Worker`
        :arg transform: transform Worker
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        data_to = self.channel.task_manager.get('data_TO', TRANSFORMS_TO)
        consume_keys = transform.worker.consumes()

        self.logger.info('transform: %s expected keys: %s provided keys: %s' %
                         (transform.name, consume_keys, data_block.keys()))
        loop_counter = 0
        while 1:
            # Check if data is ready
            if set(consume_keys) <= set(data_block.keys()):
                # data is ready -  may run transform()
                self.logger.info('run transform %s' % (transform.name, ))
                try:
                    with data_block.lock:
                        data = transform.worker.transform(data_block)
                    self.logger.debug('transform returned %s' % (data, ))
                    t = time.time()
                    header = datablock.Header(data_block.taskmanager_id,
                                              create_time=t,
                                              creator=transform.name)
                    self.data_block_put(data, header, data_block)
                    self.logger.info('transform put data')
                except Exception as detail:
                    self.logger.error('exception from %s: %s' %
                                      (transform.name, detail))
                    self.offline_task_manager(data_block)
                break
            else:
                s = transform.stop_running.wait(1)
                if s:
                    self.logger.info("received stop_running signal for %s" %
                                     (transform.name, ))
                    break
                loop_counter += 1
                if loop_counter == data_to:
                    self.logger.info(
                        "transform %s did not get consumes data in %s seconds. Exiting"
                        % (transform.name, data_to))
                    break
Example No. 29
    def run_source(self, src):
        """
        Get the data from source
        and put it into the data block

        :type src: :obj:`~Worker`
        :arg src: source Worker
        """

        while True:
            try:
                logging.getLogger().info('Src %s calling acquire', src.name)
                data = src.worker.acquire()
                logging.getLogger().info('Src %s acquire returned', src.name)
                logging.getLogger().info('Src %s filling header', src.name)
                if data:
                    t = time.time()
                    header = datablock.Header(
                        self.data_block_t0.taskmanager_id,
                        create_time=t,
                        creator=src.module)
                    logging.getLogger().info('Src %s header done', src.name)
                    self.data_block_put(data, header, self.data_block_t0)
                    logging.getLogger().info('Src %s data block put done',
                                             src.name)
                else:
                    logging.getLogger().warning(
                        'Src %s acquire returned no data', src.name)
                src.run_counter += 1
                src.data_updated.set()
                logging.getLogger().info('Src %s %s finished cycle', src.name,
                                         src.module)
            except Exception as e:
                log_exception(
                    logging.getLogger(),
                    'Exception running source {} : {}'.format(src.name, e))
                self.offline_task_manager(self.data_block_t0)
            if src.schedule > 0:
                s = src.stop_running.wait(src.schedule)
                if s:
                    logging.getLogger().info(
                        'received stop_running signal for %s', src.name)
                    break
            else:
                logging.getLogger().info('source %s runs only once', src.name)
                break
        logging.getLogger().info('stopped %s', src.name)
Example No. 30
def test_DataBlock_duplicate(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    dblock_2 = dblock.duplicate()

    assert dblock.taskmanager_id == dblock_2.taskmanager_id
    assert dblock.generation_id == dblock_2.generation_id + 1
    assert dblock.sequence_id == dblock_2.sequence_id
    assert dblock._keys == dblock_2._keys

    for key in dblock._keys:
        assert dblock[key] == dblock_2[key]