    def test_DataBlock_constructor(self):
        dblock = datablock.DataBlock(self.dataspace, self.data["taskmanager"][0]["name"],
                                     self.data["taskmanager"][0]["taskmanager_id"])
        self.assertEqual(str(dblock.generation_id), self.data["dataproduct"][0]["generation_id"])

        dblock = datablock.DataBlock(self.dataspace, self.data["taskmanager"][0]["name"],
                                     generation_id=self.data["dataproduct"][0]["generation_id"])
        self.assertEqual(str(dblock.generation_id), self.data["dataproduct"][0]["generation_id"])

        dblock = datablock.DataBlock(self.dataspace, self.data["taskmanager"][0]["name"],
                                     taskmanager_id=self.data["taskmanager"][0]["taskmanager_id"],
                                     sequence_id=1)
        self.assertEqual(str(dblock.generation_id), self.data["dataproduct"][0]["generation_id"])
def test_DataBlock_constructor(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples

    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])
    assert dblock.generation_id == 1

    dblock = datablock.DataBlock(dataspace, my_tm["name"], generation_id=1)
    assert dblock.generation_id == 1

    dblock = datablock.DataBlock(dataspace,
                                 my_tm["name"],
                                 my_tm["taskmanager_id"],
                                 sequence_id=1)
    assert dblock.generation_id == 1
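These pytest variants assume a shared `dataspace` fixture imported from a common fixtures module, which is why each test carries a `# noqa: F811` redefinition marker. A minimal sketch of what such a fixture might look like; the module path and the setup step are assumptions, not the project's actual fixture:

import pytest

from decisionengine.framework.dataspace import dataspace as dataspace_module  # path assumed

@pytest.fixture()
def dataspace(global_config):  # shadows the module import, hence the F811 suppressions
    """Yield a DataSpace pre-loaded with example task managers (setup details assumed)."""
    ds = dataspace_module.DataSpace(global_config)
    # ... load example task managers and data products here ...
    yield ds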
Example #3
    def __init__(self, name, generation_id, channel_dict, global_config):
        """
        :type name: :obj:`str`
        :arg name: Name of channel corresponding to this task manager
        :type generation_id: :obj:`int`
        :arg generation_id: Task Manager generation id provided by caller
        :type channel_dict: :obj:`dict`
        :arg channel_dict: channel configuration
        :type global_config: :obj:`dict`
        :arg global_config: global configuration
        """
        self.id = str(uuid.uuid4()).upper()
        self.dataspace = dataspace.DataSpace(global_config)
        self.data_block_t0 = datablock.DataBlock(
            self.dataspace, name, self.id,
            generation_id)  # my current data block
        self.name = name
        self.channel = Channel(channel_dict)
        self.state = ProcessingState()
        self.loglevel = multiprocessing.Value('i', logging.WARNING)
        self.lock = threading.Lock()
        # The rest of this function will go away once the source-proxy
        # has been reimplemented.
        for src_worker in self.channel.sources.values():
            src_worker.worker.post_create(global_config)
def test_DataBlock_key_management(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    metadata = datablock.Metadata(
        my_tm["taskmanager_id"],
        generation_id=dataspace.get_last_generation_id(
            my_tm["name"], my_tm["taskmanager_id"]),
    )
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    # test with automatic metadata and string value
    dblock.put("example_test_key", "example_test_value", header)

    assert "example_test_key" in dblock.keys()
    assert "example_test_key" in dblock

    assert dblock.get("example_test_key") == "example_test_value"

    # Test product-retriever interface
    retriever = datablock.ProductRetriever("example_test_key", None, None)
    assert retriever(dblock) == "example_test_value"
    assert (
        str(retriever) ==
        "Product retriever for {'name': 'example_test_key', 'type': None, 'creator': None}"
    )

    # test new key with manual metadata and dict value
    newDict = {"subKey": "newValue"}
    dblock.put("new_example_test_key", newDict, header, metadata)
    assert dblock["new_example_test_key"] == newDict
def test_bad_datablock(global_config, dataspace, caplog):  # noqa: F811
    for channel in _TEST_CHANNEL_NAMES:
        with RunChannel(global_config, channel) as task_manager:
            dblock = datablock.DataBlock(dataspace, channel)
            task_manager.data_block_put("bad_string", "header", dblock)
            task_manager.take_offline()
            assert "data_block put expecting" in caplog.text
    def rpc_print_products(self):
        with self.channel_workers.access() as workers:
            channel_keys = workers.keys()
            if not channel_keys:
                return "No channels are currently active.\n"

            width = max(len(x) for x in channel_keys) + 1
            txt = ""
            for ch, worker in workers.items():
                if not worker.is_alive():
                    txt += f"Channel {ch} is in ERROR state\n"
                    continue

                txt += f"channel: {ch:<{width}}, id = {worker.task_manager.id:<{width}}, state = {worker.get_state_name():<10} \n"
                tm = self.dataspace.get_taskmanager(ch)
                data_block = datablock.DataBlock(
                    self.dataspace, ch, taskmanager_id=tm["taskmanager_id"], sequence_id=tm["sequence_id"]
                )
                data_block.generation_id -= 1
                channel_config = self.channel_config_loader.get_channels()[ch]
                produces = worker.get_produces()
                for i in ("sources", "transforms", "logicengines", "publishers"):
                    txt += f"\t{i}:\n"
                    modules = channel_config.get(i, {})
                    for mod_name in modules.keys():
                        txt += f"\t\t{mod_name}\n"
                        products = produces.get(mod_name, [])
                        for product in products:
                            try:
                                df = data_block[product]
                                df = pd.read_json(df.to_json())
                                txt += f"{tabulate.tabulate(df, headers='keys', tablefmt='psql')}\n"
                            except Exception as e:  # pragma: no cover
                                txt += f"\t\t\t{e}\n"
        return txt[:-1]
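One detail worth calling out in `rpc_print_products`: after constructing the `DataBlock` from the task manager's current `sequence_id`, the code decrements `generation_id` by one so that lookups read the last completed generation rather than the one still being written. The pattern in isolation, a sketch with a hypothetical channel and product name:

def last_completed_product(dspace, channel, product):
    """Fetch `product` from the previous (completed) generation of `channel`."""
    tm = dspace.get_taskmanager(channel)
    block = datablock.DataBlock(dspace, channel,
                                taskmanager_id=tm["taskmanager_id"],
                                sequence_id=tm["sequence_id"])
    block.generation_id -= 1  # the current generation may still be in flight
    return block[product]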
def test_DataBlock_key_management(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    assert "example_test_key" in dblock.keys()
    assert "example_test_key" in dblock

    assert dblock.get("example_test_key") == "example_test_value"

    # Test product-retriever interface
    retriever = datablock.ProductRetriever("example_test_key", None, None)
    assert retriever(dblock) == "example_test_value"
    assert (
        str(retriever) ==
        "Product retriever for {'name': 'example_test_key', 'type': None, 'creator': None}"
    )

    # FIXME: The following behavior should be disallowed for data-integrity reasons!
    #        i.e. replacing a product name with a different value.
    newDict = {"subKey": "newValue"}
    dblock.put("example_test_key", newDict, header)
    assert dblock["example_test_key"] == newDict
    def rpc_print_products(self):
        width = max(len(x) for x in self.task_managers.keys()) + 1
        txt = ""
        for ch, worker in self.task_managers.items():
            sname = TaskManager.STATE_NAMES[worker.task_manager.get_state()]
            txt += "channel: {:<{width}}, id = {:<{width}}, state = {:<10} \n".format(
                ch, worker.task_manager.id, sname, width=width)
            tm = self.dataspace.get_taskmanager(ch)
            data_block = datablock.DataBlock(
                self.dataspace,
                ch,
                taskmanager_id=tm['taskmanager_id'],
                sequence_id=tm['sequence_id'])
            data_block.generation_id -= 1
            channel_config = self.config_manager.get_channels()[ch]
            produces = self.config_manager.get_produces(channel_config)
            for i in ("sources", "transforms", "logicengines", "publishers"):
                txt += "\t{}:\n".format(i)
                modules = channel_config.get(i, {})
                for mod_name, mod_config in modules.items():  # items(), not the Python 2 iteritems()
                    txt += "\t\t{}\n".format(mod_name)
                    products = produces.get(mod_name, [])
                    for product in products:
                        try:
                            df = data_block[product]
                            df = pd.read_json(df.to_json())
                            txt += "{}\n".format(
                                tabulate.tabulate(df,
                                                  headers='keys',
                                                  tablefmt='psql'))
                        except Exception as e:
                            txt += "\t\t\t{}\n".format(str(e))
        return txt[:-1]
Example #9
def test_bad_datablock(global_config, dataspace, caplog):  # noqa: F811
    with RunChannel(global_config, "test_channel") as task_manager:
        task_manager.state.wait_while(State.ACTIVE)
        dblock = datablock.DataBlock(dataspace, task_manager.name)
        task_manager.data_block_put("bad_string", "header", dblock)
        task_manager.take_offline()
        assert "data_block put expecting" in caplog.text
def test_DataBlock_to_str(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples

    expected = {
        "taskmanager_id": my_tm["taskmanager_id"],
        "generation_id": dataspace.get_last_generation_id(
            my_tm["name"], my_tm["taskmanager_id"]),
        "sequence_id": len(dataspace.get_dataproducts(my_tm["sequence_id"])) + 1,
        "keys": ["example_test_key"],
        "dataproducts": {"example_test_key": "example_test_value"},
    }

    header = datablock.Header(my_tm["taskmanager_id"])

    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])
    dblock.put("example_test_key", "example_test_value", header)

    result = ast.literal_eval(str(dblock))
    assert result == expected
Example #11
    def acquire(self):
        """
        Overrides Source class method
        """
        data_block = None
        for _ in range(self.retries):
            try:
                tm = self.dataspace.get_taskmanager(self.source_channel)
                self.logger.debug('task manager %s', tm)
                if tm['taskmanager_id']:
                    # get last datablock
                    data_block = datablock.DataBlock(
                        self.dataspace,
                        self.source_channel,
                        taskmanager_id=tm['taskmanager_id'],
                        sequence_id=tm['sequence_id'])
                    self.logger.debug('data block %s', data_block)
                    if data_block and data_block.generation_id:
                        self.logger.debug("DATABLOCK %s", data_block)
                        # This is a valid datablock
                        break
            except Exception as detail:
                self.logger.error('Error getting datablock for %s %s',
                                  self.source_channel, detail)

            time.sleep(self.retry_to)

        if not data_block:
            raise RuntimeError('Could not get data.')

        rc = {}
        filled_keys = []
        for _ in range(self.retries):
            if len(filled_keys) != len(self.data_keys):
                for k in self.data_keys:
                    if isinstance(k, (tuple, list)):
                        k_in, k_out = k[0], k[1]
                    else:
                        k_in = k_out = k
                    if k_in not in filled_keys:
                        try:
                            rc[k_out] = pd.DataFrame(
                                self._get_data(data_block, k_in))
                            filled_keys.append(k)
                        except KeyError as ke:
                            self.logger.debug("KEYERROR %s", ke)
            if len(filled_keys) == len(self.data_keys):
                break
            # expected data is not ready yet
            time.sleep(self.retry_to)

        if len(filled_keys) != len(self.data_keys):
            raise RuntimeError(
                'Could not get all data. Expected {} Filled {}'.format(
                    self.data_keys, filled_keys))
        return rc
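This `acquire` accepts `data_keys` entries either as plain names or as `(input, output)` pairs that rename a product on the way through. An illustrative module configuration; every key and value below is a hypothetical example, not taken from a real channel:

# All names and values here are hypothetical illustrations.
parameters = {
    "source_channel": "resource_request",
    "retries": 3,
    "retry_timeout": 20,
    "data_keys": [
        "Factory_Entries",            # copied through under its own name
        ("job_manifests", "jobs"),    # fetched as job_manifests, published as jobs
    ],
}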
def test_DataBlock_get_header(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    assert header == dblock.get_header("example_test_key")
Beispiel #13
0
    def rpc_query_tool(self, product, format=None, start_time=None):
        with QUERY_TOOL_HISTOGRAM.labels(product).time():
            found = False
            result = pd.DataFrame()
            txt = f"Product {product}: "

            with self.channel_workers.access() as workers:
                for ch, worker in workers.items():
                    if not worker.is_alive():
                        txt += f"Channel {ch} is in not active\n"
                        continue

                    produces = worker.get_produces()
                    r = [x for x in list(produces.items()) if product in x[1]]
                    if not r:
                        continue
                    found = True
                    txt += f" Found in channel {ch}\n"

                    if start_time:
                        tms = self.dataspace.get_taskmanagers(
                            ch, start_time=start_time)
                    else:
                        tms = [self.dataspace.get_taskmanager(ch)]
                    for tm in tms:
                        try:
                            data_block = datablock.DataBlock(
                                self.dataspace,
                                ch,
                                taskmanager_id=tm["taskmanager_id"],
                                sequence_id=tm["sequence_id"])
                            products = data_block.get_dataproducts(product)
                            for p in products:
                                df = p["value"]
                                if df.shape[0] > 0:
                                    df["channel"] = [tm["name"]] * df.shape[0]
                                    df["taskmanager_id"] = [
                                        p["taskmanager_id"]
                                    ] * df.shape[0]
                                    df["generation_id"] = [p["generation_id"]
                                                           ] * df.shape[0]
                                    result = pd.concat([result, df])  # DataFrame.append was removed in pandas 2
                        except Exception as e:  # pragma: no cover
                            txt += f"\t\t{e}\n"

            if found:
                dataframe_formatter = self._dataframe_to_table
                if format == "csv":
                    dataframe_formatter = self._dataframe_to_csv
                if format == "json":
                    dataframe_formatter = self._dataframe_to_json
                result = result.reset_index(drop=True)
                txt += dataframe_formatter(result)
            else:
                txt += "Not produced by any module\n"
            return txt
def test_DataBlock_no_key_by_name(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    with pytest.raises(KeyError):
        dblock["no_such_key_exists"]
def test_DataBlock_mark_expired(dataspace):  # noqa: F811
    # mark_expired is just a stub in this case
    # failure in a real implementation should raise an exception
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    assert dblock.mark_expired(1) is None
    def rpc_print_product(self, product, columns=None, query=None):
        found = False
        txt = "Product {}: ".format(product)
        for ch, worker in self.task_managers.items():
            channel_config = self.config_manager.get_channels()[ch]
            produces = self.config_manager.get_produces(channel_config)
            r = [x for x in produces.items() if product in x[1]]  # a list, so the emptiness check below works on Python 3
            if not r:
                continue
            found = True
            txt += " Found in channel {}\n".format(ch)
            tm = self.dataspace.get_taskmanager(ch)
            try:
                data_block = datablock.DataBlock(
                    self.dataspace,
                    ch,
                    taskmanager_id=tm['taskmanager_id'],
                    sequence_id=tm['sequence_id'])
                data_block.generation_id -= 1
                df = data_block[product]
                df = pd.read_json(df.to_json())
                column_names = []
                if columns:
                    column_names = columns.split(",")
                if query:
                    if column_names:
                        txt += "{}\n".format(
                            tabulate.tabulate(
                                df.loc[:, column_names].query(query),
                                headers='keys',
                                tablefmt='psql'))
                    else:
                        txt += "{}\n".format(
                            tabulate.tabulate(df.query(query),
                                              headers='keys',
                                              tablefmt='psql'))

                else:
                    if column_names:
                        txt += "{}\n".format(
                            tabulate.tabulate(df.loc[:, column_names],
                                              headers='keys',
                                              tablefmt='psql'))
                    else:
                        txt += "{}\n".format(
                            tabulate.tabulate(df,
                                              headers='keys',
                                              tablefmt='psql'))
            except Exception as e:
                txt += "\t\t{}\n".format(str(e))
        if not found:
            txt += "Not Found\n"
        return txt[:-1]
def test_DataBlock_get_dataproducts(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    products = dblock.get_dataproducts()
    assert len(products) == 1
    assert products[0]["key"] == "example_test_key"
    assert products[0]["value"] == "example_test_value"
Example #18
    def acquire(self):
        """
        Overrides Source class method
        """
        data_block = None
        for _ in range(self.max_attempts):
            try:
                tm = self.dataspace.get_taskmanager(self.source_channel)
                self.logger.debug("task manager %s", tm)
                if tm["taskmanager_id"]:
                    # get last datablock
                    data_block = datablock.DataBlock(
                        self.dataspace,
                        self.source_channel,
                        taskmanager_id=tm["taskmanager_id"],
                        sequence_id=tm["sequence_id"],
                    )
                    self.logger.debug("data block %s", data_block)
                    if data_block and data_block.generation_id:
                        self.logger.debug("DATABLOCK %s", data_block)
                        # This is a valid datablock
                        break
            except Exception as detail:
                self.logger.error("Error getting datablock for %s %s",
                                  self.source_channel, detail)

            time.sleep(self.retry_interval)

        if not data_block:
            raise RuntimeError("Could not get data.")

        rc = {}
        filled_keys = []
        for _ in range(self.max_attempts):
            if len(filled_keys) != len(self.data_keys):
                for k_in, k_out in self.data_keys.items():
                    if k_in not in filled_keys:
                        try:
                            rc[k_out] = self._get_data(data_block, k_in)
                            filled_keys.append(k_in)
                        except KeyError as ke:
                            self.logger.debug("KEYERROR %s", ke)
            if len(filled_keys) == len(self.data_keys):
                break
            # expected data is not ready yet
            time.sleep(self.retry_interval)

        if len(filled_keys) != len(self.data_keys):
            raise RuntimeError(
                f"Could not get all data. Expected {self.data_keys} Filled {filled_keys}"
            )
        return rc
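Note that this newer variant expects `data_keys` to be a mapping rather than a list: it iterates `self.data_keys.items()` to get the input and output names directly. The equivalent of the earlier tuple form, again with hypothetical values:

# Hypothetical values: each input product name maps to the name it is
# published under.
data_keys = {
    "Factory_Entries": "Factory_Entries",  # kept under its own name
    "job_manifests": "jobs",               # renamed on the way through
}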
def test_DataBlock_key_management_change_name(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    # FIXME: The following behavior should be disallowed for data-integrity reasons!
    #        i.e. replacing a product name from datablock.ProductRetriever with a
    #             different value.
    newDict = {"subKey": "newValue"}
    dblock.put("example_test_key", newDict, header)
    assert dblock["example_test_key"] == newDict
def test_DataBlock_get_metadata(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    metadata = datablock.Metadata(
        my_tm["taskmanager_id"],
        generation_id=dataspace.get_last_generation_id(
            my_tm["name"], my_tm["taskmanager_id"]),
    )
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header, metadata)

    assert metadata == dblock.get_metadata("example_test_key")
def test_DataBlock_is_expired_with_key(dataspace):  # noqa: F811
    """This test just validates the method/function exists.
    The stub within our default code should be replaced
    by a class inheriting from it.
    That class should have more rational return types.
    """
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    assert dblock.is_expired(key="example_test_key") is None
Example #22
def main():
    """
    Call this as a test unit or use it as the CLI of this module
    """
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('--configtemplate',
                        action='store_true',
                        help='prints the expected module configuration')

    parser.add_argument(
        '--configinfo',
        action='store_true',
        help='prints config template along with produces and consumes info')
    args = parser.parse_args()
    if args.configtemplate:
        module_config_template()
    elif args.configinfo:
        module_config_info()
    else:
        config_manager = configmanager.ConfigManager()
        config_manager.load()
        global_config = config_manager.get_global_config()
        print("GLOBAL CONF", global_config)
        ds = dataspace.DataSpace(global_config)

        data_block = datablock.DataBlock(
            ds,
            # '5CC840DD-88B9-45CE-9DA2-FF531289AC66',
            'C56E0AAF-99D3-42A8-88A3-921E30C1879C',
            1)

        fm_info = AWSFOMPublisher({
            "publish_to_graphite": True,
            "graphite_host": "fifemondata.fnal.gov",
            "graphite_port": 2104,
            "graphite_context": "hepcloud.aws",
            "output_file": "%s/de_data/AWS_figure_of_merit.csv" % (os.environ.get('HOME'), )
        })
        rc = fm_info.publish(data_block)
Example #23
    def rpc_print_products(self):
        with self.workers.access() as workers:
            channel_keys = workers.keys()
            if not channel_keys:
                return "No channels are currently active.\n"

            width = max([len(x) for x in channel_keys]) + 1
            txt = ""
            for ch, worker in workers.items():
                if not worker.is_alive():
                    txt += f"Channel {ch} is in ERROR state\n"
                    continue

                txt += "channel: {:<{width}}, id = {:<{width}}, state = {:<10} \n".format(
                    ch,
                    worker.task_manager_id,
                    worker.get_state_name(),
                    width=width)
                tm = self.dataspace.get_taskmanager(ch)
                data_block = datablock.DataBlock(
                    self.dataspace,
                    ch,
                    taskmanager_id=tm['taskmanager_id'],
                    sequence_id=tm['sequence_id'])
                data_block.generation_id -= 1
                channel_config = self.channel_config_loader.get_channels()[ch]
                produces = self.channel_config_loader.get_produces(
                    channel_config)
                for i in ("sources", "transforms", "logicengines",
                          "publishers"):
                    txt += "\t{}:\n".format(i)
                    modules = channel_config.get(i, {})
                    for mod_name, mod_config in modules.items():
                        txt += "\t\t{}\n".format(mod_name)
                        products = produces.get(mod_name, [])
                        for product in products:
                            try:
                                df = data_block[product]
                                df = pd.read_json(df.to_json())
                                txt += "{}\n".format(
                                    tabulate.tabulate(df,
                                                      headers='keys',
                                                      tablefmt='psql'))
                            except Exception as e:
                                txt += "\t\t\t{}\n".format(e)
        return txt[:-1]
def test_DataBlock_duplicate(dataspace):  # noqa: F811
    my_tm = dataspace.get_taskmanagers()[0]  # fetch one of our loaded examples
    header = datablock.Header(my_tm["taskmanager_id"])
    dblock = datablock.DataBlock(dataspace, my_tm["name"],
                                 my_tm["taskmanager_id"])

    dblock.put("example_test_key", "example_test_value", header)

    dblock_2 = dblock.duplicate()

    assert dblock.taskmanager_id == dblock_2.taskmanager_id
    assert dblock.generation_id == dblock_2.generation_id + 1
    assert dblock.sequence_id == dblock_2.sequence_id
    assert dblock._keys == dblock_2._keys

    for key in dblock._keys:
        assert dblock[key] == dblock_2[key]
Example #25
    def __init__(self, name, task_manager_id, generation_id, channel_dict,
                 global_config):
        """
        :type name: :obj:`str`
        :arg name: Name of channel corresponding to this task manager
        :type task_manager_id: :obj:`int`
        :arg task_manager_id: Task Manager id provided by caller
        :type generation_id: :obj:`int`
        :arg generation_id: Task Manager generation id provided by caller
        :type channel_dict: :obj:`dict`
        :arg channel_dict: channel configuration
        :type global_config: :obj:`dict`
        :arg global_config: global configuration
        """
        self.dataspace = dataspace.DataSpace(global_config)
        self.data_block_t0 = datablock.DataBlock(
            self.dataspace, name, task_manager_id,
            generation_id)  # my current data block
        self.name = name
        self.id = task_manager_id
        self.channel = Channel(channel_dict)
        self.state = multiprocessing.Value('i', BOOT)
        self.decision_cycle_active = False
        self.lock = threading.Lock()
        self.logger = de_logger.get_logger()
        self.stop = False  # stop running all loops when this is True
Example #26
def main():
    """
    Call this as a test unit or use it as the CLI of this module
    """
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('--configtemplate',
                        action='store_true',
                        help='prints the expected module configuration')

    parser.add_argument(
        '--configinfo',
        action='store_true',
        help='prints config template along with produces and consumes info')
    args = parser.parse_args()
    if args.configtemplate:
        module_config_template()
    elif args.configinfo:
        module_config_info()
    else:
        config_manager = configmanager.ConfigManager()
        config_manager.load()
        global_config = config_manager.get_global_config()
        print "GLOBAL CONF", global_config
        ds = dataspace.DataSpace(global_config)

        #data_block = datablock.DataBlock(ds,
        #                                 '6D596F43-B4DB-4418-812A-79869001E72B',
        #                                 1)
        data_block = datablock.DataBlock(
            ds, "AWS_Calculations_with_source_proxy",
            "F70B4110-E66D-49CA-9333-4A983A679F37", 1, 109)

        fm_info = FigureOfMerit()
        rc = fm_info.transform(data_block)
        print "INFO"
        print rc
Example #27
    def __init__(self, name, workers, dataspace, expected_products, exchange,
                 broker_url, queue_info):
        """
        :type name: :obj:`str`
        :arg name: Name of channel corresponding to this task manager
        :type generation_id: :obj:`int`
        :arg generation_id: Task manager generation id provided by caller
        :type channel_dict: :obj:`dict`
        :arg channel_dict: channel configuration
        :type global_config: :obj:`dict`
        :arg global_config: global configuration
        """
        self.name = name
        self.state = ProcessingState()
        self.loglevel = multiprocessing.Value("i", logging.WARNING)

        self.id = str(uuid.uuid4()).upper()
        self.data_block_t0 = datablock.DataBlock(dataspace, name, self.id,
                                                 1)  # my current data block
        self.logger = structlog.getLogger(CHANNELLOGGERNAME)
        self.logger = self.logger.bind(module=__name__.split(".")[-1],
                                       channel=self.name)

        # The DE owns the sources
        self.source_workers = workers["sources"]
        self.transform_workers = workers["transforms"]
        self.logic_engine = workers["logic_engine"]
        self.publisher_workers = workers["publishers"]

        self.exchange = exchange
        self.broker_url = broker_url
        self.connection = Connection(self.broker_url)

        self.source_product_cache = SourceProductCache(expected_products,
                                                       self.logger)
        self.queue_info = queue_info
        self.routing_keys = [info[1] for info in self.queue_info]
Example #28
    def acquire(self):
        """
        Overrides Source class method
        """
        retry_cnt = 0
        data_block = None
        while retry_cnt < self.retries:
            try:
                tm = self.dataspace.get_taskmanager(self.source_channel)
                self.logger.debug('task manager %s', tm)
                if tm['taskmanager_id']:
                    # get last datablock
                    data_block = datablock.DataBlock(
                        self.dataspace,
                        self.source_channel,
                        taskmanager_id=tm['taskmanager_id'],
                        sequence_id=tm['sequence_id'])
                    break
                else:
                    retry_cnt += 1
                    time.sleep(self.retry_to)
            except Exception as detail:  # Python 3 syntax; was "except Exception, detail:"
                self.logger.error('Error getting datablock for %s %s',
                                  self.source_channel, detail)
                retry_cnt += 1  # count failed attempts too, to avoid retrying forever
                time.sleep(self.retry_to)
    def rpc_print_product(self,
                          product,
                          columns=None,
                          query=None,
                          types=False,
                          format=None):
        found = False
        txt = "Product {}: ".format(product)
        with self.workers.access() as workers:
            for ch, worker in workers.items():
                if not worker.is_alive():
                    txt += f"Channel {ch} is in not active\n"
                    continue

                produces = worker.get_produces()
                r = [x for x in list(produces.items()) if product in x[1]]
                if not r:
                    continue
                found = True
                txt += " Found in channel {}\n".format(ch)
                tm = self.dataspace.get_taskmanager(ch)
                try:
                    data_block = datablock.DataBlock(
                        self.dataspace,
                        ch,
                        taskmanager_id=tm['taskmanager_id'],
                        sequence_id=tm['sequence_id'])
                    data_block.generation_id -= 1
                    df = data_block[product]
                    df = pd.read_json(df.to_json())
                    dataframe_formatter = self._dataframe_to_table
                    if format == 'vertical':
                        dataframe_formatter = self._dataframe_to_vertical_tables
                    if format == 'column-names':
                        dataframe_formatter = self._dataframe_to_column_names
                    if format == 'json':
                        dataframe_formatter = self._dataframe_to_json
                    if types:
                        for column in df.columns:
                            df.insert(
                                df.columns.get_loc(column) + 1,
                                f"{column}.type", df[column].transform(
                                    lambda x: type(x).__name__))
                    column_names = []
                    if columns:
                        column_names = columns.split(",")
                    if query:
                        if column_names:
                            txt += dataframe_formatter(
                                df.loc[:, column_names].query(query))
                        else:
                            txt += dataframe_formatter(df.query(query))

                    else:
                        if column_names:
                            txt += dataframe_formatter(df.loc[:, column_names])
                        else:
                            txt += dataframe_formatter(df)
                except Exception as e:  # pragma: no cover
                    txt += "\t\t{}\n".format(e)
        if not found:
            txt += "Not produced by any module\n"
        return txt[:-1]
Example #30
    def rpc_print_product(self,
                          product,
                          columns=None,
                          query=None,
                          types=False,
                          format=None):
        if not isinstance(product, str):
            raise ValueError(
                f"Requested product should be a string not {type(product)}")

        found = False
        txt = f"Product {product}: "
        with self.channel_workers.access() as workers:
            for ch, worker in workers.items():
                if not worker.is_alive():
                    txt += f"Channel {ch} is in not active\n"
                    self.logger.debug(
                        f"Channel:{ch} is in not active when running rpc_print_product"
                    )
                    continue

                produces = worker.get_produces()
                r = [x for x in list(produces.items()) if product in x[1]]
                if not r:
                    continue
                found = True
                txt += f" Found in channel {ch}\n"
                self.logger.debug(
                    f"Found channel:{ch} active when running rpc_print_product"
                )
                tm = self.dataspace.get_taskmanager(ch)
                self.logger.debug(
                    f"rpc_print_product - channel:{ch} taskmanager:{tm}")
                try:
                    data_block = datablock.DataBlock(
                        self.dataspace,
                        ch,
                        taskmanager_id=tm["taskmanager_id"],
                        sequence_id=tm["sequence_id"])
                    data_block.generation_id -= 1
                    df = data_block[product]
                    dfj = df.to_json()
                    self.logger.debug(
                        f"rpc_print_product - channel:{ch} task manager:{tm} datablock:{dfj}"
                    )
                    df = pd.read_json(dfj)
                    dataframe_formatter = self._dataframe_to_table
                    if format == "vertical":
                        dataframe_formatter = self._dataframe_to_vertical_tables
                    if format == "column-names":
                        dataframe_formatter = self._dataframe_to_column_names
                    if format == "json":
                        dataframe_formatter = self._dataframe_to_json
                    if types:
                        for column in df.columns:
                            df.insert(
                                df.columns.get_loc(column) + 1,
                                f"{column}.type",
                                df[column].transform(
                                    lambda x: type(x).__name__),
                            )
                    column_names = []
                    if columns:
                        column_names = columns.split(",")
                    if query:
                        if column_names:
                            txt += dataframe_formatter(
                                df.loc[:, column_names].query(query))
                        else:
                            txt += dataframe_formatter(df.query(query))

                    else:
                        if column_names:
                            txt += dataframe_formatter(df.loc[:, column_names])
                        else:
                            txt += dataframe_formatter(df)
                except Exception as e:  # pragma: no cover
                    txt += f"\t\t{e}\n"
        if not found:
            txt += "Not produced by any module\n"
        return txt[:-1]