Beispiel #1
0
    def run(self):
        """Receive messages from the query and report on each of them.

        Loops until getting the next message fails. For the metadata-only
        transfer types ("QUERY_NEXT_METADATA", "STREAM_METADATA") the file
        the metadata refers to is opened and read once; for all other
        transfer types the file path and the metadata are printed. The first
        100 characters of the string form of the data are printed in both
        cases.
        """
        while True:
            try:
                self.log.debug("Worker-%s: waiting", self.identifier)
                [metadata, data] = self.query.get()
                time.sleep(0.1)
            except Exception:
                # Record why the worker stops instead of exiting silently.
                self.log.error("Worker-%s: getting data failed, stopping",
                               self.identifier, exc_info=True)
                break

            if self.transfer_type in [
                    "QUERY_NEXT_METADATA", "STREAM_METADATA"
            ]:
                self.log.debug("Worker-%s: metadata %s", self.identifier,
                               metadata["filename"])

                filepath = generate_filepath(self.basepath, metadata)

                self.log.debug("Worker-%s: filepath %s", self.identifier,
                               filepath)

                # Binary mode: the content is discarded anyway, and reading
                # in text mode fails on files that are not decodable text.
                with open(filepath, "rb") as file_descriptor:
                    file_descriptor.read()
                    self.log.debug("Worker-%s: file %s read", self.identifier,
                                   filepath)
            else:
                print("filepath", generate_filepath(self.basepath, metadata))
                print("metadata", metadata)

            print("data", str(data)[:100])
Beispiel #2
0
    def get_metadata(self, targets, metadata):
        """Implementation of the abstract method get_metadata.

        Builds the source and target file paths from the metadata and, if
        targets are given, extends the metadata in place with the file
        statistics, the configured chunk size and the confirmation setting.

        Args:
            targets (list): The target list this file is supposed to go.
            metadata (dict): The dictionary with the metadata to extend.

        Raises:
            Exception: Whatever reading the file statistics or filling the
                metadata raised; it is logged and re-raised unchanged.
        """

        # Build source file
        self.source_file = generate_filepath(metadata["source_path"], metadata)
        # Build target file
        # if local_target is not set (== None) generate_filepath returns None
        self.target_file = generate_filepath(self.config_df["local_target"],
                                             metadata)

        # Without targets there is nothing to extend.
        if not targets:
            return

        try:
            # A single stat call yields size and timestamps from the same
            # file state; separate calls could observe a file that changed
            # in between.
            file_stat = os.stat(self.source_file)
        except Exception:
            self.log.error("Unable to create metadata dictionary.")
            raise

        try:
            self.log.debug("create metadata for source file...")
            # metadata = {
            #        "filename"       : ...,
            #        "source_path"     : ...,  # in unix format
            #        "relative_path"   : ...,  # in unix format
            #        "filesize"       : ...,
            #        "file_mod_time"    : ...,
            #        "file_create_time" : ...,
            #        "chunksize"      : ...
            #        }
            if self.is_windows:
                # TODO use pathlib here instead
                metadata["source_path"] = (
                    metadata["source_path"].replace("\\", "/"))
                metadata["relative_path"] = (
                    metadata["relative_path"].replace("\\", "/"))

            metadata["filesize"] = file_stat.st_size
            metadata["file_mod_time"] = file_stat.st_mtime
            metadata["file_create_time"] = file_stat.st_ctime
            metadata["chunksize"] = self.config_df["chunksize"]
            if (self.config_df["use_cleaner"] and
                    self.config_df["remove_data"] == "with_confirmation"):
                metadata["confirmation_required"] = (
                    self.confirmation_topic.decode())
            else:
                metadata["confirmation_required"] = False

            self.log.debug("metadata = %s", metadata)
        except Exception:
            self.log.error("Unable to assemble multi-part message.")
            raise
Beispiel #3
0
    def get_metadata(self, targets, metadata):
        """Implementation of the abstract method get_metadata.

        Fetches the next metadata/data pair from the transfer, compares it
        with the given metadata and builds the source and target file paths
        from the received metadata. If targets are given, entries missing in
        the received metadata are reported and, where a fallback exists,
        filled in.

        Args:
            targets (list): The target list this file is supposed to go.
            metadata (dict): The dictionary with the metadata to extend.
        """

        # Get new data
        self.metadata_r, self.data_r = self.transfer.get(10000)
        received = self.metadata_r

        # A mismatch is only reported; processing continues regardless.
        identifying_keys = ("relative_path", "source_path", "filename")
        if any(metadata[key] != received[key] for key in identifying_keys):
            self.log.error("Received metadata do not match data")

        # Use received data to prevent mismatch of metadata and data
        # TODO handle case if file type requested by target does not match

        # pylint: disable=attribute-defined-outside-init

        # Build source file
        self.source_file = generate_filepath(received["source_path"],
                                             received)

        # Build target file
        # if local_target is not set (== None) generate_filepath returns None
        self.target_file = generate_filepath(self.config_df["local_target"],
                                             received)

        # The received metadata are only extended if there are targets.
        if not targets:
            return

        # A missing file size is reported but has no fallback value.
        if "filesize" not in received:
            self.log.error("Received metadata do not contain 'filesize'")

        if "file_mod_time" not in received:
            self.log.error("Received metadata do not contain "
                           "'file_mod_time'. Setting it to current time")
            received["file_mod_time"] = time.time()

        if "file_create_time" not in received:
            self.log.error("Received metadata do not contain "
                           "'file_create_time'. Setting it to current "
                           "time")
            received["file_create_time"] = time.time()

        if "chunksize" not in received:
            self.log.error("Received metadata do not contain 'chunksize'. "
                           "Setting it to locally configured one")
            received["chunksize"] = self.config_df["chunksize"]
Beispiel #4
0
    def get_metadata(self, targets, metadata):
        """Implementation of the abstract method get_metadata.

        Builds the source file path and, if targets are given, extends the
        metadata in place: file size and chunk size are set to None and both
        time stamps are taken from the current time.

        Args:
            targets (list): The target list this file is supposed to go.
            metadata (dict): The dictionary with the metadata to extend.

        Raises:
            Exception: Whatever filling the metadata raised; it is logged
                and re-raised unchanged.
        """

        # Build source file
        self.source_file = generate_filepath(metadata["source_path"], metadata)

        # Nothing to extend if the file does not go anywhere.
        if not targets:
            return

        try:
            self.log.debug("create metadata for source file...")
            # Entries set here: "filesize", "file_mod_time",
            # "file_create_time" and "chunksize".
            metadata["filesize"] = None
            metadata["file_mod_time"] = time.time()
            metadata["file_create_time"] = time.time()
            metadata["chunksize"] = None

            self.log.debug("metadata = %s", metadata)
        except Exception:
            self.log.error("Unable to assemble multi-part message.")
            raise
Beispiel #5
0
def run_plugin_thread(plugin_name, plugin_config, target_dir, data_queue, log,
                      event):
    """
    Load, set up and run a plugin until the stop event is set

    The plugin module is imported from the ``plugins`` package. If loading
    or setting up the plugin fails, the error is logged and the function
    returns. Otherwise messages are taken from the queue and handed to the
    plugin until the event is set; finally the plugin is stopped. Errors
    raised while processing a message or while stopping are logged and not
    propagated.

    This function is intended to execute all plugin code on a separate thread
    to protect the main thread from slow or blocking plugins.

    Parameters
    ----------
    plugin_name: str
        The name of the module in the plugin directory
    plugin_config: dict
        The plugin configuration options
    target_dir: str
        The local part of the target directory
    data_queue: queue.Queue
        The queue instance used for message passing
    log: logging.Logger
        A logger instance
    event: threading.Event
        Event instance for receiving the stop signal at shutdown
    """
    try:
        module = import_module("plugins." + plugin_name)
        plugin = module.Plugin(plugin_config)
        plugin.setup()
        log.info("Loading '%s' plugin", plugin_name)
    except Exception:
        log.error("Could not load '%s' plugin", plugin_name, exc_info=True)
        return

    while not event.is_set():
        # The short timeout lets the loop re-check the stop event regularly.
        try:
            message = data_queue.get(timeout=0.5)
        except queue.Empty:
            continue

        try:
            metadata, payload = message
            local_path = generate_filepath(target_dir, metadata)
            plugin.process(local_path=local_path,
                           metadata=metadata,
                           data=payload)
        except Exception:
            log.error("Processing data with '%s' plugin failed.",
                      plugin_name,
                      exc_info=True)

        # The message counts as done even if processing it failed.
        data_queue.task_done()

    try:
        plugin.stop()
    except Exception:
        log.error("Error while stopping '%s' plugin",
                  plugin_name,
                  exc_info=True)
Beispiel #6
0
    def get_metadata(self, targets, metadata):
        """Implementation of the abstract method get_metadata.

        Joins the source file location from the metadata, builds the target
        file path and stores the configured chunk size in the metadata. If
        targets are given, the time stamps (current time) and the
        confirmation setting are added as well.

        Args:
            targets (list): The target list this file is supposed to go.
            metadata (dict): The dictionary with the metadata to extend.

        Raises:
            Exception: Whatever filling the metadata raised; it is logged
                and re-raised unchanged.
        """

        # pylint: disable=attribute-defined-outside-init

        # no normpath used because that would transform http://...
        # into http:/...
        path_parts = (metadata["source_path"],
                      metadata["relative_path"],
                      metadata["filename"])
        self.source_file = os.path.join(*path_parts)

        # Build target file
        # if local_target is not set (== None) generate_filepath returns None
        self.target_file = generate_filepath(self.config_df["local_target"],
                                             metadata)

        # The chunk size is set whether or not there are targets.
        metadata["chunksize"] = self.config_df["chunksize"]

        if not targets:
            return

        try:
            self.log.debug("create metadata for source file...")
            # Entries added here: "file_mod_time", "file_create_time" and
            # "confirmation_required".
            metadata["file_mod_time"] = time.time()
            metadata["file_create_time"] = time.time()

            needs_confirmation = (
                self.config_df["remove_data"] == "with_confirmation")
            metadata["confirmation_required"] = (
                self.confirmation_topic.decode() if needs_confirmation
                else False)

            self.log.debug("metadata = %s", metadata)
        except Exception:
            self.log.error("Unable to assemble multi-part message.",
                           exc_info=True)
            raise
Beispiel #7
0
    def get_metadata(self, targets, metadata):
        """Implementation of the abstract method get_metadata.

        Builds the source and target file paths and, if targets are given,
        extends the metadata in place with a file size of 0, fixed time
        stamps and the configured chunk size.

        Args:
            targets (list): The target list this file is supposed to go.
            metadata (dict): The dictionary with the metadata to extend.
        """
        # pylint: disable=attribute-defined-outside-init

        # Build source file
        self.source_file = generate_filepath(metadata["source_path"], metadata)

        # Build target file
        # if local_target is not set (== None) generate_filepath returns None
        local_target = self.config_df["local_target"]
        self.target_file = generate_filepath(local_target, metadata)

        # Metadata are only extended if the file goes somewhere.
        if not targets:
            return

        # Size and time stamps are hard-coded constants, not read from a file.
        metadata["filesize"] = 0
        metadata["file_mod_time"] = 1481734310.6207027
        metadata["file_create_time"] = 1481734310.6207028
        metadata["chunksize"] = self.config_df["chunksize"]
def main():
    """Connect to HiDRA and request metadata.

    Parses the signal and target host from the command line (both default
    to the fully qualified name of the local host), starts a
    "QUERY_NEXT_METADATA" transfer and prints the target file path generated
    for every metadata message received. The loop only ends when an
    exception is raised; the query is then stopped and the exception
    propagates.
    """

    default_host = socket.getfqdn()

    parser = argparse.ArgumentParser()

    parser.add_argument("--signal_host",
                        type=str,
                        help="Host where HiDRA is running",
                        default=default_host)
    parser.add_argument("--target_host",
                        type=str,
                        help="Host where the data should be send to",
                        default=default_host)

    arguments = parser.parse_args()

    targets = [[arguments.target_host, "50101", 0]]
    base_target_path = os.path.join(BASE_DIR, "data", "target")

    print("\n==== TEST: Query for the newest filename ====\n")

    query = Transfer("QUERY_NEXT_METADATA", arguments.signal_host)

    query.initiate(targets)

    query.start()

    try:
        while True:
            # No local handler: an error from get() propagates and the
            # finally clause stops the query exactly once.
            [metadata, _] = query.get()

            print()
            print(generate_filepath(base_target_path, metadata))
            print()
    finally:
        query.stop()
        print("\n==== TEST END: Query for the newest filename ====\n")