Example 1
 def testRequestAndBufferedReplyQueues(self):
     """Tests REQ and buffered REP queue pairs."""
     reply_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
         name='requestbufferedreply_replybind',
         delay_open=False,
         linger_seconds=1)
     request_queue = zeromq_queue.ZeroMQRequestConnectQueue(
         name='requestbufferedreply_requestconnect',
         delay_open=False,
         port=reply_queue.port,
         linger_seconds=1)
     self._testItemTransferred(reply_queue, request_queue)
     reply_queue.Close()
     request_queue.Close()
     request_queue = zeromq_queue.ZeroMQRequestBindQueue(
         name='requestbufferedreply_requestbind',
         delay_open=False,
         linger_seconds=1)
     reply_queue = zeromq_queue.ZeroMQBufferedReplyConnectQueue(
         name='requestbufferedreply_replyconnect',
         delay_open=False,
         port=request_queue.port,
         linger_seconds=0)
     self._testItemTransferred(reply_queue, request_queue)
     reply_queue.Close()
     request_queue.Close()
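The helper _testItemTransferred is not shown in this listing. A minimal sketch of the round trip it presumably performs, assuming only the PushItem/PopItem interface used elsewhere in these examples (queue names here are illustrative):

    reply_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
        name='sketch_replybind', delay_open=False, linger_seconds=1)
    request_queue = zeromq_queue.ZeroMQRequestConnectQueue(
        name='sketch_requestconnect', delay_open=False,
        port=reply_queue.port, linger_seconds=1)

    # The buffered reply queue serves pushed items to REQ clients.
    reply_queue.PushItem('This is a test item.')
    item = request_queue.PopItem()  # expected to equal the pushed item

    reply_queue.Close()
    request_queue.Close()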
Example 2
    def testMain(self):
        """Tests the _Main function."""
        output_task_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
            delay_open=True,
            linger_seconds=0,
            maximum_items=1,
            name='test output task queue',
            timeout_seconds=self._QUEUE_TIMEOUT)
        output_task_queue.Open()

        input_task_queue = zeromq_queue.ZeroMQRequestConnectQueue(
            delay_open=True,
            linger_seconds=0,
            name='test input task queue',
            port=output_task_queue.port,
            timeout_seconds=self._QUEUE_TIMEOUT)

        configuration = configurations.ProcessingConfiguration()

        test_process = worker_process.WorkerProcess(input_task_queue,
                                                    None,
                                                    None,
                                                    None,
                                                    None,
                                                    configuration,
                                                    name='TestWorker')

        test_process.start()

        output_task_queue.PushItem(plaso_queue.QueueAbort(), block=False)
        output_task_queue.Close(abort=True)
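The QueueAbort sentinel pushed above is how the test tells the worker to stop. A hedged sketch of the consuming side (the real WorkerProcess._Main is not shown here and does considerably more, such as task processing and status reporting):

    # Hypothetical consumer loop: pop tasks from the request queue
    # until the QueueAbort sentinel arrives, then exit.
    input_task_queue.Open()
    while True:
        item = input_task_queue.PopItem()
        if isinstance(item, plaso_queue.QueueAbort):
            break
        # ... process the task item ...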
Example 3
 def testBufferedReplyQueue(self):
     """Tests for the buffered reply queue."""
     test_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
         name='bufferedreply_bind', delay_open=False, linger_seconds=1)
     test_queue.PushItem('This is a test item.')
     test_queue.Close(abort=True)
     with self.assertRaises(errors.QueueAlreadyClosed):
         test_queue.PushItem('This shouldn\'t work')
Example 4
 def testBufferedReplyQueue(self):
     """Tests for the buffered reply queue."""
     test_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
         delay_open=False)
     test_queue.PushItem(u'This is a test item.')
     test_queue.Empty()
     test_queue.Close()
     with self.assertRaises(errors.QueueAlreadyClosed):
         test_queue.PushItem(u'This shouldn\'t work')
Example 5
 def testRequestAndBufferedReplyQueues(self):
     """Tests REQ and buffered REP queue pairs."""
     reply_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
         delay_open=False)
     request_queue = zeromq_queue.ZeroMQRequestConnectQueue(
         delay_open=False, port=reply_queue.port)
     self._testItemTransferred(reply_queue, request_queue)
     request_queue = zeromq_queue.ZeroMQRequestBindQueue(delay_open=False)
     reply_queue = zeromq_queue.ZeroMQBufferedReplyConnectQueue(
         delay_open=False, port=request_queue.port)
     self._testItemTransferred(reply_queue, request_queue)
Example 6
  def testEmptyBufferedQueues(self):
    """Tests the Empty method for buffered queues."""
    queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
        name='requestbufferedreply_replybind', delay_open=False,
        linger_seconds=1, buffer_max_size=3, timeout_seconds=2,
        buffer_timeout_seconds=1)
    try:
      while True:
        queue.PushItem('item', block=False)
    except errors.QueueFull:
      # Queue is now full
      pass

    with self.assertRaises(errors.QueueFull):
      queue.PushItem('item', block=False)

    queue.Empty()
    # We should now be able to push another item without an exception.
    queue.PushItem('item')
    queue.Empty()
    queue.Close()
Example 7
    def ProcessSources(self,
                       session_identifier,
                       source_path_specs,
                       storage_writer,
                       processing_configuration,
                       enable_sigsegv_handler=False,
                       number_of_worker_processes=0,
                       status_update_callback=None,
                       worker_memory_limit=None):
        """Processes the sources and extract events.

    Args:
      session_identifier (str): identifier of the session.
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
          should be enabled.
      number_of_worker_processes (Optional[int]): number of worker processes.
      status_update_callback (Optional[function]): callback function for status
          updates.
      worker_memory_limit (Optional[int]): maximum amount of memory a worker is
          allowed to consume, where None represents the default memory limit
          and 0 represents no limit.

    Returns:
      ProcessingStatus: processing status.
    """
        if number_of_worker_processes < 1:
            # One worker for each "available" CPU (minus other processes).
            # The number here is derived from the fact that the engine starts up:
            # * A main process.
            #
            # If we want to utilize all CPUs on the system, we therefore need
            # to start a number of workers equal to the total number of CPUs
            # minus the other processes.
            try:
                cpu_count = multiprocessing.cpu_count() - 1

                if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
                    cpu_count = self._WORKER_PROCESSES_MINIMUM

                elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
                    cpu_count = self._WORKER_PROCESSES_MAXIMUM

            except NotImplementedError:
                logger.error((
                    'Unable to determine number of CPUs, defaulting to {0:d} worker '
                    'processes.').format(self._WORKER_PROCESSES_MINIMUM))
                cpu_count = self._WORKER_PROCESSES_MINIMUM

            number_of_worker_processes = cpu_count

        self._enable_sigsegv_handler = enable_sigsegv_handler
        self._number_of_worker_processes = number_of_worker_processes

        if worker_memory_limit is None:
            self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT
        else:
            self._worker_memory_limit = worker_memory_limit

        # Keep track of certain values so we can spawn new extraction workers.
        self._processing_configuration = processing_configuration

        self._debug_output = processing_configuration.debug_output
        self._log_filename = processing_configuration.log_filename
        self._session_identifier = session_identifier
        self._status_update_callback = status_update_callback
        self._storage_writer = storage_writer

        # Set up the task queue.
        if not self._use_zeromq:
            self._task_queue = multi_process_queue.MultiProcessingQueue(
                maximum_number_of_queued_items=self._maximum_number_of_tasks)

        else:
            task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
                delay_open=True,
                linger_seconds=0,
                maximum_items=1,
                name='main_task_queue',
                timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
            self._task_queue = task_outbound_queue

            # The ZeroMQ backed queue must be started first, so we can save its port.
            # TODO: raises: attribute-defined-outside-init
            # self._task_queue.name = 'Task queue'
            self._task_queue.Open()
            self._task_queue_port = self._task_queue.port

        self._StartProfiling(self._processing_configuration.profiling)
        self._task_manager.StartProfiling(
            self._processing_configuration.profiling, self._name)

        if self._serializers_profiler:
            storage_writer.SetSerializersProfiler(self._serializers_profiler)

        if self._storage_profiler:
            storage_writer.SetStorageProfiler(self._storage_profiler)

        # Set up the storage writer before the worker processes.
        storage_writer.StartTaskStorage()

        for worker_number in range(number_of_worker_processes):
            # First argument to _StartWorkerProcess is not used.
            extraction_process = self._StartWorkerProcess('', storage_writer)
            if not extraction_process:
                logger.error('Unable to create worker process: {0:d}'.format(
                    worker_number))

        self._StartStatusUpdateThread()

        try:
            # Open the storage file after creating the worker processes;
            # otherwise the ZIP storage file will remain locked as long as the
            # worker processes are alive.
            storage_writer.Open()
            storage_writer.WriteSessionStart()

            try:
                storage_writer.WritePreprocessingInformation(
                    self.knowledge_base)

                self._ProcessSources(source_path_specs, storage_writer)

            finally:
                storage_writer.WriteSessionCompletion(aborted=self._abort)

                storage_writer.Close()

        finally:
            # Stop the status update thread after close of the storage writer
            # so we include the storage sync to disk in the status updates.
            self._StopStatusUpdateThread()

            if self._serializers_profiler:
                storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                storage_writer.SetStorageProfiler(None)

            self._task_manager.StopProfiling()
            self._StopProfiling()

        try:
            self._StopExtractionProcesses(abort=self._abort)

        except KeyboardInterrupt:
            self._AbortKill()

            # The abort can leave the main process unresponsive
            # due to incorrectly finalized IPC.
            self._KillProcess(os.getpid())

        # The task queue should be closed by _StopExtractionProcesses; this
        # close is a failsafe, primarily due to MultiProcessingQueue's
        # blocking behavior.
        self._task_queue.Close(abort=True)

        if self._processing_status.error_path_specs:
            task_storage_abort = True
        else:
            task_storage_abort = self._abort

        try:
            storage_writer.StopTaskStorage(abort=task_storage_abort)
        except (IOError, OSError) as exception:
            logger.error(
                'Unable to stop task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            logger.debug('Processing aborted.')
            self._processing_status.aborted = True
        else:
            logger.debug('Processing completed.')

        # Reset values.
        self._enable_sigsegv_handler = None
        self._number_of_worker_processes = None
        self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

        self._processing_configuration = None

        self._session_identifier = None
        self._status_update_callback = None
        self._storage_writer = None

        return self._processing_status
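The core pattern in ProcessSources is bind-first, then hand the port to the workers: the main process binds the buffered reply queue, records the port it was assigned, and each worker connects a request queue to that port. A minimal sketch of the handoff, assuming only the constructor arguments used above (the timeout value and the worker_side helper are illustrative, not part of the engine API):

    # Main process: bind first so the assigned port is known.
    task_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
        delay_open=True, linger_seconds=0, maximum_items=1,
        name='main_task_queue',
        timeout_seconds=300)  # illustrative; the engine uses
                              # _ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS
    task_queue.Open()
    task_queue_port = task_queue.port

    # Worker process (hypothetical helper): connect to the recorded port.
    def worker_side(port):
        input_queue = zeromq_queue.ZeroMQRequestConnectQueue(
            delay_open=True, linger_seconds=0, name='worker_task_queue',
            port=port, timeout_seconds=300)
        return input_queue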
Example 8
    def ProcessSources(self,
                       session,
                       source_path_specs,
                       storage_writer,
                       processing_configuration,
                       enable_sigsegv_handler=False,
                       status_update_callback=None,
                       storage_file_path=None):
        """Processes the sources and extract events.

    Args:
      session (Session): session in which the sources are processed.
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
          should be enabled.
      status_update_callback (Optional[function]): callback function for status
          updates.
      storage_file_path (Optional[str]): path to the session storage file.

    Returns:
      ProcessingStatus: processing status.
    """
        self._enable_sigsegv_handler = enable_sigsegv_handler

        # Keep track of certain values so we can spawn new extraction workers.
        self._processing_configuration = processing_configuration

        self._debug_output = processing_configuration.debug_output
        self._log_filename = processing_configuration.log_filename
        self._session = session
        self._status_update_callback = status_update_callback
        self._storage_file_path = storage_file_path
        self._storage_writer = storage_writer
        self._task_storage_format = processing_configuration.task_storage_format

        # Set up the task queue.
        task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
            delay_open=True,
            linger_seconds=0,
            maximum_items=1,
            name='main_task_queue',
            timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
        self._task_queue = task_outbound_queue

        # The ZeroMQ backed queue must be started first, so we can save its port.
        # TODO: raises: attribute-defined-outside-init
        # self._task_queue.name = 'Task queue'
        self._task_queue.Open()
        self._task_queue_port = self._task_queue.port

        # Set up the task storage before the worker processes.
        self._StartTaskStorage(self._task_storage_format)

        for worker_number in range(self._number_of_worker_processes):
            process_name = 'Worker_{0:02d}'.format(self._last_worker_number)
            worker_process = self._StartWorkerProcess(process_name)
            if not worker_process:
                logger.error('Unable to create worker process: {0:d}'.format(
                    worker_number))

        self._StartProfiling(self._processing_configuration.profiling)
        self._task_manager.StartProfiling(
            self._processing_configuration.profiling, self._name)

        if self._serializers_profiler:
            storage_writer.SetSerializersProfiler(self._serializers_profiler)

        if self._storage_profiler:
            storage_writer.SetStorageProfiler(self._storage_profiler)

        self._StartStatusUpdateThread()

        try:
            self._ProcessSources(source_path_specs, storage_writer)

        finally:
            # Stop the status update thread after close of the storage writer
            # so we include the storage sync to disk in the status updates.
            self._StopStatusUpdateThread()

            if self._serializers_profiler:
                storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                storage_writer.SetStorageProfiler(None)

            self._task_manager.StopProfiling()
            self._StopProfiling()

        try:
            self._StopExtractionProcesses(abort=self._abort)

        except KeyboardInterrupt:
            self._AbortKill()

            # The abort can leave the main process unresponsive
            # due to incorrectly finalized IPC.
            self._KillProcess(os.getpid())

        # The task queue should be closed by _StopExtractionProcesses; this
        # close is a failsafe.
        self._task_queue.Close(abort=True)

        if self._processing_status.error_path_specs:
            task_storage_abort = True
        else:
            task_storage_abort = self._abort

        try:
            self._StopTaskStorage(self._task_storage_format,
                                  abort=task_storage_abort)
        except (IOError, OSError) as exception:
            logger.error(
                'Unable to stop task storage with error: {0!s}'.format(
                    exception))

        if self._abort:
            logger.debug('Processing aborted.')
            self._processing_status.aborted = True
        else:
            logger.debug('Processing completed.')

        # Update the status view one last time.
        self._UpdateStatus()

        # Reset values.
        self._enable_sigsegv_handler = None

        self._processing_configuration = None

        self._session = None
        self._status_update_callback = None
        self._storage_file_path = None
        self._storage_writer = None
        self._task_storage_format = None

        return self._processing_status
Example 9
  def ProcessSources(
      self, session_identifier, source_path_specs, storage_writer,
      enable_sigsegv_handler=False, filter_find_specs=None,
      filter_object=None, hasher_names_string=None, mount_path=None,
      number_of_worker_processes=0, parser_filter_expression=None,
      preferred_year=None, process_archives=False,
      process_compressed_streams=True, status_update_callback=None,
      show_memory_usage=False, temporary_directory=None, text_prepend=None,
      yara_rules_string=None):
    """Processes the sources and extract event objects.

    Args:
      session_identifier (str): identifier of the session.
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
          should be enabled.
      filter_find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
          used in path specification extraction.
      filter_object (Optional[objectfilter.Filter]): filter object.
      hasher_names_string (Optional[str]): comma separated string of names
          of hashers to use during processing.
      mount_path (Optional[str]): mount path.
      number_of_worker_processes (Optional[int]): number of worker processes.
      parser_filter_expression (Optional[str]): parser filter expression,
          where None represents all parsers and plugins.
      preferred_year (Optional[int]): preferred year.
      process_archives (Optional[bool]): True if archive files should be
          scanned for file entries.
      process_compressed_streams (Optional[bool]): True if file content in
          compressed streams should be processed.
      show_memory_usage (Optional[bool]): True if memory information should be
          included in status updates.
      status_update_callback (Optional[function]): callback function for status
          updates.
      temporary_directory (Optional[str]): path of the directory for temporary
          files.
      text_prepend (Optional[str]): text to prepend to every event.
      yara_rules_string (Optional[str]): unparsed yara rule definitions.

    Returns:
      ProcessingStatus: processing status.
    """
    if number_of_worker_processes < 1:
      # One worker for each "available" CPU (minus other processes).
      # The number here is derived from the fact that the engine starts up:
      # * A main process.
      #
      # If we want to utilize all CPUs on the system, we therefore need to
      # start a number of workers equal to the total number of CPUs minus
      # the other processes.
      try:
        cpu_count = multiprocessing.cpu_count() - 1

        if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
          cpu_count = self._WORKER_PROCESSES_MINIMUM

        elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
          cpu_count = self._WORKER_PROCESSES_MAXIMUM

      except NotImplementedError:
        logging.error((
            u'Unable to determine number of CPUs, defaulting to {0:d} worker '
            u'processes.').format(self._WORKER_PROCESSES_MINIMUM))
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      number_of_worker_processes = cpu_count

    self._enable_sigsegv_handler = enable_sigsegv_handler
    self._number_of_worker_processes = number_of_worker_processes
    self._show_memory_usage = show_memory_usage

    # Keep track of certain values so we can spawn new extraction workers.
    self._filter_find_specs = filter_find_specs
    self._filter_object = filter_object
    self._hasher_names_string = hasher_names_string
    self._mount_path = mount_path
    self._parser_filter_expression = parser_filter_expression
    self._preferred_year = preferred_year
    self._process_archives = process_archives
    self._process_compressed_streams = process_compressed_streams
    self._session_identifier = session_identifier
    self._status_update_callback = status_update_callback
    self._storage_writer = storage_writer
    self._temporary_directory = temporary_directory
    self._text_prepend = text_prepend
    self._yara_rules_string = yara_rules_string

    # Set up the task queue.
    if not self._use_zeromq:
      self._task_queue = multi_process_queue.MultiProcessingQueue(
          maximum_number_of_queued_items=self._maximum_number_of_tasks)

    else:
      task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
          delay_open=True, linger_seconds=0, maximum_items=1,
          name=u'main_task_queue',
          timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
      self._task_queue = task_outbound_queue

      # The ZeroMQ backed queue must be started first, so we can save its port.
      # TODO: raises: attribute-defined-outside-init
      # self._task_queue.name = u'Task queue'
      self._task_queue.Open()
      self._task_queue_port = self._task_queue.port

    self._StartProfiling()

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(self._serializers_profiler)

    # Set up the storage writer before the worker processes.
    storage_writer.StartTaskStorage()

    for _ in range(number_of_worker_processes):
      extraction_process = self._StartExtractionWorkerProcess(storage_writer)
      self._StartMonitoringProcess(extraction_process.pid)

    self._StartStatusUpdateThread()

    try:
      # Open the storage file after creating the worker processes;
      # otherwise the ZIP storage file will remain locked as long as the
      # worker processes are alive.
      storage_writer.Open()
      storage_writer.WriteSessionStart()

      try:
        storage_writer.WritePreprocessingInformation(self.knowledge_base)

        self._ProcessSources(
            source_path_specs, storage_writer,
            filter_find_specs=filter_find_specs)

      finally:
        storage_writer.WriteSessionCompletion(aborted=self._abort)

        storage_writer.Close()

    finally:
      # Stop the status update thread after close of the storage writer
      # so we include the storage sync to disk in the status updates.
      self._StopStatusUpdateThread()

      if self._serializers_profiler:
        storage_writer.SetSerializersProfiler(None)

      self._StopProfiling()

    try:
      self._StopExtractionProcesses(abort=self._abort)

    except KeyboardInterrupt:
      self._AbortKill()

      # The abort can leave the main process unresponsive
      # due to incorrectly finalized IPC.
      self._KillProcess(os.getpid())

    # The task queue should be closed by _StopExtractionProcesses; this
    # close is a failsafe, primarily due to MultiProcessingQueue's
    # blocking behaviour.
    self._task_queue.Close(abort=True)

    if self._processing_status.error_path_specs:
      task_storage_abort = True
    else:
      task_storage_abort = self._abort

    try:
      storage_writer.StopTaskStorage(abort=task_storage_abort)
    except (IOError, OSError) as exception:
      logging.error(u'Unable to stop task storage with error: {0!s}'.format(
          exception))

    if self._abort:
      logging.debug(u'Processing aborted.')
      self._processing_status.aborted = True
    else:
      logging.debug(u'Processing completed.')

    # Reset values.
    self._enable_sigsegv_handler = None
    self._number_of_worker_processes = None
    self._show_memory_usage = None

    self._filter_find_specs = None
    self._filter_object = None
    self._hasher_names_string = None
    self._mount_path = None
    self._parser_filter_expression = None
    self._preferred_year = None
    self._process_archives = None
    self._process_compressed_streams = None
    self._session_identifier = None
    self._status_update_callback = None
    self._storage_writer = None
    self._text_prepend = None

    return self._processing_status