Example No. 1
  def _StopAnalysisProcesses(self, abort=False):
    """Stops the analysis processes.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
    logger.debug('Stopping analysis processes.')
    self._StopMonitoringProcesses()

    if abort:
      # Signal all the processes to abort.
      self._AbortTerminate()

    # Wake the processes to make sure that they are not blocking
    # waiting for new items on the queue.
    for event_queue in self._event_queues.values():
      event_queue.PushItem(plaso_queue.QueueAbort(), block=False)

    # Try waiting for the processes to exit normally.
    self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
    for event_queue in self._event_queues.values():
      event_queue.Close(abort=abort)

    if abort:
      # Kill any remaining processes.
      self._AbortKill()
    else:
      # Check if the processes are still alive and terminate them if necessary.
      self._AbortTerminate()
      self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)

      for event_queue in self._event_queues.values():
        event_queue.Close(abort=True)
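
The shutdown above wakes blocked consumers by pushing a non-blocking abort sentinel into each event queue, joins with a timeout and only then escalates. Below is a minimal, self-contained sketch of that pattern using only the standard library; the _Abort sentinel, _Consumer worker and the two-process setup are illustrative stand-ins, not plaso APIs.

import multiprocessing
import queue


class _Abort(object):
  """Sentinel item that tells a consumer to stop."""


def _Consumer(task_queue):
  """Consumes items until an abort sentinel is dequeued."""
  while True:
    item = task_queue.get()
    if isinstance(item, _Abort):
      break
    # Real per-item work would go here.


if __name__ == '__main__':
  task_queue = multiprocessing.Queue()
  consumers = [
      multiprocessing.Process(target=_Consumer, args=(task_queue,))
      for _ in range(2)]
  for consumer in consumers:
    consumer.start()

  # Wake every consumer with a non-blocking sentinel so none of them stays
  # blocked on an empty queue.
  for _ in consumers:
    try:
      task_queue.put(_Abort(), block=False)
    except queue.Full:
      pass

  # Join with a timeout first and only terminate processes that are still
  # alive, mirroring the join-then-terminate order used above.
  for consumer in consumers:
    consumer.join(timeout=5)
    if consumer.is_alive():
      consumer.terminate()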
Example No. 2
  def _StopMonitoringProcess(self, process):
    """Stops monitoring a process.

    Args:
      process (MultiProcessBaseProcess): process.

    Raises:
      KeyError: if the process is not monitored.
      ValueError: if the process is missing.
    """
    if process is None:
      raise ValueError('Missing process.')

    pid = process.pid

    self._RaiseIfNotMonitored(pid)

    del self._process_information_per_pid[pid]

    rpc_client = self._rpc_clients_per_pid.get(pid, None)
    if rpc_client:
      rpc_client.Close()
      del self._rpc_clients_per_pid[pid]

    if pid in self._rpc_errors_per_pid:
      del self._rpc_errors_per_pid[pid]

    logger.debug('Stopped monitoring process: {0:s} (PID: {1:d})'.format(
        process.name, pid))
Example No. 3
  def _TimeoutTasks(self, tasks_for_timeout):
    """Checks for inactive tasks and marks such tasks as abandoned.

    Note that this method does not lock the manager and should be called
    by a method holding the manager lock.

    Args:
      tasks_for_timeout (dict[str, Task]): mapping of task identifiers to Tasks
        that will be checked for inactivity and marked as abandoned if
        required.
    """
    if not tasks_for_timeout:
      return

    inactive_time = int(time.time() * 1000000) - self._TASK_INACTIVE_TIME

    # Copy the items so the dict is not modified while iterating over it.
    for task_identifier, task in list(tasks_for_timeout.items()):
      last_active_time = task.last_processing_time
      if not last_active_time:
        last_active_time = task.start_time

      if last_active_time < inactive_time:
        logger.debug('Task {0:s} is abandoned'.format(task_identifier))
        self._tasks_abandoned[task_identifier] = task
        del tasks_for_timeout[task_identifier]
Example No. 4
    def _AbandonInactiveProcessingTasks(self):
        """Marks processing tasks that exceed the inactive time as abandoned.

    This method does not lock the manager and should be called by a method
    holding the manager lock.
    """
        if self._tasks_processing:
            inactive_time = time.time() - self._TASK_INACTIVE_TIME
            inactive_time = int(inactive_time *
                                definitions.MICROSECONDS_PER_SECOND)

            # Abandon all tasks after they're identified so as not to modify the
            # dict while iterating over it.
            tasks_to_abandon = []
            for task_identifier, task in self._tasks_processing.items():
                if task.last_processing_time < inactive_time:
                    logger.debug('Abandoned processing task: {0:s}.'.format(
                        task_identifier))

                    self.SampleTaskStatus(task, 'abandoned_processing')
                    tasks_to_abandon.append((task_identifier, task))

            for task_identifier, task in tasks_to_abandon:
                self._tasks_abandoned[task_identifier] = task
                del self._tasks_processing[task_identifier]
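
The snippet above first collects the tasks to abandon and only then deletes them, so the dictionary is never modified while it is being iterated (which raises RuntimeError on Python 3). A minimal sketch of the same collect-then-delete pattern with plain data; the task names, timestamps and cutoff are made up for illustration.

import time

tasks_processing = {'task-a': time.time() - 600.0, 'task-b': time.time()}
TASK_INACTIVE_SECONDS = 300.0

cutoff = time.time() - TASK_INACTIVE_SECONDS

# First identify the stale entries, then remove them in a second pass so the
# dict is never modified while it is being iterated.
stale_task_identifiers = [
    identifier for identifier, last_active in tasks_processing.items()
    if last_active < cutoff]

for identifier in stale_task_identifiers:
  del tasks_processing[identifier]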
Example No. 5
    def CreateTask(self,
                   session_identifier,
                   storage_format=definitions.STORAGE_FORMAT_SQLITE):
        """Creates a task.

    Args:
      session_identifier (str): the identifier of the session the task is
          part of.
      storage_format (Optional[str]): the storage format that the task should be
          stored in.

    Returns:
      Task: task attribute container.
    """
        task = tasks.Task(session_identifier)
        task.storage_format = storage_format
        logger.debug('Created task: {0:s}.'.format(task.identifier))

        with self._lock:
            self._tasks_queued[task.identifier] = task
            self._total_number_of_tasks += 1

            self.SampleTaskStatus(task, 'created')

        return task
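
All bookkeeping in these snippets happens under the manager lock so the task dict and the task counter stay consistent across threads. A rough sketch of that idea with only the standard library; MinimalTaskTracker and its attribute names are illustrative, not part of plaso.

import threading
import uuid


class MinimalTaskTracker(object):
  """Keeps per-task state behind a lock, loosely mirroring the snippet above."""

  def __init__(self):
    self._lock = threading.Lock()
    self._tasks_queued = {}
    self._total_number_of_tasks = 0

  def CreateTask(self):
    """Creates a task identifier and registers it as queued."""
    task_identifier = uuid.uuid4().hex
    # The lock makes the dict update and the counter increment a single
    # atomic step with respect to other threads using this tracker.
    with self._lock:
      self._tasks_queued[task_identifier] = {'status': 'queued'}
      self._total_number_of_tasks += 1
    return task_identifier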
Example No. 6
    def CreateRetryTask(self):
        """Creates a task that to retry a previously abandoned task.

    Returns:
      Task: a task that was abandoned but should be retried or None if there are
          no abandoned tasks that should be retried.
    """
        with self._lock:
            abandoned_task = self._GetTaskPendingRetry()
            if not abandoned_task:
                return None

            # The abandoned task is kept in _tasks_abandoned so it can be still
            # identified in CheckTaskToMerge and UpdateTaskAsPendingMerge.

            retry_task = abandoned_task.CreateRetryTask()
            logger.debug('Retrying task {0:s} as {1:s}.'.format(
                abandoned_task.identifier, retry_task.identifier))

            self._tasks_queued[retry_task.identifier] = retry_task
            self._total_number_of_tasks += 1

            self.SampleTaskStatus(retry_task, 'created_retry')

            return retry_task
Example No. 8
  def CreateRetryTask(self):
    """Creates a task that to retry a previously abandoned task.

    Returns:
      Task: a task that was abandoned but should be retried or None if there are
          no abandoned tasks that should be retried.
    """
    with self._lock:
      abandoned_task = self._GetTaskPendingRetry()
      if not abandoned_task:
        return None

      # The abandoned task is kept in _tasks_abandoned so it can be still
      # identified in CheckTaskToMerge and UpdateTaskAsPendingMerge.

      retry_task = abandoned_task.CreateRetryTask()
      logger.debug('Retrying task {0:s} as {1:s}.'.format(
          abandoned_task.identifier, retry_task.identifier))

      self._tasks_queued[retry_task.identifier] = retry_task
      self._total_number_of_tasks += 1

      self.SampleTaskStatus(retry_task, 'created_retry')

      return retry_task
Example No. 9
  def _StartWorkerProcess(self, process_name, storage_writer):
    """Creates, starts, monitors and registers a worker process.

    Args:
      process_name (str): process name.
      storage_writer (StorageWriter): storage writer for a session storage used
          to create task storage.

    Returns:
      MultiProcessWorkerProcess: extraction worker process or None if the
          process could not be started.
    """
    process_name = 'Worker_{0:02d}'.format(self._last_worker_number)
    logger.debug('Starting worker process {0:s}'.format(process_name))

    if self._use_zeromq:
      queue_name = '{0:s} task queue'.format(process_name)
      task_queue = zeromq_queue.ZeroMQRequestConnectQueue(
          delay_open=True, linger_seconds=0, name=queue_name,
          port=self._task_queue_port,
          timeout_seconds=self._TASK_QUEUE_TIMEOUT_SECONDS)
    else:
      task_queue = self._task_queue

    process = worker_process.WorkerProcess(
        task_queue, storage_writer, self.knowledge_base,
        self._session_identifier, self._processing_configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler, name=process_name)

    # Remove all possible log handlers to prevent a child process from logging
    # to the main process log file and garbling the log. The log handlers are
    # recreated after the worker process has been started.
    for handler in logging.root.handlers:
      logging.root.removeHandler(handler)
      handler.close()

    process.start()

    loggers.ConfigureLogging(
        debug_output=self._debug_output, filename=self._log_filename,
        mode='a', quiet_mode=self._quiet_mode)

    try:
      self._StartMonitoringProcess(process)

    except (IOError, KeyError) as exception:
      pid = process.pid
      logger.error((
          'Unable to monitor replacement worker process: {0:s} '
          '(PID: {1:d}) with error: {2!s}').format(
              process_name, pid, exception))

      self._TerminateProcess(process)
      return None

    self._RegisterProcess(process)

    self._last_worker_number += 1

    return process
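
The handler shuffle above (drop the root log handlers, start the child, then reconfigure logging) keeps the worker from inheriting and garbling the parent's log file. A minimal stdlib sketch of that order of operations, assuming placeholder file names and a placeholder _Worker target.

import logging
import multiprocessing


def _Worker():
  """Child process that configures its own logging after it has started."""
  logging.basicConfig(filename='worker.log', level=logging.DEBUG)
  logging.debug('worker started')


if __name__ == '__main__':
  logging.basicConfig(filename='parent.log', level=logging.DEBUG)

  # Drop and close the parent's handlers before starting the child so the
  # child does not inherit an open handle to the parent's log file.
  for handler in list(logging.root.handlers):
    logging.root.removeHandler(handler)
    handler.close()

  process = multiprocessing.Process(target=_Worker)
  process.start()

  # Recreate the parent's logging configuration after the child has started.
  logging.basicConfig(filename='parent.log', level=logging.DEBUG)

  process.join()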
Example No. 10
  def _AbandonQueuedTasks(self):
    """Marks queued tasks abandoned.

    This method does not lock the manager and should be called by a method
    holding the manager lock.
    """
    # Copy the items so the dict is not modified while iterating over it.
    for task_identifier, task in list(self._tasks_queued.items()):
      logger.debug('Abandoned queued task: {0:s}.'.format(task_identifier))

      self._tasks_abandoned[task_identifier] = task
      del self._tasks_queued[task_identifier]
Example No. 11
    def _ProcessTask(self, task):
        """Processes a task.

    Args:
      task (Task): task.
    """
        logger.debug('Started processing task: {0:s}.'.format(task.identifier))

        if self._tasks_profiler:
            self._tasks_profiler.Sample(task, 'processing_started')

        self._task = task

        task_storage_writer = self._storage_writer.CreateTaskStorage(
            task, self._processing_configuration.task_storage_format)

        if self._serializers_profiler:
            task_storage_writer.SetSerializersProfiler(
                self._serializers_profiler)

        task_storage_writer.Open()

        self._parser_mediator.SetStorageWriter(task_storage_writer)

        task_storage_writer.WriteTaskStart()

        try:
            # TODO: add support for more task types.
            self._ProcessPathSpec(self._extraction_worker,
                                  self._parser_mediator, task.path_spec)
            self._number_of_consumed_sources += 1

            if self._guppy_memory_profiler:
                self._guppy_memory_profiler.Sample()

        finally:
            task_storage_writer.WriteTaskCompletion(aborted=self._abort)

            self._parser_mediator.SetStorageWriter(None)

            task_storage_writer.Close()

        try:
            self._storage_writer.FinalizeTaskStorage(task)
        except IOError:
            pass

        self._task = None

        if self._tasks_profiler:
            self._tasks_profiler.Sample(task, 'processing_completed')

        logger.debug('Completed processing task: {0:s}.'.format(
            task.identifier))
Example No. 12
    def _AbandonQueuedTasks(self):
        """Marks queued tasks abandoned.

    This method does not lock the manager and should be called by a method
    holding the manager lock.
    """
        # Copy the items so the dict is not modified while iterating over it.
        for task_identifier, task in list(self._tasks_queued.items()):
            logger.debug(
                'Abandoned queued task: {0:s}.'.format(task_identifier))

            self._tasks_abandoned[task_identifier] = task
            del self._tasks_queued[task_identifier]
Example No. 13
  def _ProcessTask(self, task):
    """Processes a task.

    Args:
      task (Task): task.
    """
    logger.debug('Started processing task: {0:s}.'.format(task.identifier))

    if self._tasks_profiler:
      self._tasks_profiler.Sample(task, 'processing_started')

    self._task = task

    storage_writer = self._storage_writer.CreateTaskStorage(task)

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(self._serializers_profiler)

    storage_writer.Open()

    self._parser_mediator.SetStorageWriter(storage_writer)

    storage_writer.WriteTaskStart()

    try:
      # TODO: add support for more task types.
      self._ProcessPathSpec(
          self._extraction_worker, self._parser_mediator, task.path_spec)
      self._number_of_consumed_sources += 1

      if self._guppy_memory_profiler:
        self._guppy_memory_profiler.Sample()

    finally:
      storage_writer.WriteTaskCompletion(aborted=self._abort)

      self._parser_mediator.SetStorageWriter(None)

      storage_writer.Close()

    try:
      self._storage_writer.FinalizeTaskStorage(task)
    except IOError:
      pass

    self._task = None

    if self._tasks_profiler:
      self._tasks_profiler.Sample(task, 'processing_completed')

    logger.debug('Completed processing task: {0:s}.'.format(task.identifier))
Example No. 14
  def _AbortJoin(self, timeout=None):
    """Aborts all registered processes by joining with the parent process.

    Args:
      timeout (int): number of seconds to wait for processes to join, where
          None represents no timeout.
    """
    for pid, process in iter(self._processes_per_pid.items()):
      logger.debug('Waiting for process: {0:s} (PID: {1:d}).'.format(
          process.name, pid))
      process.join(timeout=timeout)
      if not process.is_alive():
        logger.debug('Process {0:s} (PID: {1:d}) stopped.'.format(
            process.name, pid))
Example No. 16
    def CompleteTask(self, task):
        """Completes a task.

    The task is complete and can be removed from the task manager.

    Args:
      task (Task): task.
    """
        with self._lock:
            if task.identifier in self._tasks_merging:
                del self._tasks_merging[task.identifier]
                logger.debug('Task {0:s} is complete.'.format(task.identifier))

            if task.identifier in self._tasks_pending_merge:
                logger.debug(
                    'Task {0:s} completed while pending merge.'.format(
                        task.identifier))
                return

            if task.identifier in self._tasks_processing:
                del self._tasks_processing[task.identifier]
                logger.debug('Task {0:s} completed from processing.'.format(
                    task.identifier))
                return

            if task.identifier in self._tasks_queued:
                del self._tasks_queued[task.identifier]
                logger.debug('Task {0:s} is completed from queued.'.format(
                    task.identifier))
                return
Example No. 17
    def HasPendingTasks(self):
        """Determines if there are tasks running or in need of retrying.

    Returns:
      bool: True if there are tasks that are active, ready to be merged or
          need to be retried.
    """
        logger.debug('Checking for pending tasks')
        with self._lock:
            self._AbandonInactiveProcessingTasks()

            if self._tasks_processing:
                return True

            # There are no tasks being processed, but we might be
            # waiting for some tasks to be merged.
            if self._HasTasksPendingMerge():
                return True

            # There are no tasks processing or pending merge, but there may
            # still be some waiting to be retried, so we check that.
            if self._HasTasksPendingRetry():
                return True

            # It is possible that a worker has processed a task and the foreman has
            # not been informed about it, since there is no feedback from the worker
            # when it pops a task from the queue.

            # If we believe all the workers are idle for longer than the task
            # inactive time (timeout) abandon all queued tasks. This ensures
            # that processing actually stops when the foreman never gets an
            # update from a worker.

            if self._tasks_queued:
                inactive_time = time.time() - self._TASK_INACTIVE_TIME
                inactive_time = int(inactive_time *
                                    definitions.MICROSECONDS_PER_SECOND)

                if self._latest_task_processing_time < inactive_time:
                    self._AbandonQueuedTasks()

            if self._tasks_queued:
                return True

            if self._tasks_merging:
                return True

        # There are no tasks pending any work.
        return False
Example No. 18
    def _StopProcessStatusRPCServer(self):
        """Stops the process status RPC server."""
        if not self._rpc_server:
            return

        # Make sure the engine gets one more status update so it knows
        # the worker has completed.
        self._WaitForStatusNotRunning()

        self._rpc_server.Stop()
        self._rpc_server = None
        self.rpc_port.value = 0

        logger.debug('Process: {0!s} process status RPC server stopped'.format(
            self._name))
Example No. 19
  def _StopProcessStatusRPCServer(self):
    """Stops the process status RPC server."""
    if not self._rpc_server:
      return

    # Make sure the engine gets one more status update so it knows
    # the worker has completed.
    self._WaitForStatusNotRunning()

    self._rpc_server.Stop()
    self._rpc_server = None
    self.rpc_port.value = 0

    logger.debug(
        'Process: {0!s} process status RPC server stopped'.format(self._name))
Example No. 20
  def _AbandonQueuedTasks(self):
    """Marks queued tasks abandoned.

    This method does not lock the manager and should be called by a method
    holding the manager lock.
    """
    # Abandon all tasks after they're identified so as not to modify the
    # dict while iterating over it.
    tasks_to_abandon = []
    for task_identifier, task in iter(self._tasks_queued.items()):
      logger.debug('Abandoned queued task: {0:s}.'.format(task_identifier))
      tasks_to_abandon.append((task_identifier, task))

    for task_identifier, task in tasks_to_abandon:
      self._tasks_abandoned[task_identifier] = task
      del self._tasks_queued[task_identifier]
Example No. 21
    def UpdateTaskAsProcessingByIdentifier(self, task_identifier):
        """Updates the task manager to reflect the task is processing.

    Args:
      task_identifier (str): unique identifier of the task.

    Raises:
      KeyError: if the task is not known to the task manager.
    """
        with self._lock:
            task_processing = self._tasks_processing.get(task_identifier, None)
            if task_processing:
                task_processing.UpdateProcessingTime()
                self._UpdateLatestProcessingTime(task_processing)
                return

            task_queued = self._tasks_queued.get(task_identifier, None)
            if task_queued:
                logger.debug('Task {0:s} was queued, now processing.'.format(
                    task_identifier))
                self._tasks_processing[task_identifier] = task_queued
                del self._tasks_queued[task_identifier]

                task_queued.UpdateProcessingTime()
                self._UpdateLatestProcessingTime(task_queued)
                return

            task_abandoned = self._tasks_abandoned.get(task_identifier, None)
            if task_abandoned:
                del self._tasks_abandoned[task_identifier]
                self._tasks_processing[task_identifier] = task_abandoned
                logger.debug(
                    'Task {0:s} was abandoned, but now processing.'.format(
                        task_identifier))

                task_abandoned.UpdateProcessingTime()
                self._UpdateLatestProcessingTime(task_abandoned)
                return

            if task_identifier in self._tasks_pending_merge:
                # No need to update the processing time, as this task is already
                # finished processing and is just waiting for merge.
                return

        # If we get here, we don't know what state the task is in, so raise.
        raise KeyError(
            'Status of task {0:s} is unknown.'.format(task_identifier))
Example No. 22
    def _RunProcess(self):
        """Runs the process."""
        # Prevent the KeyboardInterrupt being raised inside the process.
        # This will prevent a process from generating a traceback when interrupted.
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        # A SIGTERM signal handler is necessary to make sure IPC is cleaned up
        # correctly on terminate.
        signal.signal(signal.SIGTERM, self._SigTermHandler)

        # A SIGSEGV signal handler is necessary to try to indicate where
        # worker failed.
        # WARNING the SIGSEGV handler will deadlock the process on a real segfault.
        if self._enable_sigsegv_handler:
            self._original_sigsegv_handler = signal.signal(
                signal.SIGSEGV, self._SigSegvHandler)

        self._pid = os.getpid()
        self._process_information = process_info.ProcessInfo(self._pid)

        # We need to set the is running status explicitly to True in case
        # the process completes before the engine is able to determine
        # the status of the process, such as in the unit tests.
        self._status_is_running = True

        # Logging needs to be configured before the first output otherwise we
        # mess up the logging of the parent process.
        loggers.ConfigureLogging(debug_output=self._debug_output,
                                 filename=self._log_filename,
                                 quiet_mode=self._quiet_mode)

        logger.debug('Process: {0!s} (PID: {1:d}) started'.format(
            self._name, self._pid))

        self._StartProcessStatusRPCServer()

        self._Main()

        self._StopProcessStatusRPCServer()

        logger.debug('Process: {0!s} (PID: {1:d}) stopped'.format(
            self._name, self._pid))

        # Make sure log files are cleanly closed.
        logging.shutdown()

        self._status_is_running = False
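
The signal setup at the top of the snippet is a small reusable pattern: ignore SIGINT so an interrupted run does not dump a traceback from every child, and install a SIGTERM handler so terminate() leads to an orderly shutdown. A hedged stdlib sketch follows; the handler body is illustrative only.

import signal
import sys


def _SigTermHandler(signal_number, stack_frame):
  """Exits cleanly so IPC and log files can be released on terminate."""
  sys.exit(0)


# Ignore SIGINT in the child; the parent decides how to handle Ctrl-C.
signal.signal(signal.SIGINT, signal.SIG_IGN)

# Handle SIGTERM so process.terminate() results in an orderly shutdown.
signal.signal(signal.SIGTERM, _SigTermHandler)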
Example No. 23
  def run(self):
    """Runs the process."""
    # Prevent the KeyboardInterrupt being raised inside the process.
    # This will prevent a process from generating a traceback when interrupted.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # A SIGTERM signal handler is necessary to make sure IPC is cleaned up
    # correctly on terminate.
    signal.signal(signal.SIGTERM, self._SigTermHandler)

    # A SIGSEGV signal handler is necessary to try to indicate where
    # worker failed.
    # WARNING the SIGSEGV handler will deadlock the process on a real segfault.
    if self._enable_sigsegv_handler:
      self._original_sigsegv_handler = signal.signal(
          signal.SIGSEGV, self._SigSegvHandler)

    self._pid = os.getpid()
    self._process_information = process_info.ProcessInfo(self._pid)

    # We need to set the is running status explicitly to True in case
    # the process completes before the engine is able to determine
    # the status of the process, e.g. in the unit tests.
    self._status_is_running = True

    # Logging needs to be configured before the first output otherwise we
    # mess up the logging of the parent process.
    loggers.ConfigureLogging(
        debug_output=self._debug_output, filename=self._log_filename,
        quiet_mode=self._quiet_mode)

    logger.debug(
        'Process: {0!s} (PID: {1:d}) started'.format(self._name, self._pid))

    self._StartProcessStatusRPCServer()

    self._Main()

    self._StopProcessStatusRPCServer()

    logger.debug(
        'Process: {0!s} (PID: {1:d}) stopped'.format(self._name, self._pid))

    # Make sure log files are cleanly closed.
    logging.shutdown()

    self._status_is_running = False
Example No. 24
    def CreateTask(self, session_identifier):
        """Creates a task.

    Args:
      session_identifier (str): the identifier of the session the task is
          part of.

    Returns:
      Task: task attribute container.
    """
        task = tasks.Task(session_identifier)
        logger.debug('Created task: {0:s}.'.format(task.identifier))

        with self._lock:
            self._QueueTask(task)

        return task
Example No. 25
  def UpdateTaskAsProcessingByIdentifier(self, task_identifier):
    """Updates the task manager to reflect the task is processing.

    Args:
      task_identifier (str): unique identifier of the task.

    Raises:
      KeyError: if the task is not known to the task manager.
    """
    with self._lock:
      task_processing = self._tasks_processing.get(task_identifier, None)
      if task_processing:
        task_processing.UpdateProcessingTime()
        self._UpdateLatestProcessingTime(task_processing)
        return

      task_queued = self._tasks_queued.get(task_identifier, None)
      if task_queued:
        logger.debug('Task {0:s} was queued, now processing.'.format(
            task_identifier))
        self._tasks_processing[task_identifier] = task_queued
        del self._tasks_queued[task_identifier]

        task_queued.UpdateProcessingTime()
        self._UpdateLatestProcessingTime(task_queued)
        return

      task_abandoned = self._tasks_abandoned.get(task_identifier, None)
      if task_abandoned:
        del self._tasks_abandoned[task_identifier]
        self._tasks_processing[task_identifier] = task_abandoned
        logger.debug('Task {0:s} was abandoned, but now processing.'.format(
            task_identifier))

        task_abandoned.UpdateProcessingTime()
        self._UpdateLatestProcessingTime(task_abandoned)
        return

      if task_identifier in self._tasks_pending_merge:
        # No need to update the processing time, as this task is already
        # finished processing and is just waiting for merge.
        return

    # If we get here, we don't know what state the task is in, so raise.
    raise KeyError('Status of task {0:s} is unknown.'.format(task_identifier))
Example No. 26
    def CreateRetryTask(self):
        """Creates a task that to retry a previously abandoned task.

    Returns:
      Task: a task that was abandoned but should be retried or None if there are
          no abandoned tasks that should be retried.
    """
        with self._lock:
            abandoned_task = self._GetTaskPendingRetry()
            if not abandoned_task:
                return None

            retry_task = abandoned_task.CreateRetry()
            logger.debug('Retrying task {0:s} as {1:s}.'.format(
                abandoned_task.identifier, retry_task.identifier))

            self._QueueTask(retry_task)
            return retry_task
Example No. 27
    def _AbandonInactiveProcessingTasks(self):
        """Marks processing tasks that exceed the inactive time as abandoned.

    This method does not lock the manager and should be called by a method
    holding the manager lock.
    """
        if self._tasks_processing:
            inactive_time = time.time() - self._TASK_INACTIVE_TIME
            inactive_time = int(inactive_time *
                                definitions.MICROSECONDS_PER_SECOND)

            # Copy the items so the dict is not modified while iterating
            # over it.
            for task_identifier, task in list(self._tasks_processing.items()):
                if task.last_processing_time < inactive_time:
                    logger.debug('Abandoned processing task: {0:s}.'.format(
                        task_identifier))

                    self._tasks_abandoned[task_identifier] = task
                    del self._tasks_processing[task_identifier]
Example No. 28
    def _StopAnalysisProcesses(self, abort=False):
        """Stops the analysis processes.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
        logger.debug('Stopping analysis processes.')
        self._StopMonitoringProcesses()

        # Note that multiprocessing.Queue is very sensitive regarding
        # blocking on either a get or a put. So we try to prevent using
        # any blocking behavior.

        if abort:
            # Signal all the processes to abort.
            self._AbortTerminate()

        if not self._use_zeromq:
            logger.debug('Emptying queues.')
            for event_queue in self._event_queues.values():
                event_queue.Empty()

        # Wake the processes to make sure that they are not blocking
        # waiting for new items on the queue.
        for event_queue in self._event_queues.values():
            event_queue.PushItem(plaso_queue.QueueAbort(), block=False)

        # Try waiting for the processes to exit normally.
        self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
        for event_queue in self._event_queues.values():
            event_queue.Close(abort=abort)

        if abort:
            # Kill any remaining processes.
            self._AbortKill()
        else:
            # Check if the processes are still alive and terminate them if necessary.
            self._AbortTerminate()
            self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)

            for event_queue in self._event_queues.values():
                event_queue.Close(abort=True)
Example No. 29
    def _StopExtractionProcesses(self, abort=False):
        """Stops the extraction processes.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
        logger.debug('Stopping extraction processes.')
        self._StopMonitoringProcesses()

        # Note that multiprocessing.Queue is very sensitive regarding
        # blocking on either a get or a put. So we try to prevent using
        # any blocking behavior.

        if abort:
            # Signal all the processes to abort.
            self._AbortTerminate()

        logger.debug('Emptying task queue.')
        self._task_queue.Empty()

        # Wake the processes to make sure that they are not blocking
        # waiting for new items on the queue.
        for _ in self._processes_per_pid:
            try:
                self._task_queue.PushItem(plaso_queue.QueueAbort(),
                                          block=False)
            except errors.QueueFull:
                logger.warning(
                    'Task queue full, unable to push abort message.')

        # Try waiting for the processes to exit normally.
        self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
        self._task_queue.Close(abort=abort)

        if not abort:
            # Check if the processes are still alive and terminate them if necessary.
            self._AbortTerminate()
            self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
            self._task_queue.Close(abort=True)

        # Kill any lingering processes.
        self._AbortKill()
Example No. 30
  def _StopAnalysisProcesses(self, abort=False):
    """Stops the analysis processes.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
    logger.debug('Stopping analysis processes.')
    self._StopMonitoringProcesses()

    # Note that multiprocessing.Queue is very sensitive regarding
    # blocking on either a get or a put. So we try to prevent using
    # any blocking behavior.

    if abort:
      # Signal all the processes to abort.
      self._AbortTerminate()

    if not self._use_zeromq:
      logger.debug('Emptying queues.')
      for event_queue in self._event_queues.values():
        event_queue.Empty()

    # Wake the processes to make sure that they are not blocking
    # waiting for new items on the queue.
    for event_queue in self._event_queues.values():
      event_queue.PushItem(plaso_queue.QueueAbort(), block=False)

    # Try waiting for the processes to exit normally.
    self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
    for event_queue in self._event_queues.values():
      event_queue.Close(abort=abort)

    if abort:
      # Kill any remaining processes.
      self._AbortKill()
    else:
      # Check if the processes are still alive and terminate them if necessary.
      self._AbortTerminate()
      self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)

      for event_queue in self._event_queues.values():
        event_queue.Close(abort=True)
Example No. 31
    def _FillEventSourceHeap(self,
                             storage_writer,
                             event_source_heap,
                             start_with_first=False):
        """Fills the event source heap with the available written event sources.

    Args:
      storage_writer (StorageWriter): storage writer for a session storage.
      event_source_heap (_EventSourceHeap): event source heap.
      start_with_first (Optional[bool]): True if the function should start
          with the first written event source.
    """
        if self._processing_profiler:
            self._processing_profiler.StartTiming('fill_event_source_heap')

        if self._processing_profiler:
            self._processing_profiler.StartTiming('get_event_source')

        if start_with_first:
            event_source = storage_writer.GetFirstWrittenEventSource()
        else:
            event_source = storage_writer.GetNextWrittenEventSource()

        if self._processing_profiler:
            self._processing_profiler.StopTiming('get_event_source')

        while event_source:
            event_source_heap.PushEventSource(event_source)
            if event_source_heap.IsFull():
                logger.debug('Source heap is full.')
                break

            if self._processing_profiler:
                self._processing_profiler.StartTiming('get_event_source')

            event_source = storage_writer.GetNextWrittenEventSource()

            if self._processing_profiler:
                self._processing_profiler.StopTiming('get_event_source')

        if self._processing_profiler:
            self._processing_profiler.StopTiming('fill_event_source_heap')
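
The fill loop above keeps pushing written event sources until the heap reports it is full. A minimal sketch of a bounded fill loop with heapq; the size limit and input values are made up for illustration.

import heapq

MAXIMUM_HEAP_SIZE = 4

heap = []
for value in [7, 3, 9, 1, 5, 8]:
  heapq.heappush(heap, value)
  # Stop filling as soon as the heap reaches its maximum size; the remaining
  # values would be picked up by a later fill pass.
  if len(heap) >= MAXIMUM_HEAP_SIZE:
    break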
Example No. 32
  def CompleteTask(self, task):
    """Completes a task.

    The task is complete and can be removed from the task manager.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not merging.
    """
    with self._lock:
      if task.identifier not in self._tasks_merging:
        raise KeyError('Task {0:s} was not merging.'.format(task.identifier))

      self.SampleTaskStatus(task, 'completed')

      del self._tasks_merging[task.identifier]

      logger.debug('Completed task {0:s}.'.format(task.identifier))
Example No. 33
  def CreateTask(self, session_identifier):
    """Creates a task.

    Args:
      session_identifier (str): the identifier of the session the task is
          part of.

    Returns:
      Task: task attribute container.
    """
    task = tasks.Task(session_identifier)
    logger.debug('Created task: {0:s}.'.format(task.identifier))

    with self._lock:
      self._tasks_queued[task.identifier] = task
      self._total_number_of_tasks += 1

      self.SampleTaskStatus(task, 'created')

    return task
Example No. 36
  def GetRetryTask(self):
    """Creates a task that is an attempt to retry an abandoned task.

    Returns:
      Task: a task that is a retry of an existing task or None if there are
          no tasks that need to be retried.
    """
    with self._lock:
      for abandoned_task in self._tasks_abandoned.values():
        # Only retry abandoned tasks that are yet to be retried and
        # are not themselves retries of another task.
        if self._TaskIsRetriable(abandoned_task):
          retry_task = abandoned_task.CreateRetry()
          logger.debug(
              'Retrying task {0:s} as {1:s}'.format(
                  abandoned_task.identifier, retry_task.identifier))
          self._tasks_queued[retry_task.identifier] = retry_task
          self._total_number_of_tasks += 1
          return retry_task

    return None
Example No. 37
  def _StopExtractionProcesses(self, abort=False):
    """Stops the extraction processes.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
    logger.debug('Stopping extraction processes.')
    self._StopMonitoringProcesses()

    # Note that multiprocessing.Queue is very sensitive regarding
    # blocking on either a get or a put. So we try to prevent using
    # any blocking behavior.

    if abort:
      # Signal all the processes to abort.
      self._AbortTerminate()

    logger.debug('Emptying task queue.')
    self._task_queue.Empty()

    # Wake the processes to make sure that they are not blocking
    # waiting for new items on the queue.
    for _ in self._processes_per_pid:
      try:
        self._task_queue.PushItem(plaso_queue.QueueAbort(), block=False)
      except errors.QueueFull:
        logger.warning('Task queue full, unable to push abort message.')

    # Try waiting for the processes to exit normally.
    self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
    self._task_queue.Close(abort=abort)

    if not abort:
      # Check if the processes are still alive and terminate them if necessary.
      self._AbortTerminate()
      self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
      self._task_queue.Close(abort=True)

    # Kill any lingering processes.
    self._AbortKill()
Example No. 38
  def RemoveTask(self, task):
    """Removes an abandoned task.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not abandoned or the task was abandoned and
          was not retried.
    """
    with self._lock:
      if task.identifier not in self._tasks_abandoned:
        raise KeyError('Task {0:s} was not abandoned.'.format(task.identifier))

      if not task.has_retry:
        raise KeyError(
            'Will not remove a task {0:s} without retry task.'.format(
                task.identifier))

      del self._tasks_abandoned[task.identifier]

      logger.debug('Removed task {0:s}.'.format(task.identifier))
Example No. 39
    def UpdateTaskAsPendingMerge(self, task):
        """Updates the task manager to reflect the task is ready to be merged.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not processing or abandoned.
    """
        with self._lock:
            is_processing = task.identifier in self._tasks_processing
            is_abandoned = task.identifier in self._tasks_abandoned
            is_queued = task.identifier in self._tasks_queued

            if not (is_queued or is_abandoned or is_processing):
                raise KeyError('Status of task {0:s} is unknown.'.format(
                    task.identifier))

            self._tasks_pending_merge.PushTask(task)

            task.UpdateProcessingTime()
            self._UpdateLastestProcessingTime(task)

            if is_queued:
                del self._tasks_queued[task.identifier]

            if is_processing:
                del self._tasks_processing[task.identifier]

            if is_abandoned:
                del self._tasks_abandoned[task.identifier]

        if is_abandoned:
            logger.warning(
                'Previously abandoned task {0:s} is now pending merge.'.format(
                    task.identifier))
        else:
            logger.debug('Task {0:s} is pending merge.'.format(
                task.identifier))
Example No. 41
    def _StartProcessStatusRPCServer(self):
        """Starts the process status RPC server."""
        if self._rpc_server:
            return

        self._rpc_server = plaso_xmlrpc.XMLProcessStatusRPCServer(
            self._GetStatus)

        hostname = 'localhost'

        # Try the PID as port number first otherwise pick something random
        # between 1024 and 60000.
        if self._pid < 1024 or self._pid > 60000:
            port = random.randint(1024, 60000)
        else:
            port = self._pid

        if not self._rpc_server.Start(hostname, port):
            port = 0
            for _ in range(self._NUMBER_OF_RPC_SERVER_START_ATTEMPTS):
                port = random.randint(1024, 60000)
                if self._rpc_server.Start(hostname, port):
                    break

                port = 0

        if not port:
            logger.error(
                ('Unable to start a process status RPC server for {0!s} '
                 '(PID: {1:d})').format(self._name, self._pid))
            self._rpc_server = None
            return

        self.rpc_port.value = port

        logger.debug('Process: {0!s} process status RPC server started'.format(
            self._name))
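
The port selection above tries the PID as the port number first and then falls back to a bounded number of random ports. A rough sketch of the same retry loop with the standard library XML-RPC server; _StartStatusServer, the attempt count and the registered status callable are illustrative stand-ins for plaso's RPC server and _GetStatus.

import os
import random
from xmlrpc.server import SimpleXMLRPCServer


def _StartStatusServer(status_callback, number_of_attempts=10):
  """Binds a status RPC server, trying the PID first and then random ports.

  Returns:
    tuple[SimpleXMLRPCServer, int]: the server and its port, or (None, 0) if
        no port could be bound.
  """
  pid = os.getpid()
  if 1024 <= pid <= 60000:
    candidate_ports = [pid]
  else:
    candidate_ports = [random.randint(1024, 60000)]

  candidate_ports.extend(
      random.randint(1024, 60000) for _ in range(number_of_attempts))

  for port in candidate_ports:
    try:
      server = SimpleXMLRPCServer(('localhost', port), logRequests=False)
    except OSError:
      # The port is already in use or otherwise unavailable; try the next one.
      continue

    server.register_function(status_callback, 'status')
    return server, port

  return None, 0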
Example No. 42
  def _AbandonInactiveProcessingTasks(self):
    """Marks processing tasks that exceed the inactive time as abandoned.

    This method does not lock the manager and should be called by a method
    holding the manager lock.
    """
    if self._tasks_processing:
      inactive_time = time.time() - self._TASK_INACTIVE_TIME
      inactive_time = int(inactive_time * definitions.MICROSECONDS_PER_SECOND)

      # Abandon all tasks after they're identified so as not to modify the
      # dict while iterating over it.
      tasks_to_abandon = []
      for task_identifier, task in iter(self._tasks_processing.items()):
        if task.last_processing_time < inactive_time:
          logger.debug('Abandoned processing task: {0:s}.'.format(
              task_identifier))

          self.SampleTaskStatus(task, 'abandoned_processing')
          tasks_to_abandon.append((task_identifier, task))

      for task_identifier, task in tasks_to_abandon:
        self._tasks_abandoned[task_identifier] = task
        del self._tasks_processing[task_identifier]
Example No. 43
  def _StartProcessStatusRPCServer(self):
    """Starts the process status RPC server."""
    if self._rpc_server:
      return

    self._rpc_server = plaso_xmlrpc.XMLProcessStatusRPCServer(self._GetStatus)

    hostname = 'localhost'

    # Try the PID as port number first otherwise pick something random
    # between 1024 and 60000.
    if self._pid < 1024 or self._pid > 60000:
      port = random.randint(1024, 60000)
    else:
      port = self._pid

    if not self._rpc_server.Start(hostname, port):
      port = 0
      for _ in range(self._NUMBER_OF_RPC_SERVER_START_ATTEMPTS):
        port = random.randint(1024, 60000)
        if self._rpc_server.Start(hostname, port):
          break

        port = 0

    if not port:
      logger.error((
          'Unable to start a process status RPC server for {0!s} '
          '(PID: {1:d})').format(self._name, self._pid))
      self._rpc_server = None
      return

    self.rpc_port.value = port

    logger.debug(
        'Process: {0!s} process status RPC server started'.format(self._name))
Example No. 44
    def UpdateTaskAsPendingMerge(self, task):
        """Updates the task manager to reflect that the task is ready to be merged.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not queued, processing or abandoned, or
          the task was abandoned and has a retry task.
    """
        with self._lock:
            is_abandoned = task.identifier in self._tasks_abandoned
            is_processing = task.identifier in self._tasks_processing
            is_queued = task.identifier in self._tasks_queued

            if not is_queued and not is_processing and not is_abandoned:
                raise KeyError('Status of task {0:s} is unknown.'.format(
                    task.identifier))

            if is_abandoned and task.has_retry:
                raise KeyError(
                    'Will not merge a task {0:s} with retry task.'.format(
                        task.identifier))

            if is_queued:
                logger.debug('Task {0:s} was queued, now merging.'.format(
                    task.identifier))
                del self._tasks_queued[task.identifier]

            if is_processing:
                logger.debug('Task {0:s} was processing, now merging.'.format(
                    task.identifier))
                del self._tasks_processing[task.identifier]

            if is_abandoned:
                logger.debug('Task {0:s} was abandoned, now merging.'.format(
                    task.identifier))
                del self._tasks_abandoned[task.identifier]

            self._tasks_pending_merge.PushTask(task)

            self.SampleTaskStatus(task, 'pending_merge')

            task.UpdateProcessingTime()
            self._UpdateLatestProcessingTime(task)
Example No. 45
  def UpdateTaskAsPendingMerge(self, task):
    """Updates the task manager to reflect the task is ready to be merged.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not queued, processing or abandoned, or
          the task was abandoned and has a retry task.
    """
    with self._lock:
      is_abandoned = task.identifier in self._tasks_abandoned
      is_processing = task.identifier in self._tasks_processing
      is_queued = task.identifier in self._tasks_queued

      if not is_queued and not is_processing and not is_abandoned:
        raise KeyError('Status of task {0:s} is unknown.'.format(
            task.identifier))

      if is_abandoned and task.has_retry:
        raise KeyError('Will not merge a task {0:s} with retry task.'.format(
            task.identifier))

      if is_queued:
        logger.debug('Task {0:s} was queued, now merging.'.format(
            task.identifier))
        del self._tasks_queued[task.identifier]

      if is_processing:
        logger.debug('Task {0:s} was processing, now merging.'.format(
            task.identifier))
        del self._tasks_processing[task.identifier]

      if is_abandoned:
        logger.debug('Task {0:s} was abandoned, now merging.'.format(
            task.identifier))
        del self._tasks_abandoned[task.identifier]

      self._tasks_pending_merge.PushTask(task)

      self.SampleTaskStatus(task, 'pending_merge')

      task.UpdateProcessingTime()
      self._UpdateLatestProcessingTime(task)
Example No. 46
  def _Main(self):
    """The main loop."""
    # We need a resolver context per process to prevent multi processing
    # issues with file objects stored in images.
    resolver_context = context.Context()

    for credential_configuration in self._processing_configuration.credentials:
      resolver.Resolver.key_chain.SetCredential(
          credential_configuration.path_spec,
          credential_configuration.credential_type,
          credential_configuration.credential_data)

    self._parser_mediator = parsers_mediator.ParserMediator(
        None, self._knowledge_base,
        artifacts_filter_helper=self._artifacts_filter_helper,
        preferred_year=self._processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=self._processing_configuration.temporary_directory)

    self._parser_mediator.SetEventExtractionConfiguration(
        self._processing_configuration.event_extraction)

    self._parser_mediator.SetInputSourceConfiguration(
        self._processing_configuration.input_source)

    # We need to initialize the parser and hasher objects after the process
    # has forked otherwise on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            self._processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        self._processing_configuration.extraction)

    self._parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
        self._name, self._pid))

    self._status = definitions.STATUS_INDICATOR_RUNNING

    try:
      logger.debug('{0!s} (PID: {1:d}) started monitoring task queue.'.format(
          self._name, self._pid))

      while not self._abort:
        try:
          task = self._task_queue.PopItem()
        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0!s}.'.format(
              type(exception)))
          break

        if isinstance(task, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessTask(task)

      logger.debug('{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
          self._name, self._pid))

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    self._parser_mediator.StopProfiling()

    self._extraction_worker = None
    self._parser_mediator = None
    self._storage_writer = None

    if self._abort:
      self._status = definitions.STATUS_INDICATOR_ABORTED
    else:
      self._status = definitions.STATUS_INDICATOR_COMPLETED

    logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
        self._name, self._pid))

    try:
      self._task_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))
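
The broad except around the consume loop above is deliberate: a worker should log the failure and mark itself aborted rather than die on an uncaught exception. A minimal sketch of that guard; _GuardedLoop and the loop body are placeholders.

import logging


def _GuardedLoop(items):
  """Runs per-item work without letting any exception escape the process."""
  aborted = False
  try:
    for item in items:
      _ = item  # Real per-item processing would go here.

  # Catch everything so an unexpected error is logged and reported as an
  # aborted state instead of killing the whole worker process.
  except Exception:  # pylint: disable=broad-except
    logging.exception('Unhandled exception in worker loop.')
    aborted = True

  return aborted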
Example No. 47
  def _ScheduleTasks(self, storage_writer):
    """Schedules tasks.

    Args:
      storage_writer (StorageWriter): storage writer for a session storage.
    """
    logger.debug('Task scheduler started')

    self._status = definitions.STATUS_INDICATOR_RUNNING

    # TODO: make tasks persistent.

    # TODO: protect task scheduler loop by catch all and
    # handle abort path.

    event_source_heap = _EventSourceHeap()

    self._FillEventSourceHeap(
        storage_writer, event_source_heap, start_with_first=True)

    event_source = event_source_heap.PopEventSource()

    task = None
    while event_source or self._task_manager.HasPendingTasks():
      if self._abort:
        break

      try:
        if not task:
          task = self._task_manager.CreateRetryTask()

        if not task and event_source:
          task = self._task_manager.CreateTask(self._session_identifier)
          task.file_entry_type = event_source.file_entry_type
          task.path_spec = event_source.path_spec
          event_source = None

          self._number_of_consumed_sources += 1

          if self._guppy_memory_profiler:
            self._guppy_memory_profiler.Sample()

        if task:
          if self._ScheduleTask(task):
            logger.debug(
                'Scheduled task {0:s} for path specification {1:s}'.format(
                    task.identifier, task.path_spec.comparable))

            self._task_manager.SampleTaskStatus(task, 'scheduled')

            task = None

          else:
            self._task_manager.SampleTaskStatus(task, 'schedule_attempted')

        self._MergeTaskStorage(storage_writer)

        if not event_source_heap.IsFull():
          self._FillEventSourceHeap(storage_writer, event_source_heap)

        if not task and not event_source:
          event_source = event_source_heap.PopEventSource()

      except KeyboardInterrupt:
        self._abort = True

        self._processing_status.aborted = True
        if self._status_update_callback:
          self._status_update_callback(self._processing_status)

    for task in self._task_manager.GetFailedTasks():
      warning = warnings.ExtractionWarning(
          message='Worker failed to process path specification',
          path_spec=task.path_spec)
      self._storage_writer.AddWarning(warning)
      self._processing_status.error_path_specs.append(task.path_spec)

    self._status = definitions.STATUS_INDICATOR_IDLE

    if self._abort:
      logger.debug('Task scheduler aborted')
    else:
      logger.debug('Task scheduler stopped')
Example No. 48
  def _Main(self):
    """The main loop."""
    self._StartProfiling(self._processing_configuration.profiling)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
        self._name, self._pid))

    # Creating the threading event in the constructor will cause a pickle
    # error on Windows when an analysis process is created.
    self._foreman_status_wait_event = threading.Event()
    self._status = definitions.PROCESSING_STATUS_ANALYZING

    task = tasks.Task()
    # TODO: temporary solution.
    task.identifier = self._analysis_plugin.plugin_name

    self._task = task

    storage_writer = self._storage_writer.CreateTaskStorage(task)

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(self._storage_profiler)

    storage_writer.Open()

    self._analysis_mediator = analysis_mediator.AnalysisMediator(
        storage_writer, self._knowledge_base, data_location=self._data_location)

    # TODO: set event_filter_expression in mediator.

    storage_writer.WriteTaskStart()

    try:
      logger.debug(
          '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
              self._name, self._pid))

      while not self._abort:
        try:
          event = self._event_queue.PopItem()

        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0!s}.'.format(
              type(exception)))
          break

        if isinstance(event, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessEvent(self._analysis_mediator, event)

        self._number_of_consumed_events += 1

        if self._guppy_memory_profiler:
          self._guppy_memory_profiler.Sample()

      logger.debug(
          '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
              self._name, self._pid))

      if not self._abort:
        self._status = definitions.PROCESSING_STATUS_REPORTING

        self._analysis_mediator.ProduceAnalysisReport(self._analysis_plugin)

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    finally:
      storage_writer.WriteTaskCompletion(aborted=self._abort)

      storage_writer.Close()

      if self._serializers_profiler:
        storage_writer.SetSerializersProfiler(None)

      if self._storage_profiler:
        storage_writer.SetStorageProfiler(None)

    try:
      self._storage_writer.FinalizeTaskStorage(task)
    except IOError:
      pass

    if self._abort:
      self._status = definitions.PROCESSING_STATUS_ABORTED
    else:
      self._status = definitions.PROCESSING_STATUS_COMPLETED

    self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

    logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
        self._name, self._pid))

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()

    self._analysis_mediator = None
    self._foreman_status_wait_event = None
    self._storage_writer = None
    self._task = None

    try:
      self._event_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))
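The event consumption loop above follows a common pattern: pop items until the queue closes, runs empty, or a QueueAbort sentinel is dequeued. Below is a minimal, self-contained sketch of that pattern using the standard library queue module; the helper names and the timeout value are assumptions, not plaso API.

import queue


class QueueAbort(object):
  """Sentinel object used to tell a consumer to stop (illustrative only)."""


def ConsumeItems(event_queue, process_event, timeout=5):
  """Consumes events from a queue until it drains or an abort is dequeued.

  Returns:
    int: number of events that were processed.
  """
  number_of_consumed_events = 0
  while True:
    try:
      event = event_queue.get(timeout=timeout)
    except queue.Empty:
      break

    if isinstance(event, QueueAbort):
      break

    process_event(event)
    number_of_consumed_events += 1

  return number_of_consumed_events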
Example No. 49
  def _UpdateProcessingStatus(self, pid, process_status, used_memory):
    """Updates the processing status.

    Args:
      pid (int): process identifier (PID) of the worker process.
      process_status (dict[str, object]): status values received from
          the worker process.
      used_memory (int): size of used memory in bytes.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    self._RaiseIfNotRegistered(pid)

    if not process_status:
      return

    process = self._processes_per_pid[pid]

    processing_status = process_status.get('processing_status', None)

    self._RaiseIfNotMonitored(pid)

    display_name = process_status.get('display_name', '')

    number_of_consumed_event_tags = process_status.get(
        'number_of_consumed_event_tags', None)
    number_of_produced_event_tags = process_status.get(
        'number_of_produced_event_tags', None)

    number_of_consumed_events = process_status.get(
        'number_of_consumed_events', None)
    number_of_produced_events = process_status.get(
        'number_of_produced_events', None)

    number_of_consumed_reports = process_status.get(
        'number_of_consumed_reports', None)
    number_of_produced_reports = process_status.get(
        'number_of_produced_reports', None)

    number_of_consumed_sources = process_status.get(
        'number_of_consumed_sources', None)
    number_of_produced_sources = process_status.get(
        'number_of_produced_sources', None)

    number_of_consumed_warnings = process_status.get(
        'number_of_consumed_warnings', None)
    number_of_produced_warnings = process_status.get(
        'number_of_produced_warnings', None)

    if processing_status != definitions.STATUS_INDICATOR_IDLE:
      last_activity_timestamp = process_status.get(
          'last_activity_timestamp', 0.0)

      if last_activity_timestamp:
        last_activity_timestamp += self._PROCESS_WORKER_TIMEOUT

        current_timestamp = time.time()
        if current_timestamp > last_activity_timestamp:
          logger.error((
              'Process {0:s} (PID: {1:d}) has not reported activity within '
              'the timeout period.').format(process.name, pid))
          processing_status = definitions.STATUS_INDICATOR_NOT_RESPONDING

    self._processing_status.UpdateWorkerStatus(
        process.name, processing_status, pid, used_memory, display_name,
        number_of_consumed_sources, number_of_produced_sources,
        number_of_consumed_events, number_of_produced_events,
        number_of_consumed_event_tags, number_of_produced_event_tags,
        number_of_consumed_reports, number_of_produced_reports,
        number_of_consumed_warnings, number_of_produced_warnings)

    task_identifier = process_status.get('task_identifier', '')
    if not task_identifier:
      return

    try:
      self._task_manager.UpdateTaskAsProcessingByIdentifier(task_identifier)
      return
    except KeyError:
      logger.debug(
          'Worker {0:s} is processing unknown task: {1:s}.'.format(
              process.name, task_identifier))
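The not-responding check above boils down to comparing the worker's last reported activity timestamp, padded by a timeout, against the current time. A minimal sketch of that check, assuming a hypothetical helper and an illustrative timeout value:

import time

# Illustrative timeout in seconds; plaso reads the real value from a
# class constant on the engine.
_PROCESS_WORKER_TIMEOUT = 15 * 60


def IsWorkerResponsive(last_activity_timestamp):
  """Checks whether a worker reported activity within the timeout window."""
  if not last_activity_timestamp:
    # No activity has been reported yet; do not flag the worker.
    return True
  return time.time() <= last_activity_timestamp + _PROCESS_WORKER_TIMEOUT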
Example No. 50
  def ProcessSources(
      self, session_identifier, source_path_specs, storage_writer,
      processing_configuration, enable_sigsegv_handler=False,
      number_of_worker_processes=0, status_update_callback=None,
      worker_memory_limit=None):
    """Processes the sources and extract events.

    Args:
      session_identifier (str): identifier of the session.
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV handler
          should be enabled.
      number_of_worker_processes (Optional[int]): number of worker processes.
      status_update_callback (Optional[function]): callback function for status
          updates.
      worker_memory_limit (Optional[int]): maximum amount of memory a worker is
          allowed to consume, where None represents the default memory limit
          and 0 represents no limit.

    Returns:
      ProcessingStatus: processing status.
    """
    if number_of_worker_processes < 1:
      # One worker for each "available" CPU (minus other processes).
      # The number here is derived from the fact that the engine starts up:
      # * A main process.
      #
      # If we want to utilize all CPUs on the system we therefore need to
      # start up workers that amount to the total number of CPUs minus the
      # other processes.
      try:
        cpu_count = multiprocessing.cpu_count() - 1

        if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
          cpu_count = self._WORKER_PROCESSES_MINIMUM

        elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
          cpu_count = self._WORKER_PROCESSES_MAXIMUM

      except NotImplementedError:
        logger.error((
            'Unable to determine number of CPUs defaulting to {0:d} worker '
            'processes.').format(self._WORKER_PROCESSES_MINIMUM))
        cpu_count = self._WORKER_PROCESSES_MINIMUM

      number_of_worker_processes = cpu_count

    self._enable_sigsegv_handler = enable_sigsegv_handler
    self._number_of_worker_processes = number_of_worker_processes

    if worker_memory_limit is None:
      self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT
    else:
      self._worker_memory_limit = worker_memory_limit

    # Keep track of certain values so we can spawn new extraction workers.
    self._processing_configuration = processing_configuration

    self._debug_output = processing_configuration.debug_output
    self._log_filename = processing_configuration.log_filename
    self._session_identifier = session_identifier
    self._status_update_callback = status_update_callback
    self._storage_writer = storage_writer

    # Set up the task queue.
    if not self._use_zeromq:
      self._task_queue = multi_process_queue.MultiProcessingQueue(
          maximum_number_of_queued_items=self._maximum_number_of_tasks)

    else:
      task_outbound_queue = zeromq_queue.ZeroMQBufferedReplyBindQueue(
          delay_open=True, linger_seconds=0, maximum_items=1,
          name='main_task_queue',
          timeout_seconds=self._ZEROMQ_NO_WORKER_REQUEST_TIME_SECONDS)
      self._task_queue = task_outbound_queue

      # The ZeroMQ backed queue must be started first, so we can save its port.
      # TODO: raises: attribute-defined-outside-init
      # self._task_queue.name = 'Task queue'
      self._task_queue.Open()
      self._task_queue_port = self._task_queue.port

    self._StartProfiling(self._processing_configuration.profiling)
    self._task_manager.StartProfiling(
        self._processing_configuration.profiling, self._name)

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(self._storage_profiler)

    # Set up the storage writer before the worker processes.
    storage_writer.StartTaskStorage()

    for worker_number in range(number_of_worker_processes):
      # First argument to _StartWorkerProcess is not used.
      extraction_process = self._StartWorkerProcess('', storage_writer)
      if not extraction_process:
        logger.error('Unable to create worker process: {0:d}'.format(
            worker_number))

    self._StartStatusUpdateThread()

    try:
      # Open the storage file after creating the worker processes otherwise
      # the ZIP storage file will remain locked as long as the worker processes
      # are alive.
      storage_writer.Open()
      storage_writer.WriteSessionStart()

      try:
        storage_writer.WritePreprocessingInformation(self.knowledge_base)

        self._ProcessSources(source_path_specs, storage_writer)

      finally:
        storage_writer.WriteSessionCompletion(aborted=self._abort)

        storage_writer.Close()

    finally:
      # Stop the status update thread after close of the storage writer
      # so we include the storage sync to disk in the status updates.
      self._StopStatusUpdateThread()

      if self._serializers_profiler:
        storage_writer.SetSerializersProfiler(None)

      if self._storage_profiler:
        storage_writer.SetStorageProfiler(None)

      self._task_manager.StopProfiling()
      self._StopProfiling()

    try:
      self._StopExtractionProcesses(abort=self._abort)

    except KeyboardInterrupt:
      self._AbortKill()

      # The abort can leave the main process unresponsive
      # due to incorrectly finalized IPC.
      self._KillProcess(os.getpid())

    # The task queue should be closed by _StopExtractionProcesses, this
    # close is a failsafe, primarily due to MultiProcessingQueue's
    # blocking behavior.
    self._task_queue.Close(abort=True)

    if self._processing_status.error_path_specs:
      task_storage_abort = True
    else:
      task_storage_abort = self._abort

    try:
      storage_writer.StopTaskStorage(abort=task_storage_abort)
    except (IOError, OSError) as exception:
      logger.error('Unable to stop task storage with error: {0!s}'.format(
          exception))

    if self._abort:
      logger.debug('Processing aborted.')
      self._processing_status.aborted = True
    else:
      logger.debug('Processing completed.')

    # Reset values.
    self._enable_sigsegv_handler = None
    self._number_of_worker_processes = None
    self._worker_memory_limit = definitions.DEFAULT_WORKER_MEMORY_LIMIT

    self._processing_configuration = None

    self._session_identifier = None
    self._status_update_callback = None
    self._storage_writer = None

    return self._processing_status
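When no explicit worker count is given, the method above derives one from the CPU count, reserving one CPU for the main process and clamping the result between a minimum and a maximum. A small sketch of that sizing logic; the bounds and the helper name are illustrative assumptions:

import multiprocessing

# Illustrative bounds; the engine defines its own minimum and maximum.
_WORKER_PROCESSES_MINIMUM = 2
_WORKER_PROCESSES_MAXIMUM = 15


def DetermineNumberOfWorkerProcesses(requested_number=0):
  """Determines a worker count, clamped between the minimum and maximum."""
  if requested_number >= 1:
    return requested_number

  try:
    # Reserve one CPU for the main (foreman) process.
    cpu_count = multiprocessing.cpu_count() - 1
  except NotImplementedError:
    cpu_count = _WORKER_PROCESSES_MINIMUM

  return max(_WORKER_PROCESSES_MINIMUM,
             min(cpu_count, _WORKER_PROCESSES_MAXIMUM))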
Example No. 51
  def _Main(self):
    """The main loop."""
    # We need a resolver context per process to prevent multi processing
    # issues with file objects stored in images.
    resolver_context = context.Context()

    for credential_configuration in self._processing_configuration.credentials:
      resolver.Resolver.key_chain.SetCredential(
          credential_configuration.path_spec,
          credential_configuration.credential_type,
          credential_configuration.credential_data)

    self._parser_mediator = parsers_mediator.ParserMediator(
        None, self._knowledge_base,
        preferred_year=self._processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=self._processing_configuration.temporary_directory)

    self._parser_mediator.SetEventExtractionConfiguration(
        self._processing_configuration.event_extraction)

    self._parser_mediator.SetInputSourceConfiguration(
        self._processing_configuration.input_source)

    # We need to initialize the parser and hasher objects after the process
    # has forked otherwise on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            self._processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        self._processing_configuration.extraction)

    self._parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
        self._name, self._pid))

    self._status = definitions.PROCESSING_STATUS_RUNNING

    try:
      logger.debug('{0!s} (PID: {1:d}) started monitoring task queue.'.format(
          self._name, self._pid))

      while not self._abort:
        try:
          task = self._task_queue.PopItem()
        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0!s}.'.format(
              type(exception)))
          break

        if isinstance(task, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessTask(task)

      logger.debug('{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
          self._name, self._pid))

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    self._parser_mediator.StopProfiling()

    self._extraction_worker = None
    self._parser_mediator = None
    self._storage_writer = None

    if self._abort:
      self._status = definitions.PROCESSING_STATUS_ABORTED
    else:
      self._status = definitions.PROCESSING_STATUS_COMPLETED

    logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
        self._name, self._pid))

    try:
      self._task_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))
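Like the analysis process's main loop, the extraction worker wraps its consume loop in a broad exception handler so a single faulty task cannot kill the process before it reports a final status. A generic sketch of that guard, using hypothetical callback names:

import logging


def RunConsumerLoop(pop_item, process_item):
  """Runs a consumer loop, trapping exceptions so the process exits cleanly.

  Returns:
    bool: True if the loop aborted due to an unhandled exception.
  """
  try:
    while True:
      item = pop_item()
      if item is None:
        break
      process_item(item)

  # A single uncaught exception would otherwise kill the worker process
  # without giving it a chance to report its final status.
  except Exception:  # pylint: disable=broad-except
    logging.exception('Unhandled exception in consumer loop.')
    return True

  return False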
Example No. 52
  def _AnalyzeEvents(self, storage_writer, analysis_plugins, event_filter=None):
    """Analyzes events in a plaso storage.

    Args:
      storage_writer (StorageWriter): storage writer.
      analysis_plugins (dict[str, AnalysisPlugin]): analysis plugins that
          should be run and their names.
      event_filter (Optional[FilterObject]): event filter.

    Returns:
      collections.Counter: counter containing information about the events
          processed and filtered.

    Raises:
      RuntimeError: if a non-recoverable situation is encountered.
    """
    self._status = definitions.PROCESSING_STATUS_RUNNING
    self._number_of_consumed_errors = 0
    self._number_of_consumed_events = 0
    self._number_of_consumed_reports = 0
    self._number_of_consumed_sources = 0
    self._number_of_produced_errors = 0
    self._number_of_produced_events = 0
    self._number_of_produced_reports = 0
    self._number_of_produced_sources = 0

    number_of_filtered_events = 0

    logger.debug('Processing events.')

    filter_limit = getattr(event_filter, 'limit', None)

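    # Enrich each event with its event data attributes and existing tag before
    # applying the event filter and pushing it to the analysis plugin queues.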
    for event in storage_writer.GetSortedEvents():
      event_data_identifier = event.GetEventDataIdentifier()
      if event_data_identifier:
        event_data = storage_writer.GetEventDataByIdentifier(
            event_data_identifier)
        if event_data:
          for attribute_name, attribute_value in event_data.GetAttributes():
            setattr(event, attribute_name, attribute_value)

      event_identifier = event.GetIdentifier()
      event.tag = self._event_tag_index.GetEventTagByIdentifier(
          storage_writer, event_identifier)

      if event_filter:
        filter_match = event_filter.Match(event)
      else:
        filter_match = None

      # pylint: disable=singleton-comparison
      if filter_match == False:
        number_of_filtered_events += 1
        continue

      for event_queue in self._event_queues.values():
        # TODO: Check for premature exit of analysis plugins.
        event_queue.PushItem(event)

      self._number_of_consumed_events += 1

      if (event_filter and filter_limit and
          filter_limit == self._number_of_consumed_events):
        break

    logger.debug('Finished pushing events to analysis plugins.')
    # Signal that we have finished adding events.
    for event_queue in self._event_queues.values():
      event_queue.PushItem(plaso_queue.QueueAbort(), block=False)

    logger.debug('Processing analysis plugin results.')

    # TODO: use a task based approach.
    plugin_names = list(analysis_plugins.keys())
    while plugin_names:
      for plugin_name in list(plugin_names):
        if self._abort:
          break

        # TODO: temporary solution.
        task = tasks.Task()
        task.identifier = plugin_name

        merge_ready = storage_writer.CheckTaskReadyForMerge(task)
        if merge_ready:
          storage_writer.PrepareMergeTaskStorage(task)
          self._status = definitions.PROCESSING_STATUS_MERGING

          event_queue = self._event_queues[plugin_name]
          del self._event_queues[plugin_name]

          event_queue.Close()

          storage_merge_reader = storage_writer.StartMergeTaskStorage(task)

          storage_merge_reader.MergeAttributeContainers(
              callback=self._MergeEventTag)
          # TODO: temporary solution.
          plugin_names.remove(plugin_name)

          self._status = definitions.PROCESSING_STATUS_RUNNING

          self._number_of_produced_event_tags = (
              storage_writer.number_of_event_tags)
          self._number_of_produced_reports = (
              storage_writer.number_of_analysis_reports)

    try:
      storage_writer.StopTaskStorage(abort=self._abort)
    except (IOError, OSError) as exception:
      logger.error('Unable to stop task storage with error: {0!s}'.format(
          exception))

    if self._abort:
      logger.debug('Processing aborted.')
    else:
      logger.debug('Processing completed.')

    events_counter = collections.Counter()
    events_counter['Events filtered'] = number_of_filtered_events
    events_counter['Events processed'] = self._number_of_consumed_events

    return events_counter
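The analysis loop above enriches each event, applies the optional event filter, counts what it keeps and what it drops, and stops early once the filter's limit is reached. A self-contained sketch of just that counting logic; the function name is hypothetical and a duck-typed Match method is the only assumption about the filter:

import collections


def CountFilteredEvents(events, event_filter=None, filter_limit=None):
  """Counts processed and filtered events, honoring an optional limit."""
  counter = collections.Counter()
  for event in events:
    if event_filter and not event_filter.Match(event):
      counter['Events filtered'] += 1
      continue

    counter['Events processed'] += 1
    if filter_limit and counter['Events processed'] >= filter_limit:
      break

  return counter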