Example #1
0
  def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
    """Processes a path specification.

    Args:
      extraction_worker (worker.ExtractionWorker): extraction worker.
      parser_mediator (ParserMediator): parser mediator.
      path_spec (dfvfs.PathSpec): path specification.
    """
    self._current_display_name = parser_mediator.GetDisplayNameForPathSpec(
        path_spec)

    try:
      extraction_worker.ProcessPathSpec(parser_mediator, path_spec)

    except dfvfs_errors.CacheFullError:
      # TODO: signal engine of failure.
      self._abort = True
      logger.error((
          'ABORT: detected cache full error while processing path spec: '
          '{0:s}').format(self._current_display_name))

    except Exception as exception:  # pylint: disable=broad-except
      parser_mediator.ProduceExtractionError((
          'unable to process path specification with error: '
          '{0!s}').format(exception), path_spec=path_spec)

      if self._processing_configuration.debug_output:
        logger.warning((
            'Unhandled exception while processing path specification: '
            '{0:s}.').format(self._current_display_name))
        logger.exception(exception)
Example #2
0
  def __init__(self, maximum_number_of_queued_items=0, timeout=None):
    """Initializes a multi-processing queue.

    Args:
      maximum_number_of_queued_items (Optional[int]): maximum number of queued
          items, where 0 represents no limit.
      timeout (Optional[float]): number of seconds for the get to time out,
          where None will block until a new item is put onto the queue.
    """
    super(MultiProcessingQueue, self).__init__()
    self._timeout = timeout

    # maxsize contains the maximum number of items allowed to be queued,
    # where 0 represents unlimited.

    # We need to check that we aren't asking for a bigger queue than the
    # platform supports, which requires access to this internal
    # multiprocessing value.
    # pylint: disable=no-member,protected-access
    queue_max_length = _multiprocessing.SemLock.SEM_VALUE_MAX
    # pylint: enable=no-member,protected-access

    if maximum_number_of_queued_items > queue_max_length:
      logger.warning((
          'Requested maximum queue size: {0:d} is larger than the maximum '
          'size supported by the system. Defaulting to: {1:d}').format(
              maximum_number_of_queued_items, queue_max_length))
      maximum_number_of_queued_items = queue_max_length

    # This queue appears not to be FIFO.
    self._queue = multiprocessing.Queue(maxsize=maximum_number_of_queued_items)
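
For reference, a minimal standalone sketch (not taken from plaso) of the same capping logic, assuming CPython exposes the internal _multiprocessing.SemLock.SEM_VALUE_MAX value on the current platform; CreateBoundedQueue is an illustrative name:

import multiprocessing

# _multiprocessing is a CPython implementation detail; SemLock.SEM_VALUE_MAX
# is the largest count a platform semaphore, and therefore the queue, supports.
# pylint: disable=no-member,protected-access
import _multiprocessing


def CreateBoundedQueue(maximum_number_of_queued_items=0):
  """Creates a multiprocessing queue capped at the platform semaphore limit."""
  queue_max_length = _multiprocessing.SemLock.SEM_VALUE_MAX
  if maximum_number_of_queued_items > queue_max_length:
    # Fall back to the largest queue size the platform supports.
    maximum_number_of_queued_items = queue_max_length
  return multiprocessing.Queue(maxsize=maximum_number_of_queued_items)
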
Example #3
0
  def __init__(self, maximum_number_of_queued_items=0, timeout=None):
    """Initializes a multi-processing queue.

    Args:
      maximum_number_of_queued_items (Optional[int]): maximum number of queued
          items, where 0 represents no limit.
      timeout (Optional[float]): number of seconds for the get to time out,
          where None will block until a new item is put onto the queue.
    """
    super(MultiProcessingQueue, self).__init__()
    self._timeout = timeout

    # maxsize contains the maximum number of items allowed to be queued,
    # where 0 represents unlimited.

    # We need to check that we aren't asking for a bigger queue than the
    # platform supports, which requires access to this internal
    # multiprocessing value.
    # pylint: disable=no-member,protected-access
    queue_max_length = _multiprocessing.SemLock.SEM_VALUE_MAX
    # pylint: enable=no-member,protected-access

    if maximum_number_of_queued_items > queue_max_length:
      logger.warning((
          'Requested maximum queue size: {0:d} is larger than the maximum '
          'size supported by the system. Defaulting to: {1:d}').format(
              maximum_number_of_queued_items, queue_max_length))
      maximum_number_of_queued_items = queue_max_length

    # This queue appears not to be FIFO.
    self._queue = multiprocessing.Queue(maxsize=maximum_number_of_queued_items)
Example #4
0
  def _ProcessPathSpec(self, extraction_worker, parser_mediator, path_spec):
    """Processes a path specification.

    Args:
      extraction_worker (worker.ExtractionWorker): extraction worker.
      parser_mediator (ParserMediator): parser mediator.
      path_spec (dfvfs.PathSpec): path specification.
    """
    self._current_display_name = parser_mediator.GetDisplayNameForPathSpec(
        path_spec)

    try:
      extraction_worker.ProcessPathSpec(parser_mediator, path_spec)

    except dfvfs_errors.CacheFullError:
      # TODO: signal engine of failure.
      self._abort = True
      logger.error((
          'ABORT: detected cache full error while processing path spec: '
          '{0:s}').format(self._current_display_name))

    except Exception as exception:  # pylint: disable=broad-except
      parser_mediator.ProduceExtractionWarning((
          'unable to process path specification with error: '
          '{0!s}').format(exception), path_spec=path_spec)

      if self._processing_configuration.debug_output:
        logger.warning((
            'Unhandled exception while processing path specification: '
            '{0:s}.').format(self._current_display_name))
        logger.exception(exception)
Example #5
0
  def _AbortTerminate(self):
    """Aborts all registered processes by sending a SIGTERM or equivalent."""
    for pid, process in iter(self._processes_per_pid.items()):
      if not process.is_alive():
        continue

      logger.warning('Terminating process: {0:s} (PID: {1:d}).'.format(
          process.name, pid))
      process.terminate()
Example #6
0
  def _AbortKill(self):
    """Aborts all registered processes by sending a SIGKILL or equivalent."""
    for pid, process in iter(self._processes_per_pid.items()):
      if not process.is_alive():
        continue

      logger.warning('Killing process: {0:s} (PID: {1:d}).'.format(
          process.name, pid))
      self._KillProcess(pid)
Example #7
0
  def _AbortTerminate(self):
    """Aborts all registered processes by sending a SIGTERM or equivalent."""
    for pid, process in iter(self._processes_per_pid.items()):
      if not process.is_alive():
        continue

      logger.warning('Terminating process: {0:s} (PID: {1:d}).'.format(
          process.name, pid))
      process.terminate()
Example #8
0
  def _AbortKill(self):
    """Aborts all registered processes by sending a SIGKILL or equivalent."""
    for pid, process in iter(self._processes_per_pid.items()):
      if not process.is_alive():
        continue

      logger.warning('Killing process: {0:s} (PID: {1:d}).'.format(
          process.name, pid))
      self._KillProcess(pid)
Example #9
0
  def _TerminateProcess(self, process):
    """Terminate a process.

    Args:
      process (MultiProcessBaseProcess): process to terminate.
    """
    pid = process.pid
    logger.warning('Terminating process: (PID: {0:d}).'.format(pid))
    process.terminate()

    # Wait for the process to exit.
    process.join(timeout=self._PROCESS_JOIN_TIMEOUT)

    if process.is_alive():
      logger.warning('Killing process: (PID: {0:d}).'.format(pid))
      self._KillProcess(pid)
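
A minimal standalone sketch of the same terminate-then-kill escalation for a single multiprocessing.Process, assuming a POSIX platform where signal.SIGKILL exists; TerminateProcess and the five second join timeout are illustrative stand-ins for the plaso method and _PROCESS_JOIN_TIMEOUT:

import os
import signal


def TerminateProcess(process, join_timeout=5.0):
  """Terminates a multiprocessing.Process, killing it if it does not exit."""
  # Ask the process to exit; on POSIX this sends SIGTERM.
  process.terminate()

  # Wait for the process to exit.
  process.join(timeout=join_timeout)

  if process.is_alive():
    # Escalate to SIGKILL, which cannot be caught or ignored.
    os.kill(process.pid, signal.SIGKILL)
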
Example #10
0
  def _TerminateProcess(self, process):
    """Terminate a process.

    Args:
      process (MultiProcessBaseProcess): process to terminate.
    """
    pid = process.pid
    logger.warning('Terminating process: (PID: {0:d}).'.format(pid))
    process.terminate()

    # Wait for the process to exit.
    process.join(timeout=self._PROCESS_JOIN_TIMEOUT)

    if process.is_alive():
      logger.warning('Killing process: (PID: {0:d}).'.format(pid))
      self._KillProcess(pid)
Example #11
0
    def CallFunction(self):
        """Calls the function via RPC."""
        if self._xmlrpc_proxy is None:
            return None

        rpc_call = getattr(self._xmlrpc_proxy, self._RPC_FUNCTION_NAME, None)
        if rpc_call is None:
            return None

        try:
            return rpc_call()  # pylint: disable=not-callable
        except (expat.ExpatError, SocketServer.socket.error,
                xmlrpclib.Fault) as exception:
            logger.warning(
                'Unable to make RPC call with error: {0!s}'.format(exception))
            return None
Example #12
0
  def CallFunction(self):
    """Calls the function via RPC."""
    if self._xmlrpc_proxy is None:
      return None

    rpc_call = getattr(self._xmlrpc_proxy, self._RPC_FUNCTION_NAME, None)
    if rpc_call is None:
      return None

    try:
      return rpc_call()  # pylint: disable=not-callable
    except (
        expat.ExpatError, SocketServer.socket.error,
        xmlrpclib.Fault) as exception:
      logger.warning('Unable to make RPC call with error: {0!s}'.format(
          exception))
      return None
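
A minimal sketch of the same guarded client-side call using the Python 3 module names (xmlrpc.client instead of xmlrpclib); the proxy URL, the 'status' method name and CallStatusFunction are illustrative assumptions:

import socket
import xmlrpc.client

from xml.parsers import expat


def CallStatusFunction(proxy_url='http://localhost:8000'):
  """Calls a remote status function, returning None on any transport error."""
  proxy = xmlrpc.client.ServerProxy(proxy_url, allow_none=True)
  rpc_call = getattr(proxy, 'status', None)
  if rpc_call is None:
    return None

  try:
    return rpc_call()
  except (expat.ExpatError, socket.error, xmlrpc.client.Fault):
    return None
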
Example #13
0
  def _ProcessEvent(self, mediator, event):
    """Processes an event.

    Args:
      mediator (AnalysisMediator): mediates interactions between
          analysis plugins and other components, such as storage and dfvfs.
      event (EventObject): event.
    """
    try:
      self._analysis_plugin.ExamineEvent(mediator, event)

    except Exception as exception:  # pylint: disable=broad-except
      self.SignalAbort()

      # TODO: write analysis error.

      if self._debug_output:
        logger.warning('Unhandled exception while processing event object.')
        logger.exception(exception)
Example #14
0
  def _ProcessEvent(self, mediator, event):
    """Processes an event.

    Args:
      mediator (AnalysisMediator): mediates interactions between
          analysis plugins and other components, such as storage and dfvfs.
      event (EventObject): event.
    """
    try:
      self._analysis_plugin.ExamineEvent(mediator, event)

    except Exception as exception:  # pylint: disable=broad-except
      self.SignalAbort()

      # TODO: write analysis error.

      if self._debug_output:
        logger.warning('Unhandled exception while processing event object.')
        logger.exception(exception)
Example #15
0
    def _StopExtractionProcesses(self, abort=False):
        """Stops the extraction processes.

    Args:
      abort (bool): True to indicated the stop is issued on abort.
    """
        logger.debug('Stopping extraction processes.')
        self._StopMonitoringProcesses()

        # Note that multiprocessing.Queue is very sensitive regarding
        # blocking on either a get or a put. So we try to prevent using
        # any blocking behavior.

        if abort:
            # Signal all the processes to abort.
            self._AbortTerminate()

        logger.debug('Emptying task queue.')
        self._task_queue.Empty()

        # Wake the processes to make sure that they are not blocking
        # waiting for new items on the queue.
        for _ in self._processes_per_pid:
            try:
                self._task_queue.PushItem(plaso_queue.QueueAbort(),
                                          block=False)
            except errors.QueueFull:
                logger.warning(
                    'Task queue full, unable to push abort message.')

        # Try waiting for the processes to exit normally.
        self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
        self._task_queue.Close(abort=abort)

        if not abort:
            # Check if the processes are still alive and terminate them if necessary.
            self._AbortTerminate()
            self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
            self._task_queue.Close(abort=True)

        # Kill any lingering processes.
        self._AbortKill()
Example #16
0
    def _ProcessEvent(self, mediator, event, event_data, event_data_stream):
        """Processes an event.

    Args:
      mediator (AnalysisMediator): mediates interactions between
          analysis plugins and other components, such as storage and dfvfs.
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.
    """
        try:
            self._analysis_plugin.ExamineEvent(mediator, event, event_data,
                                               event_data_stream)

        except Exception as exception:  # pylint: disable=broad-except
            # TODO: write analysis error and change logger to debug only.

            logger.warning(
                'Unhandled exception while processing event object.')
            logger.exception(exception)
Example #17
0
    def Open(self, hostname, port):
        """Opens a RPC communication channel to the server.

    Args:
      hostname: the hostname or IP address to connect to for requests.
      port: the port to connect to for requests.

    Returns:
      A boolean indicating if the communication channel was established.
    """
        server_url = 'http://{0:s}:{1:d}'.format(hostname, port)
        try:
            self._xmlrpc_proxy = xmlrpclib.ServerProxy(server_url,
                                                       allow_none=True)
        except SocketServer.socket.error as exception:
            logger.warning(
                ('Unable to connect to RPC server on {0:s}:{1:d} with error: '
                 '{2:s}').format(hostname, port, exception))
            return False

        return True
Example #18
0
  def _StopExtractionProcesses(self, abort=False):
    """Stops the extraction processes.

    Args:
      abort (bool): True to indicate the stop is issued on abort.
    """
    logger.debug('Stopping extraction processes.')
    self._StopMonitoringProcesses()

    # Note that multiprocessing.Queue is very sensitive regarding
    # blocking on either a get or a put. So we try to prevent using
    # any blocking behavior.

    if abort:
      # Signal all the processes to abort.
      self._AbortTerminate()

    logger.debug('Emptying task queue.')
    self._task_queue.Empty()

    # Wake the processes to make sure that they are not blocking
    # waiting for new items on the queue.
    for _ in self._processes_per_pid:
      try:
        self._task_queue.PushItem(plaso_queue.QueueAbort(), block=False)
      except errors.QueueFull:
        logger.warning('Task queue full, unable to push abort message.')

    # Try waiting for the processes to exit normally.
    self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
    self._task_queue.Close(abort=abort)

    if not abort:
      # Check if the processes are still alive and terminate them if necessary.
      self._AbortTerminate()
      self._AbortJoin(timeout=self._PROCESS_JOIN_TIMEOUT)
      self._task_queue.Close(abort=True)

    # Kill any lingering processes.
    self._AbortKill()
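
A minimal standalone sketch of the wake-up step above, assuming a plain multiprocessing.Queue and using None as a stand-in sentinel for plaso_queue.QueueAbort(); WakeWorkers is an illustrative name:

import queue


def WakeWorkers(task_queue, number_of_workers):
  """Pushes one non-blocking sentinel per worker to wake blocked get calls."""
  for _ in range(number_of_workers):
    try:
      # block=False mirrors the non-blocking PushItem call above; a full queue
      # means the worker already has items to wake up on.
      task_queue.put(None, block=False)
    except queue.Full:
      pass
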
Example #19
0
    def _Open(self, hostname, port):
        """Opens the RPC communication channel for clients.

    Args:
      hostname (str): hostname or IP address to connect to for requests.
      port (int): port to connect to for requests.

    Returns:
      bool: True if the communication channel was successfully opened.
    """
        try:
            self._xmlrpc_server = SimpleXMLRPCServer.SimpleXMLRPCServer(
                (hostname, port), logRequests=False, allow_none=True)
        except SocketServer.socket.error as exception:
            logger.warning(
                ('Unable to bind a RPC server on {0:s}:{1:d} with error: '
                 '{2!s}').format(hostname, port, exception))
            return False

        self._xmlrpc_server.register_function(self._callback,
                                              self._RPC_FUNCTION_NAME)
        return True
Example #20
0
  def _Open(self, hostname, port):
    """Opens the RPC communication channel for clients.

    Args:
      hostname (str): hostname or IP address to connect to for requests.
      port (int): port to connect to for requests.

    Returns:
      bool: True if the communication channel was successfully opened.
    """
    try:
      self._xmlrpc_server = SimpleXMLRPCServer.SimpleXMLRPCServer(
          (hostname, port), logRequests=False, allow_none=True)
    except SocketServer.socket.error as exception:
      logger.warning((
          'Unable to bind a RPC server on {0:s}:{1:d} with error: '
          '{2!s}').format(hostname, port, exception))
      return False

    self._xmlrpc_server.register_function(
        self._callback, self._RPC_FUNCTION_NAME)
    return True
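
A minimal sketch of the same server-side setup using the Python 3 module names (xmlrpc.server and the built-in socket module instead of SimpleXMLRPCServer and SocketServer); the hostname, port, 'status' name and callback are illustrative assumptions:

import socket

from xmlrpc.server import SimpleXMLRPCServer


def OpenRpcServer(hostname='localhost', port=8000, callback=lambda: 'ok'):
  """Binds an XML-RPC server and registers a callback, or returns None."""
  try:
    xmlrpc_server = SimpleXMLRPCServer(
        (hostname, port), logRequests=False, allow_none=True)
  except socket.error:
    return None

  xmlrpc_server.register_function(callback, 'status')
  return xmlrpc_server
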
Example #21
0
    def UpdateTaskAsPendingMerge(self, task):
        """Updates the task manager to reflect the task is ready to be merged.

    Args:
      task (Task): task.

    Raises:
      KeyError: if the task was not processing or abandoned.
    """
        with self._lock:
            is_processing = task.identifier in self._tasks_processing
            is_abandoned = task.identifier in self._tasks_abandoned
            is_queued = task.identifier in self._tasks_queued

            if not (is_queued or is_abandoned or is_processing):
                raise KeyError('Status of task {0:s} is unknown.'.format(
                    task.identifier))

            self._tasks_pending_merge.PushTask(task)

            task.UpdateProcessingTime()
            self._UpdateLastestProcessingTime(task)

            if is_queued:
                del self._tasks_queued[task.identifier]

            if is_processing:
                del self._tasks_processing[task.identifier]

            if is_abandoned:
                del self._tasks_abandoned[task.identifier]

        if is_abandoned:
            logger.warning(
                'Previously abandoned task {0:s} is now pending merge.'.format(
                    task.identifier))
        else:
            logger.debug('Task {0:s} is pending merge.'.format(
                task.identifier))
Example #22
0
  def Open(self, hostname, port):
    """Opens a RPC communication channel to the server.

    Args:
      hostname (str): hostname or IP address to connect to for requests.
      port (int): port to connect to for requests.

    Returns:
      bool: True if the communication channel was established.
    """
    server_url = 'http://{0:s}:{1:d}'.format(hostname, port)

    try:
      self._xmlrpc_proxy = xmlrpclib.ServerProxy(
          server_url, allow_none=True)
    except SocketServer.socket.error as exception:
      logger.warning((
          'Unable to connect to RPC server on {0:s}:{1:d} with error: '
          '{2!s}').format(hostname, port, exception))
      return False

    return True
Example #23
0
    def _Main(self):
        """The main loop."""
        self._StartProfiling(self._processing_configuration.profiling)

        if self._serializers_profiler:
            self._storage_writer.SetSerializersProfiler(
                self._serializers_profiler)

        if self._storage_profiler:
            self._storage_writer.SetStorageProfiler(self._storage_profiler)

        logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
            self._name, self._pid))

        # Creating the threading event in the constructor will cause a pickle
        # error on Windows when an analysis process is created.
        self._foreman_status_wait_event = threading.Event()
        self._status = definitions.PROCESSING_STATUS_ANALYZING

        task = tasks.Task()
        # TODO: temporary solution.
        task.identifier = self._analysis_plugin.plugin_name

        self._task = task

        storage_writer = self._storage_writer.CreateTaskStorage(task)

        if self._serializers_profiler:
            storage_writer.SetSerializersProfiler(self._serializers_profiler)

        if self._storage_profiler:
            storage_writer.SetStorageProfiler(self._storage_profiler)

        storage_writer.Open()

        self._analysis_mediator = analysis_mediator.AnalysisMediator(
            storage_writer,
            self._knowledge_base,
            data_location=self._data_location)

        # TODO: set event_filter_expression in mediator.

        storage_writer.WriteTaskStart()

        try:
            logger.debug(
                '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
                    self._name, self._pid))

            while not self._abort:
                try:
                    event = self._event_queue.PopItem()

                except (errors.QueueClose, errors.QueueEmpty) as exception:
                    logger.debug(
                        'ConsumeItems exiting with exception {0:s}.'.format(
                            type(exception)))
                    break

                if isinstance(event, plaso_queue.QueueAbort):
                    logger.debug(
                        'ConsumeItems exiting, dequeued QueueAbort object.')
                    break

                self._ProcessEvent(self._analysis_mediator, event)

                self._number_of_consumed_events += 1

                if self._guppy_memory_profiler:
                    self._guppy_memory_profiler.Sample()

            logger.debug(
                '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
                    self._name, self._pid))

            if not self._abort:
                self._status = definitions.PROCESSING_STATUS_REPORTING

                self._analysis_mediator.ProduceAnalysisReport(
                    self._analysis_plugin)

        # All exceptions need to be caught here to prevent the process
        # from being killed by an uncaught exception.
        except Exception as exception:  # pylint: disable=broad-except
            logger.warning(
                'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                    self._name, self._pid))
            logger.exception(exception)

            self._abort = True

        finally:
            storage_writer.WriteTaskCompletion(aborted=self._abort)

            storage_writer.Close()

            if self._serializers_profiler:
                storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                storage_writer.SetStorageProfiler(None)

        try:
            self._storage_writer.FinalizeTaskStorage(task)
        except IOError:
            pass

        if self._abort:
            self._status = definitions.PROCESSING_STATUS_ABORTED
        else:
            self._status = definitions.PROCESSING_STATUS_COMPLETED

        self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

        logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
            self._name, self._pid))

        if self._serializers_profiler:
            self._storage_writer.SetSerializersProfiler(None)

        if self._storage_profiler:
            self._storage_writer.SetStorageProfiler(None)

        self._StopProfiling()

        self._analysis_mediator = None
        self._foreman_status_wait_event = None
        self._storage_writer = None
        self._task = None

        try:
            self._event_queue.Close(abort=self._abort)
        except errors.QueueAlreadyClosed:
            logger.error('Queue for {0:s} was already closed.'.format(
                self.name))
Example #24
0
  def _CheckStatusWorkerProcess(self, pid):
    """Checks the status of a worker process.

    If a worker process is not responding, the process is terminated and
    a replacement process is started.

    Args:
      pid (int): process ID (PID) of a registered worker process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    process = self._processes_per_pid[pid]

    process_status = self._QueryProcessStatus(process)
    if process_status is None:
      process_is_alive = False
    else:
      process_is_alive = True

    process_information = self._process_information_per_pid[pid]
    used_memory = process_information.GetUsedMemory() or 0

    if self._worker_memory_limit and used_memory > self._worker_memory_limit:
      logger.warning((
          'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
          'memory limit: {2:d}.').format(
              process.name, pid, self._worker_memory_limit))
      self._KillProcess(pid)

    if isinstance(process_status, dict):
      self._rpc_errors_per_pid[pid] = 0
      status_indicator = process_status.get('processing_status', None)

    else:
      rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
      self._rpc_errors_per_pid[pid] = rpc_errors

      if rpc_errors > self._MAXIMUM_RPC_ERRORS:
        process_is_alive = False

      if process_is_alive:
        rpc_port = process.rpc_port.value
        logger.warning((
            'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
            'RPC socket: http://localhost:{2:d}').format(
                process.name, pid, rpc_port))

        processing_status_string = 'RPC error'
        status_indicator = definitions.PROCESSING_STATUS_RUNNING
      else:
        processing_status_string = 'killed'
        status_indicator = definitions.PROCESSING_STATUS_KILLED

      process_status = {
          'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    # _UpdateProcessingStatus can also change the status of the worker,
    # so refresh the status if applicable.
    for worker_status in self._processing_status.workers_status:
      if worker_status.pid == pid:
        status_indicator = worker_status.status
        break

    if status_indicator in definitions.PROCESSING_ERROR_STATUS:
      logger.error((
          'Process {0:s} (PID: {1:d}) is not functioning correctly. '
          'Status code: {2!s}.').format(process.name, pid, status_indicator))

      self._TerminateProcessByPid(pid)

      logger.info('Starting replacement worker process for {0:s}'.format(
          process.name))
      replacement_process_attempts = 0
      replacement_process = None
      while replacement_process_attempts < self._MAXIMUM_REPLACEMENT_RETRIES:
        replacement_process_attempts += 1
        replacement_process = self._StartWorkerProcess(
            process.name, self._storage_writer)
        if not replacement_process:
          time.sleep(self._REPLACEMENT_WORKER_RETRY_DELAY)
          break
      if not replacement_process:
        logger.error(
            'Unable to create replacement worker process for: {0:s}'.format(
                process.name))
Example #25
0
  def _GetEventIdentifiers(self, event, event_data, event_data_stream):
    """Retrieves different identifiers of the event.

    The event data attributes and values can be represented as a string and used
    for sorting and uniquely identifying events. This function determines
    multiple identifiers:
    * an identifier of the attributes and values without the timestamp
      description (or usage). This is referred to as the MACB group
      identifier.
    * an identifier of the attributes and values including the timestamp
      description (or usage). This is referred to as the event content
      identifier.

    The identifier without the timestamp description can be used to group
    events that have the same MACB (modification, access, change, birth)
    timestamps. The PsortEventHeap will store these events individually and
    relies on PsortMultiProcessEngine to do the actual grouping of events.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.

    Returns:
      tuple: containing:

        str: identifier of the event MACB group or None if the event cannot
            be grouped.
        str: identifier of the event content.
    """
    attributes = []

    attribute_string = 'data_type: {0:s}'.format(event_data.data_type)
    attributes.append(attribute_string)

    event_attributes = list(event_data.GetAttributes())
    if event_data_stream:
      event_data_stream_attributes = event_data_stream.GetAttributes()
      event_attributes.extend(event_data_stream_attributes)

    for attribute_name, attribute_value in sorted(event_attributes):
      # The filestat parser operates on file entry level and has no event data
      # stream with a path specification. Therefore we need filename and inode
      # to make sure events of different file entries are considered unique.
      if event_data.data_type == 'fs:stat' and attribute_name in (
          'filename', 'inode'):
        pass

      elif attribute_name in self._IDENTIFIER_EXCLUDED_ATTRIBUTES:
        continue

      if not attribute_value:
        continue

      # Note that support for event_data.pathspec is kept for backwards
      # compatibility. The current value is event_data_stream.path_spec.
      if attribute_name in ('path_spec', 'pathspec'):
        attribute_value = attribute_value.comparable

      elif isinstance(attribute_value, dict):
        attribute_value = sorted(attribute_value.items())

      elif isinstance(attribute_value, set):
        attribute_value = sorted(list(attribute_value))

      elif isinstance(attribute_value, bytes):
        attribute_value = repr(attribute_value)

      try:
        attribute_string = '{0:s}: {1!s}'.format(
            attribute_name, attribute_value)
      except UnicodeDecodeError:
        logger.error('Failed to decode attribute {0:s}'.format(
            attribute_name))
      attributes.append(attribute_string)

    # The 'atime', 'ctime', 'crtime', 'mtime' are included for backwards
    # compatibility with the filestat parser.
    if event.timestamp_desc in (
        'atime', 'ctime', 'crtime', 'mtime',
        definitions.TIME_DESCRIPTION_LAST_ACCESS,
        definitions.TIME_DESCRIPTION_CHANGE,
        definitions.TIME_DESCRIPTION_CREATION,
        definitions.TIME_DESCRIPTION_MODIFICATION):
      macb_group_identifier = ', '.join(attributes)
    else:
      macb_group_identifier = None

    timestamp_desc = event.timestamp_desc
    if timestamp_desc is None:
      logger.warning('Missing timestamp_desc attribute')
      timestamp_desc = definitions.TIME_DESCRIPTION_UNKNOWN

    attributes.insert(0, timestamp_desc)
    content_identifier = ', '.join(attributes)

    return macb_group_identifier, content_identifier
Example #26
0
  def _CheckStatusAnalysisProcess(self, pid):
    """Checks the status of an analysis process.

    Args:
      pid (int): process ID (PID) of a registered analysis process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    if pid in self._completed_analysis_processes:
      status_indicator = definitions.STATUS_INDICATOR_COMPLETED
      process_status = {
          'processing_status': status_indicator}
      used_memory = 0

    else:
      process = self._processes_per_pid[pid]

      process_status = self._QueryProcessStatus(process)
      if process_status is None:
        process_is_alive = False
      else:
        process_is_alive = True

      process_information = self._process_information_per_pid[pid]
      used_memory = process_information.GetUsedMemory() or 0

      if self._worker_memory_limit and used_memory > self._worker_memory_limit:
        logger.warning((
            'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
            'memory limit: {2:d}.').format(
                process.name, pid, self._worker_memory_limit))
        self._KillProcess(pid)

      if isinstance(process_status, dict):
        self._rpc_errors_per_pid[pid] = 0
        status_indicator = process_status.get('processing_status', None)

        if status_indicator == definitions.STATUS_INDICATOR_COMPLETED:
          self._completed_analysis_processes.add(pid)

      else:
        rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
        self._rpc_errors_per_pid[pid] = rpc_errors

        if rpc_errors > self._MAXIMUM_RPC_ERRORS:
          process_is_alive = False

        if process_is_alive:
          rpc_port = process.rpc_port.value
          logger.warning((
              'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
              'RPC socket: http://localhost:{2:d}').format(
                  process.name, pid, rpc_port))

          processing_status_string = 'RPC error'
          status_indicator = definitions.STATUS_INDICATOR_RUNNING
        else:
          processing_status_string = 'killed'
          status_indicator = definitions.STATUS_INDICATOR_KILLED

        process_status = {
            'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    if status_indicator in definitions.ERROR_STATUS_INDICATORS:
      logger.error((
          'Process {0:s} (PID: {1:d}) is not functioning correctly. '
          'Status code: {2!s}.').format(
              process.name, pid, status_indicator))

      self._TerminateProcessByPid(pid)
Example #27
0
  def _CheckStatusAnalysisProcess(self, pid):
    """Checks the status of an analysis process.

    Args:
      pid (int): process ID (PID) of a registered analysis process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    if pid in self._completed_analysis_processes:
      status_indicator = definitions.PROCESSING_STATUS_COMPLETED
      process_status = {
          'processing_status': status_indicator}
      used_memory = 0

    else:
      process = self._processes_per_pid[pid]

      process_status = self._QueryProcessStatus(process)
      if process_status is None:
        process_is_alive = False
      else:
        process_is_alive = True

      process_information = self._process_information_per_pid[pid]
      used_memory = process_information.GetUsedMemory() or 0

      if self._worker_memory_limit and used_memory > self._worker_memory_limit:
        logger.warning((
            'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
            'memory limit: {2:d}.').format(
                process.name, pid, self._worker_memory_limit))
        self._KillProcess(pid)

      if isinstance(process_status, dict):
        self._rpc_errors_per_pid[pid] = 0
        status_indicator = process_status.get('processing_status', None)

        if status_indicator == definitions.PROCESSING_STATUS_COMPLETED:
          self._completed_analysis_processes.add(pid)

      else:
        rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
        self._rpc_errors_per_pid[pid] = rpc_errors

        if rpc_errors > self._MAXIMUM_RPC_ERRORS:
          process_is_alive = False

        if process_is_alive:
          rpc_port = process.rpc_port.value
          logger.warning((
              'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
              'RPC socket: http://localhost:{2:d}').format(
                  process.name, pid, rpc_port))

          processing_status_string = 'RPC error'
          status_indicator = definitions.PROCESSING_STATUS_RUNNING
        else:
          processing_status_string = 'killed'
          status_indicator = definitions.PROCESSING_STATUS_KILLED

        process_status = {
            'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    if status_indicator in definitions.PROCESSING_ERROR_STATUS:
      logger.error((
          'Process {0:s} (PID: {1:d}) is not functioning correctly. '
          'Status code: {2!s}.').format(
              process.name, pid, status_indicator))

      self._TerminateProcessByPid(pid)
Example #28
0
  def _Main(self):
    """The main loop."""
    # We need a resolver context per process to prevent multiprocessing
    # issues with file objects stored in images.
    resolver_context = context.Context()

    for credential_configuration in self._processing_configuration.credentials:
      resolver.Resolver.key_chain.SetCredential(
          credential_configuration.path_spec,
          credential_configuration.credential_type,
          credential_configuration.credential_data)

    self._parser_mediator = parsers_mediator.ParserMediator(
        None, self._knowledge_base,
        preferred_year=self._processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=self._processing_configuration.temporary_directory)

    self._parser_mediator.SetEventExtractionConfiguration(
        self._processing_configuration.event_extraction)

    self._parser_mediator.SetInputSourceConfiguration(
        self._processing_configuration.input_source)

    # We need to initialize the parser and hasher objects after the process
    # has forked; otherwise, on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            self._processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        self._processing_configuration.extraction)

    self._parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
        self._name, self._pid))

    self._status = definitions.PROCESSING_STATUS_RUNNING

    try:
      logger.debug('{0!s} (PID: {1:d}) started monitoring task queue.'.format(
          self._name, self._pid))

      while not self._abort:
        try:
          task = self._task_queue.PopItem()
        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0:s}.'.format(
              type(exception)))
          break

        if isinstance(task, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessTask(task)

      logger.debug('{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
          self._name, self._pid))

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    self._parser_mediator.StopProfiling()

    self._extraction_worker = None
    self._parser_mediator = None
    self._storage_writer = None

    if self._abort:
      self._status = definitions.PROCESSING_STATUS_ABORTED
    else:
      self._status = definitions.PROCESSING_STATUS_COMPLETED

    logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
        self._name, self._pid))

    try:
      self._task_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))
Example #29
0
  def _Main(self):
    """The main loop."""
    # We need a resolver context per process to prevent multiprocessing
    # issues with file objects stored in images.
    resolver_context = context.Context()

    for credential_configuration in self._processing_configuration.credentials:
      resolver.Resolver.key_chain.SetCredential(
          credential_configuration.path_spec,
          credential_configuration.credential_type,
          credential_configuration.credential_data)

    self._parser_mediator = parsers_mediator.ParserMediator(
        None, self._knowledge_base,
        artifacts_filter_helper=self._artifacts_filter_helper,
        preferred_year=self._processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=self._processing_configuration.temporary_directory)

    self._parser_mediator.SetEventExtractionConfiguration(
        self._processing_configuration.event_extraction)

    self._parser_mediator.SetInputSourceConfiguration(
        self._processing_configuration.input_source)

    # We need to initialize the parser and hasher objects after the process
    # has forked; otherwise, on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            self._processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        self._processing_configuration.extraction)

    self._parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
        self._name, self._pid))

    self._status = definitions.STATUS_INDICATOR_RUNNING

    try:
      logger.debug('{0!s} (PID: {1:d}) started monitoring task queue.'.format(
          self._name, self._pid))

      while not self._abort:
        try:
          task = self._task_queue.PopItem()
        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0:s}.'.format(
              type(exception)))
          break

        if isinstance(task, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessTask(task)

      logger.debug('{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
          self._name, self._pid))

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    self._parser_mediator.StopProfiling()

    self._extraction_worker = None
    self._parser_mediator = None
    self._storage_writer = None

    if self._abort:
      self._status = definitions.STATUS_INDICATOR_ABORTED
    else:
      self._status = definitions.STATUS_INDICATOR_COMPLETED

    logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
        self._name, self._pid))

    try:
      self._task_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))
Example #30
0
  def _CheckStatusWorkerProcess(self, pid):
    """Checks the status of a worker process.

    If a worker process is not responding, the process is terminated and
    a replacement process is started.

    Args:
      pid (int): process ID (PID) of a registered worker process.

    Raises:
      KeyError: if the process is not registered with the engine.
    """
    # TODO: Refactor this method, simplify and separate concerns (monitoring
    # vs management).
    self._RaiseIfNotRegistered(pid)

    process = self._processes_per_pid[pid]

    process_status = self._QueryProcessStatus(process)
    if process_status is None:
      process_is_alive = False
    else:
      process_is_alive = True

    process_information = self._process_information_per_pid[pid]
    used_memory = process_information.GetUsedMemory() or 0

    if self._worker_memory_limit and used_memory > self._worker_memory_limit:
      logger.warning((
          'Process: {0:s} (PID: {1:d}) killed because it exceeded the '
          'memory limit: {2:d}.').format(
              process.name, pid, self._worker_memory_limit))
      self._KillProcess(pid)

    if isinstance(process_status, dict):
      self._rpc_errors_per_pid[pid] = 0
      status_indicator = process_status.get('processing_status', None)

    else:
      rpc_errors = self._rpc_errors_per_pid.get(pid, 0) + 1
      self._rpc_errors_per_pid[pid] = rpc_errors

      if rpc_errors > self._MAXIMUM_RPC_ERRORS:
        process_is_alive = False

      if process_is_alive:
        rpc_port = process.rpc_port.value
        logger.warning((
            'Unable to retrieve process: {0:s} (PID: {1:d}) status via '
            'RPC socket: http://localhost:{2:d}').format(
                process.name, pid, rpc_port))

        processing_status_string = 'RPC error'
        status_indicator = definitions.PROCESSING_STATUS_RUNNING
      else:
        processing_status_string = 'killed'
        status_indicator = definitions.PROCESSING_STATUS_KILLED

      process_status = {
          'processing_status': processing_status_string}

    self._UpdateProcessingStatus(pid, process_status, used_memory)

    # _UpdateProcessingStatus can also change the status of the worker,
    # so refresh the status if applicable.
    for worker_status in self._processing_status.workers_status:
      if worker_status.pid == pid:
        status_indicator = worker_status.status
        break

    if status_indicator in definitions.PROCESSING_ERROR_STATUS:
      logger.error((
          'Process {0:s} (PID: {1:d}) is not functioning correctly. '
          'Status code: {2!s}.').format(process.name, pid, status_indicator))

      self._TerminateProcessByPid(pid)

      logger.info('Starting replacement worker process for {0:s}'.format(
          process.name))
      replacement_process_attempts = 0
      replacement_process = None
      while replacement_process_attempts < self._MAXIMUM_REPLACEMENT_RETRIES:
        replacement_process_attempts += 1
        replacement_process = self._StartWorkerProcess(
            process.name, self._storage_writer)
        if not replacement_process:
          time.sleep(self._REPLACEMENT_WORKER_RETRY_DELAY)
          break
      if not replacement_process:
        logger.error(
            'Unable to create replacement worker process for: {0:s}'.format(
                process.name))
Example #31
0
  def _Main(self):
    """The main loop."""
    self._StartProfiling(self._processing_configuration.profiling)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Analysis plugin: {0!s} (PID: {1:d}) started'.format(
        self._name, self._pid))

    # Creating the threading event in the constructor will cause a pickle
    # error on Windows when an analysis process is created.
    self._foreman_status_wait_event = threading.Event()
    self._status = definitions.PROCESSING_STATUS_ANALYZING

    task = tasks.Task()
    # TODO: temporary solution.
    task.identifier = self._analysis_plugin.plugin_name

    self._task = task

    storage_writer = self._storage_writer.CreateTaskStorage(task)

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(self._storage_profiler)

    storage_writer.Open()

    self._analysis_mediator = analysis_mediator.AnalysisMediator(
        storage_writer, self._knowledge_base, data_location=self._data_location)

    # TODO: set event_filter_expression in mediator.

    storage_writer.WriteTaskStart()

    try:
      logger.debug(
          '{0!s} (PID: {1:d}) started monitoring event queue.'.format(
              self._name, self._pid))

      while not self._abort:
        try:
          event = self._event_queue.PopItem()

        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0:s}.'.format(
              type(exception)))
          break

        if isinstance(event, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessEvent(self._analysis_mediator, event)

        self._number_of_consumed_events += 1

        if self._guppy_memory_profiler:
          self._guppy_memory_profiler.Sample()

      logger.debug(
          '{0!s} (PID: {1:d}) stopped monitoring event queue.'.format(
              self._name, self._pid))

      if not self._abort:
        self._status = definitions.PROCESSING_STATUS_REPORTING

        self._analysis_mediator.ProduceAnalysisReport(self._analysis_plugin)

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    finally:
      storage_writer.WriteTaskCompletion(aborted=self._abort)

      storage_writer.Close()

      if self._serializers_profiler:
        storage_writer.SetSerializersProfiler(None)

      if self._storage_profiler:
        storage_writer.SetStorageProfiler(None)

    try:
      self._storage_writer.FinalizeTaskStorage(task)
    except IOError:
      pass

    if self._abort:
      self._status = definitions.PROCESSING_STATUS_ABORTED
    else:
      self._status = definitions.PROCESSING_STATUS_COMPLETED

    self._foreman_status_wait_event.wait(self._FOREMAN_STATUS_WAIT)

    logger.debug('Analysis plugin: {0!s} (PID: {1:d}) stopped'.format(
        self._name, self._pid))

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()

    self._analysis_mediator = None
    self._foreman_status_wait_event = None
    self._storage_writer = None
    self._task = None

    try:
      self._event_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))