Example #1
    def _ParseFile(self, parser_object, path, knowledge_base_object):
        """Parses a file using the parser object.

        Args:
          parser_object: the parser object.
          path: the path of the file to parse.
          knowledge_base_object: the knowledge base object (instance of
                                 KnowledgeBase).

        Returns:
          An event object queue object (instance of Queue).
        """
        event_queue = queue.SingleThreadedQueue()
        event_queue_producer = queue.EventObjectQueueProducer(event_queue)

        parse_error_queue = queue.SingleThreadedQueue()

        parser_context = parsers_context.ParserContext(event_queue_producer,
                                                       parse_error_queue,
                                                       knowledge_base_object)
        path_spec = path_spec_factory.Factory.NewPathSpec(
            definitions.TYPE_INDICATOR_OS, location=path)
        file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

        parser_object.Parse(parser_context, file_entry)
        event_queue.SignalEndOfInput()

        return event_queue
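
The queue returned by a helper like _ParseFile still has to be drained by a consumer. A minimal sketch of that step, assuming the same plaso queue and errors modules used throughout these examples (the helper name _DrainEventQueue is hypothetical); it mirrors the PopItem loop from Example #4:

def _DrainEventQueue(event_queue):
  """Yields event objects from the queue until end of input."""
  while True:
    try:
      # PopItem raises QueueEmpty once the queue has been exhausted.
      item = event_queue.PopItem()
    except errors.QueueEmpty:
      break
    # The producer appends a QueueEndOfInput marker when SignalEndOfInput
    # is called, so stop as soon as it is seen.
    if isinstance(item, queue.QueueEndOfInput):
      break
    yield item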
Example #2
  def testStorageWriter(self):
    """Test the storage writer."""
    self.assertEquals(len(self._event_objects), 4)

    # The storage writer is normally run in a separate thread.
    # For the purpose of this test it has to be run in sequence,
    # hence the call to WriteEventObjects after all the event objects
    # have been queued up.
    test_queue = queue.MultiThreadedQueue()
    test_queue_producer = queue.EventObjectQueueProducer(test_queue)
    test_queue_producer.ProduceEventObjects(self._event_objects)
    test_queue_producer.SignalEndOfInput()

    with tempfile.NamedTemporaryFile() as temp_file:
      storage_writer = storage.StorageFileWriter(test_queue, temp_file)
      storage_writer.WriteEventObjects()

      z_file = zipfile.ZipFile(temp_file, 'r', zipfile.ZIP_DEFLATED)

      expected_z_filename_list = [
          'plaso_index.000001', 'plaso_meta.000001', 'plaso_proto.000001',
          'plaso_timestamps.000001']

      z_filename_list = sorted(z_file.namelist())
      self.assertEquals(len(z_filename_list), 4)
      self.assertEquals(z_filename_list, expected_z_filename_list)
Example #3
  def _GetParserContext(
     self, event_queue, parse_error_queue, knowledge_base_values=None):
    """Retrieves a parser context object.

    Args:
      event_queue: the event queue (instance of Queue).
      parse_error_queue: the parse error queue (instance of Queue).
      knowledge_base_values: optional dict containing the knowledge base
                             values. The default is None.

    Returns:
      A parser context object (instance of ParserContext).
    """
    event_queue_producer = queue.EventObjectQueueProducer(event_queue)
    parse_error_queue_producer = queue.ParseErrorQueueProducer(
        parse_error_queue)

    knowledge_base_object = knowledge_base.KnowledgeBase()
    if knowledge_base_values:
      for identifier, value in knowledge_base_values.iteritems():
        knowledge_base_object.SetValue(identifier, value)

    return context.ParserContext(
        event_queue_producer, parse_error_queue_producer,
        knowledge_base_object)
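
A short sketch of how a test method might call the helper above, assuming the same plaso queue module; the knowledge base identifier and value passed in are hypothetical illustrations:

    # Usage sketch: build a parser context with one knowledge base value.
    event_queue = queue.SingleThreadedQueue()
    parse_error_queue = queue.SingleThreadedQueue()
    parser_context = self._GetParserContext(
        event_queue, parse_error_queue,
        knowledge_base_values={u'hostname': u'TESTHOST'})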
Example #4
def ParseFile(file_entry):
  """Parse a file given a file entry and yield results."""
  if not file_entry:
    return

  # Create the necessary items.
  proc_queue = queue.SingleThreadedQueue()
  storage_queue = queue.SingleThreadedQueue()
  storage_queue_producer = queue.EventObjectQueueProducer(storage_queue)
  pre_obj = event.PreprocessObject()
  all_parsers = putils.FindAllParsers(pre_obj)

  # Create a worker.
  worker_object = worker.EventExtractionWorker(
      'my_worker', proc_queue, storage_queue_producer, pre_obj, all_parsers)

  # Parse the file.
  worker_object.ParseFile(file_entry)

  storage_queue.SignalEndOfInput()
  proc_queue.SignalEndOfInput()

  while True:
    try:
      item = storage_queue.PopItem()
    except errors.QueueEmpty:
      break

    if isinstance(item, queue.QueueEndOfInput):
      break

    yield item
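
A hedged usage sketch for ParseFile: the file entry argument can be created with the dfVFS path specification factory and resolver, the same way Examples #1 and #13 open files. The location used below is a placeholder path:

# Usage sketch (imports as in Examples #1 and #13; the path is a placeholder).
path_spec = path_spec_factory.Factory.NewPathSpec(
    definitions.TYPE_INDICATOR_OS, location=u'/tmp/syslog')
file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

for event_object in ParseFile(file_entry):
  # Each yielded item is an event object pulled from the storage queue.
  print event_object.timestamp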
Example #5
    def setUp(self):
        """Sets up the objects used throughout the test."""
        self._temp_directory = tempfile.mkdtemp()
        self._storage_filename = os.path.join(self._temp_directory, 'plaso.db')
        self._tag_input_filename = os.path.join(self._temp_directory,
                                                'input1.tag')

        tag_input_file = open(self._tag_input_filename, 'wb')
        tag_input_file.write('\n'.join([
            'Test Tag', '  filename contains \'/tmp/whoaaaa\'',
            '  parser is \'TestEvent\' and stuff is \'dude\''
        ]))
        tag_input_file.close()

        pfilter.TimeRangeCache.ResetTimeConstraints()

        test_queue = queue.MultiThreadedQueue()
        test_queue_producer = queue.EventObjectQueueProducer(test_queue)
        test_queue_producer.ProduceEventObjects([
            TestEvent(0),
            TestEvent(1000),
            TestEvent(2000000, '/tmp/whoaaaaa'),
            TestEvent(2500000, '/tmp/whoaaaaa'),
            TestEvent(5000000, '/tmp/whoaaaaa', 'dude')
        ])
        test_queue_producer.SignalEndOfInput()

        storage_writer = storage.StorageFileWriter(test_queue,
                                                   self._storage_filename)
        storage_writer.WriteEventObjects()

        self._storage_file = storage.StorageFile(self._storage_filename)
        self._storage_file.SetStoreLimit()
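
The setUp above creates a temporary directory, a tag input file and an open storage file, but the matching tearDown is not part of the example. A minimal sketch using the attribute names created above and the standard library shutil module; the Close call assumes the storage file object exposes one:

    def tearDown(self):
        """Cleans up the objects created by setUp."""
        # Close the storage file before removing the backing directory
        # (assumption: the StorageFile object provides a Close method).
        self._storage_file.Close()
        shutil.rmtree(self._temp_directory, ignore_errors=True)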
Example #6
  def testImageWithFilterCollection(self):
    """Test collection on a storage media image file with a filter."""
    test_file = self._GetTestFilePath(['image.dd'])

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    filter_name = ''
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      filter_name = temp_file.name
      temp_file.write('/a_directory/.+zip\n')
      temp_file.write('/a_directory/another.+\n')
      temp_file.write('/passwords.txt\n')

    test_collection_queue = queue.SingleThreadedQueue()
    test_storage_queue = queue.SingleThreadedQueue()
    test_storage_queue_producer = queue.EventObjectQueueProducer(
        test_storage_queue)
    resolver_context = context.Context()
    test_collector = collector.Collector(
        test_collection_queue, test_storage_queue_producer, test_file,
        path_spec, resolver_context=resolver_context)

    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
    test_collector.SetFilter(find_specs)

    test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumePathSpecs()

    try:
      os.remove(filter_name)
    except (OSError, IOError) as exception:
      logging.warning((
          u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
              filter_name, exception))

    self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2)

    paths = test_collector_queue_consumer.GetFilePaths()

    # path_specs[0]
    # type: TSK
    # file_path: '/a_directory/another_file'
    # container_path: 'test_data/image.dd'
    # image_offset: 0
    self.assertEquals(paths[0], u'/a_directory/another_file')

    # path_specs[1]
    # type: TSK
    # file_path: '/passwords.txt'
    # container_path: 'test_data/image.dd'
    # image_offset: 0
    self.assertEquals(paths[1], u'/passwords.txt')
Example #7
    def __init__(self, collection_queue, storage_queue):
        """Initialize the engine object.

        Args:
          collection_queue: the collection queue object (instance of Queue).
          storage_queue: the storage queue object (instance of Queue).
        """
        self._collection_queue = collection_queue
        self._source = None
        self._source_path_spec = None
        self._source_file_entry = None
        self._storage_queue_producer = queue.EventObjectQueueProducer(
            storage_queue)
Example #8
  def __init__(self, collection_queue, storage_queue, parse_error_queue):
    """Initialize the engine object.

    Args:
      collection_queue: the collection queue object (instance of Queue).
      storage_queue: the storage queue object (instance of Queue).
      parse_error_queue: the parse error queue object (instance of Queue).
    """
    self._collection_queue = collection_queue
    self._source = None
    self._source_path_spec = None
    self._source_file_entry = None
    self._event_queue_producer = queue.EventObjectQueueProducer(storage_queue)
    self._parse_error_queue_producer = queue.ParseErrorQueueProducer(
        parse_error_queue)
    self.knowledge_base = knowledge_base.KnowledgeBase()
Example #9
    def testWinAnalyzePlugin(self):
        """Test the plugin against mock events."""
        knowledge_base = self._SetUpKnowledgeBase(
            knowledge_base_values={'users': self.WIN_USERS})

        event_queue = queue.SingleThreadedQueue()

        # Fill the incoming queue with events.
        test_queue_producer = queue.EventObjectQueueProducer(event_queue)
        test_queue_producer.ProduceEventObjects(
            [self._CreateTestEventObject(path) for path in self.WIN_PATHS])
        test_queue_producer.SignalEndOfInput()

        # Initialize plugin.
        analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)

        # Run the analysis plugin.
        analysis_report_queue_consumer = self._RunAnalysisPlugin(
            analysis_plugin, knowledge_base)
        analysis_reports = self._GetAnalysisReportsFromQueue(
            analysis_report_queue_consumer)

        self.assertEquals(len(analysis_reports), 1)

        analysis_report = analysis_reports[0]

        self.assertEquals(analysis_plugin._sep, u'\\')

        # Due to the behavior of join, one additional empty string is needed
        # at the end to create the last empty line.
        expected_text = u'\n'.join([
            u' == USER: dude ==',
            u'  Google Keep - notes and lists [hmjkmjkepdijhoojdojkdfohbdgmmhki]',
            u'', u' == USER: frank ==',
            u'  Google Play Music [icppfcnhkcmnfdhfhphakoifcfokfdhg]',
            u'  YouTube [blpcfgokakmgnkcojhhkbfbldkacnbeo]', u'', u''
        ])

        self.assertEquals(analysis_report.text, expected_text)
        self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test')

        expected_keys = set([u'frank', u'dude'])
        self.assertEquals(set(analysis_report.report_dict.keys()),
                          expected_keys)
Example #10
    def testMacAnalyzerPlugin(self):
        """Test the plugin against mock events."""
        knowledge_base = self._SetUpKnowledgeBase(
            knowledge_base_values={'users': self.MAC_USERS})

        event_queue = queue.SingleThreadedQueue()

        # Fill the incoming queue with events.
        test_queue_producer = queue.EventObjectQueueProducer(event_queue)
        test_queue_producer.ProduceEventObjects(
            [self._CreateTestEventObject(path) for path in self.MAC_PATHS])
        test_queue_producer.SignalEndOfInput()

        # Initialize plugin.
        analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)

        # Run the analysis plugin.
        analysis_report_queue_consumer = self._RunAnalysisPlugin(
            analysis_plugin, knowledge_base)
        analysis_reports = self._GetAnalysisReportsFromQueue(
            analysis_report_queue_consumer)

        self.assertEquals(len(analysis_reports), 1)

        analysis_report = analysis_reports[0]

        self.assertEquals(analysis_plugin._sep, u'/')

        # Due to the behavior of join, one additional empty string is needed
        # at the end to create the last empty line.
        expected_text = u'\n'.join([
            u' == USER: dude ==',
            u'  Google Drive [apdfllckaahabafndbhieahigkjlhalf]', u'',
            u' == USER: frank ==',
            u'  Gmail [pjkljhegncpnkpknbcohdijeoejaedia]', u'', u''
        ])

        self.assertEquals(analysis_report.text, expected_text)
        self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test')

        expected_keys = set([u'frank', u'dude'])
        self.assertEquals(set(analysis_report.report_dict.keys()),
                          expected_keys)
Example #11
  def testImageCollection(self):
    """Test collection on a storage media image file.

    This image has two files:
      + logs/hidden.zip
      + logs/sys.tgz

    The hidden.zip file contains one file, syslog, which is the same as the
    one inside sys.tgz.

    The end results should therefore be:
      + logs/hidden.zip (unchanged)
      + logs/hidden.zip:syslog (the text file extracted out)
      + logs/sys.tgz (unchanged)
      + logs/sys.tgz (read as a GZIP file, so not compressed)
      + logs/sys.tgz:syslog.gz (A GZIP file from the TAR container)
      + logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)

    This means that the collection script should collect 6 files in total.
    """
    test_file = self._GetTestFilePath(['syslog_image.dd'])

    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
        parent=volume_path_spec)

    test_collection_queue = queue.SingleThreadedQueue()
    test_storage_queue = queue.SingleThreadedQueue()
    test_storage_queue_producer = queue.EventObjectQueueProducer(
        test_storage_queue)
    resolver_context = context.Context()
    test_collector = collector.Collector(
        test_collection_queue, test_storage_queue_producer, test_file,
        path_spec, resolver_context=resolver_context)
    test_collector.Collect()

    test_collector_queue_consumer = TestCollectorQueueConsumer(
        test_collection_queue)
    test_collector_queue_consumer.ConsumePathSpecs()

    self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2)
Example #12
    def ParseStorage(self, options):
        """Open a storage file and parse through it.

        Args:
          options: the command line arguments (instance of argparse.Namespace).

        Returns:
          A counter.

        Raises:
          RuntimeError: if a non-recoverable situation is encountered.
        """
        counter = None

        if options.slice:
            if options.timezone == 'UTC':
                zone = pytz.utc
            else:
                zone = pytz.timezone(options.timezone)

            timestamp = timelib.Timestamp.FromTimeString(options.slice,
                                                         timezone=zone)

            # Convert number of minutes to microseconds.
            range_operator = self._slice_size * 60 * 1000000

            # Set the time range.
            pfilter.TimeRangeCache.SetLowerTimestamp(timestamp -
                                                     range_operator)
            pfilter.TimeRangeCache.SetUpperTimestamp(timestamp +
                                                     range_operator)

        if options.analysis_plugins:
            read_only = False
        else:
            read_only = True

        try:
            storage_file = self.OpenStorageFile(read_only=read_only)
        except IOError as exception:
            raise RuntimeError(
                u'Unable to open storage file: {0:s} with error: {1:s}.'.
                format(self._storage_file_path, exception))

        with storage_file:
            storage_file.SetStoreLimit(self._filter_object)

            try:
                output_module = self._output_module_class(
                    storage_file, self._output_stream, options,
                    self._filter_object)
            except IOError as exception:
                raise RuntimeError(
                    u'Unable to create output module with error: {0:s}'.format(
                        exception))

            if not output_module:
                raise RuntimeError(u'Missing output module.')

            if options.analysis_plugins:
                logging.info(u'Starting analysis plugins.')
                # From all preprocessing objects, try to get the last one that
                # has time zone information stored in it, since it has the
                # highest chance of containing the information we are seeking
                # (defaulting to the last one).
                pre_objs = storage_file.GetStorageInformation()
                pre_obj = pre_objs[-1]
                for obj in pre_objs:
                    if getattr(obj, 'time_zone_str', ''):
                        pre_obj = obj

                # Fill in the collection information.
                pre_obj.collection_information = {}
                encoding = getattr(pre_obj, 'preferred_encoding', None)
                if encoding:
                    cmd_line = ' '.join(sys.argv)
                    try:
                        pre_obj.collection_information[
                            'cmd_line'] = cmd_line.decode(encoding)
                    except UnicodeDecodeError:
                        pass
                pre_obj.collection_information['file_processed'] = (
                    self._storage_file_path)
                pre_obj.collection_information[
                    'method'] = 'Running Analysis Plugins'
                pre_obj.collection_information[
                    'plugins'] = options.analysis_plugins
                time_of_run = timelib.Timestamp.GetNow()
                pre_obj.collection_information['time_of_run'] = time_of_run

                pre_obj.counter = collections.Counter()

                # Assign the preprocessing object to the storage.
                # This is normally done when the storage object is constructed,
                # but the preprocessing object is stored inside the storage
                # file, so the file has to be opened and read first before we
                # can make changes to it. Hence we need to access this
                # protected member of the class.
                # pylint: disable=protected-access
                storage_file._pre_obj = pre_obj

                # Start queues and load up plugins.
                analysis_output_queue = queue.MultiThreadedQueue()
                event_queue_producers = []
                event_queues = []
                analysis_plugins_list = [
                    x.strip() for x in options.analysis_plugins.split(',')
                ]

                for _ in xrange(0, len(analysis_plugins_list)):
                    event_queues.append(queue.MultiThreadedQueue())
                    event_queue_producers.append(
                        queue.EventObjectQueueProducer(event_queues[-1]))

                knowledge_base_object = knowledge_base.KnowledgeBase()

                analysis_plugins = analysis.LoadPlugins(
                    analysis_plugins_list, event_queues)

                # Now we need to start all the plugins.
                for analysis_plugin in analysis_plugins:
                    analysis_report_queue_producer = queue.AnalysisReportQueueProducer(
                        analysis_output_queue)
                    analysis_context_object = analysis_context.AnalysisContext(
                        analysis_report_queue_producer, knowledge_base_object)
                    analysis_process = multiprocessing.Process(
                        name='Analysis {0:s}'.format(
                            analysis_plugin.plugin_name),
                        target=analysis_plugin.RunPlugin,
                        args=(analysis_context_object, ))
                    self._analysis_processes.append(analysis_process)

                    analysis_process.start()
                    logging.info(u'Plugin: [{0:s}] started.'.format(
                        analysis_plugin.plugin_name))
            else:
                event_queue_producers = []

            output_buffer = output_lib.EventBuffer(output_module,
                                                   options.dedup)
            with output_buffer:
                counter = ProcessOutput(output_buffer, output_module,
                                        self._filter_object,
                                        self._filter_buffer,
                                        event_queue_producers)

            for information in storage_file.GetStorageInformation():
                if hasattr(information, 'counter'):
                    counter['Stored Events'] += information.counter['total']

            if not options.quiet:
                logging.info(u'Output processing is done.')

            # Get all reports and tags from analysis plugins.
            if options.analysis_plugins:
                logging.info(u'Processing data from analysis plugins.')
                for event_queue_producer in event_queue_producers:
                    event_queue_producer.SignalEndOfInput()

                # Wait for all analysis plugins to complete.
                for number, analysis_process in enumerate(
                        self._analysis_processes):
                    logging.debug(
                        u'Waiting for analysis plugin: {0:d} to complete.'.
                        format(number))
                    if analysis_process.is_alive():
                        analysis_process.join(10)
                    else:
                        logging.warning(
                            u'Plugin {0:d} already stopped.'.format(number))
                        analysis_process.terminate()
                logging.debug(u'All analysis plugins are now stopped.')

                # Close the output queue.
                analysis_output_queue.SignalEndOfInput()

                # Go over each output.
                analysis_queue_consumer = PsortAnalysisReportQueueConsumer(
                    analysis_output_queue, storage_file,
                    self._filter_expression, self.preferred_encoding)

                analysis_queue_consumer.ConsumeAnalysisReports()

                if analysis_queue_consumer.tags:
                    storage_file.StoreTagging(analysis_queue_consumer.tags)

                # TODO: analysis_queue_consumer.anomalies:

                for item, value in analysis_queue_consumer.counter.iteritems():
                    counter[item] = value

        if self._filter_object and not counter['Limited By']:
            counter['Filter By Date'] = (counter['Stored Events'] -
                                         counter['Events Included'] -
                                         counter['Events Filtered Out'])

        return counter
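
The per-plugin fan-out buried in the middle of this function can be read in isolation. A minimal sketch of just that step, assuming only the plaso queue API used above; the helper name is hypothetical:

def _BuildPluginQueues(number_of_plugins):
  """Returns a list of (queue, producer) tuples, one per analysis plugin."""
  plugin_queues = []
  for _ in xrange(number_of_plugins):
    # Each plugin gets its own event queue plus a producer that the output
    # processing stage uses to feed events to that plugin.
    event_queue = queue.MultiThreadedQueue()
    event_queue_producer = queue.EventObjectQueueProducer(event_queue)
    plugin_queues.append((event_queue, event_queue_producer))
  return plugin_queues

When output processing is done, SignalEndOfInput is called on each producer, exactly as in the loop near the end of the function above.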
Example #13
def ProcessFile(options):
    """Process a file and produce profile results."""
    if options.proto_file and os.path.isfile(options.proto_file):
        with open(options.proto_file) as fh:
            proto_string = fh.read()

            proto = transmission_pb2.PathSpec()
            try:
                text_format.Merge(proto_string, proto)
            except text_format.ParseError as exception:
                logging.error(
                    u'Unable to parse file, error: {}'.format(exception))
                sys.exit(1)

            serializer = protobuf_serializer.ProtobufPathSpecSerializer
            path_spec = serializer.ReadSerializedObject(proto)
    else:
        path_spec = path_spec_factory.Factory.NewPathSpec(
            definitions.TYPE_INDICATOR_OS, location=options.file_to_parse)

    file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

    if file_entry is None:
        logging.error(u'Unable to open file: {0:s}'.format(
            options.file_to_parse))
        sys.exit(1)

    pre_obj = event.PreprocessObject()
    storage_queue = queue.SingleThreadedQueue()
    storage_queue_producer = queue.EventObjectQueueProducer(storage_queue)

    # Set few options the engine expects to be there.
    # TODO: Can we rather set this directly in argparse?
    options.single_process = True
    options.debug = False
    options.text_prepend = u''
    parsers = putils.FindAllParsers(pre_obj, options)
    my_worker = worker.EventExtractionWorker('0', None, storage_queue_producer,
                                             pre_obj, parsers)

    if options.verbose:
        profiler = cProfile.Profile()
        profiler.enable()
    else:
        time_start = time.time()
    my_worker.ParseFile(file_entry)

    if options.verbose:
        profiler.disable()
    else:
        time_end = time.time()

    storage_queue_producer.SignalEndOfInput()

    event_object_consumer = PprofEventObjectQueueConsumer(storage_queue)
    event_object_consumer.ConsumeEventObjects()

    if not options.verbose:
        print frontend_utils.FormatHeader('Time Used')
        print u'{:>20f}s'.format(time_end - time_start)

    print frontend_utils.FormatHeader('Parsers Loaded')
    # Accessing protected member.
    # pylint: disable=protected-access
    plugins = []
    for parser in sorted(my_worker._parsers['all']):
        print frontend_utils.FormatOutputString('', parser.parser_name)
        parser_plugins = getattr(parser, '_plugins', [])
        plugins.extend(parser_plugins)

    print frontend_utils.FormatHeader('Plugins Loaded')
    for plugin in sorted(plugins):
        if isinstance(plugin, basestring):
            print frontend_utils.FormatOutputString('', plugin)
        else:
            plugin_string = getattr(plugin, 'NAME', u'N/A')
            print frontend_utils.FormatOutputString('', plugin_string)

    print frontend_utils.FormatHeader('Parsers Used')
    for parser in sorted(event_object_consumer.parsers):
        print frontend_utils.FormatOutputString('', parser)

    print frontend_utils.FormatHeader('Plugins Used')
    for plugin in sorted(event_object_consumer.plugins):
        print frontend_utils.FormatOutputString('', plugin)

    print frontend_utils.FormatHeader('Counter')
    for key, value in event_object_consumer.counter.most_common():
        print frontend_utils.FormatOutputString(key, value)

    if options.verbose:
        return GetStats(profiler)