def _ParseFile(self, parser_object, path, knowledge_base_object):
  """Parses a file using the parser object.

  Args:
    parser_object: the parser object.
    path: the path of the file to parse.
    knowledge_base_object: the knowledge base object (instance of
                           KnowledgeBase).

  Returns:
    An event object queue object (instance of Queue).
  """
  event_queue = queue.SingleThreadedQueue()
  event_queue_producer = queue.EventObjectQueueProducer(event_queue)

  parse_error_queue = queue.SingleThreadedQueue()

  parser_context = parsers_context.ParserContext(
      event_queue_producer, parse_error_queue, knowledge_base_object)
  path_spec = path_spec_factory.Factory.NewPathSpec(
      definitions.TYPE_INDICATOR_OS, location=path)
  file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

  parser_object.Parse(parser_context, file_entry)
  event_queue.SignalEndOfInput()

  return event_queue
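# A minimal sketch (not from the original file) of how the queue returned
# by _ParseFile above might be drained. PopItem, errors.QueueEmpty and
# queue.QueueEndOfInput are the same interfaces used by ParseFile further
# below; the helper name _GetEventObjectsFromQueue is hypothetical.
def _GetEventObjectsFromQueue(self, event_queue):
  """Yields event objects from an event queue until end of input."""
  while True:
    try:
      item = event_queue.PopItem()
    except errors.QueueEmpty:
      break

    if isinstance(item, queue.QueueEndOfInput):
      break

    yield item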
def testStorageWriter(self):
  """Test the storage writer."""
  self.assertEquals(len(self._event_objects), 4)

  # The storage writer is normally run in a separate thread.
  # For the purpose of this test it has to be run in sequence,
  # hence the call to WriteEventObjects after all the event objects
  # have been queued up.
  test_queue = queue.MultiThreadedQueue()
  test_queue_producer = queue.EventObjectQueueProducer(test_queue)
  test_queue_producer.ProduceEventObjects(self._event_objects)
  test_queue_producer.SignalEndOfInput()

  with tempfile.NamedTemporaryFile() as temp_file:
    storage_writer = storage.StorageFileWriter(test_queue, temp_file)
    storage_writer.WriteEventObjects()

    z_file = zipfile.ZipFile(temp_file, 'r', zipfile.ZIP_DEFLATED)

    expected_z_filename_list = [
        'plaso_index.000001', 'plaso_meta.000001', 'plaso_proto.000001',
        'plaso_timestamps.000001']

    z_filename_list = sorted(z_file.namelist())
    self.assertEquals(len(z_filename_list), 4)
    self.assertEquals(z_filename_list, expected_z_filename_list)
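# For illustration only (not part of the test): a storage file written by
# StorageFileWriter can be reopened with the StorageFile interface used in
# setUp and ParseStorage below. The path u'plaso.db' is a placeholder.
storage_file = storage.StorageFile(u'plaso.db')
for information in storage_file.GetStorageInformation():
  print getattr(information, 'counter', None)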
def _GetParserContext(
    self, event_queue, parse_error_queue, knowledge_base_values=None):
  """Retrieves a parser context object.

  Args:
    event_queue: the event queue (instance of Queue).
    parse_error_queue: the parse error queue (instance of Queue).
    knowledge_base_values: optional dict containing the knowledge base
                           values. The default is None.

  Returns:
    A parser context object (instance of ParserContext).
  """
  event_queue_producer = queue.EventObjectQueueProducer(event_queue)
  parse_error_queue_producer = queue.ParseErrorQueueProducer(
      parse_error_queue)

  knowledge_base_object = knowledge_base.KnowledgeBase()
  if knowledge_base_values:
    for identifier, value in knowledge_base_values.iteritems():
      knowledge_base_object.SetValue(identifier, value)

  return context.ParserContext(
      event_queue_producer, parse_error_queue_producer,
      knowledge_base_object)
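# A sketch of how a test method might obtain a parser context via the
# helper above. The queue classes match those used throughout this
# document; the u'hostname' knowledge base value is a hypothetical
# example, not taken from the original code.
event_queue = queue.SingleThreadedQueue()
parse_error_queue = queue.SingleThreadedQueue()
parser_context = self._GetParserContext(
    event_queue, parse_error_queue,
    knowledge_base_values={u'hostname': u'myhost'})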
def ParseFile(file_entry):
  """Parse a file given a file entry and yield results."""
  if not file_entry:
    return

  # Create the necessary items.
  proc_queue = queue.SingleThreadedQueue()
  storage_queue = queue.SingleThreadedQueue()
  storage_queue_producer = queue.EventObjectQueueProducer(storage_queue)
  pre_obj = event.PreprocessObject()
  all_parsers = putils.FindAllParsers(pre_obj)

  # Create a worker.
  worker_object = worker.EventExtractionWorker(
      'my_worker', proc_queue, storage_queue_producer, pre_obj, all_parsers)

  # Parse the file.
  worker_object.ParseFile(file_entry)

  storage_queue.SignalEndOfInput()
  proc_queue.SignalEndOfInput()

  while True:
    try:
      item = storage_queue.PopItem()
    except errors.QueueEmpty:
      break

    if isinstance(item, queue.QueueEndOfInput):
      break

    yield item
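# Example usage (assumed, not from the original file): resolve a file
# entry from an OS path and print the timestamp of every event object the
# generator yields. The path spec and resolver calls mirror _ParseFile
# above; u'/var/log/syslog' is a placeholder path.
path_spec = path_spec_factory.Factory.NewPathSpec(
    definitions.TYPE_INDICATOR_OS, location=u'/var/log/syslog')
file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

for event_object in ParseFile(file_entry):
  print getattr(event_object, 'timestamp', 0)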
def setUp(self):
  """Sets up the objects used throughout the test."""
  self._temp_directory = tempfile.mkdtemp()
  self._storage_filename = os.path.join(self._temp_directory, 'plaso.db')
  self._tag_input_filename = os.path.join(self._temp_directory, 'input1.tag')

  tag_input_file = open(self._tag_input_filename, 'wb')
  tag_input_file.write('\n'.join([
      'Test Tag',
      ' filename contains \'/tmp/whoaaaa\'',
      ' parser is \'TestEvent\' and stuff is \'dude\'']))
  tag_input_file.close()

  pfilter.TimeRangeCache.ResetTimeConstraints()

  test_queue = queue.MultiThreadedQueue()
  test_queue_producer = queue.EventObjectQueueProducer(test_queue)
  test_queue_producer.ProduceEventObjects([
      TestEvent(0),
      TestEvent(1000),
      TestEvent(2000000, '/tmp/whoaaaaa'),
      TestEvent(2500000, '/tmp/whoaaaaa'),
      TestEvent(5000000, '/tmp/whoaaaaa', 'dude')])
  test_queue_producer.SignalEndOfInput()

  storage_writer = storage.StorageFileWriter(
      test_queue, self._storage_filename)
  storage_writer.WriteEventObjects()

  self._storage_file = storage.StorageFile(self._storage_filename)
  self._storage_file.SetStoreLimit()
def testImageWithFilterCollection(self):
  """Test collection on a storage media image file with a filter."""
  test_file = self._GetTestFilePath(['image.dd'])

  volume_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
  path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
      parent=volume_path_spec)

  filter_name = ''
  with tempfile.NamedTemporaryFile(delete=False) as temp_file:
    filter_name = temp_file.name
    temp_file.write('/a_directory/.+zip\n')
    temp_file.write('/a_directory/another.+\n')
    temp_file.write('/passwords.txt\n')

  test_collection_queue = queue.SingleThreadedQueue()
  test_storage_queue = queue.SingleThreadedQueue()
  test_storage_queue_producer = queue.EventObjectQueueProducer(
      test_storage_queue)
  resolver_context = context.Context()
  test_collector = collector.Collector(
      test_collection_queue, test_storage_queue_producer, test_file,
      path_spec, resolver_context=resolver_context)

  find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
  test_collector.SetFilter(find_specs)

  test_collector.Collect()

  test_collector_queue_consumer = TestCollectorQueueConsumer(
      test_collection_queue)
  test_collector_queue_consumer.ConsumePathSpecs()

  try:
    os.remove(filter_name)
  except (OSError, IOError) as exception:
    logging.warning((
        u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
            filter_name, exception))

  self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2)

  paths = test_collector_queue_consumer.GetFilePaths()

  # path_specs[0]
  # type: TSK
  # file_path: '/a_directory/another_file'
  # container_path: 'test_data/image.dd'
  # image_offset: 0
  self.assertEquals(paths[0], u'/a_directory/another_file')

  # path_specs[1]
  # type: TSK
  # file_path: '/passwords.txt'
  # container_path: 'test_data/image.dd'
  # image_offset: 0
  self.assertEquals(paths[1], u'/passwords.txt')
def __init__(self, collection_queue, storage_queue):
  """Initialize the engine object.

  Args:
    collection_queue: the collection queue object (instance of Queue).
    storage_queue: the storage queue object (instance of Queue).
  """
  self._collection_queue = collection_queue
  self._source = None
  self._source_path_spec = None
  self._source_file_entry = None
  self._storage_queue_producer = queue.EventObjectQueueProducer(
      storage_queue)
def __init__(self, collection_queue, storage_queue, parse_error_queue):
  """Initialize the engine object.

  Args:
    collection_queue: the collection queue object (instance of Queue).
    storage_queue: the storage queue object (instance of Queue).
    parse_error_queue: the parser error queue object (instance of Queue).
  """
  self._collection_queue = collection_queue
  self._source = None
  self._source_path_spec = None
  self._source_file_entry = None
  self._event_queue_producer = queue.EventObjectQueueProducer(storage_queue)
  self._parse_error_queue_producer = queue.ParseErrorQueueProducer(
      parse_error_queue)

  self.knowledge_base = knowledge_base.KnowledgeBase()
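# A hedged wiring sketch for the initializer above: the three queues it
# expects, using the queue classes seen in the tests in this document.
# The engine class name Engine is an assumption; substitute the actual
# class that defines this __init__.
collection_queue = queue.SingleThreadedQueue()
storage_queue = queue.SingleThreadedQueue()
parse_error_queue = queue.SingleThreadedQueue()

engine_object = Engine(collection_queue, storage_queue, parse_error_queue)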
def testWinAnalyzePlugin(self):
  """Test the plugin against mock events."""
  knowledge_base = self._SetUpKnowledgeBase(
      knowledge_base_values={'users': self.WIN_USERS})

  event_queue = queue.SingleThreadedQueue()

  # Fill the incoming queue with events.
  test_queue_producer = queue.EventObjectQueueProducer(event_queue)
  test_queue_producer.ProduceEventObjects(
      [self._CreateTestEventObject(path) for path in self.WIN_PATHS])
  test_queue_producer.SignalEndOfInput()

  # Initialize plugin.
  analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)

  # Run the analysis plugin.
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEquals(len(analysis_reports), 1)

  analysis_report = analysis_reports[0]

  self.assertEquals(analysis_plugin._sep, u'\\')

  # Due to the behavior of the join one additional empty string at the end
  # is needed to create the last empty line.
  expected_text = u'\n'.join([
      u' == USER: dude ==',
      u' Google Keep - notes and lists [hmjkmjkepdijhoojdojkdfohbdgmmhki]',
      u'',
      u' == USER: frank ==',
      u' Google Play Music [icppfcnhkcmnfdhfhphakoifcfokfdhg]',
      u' YouTube [blpcfgokakmgnkcojhhkbfbldkacnbeo]',
      u'',
      u''])

  self.assertEquals(analysis_report.text, expected_text)
  self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test')

  expected_keys = set([u'frank', u'dude'])
  self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys)
def testMacAnalyzerPlugin(self):
  """Test the plugin against mock events."""
  knowledge_base = self._SetUpKnowledgeBase(
      knowledge_base_values={'users': self.MAC_USERS})

  event_queue = queue.SingleThreadedQueue()

  # Fill the incoming queue with events.
  test_queue_producer = queue.EventObjectQueueProducer(event_queue)
  test_queue_producer.ProduceEventObjects(
      [self._CreateTestEventObject(path) for path in self.MAC_PATHS])
  test_queue_producer.SignalEndOfInput()

  # Initialize plugin.
  analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue)

  # Run the analysis plugin.
  analysis_report_queue_consumer = self._RunAnalysisPlugin(
      analysis_plugin, knowledge_base)
  analysis_reports = self._GetAnalysisReportsFromQueue(
      analysis_report_queue_consumer)

  self.assertEquals(len(analysis_reports), 1)

  analysis_report = analysis_reports[0]

  self.assertEquals(analysis_plugin._sep, u'/')

  # Due to the behavior of the join one additional empty string at the end
  # is needed to create the last empty line.
  expected_text = u'\n'.join([
      u' == USER: dude ==',
      u' Google Drive [apdfllckaahabafndbhieahigkjlhalf]',
      u'',
      u' == USER: frank ==',
      u' Gmail [pjkljhegncpnkpknbcohdijeoejaedia]',
      u'',
      u''])

  self.assertEquals(analysis_report.text, expected_text)
  self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test')

  expected_keys = set([u'frank', u'dude'])
  self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys)
def testImageCollection(self):
  """Test collection on a storage media image file.

  This image has two files:
    + logs/hidden.zip
    + logs/sys.tgz

  The hidden.zip file contains one file, syslog, which is the same for
  sys.tgz.

  The end results should therefore be:
    + logs/hidden.zip (unchanged)
    + logs/hidden.zip:syslog (the text file extracted out)
    + logs/sys.tgz (unchanged)
    + logs/sys.tgz (read as a GZIP file, so not compressed)
    + logs/sys.tgz:syslog.gz (a GZIP file from the TAR container)
    + logs/sys.tgz:syslog.gz:syslog (the extracted syslog file)

  This means that the collection script should collect 6 files in total.
  """
  test_file = self._GetTestFilePath(['syslog_image.dd'])

  volume_path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
  path_spec = path_spec_factory.Factory.NewPathSpec(
      dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
      parent=volume_path_spec)

  test_collection_queue = queue.SingleThreadedQueue()
  test_storage_queue = queue.SingleThreadedQueue()
  test_storage_queue_producer = queue.EventObjectQueueProducer(
      test_storage_queue)
  resolver_context = context.Context()
  test_collector = collector.Collector(
      test_collection_queue, test_storage_queue_producer, test_file,
      path_spec, resolver_context=resolver_context)

  test_collector.Collect()

  test_collector_queue_consumer = TestCollectorQueueConsumer(
      test_collection_queue)
  test_collector_queue_consumer.ConsumePathSpecs()

  self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2)
def ParseStorage(self, options):
  """Open a storage file and parse through it.

  Args:
    options: the command line arguments (instance of argparse.Namespace).

  Returns:
    A counter.

  Raises:
    RuntimeError: if a non-recoverable situation is encountered.
  """
  counter = None

  if options.slice:
    if options.timezone == 'UTC':
      zone = pytz.utc
    else:
      zone = pytz.timezone(options.timezone)

    timestamp = timelib.Timestamp.FromTimeString(
        options.slice, timezone=zone)

    # Convert number of minutes to microseconds.
    range_operator = self._slice_size * 60 * 1000000

    # Set the time range.
    pfilter.TimeRangeCache.SetLowerTimestamp(timestamp - range_operator)
    pfilter.TimeRangeCache.SetUpperTimestamp(timestamp + range_operator)

  if options.analysis_plugins:
    read_only = False
  else:
    read_only = True

  try:
    storage_file = self.OpenStorageFile(read_only=read_only)
  except IOError as exception:
    raise RuntimeError(
        u'Unable to open storage file: {0:s} with error: {1:s}.'.format(
            self._storage_file_path, exception))

  with storage_file:
    storage_file.SetStoreLimit(self._filter_object)

    try:
      output_module = self._output_module_class(
          storage_file, self._output_stream, options, self._filter_object)
    except IOError as exception:
      raise RuntimeError(
          u'Unable to create output module with error: {0:s}'.format(
              exception))

    if not output_module:
      raise RuntimeError(u'Missing output module.')

    if options.analysis_plugins:
      logging.info(u'Starting analysis plugins.')

      # Within all preprocessing objects, try to get the last one that has
      # time zone information stored in it, since it has the highest chance
      # of containing the information we are seeking (defaulting to the
      # last one).
      pre_objs = storage_file.GetStorageInformation()
      pre_obj = pre_objs[-1]
      for obj in pre_objs:
        if getattr(obj, 'time_zone_str', ''):
          pre_obj = obj

      # Fill in the collection information.
      pre_obj.collection_information = {}
      encoding = getattr(pre_obj, 'preferred_encoding', None)
      if encoding:
        cmd_line = ' '.join(sys.argv)
        try:
          pre_obj.collection_information['cmd_line'] = cmd_line.decode(
              encoding)
        except UnicodeDecodeError:
          pass
      pre_obj.collection_information['file_processed'] = (
          self._storage_file_path)
      pre_obj.collection_information['method'] = 'Running Analysis Plugins'
      pre_obj.collection_information['plugins'] = options.analysis_plugins
      time_of_run = timelib.Timestamp.GetNow()
      pre_obj.collection_information['time_of_run'] = time_of_run

      pre_obj.counter = collections.Counter()

      # Assign the preprocessing object to the storage.
      # This is normally done in the construction of the storage object,
      # however we cannot do that here since the preprocessing object is
      # stored inside the storage file, so we need to open it first to
      # be able to read it in, before we make changes to it. Thus we need
      # to access this protected member of the class.
      # pylint: disable=protected-access
      storage_file._pre_obj = pre_obj

      # Start queues and load up plugins.
      analysis_output_queue = queue.MultiThreadedQueue()
      event_queue_producers = []
      event_queues = []
      analysis_plugins_list = [
          x.strip() for x in options.analysis_plugins.split(',')]

      for _ in xrange(0, len(analysis_plugins_list)):
        event_queues.append(queue.MultiThreadedQueue())
        event_queue_producers.append(
            queue.EventObjectQueueProducer(event_queues[-1]))

      knowledge_base_object = knowledge_base.KnowledgeBase()

      analysis_plugins = analysis.LoadPlugins(
          analysis_plugins_list, event_queues)

      # Now we need to start all the plugins.
      for analysis_plugin in analysis_plugins:
        analysis_report_queue_producer = queue.AnalysisReportQueueProducer(
            analysis_output_queue)
        analysis_context_object = analysis_context.AnalysisContext(
            analysis_report_queue_producer, knowledge_base_object)
        analysis_process = multiprocessing.Process(
            name='Analysis {0:s}'.format(analysis_plugin.plugin_name),
            target=analysis_plugin.RunPlugin,
            args=(analysis_context_object,))
        self._analysis_processes.append(analysis_process)

        analysis_process.start()
        logging.info(
            u'Plugin: [{0:s}] started.'.format(analysis_plugin.plugin_name))
    else:
      event_queue_producers = []

    output_buffer = output_lib.EventBuffer(output_module, options.dedup)
    with output_buffer:
      counter = ProcessOutput(
          output_buffer, output_module, self._filter_object,
          self._filter_buffer, event_queue_producers)

    for information in storage_file.GetStorageInformation():
      if hasattr(information, 'counter'):
        counter['Stored Events'] += information.counter['total']

    if not options.quiet:
      logging.info(u'Output processing is done.')

    # Get all reports and tags from analysis plugins.
    if options.analysis_plugins:
      logging.info(u'Processing data from analysis plugins.')
      for event_queue_producer in event_queue_producers:
        event_queue_producer.SignalEndOfInput()

      # Wait for all analysis plugins to complete.
      for number, analysis_process in enumerate(self._analysis_processes):
        logging.debug(
            u'Waiting for analysis plugin: {0:d} to complete.'.format(
                number))
        if analysis_process.is_alive():
          analysis_process.join(10)
        else:
          logging.warning(u'Plugin {0:d} already stopped.'.format(number))
          analysis_process.terminate()
      logging.debug(u'All analysis plugins are now stopped.')

      # Close the output queue.
      analysis_output_queue.SignalEndOfInput()

      # Go over each output.
      analysis_queue_consumer = PsortAnalysisReportQueueConsumer(
          analysis_output_queue, storage_file, self._filter_expression,
          self.preferred_encoding)
      analysis_queue_consumer.ConsumeAnalysisReports()
      if analysis_queue_consumer.tags:
        storage_file.StoreTagging(analysis_queue_consumer.tags)

      # TODO: analysis_queue_consumer.anomalies:

      for item, value in analysis_queue_consumer.counter.iteritems():
        counter[item] = value

  if self._filter_object and not counter['Limited By']:
    counter['Filter By Date'] = (
        counter['Stored Events'] - counter['Events Included'] -
        counter['Events Filtered Out'])

  return counter
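# Worked example (hypothetical values) of the slice window arithmetic in
# ParseStorage above: a slice size of 5 minutes yields a window of
# 300000000 microseconds on either side of the pivot timestamp.
slice_size = 5
range_operator = slice_size * 60 * 1000000
assert range_operator == 300000000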
def ProcessFile(options):
  """Process a file and produce profile results."""
  if options.proto_file and os.path.isfile(options.proto_file):
    with open(options.proto_file) as fh:
      proto_string = fh.read()

      proto = transmission_pb2.PathSpec()
      try:
        text_format.Merge(proto_string, proto)
      except text_format.ParseError as exception:
        logging.error(u'Unable to parse file, error: {}'.format(exception))
        sys.exit(1)

      serializer = protobuf_serializer.ProtobufPathSpecSerializer
      path_spec = serializer.ReadSerializedObject(proto)
  else:
    path_spec = path_spec_factory.Factory.NewPathSpec(
        definitions.TYPE_INDICATOR_OS, location=options.file_to_parse)

  file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

  if file_entry is None:
    logging.error(u'Unable to open file: {0:s}'.format(
        options.file_to_parse))
    sys.exit(1)

  pre_obj = event.PreprocessObject()
  storage_queue = queue.SingleThreadedQueue()
  storage_queue_producer = queue.EventObjectQueueProducer(storage_queue)

  # Set a few options the engine expects to be there.
  # TODO: Can we rather set this directly in argparse?
  options.single_process = True
  options.debug = False
  options.text_prepend = u''
  parsers = putils.FindAllParsers(pre_obj, options)
  my_worker = worker.EventExtractionWorker(
      '0', None, storage_queue_producer, pre_obj, parsers)

  if options.verbose:
    profiler = cProfile.Profile()
    profiler.enable()
  else:
    time_start = time.time()

  my_worker.ParseFile(file_entry)

  if options.verbose:
    profiler.disable()
  else:
    time_end = time.time()

  storage_queue_producer.SignalEndOfInput()

  event_object_consumer = PprofEventObjectQueueConsumer(storage_queue)
  event_object_consumer.ConsumeEventObjects()

  if not options.verbose:
    print frontend_utils.FormatHeader('Time Used')
    print u'{:>20f}s'.format(time_end - time_start)

  print frontend_utils.FormatHeader('Parsers Loaded')
  # Accessing protected member.
  # pylint: disable=protected-access
  plugins = []
  for parser in sorted(my_worker._parsers['all']):
    print frontend_utils.FormatOutputString('', parser.parser_name)
    parser_plugins = getattr(parser, '_plugins', [])
    plugins.extend(parser_plugins)

  print frontend_utils.FormatHeader('Plugins Loaded')
  for plugin in sorted(plugins):
    if isinstance(plugin, basestring):
      print frontend_utils.FormatOutputString('', plugin)
    else:
      plugin_string = getattr(plugin, 'NAME', u'N/A')
      print frontend_utils.FormatOutputString('', plugin_string)

  print frontend_utils.FormatHeader('Parsers Used')
  for parser in sorted(event_object_consumer.parsers):
    print frontend_utils.FormatOutputString('', parser)

  print frontend_utils.FormatHeader('Plugins Used')
  for plugin in sorted(event_object_consumer.plugins):
    print frontend_utils.FormatOutputString('', plugin)

  print frontend_utils.FormatHeader('Counter')
  for key, value in event_object_consumer.counter.most_common():
    print frontend_utils.FormatOutputString(key, value)

  if options.verbose:
    return GetStats(profiler)