Example 1
    def testOutput(self):
        with TempDirectory() as dirname:
            dump_file = os.path.join(dirname, 'plaso.db')
            # Copy events to pstorage dump.
            with storage.StorageFile(self.test_filename,
                                     read_only=True) as store:
                formatter_cls = output.GetOutputFormatter('Pstorage')
                formatter = formatter_cls(store, dump_file)
                with output.EventBuffer(formatter,
                                        check_dedups=False) as output_buffer:
                    event_object = formatter.FetchEntry()
                    while event_object:
                        output_buffer.Append(event_object)
                        event_object = formatter.FetchEntry()

            # Make sure original and dump have the same events.
            original = storage.StorageFile(self.test_filename, read_only=True)
            dump = storage.StorageFile(dump_file, read_only=True)
            event_object_original = original.GetSortedEntry()
            event_object_dump = dump.GetSortedEntry()
            original_list = []
            dump_list = []

            while event_object_original:
                original_list.append(event_object_original.EqualityString())
                dump_list.append(event_object_dump.EqualityString())
                event_object_original = original.GetSortedEntry()
                event_object_dump = dump.GetSortedEntry()

            self.assertFalse(event_object_dump)

            for original_str, dump_str in zip(sorted(original_list),
                                              sorted(dump_list)):
                self.assertEqual(original_str, dump_str)
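The test above drains one store with repeated FetchEntry() calls, appends everything to the Pstorage dump, and then verifies both stores yield the same events by comparing each event's EqualityString(). Below is a minimal, self-contained sketch of that drain-and-compare pattern; the helper names are illustrative stand-ins, not part of plaso's API.

def drain(fetch):
    """Yield events from a store until fetch() returns a falsy value."""
    event = fetch()
    while event:
        yield event
        event = fetch()

def same_events(source_fetch, copy_fetch):
    """True when both stores yield the same multiset of equality strings."""
    source_keys = sorted(event.EqualityString() for event in drain(source_fetch))
    copy_keys = sorted(event.EqualityString() for event in drain(copy_fetch))
    return source_keys == copy_keys

# Usage against two open storage files (mirrors the assertions above):
# assert same_events(original.GetSortedEntry, dump.GetSortedEntry)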
Example 2
  def testFlush(self):
    """Test to ensure we empty our buffers and sends to output properly."""
    with tempfile.NamedTemporaryFile() as fh:

      def CheckBufferLength(event_buffer, expected):
        if not event_buffer.check_dedups:
          expected = 0
        # pylint: disable=protected-access
        self.assertEqual(len(event_buffer._buffer_dict), expected)

      formatter = TestOutput(fh)
      event_buffer = output.EventBuffer(formatter, False)

      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      CheckBufferLength(event_buffer, 1)

      # Add three events.
      event_buffer.Append(DummyEvent(123456, u'OMG I AM DIFFERENT'))
      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      CheckBufferLength(event_buffer, 2)

      event_buffer.Flush()
      CheckBufferLength(event_buffer, 0)

      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      event_buffer.Append(DummyEvent(123456, u'Now is now'))
      event_buffer.Append(DummyEvent(123456, u'Different again :)'))
      CheckBufferLength(event_buffer, 2)
      event_buffer.Append(DummyEvent(123457, u'Now is different'))
      CheckBufferLength(event_buffer, 1)
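The length checks above only hold if the buffer keys events on their timestamp plus content, collapsing duplicates, and flushes whenever an event with a new timestamp arrives. The sketch below shows a buffer with that behavior; it is an illustration of what the test exercises, not plaso's actual EventBuffer implementation.

import collections

# Stand-in for the DummyEvent helper used by the test above.
Event = collections.namedtuple('Event', ['timestamp', 'description'])

class DedupBuffer(object):
    """Illustrative deduplicating buffer: same-timestamp duplicates collapse,
    and an event with a new timestamp flushes everything buffered so far."""

    def __init__(self, check_dedups=True):
        self.check_dedups = check_dedups
        self._buffer_dict = {}
        self._current_timestamp = None
        self.flushed = []

    def Append(self, event):
        if not self.check_dedups:
            self.flushed.append(event)
            return
        if event.timestamp != self._current_timestamp:
            self.Flush()
            self._current_timestamp = event.timestamp
        # Events sharing timestamp and description collapse into one entry.
        self._buffer_dict[(event.timestamp, event.description)] = event

    def Flush(self):
        self.flushed.extend(self._buffer_dict.values())
        self._buffer_dict = {}

# For example, two identical events at 123456 leave one buffered entry; an
# event at 123457 flushes it and starts a new buffer with a single entry.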
Example 3
    def NoDuplicates(self, dump_filename):
        """Saves a de-duped Plaso Storage.

        This goes through the Plaso storage file and saves a new dump with
        duplicates removed. Note that if this function is interrupted,
        incomplete results are recorded and this file must be deleted, or
        subsequent runs will use this incomplete data.

        Args:
          dump_filename: the filename of the Plaso Storage to be deduped.

        Returns:
          The filename of the deduplicated dump, '.[dump_hash]_dedup'.
        """
        sys.stdout.write(u'Removing duplicates...\n')
        sys.stdout.flush()
        # Whether these incremental files should remain a feature or not is still
        # being decided. They're just here for now to make development faster.
        nodup_filename = '.{}_dedup'.format(self.plaso_hash)
        if os.path.isfile(nodup_filename):
            sys.stdout.write(u'Using previously calculated results.\n')
        else:
            with SetupStorage(dump_filename) as store:
                total_events = store.GetNumberOfEvents()
                events_per_dot = operator.floordiv(total_events, 80)
                formatter_cls = output_lib.GetOutputFormatter('Pstorage')
                store_dedup = open(nodup_filename, 'wb')
                formatter = formatter_cls(store, store_dedup)
                with output_lib.EventBuffer(
                        formatter, check_dedups=True) as output_buffer:
                    event_object = formatter.FetchEntry()
                    counter = 0
                    while event_object:
                        output_buffer.Append(event_object)
                        counter += 1
                        if counter % events_per_dot == 0:
                            sys.stdout.write(u'.')
                            sys.stdout.flush()
                        event_object = formatter.FetchEntry()
            sys.stdout.write(u'\n')
        return nodup_filename
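One detail worth noting in the loop above: operator.floordiv(total_events, 80) is zero whenever the store holds fewer than 80 events, so the counter % events_per_dot check would raise ZeroDivisionError for small stores. A defensive variant of the same progress-dot logic, written as a standalone sketch, could look like this:

import sys

def maybe_write_dot(counter, total_events, stream=sys.stdout):
    """Write one dot per roughly 1/80th of the events processed."""
    # Clamp the divisor to 1 so small stores do not produce a zero divisor.
    events_per_dot = max(total_events // 80, 1)
    if counter % events_per_dot == 0:
        stream.write(u'.')
        stream.flush()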
Example 4
    def ParseStorage(self, options):
        """Open a storage file and parse through it.

        Args:
          options: the command line arguments (instance of argparse.Namespace).

        Returns:
          A counter.

        Raises:
          RuntimeError: if a non-recoverable situation is encountered.
        """
        counter = None

        if options.slice:
            if options.timezone == 'UTC':
                zone = pytz.utc
            else:
                zone = pytz.timezone(options.timezone)

            timestamp = timelib.Timestamp.FromTimeString(options.slice,
                                                         timezone=zone)

            # Convert number of minutes to microseconds.
            range_operator = self._slice_size * 60 * 1000000

            # Set the time range.
            pfilter.TimeRangeCache.SetLowerTimestamp(timestamp -
                                                     range_operator)
            pfilter.TimeRangeCache.SetUpperTimestamp(timestamp +
                                                     range_operator)

        if options.analysis_plugins:
            read_only = False
        else:
            read_only = True

        try:
            storage_file = self.OpenStorageFile(read_only=read_only)
        except IOError as exception:
            raise RuntimeError(
                u'Unable to open storage file: {0:s} with error: {1:s}.'.
                format(self._storage_file_path, exception))

        with storage_file:
            storage_file.SetStoreLimit(self._filter_object)

            try:
                output_module = self._output_module_class(
                    storage_file, self._output_stream, options,
                    self._filter_object)
            except IOError as exception:
                raise RuntimeError(
                    u'Unable to create output module with error: {0:s}'.format(
                        exception))

            if not output_module:
                raise RuntimeError(u'Missing output module.')

            if options.analysis_plugins:
                logging.info(u'Starting analysis plugins.')
                # Within all preprocessing objects, pick the last one that has
                # time zone information stored in it, since it has the highest
                # chance of containing the information we are seeking
                # (defaulting to the last object if none has it).
                pre_objs = storage_file.GetStorageInformation()
                pre_obj = pre_objs[-1]
                for obj in pre_objs:
                    if getattr(obj, 'time_zone_str', ''):
                        pre_obj = obj

                # Fill in the collection information.
                pre_obj.collection_information = {}
                encoding = getattr(pre_obj, 'preferred_encoding', None)
                if encoding:
                    cmd_line = ' '.join(sys.argv)
                    try:
                        pre_obj.collection_information[
                            'cmd_line'] = cmd_line.decode(encoding)
                    except UnicodeDecodeError:
                        pass
                pre_obj.collection_information['file_processed'] = (
                    self._storage_file_path)
                pre_obj.collection_information[
                    'method'] = 'Running Analysis Plugins'
                pre_obj.collection_information[
                    'plugins'] = options.analysis_plugins
                time_of_run = timelib.Timestamp.GetNow()
                pre_obj.collection_information['time_of_run'] = time_of_run

                pre_obj.counter = collections.Counter()

                # Assign the preprocessing object to the storage.
                # This is normally done in the construction of the storage object,
                # however we cannot do that here since the preprocessing object is
                # stored inside the storage file, so we need to open it first to
                # be able to read it in, before we make changes to it. Thus we need
                # to access this protected member of the class.
                # pylint: disable=protected-access
                storage_file._pre_obj = pre_obj

                # Start queues and load up plugins.
                analysis_output_queue = queue.MultiThreadedQueue()
                analysis_producers = []
                analysis_queues = []
                analysis_plugins_list = [
                    x.strip() for x in options.analysis_plugins.split(',')
                ]
                for _ in xrange(0, len(analysis_plugins_list)):
                    analysis_queues.append(queue.MultiThreadedQueue())
                    analysis_producers.append(
                        queue.AnalysisPluginProducer(analysis_queues[-1]))

                analysis_plugins = analysis.LoadPlugins(
                    analysis_plugins_list, pre_obj, analysis_queues,
                    analysis_output_queue)

                # Now we need to start all the plugins.
                for analysis_plugin in analysis_plugins:
                    self._analysis_processes.append(
                        multiprocessing.Process(
                            name='Analysis {0:s}'.format(
                                analysis_plugin.plugin_name),
                            target=analysis_plugin.RunPlugin))
                    self._analysis_processes[-1].start()
                    logging.info(u'Plugin: [{0:s}] started.'.format(
                        analysis_plugin.plugin_name))
            else:
                analysis_producers = []

            output_buffer = output_lib.EventBuffer(output_module,
                                                   options.dedup)
            with output_buffer:
                counter = ProcessOutput(output_buffer, output_module,
                                        self._filter_object,
                                        self._filter_buffer,
                                        analysis_producers)

            for information in storage_file.GetStorageInformation():
                if hasattr(information, 'counter'):
                    counter['Stored Events'] += information.counter['total']

            if not options.quiet:
                logging.info(u'Output processing is done.')

            # Get all reports and tags from analysis plugins.
            if options.analysis_plugins:
                logging.info(u'Processing data from analysis plugins.')
                for analysis_producer in analysis_producers:
                    analysis_producer.SignalEndOfInput()

                # Wait for all analysis plugins to complete.
                for number, analysis_process in enumerate(
                        self._analysis_processes):
                    logging.debug(
                        u'Waiting for analysis plugin: {0:d} to complete.'.
                        format(number))
                    if analysis_process.is_alive():
                        analysis_process.join(10)
                    else:
                        logging.warning(
                            u'Plugin {0:d} already stopped.'.format(number))
                        analysis_process.terminate()
                logging.debug(u'All analysis plugins are now stopped.')

                # Close the output queue.
                analysis_output_queue.SignalEndOfInput()

                # Go over each output.
                analysis_queue_consumer = PsortAnalysisReportQueueConsumer(
                    analysis_output_queue, storage_file,
                    self._filter_expression, self.preferred_encoding)

                analysis_queue_consumer.ConsumeAnalysisReports()

                if analysis_queue_consumer.tags:
                    storage_file.StoreTagging(analysis_queue_consumer.tags)

                # TODO: analysis_queue_consumer.anomalies:

                for item, value in analysis_queue_consumer.counter.iteritems():
                    counter[item] = value

        if self._filter_object and not counter['Limited By']:
            counter['Filter By Date'] = (counter['Stored Events'] -
                                         counter['Events Included'] -
                                         counter['Events Filtered Out'])

        return counter
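When --slice is given, the code above builds a time window of plus or minus self._slice_size minutes around the parsed timestamp, converted to microseconds before being handed to pfilter.TimeRangeCache. A short worked version of that arithmetic, with an assumed slice size of 5 minutes, looks like this:

SLICE_SIZE_MINUTES = 5  # assumed value; the real one comes from self._slice_size

# Minutes to microseconds: 5 * 60 * 1000000 = 300,000,000.
RANGE_MICROSECONDS = SLICE_SIZE_MINUTES * 60 * 1000000

def slice_window(timestamp):
    """Return the (lower, upper) bounds applied around the slice timestamp."""
    return timestamp - RANGE_MICROSECONDS, timestamp + RANGE_MICROSECONDS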