Example #1
0
    def list_base_pathspecs(evidence):
        """Returns the base pathspec(s) for the given evidence item.

        Wraps the evidence's decoded pathspec in the matching archive or
        compression pathspec type (ZIP, GZIP, BZIP2, TAR) when the evidence
        metadata indicates one; otherwise delegates to
        _list_new_base_pathspecs for full dfvfs discovery.

        Args:
            evidence: dict with at least a 'pathspec' key (encoded pathspec)
                and optionally 'archive_type' and/or 'compression_type'.

        Returns:
            list[dict]: dicts with 'pathspec' (encoded) and 'file_name' keys.
        """
        decoded_pathspec = PathspecHelper._decode_pathspec(evidence['pathspec'])
        if u'archive_type' in evidence and u'ZIP' in evidence['archive_type']:
            pathspec = path_spec_factory.Factory.NewPathSpec(
                dfvfs_definitions.TYPE_INDICATOR_ZIP, location=u'/',
                parent=decoded_pathspec)
        elif u'compression_type' in evidence and u'GZIP' in evidence['compression_type']:
            pathspec = path_spec_factory.Factory.NewPathSpec(
                dfvfs_definitions.TYPE_INDICATOR_GZIP,
                parent=decoded_pathspec)
        elif u'compression_type' in evidence and u'BZIP2' in evidence['compression_type']:
            pathspec = path_spec_factory.Factory.NewPathSpec(
                dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
                compression_method=dfvfs_definitions.COMPRESSION_METHOD_BZIP2,
                parent=decoded_pathspec)
        elif u'archive_type' in evidence and u'TAR' in evidence['archive_type']:
            # Use the factory for consistency with the other branches rather
            # than instantiating dfvfs.path.tar_path_spec.TARPathSpec directly.
            pathspec = path_spec_factory.Factory.NewPathSpec(
                dfvfs_definitions.TYPE_INDICATOR_TAR, location=u'/',
                parent=decoded_pathspec)
        else:
            return PathspecHelper._list_new_base_pathspecs(evidence['pathspec'])

        encoded_base_pathspec = JsonPathSpecSerializer.WriteSerialized(pathspec)
        if hasattr(pathspec, 'location'):
            # Strip one trailing separator so basename yields the entry name.
            location = pathspec.location
            if location.endswith('/') or location.endswith('\\'):
                location = location[:-1]
            file_name = os.path.basename(location)
        else:
            file_name = '/'

        return [{'pathspec': encoded_base_pathspec, 'file_name': file_name}]
Example #2
0
    def _list_new_base_pathspecs(encoded_pathspec):
        '''Enumerates the base pathspecs discovered within an encoded pathspec.

        Builds a DfvfsUtil over the decoded pathspec (retrying once after
        clearing the file entry cache when it is full) and converts each
        discovered base pathspec into a serializable dict.
        '''
        try:
            dfvfs_util = DfvfsUtil(
                PathspecHelper._decode_pathspec(encoded_pathspec),
                interactive=True, is_pathspec=True)
        except CacheFullError:
            # Cache exhausted: clear it and retry the construction once.
            PathspecHelper._clear_file_entry_cache()
            dfvfs_util = DfvfsUtil(
                PathspecHelper._decode_pathspec(encoded_pathspec),
                interactive=True, is_pathspec=True)

        base_specs = dfvfs_util.base_path_specs
        if not isinstance(base_specs, list):
            base_specs = [base_specs]

        results = []
        last_partition = ''
        for base_spec in base_specs:
            parent = base_spec.parent
            file_name = parent.location if hasattr(parent, 'location') else '/'

            # Adds the partition name in front of the shadow volume
            parent_type = getattr(parent, 'type_indicator', '')
            if parent_type == 'TSK_PARTITION':
                last_partition = parent.location
            elif parent_type == 'VSHADOW':
                file_name = last_partition + file_name

            encoded = JsonPathSpecSerializer.WriteSerialized(base_spec)
            results.append({'pathspec': encoded,
                            'url_query':  urlencode({'pathspec': encoded}),
                            'file_name': file_name})

        return results
    def _decode_pathspec(encoded_pathspec):
        """Returns a Path Spec object from an encoded path spec, causes a 400 abort if the decode fails"""
        if not encoded_pathspec:
            # logging.warn is deprecated; logging.warning is the supported name.
            logging.warning('Path Spec required but none found')
            abort(400, 'Expected an encoded Path Spec, but none found')

        return JsonPathSpecSerializer.ReadSerialized(encoded_pathspec)
Example #4
0
    def _decode_pathspec(encoded_pathspec):
        """Returns a Path Spec object from an encoded path spec, causes a 400 abort if the decode fails"""
        if not encoded_pathspec:
            # logging.warn is deprecated; logging.warning is the supported name.
            logging.warning('Path Spec required but none found')
            raise KeyError('Could not find pathspec in request')

        return JsonPathSpecSerializer.ReadSerialized(encoded_pathspec)
Example #5
0
    def _list_directory(self, file_entry, recursive=False, depth=0, index='*', offset=0, size=None):
        """Lists a directory using a file entry.

        Builds a list of evidence dicts for the entry itself (when depth > 0)
        and, when recursing or at the top level, for its children.

        Args:
            file_entry: dfvfs file entry to describe and/or enumerate.
            recursive: when True, descend into subdirectories at every level.
            depth: current recursion depth; 0 means the initial call, whose
                own entry is not emitted.
            index: index identifier embedded in each entry's url_query.
            offset: number of leading sub entries to skip at this level.
            size: maximum number of sub entries to process at this level;
                None means unlimited (count == size never matches).

        Returns:
            list[dict]: evidence dicts with pathspec, path, file name, stat
            information, cache path, extension and mimetype data.
        """
        directory_list = []

        if depth > 0:
            # Describe this entry itself (skipped for the root call).
            evidence = {}
            pathspec = file_entry.path_spec
            evidence['pathspec'] = JsonPathSpecSerializer.WriteSerialized(pathspec)
            evidence['url_query'] = urlencode({'pathspec': evidence['pathspec'], 'index': index})
            evidence['path'] = pathspec.location
            location = pathspec.location
            # Strip one trailing separator so basename yields the entry name.
            if location.endswith('/') or location.endswith('\\'):
                location = location[:-1]
            file_name = os.path.basename(location)
            evidence['file_name'] = file_name
            evidence.update(self._get_stat_information_from_file_entry(file_entry))
            evidence['file_cache_path'] = self.get_cache_path(evidence['pathspec'])
            evidence['extension'] = self.get_file_extension(evidence['pathspec'])
            directory_list.append(self._append_mimetype(evidence))

        if (recursive or depth == 0) and (file_entry.IsDirectory() or hasattr(file_entry, 'sub_file_entries')):
            count = 0
            for sub_file_entry in file_entry.sub_file_entries:
                if offset > 0:
                    # Consume the pagination offset before emitting entries.
                    offset -= 1
                    continue
                # NOTE(review): index/offset/size are not forwarded here, so
                # pagination applies only at this level — confirm intended.
                directory_list.extend(self._list_directory(sub_file_entry, recursive, depth + 1))
                count += 1
                if count == size:
                    break

        return directory_list
Example #6
0
    def _GetSanitizedEventValues(self, event_object):
        """Builds a dictionary from an event_object.

    The event object need to be sanitized to prevent certain values from causing
    problems when indexing with Elasticsearch. For example the value of
    the pathspec attribute is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event_object: the event object (instance of EventObject).

    Returns:
      Dictionary with sanitized event object values.

    Raises:
      NoFormatterFound: if no event formatter can be found for the event
          object's data type.
    """
        event_values = {}
        for attribute_name, attribute_value in event_object.GetAttributes():
            # Ignore the regvalue attribute as it cause issues when indexing
            if attribute_name == u'regvalue':
                continue

            if attribute_name == u'pathspec':
                # Serialize the nested pathspec to a JSON string; drop the
                # attribute entirely if it cannot be serialized.
                try:
                    attribute_value = JsonPathSpecSerializer.WriteSerialized(
                        attribute_value)
                except TypeError:
                    continue
            event_values[attribute_name] = attribute_value

        # Add string representation of the timestamp
        attribute_value = timelib.Timestamp.RoundToSeconds(
            event_object.timestamp)
        attribute_value = timelib.Timestamp.CopyToIsoFormat(
            attribute_value, timezone=self._output_mediator.timezone)
        event_values[u'datetime'] = attribute_value

        message, _ = self._output_mediator.GetFormattedMessages(event_object)
        if message is None:
            raise errors.NoFormatterFound(
                u'Unable to find event formatter for: {0:s}.'.format(
                    getattr(event_object, u'data_type', u'UNKNOWN')))
        event_values[u'message'] = message

        # Tags needs to be a list for Elasticsearch to index correctly.
        try:
            labels = list(event_values[u'tag'].labels)
        except (KeyError, AttributeError):
            labels = []
        event_values[u'tag'] = labels

        source_short, source = self._output_mediator.GetFormattedSources(
            event_object)
        if source is None or source_short is None:
            raise errors.NoFormatterFound(
                u'Unable to find event formatter for: {0:s}.'.format(
                    getattr(event_object, u'data_type', u'UNKNOWN')))
        event_values[u'source_short'] = source_short
        event_values[u'source_long'] = source

        return event_values
Example #7
0
    def _GetSanitizedEventValues(self, event, event_data, event_data_stream,
                                 event_tag):
        """Sanitizes the event for use in Elasticsearch.

    The event values need to be sanitized to prevent certain values from
    causing problems when indexing with Elasticsearch. For example the path
    specification is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.
      event_tag (EventTag): event tag.

    Returns:
      dict[str, object]: sanitized event values.

    Raises:
      NoFormatterFound: if no event formatter can be found to match the data
          type in the event data.
    """
        event_values = {}

        if event_data:
            # Copy event data attributes first; data stream attributes added
            # below override any with the same name.
            for attribute_name, attribute_value in event_data.GetAttributes():
                event_values[attribute_name] = attribute_value

        if event_data_stream:
            for attribute_name, attribute_value in event_data_stream.GetAttributes(
            ):
                event_values[attribute_name] = attribute_value

        # Ensure every configured output field is present, even when the
        # event provides no value for it.
        for attribute_name in self._field_names:
            if attribute_name not in event_values:
                event_values[attribute_name] = None

        field_values = {}
        for attribute_name, attribute_value in event_values.items():
            # Note that support for event_data.pathspec is kept for backwards
            # compatibility. The current value is event_data_stream.path_spec.
            if attribute_name in ('path_spec', 'pathspec'):
                # Serialize the nested path specification to a JSON string;
                # drop the field entirely when it cannot be serialized.
                try:
                    field_value = JsonPathSpecSerializer.WriteSerialized(
                        attribute_value)
                except TypeError:
                    continue

            else:
                field_value = self._field_formatting_helper.GetFormattedField(
                    attribute_name, event, event_data, event_data_stream,
                    event_tag)

            field_values[attribute_name] = self._SanitizeField(
                event_data.data_type, attribute_name, field_value)

        return field_values
Example #8
0
    def _old_list_directory(self, file_entry, recursive=False, depth=0):
        """Lists a directory using a file entry (legacy implementation).

        Args:
            file_entry: dfvfs file entry to describe and/or enumerate.
            recursive: when True, descend into subdirectories at every level.
            depth: current recursion depth; the entry itself is only emitted
                when depth > 0 (i.e. not for the initial call).

        Returns:
            list: evidence items for the enumerated entries.
        """
        directory_list = []

        if depth > 0:
            directory_list.append(self.get_evidence_item(JsonPathSpecSerializer.WriteSerialized(file_entry.path_spec)))

        if (recursive or depth == 0) and file_entry.IsDirectory():
            for sub_file_entry in file_entry.sub_file_entries:
                # Recurse into this legacy implementation; previously this
                # called the new _list_directory, mixing old and new behavior.
                directory_list.extend(self._old_list_directory(sub_file_entry, recursive, depth + 1))

        return directory_list
Example #9
0
    def get_parent_pathspec(encoded_pathspec):
        '''Returns the encoded parent pathspec of the given pathspec, or False if none.'''
        file_entry = PathspecHelper._open_file_entry(encoded_pathspec)
        parent_entry = file_entry.GetParentFileEntry()
        PathspecHelper._close_file_entry(encoded_pathspec)

        if parent_entry:
            parent_path_spec = parent_entry.path_spec
        else:
            # dfvfs could not resolve a parent entry; fall back to walking
            # the pathspec chain manually.
            parent_path_spec = PathspecHelper.get_parent_pathspec_manually(encoded_pathspec)

        if not parent_path_spec:
            return False

        # Skip wrapper layers that are traversed transparently.
        while getattr(parent_path_spec, 'type_indicator', '') in PathspecHelper._automatically_traverse:
            parent_path_spec = parent_path_spec.parent

        return JsonPathSpecSerializer.WriteSerialized(parent_path_spec)
Example #10
0
 def get_encoded_pathspec(pathspec_or_source):
     """Resolves pathspec_or_source to a pathspec and returns its encoded form."""
     resolved_pathspec = PathspecHelper.get_pathspec(pathspec_or_source)
     return JsonPathSpecSerializer.WriteSerialized(resolved_pathspec)
Example #11
0
 def get_encoded_parent_base_pathspec_manually(encoded_pathspec):
     """Returns the encoded parent pathspec, by decoding and getting the 'parent' attribute"""
     parent_pathspec = PathspecHelper.get_parent_pathspec_manually(encoded_pathspec)
     return JsonPathSpecSerializer.WriteSerialized(parent_pathspec)
Example #12
0
  def _GetSanitizedEventValues(self, event):
    """Sanitizes the event for use in Elasticsearch.

    The event values need to be sanitized to prevent certain values from
    causing problems when indexing with Elasticsearch. For example the path
    specification is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event (EventObject): event.

    Returns:
      dict[str, object]: sanitized event values.

    Raises:
      NoFormatterFound: if no event formatter can be found to match the data
          type in the event.
    """
    event_values = {}
    for attribute_name, attribute_value in event.GetAttributes():
      # Ignore the regvalue attribute as it cause issues when indexing.
      if attribute_name == 'regvalue':
        continue

      if attribute_name == 'pathspec':
        # Serialize the nested path specification to a JSON string; drop
        # the attribute entirely when it cannot be serialized.
        try:
          attribute_value = JsonPathSpecSerializer.WriteSerialized(
              attribute_value)
        except TypeError:
          continue
      event_values[attribute_name] = attribute_value

    # Add a string representation of the timestamp.
    try:
      attribute_value = timelib.Timestamp.RoundToSeconds(event.timestamp)
    except TypeError as exception:
      # A malformed timestamp should not abort output; fall back to epoch.
      logger.warning((
          'Unable to round timestamp {0!s}. error: {1!s}. '
          'Defaulting to 0').format(event.timestamp, exception))
      attribute_value = 0

    attribute_value = timelib.Timestamp.CopyToIsoFormat(
        attribute_value, timezone=self._output_mediator.timezone)
    event_values['datetime'] = attribute_value

    message, _ = self._output_mediator.GetFormattedMessages(event)
    if message is None:
      data_type = getattr(event, 'data_type', 'UNKNOWN')
      raise errors.NoFormatterFound(
          'Unable to find event formatter for: {0:s}.'.format(data_type))

    event_values['message'] = message

    # Tags needs to be a list for Elasticsearch to index correctly.
    try:
      labels = list(event_values['tag'].labels)
    except (KeyError, AttributeError):
      labels = []
    event_values['tag'] = labels

    source_short, source = self._output_mediator.GetFormattedSources(event)
    if source is None or source_short is None:
      data_type = getattr(event, 'data_type', 'UNKNOWN')
      raise errors.NoFormatterFound(
          'Unable to find event formatter for: {0:s}.'.format(data_type))

    event_values['source_short'] = source_short
    event_values['source_long'] = source

    return event_values
Example #13
0
 def encode_pathspec(self, pathspec):
     """Serializes a dfvfs pathspec object into its JSON-encoded string form."""
     encoded_pathspec = JsonPathSpecSerializer.WriteSerialized(pathspec)
     return encoded_pathspec
Example #14
0
 def decode_pathspec(self, encoded_pathspec):
     """Deserializes a JSON-encoded pathspec string back into a pathspec object."""
     decoded_pathspec = JsonPathSpecSerializer.ReadSerialized(encoded_pathspec)
     return decoded_pathspec
Example #15
0
    def _GetSanitizedEventValues(self, event, event_data, event_tag):
        """Sanitizes the event for use in Elasticsearch.

    The event values need to be sanitized to prevent certain values from
    causing problems when indexing with Elasticsearch. For example the path
    specification is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_tag (EventTag): event tag.

    Returns:
      dict[str, object]: sanitized event values.

    Raises:
      NoFormatterFound: if no event formatter can be found to match the data
          type in the event data.
    """
        event_values = {}
        for attribute_name, attribute_value in event_data.GetAttributes():
            # TODO: remove regvalue, which is kept for backwards compatibility.
            # Ignore the regvalue attribute as it cause issues when indexing.
            if attribute_name == 'regvalue':
                continue

            if attribute_name == 'pathspec':
                # Serialize the nested path specification to a JSON string;
                # drop the attribute entirely when it cannot be serialized.
                try:
                    attribute_value = JsonPathSpecSerializer.WriteSerialized(
                        attribute_value)
                except TypeError:
                    continue
            event_values[attribute_name] = attribute_value

            if isinstance(attribute_value, py2to3.BYTES_TYPE):
                # Some parsers have written bytes values to storage.
                # The decoded text overwrites the bytes value stored above.
                attribute_value = attribute_value.decode('utf-8', 'replace')
                logger.warning(
                    'Found bytes value for attribute "{0:s}" for data type: '
                    '{1!s}. Value was converted to UTF-8: "{2:s}"'.format(
                        attribute_name, event_data.data_type, attribute_value))
                event_values[attribute_name] = attribute_value

        # Add a string representation of the timestamp.
        try:
            attribute_value = timelib.Timestamp.RoundToSeconds(event.timestamp)
        except TypeError as exception:
            # A malformed timestamp should not abort output; fall back to 0.
            logger.warning(('Unable to round timestamp {0!s}. error: {1!s}. '
                            'Defaulting to 0').format(event.timestamp,
                                                      exception))
            attribute_value = 0

        attribute_value = timelib.Timestamp.CopyToIsoFormat(
            attribute_value, timezone=self._output_mediator.timezone)
        event_values['datetime'] = attribute_value

        event_values['timestamp'] = event.timestamp
        event_values['timestamp_desc'] = event.timestamp_desc

        message, _ = self._output_mediator.GetFormattedMessages(event_data)
        if message is None:
            data_type = getattr(event_data, 'data_type', 'UNKNOWN')
            raise errors.NoFormatterFound(
                'Unable to find event formatter for: {0:s}.'.format(data_type))

        event_values['message'] = message

        # Tags needs to be a list for Elasticsearch to index correctly.
        labels = []
        if event_tag:
            try:
                labels = list(event_tag.labels)
            except (AttributeError, KeyError):
                pass

        event_values['tag'] = labels

        source_short, source = self._output_mediator.GetFormattedSources(
            event, event_data)
        if source is None or source_short is None:
            data_type = getattr(event_data, 'data_type', 'UNKNOWN')
            raise errors.NoFormatterFound(
                'Unable to find event formatter for: {0:s}.'.format(data_type))

        event_values['source_short'] = source_short
        event_values['source_long'] = source

        return event_values
Example #16
0
    def _GetSanitizedEventValues(self, event, event_data, event_data_stream,
                                 event_tag):
        """Sanitizes the event for use in Elasticsearch.

    The event values need to be sanitized to prevent certain values from
    causing problems when indexing with Elasticsearch. For example the path
    specification is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.
      event_tag (EventTag): event tag.

    Returns:
      dict[str, object]: sanitized event values.

    Raises:
      NoFormatterFound: if no event formatter can be found to match the data
          type in the event data.
    """
        # Merge event data and (optional) data stream attributes into one list
        # of (name, value) tuples before processing.
        event_attributes = list(event_data.GetAttributes())
        if event_data_stream:
            event_attributes.extend(event_data_stream.GetAttributes())

        event_values = {}
        # Iterate in attribute-name order for deterministic output.
        for attribute_name, attribute_value in sorted(event_attributes):
            # Note that support for event_data.pathspec is kept for backwards
            # compatibility. The current value is event_data_stream.path_spec.
            if attribute_name in ('path_spec', 'pathspec'):
                # Serialize the nested path specification to a JSON string;
                # drop the attribute entirely when it cannot be serialized.
                try:
                    attribute_value = JsonPathSpecSerializer.WriteSerialized(
                        attribute_value)
                except TypeError:
                    continue
            event_values[attribute_name] = attribute_value

            if isinstance(attribute_value, bytes):
                # Some parsers have written bytes values to storage.
                # The decoded text overwrites the bytes value stored above.
                attribute_value = attribute_value.decode('utf-8', 'replace')
                logger.warning(
                    'Found bytes value for attribute "{0:s}" for data type: '
                    '{1!s}. Value was converted to UTF-8: "{2:s}"'.format(
                        attribute_name, event_data.data_type, attribute_value))
                event_values[attribute_name] = attribute_value

        event_values['timestamp'] = event.timestamp
        event_values['timestamp_desc'] = event.timestamp_desc

        # Add a date and time string representation of the timestamp.
        date_time = dfdatetime_posix_time.PosixTimeInMicroseconds(
            timestamp=event.timestamp)
        event_values['datetime'] = date_time.CopyToDateTimeStringISO8601()

        message, _ = self._output_mediator.GetFormattedMessages(event_data)
        if message is None:
            data_type = getattr(event_data, 'data_type', 'UNKNOWN')
            raise errors.NoFormatterFound(
                'Unable to find event formatter for: {0:s}.'.format(data_type))

        event_values['message'] = message

        # Tags needs to be a list for Elasticsearch to index correctly.
        labels = []
        if event_tag:
            try:
                labels = list(event_tag.labels)
            except (AttributeError, KeyError):
                pass

        event_values['tag'] = labels

        source_short, source = self._output_mediator.GetFormattedSources(
            event, event_data)
        if source is None or source_short is None:
            data_type = getattr(event_data, 'data_type', 'UNKNOWN')
            raise errors.NoFormatterFound(
                'Unable to find event formatter for: {0:s}.'.format(data_type))

        event_values['source_short'] = source_short
        event_values['source_long'] = source

        return event_values