def list_base_pathspecs(evidence):
    """Gets the base pathspec(s) for the given evidence.

    Args:
        evidence (dict): evidence item; must contain an encoded 'pathspec'
            and may contain 'archive_type' (ZIP/TAR) or 'compression_type'
            (GZIP/BZIP2) hints.

    Returns:
        list[dict]: one entry per base pathspec, each with an encoded
            'pathspec' and a display 'file_name'.
    """
    decoded_pathspec = PathspecHelper._decode_pathspec(evidence['pathspec'])

    if u'archive_type' in evidence and u'ZIP' in evidence['archive_type']:
        pathspec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_ZIP, location=u'/',
            parent=decoded_pathspec)
    elif u'compression_type' in evidence and u'GZIP' in evidence['compression_type']:
        pathspec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=decoded_pathspec)
    elif u'compression_type' in evidence and u'BZIP2' in evidence['compression_type']:
        pathspec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
            compression_method=dfvfs_definitions.COMPRESSION_METHOD_BZIP2,
            parent=decoded_pathspec)
    elif u'archive_type' in evidence and u'TAR' in evidence['archive_type']:
        # Use the factory for consistency with the other container types
        # instead of instantiating TARPathSpec directly.
        pathspec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TAR, location=u'/',
            parent=decoded_pathspec)
    else:
        # Not a known container/compression type; let DfvfsUtil figure out
        # the base pathspecs.
        return PathspecHelper._list_new_base_pathspecs(evidence['pathspec'])

    encoded_base_pathspec = JsonPathSpecSerializer.WriteSerialized(pathspec)

    if hasattr(pathspec, 'location'):
        location = pathspec.location
        # Strip a single trailing separator so basename yields the entry name.
        if location.endswith('/') or location.endswith('\\'):
            location = location[:-1]
        file_name = os.path.basename(location)
    else:
        file_name = '/'

    return [{'pathspec': encoded_base_pathspec, 'file_name': file_name}]
def _list_new_base_pathspecs(encoded_pathspec):
    '''Gets a list of the base_pathspecs from in a pathspec using dfvfs_utils'''
    try:
        util = DfvfsUtil(
            PathspecHelper._decode_pathspec(encoded_pathspec),
            interactive=True, is_pathspec=True)
    except CacheFullError:
        # The file entry cache is exhausted; flush it and retry once.
        PathspecHelper._clear_file_entry_cache()
        util = DfvfsUtil(
            PathspecHelper._decode_pathspec(encoded_pathspec),
            interactive=True, is_pathspec=True)

    base_specs = util.base_path_specs
    if not isinstance(base_specs, list):
        base_specs = [base_specs]

    results = []
    last_partition = ''
    for spec in base_specs:
        display_name = getattr(spec.parent, 'location', '/')

        # Adds the partition name in front of the shadow volume
        parent_type = getattr(spec.parent, 'type_indicator', '')
        if parent_type == 'TSK_PARTITION':
            last_partition = spec.parent.location
        elif parent_type == 'VSHADOW':
            display_name = last_partition + display_name

        serialized = JsonPathSpecSerializer.WriteSerialized(spec)
        results.append({
            'pathspec': serialized,
            'url_query': urlencode({'pathspec': serialized}),
            'file_name': display_name,
        })
    return results
def _decode_pathspec(encoded_pathspec):
    """Returns a Path Spec object from an encoded path spec, causes a 400 abort if the decode fails

    Args:
        encoded_pathspec: the JSON-serialized path specification string.

    Returns:
        The deserialized path specification object.
    """
    if not encoded_pathspec:
        # logging.warn() is a deprecated alias; use warning() instead.
        logging.warning('Path Spec required but none found')
        abort(400, 'Expected an encoded Path Spec, but none found')
    return JsonPathSpecSerializer.ReadSerialized(encoded_pathspec)
def _decode_pathspec(encoded_pathspec):
    """Returns a Path Spec object from an encoded path spec, causes a 400 abort if the decode fails

    Args:
        encoded_pathspec: the JSON-serialized path specification string.

    Returns:
        The deserialized path specification object.

    Raises:
        KeyError: if no encoded pathspec was supplied.
    """
    if not encoded_pathspec:
        # logging.warn() is a deprecated alias; use warning() instead.
        logging.warning('Path Spec required but none found')
        raise KeyError('Could not find pathspec in request')
    return JsonPathSpecSerializer.ReadSerialized(encoded_pathspec)
def _list_directory(self, file_entry, recursive=False, depth=0, index='*', offset=0, size=None):
    """Lists a directory using a file entry.

    Args:
        file_entry: dfvfs file entry whose children are listed.
        recursive: if True, descend past the first sublevel.
        depth: current recursion depth; 0 is the starting entry, which is
            traversed but not itself emitted as an evidence item.
        index: index name embedded in the generated 'url_query' string.
        offset: number of leading sub entries to skip (applies only at this
            call's level, not in recursive calls).
        size: maximum number of sub entries to process at this level;
            None means unlimited.

    Returns:
        list of evidence dictionaries, one per visited file entry.
    """
    directory_list = []
    if depth > 0:
        # Build the evidence dictionary for this (non-root) entry.
        evidence = {}
        pathspec = file_entry.path_spec
        evidence['pathspec'] = JsonPathSpecSerializer.WriteSerialized(pathspec)
        evidence['url_query'] = urlencode({'pathspec': evidence['pathspec'], 'index': index})
        evidence['path'] = pathspec.location
        location = pathspec.location
        # Drop a single trailing separator so basename returns the entry name.
        if location.endswith('/') or location.endswith('\\'):
            location = location[:-1]
        file_name = os.path.basename(location)
        evidence['file_name'] = file_name
        evidence.update(self._get_stat_information_from_file_entry(file_entry))
        evidence['file_cache_path'] = self.get_cache_path(evidence['pathspec'])
        evidence['extension'] = self.get_file_extension(evidence['pathspec'])
        directory_list.append(self._append_mimetype(evidence))
    if (recursive or depth == 0) and (file_entry.IsDirectory() or hasattr(file_entry, 'sub_file_entries')):
        count = 0
        for sub_file_entry in file_entry.sub_file_entries:
            # offset and size only take effect here; recursive calls below
            # run with the default offset=0, size=None.
            if offset > 0:
                offset -= 1
                continue
            directory_list.extend(self._list_directory(sub_file_entry, recursive, depth + 1))
            count += 1
            if count == size:
                break
    return directory_list
def _GetSanitizedEventValues(self, event_object):
    """Builds a dictionary of sanitized values from an event object.

    Sanitization prevents values such as the nested pathspec dictionary
    from breaking Elasticsearch automatic indexing.

    Args:
      event_object: the event object (instance of EventObject).

    Returns:
      Dictionary with sanitized event object values.
    """
    event_values = {}
    for name, value in event_object.GetAttributes():
        # regvalue breaks indexing, so drop it entirely.
        if name == u'regvalue':
            continue
        # Replace the nested pathspec with its JSON serialization; skip the
        # attribute when it cannot be serialized.
        if name == u'pathspec':
            try:
                value = JsonPathSpecSerializer.WriteSerialized(value)
            except TypeError:
                continue
        event_values[name] = value

    # Store an ISO-formatted string representation of the timestamp.
    rounded = timelib.Timestamp.RoundToSeconds(event_object.timestamp)
    event_values[u'datetime'] = timelib.Timestamp.CopyToIsoFormat(
        rounded, timezone=self._output_mediator.timezone)

    message, _ = self._output_mediator.GetFormattedMessages(event_object)
    if message is None:
        raise errors.NoFormatterFound(
            u'Unable to find event formatter for: {0:s}.'.format(
                getattr(event_object, u'data_type', u'UNKNOWN')))
    event_values[u'message'] = message

    # Tags needs to be a list for Elasticsearch to index correctly.
    try:
        labels = list(event_values[u'tag'].labels)
    except (KeyError, AttributeError):
        labels = []
    event_values[u'tag'] = labels

    source_short, source = self._output_mediator.GetFormattedSources(
        event_object)
    if source is None or source_short is None:
        raise errors.NoFormatterFound(
            u'Unable to find event formatter for: {0:s}.'.format(
                getattr(event_object, u'data_type', u'UNKNOWN')))
    event_values[u'source_short'] = source_short
    event_values[u'source_long'] = source

    return event_values
def _GetSanitizedEventValues(self, event, event_data, event_data_stream, event_tag):
    """Sanitizes the event for use in Elasticsearch.

    The event values need to be sanitized to prevent certain values from
    causing problems when indexing with Elasticsearch. For example the path
    specification is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.
      event_tag (EventTag): event tag.

    Returns:
      dict[str, object]: sanitized event values.

    Raises:
      NoFormatterFound: if no event formatter can be found to match the data
          type in the event data.
    """
    event_values = {}
    if event_data:
        for attribute_name, attribute_value in event_data.GetAttributes():
            event_values[attribute_name] = attribute_value

    if event_data_stream:
        for attribute_name, attribute_value in event_data_stream.GetAttributes():
            event_values[attribute_name] = attribute_value

    # Ensure every requested output field is present, even when unset.
    for attribute_name in self._field_names:
        if attribute_name not in event_values:
            event_values[attribute_name] = None

    # Fix: event_data is treated as optional above (guarded twice), but the
    # original code read event_data.data_type unconditionally below, which
    # would raise AttributeError for a missing event_data. Hoist a guarded,
    # loop-invariant lookup instead.
    data_type = event_data.data_type if event_data else None

    field_values = {}
    for attribute_name, attribute_value in event_values.items():
        # Note that support for event_data.pathspec is kept for backwards
        # compatibility. The current value is event_data_stream.path_spec.
        if attribute_name in ('path_spec', 'pathspec'):
            try:
                field_value = JsonPathSpecSerializer.WriteSerialized(
                    attribute_value)
            except TypeError:
                continue
        else:
            field_value = self._field_formatting_helper.GetFormattedField(
                attribute_name, event, event_data, event_data_stream, event_tag)

        field_values[attribute_name] = self._SanitizeField(
            data_type, attribute_name, field_value)

    return field_values
def _old_list_directory(self, file_entry, recursive=False, depth=0):
    """Lists a directory using a file entry (legacy variant).

    Emits evidence items built via get_evidence_item() for each entry below
    the starting one.
    """
    directory_list = []
    if depth > 0:
        directory_list.append(self.get_evidence_item(JsonPathSpecSerializer.WriteSerialized(file_entry.path_spec)))
    if (recursive or depth == 0) and file_entry.IsDirectory():
        for sub_file_entry in file_entry.sub_file_entries:
            # NOTE(review): recursion goes through self._list_directory, not
            # _old_list_directory, so nested levels are produced in the new
            # listing format — confirm this is intentional.
            directory_list.extend(self._list_directory(sub_file_entry, recursive, depth + 1))
    return directory_list
def get_parent_pathspec(encoded_pathspec):
    '''Gets the parent pathspec of the provided pathspec'''
    entry = PathspecHelper._open_file_entry(encoded_pathspec)
    parent_entry = entry.GetParentFileEntry()
    PathspecHelper._close_file_entry(encoded_pathspec)

    if parent_entry:
        parent_spec = parent_entry.path_spec
    else:
        # dfvfs could not produce a parent entry; derive one manually.
        parent_spec = PathspecHelper.get_parent_pathspec_manually(encoded_pathspec)

    if not parent_spec:
        return False

    # Skip over pathspec layers that are traversed automatically.
    while getattr(parent_spec, 'type_indicator', '') in PathspecHelper._automatically_traverse:
        parent_spec = parent_spec.parent

    return JsonPathSpecSerializer.WriteSerialized(parent_spec)
def get_encoded_pathspec(pathspec_or_source):
    """Gets the encoded pathspec"""
    # Resolve the input (a pathspec or a source path) to a pathspec first,
    # then serialize it.
    resolved = PathspecHelper.get_pathspec(pathspec_or_source)
    return JsonPathSpecSerializer.WriteSerialized(resolved)
def get_encoded_parent_base_pathspec_manually(encoded_pathspec):
    """Returns the encoded parent pathspec, by decoding and getting the 'parent' attribute"""
    parent = PathspecHelper.get_parent_pathspec_manually(encoded_pathspec)
    return JsonPathSpecSerializer.WriteSerialized(parent)
def _GetSanitizedEventValues(self, event):
    """Sanitizes the event for use in Elasticsearch.

    The event values need to be sanitized to prevent certain values from
    causing problems when indexing with Elasticsearch. For example the path
    specification is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event (EventObject): event.

    Returns:
      dict[str, object]: sanitized event values.

    Raises:
      NoFormatterFound: if no event formatter can be found to match the data
          type in the event.
    """
    event_values = {}
    for attribute_name, attribute_value in event.GetAttributes():
        # Ignore the regvalue attribute as it cause issues when indexing.
        if attribute_name == 'regvalue':
            continue
        # Replace the nested pathspec object with its JSON serialization;
        # drop the attribute entirely if it cannot be serialized.
        if attribute_name == 'pathspec':
            try:
                attribute_value = JsonPathSpecSerializer.WriteSerialized(
                    attribute_value)
            except TypeError:
                continue
        event_values[attribute_name] = attribute_value

    # Add a string representation of the timestamp.
    try:
        attribute_value = timelib.Timestamp.RoundToSeconds(event.timestamp)
    except TypeError as exception:
        logger.warning((
            'Unable to round timestamp {0!s}. error: {1!s}. '
            'Defaulting to 0').format(event.timestamp, exception))
        attribute_value = 0

    attribute_value = timelib.Timestamp.CopyToIsoFormat(
        attribute_value, timezone=self._output_mediator.timezone)
    event_values['datetime'] = attribute_value

    message, _ = self._output_mediator.GetFormattedMessages(event)
    if message is None:
        data_type = getattr(event, 'data_type', 'UNKNOWN')
        raise errors.NoFormatterFound(
            'Unable to find event formatter for: {0:s}.'.format(data_type))

    event_values['message'] = message

    # Tags needs to be a list for Elasticsearch to index correctly.
    try:
        labels = list(event_values['tag'].labels)
    except (KeyError, AttributeError):
        labels = []
    event_values['tag'] = labels

    source_short, source = self._output_mediator.GetFormattedSources(event)
    if source is None or source_short is None:
        data_type = getattr(event, 'data_type', 'UNKNOWN')
        raise errors.NoFormatterFound(
            'Unable to find event formatter for: {0:s}.'.format(data_type))

    event_values['source_short'] = source_short
    event_values['source_long'] = source

    return event_values
def encode_pathspec(self, pathspec):
    """Serializes the given pathspec object to its JSON-encoded string form."""
    encoded = JsonPathSpecSerializer.WriteSerialized(pathspec)
    return encoded
def decode_pathspec(self, encoded_pathspec):
    """Deserializes a JSON-encoded pathspec string back into a pathspec object."""
    decoded = JsonPathSpecSerializer.ReadSerialized(encoded_pathspec)
    return decoded
def _GetSanitizedEventValues(self, event, event_data, event_tag):
    """Sanitizes the event for use in Elasticsearch.

    The event values need to be sanitized to prevent certain values from
    causing problems when indexing with Elasticsearch. For example the path
    specification is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_tag (EventTag): event tag.

    Returns:
      dict[str, object]: sanitized event values.

    Raises:
      NoFormatterFound: if no event formatter can be found to match the data
          type in the event data.
    """
    event_values = {}
    for attribute_name, attribute_value in event_data.GetAttributes():
        # TODO: remove regvalue, which is kept for backwards compatibility.
        # Ignore the regvalue attribute as it cause issues when indexing.
        if attribute_name == 'regvalue':
            continue
        # Replace the nested pathspec object with its JSON serialization;
        # drop the attribute entirely if it cannot be serialized.
        if attribute_name == 'pathspec':
            try:
                attribute_value = JsonPathSpecSerializer.WriteSerialized(
                    attribute_value)
            except TypeError:
                continue
        event_values[attribute_name] = attribute_value

        if isinstance(attribute_value, py2to3.BYTES_TYPE):
            # Some parsers have written bytes values to storage.
            attribute_value = attribute_value.decode('utf-8', 'replace')
            logger.warning(
                'Found bytes value for attribute "{0:s}" for data type: '
                '{1!s}. Value was converted to UTF-8: "{2:s}"'.format(
                    attribute_name, event_data.data_type, attribute_value))
            event_values[attribute_name] = attribute_value

    # Add a string representation of the timestamp.
    try:
        attribute_value = timelib.Timestamp.RoundToSeconds(event.timestamp)
    except TypeError as exception:
        logger.warning((
            'Unable to round timestamp {0!s}. error: {1!s}. '
            'Defaulting to 0').format(event.timestamp, exception))
        attribute_value = 0

    attribute_value = timelib.Timestamp.CopyToIsoFormat(
        attribute_value, timezone=self._output_mediator.timezone)
    event_values['datetime'] = attribute_value
    event_values['timestamp'] = event.timestamp
    event_values['timestamp_desc'] = event.timestamp_desc

    message, _ = self._output_mediator.GetFormattedMessages(event_data)
    if message is None:
        data_type = getattr(event_data, 'data_type', 'UNKNOWN')
        raise errors.NoFormatterFound(
            'Unable to find event formatter for: {0:s}.'.format(data_type))

    event_values['message'] = message

    # Tags needs to be a list for Elasticsearch to index correctly.
    labels = []
    if event_tag:
        try:
            labels = list(event_tag.labels)
        except (AttributeError, KeyError):
            pass

    event_values['tag'] = labels

    source_short, source = self._output_mediator.GetFormattedSources(
        event, event_data)
    if source is None or source_short is None:
        data_type = getattr(event_data, 'data_type', 'UNKNOWN')
        raise errors.NoFormatterFound(
            'Unable to find event formatter for: {0:s}.'.format(data_type))

    event_values['source_short'] = source_short
    event_values['source_long'] = source

    return event_values
def _GetSanitizedEventValues(self, event, event_data, event_data_stream, event_tag):
    """Sanitizes the event for use in Elasticsearch.

    The event values need to be sanitized to prevent certain values from
    causing problems when indexing with Elasticsearch. For example the path
    specification is a nested dictionary which will cause problems for
    Elasticsearch automatic indexing.

    Args:
      event (EventObject): event.
      event_data (EventData): event data.
      event_data_stream (EventDataStream): event data stream.
      event_tag (EventTag): event tag.

    Returns:
      dict[str, object]: sanitized event values.

    Raises:
      NoFormatterFound: if no event formatter can be found to match the data
          type in the event data.
    """
    event_attributes = list(event_data.GetAttributes())
    if event_data_stream:
        event_attributes.extend(event_data_stream.GetAttributes())

    event_values = {}
    # Sorting makes the attribute iteration order deterministic.
    for attribute_name, attribute_value in sorted(event_attributes):
        # Note that support for event_data.pathspec is kept for backwards
        # compatibility. The current value is event_data_stream.path_spec.
        if attribute_name in ('path_spec', 'pathspec'):
            try:
                attribute_value = JsonPathSpecSerializer.WriteSerialized(
                    attribute_value)
            except TypeError:
                continue
        event_values[attribute_name] = attribute_value

        if isinstance(attribute_value, bytes):
            # Some parsers have written bytes values to storage.
            attribute_value = attribute_value.decode('utf-8', 'replace')
            logger.warning(
                'Found bytes value for attribute "{0:s}" for data type: '
                '{1!s}. Value was converted to UTF-8: "{2:s}"'.format(
                    attribute_name, event_data.data_type, attribute_value))
            event_values[attribute_name] = attribute_value

    event_values['timestamp'] = event.timestamp
    event_values['timestamp_desc'] = event.timestamp_desc

    # Add a date and time string representation of the timestamp.
    date_time = dfdatetime_posix_time.PosixTimeInMicroseconds(
        timestamp=event.timestamp)
    event_values['datetime'] = date_time.CopyToDateTimeStringISO8601()

    message, _ = self._output_mediator.GetFormattedMessages(event_data)
    if message is None:
        data_type = getattr(event_data, 'data_type', 'UNKNOWN')
        raise errors.NoFormatterFound(
            'Unable to find event formatter for: {0:s}.'.format(data_type))

    event_values['message'] = message

    # Tags needs to be a list for Elasticsearch to index correctly.
    labels = []
    if event_tag:
        try:
            labels = list(event_tag.labels)
        except (AttributeError, KeyError):
            pass

    event_values['tag'] = labels

    source_short, source = self._output_mediator.GetFormattedSources(
        event, event_data)
    if source is None or source_short is None:
        data_type = getattr(event_data, 'data_type', 'UNKNOWN')
        raise errors.NoFormatterFound(
            'Unable to find event formatter for: {0:s}.'.format(data_type))

    event_values['source_short'] = source_short
    event_values['source_long'] = source

    return event_values