def JoinEvents(self, first_event, second_event):
    """Joins two event objects.

    The merged values are written to first_event; second_event is only read.
    Every call increments self.duplicate_counter.

    Args:
        first_event: the first event object (instance of EventObject). It is
            updated in place with the merged attribute values.
        second_event: the second event object (instance of EventObject).
    """
    self.duplicate_counter += 1
    # TODO: Currently we are using the first event pathspec, perhaps that
    # is not the best approach. There is no need to have all the pathspecs
    # inside the combined event, however which one should be chosen is
    # perhaps something that can be evaluated here (regular TSK in favor of
    # an event stored deep inside a VSS for instance).
    for attr in self.MERGE_ATTRIBUTES:
        # TODO: remove need for GetUnicodeString.
        first_value = set(utils.GetUnicodeString(
            getattr(first_event, attr, u'')).split(u';'))
        second_value = set(utils.GetUnicodeString(
            getattr(second_event, attr, u'')).split(u';'))
        # Sorting keeps the result consistent across runs, which helps
        # with diffs.
        setattr(
            first_event, attr, u';'.join(sorted(first_value | second_value)))

    # Special case: for a filestat entry the description field needs to be
    # combined as well.
    if getattr(first_event, u'parser', u'') == u'filestat':
        first_description = set(
            getattr(first_event, u'timestamp_desc', u'').split(u';'))
        second_description = set(
            getattr(second_event, u'timestamp_desc', u'').split(u';'))

        # Compare whole descriptions instead of doing a substring test on
        # the joined string, so that e.g. "Time" is not treated as already
        # present in "Creation Time". An empty second description never
        # triggers a rewrite.
        new_descriptions = second_description - first_description
        new_descriptions.discard(u'')
        if new_descriptions:
            setattr(
                first_event, u'timestamp_desc',
                u';'.join(sorted(first_description | second_description)))
def __eq__(self, event_object):
    """Determines if two EventObject instances are considered equal.

    Two event objects are considered equal ("near" duplicates) when:
      + the other object is an EventObject;
      + both have the same timestamp and data_type;
      + GetAttributes() returns the same result for both;
      + every attribute not listed in COMPARE_EXCLUDE has the same value.

    Per the original documentation the excluded ("reserved") attributes are
    inode, pathspec, filename, display_name, store_number and store_index;
    the authoritative list is COMPARE_EXCLUDE.

    When the parser attribute contains 'filestat' the inode values must
    additionally match.

    Args:
        event_object: The EventObject that is being compared to this one.

    Returns:
        True if both EventObjects are considered equal, otherwise False.
    """
    # Note: if this method changes, the above EqualityString method MUST be
    # updated as well
    if (not isinstance(event_object, EventObject) or
            self.timestamp != event_object.timestamp or
            self.data_type != event_object.data_type):
        return False

    own_attributes = self.GetAttributes()
    if own_attributes != event_object.GetAttributes():
        return False

    # Here we have to deal with "near" duplicates, so not all attributes
    # should be compared.
    compared_names = own_attributes.difference(self.COMPARE_EXCLUDE)
    if any(getattr(self, name) != getattr(event_object, name)
           for name in compared_names):
        return False

    if 'filestat' not in getattr(self, 'parser', ''):
        return True

    # If we are dealing with the stat parser the inode number is the one
    # attribute that really matters, unlike others. The differing defaults
    # make two events without an inode compare as unequal.
    own_inode = utils.GetUnicodeString(getattr(self, 'inode', 'a'))
    other_inode = utils.GetUnicodeString(getattr(event_object, 'inode', 'b'))
    return own_inode == other_inode
def JoinEvents(self, event_a, event_b):
    """Joins two event objects.

    The merged values are written to event_a; event_b is only read.
    Every call increments self.duplicate_counter.

    Args:
        event_a: the event object that receives the merged attribute values.
        event_b: the event object whose values are merged into event_a.
    """
    self.duplicate_counter += 1
    # TODO: Currently we are using the first event pathspec, perhaps that
    # is not the best approach. There is no need to have all the pathspecs
    # inside the combined event, however which one should be chosen is
    # perhaps something that can be evaluated here (regular TSK in favor of
    # an event stored deep inside a VSS for instance).
    for attr in self.MERGE_ATTRIBUTES:
        # TODO: remove need for GetUnicodeString.
        val_a = set(
            utils.GetUnicodeString(getattr(event_a, attr, u'')).split(u';'))
        val_b = set(
            utils.GetUnicodeString(getattr(event_b, attr, u'')).split(u';'))
        # Sorting keeps the result consistent across runs, which helps
        # with diffs.
        setattr(event_a, attr, u';'.join(sorted(val_a | val_b)))

    # Special case: for a filestat entry the description field needs to be
    # combined as well.
    if getattr(event_a, u'parser', u'') == u'filestat':
        description_a = set(
            getattr(event_a, u'timestamp_desc', u'').split(u';'))
        description_b = set(
            getattr(event_b, u'timestamp_desc', u'').split(u';'))

        # Compare whole descriptions instead of doing a substring test on
        # the joined string, so that e.g. "Time" is not treated as already
        # present in "Creation Time". An empty description on event_b never
        # triggers a rewrite.
        new_descriptions = description_b - description_a
        new_descriptions.discard(u'')
        if new_descriptions:
            setattr(
                event_a, u'timestamp_desc',
                u';'.join(sorted(description_a | description_b)))
def __init__(self, data):
    """Initializes the date compare object from a date-like value.

    Sets self.text to the unicode form of data and self.data to the
    corresponding timestamp value.

    Args:
        data: A string, datetime object, DateCompareObject or a number
            (int, long or float) containing the number of micro seconds
            since January 1, 1970, 00:00:00 UTC.

    Raises:
        ValueError: if the date string is invalid or the type of data is
            not supported.
    """
    self.text = utils.GetUnicodeString(data)
    if isinstance(data, (int, long)):
        self.data = data
    elif isinstance(data, float):
        self.data = long(data)
    elif isinstance(data, (str, unicode)):
        try:
            self.data = timelib.Timestamp.FromTimeString(self.text)
        except (ValueError, errors.TimestampError):
            # Use the already converted text so a non-ASCII byte string
            # cannot trigger an implicit decode while building the message.
            raise ValueError(
                u'Wrongly formatted date string: {0:s}'.format(self.text))
    elif isinstance(data, datetime.datetime):
        self.data = timelib.Timestamp.FromPythonDatetime(data)
    # isinstance takes the object first and the class second; with the
    # arguments swapped this branch raised TypeError instead of matching.
    elif isinstance(data, DateCompareObject):
        self.data = data.data
    else:
        # "!s" is used because a type object does not support the "s"
        # format specification on all Python versions.
        raise ValueError(u'Unsupported type: {0!s}.'.format(type(data)))
def __init__(self, data):
    """Initializes the date compare object from a date-like value.

    Sets self.text to the unicode form of data and self.data to the
    corresponding timestamp value.

    Args:
        data: A string, datetime object, DateCompareObject or a number
            (int, long or float) that represents the time to compare
            against. Time should be stored as microseconds since UTC in
            Epoch format.

    Raises:
        ValueError: if the date string is invalid or the type of data is
            not supported.
    """
    self.text = utils.GetUnicodeString(data)
    # isinstance is used instead of exact type comparison so that
    # subclasses of the supported types are accepted as well.
    if isinstance(data, (int, long)):
        self.data = data
    elif isinstance(data, float):
        self.data = long(data)
    elif isinstance(data, (str, unicode)):
        try:
            self.data = timelib.Timestamp.FromTimeString(self.text)
        except ValueError as exception:
            # "!s" is used for the exception because exception objects do
            # not support the "s" format specification on all Python
            # versions.
            raise ValueError(
                u'Wrongly formatted date string: {0:s} - {1!s}'.format(
                    self.text, exception))
    elif isinstance(data, datetime.datetime):
        self.data = timelib.Timestamp.FromPythonDatetime(data)
    # isinstance takes the object first and the class second; with the
    # arguments swapped this branch raised TypeError instead of matching.
    elif isinstance(data, DateCompareObject):
        self.data = data.data
    else:
        raise ValueError(u'Unsupported type: {0!s}.'.format(type(data)))
def __unicode__(self):
    """Returns a human readable, multi-line string describing the event.

    The output consists of the timestamp, the formatter message and source
    strings (or "None" when no formatter is available), the path
    specification when present, and the reserved attributes followed by the
    additional attributes.

    Returns:
        A Unicode string.
    """
    reserved_lines = [
        u'+-' * 40,
        u'[Timestamp]:\n {0:s}'.format(
            timelib.Timestamp.CopyToIsoFormat(self.timestamp)),
        u'\n[Message Strings]:',
    ]

    # TODO: move formatting testing to a formatters (manager) test.
    formatter = formatters_manager.EventFormatterManager.GetFormatter(self)
    if formatter:
        message, message_short = formatter.GetMessages(self)
        source_short, source_long = formatter.GetSources(self)
        reserved_lines.append(u'{2:>7}: {0}\n{3:>7}: {1}\n'.format(
            utils.GetUnicodeString(message_short),
            utils.GetUnicodeString(message), 'Short', 'Long'))
        reserved_lines.append(u'{2:>7}: {0}\n{3:>7}: {1}\n'.format(
            utils.GetUnicodeString(source_short),
            utils.GetUnicodeString(source_long),
            'Source Short', 'Source Long'))
    else:
        reserved_lines.append(u'None')

    if hasattr(self, 'pathspec'):
        # Indent continuation lines of the comparable representation.
        indented_pathspec = self.pathspec.comparable.replace('\n', '\n ')
        reserved_lines.append(
            u'[Pathspec]:\n {0:s}\n'.format(indented_pathspec))

    reserved_lines.append(u'[Reserved attributes]:')
    additional_lines = [u'[Additional attributes]:']

    for name, value in sorted(self.GetValues().items()):
        if name in utils.RESERVED_VARIABLES:
            # The path specification was already written above.
            if name == 'pathspec':
                continue
            target_lines = reserved_lines
        else:
            target_lines = additional_lines
        target_lines.append(
            u' {{{key}}} {value}'.format(key=name, value=value))

    reserved_lines.append(u'\n')
    additional_lines.append(u'')

    return u'\n'.join(reserved_lines) + u'\n'.join(additional_lines)
def _OpenPlistFile(self, searcher, path_spec):
    """Opens a plist file given a path and returns its top level object.

    Args:
        searcher: The file system searcher object (instance of
            dfvfs.FileSystemSearcher).
        path_spec: The path specification (instance of dfvfs.PathSpec)
            of the plist file.

    Returns:
        The top level object returned by binplist.BinaryPlist.Parse().

    Raises:
        errors.PreProcessFail: if the preprocessing fails.
    """
    plist_file_location = getattr(path_spec, 'location', u'')
    file_entry = searcher.GetFileEntryByPathSpec(path_spec)
    # NOTE(review): presumably the searcher returns None when the path
    # specification cannot be resolved - confirm against dfvfs. Report that
    # as a preprocessing failure instead of failing with AttributeError.
    if file_entry is None:
        raise errors.PreProcessFail(
            u'Unable to open file: {0:s}'.format(plist_file_location))

    file_object = file_entry.GetFileObject()

    try:
        plist_file = binplist.BinaryPlist(file_object)
        top_level_object = plist_file.Parse()
    except binplist.FormatError as exception:
        raise errors.PreProcessFail(
            u'File is not a plist: {0:s}'.format(
                utils.GetUnicodeString(exception)))
    except OverflowError as exception:
        # "!s" is used because exception objects do not support the "s"
        # format specification on all Python versions.
        raise errors.PreProcessFail(
            u'Error processing: {0:s} with error: {1!s}'.format(
                plist_file_location, exception))

    # NOTE(review): this tests the parser object, not the parsed result;
    # whether it can ever be false depends on binplist.BinaryPlist.
    if not plist_file:
        raise errors.PreProcessFail(
            u'File is not a plist: {0:s}'.format(plist_file_location))

    return top_level_object
def GetEntries(
        self, parser_mediator, key=None, registry_file_type=None,
        codepage=u'cp1252', **kwargs):
    """Produces an event object based on a Registry key name and values.

    Every value of the key is rendered as "[<data type>] <data>" and stored
    under the value name; values without a name use "(default)".

    Args:
        parser_mediator: A parser mediator object (instance of
            ParserMediator).
        key: Optional Registry key (instance of winreg.WinRegKey).
            The default is None.
        registry_file_type: Optional string containing the Windows Registry
            file type, e.g. NTUSER, SOFTWARE. The default is None.
        codepage: Optional extended ASCII string codepage. The default is
            cp1252. Not used by this method.
    """
    if key.number_of_values == 0:
        text_dict = {u'Value': u'No values stored in key.'}
    else:
        text_dict = {}
        for value in key.GetValues():
            if value.name:
                name = u'{0:s}'.format(value.name)
            else:
                name = u'(default)'

            if value.data is None:
                description = u'[{0:s}] Empty'.format(value.data_type_string)
            elif value.DataIsString():
                description = u'[{0:s}] {1:s}'.format(
                    value.data_type_string,
                    utils.GetUnicodeString(value.data))
            elif value.DataIsInteger():
                description = u'[{0:s}] {1:d}'.format(
                    value.data_type_string, value.data)
            elif (value.DataIsMultiString() and
                  isinstance(value.data, (list, tuple))):
                description = u'[{0:s}] {1:s}'.format(
                    value.data_type_string, u''.join(value.data))
            else:
                # Covers unsupported data types and multi-string values
                # whose data is not a list or tuple.
                # TODO: Add a flag or some sort of an anomaly alert.
                description = u'[{0:s}]'.format(value.data_type_string)

            text_dict[name] = description

    parser_mediator.ProduceEvent(windows_events.WindowsRegistryEvent(
        key.last_written_timestamp, key.path, text_dict,
        offset=key.offset, registry_file_type=registry_file_type))
def Operation(self, x, unused_y):
    """Checks if the compiled regular expression matches the value.

    Args:
        x: the value to search; it is converted with utils.GetUnicodeString
            before searching.
        unused_y: not used.

    Returns:
        True if the regular expression is found in the value, False if not
        or if a TypeError is raised while searching.
    """
    try:
        found = self.compiled_re.search(utils.GetUnicodeString(x))
    except TypeError:
        return False
    return bool(found)
def GetEntries(self, parser_mediator, registry_key, **kwargs):
    """Produces an event object based on a Registry key name and values.

    Every value of the key is rendered as "[<data type>] <data>" and stored
    under the value name; values without a name use "(default)".

    Args:
        parser_mediator: A parser mediator object (instance of
            ParserMediator).
        registry_key: A Windows Registry key (instance of
            dfwinreg.WinRegistryKey).
    """
    if registry_key.number_of_values == 0:
        values_dict = {u'Value': u'No values stored in key.'}
    else:
        values_dict = {}
        for registry_value in registry_key.GetValues():
            name = registry_value.name or u'(default)'

            if registry_value.data is None:
                description = u'[{0:s}] Empty'.format(
                    registry_value.data_type_string)

            elif registry_value.DataIsString():
                description = u'[{0:s}] {1:s}'.format(
                    registry_value.data_type_string,
                    utils.GetUnicodeString(registry_value.GetDataAsObject()))

            elif registry_value.DataIsInteger():
                description = u'[{0:s}] {1:d}'.format(
                    registry_value.data_type_string,
                    registry_value.GetDataAsObject())

            elif registry_value.DataIsMultiString():
                strings = registry_value.GetDataAsObject()
                if isinstance(strings, (list, tuple)):
                    description = u'[{0:s}] {1:s}'.format(
                        registry_value.data_type_string, u''.join(strings))
                else:
                    # TODO: Add a flag or some sort of an anomaly alert.
                    description = u'[{0:s}]'.format(
                        registry_value.data_type_string)

            else:
                description = u'[{0:s}]'.format(
                    registry_value.data_type_string)

            values_dict[name] = description

    parser_mediator.ProduceEvent(windows_events.WindowsRegistryEvent(
        registry_key.last_written_time, registry_key.path, values_dict,
        offset=registry_key.offset))
def ParseString(self, match=None, **unused_kwargs):
    """Appends a parsed (body text) string to the body attribute.

    This is a callback function for the text parser (lexer) and is called by
    the STRING lexer state.

    Args:
        match: The regular expression match object. Its first group is
            appended to self.attributes[u'body'].
    """
    body_text = utils.GetUnicodeString(match.group(1))
    self.attributes[u'body'] += body_text
def GetEntries(
        self, parser_context, key=None, registry_type=None,
        **unused_kwargs):
    """Produces an event object based on a Registry key name and values.

    Every value of the key is rendered as "[<data type>] <data>" and stored
    under the value name; values without a name use "(default)".

    Args:
        parser_context: A parser context object (instance of ParserContext).
        key: Optional Registry key (instance of winreg.WinRegKey).
            The default is None.
        registry_type: Optional Registry type string. The default is None.
    """
    text_dict = {}

    if key.number_of_values == 0:
        text_dict[u'Value'] = u'No values stored in key.'

    else:
        for value in key.GetValues():
            if not value.name:
                # Unicode literal so that all keys of text_dict have the
                # same string type.
                value_name = u'(default)'
            else:
                value_name = u'{0:s}'.format(value.name)

            if value.data is None:
                value_string = u'[{0:s}] Empty'.format(
                    value.data_type_string)
            elif value.DataIsString():
                string_decode = utils.GetUnicodeString(value.data)
                value_string = u'[{0:s}] {1:s}'.format(
                    value.data_type_string, string_decode)
            elif value.DataIsInteger():
                value_string = u'[{0:s}] {1:d}'.format(
                    value.data_type_string, value.data)
            elif value.DataIsMultiString():
                # isinstance also accepts list and tuple subclasses, which
                # an exact type comparison would reject.
                if not isinstance(value.data, (list, tuple)):
                    value_string = u'[{0:s}]'.format(
                        value.data_type_string)
                    # TODO: Add a flag or some sort of an anomaly alert.
                else:
                    value_string = u'[{0:s}] {1:s}'.format(
                        value.data_type_string, u''.join(value.data))
            else:
                value_string = u'[{0:s}]'.format(value.data_type_string)

            text_dict[value_name] = value_string

    event_object = windows_events.WindowsRegistryEvent(
        key.last_written_timestamp, key.path, text_dict, offset=key.offset,
        registry_type=registry_type)
    parser_context.ProduceEvent(event_object, plugin_name=self.NAME)
def GetTopLevel(self, file_object, file_name=u''):
    """Returns the deserialized content of a plist.

    Args:
        file_object: A file-like object to parse.
        file_name: The name of the file-like object.

    Returns:
        The top level object returned by binplist.readPlist; presumably a
        dictionary object representing the contents of the plist.

    Raises:
        errors.UnableToParseFile: if binplist cannot parse the file or the
            parsed result is empty.
    """
    # Note that binplist.readPlist does not seek to offset 0.
    try:
        top_level_object = binplist.readPlist(file_object)

    # Exceptions and the file name are converted with GetUnicodeString in
    # every handler, so all messages are built the same way.
    except binplist.FormatError as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] File is not a plist file: {1:s}'.format(
                self.NAME, utils.GetUnicodeString(exception)))

    except (
            LookupError, binascii.Error, ValueError,
            AttributeError) as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] Unable to parse XML file, reason: {1:s}'.format(
                self.NAME, utils.GetUnicodeString(exception)))

    except OverflowError as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] Unable to parse: {1:s} with error: {2:s}'.format(
                self.NAME, utils.GetUnicodeString(file_name),
                utils.GetUnicodeString(exception)))

    if not top_level_object:
        raise errors.UnableToParseFile(
            u'[{0:s}] File is not a plist: {1:s}'.format(
                self.NAME, utils.GetUnicodeString(file_name)))

    # Since we are using readPlist from binplist now instead of manually
    # opening the binary plist file we lose this option. Keep it commented
    # out for now but this needs to be tested a bit more.
    # TODO: Re-evaluate if we can delete this or still require it.
    #if bpl.is_corrupt:
    #  logging.warning(
    #      u'[{0:s}] corruption detected in binary plist: {1:s}'.format(
    #          self.NAME, file_name))

    return top_level_object
def __init__(self, *children, **kwargs):
    """Initializes the operator and compiles its regular expression.

    The right operand is compiled as a case-insensitive regular expression
    in which "." also matches newlines, and stored in self.compiled_re.

    Args:
        children: positional arguments passed to the parent class.
        kwargs: keyword arguments passed to the parent class.

    Raises:
        ValueError: if the regular expression is malformed.
    """
    super(RegexpInsensitive, self).__init__(*children, **kwargs)

    expression = self.right_operand
    # Note that right_operand is not necessarily a string.
    logging.debug(u'Compiled: {0!s}'.format(expression))

    flags = re.I | re.DOTALL
    try:
        self.compiled_re = re.compile(
            utils.GetUnicodeString(expression), flags)
    except re.error:
        raise ValueError(
            u'Regular expression "{0!s}" is malformed.'.format(expression))
def ParseString(self, match, **_):
    """Appends a parsed (body text) string to the body attribute.

    This is a callback function for the text parser (lexer) and is called by
    the STRING lexer state.

    Args:
        match: A regular expression match group that contains the match by
            the lexer. Its first group is appended to
            self.attributes['body'].
    """
    body_text = utils.GetUnicodeString(match.group(1))
    self.attributes['body'] += body_text
def WriteEventBody(self, event_object):
    """Writes the body of an event object to the spreadsheet.

    One cell is written per entry in self._fields on the current row, the
    width of each column is adjusted to the longest value seen so far
    (clamped between _MIN_COLUMN_WIDTH and _MAX_COLUMN_WIDTH), and the
    current row is advanced afterwards.

    Args:
        event_object: the event object (instance of EventObject).
    """
    # enumerate yields the real position of every field; looking the index
    # up by name is quadratic and maps duplicate field names to the same
    # column.
    for column_index, field_name in enumerate(self._fields):
        callback_function = None
        callback_name = self.FIELD_FORMAT_CALLBACKS.get(field_name, None)
        if callback_name:
            callback_function = getattr(self, callback_name, None)

        if callback_function:
            value = callback_function(event_object)
        else:
            value = getattr(event_object, field_name, u'-')

        # Values of these types are written as-is; everything else is
        # converted to a string that is safe to store in XML.
        if not isinstance(
                value,
                (bool, py2to3.INTEGER_TYPES, float, datetime.datetime)):
            value = utils.GetUnicodeString(value)
            value = utils.RemoveIllegalXMLCharacters(value)

        # Auto adjust column width based on length of value.
        column_width = max(
            self._MIN_COLUMN_WIDTH,
            self._column_widths.get(column_index, 0),
            min(self._MAX_COLUMN_WIDTH,
                len(utils.GetUnicodeString(value)) + 2))
        self._column_widths[column_index] = column_width
        self._sheet.set_column(column_index, column_index, column_width)

        if isinstance(value, datetime.datetime):
            self._sheet.write_datetime(
                self._current_row, column_index, value)
        else:
            self._sheet.write(self._current_row, column_index, value)

    self._current_row += 1
def WriteSerializedObject(
        cls, proto_attribute, attribute_name, attribute_value):
    """Writes an event attribute to serialized form.

    The attribute of an event object can store almost any arbitrary data, so
    the corresponding protobuf storage must deal with the various data
    types. This method identifies the data type and assigns the value to the
    matching field of the attribute protobuf, which is modified in place.
    Nothing is returned.

    Args:
        proto_attribute: a protobuf attribute object.
        attribute_name: the name of the attribute; only stored as the key
            when it is not empty.
        attribute_value: the value of the attribute.
    """
    if attribute_name:
        proto_attribute.key = attribute_name

    if isinstance(attribute_value, basestring):
        proto_attribute.string = utils.GetUnicodeString(attribute_value)
        return

    # The boolean check must precede the integer check since bool is a
    # subclass of int.
    if isinstance(attribute_value, bool):
        proto_attribute.boolean = attribute_value
        return

    if isinstance(attribute_value, py2to3.INTEGER_TYPES):
        # TODO: add some bounds checking.
        proto_attribute.integer = attribute_value
        return

    if isinstance(attribute_value, dict):
        cls.WriteSerializedDictObject(
            proto_attribute, u'dict', attribute_value)
        return

    if isinstance(attribute_value, (list, tuple)):
        cls.WriteSerializedListObject(
            proto_attribute, u'array', attribute_value)
        return

    if isinstance(attribute_value, float):
        proto_attribute.float = attribute_value
        return

    # Any remaining false value, such as None, is stored as "none"; all
    # other values are stored as raw data.
    if attribute_value:
        proto_attribute.data = attribute_value
    else:
        proto_attribute.none = True
def GetEntries(self, key, **unused_kwargs):
    """Yields an event object based on a Registry key name and values.

    Every value of the key is rendered as "[<data type>] <data>" and stored
    under the value name; values without a name use "(default)".

    Args:
        key: the Registry key to process.

    Yields:
        An event object (instance of event.WinRegistryEvent) that contains
        the rendered values of the key.
    """
    text_dict = {}

    if key.number_of_values == 0:
        text_dict[u'Value'] = u'No values stored in key.'

    else:
        for value in key.GetValues():
            if not value.name:
                # Unicode literal so that all keys of text_dict have the
                # same string type.
                value_name = u'(default)'
            else:
                value_name = u'{0:s}'.format(value.name)

            if value.data is None:
                value_string = u'[{0:s}] Empty'.format(
                    value.data_type_string)
            elif value.DataIsString():
                string_decode = utils.GetUnicodeString(value.data)
                value_string = u'[{0:s}] {1:s}'.format(
                    value.data_type_string, string_decode)
            elif value.DataIsInteger():
                value_string = u'[{0:s}] {1:d}'.format(
                    value.data_type_string, value.data)
            elif value.DataIsMultiString():
                # isinstance also accepts list and tuple subclasses, which
                # an exact type comparison would reject.
                if not isinstance(value.data, (list, tuple)):
                    value_string = u'[{0:s}]'.format(
                        value.data_type_string)
                    # TODO: Add a flag or some sort of an anomaly alert.
                else:
                    value_string = u'[{0:s}] {1:s}'.format(
                        value.data_type_string, u''.join(value.data))
            else:
                value_string = u'[{0:s}]'.format(value.data_type_string)

            text_dict[value_name] = value_string

    yield event.WinRegistryEvent(
        key.path, text_dict, timestamp=key.last_written_timestamp,
        offset=key.offset)
def __init__(self, data):
    """Initializes the date compare object from a date-like value.

    Sets self.data to the timestamp value and self.text to a Unicode string
    representation of the input.

    Args:
        data: A string, datetime object, DateCompareObject or a number
            (integer or float) containing the number of micro seconds since
            January 1, 1970, 00:00:00 UTC.

    Raises:
        ValueError: if the date string is invalid or the type of data is
            not supported.
    """
    if isinstance(data, py2to3.INTEGER_TYPES):
        self.data = data
        self.text = u'{0:d}'.format(data)

    elif isinstance(data, float):
        self.data = py2to3.LONG_TYPE(data)
        self.text = u'{0:f}'.format(data)

    elif isinstance(data, py2to3.STRING_TYPES):
        self.text = utils.GetUnicodeString(data)
        try:
            self.data = timelib.Timestamp.FromTimeString(data)
        except (ValueError, errors.TimestampError):
            # Use the already converted text so a non-ASCII byte string
            # cannot trigger an implicit decode while building the message.
            raise ValueError(
                u'Wrongly formatted date string: {0:s}'.format(self.text))

    elif isinstance(data, datetime.datetime):
        self.data = timelib.Timestamp.FromPythonDatetime(data)
        self.text = u'{0!s}'.format(data)

    elif isinstance(data, DateCompareObject):
        self.data = data.data
        self.text = u'{0!s}'.format(data)

    else:
        # "!s" is used because a type object does not support the "s"
        # format specification on Python 3.4 and later; with ":s" the
        # intended ValueError was replaced by a TypeError.
        raise ValueError(u'Unsupported type: {0!s}.'.format(type(data)))
def __eq__(self, event_object):
    """Determines if two event objects are considered equal.

    Two event objects are considered equal ("near" duplicates) when:
      * the other object is an EventObject;
      * both have the same timestamp and data_type;
      * both have the same set of instance attribute names;
      * every attribute not listed in COMPARE_EXCLUDE has the same value.

    Per the original documentation the excluded ("reserved") attributes are
    inode, pathspec, filename, display_name, store_number and store_index;
    the authoritative list is COMPARE_EXCLUDE.

    For events whose data_type starts with "fs:" the inode values must
    additionally match.

    Args:
        event_object: The event object to compare to (instance of
            EventObject).

    Returns:
        A boolean value indicating if both event objects are considered
        equal.
    """
    # Note: if this method changes, the above EqualityString method MUST be
    # updated accordingly.
    if not isinstance(event_object, EventObject):
        return False
    if self.timestamp != event_object.timestamp:
        return False
    if self.data_type != event_object.data_type:
        return False

    own_names = set(self.__dict__)
    if own_names != set(event_object.__dict__):
        return False

    # Here we have to deal with "near" duplicates, so not all attributes
    # should be compared.
    compared_names = own_names.difference(self.COMPARE_EXCLUDE)
    if any(getattr(self, name) != getattr(event_object, name)
           for name in compared_names):
        return False

    if not self.data_type.startswith(u'fs:'):
        return True

    # If we are dealing with a filesystem event the inode number is
    # the attribute that really matters. None is kept as-is so that it only
    # equals another None.
    own_inode = self.inode
    if own_inode is not None:
        own_inode = utils.GetUnicodeString(own_inode)

    other_inode = event_object.inode
    if other_inode is not None:
        other_inode = utils.GetUnicodeString(other_inode)

    return own_inode == other_inode
def WriteSerializedObject(cls, event_object):
    """Writes an event object to serialized form.

    Attributes that exist as fields on the protobuf are stored in those
    fields; all other attributes are stored as generic entries in
    proto.attributes. The source_short, pathspec and tag attributes are
    handled separately.

    NOTE(review): the block nesting below was reconstructed from a flattened
    copy of the source; confirm the indentation against the original file.

    Args:
        event_object: an event object (instance of EventObject).

    Returns:
        A protobuf object containing the serialized form (instance of
        plaso_storage_pb2.EventObject).
    """
    proto = plaso_storage_pb2.EventObject()

    # Fall back to the generic "event" data type when none is set.
    proto.data_type = getattr(event_object, u'data_type', u'event')

    for attribute_name in event_object.GetAttributes():
        if attribute_name == u'source_short':
            # The short source is translated through a lookup table.
            proto.source_short = cls._SOURCE_SHORT_TO_PROTO_MAP[
                event_object.source_short]

        elif attribute_name == u'pathspec':
            # The path specification has its own serializer; an empty or
            # missing path specification is not stored.
            attribute_value = getattr(event_object, attribute_name, None)
            if attribute_value:
                attribute_value = cls._path_spec_serializer.WriteSerialized(
                    attribute_value)
                setattr(proto, attribute_name, attribute_value)

        elif attribute_name == u'tag':
            # The tag is serialized separately and merged into proto.tag.
            attribute_value = getattr(event_object, attribute_name, None)
            if attribute_value:
                event_tag_proto = ProtobufEventTagSerializer.WriteSerializedObject(
                    attribute_value)
                proto.tag.MergeFrom(event_tag_proto)

        elif hasattr(proto, attribute_name):
            # The attribute has a dedicated field on the protobuf.
            attribute_value = getattr(event_object, attribute_name)

            if attribute_value is None:
                continue

            # NOTE(review): the empty check is assumed to apply to string
            # values only - confirm.
            if isinstance(attribute_value, basestring):
                attribute_value = utils.GetUnicodeString(attribute_value)
                if not attribute_value:
                    continue

            if isinstance(attribute_value, dict):
                ProtobufEventAttributeSerializer.WriteSerializedDictObject(
                    proto, attribute_name, attribute_value)

            elif isinstance(attribute_value, (list, tuple)):
                ProtobufEventAttributeSerializer.WriteSerializedListObject(
                    proto, attribute_name, attribute_value)

            else:
                try:
                    setattr(proto, attribute_name, attribute_value)
                except ValueError as exception:
                    # The failure is logged together with the location of
                    # the originating file, when available.
                    path_spec = getattr(event_object, u'pathspec', None)
                    path = getattr(path_spec, u'location', u'')
                    logging.error((
                        u'Unable to save value for: {0:s} [{1:s}] with error: {2:s} '
                        u'coming from file: {3:s}').format(
                            attribute_name, type(attribute_value), exception, path))
                    # Catch potential out of range errors.
                    if isinstance(attribute_value, py2to3.INTEGER_TYPES):
                        setattr(proto, attribute_name, -1)

        else:
            # The attribute has no dedicated field; store it as a generic
            # attribute.
            attribute_value = getattr(event_object, attribute_name)

            # TODO: check if the next TODO still applies.
            # Serialize the attribute value only if it is an integer type
            # (int or long) or if it has a value.
            # TODO: fix logic.
            if (isinstance(attribute_value, (bool, int, float, long)) or
                    attribute_value):
                proto_attribute = proto.attributes.add()
                ProtobufEventAttributeSerializer.WriteSerializedObject(
                    proto_attribute, attribute_name, attribute_value)

    return proto