def testReadlineUTF16(self):
  """Test the readline() function on UTF-16 encoded text."""
  test_path = self._GetTestFilePath(['another_file.utf16'])
  self._SkipIfPathNotExists(test_path)

  path_spec = os_path_spec.OSPathSpec(location=test_path)
  file_object = os_file_io.OSFile(self._resolver_context)
  file_object.open(path_spec)

  # An unbounded readline() returns the whole first decoded line.
  text_file_object = text_file.TextFile(file_object, encoding='utf-16-le')
  self.assertEqual(text_file_object.readline(), 'This is another file.\n')
  self.assertEqual(text_file_object.get_offset(), 46)

  # A readline() bounded to 24 bytes only decodes part of the line.
  text_file_object = text_file.TextFile(file_object, encoding='utf-16-le')
  self.assertEqual(text_file_object.readline(size=24), 'This is ano')
  self.assertEqual(text_file_object.get_offset(), 24)

  file_object.close()
def testReadlineUTF16(self):
  """Test the readline() function on UTF-16 encoded text."""
  test_path = self._GetTestFilePath(['another_file.utf16'])
  self._SkipIfPathNotExists(test_path)

  test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
      definitions.TYPE_INDICATOR_OS, location=test_path)
  file_object = resolver.Resolver.OpenFileObject(
      test_os_path_spec, resolver_context=self._resolver_context)

  # An unbounded readline() returns the whole first decoded line.
  text_file_object = text_file.TextFile(file_object, encoding='utf-16-le')
  self.assertEqual(text_file_object.readline(), 'This is another file.\n')
  self.assertEqual(text_file_object.get_offset(), 46)

  # A readline() bounded to 24 bytes only decodes part of the line.
  text_file_object = text_file.TextFile(file_object, encoding='utf-16-le')
  self.assertEqual(text_file_object.readline(size=24), 'This is ano')
  self.assertEqual(text_file_object.get_offset(), 24)
def testReadlineWithError(self):
  """Test the readline() function with an encoding error."""
  test_path = self._GetTestFilePath(['another_file_with_error'])
  self._SkipIfPathNotExists(test_path)

  test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
      definitions.TYPE_INDICATOR_OS, location=test_path)
  file_object = resolver.Resolver.OpenFileObject(
      test_os_path_spec, resolver_context=self._resolver_context)

  # With strict (default) error handling the invalid byte raises.
  text_file_object = text_file.TextFile(file_object)
  with self.assertRaises(UnicodeDecodeError):
    text_file_object.readline()

  # With 'replace' error handling the invalid byte becomes U+FFFD.
  text_file_object = text_file.TextFile(
      file_object, encoding_errors='replace')
  self.assertEqual(
      text_file_object.readline(), 'This is ano\ufffdher file.\n')
  self.assertEqual(text_file_object.get_offset(), 22)

  # A bounded read that stops before the invalid byte succeeds.
  text_file_object = text_file.TextFile(file_object)
  self.assertEqual(text_file_object.readline(size=11), 'This is ano')
  self.assertEqual(text_file_object.get_offset(), 11)
def testReadline(self):
  """Test the readline() function."""
  test_path = self._GetTestFilePath(['another_file'])
  self._SkipIfPathNotExists(test_path)

  path_spec = os_path_spec.OSPathSpec(location=test_path)
  file_object = os_file_io.OSFile(self._resolver_context)
  file_object.open(path_spec)

  # An unbounded readline() returns the whole first line.
  text_file_object = text_file.TextFile(file_object)
  self.assertEqual(text_file_object.readline(), 'This is another file.\n')
  self.assertEqual(text_file_object.get_offset(), 22)

  # A readline() bounded to 11 bytes returns only part of the line.
  text_file_object = text_file.TextFile(file_object)
  self.assertEqual(text_file_object.readline(size=11), 'This is ano')
  self.assertEqual(text_file_object.get_offset(), 11)

  file_object.close()
def testReadLine(self):
  """Tests the _ReadLine function.

  Exercises four cases: clean UTF-8 input, an invalid byte with strict
  decoding (raises), an invalid byte with the 'replace' error handler,
  and an invalid byte with a custom registered error handler.
  """
  resolver_context = dfvfs_context.Context()
  test_path_spec = fake_path_spec.FakePathSpec(location='/file.txt')

  # Case 1: well-formed UTF-8 data decodes cleanly.
  data = b'This is another file.'
  file_object = fake_file_io.FakeFile(resolver_context, test_path_spec, data)
  file_object.Open()

  test_parser = TestPyparsingSingleLineTextParser()
  test_text_file = dfvfs_text_file.TextFile(file_object, encoding='utf-8')

  line = test_parser._ReadLine(test_text_file)
  self.assertEqual(line, 'This is another file.')

  # Case 2: the 0xba byte is invalid UTF-8, so strict decoding raises.
  test_path_spec = fake_path_spec.FakePathSpec(location='/file.txt')
  data = b'This is an\xbather file.'
  file_object = fake_file_io.FakeFile(resolver_context, test_path_spec, data)
  file_object.Open()

  test_parser = TestPyparsingSingleLineTextParser()
  test_text_file = dfvfs_text_file.TextFile(file_object, encoding='utf8')

  with self.assertRaises(UnicodeDecodeError):
    test_parser._ReadLine(test_text_file)

  # Case 3: the 'replace' handler substitutes U+FFFD for the bad byte.
  test_path_spec = fake_path_spec.FakePathSpec(location='/file.txt')
  data = b'This is an\xbather file.'
  file_object = fake_file_io.FakeFile(resolver_context, test_path_spec, data)
  file_object.Open()

  test_parser = TestPyparsingSingleLineTextParser()
  test_text_file = dfvfs_text_file.TextFile(
      file_object, encoding='utf8', encoding_errors='replace')

  line = test_parser._ReadLine(test_text_file)
  self.assertEqual(line, 'This is an\ufffdther file.')

  # Case 4: a custom handler records (offset, byte) for each error and
  # substitutes an escaped representation of the bad byte.
  self._encoding_errors = []
  codecs.register_error('test_handler', self._EncodingErrorHandler)

  test_path_spec = fake_path_spec.FakePathSpec(location='/file.txt')
  data = b'This is an\xbather file.'
  file_object = fake_file_io.FakeFile(resolver_context, test_path_spec, data)
  file_object.Open()

  test_parser = TestPyparsingSingleLineTextParser()
  test_text_file = dfvfs_text_file.TextFile(
      file_object, encoding='utf8', encoding_errors='test_handler')

  line = test_parser._ReadLine(test_text_file)
  self.assertEqual(line, 'This is an\\xbather file.')

  self.assertEqual(len(self._encoding_errors), 1)
  self.assertEqual(self._encoding_errors[0], (10, 0xba))
def _ParseFileData(self, knowledge_base, file_object):
  """Parses file content (data) for a time zone preprocessing attribute.

  Args:
    knowledge_base (KnowledgeBase): to fill with preprocessing information.
    file_object (dfvfs.FileIO): file-like object that contains the artifact
        value data.

  Returns:
    bool: True if all the preprocessing attributes were found and
        the preprocessor plugin is done.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  text_file_object = dfvfs_text_file.TextFile(file_object)
  time_zone = text_file_object.readline().strip()

  # An empty file yields no time zone to set.
  if not time_zone:
    return False

  try:
    knowledge_base.SetTimeZone(time_zone)
  except ValueError:
    # TODO: add and store preprocessing errors.
    return False

  return True
def _ParseFileData(self, mediator, file_object):
  """Parses file content (data) for system product preprocessing attribute.

  Args:
    mediator (PreprocessMediator): mediates interactions between preprocess
        plugins and other components, such as storage and knowledge base.
    file_object (dfvfs.FileIO): file-like object that contains the artifact
        value data.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  text_file_object = dfvfs_text_file.TextFile(file_object, encoding='utf-8')

  product_values = {}
  for line in text_file_object.readlines():
    line = line.strip()
    # Skip comments and lines that do not define a key value pair.
    # Previously line.split('=') raised ValueError on lines without an
    # equal sign or with more than one; partition() tolerates both and
    # keeps any additional equal signs as part of the value.
    if line.startswith('#') or '=' not in line:
      continue

    key, _, value = line.partition('=')
    key = key.strip().upper()
    value = value.strip().strip('"')
    product_values[key] = value

  if not mediator.knowledge_base.GetValue('operating_system_product'):
    system_product = product_values.get('DISTRIB_DESCRIPTION', None)
    if system_product:
      mediator.knowledge_base.SetValue(
          'operating_system_product', system_product)
def testReadlineMultipleLines(self):
  """Test the readline() function on multiple lines."""
  test_path = self._GetTestFilePath(['password.txt'])
  self._SkipIfPathNotExists(test_path)

  test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
      definitions.TYPE_INDICATOR_OS, location=test_path)
  file_object = resolver.Resolver.OpenFileObject(
      test_os_path_spec, resolver_context=self._resolver_context)

  text_file_object = text_file.TextFile(file_object)

  # Each entry is (size argument or None, expected line, expected offset).
  expected_reads = [
      (None, 'place,user,password\n', 20),
      (5, 'bank,', 25),
      (None, 'joesmith,superrich\n', 44),
      (None, 'alarm system,-,1234\n', 64)]

  for size, expected_line, expected_offset in expected_reads:
    if size is None:
      line = text_file_object.readline()
    else:
      line = text_file_object.readline(size=size)
    self.assertEqual(line, expected_line)
    self.assertEqual(text_file_object.get_offset(), expected_offset)
def _ParseFileData(self, knowledge_base, file_object):
  """Parses file content (data) for a hostname preprocessing attribute.

  Args:
    knowledge_base (KnowledgeBase): to fill with preprocessing information.
    file_object (dfvfs.FileIO): file-like object that contains the artifact
        value data.

  Returns:
    bool: True if all the preprocessing attributes were found and
        the preprocessor plugin is done.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  result = False

  text_file_object = dfvfs_text_file.TextFile(file_object)
  hostname = text_file_object.readline()

  # NOTE(review): calling decode() assumes readline() returns bytes,
  # which is Python 2 era behavior; on a str this raises AttributeError —
  # confirm the dfvfs TextFile version this targets.
  try:
    hostname = hostname.decode('utf-8')
  except UnicodeDecodeError:
    # TODO: add and store preprocessing errors.
    hostname = hostname.decode('utf-8', errors='replace')

  hostname = hostname.strip()
  if hostname:
    hostname_artifact = artifacts.HostnameArtifact(name=hostname)
    knowledge_base.SetHostname(hostname_artifact)
    result = True

  return result
def _ParseFileData(self, mediator, file_object):
  """Parses file content (data) for system product preprocessing attribute.

  Args:
    mediator (PreprocessMediator): mediates interactions between preprocess
        plugins and other components, such as storage and knowledge base.
    file_object (dfvfs.FileIO): file-like object that contains the artifact
        value data.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  text_file_object = dfvfs_text_file.TextFile(file_object, encoding='utf-8')
  first_line = text_file_object.readline()

  # Only parse known default /etc/issue file contents.
  if not first_line.startswith('Debian GNU/Linux '):
    return

  # Keep only the text before any backslash escape sequence.
  system_product, _, _ = first_line.partition('\\')
  system_product = system_product.rstrip()

  if system_product:
    mediator.SetValue('operating_system_product', system_product)
def _ParseFileObject(self, knowledge_base, file_object):
  """Parses a passwd file-like object.

  A passwd file consists of colon separated values in the format:
  "username:password:uid:gid:full name:home directory:shell".

  Args:
    knowledge_base (KnowledgeBase): to fill with preprocessing information.
    file_object (dfvfs.FileIO): file-like object.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  text_file_object = text_file.TextFile(file_object)
  try:
    # NOTE(review): a bytes delimiter only works with the Python 2 csv
    # module; under Python 3 csv.reader requires a 1-character str —
    # confirm the Python versions this module supports.
    reader = csv.reader(text_file_object, delimiter=b':')
  except csv.Error:
    raise errors.PreProcessFail(u'Unable to read: {0:s}.'.format(self._PATH))

  for row in reader:
    # A usable entry needs at least 7 fields, a username and an uid.
    if len(row) < 7 or not row[0] or not row[2]:
      # TODO: add and store preprocessing errors.
      continue

    user_account = artifacts.UserAccountArtifact(
        identifier=row[2], username=row[0])
    # Empty fields are normalized to None.
    user_account.group_identifier = row[3] or None
    user_account.full_name = row[4] or None
    user_account.user_directory = row[5] or None
    user_account.shell = row[6] or None

    # TODO: refactor the use of store number.
    user_account.store_number = 0

    knowledge_base.SetUserAccount(user_account)
def _ParseContainerLogJSON(self, parser_mediator, file_object):
  """Extract events from a Docker container log files.

  The format is one JSON formatted log message per line.

  The path of each container log file (which logs the container stdout and
  stderr) is:
  DOCKER_DIR/containers/<container_id>/<container_id>-json.log

  Args:
    parser_mediator: a parser mediator object (instance of ParserMediator).
    file_object: a file-like object.
  """
  # Resolve the container identifier once; it is the same for every line.
  container_id = self._GetIDFromPath(parser_mediator)

  text_file_object = text_file.TextFile(file_object)

  for log_line in text_file_object:
    json_log_line = json.loads(log_line)
    if u'log' in json_log_line and u'time' in json_log_line:
      # Build a fresh attribute dictionary per event. The previous code
      # reused and mutated a single dictionary, so every produced event
      # could end up aliasing the same attribute values.
      event_attributes = {
          u'container_id': container_id,
          u'log_line': json_log_line[u'log'],
          u'log_source': json_log_line[u'stream']}

      timestamp = timelib.Timestamp.FromTimeString(json_log_line[u'time'])
      parser_mediator.ProduceEvent(
          DockerJSONContainerLogEvent(timestamp, 0, event_attributes))
def _CreateLineReader(self, file_object):
  """Creates an object that reads lines from a text file.

  The line reader is advanced to the beginning of the DSV content, skipping
  any header lines.

  Args:
    file_object (dfvfs.FileIO): file-like object.

  Returns:
    TextFile|BinaryLineReader: an object that implements an iterator
        over lines in a text file.

  Raises:
    UnicodeDecodeError: if the file cannot be read with the specified
        encoding.
  """
  # The Python 2 csv module reads bytes and the Python 3 csv module Unicode
  # reads strings.
  if py2to3.PY_3:
    line_reader = text_file.TextFile(
        file_object, encoding=self._encoding, end_of_line=self._end_of_line)
  else:
    line_reader = line_reader_file.BinaryLineReader(
        file_object, end_of_line=self._end_of_line)

  # Skip the configured number of header lines. A UnicodeDecodeError
  # raised by readline() propagates to the caller; the former
  # try/except that merely re-raised it was a redundant no-op.
  for _ in range(0, self.NUMBER_OF_HEADER_LINES):
    line_reader.readline(self._maximum_line_length)

  return line_reader
def testReadlineMultipleLines(self):
  """Test the readline() function on multiple lines."""
  test_file = self._GetTestFilePath(['password.txt'])
  # Skip instead of failing when the test file is missing, consistent with
  # the other tests in this file.
  self._SkipIfPathNotExists(test_file)

  test_path_spec = os_path_spec.OSPathSpec(location=test_file)
  file_object = os_file_io.OSFile(self._resolver_context)
  file_object.open(test_path_spec)

  text_file_object = text_file.TextFile(file_object)

  line = text_file_object.readline()
  self.assertEqual(line, 'place,user,password\n')

  offset = text_file_object.get_offset()
  self.assertEqual(offset, 20)

  line = text_file_object.readline(size=5)
  self.assertEqual(line, 'bank,')

  offset = text_file_object.get_offset()
  self.assertEqual(offset, 25)

  line = text_file_object.readline()
  self.assertEqual(line, 'joesmith,superrich\n')

  offset = text_file_object.get_offset()
  self.assertEqual(offset, 44)

  line = text_file_object.readline()
  self.assertEqual(line, 'alarm system,-,1234\n')

  offset = text_file_object.get_offset()
  self.assertEqual(offset, 64)

  file_object.close()
def _ParseFileData(self, knowledge_base, file_object):
  """Parses file content (data) for system product preprocessing attribute.

  Args:
    knowledge_base (KnowledgeBase): to fill with preprocessing information.
    file_object (dfvfs.FileIO): file-like object that contains the artifact
        value data.

  Returns:
    bool: True if all the preprocessing attributes were found and
        the preprocessor plugin is done.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  result = False

  text_file_object = dfvfs_text_file.TextFile(file_object)
  system_product = text_file_object.readline()

  # NOTE(review): calling decode() assumes readline() returns bytes,
  # which is Python 2 era behavior; on a str this raises AttributeError —
  # confirm the dfvfs TextFile version this targets.
  try:
    system_product = system_product.decode('utf-8')
  except UnicodeDecodeError:
    # TODO: add and store preprocessing errors.
    system_product = system_product.decode('utf-8', errors='replace')

  system_product = system_product.strip()
  if system_product:
    knowledge_base.SetValue('operating_system_product', system_product)
    result = True

  return result
def _ParseFileData(self, knowledge_base, file_object):
  """Parses file content (data) for system product preprocessing attribute.

  Args:
    knowledge_base (KnowledgeBase): to fill with preprocessing information.
    file_object (dfvfs.FileIO): file-like object that contains the artifact
        value data.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  text_file_object = dfvfs_text_file.TextFile(file_object, encoding='utf-8')
  first_line = text_file_object.readline()

  system_product = None
  # Only parse known default /etc/issue file contents.
  if first_line.startswith('Debian GNU/Linux '):
    # Keep only the text before any backslash escape sequence.
    system_product, _, _ = first_line.partition('\\')
    system_product = system_product.rstrip()

  # Do not overwrite a value determined by an earlier preprocessor.
  if not knowledge_base.GetValue('operating_system_product'):
    if system_product:
      knowledge_base.SetValue('operating_system_product', system_product)
def _ParseFileData(self, knowledge_base, file_object):
  """Parses file content (data) for system product preprocessing attribute.

  Args:
    knowledge_base (KnowledgeBase): to fill with preprocessing information.
    file_object (dfvfs.FileIO): file-like object that contains the artifact
        value data.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  text_file_object = dfvfs_text_file.TextFile(file_object, encoding='utf-8')

  product_values = {}
  for raw_line in text_file_object.readlines():
    stripped_line = raw_line.strip()
    # Ignore lines that do not define a key value pair.
    if '=' in stripped_line:
      key, value = stripped_line.split('=')
      product_values[key.upper()] = value.strip('"')

  # Do not overwrite a value determined by an earlier preprocessor.
  if not knowledge_base.GetValue('operating_system_product'):
    system_product = product_values.get('PRETTY_NAME', None)
    if system_product:
      knowledge_base.SetValue('operating_system_product', system_product)
def _ParseFileData(self, mediator, file_object):
  """Parses file content (data) for system product preprocessing attribute.

  Args:
    mediator (PreprocessMediator): mediates interactions between preprocess
        plugins and other components, such as storage and knowledge base.
    file_object (dfvfs.FileIO): file-like object that contains the artifact
        value data.

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  text_file_object = dfvfs_text_file.TextFile(file_object, encoding='utf-8')

  product_values = {}
  for raw_line in text_file_object.readlines():
    stripped_line = raw_line.strip()
    # Ignore lines that do not define a key value pair.
    if '=' in stripped_line:
      key, value = stripped_line.split('=')
      product_values[key.upper()] = value.strip('"')

  system_product = product_values.get('PRETTY_NAME', None)
  if system_product:
    mediator.SetValue('operating_system_product', system_product)
def _CreateLineReader(self, file_object, encoding=None):
  """Creates an object that reads lines from a text file.

  The line reader is advanced to the beginning of the DSV content, skipping
  any header lines.

  Args:
    file_object (dfvfs.FileIO): file-like object.
    encoding (Optional[str]): encoding used in the DSV file, where None
        indicates the codepage of the parser mediator should be used.

  Returns:
    TextFile: an object that implements an iterator over lines in a text
        file.

  Raises:
    UnicodeDecodeError: if the file cannot be read with the specified
        encoding.
  """
  line_reader = text_file.TextFile(
      file_object, encoding=encoding, end_of_line=self._end_of_line)

  # pylint: disable=protected-access
  maximum_read_buffer_size = line_reader._MAXIMUM_READ_BUFFER_SIZE

  # Cap the line length at one less than the read buffer size so that a
  # line that does not end before the end of the buffer can be detected.
  if self._maximum_line_length > maximum_read_buffer_size:
    self._maximum_line_length = maximum_read_buffer_size - 1

  # Skip the configured number of header lines before the DSV content.
  for _ in range(self.NUMBER_OF_HEADER_LINES):
    line_reader.readline(self._maximum_line_length)

  return line_reader
def GetValue(self, searcher, unused_knowledge_base):
  """Determines the timezone based on the contents of /etc/timezone.

  Args:
    searcher: The file system searcher object (instance of
              dfvfs.FileSystemSearcher).
    knowledge_base: A knowledge base object (instance of KnowledgeBase),
                    which contains information from the source data needed
                    for parsing.

  Returns:
    A string containing a tzdata (Olsen) timezone name (for example,
    America/New_York).

  Raises:
    errors.PreProcessFail: if the preprocessing fails.
  """
  path = u'/etc/timezone'
  file_entry = self._FindFileEntry(searcher, path)
  if not file_entry:
    raise errors.PreProcessFail(
        u'Unable to find file entry for path: {0:s}.'.format(path))

  # Always close the file object, even when reading the line fails.
  file_object = file_entry.GetFileObject()
  try:
    file_data = text_file.TextFile(file_object).readline()
  finally:
    file_object.close()

  return file_data.strip()
def ParseFileObject(self, parser_mediator, file_object, **unused_kwargs):
  """Parses a CSV text file-like object.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    file_object: A file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  file_entry = parser_mediator.GetFileEntry()
  # Replace new lines so the path spec can be reported on one line.
  path_spec_printable = file_entry.path_spec.comparable.replace(u'\n', u';')

  text_file_object = text_file.TextFile(file_object)

  # If we specifically define a number of lines we should skip do that here.
  for _ in range(0, self.NUMBER_OF_HEADER_LINES):
    _ = text_file_object.readline()

  # MAGIC_TEST_STRING as restkey/restval marks rows whose field count does
  # not match COLUMNS, so a signature mismatch can be detected below.
  reader = csv.DictReader(
      text_file_object, fieldnames=self.COLUMNS,
      restkey=self.MAGIC_TEST_STRING, restval=self.MAGIC_TEST_STRING,
      delimiter=self.VALUE_SEPARATOR, quotechar=self.QUOTE_CHAR)

  try:
    # NOTE(review): reader.next() is Python 2 only syntax; Python 3 uses
    # next(reader) — confirm the Python versions this module supports.
    row = reader.next()
  except (csv.Error, StopIteration):
    raise errors.UnableToParseFile(
        u'[{0:s}] Unable to parse CSV file: {1:s}.'.format(
            self.NAME, path_spec_printable))

  number_of_columns = len(self.COLUMNS)
  number_of_records = len(row)

  if number_of_records != number_of_columns:
    raise errors.UnableToParseFile(
        (u'[{0:s}] Unable to parse CSV file: {1:s}. Wrong number of '
         u'records (expected: {2:d}, got: {3:d})').format(
             self.NAME, path_spec_printable, number_of_columns,
             number_of_records))

  for key, value in row.items():
    if key == self.MAGIC_TEST_STRING or value == self.MAGIC_TEST_STRING:
      raise errors.UnableToParseFile(
          (u'[{0:s}] Unable to parse CSV file: {1:s}. Signature '
           u'mismatch.').format(self.NAME, path_spec_printable))

  if not self.VerifyRow(parser_mediator, row):
    raise errors.UnableToParseFile(
        (u'[{0:s}] Unable to parse CSV file: {1:s}. Verification '
         u'failed.').format(self.NAME, path_spec_printable))

  # Parse the first (already verified) row, then the remainder of the file.
  self.ParseRow(parser_mediator, text_file_object.tell(), row)

  for row in reader:
    self.ParseRow(parser_mediator, text_file_object.tell(), row)
def ParseFileObject(self, parser_mediator, file_object, **kwargs):
  """Parses an Opera typed history file-like object.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    file_object: A file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  file_object.seek(0, os.SEEK_SET)
  text_file_object = text_file.TextFile(file_object)

  # Need to verify the first line to make sure this is a) XML and
  # b) the right XML.
  first_line = text_file_object.readline(90)

  # Note that we must check the data here as a string first, otherwise
  # forcing first_line to convert to Unicode can raise a UnicodeDecodeError.
  if not first_line.startswith(b'<?xml version="1.0'):
    raise errors.UnableToParseFile(
        u'Not an Opera typed history file [not a XML]')

  # We read in the second line due to the fact that ElementTree
  # reads the entire file in memory to parse the XML string and
  # we only care about the XML file with the correct root key,
  # which denotes a typed_history.xml file.
  second_line = text_file_object.readline(50).strip()

  # Note that we must check the data here as a string first, otherwise
  # forcing second_line to convert to Unicode can raise a UnicodeDecodeError.
  if second_line != b'<typed_history>':
    raise errors.UnableToParseFile(
        u'Not an Opera typed history file [wrong XML root key]')

  # For ElementTree to work we need to work on a file object seeked
  # to the beginning.
  file_object.seek(0, os.SEEK_SET)

  xml = ElementTree.parse(file_object)

  for history_item in xml.iterfind(u'typed_history_item'):
    # Missing attributes default to an empty string.
    content = history_item.get(u'content', u'')
    last_typed = history_item.get(u'last_typed', u'')
    entry_type = history_item.get(u'type', u'')

    try:
      timestamp = timelib.Timestamp.FromTimeString(last_typed)
    except errors.TimestampError:
      # Record the parse error and continue with the next item.
      parser_mediator.ProduceParseError(
          u'Unable to parse time string: {0:s}'.format(last_typed))
      continue

    event_object = OperaTypedHistoryEvent(timestamp, content, entry_type)
    parser_mediator.ProduceEvent(event_object)
def Parse(self, parser_context, file_entry):
  """Extract the Android usage-history file.

  Args:
    parser_context: A parser context object (instance of ParserContext).
    file_entry: A file entry object (instance of dfvfs.FileEntry).

  Raises:
    errors.UnableToParseFile: when the file cannot be parsed.
  """
  file_object = file_entry.GetFileObject()
  file_object.seek(0, os.SEEK_SET)

  text_file_object = text_file.TextFile(file_object)

  # Need to verify the first line to make sure this is a) XML and
  # b) the right XML.
  first_line = text_file_object.readline(90)

  # Note that we must check the data here as a string first, otherwise
  # forcing first_line to convert to Unicode can raise a UnicodeDecodeError.
  if not first_line.startswith('<?xml'):
    raise errors.UnableToParseFile(
        u'Not an Android usage history file [not XML]')

  # We read in the second line due to the fact that ElementTree
  # reads the entire file in memory to parse the XML string and
  # we only care about the XML file with the correct root key,
  # which denotes a typed_history.xml file.
  second_line = text_file_object.readline(50).strip()
  if second_line != u'<usage-history>':
    raise errors.UnableToParseFile(
        u'Not an Android usage history file [wrong XML root key]')

  # For ElementTree to work we need to work on a filehandle seeked
  # to the beginning.
  file_object.seek(0, os.SEEK_SET)

  xml = ElementTree.parse(file_object)
  root = xml.getroot()

  for app in root:
    for part in app.iter():
      if part.tag == 'comp':
        package = app.get(u'name', '')
        component = part.get(u'name', '')

        # 'lrt' holds the last resume time as a base-10 integer string;
        # entries with a non-numeric value are skipped.
        try:
          last_resume_time = int(part.get('lrt', u''), 10)
        except ValueError:
          continue

        event_object = AndroidAppUsageEvent(
            last_resume_time, package, component)
        parser_context.ProduceEvent(
            event_object, parser_name=self.NAME, file_entry=file_entry)

  file_object.close()
def ParseFileObject(self, parser_mediator, file_object, **kwargs):
  """Parses an Android usage-history file-like object.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    file_object: A file-like object.

  Raises:
    UnableToParseFile: when the file cannot be parsed.
  """
  file_object.seek(0, os.SEEK_SET)

  text_file_object = text_file.TextFile(file_object)

  # Need to verify the first line to make sure this is a) XML and
  # b) the right XML.
  first_line = text_file_object.readline(90)

  # Note that we must check the data here as a string first, otherwise
  # forcing first_line to convert to Unicode can raise a UnicodeDecodeError.
  if not first_line.startswith(b'<?xml'):
    raise errors.UnableToParseFile(
        u'Not an Android usage history file [not XML]')

  # We read in the second line due to the fact that ElementTree
  # reads the entire file in memory to parse the XML string and
  # we only care about the XML file with the correct root key,
  # which denotes a typed_history.xml file.
  second_line = text_file_object.readline(50).strip()

  # Note that we must check the data here as a string first, otherwise
  # forcing second_line to convert to Unicode can raise a UnicodeDecodeError.
  if second_line != b'<usage-history>':
    raise errors.UnableToParseFile(
        u'Not an Android usage history file [wrong XML root key]')

  # The current offset of the file-like object needs to point at
  # the start of the file for ElementTree to parse the XML data correctly.
  file_object.seek(0, os.SEEK_SET)

  xml = ElementTree.parse(file_object)
  root = xml.getroot()

  for app in root:
    for part in app.iter():
      if part.tag == u'comp':
        package = app.get(u'name', u'')
        component = part.get(u'name', u'')

        # 'lrt' holds the last resume time as a base-10 integer string;
        # entries with a non-numeric value are skipped.
        try:
          last_resume_time = int(part.get(u'lrt', u''), 10)
        except ValueError:
          continue

        event_object = AndroidAppUsageEvent(
            last_resume_time, package, component)
        parser_mediator.ProduceEvent(event_object)
def testReadline(self):
  """Test the readline() function."""
  file_object = os_file_io.OSFile(self._resolver_context)
  file_object.open(self._os_path_spec1)
  text_file_object = text_file.TextFile(file_object)

  # Without an encoding the line is returned as bytes.
  line = text_file_object.readline()
  self.assertEqual(line, b'This is another file.\n')

  offset = text_file_object.get_offset()
  self.assertEqual(offset, 22)

  file_object.close()
def testReadlinesWithFileWithoutNewLineAtEnd(self):
  """Test reading lines from a file without a new line char at the end."""
  test_file = self._GetTestFilePath(['fls_bodyfile.txt'])
  test_file_path_spec = os_path_spec.OSPathSpec(location=test_file)
  file_object = os_file_io.OSFile(self._resolver_context)
  file_object.open(test_file_path_spec)

  text_file_object = text_file.TextFile(file_object)

  lines = text_file_object.readlines()

  self.assertEqual(len(lines), 25)

  # Close the file object to release the underlying resources, consistent
  # with the other tests in this file.
  file_object.close()
def _ParseFileObject(self, knowledge_base, file_object):
  """Parses a time zone file-like object.

  Args:
    knowledge_base (KnowledgeBase): to fill with preprocessing information.
    file_object (dfvfs.FileIO): file-like object.
  """
  text_file_object = text_file.TextFile(file_object)
  # The time zone name is the first line of the file.
  timezone = text_file_object.readline().strip()
  if timezone:
    knowledge_base.SetValue(u'time_zone_str', timezone)
def testReadlinesWithSizeHint(self):
  """Test the readlines() function."""
  file_object = os_file_io.OSFile(self._resolver_context)
  file_object.open(self._os_path_spec2)
  text_file_object = text_file.TextFile(file_object)

  # A size hint of 60 bytes covers exactly the first 3 lines.
  lines = text_file_object.readlines(sizehint=60)

  expected_lines = [
      b'place,user,password\n',
      b'bank,joesmith,superrich\n',
      b'alarm system,-,1234\n']
  self.assertEqual(lines, expected_lines)

  file_object.close()
def Parse(self, parser_context, file_entry):
  """Extract data from an Opera global history file.

  Args:
    parser_context: A parser context object (instance of ParserContext).
    file_entry: A file entry object (instance of dfvfs.FileEntry).

  Raises:
    errors.UnableToParseFile: when the file cannot be parsed.
  """
  file_object = file_entry.GetFileObject()
  file_object.seek(0, os.SEEK_SET)

  text_file_object = text_file.TextFile(file_object)

  # Read the first record to validate the file format before producing
  # any events.
  try:
    title, url, timestamp, popularity_index = self._ReadRecord(
        text_file_object, 400)
  except errors.NotAText:
    file_object.close()
    raise errors.UnableToParseFile(
        u'Not an Opera history file [not a text file].')

  if not title:
    file_object.close()
    raise errors.UnableToParseFile(
        u'Not an Opera history file [no title present].')

  if not self._IsValidUrl(url):
    file_object.close()
    raise errors.UnableToParseFile(
        u'Not an Opera history file [not a valid URL].')

  if not timestamp:
    file_object.close()
    raise errors.UnableToParseFile(
        u'Not an Opera history file [timestamp does not exist].')

  event_object = OperaGlobalHistoryEvent(
      timestamp, url, title, popularity_index)
  parser_context.ProduceEvent(
      event_object, parser_name=self.NAME, file_entry=file_entry)

  # Read in the rest of the history file.
  for title, url, timestamp, popularity_index in self._ReadRecords(
      text_file_object):
    event_object = OperaGlobalHistoryEvent(
        timestamp, url, title, popularity_index)
    parser_context.ProduceEvent(
        event_object, parser_name=self.NAME, file_entry=file_entry)

  file_object.close()
def testReadlinesWithFileWithoutNewLineAtEnd(self):
  """Test reading lines from a file without a new line char at the end."""
  test_path = self._GetTestFilePath(['fls_bodyfile.txt'])
  self._SkipIfPathNotExists(test_path)

  test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
      definitions.TYPE_INDICATOR_OS, location=test_path)
  file_object = resolver.Resolver.OpenFileObject(
      test_os_path_spec, resolver_context=self._resolver_context)

  text_file_object = text_file.TextFile(file_object)

  # The final line lacks a trailing new line but must still be returned.
  self.assertEqual(len(text_file_object.readlines()), 25)