def ParseFiles(self, responses):
  """Take each file we retrieved and get the history from it."""
  if responses:
    for response in responses:
      client_path = db.ClientPath.FromPathSpec(self.client_id,
                                               response.stat_entry.pathspec)
      fd = file_store.OpenFile(client_path)
      hist = firefox3_history.Firefox3History()
      count = 0
      for epoch64, dtype, url, dat1 in hist.Parse(fd):
        count += 1
        str_entry = "%s %s %s %s" % (datetime.datetime.utcfromtimestamp(
            epoch64 / 1e6), url, dat1, dtype)
        self.SendReply(rdfvalue.RDFString(str_entry))

      self.Log("Wrote %d Firefox History entries for user %s from %s", count,
               self.args.username, response.stat_entry.pathspec.Basename())
      self.state.hist_count += count
def Handle(self, args, context=None):
  result = ApiGetFileDecodersResult()

  path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)
  client_path = db.ClientPath(
      client_id=str(args.client_id),
      path_type=path_type,
      components=components)

  for decoder_name in decoders.FACTORY.Names():
    decoder = decoders.FACTORY.Create(decoder_name)

    filedesc = file_store.OpenFile(client_path)
    filectx = context_lib.NullContext(filedesc)

    with filectx as filedesc:
      if decoder.Check(filedesc):
        result.decoder_names.append(decoder_name)

  return result
def testDownloadDirectory(self):
  """Test a FileFinder flow with depth=1."""
  # Mock the client actions FileFinder uses.
  client_mock = action_mocks.FileFinderClientMock()

  test_dir = self._SetupTestDir("testDownloadDirectory")

  flow_test_lib.TestFlowHelper(
      compatibility.GetName(file_finder.FileFinder),
      client_mock,
      client_id=self.client_id,
      paths=[test_dir + "/*"],
      action=rdf_file_finder.FileFinderAction.Download(),
      token=self.token)

  # There should be 5 children.
  expected_filenames = ["a.txt", "b.txt", "c.txt", "d.txt", "sub1"]

  if data_store.AFF4Enabled():
    output_path = self.client_id.Add("fs/os").Add(test_dir)
    output_fd = aff4.FACTORY.Open(output_path, token=self.token)
    children = list(output_fd.OpenChildren())
    filenames = [child.urn.Basename() for child in children]
    self.assertCountEqual(filenames, expected_filenames)

    fd = aff4.FACTORY.Open(output_path.Add("a.txt"))
    self.assertEqual(fd.read(), "Hello World!\n")
  else:
    children = data_store.REL_DB.ListChildPathInfos(
        self.client_id.Basename(), rdf_objects.PathInfo.PathType.OS,
        test_dir.strip("/").split("/"))
    filenames = [child.components[-1] for child in children]
    self.assertCountEqual(filenames, expected_filenames)

    fd = file_store.OpenFile(
        db.ClientPath.FromPathInfo(self.client_id.Basename(), children[0]))
    self.assertEqual(fd.read(), "Hello World!\n")
def testMultiGetFileSetsFileHashAttributeWhenMultipleChunksDownloaded(self):
  client_mock = action_mocks.MultiGetFileClientMock()
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path=os.path.join(self.base_path, "test_img.dd"))

  args = transfer.MultiGetFileArgs(pathspecs=[pathspec])
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  h = hashlib.sha256()
  with open(os.path.join(self.base_path, "test_img.dd"), "rb") as model_fd:
    h.update(model_fd.read())

  if not data_store.RelationalDBReadEnabled(category="filestore"):
    # Fix path for Windows testing.
    pathspec.path = pathspec.path.replace("\\", "/")
    # Test the AFF4 file that was created.
    urn = pathspec.AFF4Path(self.client_id)
    fd_hash = data_store_utils.GetUrnHashEntry(urn)

    self.assertTrue(fd_hash)
    self.assertEqual(fd_hash.sha256, h.digest())

  if data_store.RelationalDBReadEnabled(category="filestore"):
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
    fd_rel_db = file_store.OpenFile(cp)
    self.assertEqual(fd_rel_db.hash_id.AsBytes(), h.digest())

    # Check that SHA256 hash of the file matches the contents
    # hash and that MD5 and SHA1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
def testMultiGetFileDeduplication(self):
  client_mock = action_mocks.MultiGetFileClientMock()

  pathspecs = []
  # Make 10 files to download.
  for i in range(10):
    path = os.path.join(self.temp_dir, "test_%s.txt" % i)
    with open(path, "wb") as fd:
      fd.write(b"Hello")

    pathspecs.append(
        rdf_paths.PathSpec(
            pathtype=rdf_paths.PathSpec.PathType.OS, path=path))

  # All those files are the same so the individual chunks should
  # only be downloaded once. By forcing maximum_pending_files=1,
  # there should only be a single TransferBuffer call.
  args = transfer.MultiGetFileArgs(
      pathspecs=pathspecs, maximum_pending_files=1)
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  self.assertEqual(client_mock.action_counts["TransferBuffer"], 1)

  for pathspec in pathspecs:
    # Check that each referenced file can be read.
    cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
    fd_rel_db = file_store.OpenFile(cp)
    self.assertEqual(b"Hello", fd_rel_db.read())

    # Check that SHA256 hash of the file matches the contents
    # hash and that MD5 and SHA1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
def ParseFiles(self, responses):
  """Take each file we retrieved and get the history from it."""

  # Note that some of these Find requests will fail because some paths don't
  # exist, e.g. Chromium on most machines, so we don't check for success.
  if responses:
    for response in responses:
      client_path = db.ClientPath.FromPathSpec(self.client_id,
                                               response.stat_entry.pathspec)
      filepath = response.stat_entry.pathspec.CollapsePath()
      fd = file_store.OpenFile(client_path)
      hist = chrome_history.ChromeParser()
      count = 0
      for epoch64, dtype, url, dat1, dat2, dat3 in hist.Parse(filepath, fd):
        count += 1
        str_entry = "%s %s %s %s %s %s" % (datetime.datetime.utcfromtimestamp(
            epoch64 / 1e6), url, dat1, dat2, dat3, dtype)
        self.SendReply(rdfvalue.RDFString(str_entry))

      self.Log("Wrote %d Chrome History entries for user %s from %s", count,
               self.args.username, response.stat_entry.pathspec.Basename())
      self.state.hist_count += count
def ParseFiles(self, responses):
  """Take each file we retrieved and get the history from it."""
  if responses:
    for response in responses:
      if data_store.RelationalDBReadEnabled("filestore"):
        client_path = db.ClientPath.FromPathSpec(self.client_id,
                                                 response.stat_entry.pathspec)
        fd = file_store.OpenFile(client_path)
      else:
        fd = aff4.FACTORY.Open(
            response.stat_entry.AFF4Path(self.client_urn), token=self.token)

      hist = firefox3_history.Firefox3History(fd)
      count = 0
      for epoch64, dtype, url, dat1 in hist.Parse():
        count += 1
        str_entry = "%s %s %s %s" % (datetime.datetime.utcfromtimestamp(
            epoch64 / 1e6), url, dat1, dtype)
        self.SendReply(rdfvalue.RDFString(utils.SmartStr(str_entry)))

      self.Log("Wrote %d Firefox History entries for user %s from %s", count,
               self.args.username, response.stat_entry.pathspec.Basename())
      self.state.hist_count += count
def CheckFilesDownloaded(self, fnames):
  for fname in fnames:
    path_info = data_store.REL_DB.ReadPathInfo(
        self.client_id,
        rdf_objects.PathInfo.PathType.OS,
        components=self.FilenameToPathComponents(fname))
    size = path_info.stat_entry.st_size

    filepath = os.path.join(self.base_path, "searching", fname)
    with io.open(filepath, mode="rb") as fd:
      test_data = fd.read()

    self.assertEqual(size, len(test_data))

    fd = file_store.OpenFile(
        db.ClientPath(
            self.client_id,
            rdf_objects.PathInfo.PathType.OS,
            components=self.FilenameToPathComponents(fname)))

    # Make sure we can actually read the file.
    self.assertEqual(fd.read(), test_data)
def CheckFilesNotDownloaded(self, fnames):
  for fname in fnames:
    if data_store.RelationalDBReadEnabled(category="filestore"):
      try:
        file_store.OpenFile(
            db.ClientPath(
                self.client_id.Basename(),
                rdf_objects.PathInfo.PathType.OS,
                components=self.FilenameToPathComponents(fname)))
        self.fail("Found downloaded file: %s" % fname)
      except file_store.FileHasNoContentError:
        pass
    else:
      file_urn = self.FileNameToURN(fname)
      with aff4.FACTORY.Open(file_urn, token=self.token) as fd:
        # Directories have no size attribute.
        if fd.Get(fd.Schema.TYPE) == aff4_standard.VFSDirectory.__name__:
          continue
        self.assertEqual(fd.Get(fd.Schema.SIZE), 0)
def testMultiGetFile(self):
  """Test MultiGetFile."""
  client_mock = action_mocks.MultiGetFileClientMock()
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path=os.path.join(self.base_path, "test_img.dd"))
  expected_size = os.path.getsize(pathspec.path)

  args = transfer.MultiGetFileArgs(pathspecs=[pathspec, pathspec])
  with test_lib.Instrument(transfer.MultiGetFile,
                           "_StoreStat") as storestat_instrument:
    flow_test_lib.TestFlowHelper(
        transfer.MultiGetFile.__name__,
        client_mock,
        token=self.token,
        client_id=self.client_id,
        args=args)

    # We should only have called StoreStat once because the two paths
    # requested were identical.
    self.assertLen(storestat_instrument.args, 1)

  # Fix path for Windows testing.
  pathspec.path = pathspec.path.replace("\\", "/")

  with open(pathspec.path, "rb") as fd2:
    # Test the file that was created.
    cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
    fd_rel_db = file_store.OpenFile(cp)
    self.CompareFDs(fd2, fd_rel_db)

  # Check that SHA256 hash of the file matches the contents
  # hash and that MD5 and SHA1 are set.
  history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                  cp.components)
  self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
  self.assertEqual(history[-1].hash_entry.num_bytes, expected_size)
  self.assertIsNotNone(history[-1].hash_entry.sha1)
  self.assertIsNotNone(history[-1].hash_entry.md5)
def testMultiGetFileSizeLimit(self):
  client_mock = action_mocks.MultiGetFileClientMock()
  image_path = os.path.join(self.base_path, "test_img.dd")
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path=image_path)

  # Read a bit more than one chunk (600 * 1024).
  expected_size = 750 * 1024
  args = transfer.MultiGetFileArgs(
      pathspecs=[pathspec], file_size=expected_size)
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  with open(image_path, "rb") as fd:
    expected_data = fd.read(expected_size)

  cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
  fd_rel_db = file_store.OpenFile(cp)
  self.assertEqual(fd_rel_db.size, expected_size)

  data = fd_rel_db.read(2 * expected_size)
  self.assertLen(data, expected_size)

  d = hashlib.sha256()
  d.update(expected_data)
  self.assertEqual(fd_rel_db.hash_id.AsBytes(), d.digest())

  # Check that SHA256 hash of the file matches the contents
  # hash and that MD5 and SHA1 are set.
  history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                  cp.components)
  self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
  self.assertEqual(history[-1].hash_entry.num_bytes, expected_size)
  self.assertIsNotNone(history[-1].hash_entry.sha1)
  self.assertIsNotNone(history[-1].hash_entry.md5)
def testGetFilePathCorrection(self):
  """Tests that the pathspec returned is used for the aff4path."""
  client_mock = action_mocks.GetFileClientMock()
  # Deliberately using the wrong casing.
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path=os.path.join(self.base_path, "TEST_IMG.dd"))
  expected_size = os.path.getsize(os.path.join(self.base_path, "test_img.dd"))

  session_id = flow_test_lib.TestFlowHelper(
      transfer.GetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      pathspec=pathspec)

  results = flow_test_lib.GetFlowResults(self.client_id, session_id)
  self.assertLen(results, 1)
  res_pathspec = results[0].pathspec

  # Fix path for Windows testing.
  pathspec.path = pathspec.path.replace("\\", "/")
  with open(res_pathspec.path, "rb") as fd2:
    fd2.seek(0, 2)

    cp = db.ClientPath.FromPathSpec(self.client_id, res_pathspec)
    fd_rel_db = file_store.OpenFile(cp)
    self.CompareFDs(fd2, fd_rel_db)

  # Only the sha256 hash of the contents should have been calculated,
  # in order to put the file contents into the file store.
  history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                  cp.components)
  self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
  self.assertEqual(history[-1].hash_entry.num_bytes, expected_size)
  self.assertIsNone(history[-1].hash_entry.sha1)
  self.assertIsNone(history[-1].hash_entry.md5)
def testClientFileFinderUpload(self):
  paths = [os.path.join(self.base_path, "{**,.}/*.plist")]
  action = rdf_file_finder.FileFinderAction.Download()

  session_id = self._RunClientFileFinder(paths, action)
  collection = flow.GRRFlow.ResultCollectionForFID(session_id)
  results = list(collection)
  self.assertEqual(len(results), 5)
  relpaths = [
      os.path.relpath(p.stat_entry.pathspec.path, self.base_path)
      for p in results
  ]
  self.assertItemsEqual(relpaths, [
      "History.plist", "History.xml.plist", "test.plist",
      "parser_test/com.google.code.grr.plist",
      "parser_test/InstallHistory.plist"
  ])

  for r in results:
    aff4_obj = aff4.FACTORY.Open(
        r.stat_entry.pathspec.AFF4Path(self.client_id), token=self.token)
    data = open(r.stat_entry.pathspec.path, "rb").read()
    self.assertEqual(aff4_obj.Read(100), data[:100])

    if data_store.RelationalDBReadEnabled(category="filestore"):
      fd = file_store.OpenFile(
          db.ClientPath.FromPathSpec(self.client_id.Basename(),
                                     r.stat_entry.pathspec))
      self.assertEqual(fd.read(100), data[:100])
      self.assertEqual(fd.hash_id.AsBytes(), hashlib.sha256(data).digest())
    else:
      hash_obj = data_store_utils.GetFileHashEntry(aff4_obj)
      self.assertEqual(hash_obj.sha1, hashlib.sha1(data).hexdigest())
      self.assertEqual(hash_obj.sha256, hashlib.sha256(data).hexdigest())
      self.assertEqual(hash_obj.md5, hashlib.md5(data).hexdigest())
def testDownloadsSingleHuntFileIfAuthorizationIsPresent(self):
  hunt = self._CreateHuntWithDownloadedFile()
  results = self.GetHuntResults(hunt)

  self.RequestAndGrantHuntApproval(hunt.Basename())

  self.Open("/")
  self.Click("css=a[grrtarget=hunts]")
  self.Click("css=td:contains('GenericHunt')")
  self.Click("css=li[heading=Results]")

  if data_store.RelationalDBEnabled():
    fd = file_store.OpenFile(
        flow_export.CollectionItemToClientPath(results[0]))
  else:
    fd = aff4.FACTORY.Open(
        flow_export.CollectionItemToAff4Path(results[0]), token=self.token)

  with mock.patch.object(fd.__class__, "Read") as mock_obj:
    self.Click(
        "css=grr-results-collection button:has(span.glyphicon-download)")
    self.WaitUntil(lambda: mock_obj.called)
def testGetFile(self):
  """Test that the GetFile flow works."""
  client_mock = action_mocks.GetFileClientMock()
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path=os.path.join(self.base_path, "test_img.dd"))

  flow_test_lib.TestFlowHelper(
      transfer.GetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      pathspec=pathspec)

  # Fix path for Windows testing.
  pathspec.path = pathspec.path.replace("\\", "/")
  fd2 = open(pathspec.path, "rb")

  if data_store.RelationalDBReadEnabled():
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
    fd_rel_db = file_store.OpenFile(cp)
    self.CompareFDs(fd2, fd_rel_db)

    # Only the sha256 hash of the contents should have been calculated,
    # in order to put the file contents into the file store.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
    self.assertIsNone(history[-1].hash_entry.sha1)
    self.assertIsNone(history[-1].hash_entry.md5)
  else:
    # Test the AFF4 file that was created.
    urn = pathspec.AFF4Path(self.client_id)
    fd1 = aff4.FACTORY.Open(urn, token=self.token)
    fd2.seek(0, 2)
    self.assertEqual(fd2.tell(), int(fd1.Get(fd1.Schema.SIZE)))
    self.CompareFDs(fd1, fd2)
def testMultiGetFileMultiFiles(self):
  """Test MultiGetFile downloading many files at once."""
  client_mock = action_mocks.MultiGetFileClientMock()

  pathspecs = []
  # Make 30 files to download.
  for i in range(30):
    path = os.path.join(self.temp_dir, "test_%s.txt" % i)
    with io.open(path, "wb") as fd:
      fd.write(b"Hello")

    pathspecs.append(
        rdf_paths.PathSpec(
            pathtype=rdf_paths.PathSpec.PathType.OS, path=path))

  args = transfer.MultiGetFileArgs(
      pathspecs=pathspecs, maximum_pending_files=10)
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  # Now open each file and make sure the data is there.
  for pathspec in pathspecs:
    cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
    fd_rel_db = file_store.OpenFile(cp)
    self.assertEqual(b"Hello", fd_rel_db.read())

    # Check that SHA256 hash of the file matches the contents
    # hash and that MD5 and SHA1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
    self.assertEqual(history[-1].hash_entry.num_bytes, 5)
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
def testFlowCollectFile(self):
  with temp.AutoTempFilePath() as temp_file_path:
    with io.open(temp_file_path, mode="w", encoding="utf-8") as fd:
      fd.write("Just sample text to put in the file.")

    table = f"""
    [
      {{ "collect_column": "{temp_file_path}" }}
    ]
    """

    with osquery_test_lib.FakeOsqueryiOutput(stdout=table, stderr=""):
      results = self._RunFlow("Doesn't matter", ["collect_column"])

    self.assertLen(results, 2)
    self.assertIsInstance(results[0], rdf_osquery.OsqueryResult)
    self.assertIsInstance(results[1], rdf_client_fs.StatEntry)

    pathspec = results[1].pathspec
    client_path = db.ClientPath.FromPathSpec(self.client_id, pathspec)
    fd_rel_db = file_store.OpenFile(client_path)
    file_text = fd_rel_db.read().decode("utf-8")
    self.assertEqual(file_text, "Just sample text to put in the file.")
def testFirefoxHistoryFetch(self):
  """Test that downloading the Firefox history works."""
  with self.MockClientRawDevWithImage():
    # Run the flow in the simulated way.
    session_id = flow_test_lib.TestFlowHelper(
        webhistory.FirefoxHistory.__name__,
        self.client_mock,
        check_flow_errors=False,
        client_id=self.client_id,
        username="******",
        token=self.token,
        pathtype=rdf_paths.PathSpec.PathType.TSK)

  # Now check that the right files were downloaded.
  fs_path = "/home/test/.mozilla/firefox/adts404t.default/places.sqlite"
  output_path = self.client_id.Add("fs/tsk").Add("/".join(
      [self.base_path.replace("\\", "/"), "test_img.dd"])).Add(
          fs_path.replace("\\", "/"))

  # Check if the History file is created.
  if data_store.RelationalDBReadEnabled("filestore"):
    cp = db.ClientPath.TSK(self.client_id.Basename(),
                           tuple(output_path.Split()[3:]))
    rel_fd = file_store.OpenFile(cp)
    self.assertEqual(rel_fd.read(15), "SQLite format 3")
  else:
    fd = aff4.FACTORY.Open(output_path, token=self.token)
    self.assertGreater(fd.size, 20000)
    self.assertEqual(fd.read(15), "SQLite format 3")

  # Check for analysis file.
  results = flow_test_lib.GetFlowResults(self.client_id, session_id)
  self.assertGreater(len(results), 3)
  data = "\n".join(map(str, results))
  self.assertTrue(data.find("Welcome to Firefox") != -1)
  self.assertTrue(data.find("sport.orf.at") != -1)
def testMultiGetFileMultiFiles(self):
  """Test MultiGetFile downloading many files at once."""
  client_mock = action_mocks.MultiGetFileClientMock()

  pathspecs = []
  # Make 30 files to download.
  for i in range(30):
    path = os.path.join(self.temp_dir, "test_%s.txt" % i)
    with open(path, "wb") as fd:
      fd.write("Hello")

    pathspecs.append(
        rdf_paths.PathSpec(
            pathtype=rdf_paths.PathSpec.PathType.OS, path=path))

  args = transfer.MultiGetFileArgs(
      pathspecs=pathspecs, maximum_pending_files=10)
  session_id = flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  if data_store.RelationalDBReadEnabled():
    # Now open each file and make sure the data is there.
    for pathspec in pathspecs:
      cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
      fd_rel_db = file_store.OpenFile(cp)
      self.assertEqual("Hello", fd_rel_db.read())

      # Check that SHA256 hash of the file matches the contents
      # hash and that MD5 and SHA1 are set.
      history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                      cp.path_type,
                                                      cp.components)
      self.assertEqual(history[-1].hash_entry.sha256,
                       fd_rel_db.hash_id.AsBytes())
      self.assertIsNotNone(history[-1].hash_entry.sha1)
      self.assertIsNotNone(history[-1].hash_entry.md5)
  else:
    # Check up on the internal flow state.
    flow_state = flow_test_lib.GetFlowState(
        self.client_id, session_id, token=self.token)
    # All the pathspecs should be in this list.
    self.assertLen(flow_state.indexed_pathspecs, 30)

    # At any one time, there should not be more than 10 files or hashes
    # pending.
    self.assertLessEqual(len(flow_state.pending_files), 10)
    self.assertLessEqual(len(flow_state.pending_hashes), 10)

    # When we finish there should be no pathspecs stored in the flow state.
    for flow_pathspec in flow_state.indexed_pathspecs:
      self.assertIsNone(flow_pathspec)
    for flow_request_data in flow_state.request_data_list:
      self.assertIsNone(flow_request_data)

    for pathspec in pathspecs:
      urn = pathspec.AFF4Path(self.client_id)
      fd = aff4.FACTORY.Open(urn, token=self.token)
      self.assertEqual("Hello", fd.read())
def testMultiGetFileOfSpecialFiles(self):
  """Test that special /proc/ files are handled correctly.

  /proc/ files have the property that they are non seekable from their end
  (i.e. seeking them relative to the end is not supported). They also return
  an st_size of 0. For example:

  $ stat /proc/self/maps
  File: '/proc/self/maps'
  Size: 0   Blocks: 0   IO Block: 1024   regular empty file

  $ head /proc/self/maps
  00400000-00409000 r-xp 00000000 fc:01 9180740 /usr/bin/head
  00608000-00609000 r--p 00008000 fc:01 9180740 /usr/bin/head
  ...

  When we try to use the MultiGetFile flow, it deduplicates the files and
  since it thinks the file has a zero size, the flow will not download the
  file, and instead copy the zero size file into it.
  """
  client_mock = action_mocks.MultiGetFileClientMock()

  # Create a zero sized file.
  zero_sized_filename = os.path.join(self.temp_dir, "zero_size")
  with open(zero_sized_filename, "wb") as fd:
    pass

  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path=zero_sized_filename)

  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      file_size="1MiB",
      client_id=self.client_id,
      pathspecs=[pathspec])

  # Now if we try to fetch a real /proc/ filename this will fail because the
  # filestore already contains the zero length file
  # aff4:/files/nsrl/da39a3ee5e6b4b0d3255bfef95601890afd80709.
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path="/proc/self/environ")

  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      file_size=1024 * 1024,
      client_id=self.client_id,
      pathspecs=[pathspec])

  data = open(pathspec.last.path, "rb").read()

  if data_store.RelationalDBReadEnabled():
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
    fd_rel_db = file_store.OpenFile(cp)
    self.assertEqual(fd_rel_db.size, len(data))
    self.assertMultiLineEqual(fd_rel_db.read(), data)

    # Check that SHA256 hash of the file matches the contents
    # hash and that MD5 and SHA1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
  else:
    # Test the AFF4 file that was created - it should be empty since by
    # default we judge the file size based on its stat.st_size.
    urn = pathspec.AFF4Path(self.client_id)
    fd = aff4.FACTORY.Open(urn, token=self.token)
    self.assertEqual(fd.size, len(data))
    self.assertMultiLineEqual(fd.read(len(data)), data)
def _ReadFromPathspec(self, pathspec, num_bytes):
  fd = file_store.OpenFile(
      db.ClientPath.FromPathSpec(self.client_id, pathspec))
  return fd.read(num_bytes)
def Generate(self, items, token=None):
  """Generates archive from a given collection.

  Iterates the collection and generates an archive by yielding contents
  of every referenced AFF4Stream.

  Args:
    items: Iterable of rdf_client_fs.StatEntry objects.
    token: User's ACLToken.

  Yields:
    Binary chunks comprising the generated archive.
  """
  del token  # unused, to be removed with AFF4 code

  client_ids = set()
  for item_batch in collection.Batch(items, self.BATCH_SIZE):

    fds_to_write = {}
    for item in item_batch:
      try:
        urn = flow_export.CollectionItemToAff4Path(item, self.client_id)
        client_path = flow_export.CollectionItemToClientPath(
            item, self.client_id)
      except flow_export.ItemNotExportableError:
        continue

      fd = file_store.OpenFile(client_path)
      self.total_files += 1

      if not self.predicate(client_path):
        self.ignored_files.append(utils.SmartUnicode(urn))
        continue

      client_ids.add(client_path.client_id)

      # content_path = os.path.join(self.prefix, *urn_components)
      self.archived_files += 1

      # Make sure size of the original file is passed. It's required
      # when output_writer is StreamingTarWriter.
      st = os.stat_result((0o644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
      fds_to_write[fd] = (client_path, urn, st)

    if fds_to_write:
      for fd, (client_path, urn, st) in iteritems(fds_to_write):
        try:
          for i, chunk in enumerate(
              file_store.StreamFilesChunks([client_path])):
            if i == 0:
              target_path = os.path.join(self.prefix, urn.Path()[1:])
              yield self.archive_generator.WriteFileHeader(target_path, st=st)

            yield self.archive_generator.WriteFileChunk(chunk.data)

          yield self.archive_generator.WriteFileFooter()
        except Exception as exception:  # pylint: disable=broad-except
          logging.exception(exception)

          self.archived_files -= 1
          self.failed_files.append(unicode(urn))

      if self.archive_generator.is_file_write_in_progress:
        yield self.archive_generator.WriteFileFooter()

  if client_ids:
    for client_id, client_info in iteritems(
        data_store.REL_DB.MultiReadClientFullInfo(client_ids)):
      client = api_client.ApiClient().InitFromClientInfo(client_info)
      for chunk in self._GenerateClientInfo(client_id, client):
        yield chunk

  for chunk in self._GenerateDescription():
    yield chunk

  yield self.archive_generator.Close()
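# A minimal, standalone sketch (not part of the Generate() method above)
# illustrating why the 10-tuple passed to os.stat_result() there carries the
# file size in position 6: the constructor takes the fields in the order
# (st_mode, st_ino, st_dev, st_nlink, st_uid, st_gid, st_size, st_atime,
# st_mtime, st_ctime). The literal values below are illustrative only.
import os

fake_size = 4096
st = os.stat_result((0o644, 0, 0, 0, 0, 0, fake_size, 0, 0, 0))
assert st.st_mode == 0o644
assert st.st_size == fake_size  # This is the field StreamingTarWriter needs.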
def testMultiGetFileOfSpecialFiles(self):
  """Test that special /proc/ files are handled correctly.

  /proc/ files have the property that they are non seekable from their end
  (i.e. seeking them relative to the end is not supported). They also return
  an st_size of 0. For example:

  $ stat /proc/self/maps
  File: '/proc/self/maps'
  Size: 0   Blocks: 0   IO Block: 1024   regular empty file

  $ head /proc/self/maps
  00400000-00409000 r-xp 00000000 fc:01 9180740 /usr/bin/head
  00608000-00609000 r--p 00008000 fc:01 9180740 /usr/bin/head
  ...

  When we try to use the MultiGetFile flow, it deduplicates the files and
  since it thinks the file has a zero size, the flow will not download the
  file, and instead copy the zero size file into it.
  """
  client_mock = action_mocks.MultiGetFileClientMock()

  # Create a zero sized file.
  zero_sized_filename = os.path.join(self.temp_dir, "zero_size")
  with open(zero_sized_filename, "wb"):
    pass

  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path=zero_sized_filename)

  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      creator=self.test_username,
      file_size="1MiB",
      client_id=self.client_id,
      pathspecs=[pathspec])

  # Now if we try to fetch a real /proc/ filename this will fail because the
  # filestore already contains the zero length file.
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path="/proc/self/environ")

  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      creator=self.test_username,
      file_size=1024 * 1024,
      client_id=self.client_id,
      pathspecs=[pathspec])

  with open(pathspec.last.path, "rb") as fd:
    data = fd.read()

  cp = db.ClientPath.FromPathSpec(self.client_id, pathspec)
  fd_rel_db = file_store.OpenFile(cp)
  self.assertEqual(fd_rel_db.size, len(data))
  self.assertEqual(fd_rel_db.read(), data)

  # Check that SHA256 hash of the file matches the contents
  # hash and that MD5 and SHA1 are set.
  history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                  cp.components)
  self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
  self.assertEqual(history[-1].hash_entry.num_bytes, len(data))
  self.assertIsNotNone(history[-1].hash_entry.sha1)
  self.assertIsNotNone(history[-1].hash_entry.md5)
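# A minimal, Linux-only sketch illustrating the /proc/ behaviour described in
# the docstring above: the kernel reports st_size == 0 for such files even
# though reading them returns data, which is why size-based deduplication in
# MultiGetFile is misleading. Independent of the test; illustrative only.
import os

proc_path = "/proc/self/maps"
if os.path.exists(proc_path):
  assert os.stat(proc_path).st_size == 0
  with open(proc_path, "rb") as proc_fd:
    assert len(proc_fd.read()) > 0  # Content exists despite the zero st_size.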
def testRaisesForNonExistentFile(self):
  with self.assertRaises(file_store.FileNotFoundError):
    file_store.OpenFile(self.client_path)
def _OpenFile(self, pathspec: rdf_paths.PathSpec) -> file_store.BlobStream:
  # TODO(amoser): This is not super efficient, AFF4 provided an api to open
  # all pathspecs at the same time, investigate if optimizing this is worth
  # it.
  client_path = db.ClientPath.FromPathSpec(self._client_id, pathspec)
  return file_store.OpenFile(client_path, max_timestamp=self._timestamp)
def testRaisesForFileWithSinglePathInfoWithUnknownHash(self):
  data_store.REL_DB.WritePathInfos(self.client_id,
                                   [self._PathInfo(self.invalid_hash_id)])
  with self.assertRaises(file_store.FileHasNoContentError):
    file_store.OpenFile(self.client_path)
def testOpensFileWithSinglePathInfoWithHash(self):
  data_store.REL_DB.WritePathInfos(self.client_id,
                                   [self._PathInfo(self.hash_id)])
  fd = file_store.OpenFile(self.client_path)
  self.assertEqual(fd.read(), self.data)
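# A hedged sketch (hypothetical helper, not part of GRR) summarising the
# file_store.OpenFile() error contract exercised by the surrounding tests:
# FileNotFoundError when no path info exists for the client path, and
# FileHasNoContentError when path info exists but no readable content is
# available for it.
def TryOpenClientFile(client_path):
  """Returns an open file for the client path, or None if it has no content."""
  try:
    return file_store.OpenFile(client_path)
  except file_store.FileNotFoundError:
    return None
  except file_store.FileHasNoContentError:
    return None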
def ApplyParsersToResponses(parser_factory, responses, flow_obj):
  """Parse responses with applicable parsers.

  Args:
    parser_factory: A parser factory for specific artifact.
    responses: A list of responses from the client.
    flow_obj: An artifact collection flow.

  Returns:
    A list of (possibly parsed) responses.
  """
  if not parser_factory.HasParsers():
    # If we don't have any parsers, we expect to use the unparsed responses.
    return responses

  # We have some processors to run.
  knowledge_base = flow_obj.state.knowledge_base

  @contextlib.contextmanager
  def ParseErrorHandler():
    try:
      yield
    except parsers.ParseError as error:
      flow_obj.Log("Error encountered when parsing responses: %s", error)

  parsed_responses = []

  if parser_factory.HasSingleResponseParsers():
    for response in responses:
      for parser in parser_factory.SingleResponseParsers():
        with ParseErrorHandler():
          parsed_responses.extend(
              parser.ParseResponse(knowledge_base, response,
                                   flow_obj.args.path_type))

  for parser in parser_factory.MultiResponseParsers():
    with ParseErrorHandler():
      parsed_responses.extend(parser.ParseResponses(knowledge_base, responses))

  has_single_file_parsers = parser_factory.HasSingleFileParsers()
  has_multi_file_parsers = parser_factory.HasMultiFileParsers()

  if has_single_file_parsers or has_multi_file_parsers:
    precondition.AssertIterableType(responses, rdf_client_fs.StatEntry)
    pathspecs = [response.pathspec for response in responses]
    # TODO(amoser): This is not super efficient, AFF4 provided an api to open
    # all pathspecs at the same time, investigate if optimizing this is worth
    # it.
    filedescs = []
    for pathspec in pathspecs:
      client_path = db.ClientPath.FromPathSpec(flow_obj.client_id, pathspec)
      filedescs.append(file_store.OpenFile(client_path))

  if has_single_file_parsers:
    for response, filedesc in zip(responses, filedescs):
      for parser in parser_factory.SingleFileParsers():
        with ParseErrorHandler():
          parsed_responses.extend(
              parser.ParseFile(knowledge_base, response.pathspec, filedesc))

  if has_multi_file_parsers:
    for parser in parser_factory.MultiFileParsers():
      with ParseErrorHandler():
        parsed_responses.extend(
            parser.ParseFiles(knowledge_base, pathspecs, filedescs))

  return parsed_responses
def testRaisesForFileWithSinglePathInfoWithoutHash(self):
  data_store.REL_DB.WritePathInfos(self.client_id, [self._PathInfo()])
  with self.assertRaises(file_store.FileHasNoContentError):
    file_store.OpenFile(self.client_path)