def _RunTSKFileFinder(self, paths):
    """Run a downloading FileFinder flow over `paths` with the TSK pathtype.

    For the duration of the flow, the TSK entry of `vfs._VFS_VIRTUALROOTS` is
    stubbed with a pathspec pointing into the "ntfs_img.dd" image under
    `self.base_path`, and logging is suppressed.

    Args:
      paths: Value passed through to the flow as its `paths` argument.
    """
    tsk = rdf_paths.PathSpec.PathType.TSK
    # NOTE(review): 63 * 512 is presumably the partition start (sector 63 with
    # 512-byte sectors) inside the image - confirm against the fixture.
    image_root = rdf_paths.PathSpec(
        path=os.path.join(self.base_path, "ntfs_img.dd"),
        pathtype="OS",
        offset=63 * 512)
    download = rdf_file_finder.FileFinderAction(
        action_type=rdf_file_finder.FileFinderAction.Action.DOWNLOAD)

    with utils.Stubber(vfs, "_VFS_VIRTUALROOTS", {tsk: image_root}):
        with test_lib.SuppressLogs():
            flow_test_lib.TestFlowHelper(
                file_finder.FileFinder.__name__,
                self.client_mock,
                client_id=self.client_id,
                paths=paths,
                pathtype=tsk,
                action=download,
                token=self.token)
def setUp(self):
    """Create the handler, then start and run a DOWNLOAD hunt on 10 clients.

    The hunt runs FileFinder against "test.plist" under `self.base_path`; its
    id is kept in `self.hunt_id` and the client ids in `self.client_ids`.
    """
    super(ApiGetHuntFilesArchiveHandlerTest, self).setUp()

    self.handler = hunt_plugin.ApiGetHuntFilesArchiveHandler()
    self.client_ids = self.SetupClients(10)

    runner_args = rdf_flow_runner.FlowRunnerArgs(
        flow_name=file_finder.FileFinder.__name__)
    finder_args = rdf_file_finder.FileFinderArgs(
        paths=[os.path.join(self.base_path, "test.plist")],
        action=rdf_file_finder.FileFinderAction(action_type="DOWNLOAD"),
    )
    self.hunt_id = self.StartHunt(
        flow_runner_args=runner_args,
        flow_args=finder_args,
        client_rate=0,
        creator=self.token.username)

    self.RunHunt(
        client_ids=self.client_ids,
        client_mock=action_mocks.FileFinderClientMock())
def testDownloadUnicode(self):
    """DOWNLOAD of the long registry key returns its value and its stat path.

    Two results are expected: a DataBlob that decodes to `_LONG_STRING_VALUE`
    and a FileFinderResult whose pathspec path is the requested key.
    """
    key_path = "/HKEY_LOCAL_MACHINE/SOFTWARE/GRR_TEST/{}".format(_LONG_KEY)
    finder_args = rdf_file_finder.FileFinderArgs(
        paths=[key_path],
        pathtype="REGISTRY",
        action=rdf_file_finder.FileFinderAction(action_type="DOWNLOAD"))

    results = self.RunFileFinder(finder_args)
    self.assertLen(results, 2)

    by_type = _GroupItemsByType(results)
    self.assertEqual(
        _DecodeDataBlob(by_type["DataBlob"][0]), _LONG_STRING_VALUE)

    finder_result = by_type["FileFinderResult"][0]
    self.assertEqual(finder_result.stat_entry.pathspec.path, key_path)
def testUseExternalStores(self):
    """`download.use_external_stores` decides whether AddFiles is called.

    The same ClientFileFinder flow is run twice over "test.plist", first with
    the option off and then on, with `EXTERNAL_FILE_STORE.AddFiles` patched
    so its call count can be checked.
    """
    if not data_store.RelationalDBEnabled():
        self.skipTest("Test uses relational filestore.")

    paths = [os.path.join(self.base_path, "test.plist")]
    action = rdf_file_finder.FileFinderAction(
        action_type=rdf_file_finder.FileFinderAction.Action.DOWNLOAD)

    # Each entry: (use_external_stores value, expected AddFiles call count).
    scenarios = ((False, 0), (True, 1))
    for use_external, expected_calls in scenarios:
        action.download.use_external_stores = use_external

        with mock.patch.object(
                file_store.EXTERNAL_FILE_STORE, "AddFiles") as add_files:
            flow_id = flow_test_lib.TestFlowHelper(
                compatibility.GetName(file_finder.ClientFileFinder),
                action_mocks.ClientFileFinderClientMock(),
                client_id=self.client_id,
                paths=paths,
                pathtype=rdf_paths.PathSpec.PathType.OS,
                action=action,
                process_non_regular_files=True,
                token=self.token)

        results = flow_test_lib.GetFlowResults(self.client_id, flow_id)
        self.assertLen(results, 1)
        self.assertEqual(add_files.call_count, expected_calls)
def testStatSingleGlob(self):
    """A STAT over the registry glob "a*" returns exactly the two matches.

    Checks both the set of returned pathspec paths (order-insensitive) and
    that every result carries the REGISTRY pathtype.
    """
    results = self.RunFileFinder(
        rdf_file_finder.FileFinderArgs(
            paths=["/HKEY_LOCAL_MACHINE/SOFTWARE/GRR_TEST/a*"],
            pathtype="REGISTRY",
            action=rdf_file_finder.FileFinderAction(action_type="STAT")))

    # This check used to be repeated verbatim; one copy is sufficient.
    self.assertCountEqual(
        [res.stat_entry.pathspec.path for res in results],
        [
            "/HKEY_LOCAL_MACHINE/SOFTWARE/GRR_TEST/aaa",
            "/HKEY_LOCAL_MACHINE/SOFTWARE/GRR_TEST/aba",
        ])

    # The count is pinned to two by the assertion above, so iterating covers
    # the same entries as indexing [0] and [1] without hard-coding indices.
    for res in results:
        self.assertEqual(res.stat_entry.pathspec.pathtype, "REGISTRY")
def testResultsAreCorrectlyCounted(self):
    """Hunt counters report one result per matched file per client.

    A STAT FileFinder hunt over the "*hello*" glob is run on five clients;
    the counters read back from the relational DB must show five clients
    with results and five times the number of locally matching files.
    """
    num_clients = 5

    pattern = os.path.join(self.base_path, "*hello*")
    files_per_client = len(glob.glob(pattern))
    # The test is only meaningful when the glob matches several files.
    self.assertGreater(files_per_client, 1)

    finder_args = rdf_file_finder.FileFinderArgs(
        paths=[pattern],
        action=rdf_file_finder.FileFinderAction(action_type="STAT"),
    )
    hunt_args = rdf_hunt_objects.HuntArguments.Standard(
        flow_name=compatibility.GetName(file_finder.FileFinder),
        flow_args=finder_args)

    hunt_id, _ = self._CreateAndRunHunt(
        num_clients=num_clients,
        client_mock=action_mocks.FileFinderClientMock(),
        client_rule_set=foreman_rules.ForemanClientRuleSet(),
        client_rate=0,
        args=hunt_args)

    counters = data_store.REL_DB.ReadHuntCounters(hunt_id)
    self.assertEqual(counters.num_clients_with_results, num_clients)
    self.assertEqual(counters.num_results, num_clients * files_per_client)
def setUp(self):
    """Create the handler and run a DOWNLOAD GenericHunt on 10 clients.

    The hunt (kept in `self.hunt`) runs FileFinder against "test.plist" under
    `self.base_path`. It is started before the clients are set up, and tasks
    are then assigned to those clients and processed with a FileFinder mock.
    """
    super(ApiGetHuntFilesArchiveHandlerTest, self).setUp()

    self.handler = hunt_plugin.ApiGetHuntFilesArchiveHandler()

    runner_args = rdf_flow_runner.FlowRunnerArgs(
        flow_name=file_finder.FileFinder.__name__)
    finder_args = rdf_file_finder.FileFinderArgs(
        paths=[os.path.join(self.base_path, "test.plist")],
        action=rdf_file_finder.FileFinderAction(action_type="DOWNLOAD"),
    )
    self.hunt = implementation.StartHunt(
        hunt_name=standard.GenericHunt.__name__,
        flow_runner_args=runner_args,
        flow_args=finder_args,
        client_rate=0,
        token=self.token)
    self.hunt.Run()

    client_ids = self.SetupClients(10)
    self.AssignTasksToClients(client_ids=client_ids)
    hunt_test_lib.TestHuntHelper(
        action_mocks.FileFinderClientMock(), client_ids, token=self.token)
def testFileFinderResultExportConverterConvertsContent(self):
    """File content is exported only when `export_files_contents` is set.

    A DOWNLOAD FileFinder flow is run over "winexec_img.dd" and its single
    result is converted twice: once with default options (no content, no
    content hash) and once with `export_files_contents=True`.
    """
    mock_client = action_mocks.FileFinderClientMockWithTimestamps()
    download = rdf_file_finder.FileFinderAction(
        action_type=rdf_file_finder.FileFinderAction.Action.DOWNLOAD)
    image_path = os.path.join(self.base_path, "winexec_img.dd")

    flow_id = flow_test_lib.TestFlowHelper(
        file_finder.FileFinder.__name__,
        mock_client,
        client_id=self.client_id,
        paths=[image_path],
        pathtype=rdf_paths.PathSpec.PathType.OS,
        action=download,
        creator=self.test_username)

    flow_results = flow_test_lib.GetFlowResults(self.client_id, flow_id)
    self.assertLen(flow_results, 1)
    flow_result = flow_results[0]

    default_converter = file.FileFinderResultConverter()
    exported = list(default_converter.Convert(self.metadata, flow_result))
    self.assertLen(exported, 1)
    self.assertEqual(exported[0].basename, "winexec_img.dd")

    # Check that by default file contents are not exported.
    self.assertFalse(exported[0].content)
    self.assertFalse(exported[0].content_sha256)

    # Convert again, now specifying export_files_contents=True in options.
    content_options = base.ExportOptions(export_files_contents=True)
    content_converter = file.FileFinderResultConverter(
        options=content_options)
    exported = list(content_converter.Convert(self.metadata, flow_result))
    self.assertTrue(exported[0].content)
    self.assertEqual(
        exported[0].content_sha256,
        "0652da33d5602c165396856540c173cd37277916fba07a9bf3080bc5a6236f03")
def testFileFinderHashMaxFileSizeCanBeOverriden(self):
    """The router's max_file_size overrides the requested HASH options.

    The request asks for max_size=80 with the HASH_TRUNCATED policy; the
    handler's override args must carry max_size=42 and the SKIP policy.
    """
    router_params = rr.RobotRouterFileFinderFlowParams(
        enabled=True, max_file_size=42)
    router = self._CreateRouter(file_finder_flow=router_params)

    requested = rdf_file_finder.FileFinderHashActionOptions()
    requested.max_size = 80
    requested.oversized_file_policy = (
        requested.OversizedFilePolicy.HASH_TRUNCATED)

    path = "/foo/bar"
    finder_args = rdf_file_finder.FileFinderArgs(
        paths=[path],
        action=rdf_file_finder.FileFinderAction(
            action_type="HASH", hash=requested))
    create_args = api_flow.ApiCreateFlowArgs(
        flow=api_flow.ApiFlow(
            name=file_finder.FileFinder.__name__, args=finder_args),
        client_id=self.client_id)

    handler = router.CreateFlow(create_args, token=self.token)

    effective = handler.override_flow_args.action.hash
    self.assertEqual(
        effective.oversized_file_policy, effective.OversizedFilePolicy.SKIP)
    self.assertEqual(effective.max_size, 42)
def setUp(self):
    """Create the handler and run a DOWNLOAD hunt for "test.plist" on a client.

    Stores the on-disk path in `self.file_path`, the corresponding "fs/os/"
    path in `self.vfs_file_path`, the hunt id in `self.hunt_id` and the
    single client's id in `self.client_id`.
    """
    super().setUp()

    self.handler = hunt_plugin.ApiGetHuntFileHandler()

    self.file_path = os.path.join(self.base_path, "test.plist")
    self.vfs_file_path = "fs/os/%s" % self.file_path

    runner_args = rdf_flow_runner.FlowRunnerArgs(
        flow_name=file_finder.FileFinder.__name__)
    finder_args = rdf_file_finder.FileFinderArgs(
        paths=[self.file_path],
        action=rdf_file_finder.FileFinderAction(action_type="DOWNLOAD"),
    )
    self.hunt_id = self.StartHunt(
        flow_runner_args=runner_args,
        flow_args=finder_args,
        client_rate=0,
        creator=self.context.username)

    # The client is created only after the hunt has been started.
    self.client_id = self.SetupClient(0)
    self.RunHunt(
        client_ids=[self.client_id],
        client_mock=action_mocks.FileFinderClientMock())
def Start(self):
    """Initialise the flow state and start globbing for the requested paths.

    Returns immediately when no paths were given. Otherwise it resets the
    found-files counter, stores the conditions sorted by
    `self._ConditionWeight`, records the size limit of a HASH or DOWNLOAD
    action in `self.state.file_size`, and calls `GlobForPaths`.
    """
    super(FileFinder, self).Start()

    if not self.args.paths:
        # Nothing to do.
        return

    self.state.files_found = 0
    self.state.sorted_conditions = sorted(
        self.args.conditions, key=self._ConditionWeight)

    # TODO(user): We may change self.args just by accessing self.args.action
    # (a nested message will be created). Therefore we should be careful
    # about not modifying self.args: they will be written as FLOW_ARGS
    # attribute and will be different from what the user has actually passed
    # in. We need better semantics for RDFStructs - creating a nested field on
    # read access is totally unexpected.
    action = (
        self.args.action.Copy() if self.args.HasField("action")
        else rdf_file_finder.FileFinderAction())

    # This is used by MultiGetFileMixin.
    action_kinds = rdf_file_finder.FileFinderAction.Action
    if action.action_type == action_kinds.HASH:
        self.state.file_size = action.hash.max_size
    elif action.action_type == action_kinds.DOWNLOAD:
        self.state.file_size = action.download.max_size

    if self.args.pathtype == rdf_paths.PathSpec.PathType.REGISTRY:
        # Registry StatEntries won't pass the file type check.
        self.args.process_non_regular_files = True

    self.GlobForPaths(
        self.args.paths,
        pathtype=self.args.pathtype,
        process_non_regular_files=self.args.process_non_regular_files,
        collect_ext_attrs=action.stat.collect_ext_attrs)
def RunFlowAndCheckResults(
        self,
        conditions=None,
        action=rdf_file_finder.FileFinderAction.Action.STAT,
        expected_files=None,
        non_expected_files=None,
        paths=None):
    """Run the flow and verify replies, collection and per-action effects.

    All expected and non-expected files are first deleted from AFF4, then
    the flow is run and the download/hash state of each file is checked
    according to the action type.

    Args:
      conditions: Conditions passed to `RunFlow`; falsy means none.
      action: A `FileFinderAction`, or an action type from which one is
        built.
      expected_files: File names the flow is expected to act on.
      non_expected_files: File names the flow must leave untouched.
      paths: Paths passed to `RunFlow`.
    """
    if not isinstance(action, rdf_file_finder.FileFinderAction):
        action = rdf_file_finder.FileFinderAction(action_type=action)
    action_type = action.action_type

    conditions = conditions or []
    expected_files = expected_files or []
    non_expected_files = non_expected_files or []
    all_files = expected_files + non_expected_files

    for fname in all_files:
        aff4.FACTORY.Delete(self.FileNameToURN(fname), token=self.token)

    results = self.RunFlow(paths=paths, conditions=conditions, action=action)
    self.CheckReplies(results, action_type, expected_files)
    self.CheckFilesInCollection(expected_files)

    kinds = rdf_file_finder.FileFinderAction.Action
    if action_type == kinds.STAT:
        self.CheckFilesNotDownloaded(all_files)
        self.CheckFilesNotHashed(all_files)
    elif action_type == kinds.DOWNLOAD:
        # Downloaded files are hashed to allow for deduping.
        self.CheckFilesHashed(expected_files)
        self.CheckFilesNotHashed(non_expected_files)
        self.CheckFilesDownloaded(expected_files)
        self.CheckFilesNotDownloaded(non_expected_files)
    elif action_type == kinds.HASH:
        self.CheckFilesNotDownloaded(all_files)
        self.CheckFilesHashed(expected_files)
        self.CheckFilesNotHashed(non_expected_files)
def testFileFinderDownloadMaxFileSizeCanBeOverriden(self):
    """The router's max_file_size overrides the requested DOWNLOAD options.

    The request asks for max_size=80 with the DOWNLOAD_TRUNCATED policy; the
    handler's override args must carry max_size=42 and the SKIP policy.
    """
    router_params = rr.RobotRouterFileFinderFlowParams(
        enabled=True, max_file_size=42)
    router = self._CreateRouter(file_finder_flow=router_params)

    requested = rdf_file_finder.FileFinderDownloadActionOptions()
    requested.max_size = 80
    requested.oversized_file_policy = (
        requested.OversizedFilePolicy.DOWNLOAD_TRUNCATED)

    path = "/foo/bar"
    finder_args = rdf_file_finder.FileFinderArgs(
        paths=[path],
        action=rdf_file_finder.FileFinderAction(
            action_type="DOWNLOAD", download=requested))
    create_args = api_flow.ApiCreateFlowArgs(
        flow=api_flow.ApiFlow(
            name=file_finder.FileFinder.__name__, args=finder_args),
        client_id=self.client_id)

    handler = router.CreateFlow(create_args, token=self.token)

    effective = handler.override_flow_args.action.download
    self.assertEqual(
        effective.oversized_file_policy, effective.OversizedFilePolicy.SKIP)
    self.assertEqual(effective.max_size, 42)
def testFileFinderThrottlingByFlowCountWorks(self):
    """A third FileFinder flow is rejected after two were created.

    With the throttled router config in place, the first two flows must be
    created in the RUNNING state and the third creation must raise a
    RuntimeError mentioning "2 flows run since".
    """
    router_config = (
        self.__class__.FILE_FINDER_THROTTLED_ROUTER_CONFIG
        % self.token.username)
    self.InitRouterConfig(router_config)

    flow_name = file_finder.FileFinder.__name__
    # Each flow gets a different path so the three requests are distinct.
    args = [
        rdf_file_finder.FileFinderArgs(
            action=rdf_file_finder.FileFinderAction(action_type="STAT"),
            paths=[p]).AsPrimitiveProto()
        for p in ("tests.plist", "numbers.txt", "numbers.txt.ver2")
    ]

    client_ref = self.api.Client(client_id=self.client_id)

    for permitted_args in args[:2]:
        flow_obj = client_ref.CreateFlow(name=flow_name, args=permitted_args)
        self.assertEqual(flow_obj.data.state, flow_obj.data.RUNNING)

    with self.assertRaisesRegex(RuntimeError, "2 flows run since"):
        client_ref.CreateFlow(name=flow_name, args=args[2])
def RunFlowAndCheckResults(
        self,
        conditions=None,
        action=rdf_file_finder.FileFinderAction.Action.STAT,
        expected_files=None,
        non_expected_files=None,
        paths=None):
    """Run the flow, verify replies and per-action effects, return results.

    Args:
      conditions: Conditions passed to `RunFlow`; falsy means none.
      action: A `FileFinderAction`, or an action type from which one is
        built.
      expected_files: File names the flow is expected to act on.
      non_expected_files: File names the flow must leave untouched.
      paths: Paths passed to `RunFlow`.

    Returns:
      Whatever `self.RunFlow` returned for this run.
    """
    if not isinstance(action, rdf_file_finder.FileFinderAction):
        action = rdf_file_finder.FileFinderAction(action_type=action)
    action_type = action.action_type

    conditions = conditions or []
    expected_files = expected_files or []
    non_expected_files = non_expected_files or []
    all_files = expected_files + non_expected_files

    results = self.RunFlow(paths=paths, conditions=conditions, action=action)
    self.CheckReplies(results, action_type, expected_files)
    self.CheckFiles(expected_files, results)

    kinds = rdf_file_finder.FileFinderAction.Action
    if action_type == kinds.STAT:
        self.CheckFilesNotDownloaded(all_files)
        self.CheckFilesNotHashed(all_files)
    elif action_type == kinds.DOWNLOAD:
        # Downloaded files are hashed to allow for deduping.
        self.CheckFilesHashed(expected_files)
        self.CheckFilesNotHashed(non_expected_files)
        self.CheckFilesDownloaded(expected_files)
        self.CheckFilesNotDownloaded(non_expected_files)
    elif action_type == kinds.HASH:
        self.CheckFilesNotDownloaded(all_files)
        self.CheckFilesHashed(expected_files)
        self.CheckFilesNotHashed(non_expected_files)

    return results
def _CreateHuntFromFlow(self):
    """Start a FileFinder flow, then a hunt that references it.

    The hunt is started with deliberately different arguments (paths, action
    type and an added SIZE condition) than the flow it is derived from.

    Returns:
      A `(hunt, session_id)` tuple: the value returned by `self.StartHunt`
      and the id of the originating flow.
    """
    self.client_id = self.SetupClient(0)

    flow_args = rdf_file_finder.FileFinderArgs(
        paths=["a/*", "b/*"],
        action=rdf_file_finder.FileFinderAction(action_type="STAT"))
    session_id = flow_test_lib.StartFlow(
        file_finder.FileFinder,
        client_id=self.client_id,
        flow_args=flow_args)

    ref = rdf_hunts.FlowLikeObjectReference.FromFlowIdAndClientId(
        session_id, self.client_id)

    # Modify flow_args so that there are differences.
    flow_args.paths = ["b/*", "c/*"]
    flow_args.action.action_type = "DOWNLOAD"
    size_condition = rdf_file_finder.FileFinderCondition(
        condition_type="SIZE",
        size=rdf_file_finder.FileFinderSizeCondition(min_file_size=42))
    flow_args.conditions = [size_condition]

    runner_args = rdf_flow_runner.FlowRunnerArgs(
        flow_name=file_finder.FileFinder.__name__)
    hunt = self.StartHunt(
        flow_args=flow_args,
        flow_runner_args=runner_args,
        original_object=ref)
    return hunt, session_id
def Grep(self, source, pathtype):
    """Grep files in paths for any matches to content_regex_list.

    When multiple regexes are supplied, they are combined into a single
    regex as an OR match so that all regexes are checked at once. The search
    is delegated to a FileFinder flow whose responses are handled in the
    "ProcessCollected" state.

    Args:
      source: artifact source
      pathtype: pathspec path type
    """
    attributes = source.attributes
    path_list = self.InterpolateList(attributes.get("paths", []))
    content_regex_list = self.InterpolateList(
        attributes.get("content_regex_list", []))

    combined_regex = self._CombineRegex(content_regex_list)
    regex_condition = rdf_file_finder.FileFinderContentsRegexMatchCondition(
        regex=combined_regex,
        bytes_before=0,
        bytes_after=0,
        mode="ALL_HITS")

    condition_kinds = rdf_file_finder.FileFinderCondition.Type
    file_finder_condition = rdf_file_finder.FileFinderCondition(
        condition_type=condition_kinds.CONTENTS_REGEX_MATCH,
        contents_regex_match=regex_condition)

    request_data = {
        "artifact_name": self.current_artifact_name,
        "source": source.ToPrimitiveDict()
    }
    self.CallFlow(
        file_finder.FileFinder.__name__,
        paths=path_list,
        conditions=[file_finder_condition],
        action=rdf_file_finder.FileFinderAction(),
        pathtype=pathtype,
        request_data=request_data,
        next_state="ProcessCollected")
def testFlowDuplicateLimit(self):
    """The throttler rejects a repeated flow inside the duplicate interval.

    Covers a flow without args (inside and just past the 1200s interval)
    and FileFinder with args, where equal args count as a duplicate and
    different args do not.
    """
    # Disable the request limit checking by setting it to 0.
    throttler = throttle.FlowThrottler(
        daily_req_limit=0, dup_interval=rdfvalue.Duration("1200s"))

    def enforce(flow_name, flow_args):
        # Single place for the otherwise repeated EnforceLimits call.
        throttler.EnforceLimits(
            self.client_id,
            self.token.username,
            flow_name,
            flow_args,
            token=self.token)

    dummy_name = flow_test_lib.DummyLogFlow.__name__

    # First moment: running the same flow immediately should fail.
    # Second moment: doing the same outside the window should work, and a
    # repeat right after starting the flow fails again.
    for frozen_now in (self.BASE_TIME, self.BASE_TIME + 1200 + 1):
        with test_lib.FakeTime(frozen_now):
            enforce(dummy_name, None)

            api_regression_test_lib.StartFlow(
                client_id=self.client_id,
                flow_cls=flow_test_lib.DummyLogFlow,
                token=self.token)

            with self.assertRaises(throttle.DuplicateFlowError):
                enforce(dummy_name, None)

    # Now try a flow with more complicated args
    finder_name = file_finder.FileFinder.__name__
    args = rdf_file_finder.FileFinderArgs(
        paths=["/tmp/1", "/tmp/2"],
        action=rdf_file_finder.FileFinderAction(action_type="STAT"))

    with test_lib.FakeTime(self.BASE_TIME):
        enforce(finder_name, args)

        # A separately constructed but equal args object must still be
        # recognised as a duplicate.
        new_args = rdf_file_finder.FileFinderArgs(
            paths=["/tmp/1", "/tmp/2"],
            action=rdf_file_finder.FileFinderAction(action_type="STAT"))
        api_regression_test_lib.StartFlow(
            client_id=self.client_id,
            flow_cls=file_finder.FileFinder,
            token=self.token,
            flow_args=new_args)

        with self.assertRaises(throttle.DuplicateFlowError):
            enforce(finder_name, args)

        # Different args should succeed.
        args = rdf_file_finder.FileFinderArgs(
            paths=["/tmp/1", "/tmp/3"],
            action=rdf_file_finder.FileFinderAction(action_type="STAT"))
        enforce(finder_name, args)