def testFileFinderStat(self):
  """Runs FileFinder with the STAT action and checks all matches are found."""
  patterns = [
      # Some files.
      "netgroup",
      "osx_fsdata",
      # Matches lsb-release, lsb-release-bad, lsb-release-notubuntu
      "lsb-release*",
      # Some directories.
      "a",
      "checks",
      "profiles",
  ]
  paths = [os.path.join(self.fixture_path, pattern) for pattern in patterns]

  expected_files = [
      self.FileNameToURN(os.path.basename(match))
      for pattern in paths
      for match in glob.glob(pattern)
  ]

  # There was a bug in FileFinder with files/directories in the root dir.
  paths.append("/bin")
  expected_files.append(self.client_id.Add("fs/os/bin"))

  results = self.RunFlow(
      action=rdf_file_finder.FileFinderAction(
          action_type=rdf_file_finder.FileFinderAction.Action.STAT),
      paths=paths)

  stat_entries = [reply[1].stat_entry for reply in results]
  result_paths = [entry.AFF4Path(self.client_id) for entry in stat_entries]

  self.assertItemsEqual(expected_files, result_paths)
def Grep(self, source, pathtype):
  """Grep files in paths for any matches to content_regex_list.

  When multiple regexes are supplied, they are combined into a single
  regex as an OR match so that all regexes are checked in one pass.

  Args:
    source: artifact source
    pathtype: pathspec path type
  """
  paths = self.InterpolateList(source.attributes.get("paths", []))
  regexes = self.InterpolateList(
      source.attributes.get("content_regex_list", []))

  # Single FileFinder condition matching any of the supplied regexes.
  file_finder_condition = rdf_file_finder.FileFinderCondition(
      condition_type=(
          rdf_file_finder.FileFinderCondition.Type.CONTENTS_REGEX_MATCH),
      contents_regex_match=(
          rdf_file_finder.FileFinderContentsRegexMatchCondition(
              regex=self._CombineRegex(regexes),
              bytes_before=0,
              bytes_after=0,
              mode="ALL_HITS")))

  self.CallFlow(
      file_finder.FileFinder.__name__,
      paths=paths,
      conditions=[file_finder_condition],
      action=rdf_file_finder.FileFinderAction(),
      pathtype=pathtype,
      request_data={
          "artifact_name": self.current_artifact_name,
          "source": source.ToPrimitiveDict()
      },
      next_state="ProcessCollected")
def _CreateHuntFromHunt(self):
  """Creates a hunt plus a copy of it referencing the original via a ref.

  Returns:
    A (copied_hunt, original_hunt) pair; the copy differs in flow args,
    client rules, description and output plugins.
  """
  flow_args = rdf_file_finder.FileFinderArgs(
      paths=["a/*", "b/*"],
      action=rdf_file_finder.FileFinderAction(action_type="STAT"))
  flow_runner_args = rdf_flow_runner.FlowRunnerArgs(
      flow_name=file_finder.FileFinder.__name__)
  client_rule_set = self._CreateForemanClientRuleSet()

  original_hunt = self.CreateHunt(
      flow_args=flow_args,
      flow_runner_args=flow_runner_args,
      description="foo-description",
      client_rule_set=client_rule_set)

  ref = rdf_hunts.FlowLikeObjectReference.FromHuntId(
      original_hunt.urn.Basename())

  # Modify flow_args so that there are differences.
  flow_args.paths = ["b/*", "c/*"]
  client_rule_set.rules[0].regex.field = "FQDN"
  output_plugins = [
      output_plugin.OutputPluginDescriptor(plugin_name="TestOutputPlugin")
  ]

  copied_hunt = self.CreateHunt(
      flow_args=flow_args,
      flow_runner_args=flow_runner_args,
      description="bar-description",
      client_rule_set=client_rule_set,
      output_plugins=output_plugins,
      original_object=ref)
  return copied_hunt, original_hunt
def _CreateHuntFromFlow(self):
  """Starts a flow, then creates a hunt referencing it via original_object.

  Returns:
    A (hunt, flow_urn) pair; the hunt's args differ from the flow's.
  """
  self.client_id = self.SetupClient(0)

  flow_args = rdf_file_finder.FileFinderArgs(
      paths=["a/*", "b/*"],
      action=rdf_file_finder.FileFinderAction(action_type="STAT"))
  flow_runner_args = rdf_flow_runner.FlowRunnerArgs(
      flow_name=file_finder.FileFinder.__name__)
  flow_urn = flow.StartFlow(
      client_id=self.client_id,
      args=flow_args,
      runner_args=flow_runner_args,
      token=self.token)

  ref = rdf_hunts.FlowLikeObjectReference.FromFlowIdAndClientId(
      flow_urn.Basename(), self.client_id.Basename())

  # Modify flow_args so that there are differences.
  flow_args.paths = ["b/*", "c/*"]
  flow_args.action.action_type = "DOWNLOAD"
  flow_args.conditions = [
      rdf_file_finder.FileFinderCondition(
          condition_type="SIZE",
          size=rdf_file_finder.FileFinderSizeCondition(min_file_size=42))
  ]

  hunt = self.CreateHunt(
      flow_args=flow_args,
      flow_runner_args=flow_runner_args,
      original_object=ref)
  return hunt, flow_urn
def testFileFinderThrottlingByFlowCountWorks(self):
  """Third FileFinder flow within the limit window must be rejected."""
  self.InitRouterConfig(
      self.__class__.FILE_FINDER_THROTTLED_ROUTER_CONFIG %
      self.token.username)

  # One distinct args proto per flow so the duplicate check doesn't trigger.
  args = [
      rdf_file_finder.FileFinderArgs(
          action=rdf_file_finder.FileFinderAction(action_type="STAT"),
          paths=[path]).AsPrimitiveProto()
      for path in ["tests.plist", "numbers.txt", "numbers.txt.ver2"]
  ]

  client_ref = self.api.Client(client_id=self.client_id.Basename())

  flow_obj = client_ref.CreateFlow(
      name=file_finder.FileFinder.__name__, args=args[0])
  self.assertEqual(flow_obj.data.state, flow_obj.data.RUNNING)

  flow_obj = client_ref.CreateFlow(
      name=file_finder.FileFinder.__name__, args=args[1])
  self.assertEqual(flow_obj.data.state, flow_obj.data.RUNNING)

  # The flow-count quota is now exhausted.
  with self.assertRaisesRegexp(RuntimeError, "2 flows run since"):
    client_ref.CreateFlow(name=file_finder.FileFinder.__name__, args=args[2])
def setUp(self):
  """Starts a DOWNLOAD FileFinder hunt and runs it against one client."""
  super(ApiGetHuntFileHandlerTest, self).setUp()

  self.handler = hunt_plugin.ApiGetHuntFileHandler()

  self.file_path = os.path.join(self.base_path, "test.plist")
  self.hunt = implementation.StartHunt(
      hunt_name=standard.GenericHunt.__name__,
      flow_runner_args=rdf_flow_runner.FlowRunnerArgs(
          flow_name=file_finder.FileFinder.__name__),
      flow_args=rdf_file_finder.FileFinderArgs(
          paths=[self.file_path],
          action=rdf_file_finder.FileFinderAction(action_type="DOWNLOAD"),
      ),
      client_rate=0,
      token=self.token)
  self.hunt.Run()

  self.aff4_file_path = "fs/os/%s" % self.file_path

  # Process the hunt on a single test client so the file gets collected.
  self.client_id = self.SetupClient(0)
  self.AssignTasksToClients(client_ids=[self.client_id])
  client_mock = action_mocks.FileFinderClientMock()
  hunt_test_lib.TestHuntHelper(client_mock, [self.client_id],
                               token=self.token)
def _RunCFF(self, paths, action):
  """Runs ClientFileFinder over the given paths and returns its results."""
  session_id = flow_test_lib.TestFlowHelper(
      file_finder.ClientFileFinder.__name__,
      action_mocks.ClientFileFinderClientMock(),
      client_id=self.client_id,
      paths=paths,
      pathtype=rdf_paths.PathSpec.PathType.OS,
      action=rdf_file_finder.FileFinderAction(action_type=action),
      process_non_regular_files=True,
      token=self.token)
  return list(flow.GRRFlow.ResultCollectionForFID(session_id))
def testNoThrottlingDoneByDefault(self):
  """With the default router config, repeated flow creation is not limited."""
  self.InitRouterConfig(self.__class__.FILE_FINDER_ROUTER_CONFIG %
                        self.token.username)

  args = rdf_file_finder.FileFinderArgs(
      action=rdf_file_finder.FileFinderAction(action_type="STAT"),
      paths=["tests.plist"]).AsPrimitiveProto()

  client_ref = self.api.Client(client_id=self.client_id.Basename())

  # Create 20 flows in a row to check that no throttling is applied.
  # (The comment previously claimed 60, but the loop has always run 20.)
  for _ in range(20):
    flow_obj = client_ref.CreateFlow(
        name=file_finder.FileFinder.__name__, args=args)
    self.assertEqual(flow_obj.data.state, flow_obj.data.RUNNING)
def testFileFinderMaxFileSizeOverrideWorks(self):
  """Router config must clamp the download size and force the SKIP policy."""
  self.InitRouterConfig(
      self.__class__.FILE_FINDER_MAX_SIZE_OVERRIDE_CONFIG %
      self.token.username)

  proto_args = rdf_file_finder.FileFinderArgs(
      action=rdf_file_finder.FileFinderAction(action_type="DOWNLOAD"),
      paths=["tests.plist"]).AsPrimitiveProto()

  client_ref = self.api.Client(client_id=self.client_id.Basename())
  flow_obj = client_ref.CreateFlow(
      name=file_finder.FileFinder.__name__, args=proto_args)

  # Inspect the args actually recorded on the created flow.
  flow_args = self.api.types.UnpackAny(flow_obj.data.args)
  download = flow_args.action.download
  self.assertEqual(download.max_size, 5000000)
  self.assertEqual(download.oversized_file_policy, download.SKIP)
def testFileFinderThrottlingByDuplicateIntervalWorks(self):
  """A duplicate flow inside the dup interval reuses the existing flow id."""
  self.InitRouterConfig(
      self.__class__.FILE_FINDER_THROTTLED_ROUTER_CONFIG %
      self.token.username)

  proto_args = rdf_file_finder.FileFinderArgs(
      action=rdf_file_finder.FileFinderAction(action_type="STAT"),
      paths=["tests.plist"]).AsPrimitiveProto()

  client_ref = self.api.Client(client_id=self.client_id.Basename())

  first_flow = client_ref.CreateFlow(
      name=file_finder.FileFinder.__name__, args=proto_args)
  self.assertEqual(first_flow.data.state, first_flow.data.RUNNING)

  # Identical args within the interval: no new flow is started.
  second_flow = client_ref.CreateFlow(
      name=file_finder.FileFinder.__name__, args=proto_args)
  self.assertEqual(first_flow.flow_id, second_flow.flow_id)
def setUp(self):
  """Runs a DOWNLOAD FileFinder flow whose archive the tests will fetch."""
  super(ApiGetFlowFilesArchiveHandlerTest, self).setUp()

  self.handler = flow_plugin.ApiGetFlowFilesArchiveHandler()

  self.client_id = self.SetupClient(0)
  self.flow_urn = flow.GRRFlow.StartFlow(
      flow_name=file_finder.FileFinder.__name__,
      client_id=self.client_id,
      paths=[os.path.join(self.base_path, "test.plist")],
      action=rdf_file_finder.FileFinderAction(action_type="DOWNLOAD"),
      token=self.token)

  # Drive the flow to completion with a mocked client.
  client_mock = action_mocks.FileFinderClientMock()
  flow_test_lib.TestFlowHelper(
      self.flow_urn, client_mock, client_id=self.client_id, token=self.token)
def _RunTSKFileFinder(self, paths):
  """Runs a DOWNLOAD FileFinder over a TSK image mounted as a virtual root."""
  image_path = os.path.join(self.base_path, "ntfs_img.dd")
  virtual_roots = {
      rdf_paths.PathSpec.PathType.TSK:
          rdf_paths.PathSpec(
              path=image_path, pathtype="OS", offset=63 * 512)
  }
  # Point the TSK pathtype at the test image for the duration of the flow.
  with utils.Stubber(vfs, "VFS_VIRTUALROOTS", virtual_roots):
    download = rdf_file_finder.FileFinderAction.Action.DOWNLOAD
    flow_test_lib.TestFlowHelper(
        file_finder.FileFinder.__name__,
        self.client_mock,
        client_id=self.client_id,
        paths=paths,
        pathtype=rdf_paths.PathSpec.PathType.TSK,
        action=rdf_file_finder.FileFinderAction(action_type=download),
        token=self.token)
def testFileFinderHashMaxFileSizeCanBeOverriden(self):
  """Robot router must override user-supplied hash size/policy with its own."""
  router = self._CreateRouter(
      file_finder_flow=rr.RobotRouterFileFinderFlowParams(
          enabled=True, max_file_size=42))

  # Request values deliberately exceeding the router's limit.
  hash_opts = rdf_file_finder.FileFinderHashActionOptions()
  hash_opts.max_size = 80
  hash_opts.oversized_file_policy = (
      hash_opts.OversizedFilePolicy.HASH_TRUNCATED)

  handler = router.CreateFlow(
      api_flow.ApiCreateFlowArgs(
          flow=api_flow.ApiFlow(
              name=file_finder.FileFinder.__name__,
              args=rdf_file_finder.FileFinderArgs(
                  paths=["/foo/bar"],
                  action=rdf_file_finder.FileFinderAction(
                      action_type="HASH", hash=hash_opts))),
          client_id=self.client_id),
      token=self.token)

  overridden = handler.override_flow_args.action.hash
  self.assertEqual(overridden.oversized_file_policy,
                   overridden.OversizedFilePolicy.SKIP)
  self.assertEqual(overridden.max_size, 42)
def Start(self):
  """Issue the find request."""
  super(FileFinder, self).Start()

  if not self.args.paths:
    # Nothing to do.
    return

  # Per-flow counters: number of matching files found so far, and the
  # conditions ordered cheapest-first so inexpensive checks run early.
  self.state.files_found = 0
  self.state.sorted_conditions = sorted(
      self.args.conditions, key=self._ConditionWeight)

  # TODO(user): We may change self.args just by accessing self.args.action
  # (a nested message will be created). Therefore we should be careful
  # about not modifying self.args: they will be written as FLOW_ARGS attribute
  # and will be different from what the user has actually passed in.
  # We need better semantics for RDFStructs - creating a nested field on
  # read access is totally unexpected.
  if self.args.HasField("action"):
    # Work on a copy so later reads/writes never mutate the stored args.
    action = self.args.action.Copy()
  else:
    action = rdf_file_finder.FileFinderAction()

  # This is used by MultiGetFileMixin.
  if action.action_type == rdf_file_finder.FileFinderAction.Action.HASH:
    self.state.file_size = action.hash.max_size
  elif action.action_type == rdf_file_finder.FileFinderAction.Action.DOWNLOAD:
    self.state.file_size = action.download.max_size

  if self.args.pathtype == rdf_paths.PathSpec.PathType.REGISTRY:
    # Registry StatEntries won't pass the file type check.
    self.args.process_non_regular_files = True

  # Kick off path expansion; matches are processed in subsequent states.
  self.GlobForPaths(
      self.args.paths,
      pathtype=self.args.pathtype,
      process_non_regular_files=self.args.process_non_regular_files,
      collect_ext_attrs=action.stat.collect_ext_attrs)
def RunFlowAndCheckResults(self,
                           conditions=None,
                           action=rdf_file_finder.FileFinderAction.Action.STAT,
                           expected_files=None,
                           non_expected_files=None,
                           paths=None):
  """Runs FileFinder and verifies replies plus download/hash side effects."""
  if not isinstance(action, rdf_file_finder.FileFinderAction):
    action = rdf_file_finder.FileFinderAction(action_type=action)
  action_type = action.action_type

  conditions = conditions or []
  expected_files = expected_files or []
  non_expected_files = non_expected_files or []

  # Start from a clean slate: remove any previously collected copies.
  for fname in expected_files + non_expected_files:
    aff4.FACTORY.Delete(self.FileNameToURN(fname), token=self.token)

  results = self.RunFlow(paths=paths, conditions=conditions, action=action)

  self.CheckReplies(results, action_type, expected_files)
  self.CheckFilesInCollection(expected_files)

  if action_type == rdf_file_finder.FileFinderAction.Action.STAT:
    # STAT must neither download nor hash anything.
    self.CheckFilesNotDownloaded(expected_files + non_expected_files)
    self.CheckFilesNotHashed(expected_files + non_expected_files)
  elif action_type == rdf_file_finder.FileFinderAction.Action.DOWNLOAD:
    # Downloaded files are hashed to allow for deduping.
    self.CheckFilesHashed(expected_files)
    self.CheckFilesNotHashed(non_expected_files)
    self.CheckFilesDownloaded(expected_files)
    self.CheckFilesNotDownloaded(non_expected_files)
  elif action_type == rdf_file_finder.FileFinderAction.Action.HASH:
    # HASH must hash matches but never fetch file contents.
    self.CheckFilesNotDownloaded(expected_files + non_expected_files)
    self.CheckFilesHashed(expected_files)
    self.CheckFilesNotHashed(non_expected_files)
def testFileFinderDownloadMaxFileSizeCanBeOverriden(self):
  """Robot router must override download size/policy with its configured cap."""
  router = self._CreateRouter(
      file_finder_flow=rr.RobotRouterFileFinderFlowParams(
          enabled=True, max_file_size=42))

  # Request values deliberately exceeding the router's limit.
  download_opts = rdf_file_finder.FileFinderDownloadActionOptions()
  download_opts.max_size = 80
  download_opts.oversized_file_policy = (
      download_opts.OversizedFilePolicy.DOWNLOAD_TRUNCATED)

  handler = router.CreateFlow(
      api_flow.ApiCreateFlowArgs(
          flow=api_flow.ApiFlow(
              name=file_finder.FileFinder.__name__,
              args=rdf_file_finder.FileFinderArgs(
                  paths=["/foo/bar"],
                  action=rdf_file_finder.FileFinderAction(
                      action_type="DOWNLOAD", download=download_opts))),
          client_id=self.client_id),
      token=self.token)

  overridden = handler.override_flow_args.action.download
  self.assertEqual(overridden.oversized_file_policy,
                   overridden.OversizedFilePolicy.SKIP)
  self.assertEqual(overridden.max_size, 42)
def testFlowDuplicateLimit(self):
  """Duplicate flows within dup_interval are rejected; outside it they pass."""
  # Disable the request limit checking by setting it to 0.
  throttler = throttle.FlowThrottler(
      daily_req_limit=0, dup_interval=rdfvalue.Duration("1200s"))

  def enforce(flow_name, flow_args):
    """Shorthand for the repeated EnforceLimits invocation."""
    throttler.EnforceLimits(
        self.client_id,
        self.token.username,
        flow_name,
        flow_args,
        token=self.token)

  dummy_flow_name = flow_test_lib.DummyLogFlow.__name__

  # Running the same flow immediately should fail.
  with test_lib.FakeTime(self.BASE_TIME):
    enforce(dummy_flow_name, None)
    flow.StartFlow(
        client_id=self.client_id,
        flow_name=dummy_flow_name,
        token=self.token)
    with self.assertRaises(throttle.ErrorFlowDuplicate):
      enforce(dummy_flow_name, None)

  # Doing the same outside the window should work.
  with test_lib.FakeTime(self.BASE_TIME + 1200 + 1):
    enforce(dummy_flow_name, None)
    flow.StartFlow(
        client_id=self.client_id,
        flow_name=dummy_flow_name,
        token=self.token)
    with self.assertRaises(throttle.ErrorFlowDuplicate):
      enforce(dummy_flow_name, None)

  # Now try a flow with more complicated args.
  args = rdf_file_finder.FileFinderArgs(
      paths=["/tmp/1", "/tmp/2"],
      action=rdf_file_finder.FileFinderAction(action_type="STAT"))
  with test_lib.FakeTime(self.BASE_TIME):
    enforce(file_finder.FileFinder.__name__, args)
    flow.StartFlow(
        client_id=self.client_id,
        flow_name=file_finder.FileFinder.__name__,
        token=self.token,
        paths=["/tmp/1", "/tmp/2"],
        action=rdf_file_finder.FileFinderAction(action_type="STAT"))
    with self.assertRaises(throttle.ErrorFlowDuplicate):
      enforce(file_finder.FileFinder.__name__, args)

    # Different args should succeed.
    args = rdf_file_finder.FileFinderArgs(
        paths=["/tmp/1", "/tmp/3"],
        action=rdf_file_finder.FileFinderAction(action_type="STAT"))
    enforce(file_finder.FileFinder.__name__, args)