def testFindActionCrossDev(self):
    """Check how the cross_devs setting changes the number of Find hits.

    The same search below /mock2/ is run three times: with cross_devs
    enabled (9 hits expected), with cross_devs disabled (7 hits expected)
    and with cross_devs left unset (7 hits expected, i.e. the same as when
    it is disabled).
    """
    pathspec = rdf_paths.PathSpec(
        path="/mock2/", pathtype=rdf_paths.PathSpec.PathType.OS)

    scenarios = [
        (dict(cross_devs=True), 9),
        (dict(cross_devs=False), 7),
        # cross_devs is not passed at all here.
        (dict(), 7),
    ]
    for extra_args, expected_count in scenarios:
        request = rdf_client.FindSpec(
            pathspec=pathspec, path_regex=".", **extra_args)
        request.iterator.number = 200

        # Only FindSpec replies carry a hit; anything else is skipped.
        hits = []
        for reply in self.RunAction(searching.Find, request):
            if isinstance(reply, rdf_client.FindSpec):
                hits.append(reply.hit)

        self.assertEqual(len(hits), expected_count)
def testFindWithMaxFiles(self):
    """Run FindFiles with max_results=7 and expect exactly seven results."""
    client_mock = action_mocks.ActionMock(searching.Find)

    # Match everything below the root of the OS filesystem.
    root_pathspec = rdf_paths.PathSpec(
        path="/", pathtype=rdf_paths.PathSpec.PathType.OS)
    findspec = rdf_client.FindSpec(path_regex=".*", pathspec=root_pathspec)

    flow_args = dict(
        client_id=self.client_id,
        token=self.token,
        findspec=findspec,
        iteration_count=3,
        max_results=7)
    session_id = flow_test_lib.TestFlowHelper(
        find.FindFiles.__name__, client_mock, **flow_args)

    # The result collection of this run must hold the capped number of hits.
    results = flow.GRRFlow.ResultCollectionForFID(session_id)
    self.assertEqual(len(results), 7)
def testCollectionOverwriting(self):
    """Run FindFiles twice and check the result collection of each run.

    The first run searches for "bin" and expects two results; the second
    run reuses the same findspec with the regex changed to "dd" and
    max_results=1, and expects a single result.
    """
    client_mock = action_mocks.ActionMock(searching.Find)

    findspec = rdf_client.FindSpec()
    findspec.path_regex = "bin"
    findspec.pathspec.path = "/"
    findspec.pathspec.pathtype = rdf_paths.PathSpec.PathType.OS

    def run_and_fetch(**extra_flow_args):
        """Run the flow with the current findspec and return its results."""
        session_id = flow_test_lib.TestFlowHelper(
            find.FindFiles.__name__,
            client_mock,
            client_id=self.client_id,
            token=self.token,
            findspec=findspec,
            **extra_flow_args)
        return flow.GRRFlow.ResultCollectionForFID(session_id)

    first_results = run_and_fetch()
    self.assertEqual(len(first_results), 2)

    # Search for something else; the new run gets its own collection.
    findspec.path_regex = "dd"
    second_results = run_and_fetch(max_results=1)
    self.assertEqual(len(second_results), 1)
def testFindFilesWithGlob(self):
    """Check that FindFiles honours a path_glob in the findspec."""
    client_mock = action_mocks.ActionMock(searching.Find)

    root_pathspec = rdf_paths.PathSpec(
        path="/", pathtype=rdf_paths.PathSpec.PathType.OS)
    findspec = rdf_client.FindSpec(path_glob="bash*", pathspec=root_pathspec)

    session_id = flow_test_lib.TestFlowHelper(
        find.FindFiles.__name__,
        client_mock,
        client_id=self.client_id,
        token=self.token,
        findspec=findspec)

    results = flow.GRRFlow.ResultCollectionForFID(session_id)

    # Every result must be called "bash", and there must be two of them.
    basenames = {
        entry.AFF4Path(self.client_id).Basename() for entry in results
    }
    self.assertEqual(sorted(basenames), ["bash"])
    self.assertEqual(len(results), 2)

    for entry in results:
        aff4_path = utils.SmartStr(entry.AFF4Path(self.client_id))
        self.assertTrue(aff4_path.endswith("bash"))
        self.assertEqual(entry.__class__.__name__, "StatEntry")
def testFindDirectories(self):
    """Check that FindFiles reports paths matching the regex "bin"."""
    client_mock = action_mocks.ActionMock(searching.Find)

    root_pathspec = rdf_paths.PathSpec(
        path="/", pathtype=rdf_paths.PathSpec.PathType.OS)
    findspec = rdf_client.FindSpec(path_regex="bin", pathspec=root_pathspec)

    session_id = flow_test_lib.TestFlowHelper(
        find.FindFiles.__name__,
        client_mock,
        client_id=self.client_id,
        token=self.token,
        findspec=findspec)

    results = flow.GRRFlow.ResultCollectionForFID(session_id)

    # Two StatEntry results are expected, each with "bin" in its path.
    self.assertEqual(len(results), 2)
    for entry in results:
        aff4_path = utils.SmartStr(entry.AFF4Path(self.client_id))
        self.assertIn("bin", aff4_path)
        self.assertEqual(entry.__class__.__name__, "StatEntry")
def testGIDFilter(self):
    """Check that the gid filter of the Find action selects the right files."""
    pathspec = rdf_paths.PathSpec(
        path="/mock2/", pathtype=rdf_paths.PathSpec.PathType.OS)

    # (gid to search for, expected parent directory, expected file names).
    expectations = [
        (500, "directory2", ["file.jpg", "file.mp3"]),
        (900, "directory1", ["file1.txt", "file2.txt"]),
    ]
    for gid, expected_dir, expected_names in expectations:
        request = rdf_client.FindSpec(
            pathspec=pathspec, path_regex=".", gid=gid, cross_devs=True)
        request.iterator.number = 200

        hits = [
            reply.hit
            for reply in self.RunAction(searching.Find, request)
            if isinstance(reply, rdf_client.FindSpec)
        ]

        self.assertEqual(len(hits), 2)
        # Hits are compared positionally, so the order of results matters.
        for hit, expected_name in zip(hits, expected_names):
            self.assertEqual(hit.pathspec.Dirname().Basename(), expected_dir)
            self.assertEqual(hit.pathspec.Basename(), expected_name)
def RunFind():
    """Run one Find action over self.base_path, capped at 80 entries.

    Takes no arguments; `self` is picked up from the enclosing scope.
    """
    request = rdf_client.FindSpec(
        pathspec=rdf_paths.PathSpec(
            path=self.base_path, pathtype=rdf_paths.PathSpec.PathType.OS))
    request.iterator.number = 80

    replies = self.RunAction(searching.Find, request)

    # 80 results plus one iterator.
    self.assertEqual(len(replies), 81)
def ProcessEntry(self, responses):
    """Handle client responses and continue walking the component tree.

    Unsuccessful responses are ignored. If the responses carry an iterator
    that is not yet finished, another Find call is scheduled with that
    iterator so the remaining entries are delivered to this state as well.
    Each response is then passed to _ProcessResponse together with the
    component path(s) that should be processed next.

    Args:
      responses: The responses delivered to this state. Its request_data
        holds either "component_path" (exact path matching) or "base_path"
        (combined regex matching).
    """
    if not responses.success:
        return

    # An unfinished iterator means the client has more entries for us: issue
    # the same Find request again, resuming from the returned iterator.
    iterator = responses.iterator
    if iterator and iterator.state != iterator.State.FINISHED:
        findspec = rdf_client.FindSpec(responses.request.request.payload)
        findspec.iterator = iterator
        self.CallClient(
            server_stubs.Find,
            findspec,
            next_state="ProcessEntry",
            request_data=responses.request_data)

    # Find replies wrap the stat entry in a FindSpec (as its "hit"); unwrap
    # them so that everything below deals with the same kind of object.
    stat_responses = []
    for reply in responses:
        if isinstance(reply, rdf_client.FindSpec):
            stat_responses.append(reply.hit)
        else:
            stat_responses.append(reply)

    # For a pure path match the request data names the exact node of the
    # component tree to continue with.
    component_path = responses.request_data.get("component_path")
    if component_path is not None:
        for stat_entry in stat_responses:
            self._ProcessResponse(stat_entry, [component_path])
        return

    # Otherwise a combined regex was sent for all children of the base node,
    # so each response has to be matched against every child to find out
    # which subtrees it belongs to.
    base_path = responses.request_data["base_path"]
    base_node = self.FindNode(base_path)
    for stat_entry in stat_responses:
        matching_components = [
            base_path + [child_key]
            for child_key in base_node.keys()
            if self._MatchPath(
                rdf_paths.PathSpec.FromSerializedString(child_key),
                stat_entry)
        ]
        if matching_components:
            self._ProcessResponse(
                stat_entry, matching_components, base_wildcard=True)
def testNoFilters(self):
    """Test that we get all files when no filters are in place."""
    pathspec = rdf_paths.PathSpec(
        path="/mock2/", pathtype=rdf_paths.PathSpec.PathType.OS)

    # A single large batch fetches everything in one call.
    request = rdf_client.FindSpec(pathspec=pathspec, cross_devs=True)
    request.iterator.number = 200

    hits = []
    for reply in self.RunAction(searching.Find, request):
        if isinstance(reply, rdf_client.FindSpec):
            hits.append(reply.hit)

    self.assertEqual(len(hits), 9)
def testFindAction2(self):
    """Check that the path_regex of the Find action narrows the hits."""
    pathspec = rdf_paths.PathSpec(
        path="/mock2/", pathtype=rdf_paths.PathSpec.PathType.OS)
    request = rdf_client.FindSpec(pathspec=pathspec, path_regex=".*mp3")
    request.iterator.number = 200

    hits = []
    for reply in self.RunAction(searching.Find, request):
        if isinstance(reply, rdf_client.FindSpec):
            hits.append(reply.hit)

    # Exactly one file is expected to match the regex.
    self.assertEqual(len(hits), 1)
    self.assertEqual(hits[0].pathspec.Basename(), "file.mp3")
def testFindAction(self):
    """Compare one bulk Find run with fetching one entry per call."""
    pathspec = rdf_paths.PathSpec(
        path="/mock2/", pathtype=rdf_paths.PathSpec.PathType.OS)

    # Reference run: fetch all hits with a single call.
    bulk_request = rdf_client.FindSpec(pathspec=pathspec, path_regex=".")
    bulk_request.iterator.number = 200
    expected_hits = [
        reply.hit
        for reply in self.RunAction(searching.Find, bulk_request)
        if isinstance(reply, rdf_client.FindSpec)
    ]

    # Second pass: ask for the files one at a time, feeding the returned
    # iterator back into the next request until it reports completion.
    finished = rdf_client.Iterator.State.FINISHED
    request = rdf_client.FindSpec(pathspec=pathspec, path_regex=".")
    request.iterator.number = 1
    stepwise_hits = []
    while True:
        replies = self.RunAction(searching.Find, request)
        if request.iterator.state == finished:
            break

        # Each step must deliver exactly one hit followed by the iterator.
        self.assertEqual(len(replies), 2)
        self.assertIsInstance(replies[0], rdf_client.FindSpec)
        self.assertIsInstance(replies[1], rdf_client.Iterator)
        stepwise_hits.append(replies[0].hit)
        request.iterator = replies[1].Copy()

    # NOTE(review): zip() stops at the shorter list, so a difference in the
    # number of hits between the two runs is not detected here - consider
    # asserting equal lengths.
    for expected, actual in zip(expected_hits, stepwise_hits):
        self.assertRDFValuesEqual(expected, actual)

    # Make sure the iterator is finished.
    self.assertEqual(request.iterator.state, finished)

    # Ensure old states were removed from client_state.
    self.assertEqual(len(request.iterator.client_state.dat), 0)
def testFindAction3(self):
    """Check that the data_regex of the Find action narrows the hits."""
    pathspec = rdf_paths.PathSpec(
        path="/mock2/", pathtype=rdf_paths.PathSpec.PathType.OS)
    request = rdf_client.FindSpec(
        pathspec=pathspec, data_regex="Secret", cross_devs=True)
    request.iterator.number = 200

    hits = []
    for reply in self.RunAction(searching.Find, request):
        if isinstance(reply, rdf_client.FindSpec):
            hits.append(reply.hit)

    # Two files are expected, in this order.
    self.assertEqual(len(hits), 2)
    found_names = [hit.pathspec.Basename() for hit in hits]
    self.assertEqual(found_names[0], "file1.txt")
    self.assertEqual(found_names[1], "long_file.text")
def testFindSizeLimits(self):
    """Check the min_file_size / max_file_size limits of the Find action."""
    request = rdf_client.FindSpec(
        min_file_size=4, max_file_size=15, cross_devs=True)
    request.pathspec.Append(
        path="/mock2/", pathtype=rdf_paths.PathSpec.PathType.OS)
    request.iterator.number = 200

    found_names = [
        reply.hit.pathspec.Basename()
        for reply in self.RunAction(searching.Find, request)
        if isinstance(reply, rdf_client.FindSpec)
    ]
    self.assertEqual(len(found_names), 5)

    # Our mock filesize is the length of the base filename, so every
    # returned name must have a length inside the requested size range.
    for name in found_names:
        name_length = len(name)
        self.assertTrue(name_length >= 4 and name_length <= 15)
def Iterate(self, request, client_state):
    """Process one batch of directory entries and report the matching ones.

    Entries come from self.ListDirectory(request.pathspec, client_state).
    A FindSpec reply is sent for each entry that none of the checks built
    by self.BuildChecks(request) flags, i.e. a check returning a truthy
    value rejects the entry.

    At most request.iterator.number entries are processed per call (but
    always at least one if any entry is available). When the listing is
    exhausted before that limit is hit, the iterator state is set to
    FINISHED; otherwise the state is left untouched so the caller can ask
    for the next batch.

    Args:
      request: The find request. Its pathspec and iterator are read, and
        its iterator state is updated once the listing is exhausted.
      client_state: Iteration state handed through to self.ListDirectory.
    """
    self.request = request
    filters = self.BuildChecks(request)
    limit = request.iterator.number

    # TODO(user): What is a reasonable measure of work here?
    for count, f in enumerate(
            self.ListDirectory(request.pathspec, client_state)):
        self.Progress()

        # Ignore this file if any of the checks fail (a truthy check result
        # means the file is filtered out).
        if not any(check(f) for check in filters):
            self.SendReply(rdf_client.FindSpec(hit=f))

        # We only check a limited number of files in each iteration. This
        # might result in returning an empty response - but the iterator is
        # not yet complete. Flows must check the state of the iterator
        # explicitly.
        if count >= limit - 1:
            # count is a zero-based index, so count + 1 entries were handled.
            logging.debug("Processed %s entries, quitting", count + 1)
            return

    # The listing is exhausted: end this iterator.
    request.iterator.state = rdf_client.Iterator.State.FINISHED
def testExtAttrsCollection(self):
    """Check that Find collects extended attributes when asked to."""
    attr_values = ["foo", "bar", "baz"]

    with test_lib.AutoTempDirPath(remove_non_empty=True) as temp_dirpath:
        # One temporary file per value, each tagged with "user.quux".
        for attr_value in attr_values:
            filepath = test_lib.TempFilePath(dir=temp_dirpath)
            client_test_lib.SetExtAttr(
                filepath, name="user.quux", value=attr_value)

        request = rdf_client.FindSpec(
            pathspec=rdf_paths.PathSpec(
                path=temp_dirpath, pathtype=rdf_paths.PathSpec.PathType.OS),
            path_glob="*",
            collect_ext_attrs=True)
        request.iterator.number = 100

        hits = [
            reply.hit
            for reply in self.RunAction(searching.Find, request)
            if isinstance(reply, rdf_client.FindSpec)
        ]
        self.assertEqual(len(hits), 3)

        # Every hit must carry exactly one attribute; the order of the hits
        # is not significant.
        collected = []
        for hit in hits:
            self.assertEqual(len(hit.ext_attrs), 1)
            collected.append(hit.ext_attrs[0].value)

        self.assertItemsEqual(collected, ["foo", "bar", "baz"])
def testPermissionFilter(self):
    """Check that perm_mode / perm_mask filtering selects the right entries."""
    pathspec = rdf_paths.PathSpec(
        path="/mock2/", pathtype=rdf_paths.PathSpec.PathType.OS)

    def find_hits(**perm_args):
        """Run Find below /mock2/ with the given permission arguments."""
        request = rdf_client.FindSpec(
            pathspec=pathspec, path_regex=".", cross_devs=True, **perm_args)
        request.iterator.number = 200
        return [
            reply.hit
            for reply in self.RunAction(searching.Find, request)
            if isinstance(reply, rdf_client.FindSpec)
        ]

    def check_files(hits, expected_dir, expected_names):
        """Assert the hits are the named files, in order, in expected_dir."""
        self.assertEqual(len(hits), len(expected_names))
        for hit, expected_name in zip(hits, expected_names):
            self.assertEqual(hit.pathspec.Dirname().Basename(), expected_dir)
            self.assertEqual(hit.pathspec.Basename(), expected_name)

    # Look for files that match exact permissions.
    check_files(
        find_hits(perm_mode=0o644),
        "directory2", ["file.jpg", "file.mp3"])

    # Look for files/folders where 'others' have 'write' permission. All
    # other attributes don't matter. The setuid bit must also be set, and
    # neither the setgid nor the sticky bit may be set.
    check_files(
        find_hits(perm_mode=0o4002, perm_mask=0o7002),
        "directory1", ["file1.txt", "file2.txt"])

    # Look for files where 'others' have 'execute' permission. All other
    # attributes don't matter. Only look for 'regular' files.
    check_files(
        find_hits(perm_mode=0o0100001, perm_mask=0o0100001),
        "directory3", ["file1.txt", "long_file.text"])

    # Look for folders where 'group' have 'execute' permission. All other
    # attributes don't matter. Only look for folders.
    folders = find_hits(perm_mode=0o0040010, perm_mask=0o0040010)
    self.assertEqual(len(folders), 3)
    expected_folders = ["directory2", "directory1", "directory3"]
    for folder, expected_name in zip(folders, expected_folders):
        self.assertEqual(folder.pathspec.Basename(), expected_name)
def _ProcessResponse(self, response, component_paths, base_wildcard=False):
    """Advance the glob component tree for one response.

    For every given component path the matching node of the component tree
    is looked up via self.FindNode. An empty node is a leaf: the response
    is reported through self.GlobReportMatch and processing stops. For a
    non-empty node the child components are inspected and, depending on
    their path options, client calls (Find / stat) or inline state calls
    are scheduled, all of them continuing in the "ProcessEntry" state.

    Args:
      response: The stat entry being processed, or None when there is no
        previous response to build on.
      component_paths: List of component paths (each a list of serialized
        pathspec strings) identifying the nodes to continue from.
      base_wildcard: True when the response was produced by a combined
        (wildcard) match; in that case only directories are descended into
        unless self.state.process_non_regular_files is set.
    """
    for component_path in component_paths:
        regexes_to_get = []
        # Maps recursion depth -> list of recursive components.
        recursions_to_get = {}

        node = self.FindNode(component_path)

        if not node:
            # Node is empty representing a leaf node - we found a hit -
            # report it. Note that this ends processing of all remaining
            # component paths as well.
            self.GlobReportMatch(response)
            return

        # There are further components in the tree - iterate over them.
        for component_str, next_node in node.items():
            component = rdf_paths.PathSpec.FromSerializedString(
                component_str)
            next_component = component_path + [component_str]

            # If we reach this point, we are instructed to go deeper into
            # the directory structure. We only want to actually do this if
            # - the last response was a proper directory,
            # - or it was a file (an image) that was explicitly given
            #   meaning no wildcards or groupings,
            # - or process_non_regular_files was set.
            #
            # This reduces the number of TSK opens on the client that may
            # sometimes lead to instabilities due to bugs in the library.
            if response and (not (stat.S_ISDIR(response.st_mode) or
                                  not base_wildcard or
                                  self.state.process_non_regular_files)):
                continue

            if component.path_options == component.Options.RECURSIVE:
                recursions_to_get.setdefault(
                    component.recursion_depth, []).append(component)
            elif component.path_options == component.Options.REGEX:
                regexes_to_get.append(component)
            elif component.path_options == component.Options.CASE_INSENSITIVE:
                # Here we need to create the next pathspec by appending the
                # current component to what we already have. If we don't
                # have anything yet, we fall back to the root path. If
                # there is no root path either, the current component
                # becomes the new base.
                base_pathspec = self._GetBasePathspec(response)
                if base_pathspec:
                    pathspec = base_pathspec.Append(component)
                else:
                    pathspec = component

                if not next_node:
                    # Check for the existence of the last node.
                    if (response is None or
                            (response and
                             (response.st_mode == 0 or
                              not stat.S_ISREG(response.st_mode)))):
                        # If next node is empty, this node is a leaf node,
                        # we therefore must stat it to check that it is
                        # there. There is a special case here where this
                        # pathspec points to a file/directory in the root
                        # directory. In this case, response will be None
                        # but we still need to stat it.

                        # TODO(hanuszczak): Support for old clients ends on
                        # 2021-01-01. This conditional should be removed
                        # after that date.
                        if self.client_version >= 3221:
                            stub = server_stubs.GetFileStat
                            request = rdf_client.GetFileStatRequest(
                                pathspec=pathspec,
                                collect_ext_attrs=(
                                    self.state.collect_ext_attrs))
                        else:
                            stub = server_stubs.StatFile
                            request = rdf_client.ListDirRequest(
                                pathspec=pathspec)

                        self.CallClient(
                            stub,
                            request,
                            next_state="ProcessEntry",
                            request_data=dict(
                                component_path=next_component))
                else:
                    # There is no need to go back to the client for
                    # intermediate paths in the prefix tree, just emulate
                    # this by recursively calling this state inline.
                    self.CallStateInline(
                        [rdf_client.StatEntry(pathspec=pathspec)],
                        next_state="ProcessEntry",
                        request_data=dict(component_path=next_component))

        if recursions_to_get or regexes_to_get:
            # Recursions or regexes need a base pathspec to operate on. If
            # we have neither a response or a root path, we send a default
            # pathspec that opens the root with pathtype "OS".
            base_pathspec = self._GetBasePathspec(response)
            if not base_pathspec:
                base_pathspec = rdf_paths.PathSpec(path="/", pathtype="OS")

            # One Find call per recursion depth, with all components of
            # that depth combined into a single case-insensitive regex.
            # items() (rather than the Python-2-only iteritems()) keeps
            # this working on both Python 2 and Python 3.
            for depth, recursions in recursions_to_get.items():
                path_regex = "(?i)^" + "$|^".join(
                    {c.path for c in recursions}) + "$"

                findspec = rdf_client.FindSpec(
                    pathspec=base_pathspec,
                    cross_devs=True,
                    max_depth=depth,
                    path_regex=path_regex)
                findspec.iterator.number = self.FILE_MAX_PER_DIR
                self.CallClient(
                    server_stubs.Find,
                    findspec,
                    next_state="ProcessEntry",
                    request_data=dict(base_path=component_path))

            if regexes_to_get:
                # Plain regex components only look at the direct children
                # of the base pathspec (max_depth=1).
                path_regex = "(?i)^" + "$|^".join(
                    {c.path for c in regexes_to_get}) + "$"

                findspec = rdf_client.FindSpec(
                    pathspec=base_pathspec,
                    max_depth=1,
                    path_regex=path_regex)
                findspec.iterator.number = self.FILE_MAX_PER_DIR
                self.CallClient(
                    server_stubs.Find,
                    findspec,
                    next_state="ProcessEntry",
                    request_data=dict(base_path=component_path))