def testGrepLength(self):
    """A hit at offset 100 is reported unless the scan length is capped at 100."""
    MockVFSHandlerFind.filesystem[self.filename] = "X" * 100 + "HIT"

    def build_spec():
        # Every request in this test shares the same literal, keys and target.
        spec = rdfvalue.GrepSpec(
            literal=utils.Xor("HIT", self.XOR_IN_KEY),
            xor_in_key=self.XOR_IN_KEY,
            xor_out_key=self.XOR_OUT_KEY)
        spec.target.path = self.filename
        spec.target.pathtype = rdfvalue.PathSpec.PathType.OS
        spec.start_offset = 0
        return spec

    # Without an explicit length the literal at offset 100 is found.
    unbounded = self.RunAction("Grep", build_spec())
    self.assertEqual(len(unbounded), 1)
    self.assertEqual(unbounded[0].offset, 100)

    # With length 100 the literal starts exactly where the scan window ends.
    capped_spec = build_spec()
    capped_spec.length = 100
    capped = self.RunAction("Grep", capped_spec)
    self.assertEqual(len(capped), 0)
def testXor(self):
    """Xor with a key changes the string; applying it again restores it."""
    plaintext = "Hello World!!"
    for xor_key in (1, 5, 123, 255):
        scrambled = utils.Xor(plaintext, xor_key)
        self.assertNotEqual(scrambled, plaintext)

        # A second pass with the same key must undo the first.
        restored = utils.Xor(scrambled, xor_key)
        self.assertEqual(restored, plaintext)
def Start(self):
    """Sets the XOR keys on the grep args and starts the memory driver load."""
    grep_spec = self.args.grep
    grep_spec.xor_in_key = self.XOR_IN_KEY
    grep_spec.xor_out_key = self.XOR_OUT_KEY

    # For literal matches we xor the search term. This stops us matching the
    # GRR client itself.
    plain_literal = grep_spec.literal
    if plain_literal:
        grep_spec.literal = utils.Xor(plain_literal, self.XOR_IN_KEY)

    self.CallFlow("LoadMemoryDriver", next_state="Grep")
def testGrep(self):
    """Literal grep for "10" in numbers.txt reports the known offsets."""
    # Use the real file system.
    vfs.VFSInit().Run()

    numbers_path = os.path.join(self.base_path, "numbers.txt")
    expected_offsets = [
        18, 288, 292, 296, 300, 304, 308, 312, 316, 320, 324,
        329, 729, 1129, 1529, 1929, 2329, 2729, 3129, 3529, 3888,
    ]

    request = rdfvalue.GrepSpec(
        literal=utils.Xor("10", self.XOR_IN_KEY),
        xor_in_key=self.XOR_IN_KEY,
        xor_out_key=self.XOR_OUT_KEY)
    request.target.path = numbers_path
    request.target.pathtype = rdfvalue.PathSpec.PathType.OS
    request.start_offset = 0

    result = self.RunAction("Grep", request)

    self.assertEqual([hit.offset for hit in result], expected_offsets)
    for hit in result:
        # Returned data is XOR-encoded with the out key; decode before checking.
        decoded = utils.Xor(hit.data, self.XOR_OUT_KEY)
        self.assertTrue("10" in decoded)
        self.assertEqual(request.target.path, hit.pathspec.path)
def testHitLimit(self):
    """Grep stops at HIT_LIMIT hits and appends one extra explanatory reply."""
    max_hits = searching.Grep.HIT_LIMIT
    padding = "x" * 10
    record = padding + "HIT" + padding
    # Provide 100 more occurrences than the action is allowed to report.
    MockVFSHandlerFind.filesystem[self.filename] = record * (max_hits + 100)

    request = rdfvalue.GrepSpec(
        literal=utils.Xor("HIT", self.XOR_IN_KEY),
        xor_in_key=self.XOR_IN_KEY,
        xor_out_key=self.XOR_OUT_KEY)
    request.target.path = self.filename
    request.target.pathtype = rdfvalue.PathSpec.PathType.OS
    request.start_offset = 0
    request.bytes_before = 10
    request.bytes_after = 10

    result = self.RunAction("Grep", request)

    # The real hits plus the trailing "limit reached" message.
    self.assertEqual(len(result), max_hits + 1)
    final_message = utils.Xor(result[-1].data, self.XOR_OUT_KEY)
    error = "maximum number of hits"
    self.assertTrue(error in final_message)
def testBufferBoundaries(self):
    """A hit placed at offsets 980..1019 is found with its full snippet."""
    # The snippet expected around every hit does not depend on the offset.
    expected = "X" * 10 + "HIT" + "X" * 10

    for shift in xrange(-20, 20):
        hit_position = 1000 + shift
        MockVFSHandlerFind.filesystem[self.filename] = (
            "X" * hit_position + "HIT" + "X" * 100)

        request = rdf_client.GrepSpec(
            literal=utils.Xor("HIT", self.XOR_IN_KEY),
            xor_in_key=self.XOR_IN_KEY,
            xor_out_key=self.XOR_OUT_KEY)
        request.target.path = self.filename
        request.target.pathtype = rdf_paths.PathSpec.PathType.OS
        request.start_offset = 0

        result = self.RunAction(searching.Grep, request)

        self.assertEqual(len(result), 1)
        first_hit = result[0]
        self.assertEqual(first_hit.offset, hit_position)
        self.assertEqual(first_hit.length, len(expected))
        self.assertEqual(utils.Xor(first_hit.data, self.XOR_OUT_KEY), expected)
def Done(self, responses):
    """Decodes and forwards the grep hits, optionally downloading the image.

    Args:
      responses: The responses to the client Grep call.

    Raises:
      flow.FlowError: If the grep did not succeed.
    """
    if not responses.success:
        raise flow.FlowError("Error grepping memory: %s." % responses.status)

    for hit in responses:
        # Decode the hit data from the client.
        hit.data = utils.Xor(hit.data, self.XOR_OUT_KEY)
        self.SendReply(hit)

    if self.args.also_download:
        self.CallFlow("DownloadMemoryImage", next_state="End")
def testGrepOffset(self):
    """Hit offsets are absolute; starting past the literal yields no hits."""
    MockVFSHandlerFind.filesystem[self.filename] = (
        "X" * 10 + "HIT" + "X" * 100)

    # (start_offset, expected number of hits). The literal sits at offset 10.
    scenarios = ((0, 1), (5, 1), (11, 0))
    for start_offset, expected_count in scenarios:
        request = rdf_client.GrepSpec(
            literal=utils.Xor("HIT", self.XOR_IN_KEY),
            xor_in_key=self.XOR_IN_KEY,
            xor_out_key=self.XOR_OUT_KEY)
        request.target.path = self.filename
        request.target.pathtype = rdf_paths.PathSpec.PathType.OS
        request.start_offset = start_offset

        result = self.RunAction(searching.Grep, request)

        self.assertEqual(len(result), expected_count)
        if expected_count:
            # Even with a non-zero start offset this should still report 10.
            self.assertEqual(result[0].offset, 10)
def testGrepEverywhere(self):
    """The literal is found at every position 0..499 with a clipped snippet."""
    for position in xrange(500):
        contents = "X" * position + "HIT" + "X" * (500 - position)
        MockVFSHandlerFind.filesystem[self.filename] = contents

        request = rdf_client.GrepSpec(
            literal=utils.Xor("HIT", self.XOR_IN_KEY),
            xor_in_key=self.XOR_IN_KEY,
            xor_out_key=self.XOR_OUT_KEY)
        request.target.path = self.filename
        request.target.pathtype = rdf_paths.PathSpec.PathType.OS
        request.start_offset = 0
        request.bytes_before = 10
        request.bytes_after = 10

        result = self.RunAction(searching.Grep, request)

        self.assertEqual(len(result), 1)
        only_hit = result[0]
        self.assertEqual(only_hit.offset, position)

        # Ten bytes of context on each side, clipped at the start of the file.
        snippet_begin = max(0, position - 10)
        snippet_end = position + 3 + 10
        expected = contents[snippet_begin:snippet_end]
        self.assertEqual(only_hit.length, len(expected))
        self.assertEqual(utils.Xor(only_hit.data, self.XOR_OUT_KEY), expected)
def testSnippetSize(self):
    """bytes_before / bytes_after control how much context each hit carries."""
    MockVFSHandlerFind.filesystem[self.filename] = (
        "X" * 100 + "HIT" + "X" * 100)

    context_sizes = [50, 10, 1, 0]
    for lead in context_sizes:
        for trail in context_sizes:
            request = rdf_client.GrepSpec(
                literal=utils.Xor("HIT", self.XOR_IN_KEY),
                xor_in_key=self.XOR_IN_KEY,
                xor_out_key=self.XOR_OUT_KEY)
            request.target.path = self.filename
            request.target.pathtype = rdf_paths.PathSpec.PathType.OS
            request.start_offset = 0
            request.bytes_before = lead
            request.bytes_after = trail

            result = self.RunAction(searching.Grep, request)

            self.assertEqual(len(result), 1)
            only_hit = result[0]
            self.assertEqual(only_hit.offset, 100)

            expected = "X" * lead + "HIT" + "X" * trail
            self.assertEqual(only_hit.length, len(expected))
            self.assertEqual(
                utils.Xor(only_hit.data, self.XOR_OUT_KEY), expected)
def testOffsetAndLength(self):
    """A window of start_offset=11, length=100 contains neither literal."""
    # Literals sit at offsets 10 and 113; the window below covers neither fully.
    MockVFSHandlerFind.filesystem[self.filename] = (
        "X" * 10 + "HIT" + "X" * 100 + "HIT" + "X" * 10)

    request = rdf_client.GrepSpec(
        literal=utils.Xor("HIT", self.XOR_IN_KEY),
        xor_in_key=self.XOR_IN_KEY,
        xor_out_key=self.XOR_OUT_KEY)
    request.target.path = self.filename
    request.target.pathtype = rdf_paths.PathSpec.PathType.OS
    request.start_offset = 11
    request.length = 100

    hits = self.RunAction(searching.Grep, request)
    self.assertEqual(len(hits), 0)
def testSecondBuffer(self):
    """A literal 1500 bytes into the file is reported at offset 1500."""
    MockVFSHandlerFind.filesystem[self.filename] = (
        "X" * 1500 + "HIT" + "X" * 100)

    request = rdf_client.GrepSpec(
        literal=utils.Xor("HIT", self.XOR_IN_KEY),
        xor_in_key=self.XOR_IN_KEY,
        xor_out_key=self.XOR_OUT_KEY)
    request.target.path = self.filename
    request.target.pathtype = rdf_paths.PathSpec.PathType.OS
    request.start_offset = 0

    hits = self.RunAction("Grep", request)

    self.assertEqual(len(hits), 1)
    self.assertEqual(hits[0].offset, 1500)
def Grep(self, responses):
    """Run Grep on memory device pathspec.

    Args:
      responses: The responses from loading the memory driver; the first one
        supplies the device to grep.

    Raises:
      flow.FlowError: If loading the memory driver did not succeed.
    """
    if not responses.success:
        raise flow.FlowError("Error while loading memory driver: %s" %
                             responses.status.error_message)

    device = responses.First().device

    # Coerce the BareGrepSpec into a GrepSpec explicitly.
    spec_fields = self.args.grep.AsDict()
    grep_request = rdfvalue.GrepSpec(target=device, **spec_fields)

    # For literal matches we xor the search term. This stops us matching the
    # GRR client itself.
    # NOTE(review): if another state (e.g. Start) has already XOR-encoded
    # self.args.grep.literal in place, this second Xor would undo it - confirm
    # that only one site performs the encoding.
    plain_literal = self.args.grep.literal
    if plain_literal:
        grep_request.literal = utils.Xor(
            utils.SmartStr(plain_literal), self.XOR_IN_KEY)

    self.CallClient("Grep", request=grep_request, next_state="Done")
def testGrepRegex(self):
    """Regex grep for "1[0]" in numbers.txt reports the known offsets."""
    # Use the real file system.
    vfs.VFSInit().Run()

    expected_offsets = [
        18, 288, 292, 296, 300, 304, 308, 312, 316, 320, 324,
        329, 729, 1129, 1529, 1929, 2329, 2729, 3129, 3529, 3888,
    ]

    numbers_file = rdf_paths.PathSpec(
        path=os.path.join(self.base_path, "numbers.txt"),
        pathtype=rdf_paths.PathSpec.PathType.OS)
    request = rdf_client.GrepSpec(
        regex="1[0]",
        xor_out_key=self.XOR_OUT_KEY,
        start_offset=0,
        target=numbers_file)

    result = self.RunAction(searching.Grep, request)

    self.assertEqual([hit.offset for hit in result], expected_offsets)
    for hit in result:
        # Returned data is XOR-encoded with the out key; decode before checking.
        decoded = utils.Xor(hit.data, self.XOR_OUT_KEY)
        self.assertTrue("10" in decoded)
def Run(self, args):
    """Search the file for the pattern.

    This implements the grep algorithm used to scan files. It reads the data
    in chunks of at most BUFF_SIZE bytes and can use different functions
    (regex or literal) to search for matching patterns.

    In every step, a buffer that is a bit bigger than the block size is used
    in order to return all the requested results. Specifically, a preamble is
    used in order to not miss any patterns that start in one block of data and
    end in the next, and a postscript buffer is kept such that the algorithm
    can return bytes trailing the pattern even if the pattern is at the end of
    one block.

    One block:
    -----------------------------
    | Pre  |  Data       | Post |
    -----------------------------
    Searching the pattern is done here:
    <------------------->

    The following block is constructed like this:
    -----------------------------
    | Pre  |  Data       | Post |
    -----------------------------
                         |
                  -----------------------------
                  | Pre  |  Data       | Post |
                  -----------------------------

    The preamble is filled from Data, so every hit that falls entirely into
    the preamble has to be discarded since it has already been discovered in
    the step before.

    Grepping for memory

    If this action is used to grep the memory of a client machine using one of
    the GRR memory acquisition drivers, we have to be very careful not to have
    any hits in the GRR process memory space itself. Therefore, if the input
    is a literal, it is expected to arrive XOR encoded and is handed to
    FindLiteral as a bytearray so it can be handled in place. The returned
    data is encoded using a different XOR key (xor_out_key). However, since
    the data will be copied to the preamble and the postscript, a single
    pattern might in some cases produce multiple hits.

    Args:
      args: A protobuf describing the grep request.

    Raises:
      RuntimeError: No search pattern has been given in the request.
    """
    fd = vfs.VFSOpen(args.target, progress_callback=self.Progress)
    fd.Seek(args.start_offset)
    base_offset = args.start_offset

    self.xor_in_key = args.xor_in_key
    self.xor_out_key = args.xor_out_key

    if args.regex:
        find_func = functools.partial(self.FindRegex, args.regex)
    elif args.literal:
        find_func = functools.partial(
            self.FindLiteral, bytearray(utils.SmartStr(args.literal)))
    else:
        raise RuntimeError("Grep needs a regex or a literal.")

    # First file offset that is outside the requested scan window.
    scan_end = args.start_offset + args.length

    preamble_size = 0
    postscript_size = 0
    hits = 0
    data = ""
    while fd.Tell() < scan_end:

        # Base size to read is at most the buffer size.
        to_read = min(args.length, self.BUFF_SIZE, scan_end - fd.Tell())
        # Read some more data for the snippet.
        to_read += self.ENVELOPE_SIZE - postscript_size
        read_data = fd.Read(to_read)

        # Keep the tail of the previous buffer (old postscript plus one
        # envelope) as the new preamble, then append the fresh bytes.
        data = data[-postscript_size - self.ENVELOPE_SIZE:] + read_data

        # A short read eats into the postscript first.
        postscript_size = max(
            0, self.ENVELOPE_SIZE - (to_read - len(read_data)))
        data_size = len(data) - preamble_size - postscript_size

        if data_size == 0 and postscript_size == 0:
            break

        for (start, end) in find_func(data):
            # Ignore hits in the preamble.
            if end <= preamble_size:
                continue

            # Ignore hits in the postscript.
            if end > preamble_size + data_size:
                continue

            # Offset of file in the end after length.
            if end + base_offset - preamble_size > scan_end:
                break

            # XOR-encode the hit and its surrounding context. Indexing byte by
            # byte (rather than slicing) avoids creating another plaintext
            # copy of the match, and join avoids repeated string reallocation.
            snippet_start = max(0, start - args.bytes_before)
            snippet_end = min(len(data), end + args.bytes_after)
            out_data = "".join(
                chr(ord(data[i]) ^ self.xor_out_key)
                for i in xrange(snippet_start, snippet_end))

            hits += 1
            self.SendReply(
                rdf_client.BufferReference(
                    offset=base_offset + start - preamble_size,
                    data=out_data,
                    length=len(out_data),
                    pathspec=fd.pathspec))

            if args.mode == rdf_client.GrepSpec.Mode.FIRST_HIT:
                return

            if hits >= self.HIT_LIMIT:
                msg = utils.Xor(
                    "This Grep has reached the maximum number of hits"
                    " (%d)." % self.HIT_LIMIT, self.xor_out_key)
                self.SendReply(
                    rdf_client.BufferReference(
                        offset=0, data=msg, length=len(msg)))
                return

        self.Progress()

        base_offset += data_size

        # Allow for overlap with previous matches.
        preamble_size = min(len(data), self.ENVELOPE_SIZE)