def _GetMatches(self, psutil_process, scan_request):
  if scan_request.per_process_timeout:
    deadline = rdfvalue.RDFDatetime.Now() + scan_request.per_process_timeout
  else:
    deadline = rdfvalue.RDFDatetime.Now() + rdfvalue.Duration.From(
        1, rdfvalue.WEEKS)

  process = client_utils.OpenProcessForMemoryAccess(pid=psutil_process.pid)
  with process:
    streamer = streaming.Streamer(
        chunk_size=scan_request.chunk_size,
        overlap_size=scan_request.overlap_size)
    matches = []

    try:
      for region in client_utils.MemoryRegions(process, scan_request):
        chunks = streamer.StreamRanges(offset=region.start, amount=region.size)
        for m in self._ScanRegion(process, chunks, deadline):
          matches.append(m)
          if 0 < scan_request.max_results_per_process <= len(matches):
            return matches
    except TooManyMatchesError:
      # We need to report this as a hit, not an error.
      return matches

  return matches
def DumpProcess(self, psutil_process, args):
  response = rdf_memory.YaraProcessDumpInformation()
  response.process = rdf_client.Process.FromPsutilProcess(psutil_process)
  streamer = streaming.Streamer(chunk_size=args.chunk_size)

  with client_utils.OpenProcessForMemoryAccess(psutil_process.pid) as process:
    regions = list(client_utils.MemoryRegions(process, args))

    if args.prioritize_offsets:
      regions = _PrioritizeRegions(regions, args.prioritize_offsets)

    if args.size_limit:
      total_regions = len(regions)
      regions = _ApplySizeLimit(regions, args.size_limit)
      if len(regions) < total_regions:
        response.error = ("Byte limit exceeded. Writing {} of {} "
                          "regions.").format(len(regions), total_regions)
    else:
      for region in regions:
        region.dumped_size = region.size

    regions = sorted(regions, key=lambda r: r.start)

    with tempfiles.TemporaryDirectory(cleanup=False) as tmp_dir:
      for region in regions:
        self.Progress()
        pathspec = self._SaveRegionToDirectory(psutil_process, process, region,
                                               tmp_dir, streamer)
        if pathspec is not None:
          region.file = pathspec
          response.memory_regions.Append(region)

  return response
def _GetMatches(self, psutil_process, scan_request):
  if scan_request.per_process_timeout:
    deadline = rdfvalue.RDFDatetime.Now() + scan_request.per_process_timeout
  else:
    deadline = rdfvalue.RDFDatetime.Now() + rdfvalue.Duration.From(
        1, rdfvalue.WEEKS)

  rules = scan_request.yara_signature.GetRules()

  process = client_utils.OpenProcessForMemoryAccess(pid=psutil_process.pid)
  with process:
    streamer = streaming.Streamer(
        chunk_size=scan_request.chunk_size,
        overlap_size=scan_request.overlap_size)
    matches = []

    try:
      for region in client_utils.MemoryRegions(process, scan_request):
        chunks = streamer.StreamMemory(
            process, offset=region.start, amount=region.size)
        for m in self._ScanRegion(rules, chunks, deadline):
          matches.append(m)
          if 0 < scan_request.max_results_per_process <= len(matches):
            return matches
    except yara.Error as e:
      # Yara internal error 30 is too many hits (obviously...). We need to
      # report this as a hit, not an error.
      if "internal error: 30" in str(e):
        return matches
      raise

  return matches
def Scan(self, path, matcher):
  """Scans given file searching for occurrences of given pattern.

  Args:
    path: A path to the file that needs to be searched.
    matcher: A matcher object specifying a pattern to search for.

  Yields:
    `BufferReference` objects pointing to file parts with matching content.
  """
  streamer = streaming.Streamer(
      chunk_size=self.CHUNK_SIZE, overlap_size=self.OVERLAP_SIZE)

  offset = self.params.start_offset
  amount = self.params.length
  for chunk in streamer.StreamFilePath(path, offset=offset, amount=amount):
    for span in chunk.Scan(matcher):
      ctx_begin = max(span.begin - self.params.bytes_before, 0)
      ctx_end = min(span.end + self.params.bytes_after, len(chunk.data))
      ctx_data = chunk.data[ctx_begin:ctx_end]

      yield rdf_client.BufferReference(
          offset=chunk.offset + ctx_begin,
          length=len(ctx_data),
          data=ctx_data)

      if self.params.mode == self.params.Mode.FIRST_HIT:
        return
def _ScanProcess(self, psutil_process, args):
  if args.per_process_timeout:
    deadline = rdfvalue.RDFDatetime.Now() + args.per_process_timeout
  else:
    deadline = rdfvalue.RDFDatetime.Now() + rdfvalue.Duration("1w")

  rules = args.yara_signature.GetRules()

  process = client_utils.OpenProcessForMemoryAccess(pid=psutil_process.pid)
  with process:
    streamer = streaming.Streamer(
        chunk_size=args.chunk_size, overlap_size=args.overlap_size)
    matches = []

    try:
      for start, length in client_utils.MemoryRegions(process, args):
        chunks = streamer.StreamMemory(process, offset=start, amount=length)
        for m in self._ScanRegion(rules, chunks, deadline):
          matches.append(m)
          if (args.max_results_per_process > 0 and
              len(matches) >= args.max_results_per_process):
            return matches
    except yara.Error as e:
      # Yara internal error 30 is too many hits (obviously...). We need to
      # report this as a hit, not an error.
      if e.message == "internal error: 30":
        return matches
      raise

  return matches
def testSingleChunk(self):
  streamer = streaming.Streamer(chunk_size=8, overlap_size=2)
  method = self.Stream(streamer, b"abcdef")
  chunks = list(method(amount=7))

  self.assertLen(chunks, 1)
  self.assertEqual(chunks[0].data, b"abcdef")
  self.assertEqual(chunks[0].offset, 0)
  self.assertEqual(chunks[0].overlap, 0)
def DumpProcess(self, psutil_process, args):
  response = rdf_memory.YaraProcessDumpInformation()
  response.process = rdf_client.Process.FromPsutilProcess(psutil_process)

  process = client_utils.OpenProcessForMemoryAccess(pid=psutil_process.pid)

  bytes_limit = args.size_limit

  with process:
    streamer = streaming.Streamer(chunk_size=args.chunk_size)

    with tempfiles.TemporaryDirectory(cleanup=False) as tmp_dir:
      for region in client_utils.MemoryRegions(process, args):

        if bytes_limit and self.bytes_written + region.size > bytes_limit:
          response.error = ("Byte limit exceeded. Wrote %d bytes, "
                            "next block is %d bytes, limit is %d." %
                            (self.bytes_written, region.size, bytes_limit))
          return response

        end = region.start + region.size

        # _ReplaceDumpPathspecsWithMultiGetFilePathspec in DumpProcessMemory
        # flow asserts that MemoryRegions can be uniquely identified by their
        # file's basename.
        filename = "%s_%d_%x_%x.tmp" % (psutil_process.name(),
                                        psutil_process.pid, region.start, end)
        filepath = os.path.join(tmp_dir.path, filename)

        chunks = streamer.StreamMemory(
            process, offset=region.start, amount=region.size)
        bytes_written = self._SaveMemDumpToFilePath(filepath, chunks)

        if not bytes_written:
          continue

        self.bytes_written += bytes_written

        # TODO: Remove workaround after client_utils are fixed.
        canonical_path = client_utils.LocalPathToCanonicalPath(filepath)
        if not canonical_path.startswith("/"):
          canonical_path = "/" + canonical_path

        region.file = rdf_paths.PathSpec(
            path=canonical_path,
            pathtype=rdf_paths.PathSpec.PathType.TMPFILE)

        response.memory_regions.Append(region)

  return response
def __init__(self, action, chunk_size=None):
  """Initializes the uploader.

  Args:
    action: A parent action that creates the uploader. Used to communicate
      with the parent flow.
    chunk_size: A number of (uncompressed) bytes per chunk.
  """
  chunk_size = chunk_size or self.DEFAULT_CHUNK_SIZE

  self._action = action
  self._streamer = streaming.Streamer(chunk_size=chunk_size)
def testSmallAmount(self):
  streamer = streaming.Streamer(chunk_size=1, overlap_size=0)
  method = self.Stream(streamer, b"abc")
  chunks = list(method(amount=2))

  self.assertLen(chunks, 2)
  self.assertEqual(chunks[0].data, b"a")
  self.assertEqual(chunks[1].data, b"b")
  self.assertEqual(chunks[0].offset, 0)
  self.assertEqual(chunks[1].offset, 1)
  self.assertEqual(chunks[0].overlap, 0)
  self.assertEqual(chunks[1].overlap, 0)
def testOneByteOverlap(self):
  streamer = streaming.Streamer(chunk_size=3, overlap_size=1)
  method = self.Stream(streamer, b"abcdef")
  chunks = list(method(amount=8))

  self.assertLen(chunks, 3)
  self.assertEqual(chunks[0].data, b"abc")
  self.assertEqual(chunks[1].data, b"cde")
  self.assertEqual(chunks[2].data, b"ef")
  self.assertEqual(chunks[0].offset, 0)
  self.assertEqual(chunks[1].offset, 2)
  self.assertEqual(chunks[2].offset, 4)
  self.assertEqual(chunks[0].overlap, 0)
  self.assertEqual(chunks[1].overlap, 1)
  self.assertEqual(chunks[2].overlap, 1)
def testShorterOverlap(self):
  streamer = streaming.Streamer(chunk_size=4, overlap_size=2)
  method = self.Stream(streamer, b"abcdefg")
  chunks = list(method(amount=1024))

  self.assertLen(chunks, 3)
  self.assertEqual(chunks[0].data, b"abcd")
  self.assertEqual(chunks[1].data, b"cdef")
  self.assertEqual(chunks[2].data, b"efg")
  self.assertEqual(chunks[0].offset, 0)
  self.assertEqual(chunks[1].offset, 2)
  self.assertEqual(chunks[2].offset, 4)
  self.assertEqual(chunks[0].overlap, 0)
  self.assertEqual(chunks[1].overlap, 2)
  self.assertEqual(chunks[2].overlap, 2)
def testNoOverlap(self):
  streamer = streaming.Streamer(chunk_size=3, overlap_size=0)
  method = self.Stream(streamer, b"abcdefgh")
  chunks = list(method(amount=8))

  self.assertLen(chunks, 3)
  self.assertEqual(chunks[0].data, b"abc")
  self.assertEqual(chunks[1].data, b"def")
  self.assertEqual(chunks[2].data, b"gh")
  self.assertEqual(chunks[0].offset, 0)
  self.assertEqual(chunks[1].offset, 3)
  self.assertEqual(chunks[2].offset, 6)
  self.assertEqual(chunks[0].overlap, 0)
  self.assertEqual(chunks[1].overlap, 0)
  self.assertEqual(chunks[2].overlap, 0)
def testOffset(self):
  streamer = streaming.Streamer(chunk_size=3, overlap_size=2)
  method = self.Stream(streamer, b"abcdefghi")
  chunks = list(method(offset=4, amount=108))

  self.assertLen(chunks, 3)
  self.assertEqual(chunks[0].data, b"efg")
  self.assertEqual(chunks[1].data, b"fgh")
  self.assertEqual(chunks[2].data, b"ghi")
  self.assertEqual(chunks[0].offset, 4)
  self.assertEqual(chunks[1].offset, 5)
  self.assertEqual(chunks[2].offset, 6)
  self.assertEqual(chunks[0].overlap, 0)
  self.assertEqual(chunks[1].overlap, 2)
  self.assertEqual(chunks[2].overlap, 2)
def DumpProcess(self, psutil_process, args):
  response = rdf_memory.YaraProcessDumpInformation()
  response.process = rdf_client.Process.FromPsutilProcess(psutil_process)

  process = client_utils.OpenProcessForMemoryAccess(pid=psutil_process.pid)

  bytes_limit = args.size_limit

  with process:
    streamer = streaming.Streamer(chunk_size=args.chunk_size)

    with tempfiles.TemporaryDirectory(cleanup=False) as tmp_dir:
      for start, length in client_utils.MemoryRegions(process, args):

        if bytes_limit and self.bytes_written + length > bytes_limit:
          response.error = ("Byte limit exceeded. Wrote %d bytes, "
                            "next block is %d bytes, limit is %d." %
                            (self.bytes_written, length, bytes_limit))
          return response

        end = start + length

        # TODO: The filename is parsed on the server side to extract the
        # memory address again. This should be changed by saving the `start`
        # and `end` in YaraProcessDumpInformation.
        filename = "%s_%d_%x_%x.tmp" % (psutil_process.name(),
                                        psutil_process.pid, start, end)
        filepath = os.path.join(tmp_dir.path, filename)

        chunks = streamer.StreamMemory(process, offset=start, amount=length)
        bytes_written = self._SaveMemDumpToFilePath(filepath, chunks)

        if not bytes_written:
          continue

        self.bytes_written += bytes_written

        response.dump_files.Append(
            rdf_paths.PathSpec(
                path=filepath,
                pathtype=rdf_paths.PathSpec.PathType.TMPFILE))

  return response
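# Hypothetical helper (not part of GRR) illustrating how the dump file name
# built in DumpProcess above ("%s_%d_%x_%x.tmp" % (name, pid, start, end))
# could be parsed back into the region bounds that the TODO comment refers
# to. The name `ParseDumpFilename` is made up for this sketch.
def ParseDumpFilename(basename: str):
  """Returns (process_name, pid, start, end) for a dump file basename."""
  stem = basename[:-len(".tmp")]
  # The process name may itself contain underscores, so split from the right:
  # the last three components are always pid (decimal), start and end (hex).
  name, pid, start_hex, end_hex = stem.rsplit("_", 3)
  return name, int(pid), int(start_hex, 16), int(end_hex, 16)


# For example, ParseDumpFilename("bash_1234_7f0000000000_7f0000021000.tmp")
# returns ("bash", 1234, 0x7f0000000000, 0x7f0000021000).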
def testUnbound(self):
  streamer = streaming.Streamer(chunk_size=9, overlap_size=2)
  method = self.Stream(streamer, b"abcdefghijklmnopqrstuvwxyz")
  chunks = list(method())

  self.assertLen(chunks, 4)
  self.assertEqual(chunks[0].data, b"abcdefghi")
  self.assertEqual(chunks[1].data, b"hijklmnop")
  self.assertEqual(chunks[2].data, b"opqrstuvw")
  self.assertEqual(chunks[3].data, b"vwxyz")
  self.assertEqual(chunks[0].offset, 0)
  self.assertEqual(chunks[1].offset, 7)
  self.assertEqual(chunks[2].offset, 14)
  self.assertEqual(chunks[3].offset, 21)
  self.assertEqual(chunks[0].overlap, 0)
  self.assertEqual(chunks[1].overlap, 2)
  self.assertEqual(chunks[2].overlap, 2)
  self.assertEqual(chunks[3].overlap, 2)
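# A minimal, self-contained sketch (not the GRR implementation) of the
# chunking contract that the Streamer tests here assert: the first chunk
# starts at `offset`, every following chunk re-reads `overlap_size` bytes
# from the end of the previous one, reading stops after `amount` new bytes,
# and a chunk that would contain no new data is never emitted. The names
# `Chunk` and `StreamBytes` below are hypothetical.
from typing import Iterator, NamedTuple, Optional


class Chunk(NamedTuple):
  offset: int   # Absolute offset of the first byte of `data`.
  data: bytes   # Overlap bytes followed by newly read bytes.
  overlap: int  # Number of leading bytes repeated from the previous chunk.


def StreamBytes(data: bytes,
                chunk_size: int,
                overlap_size: int,
                offset: int = 0,
                amount: Optional[int] = None) -> Iterator[Chunk]:
  """Yields overlapping chunks of `data`, mirroring the assertions above."""
  if amount is None:
    amount = len(data) - offset
  end = min(len(data), offset + amount)

  pos = offset  # First byte not yet returned as new data.
  overlap = 0
  while pos < end:
    start = pos - overlap
    chunk_data = data[start:min(start + chunk_size, end)]
    if len(chunk_data) <= overlap:  # No new bytes left to return.
      break
    yield Chunk(offset=start, data=chunk_data, overlap=overlap)
    pos = start + len(chunk_data)
    overlap = min(overlap_size, len(chunk_data))


# For example, StreamBytes(b"abcdefg", chunk_size=4, overlap_size=2,
# amount=1024) yields the chunks asserted in testShorterOverlap above:
# (0, b"abcd", 0), (2, b"cdef", 2), (4, b"efg", 2).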
def _BatchIterateRegions(
    self, process, scan_request: rdf_memory.YaraProcessScanRequest
) -> Iterator[List[streaming.Chunk]]:
  streamer = streaming.Streamer(
      chunk_size=scan_request.chunk_size,
      overlap_size=scan_request.overlap_size)

  batch = []
  batch_size_bytes = 0

  for region in client_utils.MemoryRegions(process, scan_request):
    chunks = streamer.StreamRanges(offset=region.start, amount=region.size)
    for chunk in chunks:
      batch.append(chunk)
      batch_size_bytes += chunk.amount

      if (len(batch) >= self.MAX_BATCH_SIZE_CHUNKS or
          batch_size_bytes >= scan_request.chunk_size):
        yield batch
        batch = []
        batch_size_bytes = 0

  if batch:
    yield batch
def testProcessScan(self):
  self._client.UploadSignature(_SIGNATURE)

  all_scan_matches = []

  for region in self._process.Regions():
    streamer = streaming.Streamer(
        chunk_size=1024 * 1024, overlap_size=32 * 1024)
    for chunk in streamer.StreamRanges(region.start, region.size):
      response = self._client.ProcessScan(
          self._process_file_descriptor.Serialize(),
          [memory_pb2.Chunk(offset=chunk.offset, size=chunk.amount)], 60)
      self.assertEqual(response.status,
                       memory_pb2.ProcessScanResponse.Status.NO_ERROR)
      all_scan_matches.extend(response.scan_result.scan_match)

  self.assertTrue(all_scan_matches)

  found_in_actual_memory_count = 0

  for scan_match in all_scan_matches:
    self.assertEqual(scan_match.rule_name, "test_rule")
    for string_match in scan_match.string_matches:
      self.assertEqual(string_match.string_id, "$s1")
      self.assertEqual(string_match.data, _SEARCH_STRING)

      # Check that the reported result resides in memory of the scanned
      # process.
      actual_memory = self._process.ReadBytes(string_match.offset,
                                              len(string_match.data))
      # Since copies of the string might be in dynamic memory, we won't be
      # able to read back every match. We'll check that at least one of the
      # reads succeeds later.
      if actual_memory == _SEARCH_STRING:
        found_in_actual_memory_count += 1

  self.assertTrue(found_in_actual_memory_count)
def testNoData(self):
  streamer = streaming.Streamer(chunk_size=3, overlap_size=1)
  method = self.Stream(streamer, b"")
  chunks = list(method(amount=5))

  self.assertEmpty(chunks)
def testZeroAmount(self):
  streamer = streaming.Streamer(chunk_size=3, overlap_size=0)
  method = self.Stream(streamer, b"abcdef")
  chunks = list(method(amount=0))

  self.assertEmpty(chunks)