Example #1
    def _GetMatches(self, psutil_process, scan_request):
        if scan_request.per_process_timeout:
            deadline = (rdfvalue.RDFDatetime.Now() +
                        scan_request.per_process_timeout)
        else:
            deadline = rdfvalue.RDFDatetime.Now() + rdfvalue.Duration.From(
                1, rdfvalue.WEEKS)

        process = client_utils.OpenProcessForMemoryAccess(
            pid=psutil_process.pid)
        with process:
            streamer = streaming.Streamer(
                chunk_size=scan_request.chunk_size,
                overlap_size=scan_request.overlap_size)
            matches = []

            try:
                for region in client_utils.MemoryRegions(
                        process, scan_request):
                    chunks = streamer.StreamRanges(offset=region.start,
                                                   amount=region.size)
                    for m in self._ScanRegion(process, chunks, deadline):
                        matches.append(m)
                        if 0 < scan_request.max_results_per_process <= len(
                                matches):
                            return matches
            except TooManyMatchesError:
                # We need to report this as a hit, not an error.
                return matches

        return matches
Example #2
    def DumpProcess(self, psutil_process, args):
        response = rdf_memory.YaraProcessDumpInformation()
        response.process = rdf_client.Process.FromPsutilProcess(psutil_process)
        streamer = streaming.Streamer(chunk_size=args.chunk_size)

        with client_utils.OpenProcessForMemoryAccess(
                psutil_process.pid) as process:
            regions = list(client_utils.MemoryRegions(process, args))

            if args.prioritize_offsets:
                regions = _PrioritizeRegions(regions, args.prioritize_offsets)

            if args.size_limit:
                total_regions = len(regions)
                regions = _ApplySizeLimit(regions, args.size_limit)
                if len(regions) < total_regions:
                    response.error = ("Byte limit exceeded. Writing {} of {} "
                                      "regions.").format(
                                          len(regions), total_regions)
            else:
                for region in regions:
                    region.dumped_size = region.size

            regions = sorted(regions, key=lambda r: r.start)

            with tempfiles.TemporaryDirectory(cleanup=False) as tmp_dir:
                for region in regions:
                    self.Progress()
                    pathspec = self._SaveRegionToDirectory(
                        psutil_process, process, region, tmp_dir, streamer)
                    if pathspec is not None:
                        region.file = pathspec
                        response.memory_regions.Append(region)

        return response
Example #3
  def _GetMatches(self, psutil_process, scan_request):
    if scan_request.per_process_timeout:
      deadline = rdfvalue.RDFDatetime.Now() + scan_request.per_process_timeout
    else:
      deadline = rdfvalue.RDFDatetime.Now() + rdfvalue.Duration.From(
          1, rdfvalue.WEEKS)

    rules = scan_request.yara_signature.GetRules()

    process = client_utils.OpenProcessForMemoryAccess(pid=psutil_process.pid)
    with process:
      streamer = streaming.Streamer(
          chunk_size=scan_request.chunk_size,
          overlap_size=scan_request.overlap_size)
      matches = []

      try:
        for region in client_utils.MemoryRegions(process, scan_request):
          chunks = streamer.StreamMemory(
              process, offset=region.start, amount=region.size)
          for m in self._ScanRegion(rules, chunks, deadline):
            matches.append(m)
            if 0 < scan_request.max_results_per_process <= len(matches):
              return matches
      except yara.Error as e:
        # Yara internal error 30 is too many hits (obviously...). We
        # need to report this as a hit, not an error.
        if "internal error: 30" in str(e):
          return matches
        raise

    return matches
Example #4
  def Scan(self, path, matcher):
    """Scans given file searching for occurrences of given pattern.

    Args:
      path: A path to the file that needs to be searched.
      matcher: A matcher object specifying a pattern to search for.

    Yields:
      `BufferReference` objects pointing to file parts with matching content.
    """
    streamer = streaming.Streamer(
        chunk_size=self.CHUNK_SIZE, overlap_size=self.OVERLAP_SIZE)

    offset = self.params.start_offset
    amount = self.params.length
    for chunk in streamer.StreamFilePath(path, offset=offset, amount=amount):
      for span in chunk.Scan(matcher):
        ctx_begin = max(span.begin - self.params.bytes_before, 0)
        ctx_end = min(span.end + self.params.bytes_after, len(chunk.data))
        ctx_data = chunk.data[ctx_begin:ctx_end]

        yield rdf_client.BufferReference(
            offset=chunk.offset + ctx_begin,
            length=len(ctx_data),
            data=ctx_data)

        if self.params.mode == self.params.Mode.FIRST_HIT:
          return
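The OVERLAP_SIZE passed to the Streamer above is what lets a pattern that straddles a chunk boundary still be found. A minimal standalone sketch of the same overlapping-chunk idea (plain Python, not the GRR Streamer; the helper name and sizes are illustrative):

    def stream_ranges(data, chunk_size, overlap_size):
        # Yield (offset, chunk) pairs; each chunk repeats the last
        # overlap_size bytes of the previous one.
        offset = 0
        while offset < len(data):
            yield offset, data[offset:offset + chunk_size]
            if offset + chunk_size >= len(data):
                break
            offset += chunk_size - overlap_size

    data = b"xxxxNEEDLExxxx"
    # With overlap_size >= len(pattern) - 1 the pattern always falls fully
    # inside at least one chunk, even when it crosses a chunk boundary.
    chunks = stream_ranges(data, chunk_size=8, overlap_size=5)
    hits = [offset + chunk.find(b"NEEDLE")
            for offset, chunk in chunks if b"NEEDLE" in chunk]
    assert hits == [data.find(b"NEEDLE")]
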
Example #5
    def _ScanProcess(self, psutil_process, args):
        if args.per_process_timeout:
            deadline = rdfvalue.RDFDatetime.Now() + args.per_process_timeout
        else:
            deadline = rdfvalue.RDFDatetime.Now() + rdfvalue.Duration("1w")

        rules = args.yara_signature.GetRules()

        process = client_utils.OpenProcessForMemoryAccess(
            pid=psutil_process.pid)
        with process:
            streamer = streaming.Streamer(chunk_size=args.chunk_size,
                                          overlap_size=args.overlap_size)
            matches = []

            try:
                for start, length in client_utils.MemoryRegions(process, args):
                    chunks = streamer.StreamMemory(process,
                                                   offset=start,
                                                   amount=length)
                    for m in self._ScanRegion(rules, chunks, deadline):
                        matches.append(m)
                        if (args.max_results_per_process > 0 and
                                len(matches) >= args.max_results_per_process):
                            return matches
            except yara.Error as e:
                # Yara internal error 30 is too many hits (obviously...). We
                # need to report this as a hit, not an error.
                if "internal error: 30" in str(e):
                    return matches
                raise

        return matches
Example #6
    def testSingleChunk(self):
        streamer = streaming.Streamer(chunk_size=8, overlap_size=2)
        method = self.Stream(streamer, b"abcdef")
        chunks = list(method(amount=7))

        self.assertLen(chunks, 1)
        self.assertEqual(chunks[0].data, b"abcdef")
        self.assertEqual(chunks[0].offset, 0)
        self.assertEqual(chunks[0].overlap, 0)
Example #7
    def DumpProcess(self, psutil_process, args):
        response = rdf_memory.YaraProcessDumpInformation()
        response.process = rdf_client.Process.FromPsutilProcess(psutil_process)

        process = client_utils.OpenProcessForMemoryAccess(
            pid=psutil_process.pid)

        bytes_limit = args.size_limit

        with process:
            streamer = streaming.Streamer(chunk_size=args.chunk_size)

            with tempfiles.TemporaryDirectory(cleanup=False) as tmp_dir:
                for region in client_utils.MemoryRegions(process, args):

                    if bytes_limit and self.bytes_written + region.size > bytes_limit:
                        response.error = (
                            "Byte limit exceeded. Wrote %d bytes, "
                            "next block is %d bytes, limit is %d." %
                            (self.bytes_written, region.size, bytes_limit))
                        return response

                    end = region.start + region.size

                    # _ReplaceDumpPathspecsWithMultiGetFilePathspec in DumpProcessMemory
                    # flow asserts that MemoryRegions can be uniquely identified by their
                    # file's basename.
                    filename = "%s_%d_%x_%x.tmp" % (psutil_process.name(),
                                                    psutil_process.pid,
                                                    region.start, end)
                    filepath = os.path.join(tmp_dir.path, filename)

                    chunks = streamer.StreamMemory(process,
                                                   offset=region.start,
                                                   amount=region.size)
                    bytes_written = self._SaveMemDumpToFilePath(
                        filepath, chunks)

                    if not bytes_written:
                        continue

                    self.bytes_written += bytes_written

                    # TODO: Remove workaround after client_utils are fixed.
                    canonical_path = client_utils.LocalPathToCanonicalPath(
                        filepath)
                    if not canonical_path.startswith("/"):
                        canonical_path = "/" + canonical_path

                    region.file = rdf_paths.PathSpec(
                        path=canonical_path,
                        pathtype=rdf_paths.PathSpec.PathType.TMPFILE)

                    response.memory_regions.Append(region)

        return response
Example #8
  def __init__(self, action, chunk_size=None):
    """Initializes the uploader.

    Args:
      action: A parent action that creates the uploader. Used to communicate
              with the parent flow.
      chunk_size: The number of (uncompressed) bytes per chunk.
    """
    chunk_size = chunk_size or self.DEFAULT_CHUNK_SIZE

    self._action = action
    self._streamer = streaming.Streamer(chunk_size=chunk_size)
Example #9
    def testSmallAmount(self):
        streamer = streaming.Streamer(chunk_size=1, overlap_size=0)
        method = self.Stream(streamer, b"abc")
        chunks = list(method(amount=2))

        self.assertLen(chunks, 2)
        self.assertEqual(chunks[0].data, b"a")
        self.assertEqual(chunks[1].data, b"b")
        self.assertEqual(chunks[0].offset, 0)
        self.assertEqual(chunks[1].offset, 1)
        self.assertEqual(chunks[0].overlap, 0)
        self.assertEqual(chunks[1].overlap, 0)
Example #10
    def testOneByteOverlap(self):
        streamer = streaming.Streamer(chunk_size=3, overlap_size=1)
        method = self.Stream(streamer, b"abcdef")
        chunks = list(method(amount=8))

        self.assertLen(chunks, 3)
        self.assertEqual(chunks[0].data, b"abc")
        self.assertEqual(chunks[1].data, b"cde")
        self.assertEqual(chunks[2].data, b"ef")
        self.assertEqual(chunks[0].offset, 0)
        self.assertEqual(chunks[1].offset, 2)
        self.assertEqual(chunks[2].offset, 4)
        self.assertEqual(chunks[0].overlap, 0)
        self.assertEqual(chunks[1].overlap, 1)
        self.assertEqual(chunks[2].overlap, 1)
Example #11
    def testShorterOverlap(self):
        streamer = streaming.Streamer(chunk_size=4, overlap_size=2)
        method = self.Stream(streamer, b"abcdefg")
        chunks = list(method(amount=1024))

        self.assertLen(chunks, 3)
        self.assertEqual(chunks[0].data, b"abcd")
        self.assertEqual(chunks[1].data, b"cdef")
        self.assertEqual(chunks[2].data, b"efg")
        self.assertEqual(chunks[0].offset, 0)
        self.assertEqual(chunks[1].offset, 2)
        self.assertEqual(chunks[2].offset, 4)
        self.assertEqual(chunks[0].overlap, 0)
        self.assertEqual(chunks[1].overlap, 2)
        self.assertEqual(chunks[2].overlap, 2)
Example #12
    def testNoOverlap(self):
        streamer = streaming.Streamer(chunk_size=3, overlap_size=0)
        method = self.Stream(streamer, b"abcdefgh")
        chunks = list(method(amount=8))

        self.assertLen(chunks, 3)
        self.assertEqual(chunks[0].data, b"abc")
        self.assertEqual(chunks[1].data, b"def")
        self.assertEqual(chunks[2].data, b"gh")
        self.assertEqual(chunks[0].offset, 0)
        self.assertEqual(chunks[1].offset, 3)
        self.assertEqual(chunks[2].offset, 6)
        self.assertEqual(chunks[0].overlap, 0)
        self.assertEqual(chunks[1].overlap, 0)
        self.assertEqual(chunks[2].overlap, 0)
Example #13
    def testOffset(self):
        streamer = streaming.Streamer(chunk_size=3, overlap_size=2)
        method = self.Stream(streamer, b"abcdefghi")
        chunks = list(method(offset=4, amount=108))

        self.assertLen(chunks, 3)
        self.assertEqual(chunks[0].data, b"efg")
        self.assertEqual(chunks[1].data, b"fgh")
        self.assertEqual(chunks[2].data, b"ghi")
        self.assertEqual(chunks[0].offset, 4)
        self.assertEqual(chunks[1].offset, 5)
        self.assertEqual(chunks[2].offset, 6)
        self.assertEqual(chunks[0].overlap, 0)
        self.assertEqual(chunks[1].overlap, 2)
        self.assertEqual(chunks[2].overlap, 2)
Example #14
    def DumpProcess(self, psutil_process, args):
        response = rdf_memory.YaraProcessDumpInformation()
        response.process = rdf_client.Process.FromPsutilProcess(psutil_process)

        process = client_utils.OpenProcessForMemoryAccess(
            pid=psutil_process.pid)

        bytes_limit = args.size_limit

        with process:
            streamer = streaming.Streamer(chunk_size=args.chunk_size)

            with tempfiles.TemporaryDirectory(cleanup=False) as tmp_dir:
                for start, length in client_utils.MemoryRegions(process, args):

                    if bytes_limit and self.bytes_written + length > bytes_limit:
                        response.error = (
                            "Byte limit exceeded. Wrote %d bytes, "
                            "next block is %d bytes, limit is %d." %
                            (self.bytes_written, length, bytes_limit))
                        return response

                    end = start + length
                    # TODO: The filename is parsed on the server side to
                    # extract the memory address again. This should be changed by
                    # saving the `start` and `end` in YaraProcessDumpInformation.
                    filename = "%s_%d_%x_%x.tmp" % (
                        psutil_process.name(), psutil_process.pid, start, end)
                    filepath = os.path.join(tmp_dir.path, filename)

                    chunks = streamer.StreamMemory(process,
                                                   offset=start,
                                                   amount=length)
                    bytes_written = self._SaveMemDumpToFilePath(
                        filepath, chunks)

                    if not bytes_written:
                        continue

                    self.bytes_written += bytes_written
                    response.dump_files.Append(
                        rdf_paths.PathSpec(
                            path=filepath,
                            pathtype=rdf_paths.PathSpec.PathType.TMPFILE))

        return response
Example #15
    def testUnbound(self):
        streamer = streaming.Streamer(chunk_size=9, overlap_size=2)
        method = self.Stream(streamer, b"abcdefghijklmnopqrstuvwxyz")
        chunks = list(method())

        self.assertLen(chunks, 4)
        self.assertEqual(chunks[0].data, b"abcdefghi")
        self.assertEqual(chunks[1].data, b"hijklmnop")
        self.assertEqual(chunks[2].data, b"opqrstuvw")
        self.assertEqual(chunks[3].data, b"vwxyz")
        self.assertEqual(chunks[0].offset, 0)
        self.assertEqual(chunks[1].offset, 7)
        self.assertEqual(chunks[2].offset, 14)
        self.assertEqual(chunks[3].offset, 21)
        self.assertEqual(chunks[0].overlap, 0)
        self.assertEqual(chunks[1].overlap, 2)
        self.assertEqual(chunks[2].overlap, 2)
        self.assertEqual(chunks[3].overlap, 2)
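
The offsets these tests assert all follow the same arithmetic: each new chunk starts chunk_size - overlap_size bytes after the previous one, and the final chunk simply runs to the end of the data. A quick check for the testUnbound case above (plain Python; the stopping rule used here is an assumption that fits these inputs, not the Streamer's exact implementation):

    chunk_size, overlap_size, data_len = 9, 2, 26
    stride = chunk_size - overlap_size  # 7 new bytes per chunk
    offsets = []
    start = 0
    while start < data_len:
        offsets.append(start)
        if start + chunk_size >= data_len:  # last chunk reaches end of data
            break
        start += stride
    assert offsets == [0, 7, 14, 21]
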
Example #16
    def _BatchIterateRegions(
        self, process, scan_request: rdf_memory.YaraProcessScanRequest
    ) -> Iterator[List[streaming.Chunk]]:
        streamer = streaming.Streamer(chunk_size=scan_request.chunk_size,
                                      overlap_size=scan_request.overlap_size)
        batch = []
        batch_size_bytes = 0
        for region in client_utils.MemoryRegions(process, scan_request):
            chunks = streamer.StreamRanges(offset=region.start,
                                           amount=region.size)
            for chunk in chunks:
                batch.append(chunk)
                batch_size_bytes += chunk.amount
                if (len(batch) >= self.MAX_BATCH_SIZE_CHUNKS
                        or batch_size_bytes >= scan_request.chunk_size):
                    yield batch
                    batch = []
                    batch_size_bytes = 0
        if batch:
            yield batch
Example #17
    def testProcessScan(self):
        self._client.UploadSignature(_SIGNATURE)

        all_scan_matches = []

        for region in self._process.Regions():
            streamer = streaming.Streamer(chunk_size=1024 * 1024,
                                          overlap_size=32 * 1024)
            for chunk in streamer.StreamRanges(region.start, region.size):
                response = self._client.ProcessScan(
                    self._process_file_descriptor.Serialize(),
                    [memory_pb2.Chunk(offset=chunk.offset, size=chunk.amount)],
                    60)
                self.assertEqual(
                    response.status,
                    memory_pb2.ProcessScanResponse.Status.NO_ERROR)
                all_scan_matches.extend(response.scan_result.scan_match)

        self.assertTrue(all_scan_matches)

        found_in_actual_memory_count = 0

        for scan_match in all_scan_matches:
            self.assertEqual(scan_match.rule_name, "test_rule")
            for string_match in scan_match.string_matches:
                self.assertEqual(string_match.string_id, "$s1")
                self.assertEqual(string_match.data, _SEARCH_STRING)
                # Check that the reported result resides in memory of the
                # scanned process.
                actual_memory = self._process.ReadBytes(
                    string_match.offset, len(string_match.data))
                # Since copies of the string might be in dynamic memory, we won't be
                # able to read back every match. We'll check that at least one of the
                # reads succeeds later.
                if actual_memory == _SEARCH_STRING:
                    found_in_actual_memory_count += 1

        self.assertTrue(found_in_actual_memory_count)
Example #18
    def testNoData(self):
        streamer = streaming.Streamer(chunk_size=3, overlap_size=1)
        method = self.Stream(streamer, b"")
        chunks = list(method(amount=5))

        self.assertEmpty(chunks)
Example #19
    def testZeroAmount(self):
        streamer = streaming.Streamer(chunk_size=3, overlap_size=0)
        method = self.Stream(streamer, b"abcdef")
        chunks = list(method(amount=0))

        self.assertEmpty(chunks)
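
Taken together, the examples show the Streamer contract: construct it with a chunk_size and an optional overlap_size, then iterate chunks from StreamFilePath or StreamMemory (which carry the bytes read) or StreamRanges (which only describes the offset/amount pairs to read). Each chunk reports its absolute offset and how many of its leading bytes repeat the previous chunk. A minimal usage sketch, assuming GRR's client streaming module is importable as grr_response_client.streaming (the file path and sizes here are placeholders):

    from grr_response_client import streaming

    streamer = streaming.Streamer(chunk_size=1024 * 1024,
                                  overlap_size=32 * 1024)
    chunks = streamer.StreamFilePath("/tmp/example.bin", offset=0,
                                     amount=2**20)
    for chunk in chunks:
        # chunk.offset is absolute within the file; chunk.overlap counts the
        # leading bytes already seen at the end of the previous chunk.
        print(chunk.offset, len(chunk.data), chunk.overlap)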