Code Example #1
    def setUp(self):
        super(CollectionArchiveGeneratorTest, self).setUp()
        self.client_id = self.SetupClients(1)[0]
        path1 = self.client_id.Add("fs/os/foo/bar/hello1.txt")
        archive_path1 = (u"test_prefix/%s/fs/os/foo/bar/hello1.txt" %
                         self.client_id.Basename())

        with aff4.FACTORY.Create(path1,
                                 aff4.AFF4MemoryStream,
                                 token=self.token) as fd:
            fd.Write("hello1")
            fd.Set(fd.Schema.HASH,
                   rdf_crypto.Hash(sha256=hashlib.sha256("hello1").digest()))

        path2 = self.client_id.Add(u"fs/os/foo/bar/中国新闻网新闻中.txt")
        archive_path2 = (u"test_prefix/%s/fs/os/foo/bar/"
                         u"中国新闻网新闻中.txt") % self.client_id.Basename()
        with aff4.FACTORY.Create(path2,
                                 aff4.AFF4MemoryStream,
                                 token=self.token) as fd:
            fd.Write("hello2")
            fd.Set(fd.Schema.HASH,
                   rdf_crypto.Hash(sha256=hashlib.sha256("hello2").digest()))

        self.stat_entries = []
        self.paths = [path1, path2]
        self.archive_paths = [archive_path1, archive_path2]
        for path in self.paths:
            self.stat_entries.append(
                rdf_client.StatEntry(pathspec=rdf_paths.PathSpec(
                    path="foo/bar/" + str(path).split("/")[-1],
                    pathtype=rdf_paths.PathSpec.PathType.OS)))

        self.fd = None
Code Example #2
File: export_test.py  Project: bwagner5/grr
  def setUp(self):
    super(TestExportCollectionFilesAsArchive, self).setUp()

    path1 = "aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt"
    fd = aff4.FACTORY.Create(path1, aff4.AFF4MemoryStream, token=self.token)
    fd.Write("hello1")
    fd.Set(fd.Schema.HASH,
           rdf_crypto.Hash(sha256=hashlib.sha256("hello1").digest()))
    fd.Close()

    path2 = u"aff4:/C.0000000000000000/fs/os/foo/bar/中国新闻网新闻中.txt"
    fd = aff4.FACTORY.Create(path2, aff4.AFF4MemoryStream, token=self.token)
    fd.Write("hello2")
    fd.Set(fd.Schema.HASH,
           rdf_crypto.Hash(sha256=hashlib.sha256("hello2").digest()))
    fd.Close()

    self.collection_urn = aff4.ROOT_URN.Add("hunts/H:ABCDEF/Results")
    self.paths = [path1, path2]
    with aff4.FACTORY.Create(
        self.collection_urn, aff4_type=collects.RDFValueCollection, mode="w",
        token=self.token) as collection:

      for path in self.paths:
        collection.Add(rdf_client.StatEntry(
            aff4path=path,
            pathspec=rdf_paths.PathSpec(
                path="fs/os/foo/bar/" + path.split("/")[-1],
                pathtype=rdf_paths.PathSpec.PathType.OS)))
Code Example #3
    def setUp(self):
        super(CollectionArchiveGeneratorTest, self).setUp()

        path1 = "aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt"
        with aff4.FACTORY.Create(path1,
                                 aff4.AFF4MemoryStream.__name__,
                                 token=self.token) as fd:
            fd.Write("hello1")
            fd.Set(fd.Schema.HASH,
                   rdf_crypto.Hash(sha256=hashlib.sha256("hello1").digest()))

        path2 = u"aff4:/C.0000000000000000/fs/os/foo/bar/中国新闻网新闻中.txt"
        with aff4.FACTORY.Create(path2,
                                 aff4.AFF4MemoryStream.__name__,
                                 token=self.token) as fd:
            fd.Write("hello2")
            fd.Set(fd.Schema.HASH,
                   rdf_crypto.Hash(sha256=hashlib.sha256("hello2").digest()))

        self.stat_entries = []
        self.paths = [path1, path2]
        for path in self.paths:
            self.stat_entries.append(
                rdf_client.StatEntry(
                    aff4path=path,
                    pathspec=rdf_paths.PathSpec(
                        path="fs/os/foo/bar/" + path.split("/")[-1],
                        pathtype=rdf_paths.PathSpec.PathType.OS)))

        self.fd = None
Code Example #4
    def testFindPathInfoByPathIDTimestampStatAndHashEntry(self):
        client_id = self.InitializeClient()

        path_info = objects.PathInfo.OS(components=["foo"])
        path_id = objects.PathID(["foo"])

        path_info.stat_entry = rdf_client.StatEntry(st_mode=42)
        path_info.hash_entry = None
        self.db.WritePathInfos(client_id, [path_info])
        timestamp_1 = rdfvalue.RDFDatetime.Now()

        path_info.stat_entry = None
        path_info.hash_entry = rdf_crypto.Hash(md5=b"quux")
        self.db.WritePathInfos(client_id, [path_info])
        timestamp_2 = rdfvalue.RDFDatetime.Now()

        path_info.stat_entry = rdf_client.StatEntry(st_mode=1337)
        path_info.hash_entry = None
        self.db.WritePathInfos(client_id, [path_info])
        timestamp_3 = rdfvalue.RDFDatetime.Now()

        path_info.stat_entry = rdf_client.StatEntry(st_mode=4815162342)
        path_info.hash_entry = rdf_crypto.Hash(md5=b"norf")
        self.db.WritePathInfos(client_id, [path_info])
        timestamp_4 = rdfvalue.RDFDatetime.Now()

        path_info_1 = self.db.FindPathInfoByPathID(
            client_id,
            objects.PathInfo.PathType.OS,
            path_id,
            timestamp=timestamp_1)
        self.assertEqual(path_info_1.stat_entry.st_mode, 42)
        self.assertFalse(path_info_1.HasField("hash_entry"))

        path_info_2 = self.db.FindPathInfoByPathID(
            client_id,
            objects.PathInfo.PathType.OS,
            path_id,
            timestamp=timestamp_2)
        self.assertEqual(path_info_2.stat_entry.st_mode, 42)
        self.assertEqual(path_info_2.hash_entry.md5, b"quux")

        path_info_3 = self.db.FindPathInfoByPathID(
            client_id,
            objects.PathInfo.PathType.OS,
            path_id,
            timestamp=timestamp_3)
        self.assertEqual(path_info_3.stat_entry.st_mode, 1337)
        self.assertEqual(path_info_3.hash_entry.md5, b"quux")

        path_info_4 = self.db.FindPathInfoByPathID(
            client_id,
            objects.PathInfo.PathType.OS,
            path_id,
            timestamp=timestamp_4)
        self.assertEqual(path_info_4.stat_entry.st_mode, 4815162342)
        self.assertEqual(path_info_4.hash_entry.md5, b"norf")
Code Example #5
File: standard.py  Project: wprelic/grr
    def Run(self, args):
        hashers = {}
        for t in args.tuples:
            for hash_name in t.hashers:
                hashers[str(hash_name).lower()] = self._hash_types[str(
                    hash_name)]()

        with vfs.VFSOpen(args.pathspec,
                         progress_callback=self.Progress) as file_obj:
            # Only read as many bytes as we were told.
            bytes_read = 0
            while bytes_read < args.max_filesize:
                self.Progress()
                data = file_obj.Read(MAX_BUFFER_SIZE)
                if not data:
                    break
                for hasher in hashers.values():
                    hasher.update(data)

                bytes_read += len(data)

            response = rdf_client.FingerprintResponse(
                pathspec=file_obj.pathspec,
                bytes_read=bytes_read,
                hash=rdf_crypto.Hash(**dict(
                    (k, v.digest()) for k, v in hashers.iteritems())))

            self.SendReply(response)
Code Example #6
 def GetHashObject(self):
     """Returns a `Hash` object with appropriate fields filled-in."""
     hash_object = rdf_crypto.Hash()
     hash_object.num_bytes = self._bytes_read
     for algorithm in self._hashers:
         setattr(hash_object, algorithm, self._hashers[algorithm].digest())
     return hash_object
Code Example #7
File: file_finder.py  Project: stephanas50/grr
  def Hash(self,
           fname,
           stat_object,
           policy_max_hash_size,
           oversized_file_policy,
           resolve_links=True):
    file_size = stat_object.st_size
    if file_size <= policy_max_hash_size:
      max_hash_size = file_size
    else:
      ff_opts = rdf_file_finder.FileFinderHashActionOptions
      if oversized_file_policy == ff_opts.OversizedFilePolicy.SKIP:
        return
      elif oversized_file_policy == ff_opts.OversizedFilePolicy.HASH_TRUNCATED:
        max_hash_size = policy_max_hash_size

    try:
      file_obj = open(fname, "rb")
    except IOError:
      return

    with file_obj:
      hashers, bytes_read = standard_actions.HashFile().HashFile(
          ["md5", "sha1", "sha256"], file_obj, max_hash_size)
    result = rdf_crypto.Hash(**dict((k, v.digest())
                                    for k, v in hashers.iteritems()))
    result.num_bytes = bytes_read
    return result
Code Example #8
File: flow_management_test.py  Project: frntn/grr
 def Start(self):
   hash_result = rdf_crypto.Hash(
       sha256=("9e8dc93e150021bb4752029ebbff51394aa36f069cf19901578"
               "e4f06017acdb5").decode("hex"),
       sha1="6dd6bee591dfcb6d75eb705405302c3eab65e21a".decode("hex"),
       md5="8b0a15eefe63fd41f8dc9dee01c5cf9a".decode("hex"))
   self.SendReply(hash_result)
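
The hex literals above are turned into raw bytes with Python 2's str.decode("hex"). A minimal sketch of the same decoding under Python 3 (hypothetical, not part of the original test) uses bytes.fromhex; the resulting byte strings would be passed to rdf_crypto.Hash(sha256=..., sha1=..., md5=...) exactly as in the example:

    # Python 3 sketch: bytes.fromhex replaces str.decode("hex").
    sha256_bytes = bytes.fromhex(
        "9e8dc93e150021bb4752029ebbff51394aa36f069cf19901578e4f06017acdb5")
    sha1_bytes = bytes.fromhex("6dd6bee591dfcb6d75eb705405302c3eab65e21a")
    md5_bytes = bytes.fromhex("8b0a15eefe63fd41f8dc9dee01c5cf9a")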
Code Example #9
  def DownloadCollectionFiles(self, collection, output_writer, prefix):
    """Download all files from the collection and deduplicate along the way."""

    hashes = set()
    for fd_urn_batch in utils.Grouper(self.ResultsToUrns(collection),
                                      self.BATCH_SIZE):
      self.HeartBeat()

      for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=self.token):
        self.state.total_files += 1

        # Any file-like object with data in AFF4 should inherit AFF4Stream.
        if isinstance(fd, aff4.AFF4Stream):
          archive_path = os.path.join(prefix, *fd.urn.Split())
          self.state.archived_files += 1

          sha256_hash = fd.Get(fd.Schema.HASH, rdf_crypto.Hash()).sha256
          content_path = os.path.join(prefix, "hashes", str(sha256_hash))
          if sha256_hash not in hashes:
            # Make sure size of the original file is passed. It's required
            # when output_writer is StreamingTarWriter.
            st = os.stat_result((0644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
            output_writer.WriteFromFD(fd, content_path, st=st)
            hashes.add(sha256_hash)
            self.Log("Written contents: " + content_path)

          up_prefix = "../" * len(fd.urn.Split())
          output_writer.WriteSymlink(up_prefix + content_path, archive_path)
          self.Log("Written symlink %s -> %s", archive_path,
                   up_prefix + content_path)
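
Note that 0644 is a Python 2 octal literal. A small sketch of the same os.stat_result construction under Python 3 (assuming the rest of the example is ported as-is) spells it 0o644:

    import os

    # Python 3 sketch: 0o644 replaces the Python 2 literal 0644; the size field
    # (here 0) stands in for fd.size from the example above.
    st = os.stat_result((0o644, 0, 0, 0, 0, 0, 0, 0, 0, 0))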
Code Example #10
    def testWritePathInfoHashAndStatEntrySeparateWrites(self):
        client_id = self.InitializeClient()

        stat_entry = rdf_client.StatEntry(st_mode=1337)
        stat_entry_path_info = objects.PathInfo.OS(components=["foo"],
                                                   stat_entry=stat_entry)

        stat_entry_timestamp = rdfvalue.RDFDatetime.Now()
        self.db.WritePathInfos(client_id, [stat_entry_path_info])

        hash_entry = rdf_crypto.Hash(sha256=hashlib.sha256("foo").digest())
        hash_entry_path_info = objects.PathInfo.OS(components=["foo"],
                                                   hash_entry=hash_entry)

        hash_entry_timestamp = rdfvalue.RDFDatetime.Now()
        self.db.WritePathInfos(client_id, [hash_entry_path_info])

        result = self.db.FindPathInfoByPathID(client_id,
                                              objects.PathInfo.PathType.OS,
                                              objects.PathID(["foo"]))

        now = rdfvalue.RDFDatetime.Now()

        self.assertEqual(result.components, ["foo"])
        self.assertTrue(result.HasField("stat_entry"))
        self.assertTrue(result.HasField("hash_entry"))
        self.assertEqual(result.stat_entry, stat_entry)
        self.assertEqual(result.hash_entry, hash_entry)
        self.assertGreater(result.last_stat_entry_timestamp,
                           stat_entry_timestamp)
        self.assertLess(result.last_stat_entry_timestamp, hash_entry_timestamp)
        self.assertGreater(result.last_hash_entry_timestamp,
                           hash_entry_timestamp)
        self.assertLess(result.last_hash_entry_timestamp, now)
Code Example #11
  def ReceiveFileHash(self, responses):
    """Add hash digest to tracker and check with filestore."""
    # Support old clients which may not have the new client action in place yet.
    # TODO(user): Deprecate once all clients have the HashFile action.
    if not responses.success and responses.request.request.name == "HashFile":
      logging.debug(
          "HashFile action not available, falling back to FingerprintFile.")
      self.CallClient(
          server_stubs.FingerprintFile,
          responses.request.request.payload,
          next_state="ReceiveFileHash",
          request_data=responses.request_data)
      return

    index = responses.request_data["index"]
    if not responses.success:
      self.Log("Failed to hash file: %s", responses.status)
      self.state.pending_hashes.pop(index, None)
      # Report the error.
      self._FileFetchFailed(index, responses.request.request.name)
      return

    self.state.files_hashed += 1
    response = responses.First()
    if response.HasField("hash"):
      hash_obj = response.hash
    else:
      # Deprecate this method of returning hashes.
      hash_obj = rdf_crypto.Hash()

      if len(response.results) < 1 or response.results[0]["name"] != "generic":
        self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
        self.state.pending_hashes.pop(index, None)
        return

      result = response.results[0]

      try:
        for hash_type in ["md5", "sha1", "sha256"]:
          value = result.GetItem(hash_type)
          setattr(hash_obj, hash_type, value)
      except AttributeError:
        self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
        self.state.pending_hashes.pop(index, None)
        return

    try:
      tracker = self.state.pending_hashes[index]
    except KeyError:
      # Hashing the file failed, but we did stat it.
      self._FileFetchFailed(index, responses.request.request.name)
      return

    tracker["hash_obj"] = hash_obj
    tracker["bytes_read"] = response.bytes_read

    self.state.files_hashed_since_check += 1
    if self.state.files_hashed_since_check >= self.MIN_CALL_TO_FILE_STORE:
      self._CheckHashesWithFileStore()
Code Example #12
File: export_test.py  Project: mutedmouse/grr
    def setUp(self):
        super(TestExportHuntResultsFilesAsArchive, self).setUp()

        path1 = "aff4:/C.0000000000000000/fs/os/foo/bar/hello1.txt"
        fd = aff4.FACTORY.Create(path1, "AFF4MemoryStream", token=self.token)
        fd.Write("hello1")
        fd.Set(fd.Schema.HASH,
               rdf_crypto.Hash(sha256=hashlib.sha256("hello1").digest()))
        fd.Close()

        path2 = u"aff4:/C.0000000000000000/fs/os/foo/bar/中国新闻网新闻中.txt"
        fd = aff4.FACTORY.Create(path2, "AFF4MemoryStream", token=self.token)
        fd.Write("hello2")
        fd.Set(fd.Schema.HASH,
               rdf_crypto.Hash(sha256=hashlib.sha256("hello2").digest()))
        fd.Close()

        self.paths = [path1, path2]

        with hunts.GRRHunt.StartHunt(hunt_name="GenericHunt",
                                     regex_rules=[
                                         rdf_foreman.ForemanAttributeRegex(
                                             attribute_name="GRR client",
                                             attribute_regex="GRR")
                                     ],
                                     output_plugins=[],
                                     token=self.token) as hunt:

            self.hunt_urn = hunt.urn

            runner = hunt.GetRunner()
            runner.Start()

            with aff4.FACTORY.Create(runner.context.results_collection_urn,
                                     aff4_type="RDFValueCollection",
                                     mode="w",
                                     token=self.token) as collection:

                for path in self.paths:
                    collection.Add(
                        rdf_client.StatEntry(
                            aff4path=path,
                            pathspec=rdf_paths.PathSpec(
                                path="fs/os/foo/bar/" + path.split("/")[-1],
                                pathtype=rdf_paths.PathSpec.PathType.OS)))
Code Example #13
  def _RunFileFinderDownloadHello(self, upload, opts=None):
    action = rdf_file_finder.FileFinderAction.Download()
    action.download = opts

    upload.return_value = rdf_client.UploadedFile(
        bytes_uploaded=42, file_id="foo", hash=rdf_crypto.Hash())

    hello_path = os.path.join(self.base_path, "hello.exe")
    return self._RunFileFinder([hello_path], action)
Code Example #14
    def Generate(self, collection, token=None):
        """Generates archive from a given collection.

    Iterates the collection and generates an archive by yielding contents
    of every referenced AFF4Stream.

    Args:
      collection: Iterable with items that point to aff4 paths.
      token: User's ACLToken.

    Yields:
      Binary chunks comprising the generated archive.
    """
        hashes = set()
        for fd_urn_batch in utils.Grouper(self._ItemsToUrns(collection),
                                          self.BATCH_SIZE):

            for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=token):
                self.total_files += 1

                # Any file-like object with data in AFF4 should inherit AFF4Stream.
                if isinstance(fd, aff4.AFF4Stream):
                    archive_path = os.path.join(self.prefix, *fd.urn.Split())

                    sha256_hash = fd.Get(fd.Schema.HASH,
                                         rdf_crypto.Hash()).sha256
                    if not sha256_hash:
                        continue
                    self.archived_files += 1

                    content_path = os.path.join(self.prefix, "hashes",
                                                str(sha256_hash))
                    if sha256_hash not in hashes:
                        # Make sure size of the original file is passed. It's required
                        # when output_writer is StreamingTarWriter.
                        st = os.stat_result(
                            (0644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
                        try:
                            for chunk in self.archive_generator.WriteFromFD(
                                    fd, content_path, st=st):
                                yield chunk

                            hashes.add(sha256_hash)
                        except Exception:  # pylint: disable=broad-except
                            self.failed_files += 1
                            continue

                    up_prefix = "../" * len(fd.urn.Split())
                    yield self.archive_generator.WriteSymlink(
                        up_prefix + content_path, archive_path)

        for chunk in self._WriteDescription():
            yield chunk

        yield self.archive_generator.Close()
Code Example #15
File: vfs.py  Project: firefalc0n/grr
  def Handle(self, args, token=None):
    ValidateVfsPath(args.file_path)

    if args.timestamp:
      age = args.timestamp
    else:
      age = aff4.ALL_TIMES

    file_obj = aff4.FACTORY.Open(
        args.client_id.ToClientURN().Add(args.file_path),
        mode="r",
        age=age,
        token=token)

    if data_store.RelationalDBReadEnabled(category="vfs"):
      # These are not really "files" so they cannot be stored in the database
      # but they still can be queried so we need to return something. Sometimes
      # they contain a trailing slash so we need to take care of that.
      #
      # TODO(hanuszczak): Require VFS paths to be normalized so that trailing
      # slash is either forbidden or mandatory.
      if args.file_path.endswith("/"):
        args.file_path = args.file_path[:-1]
      if args.file_path in ["fs", "registry", "temp", "fs/os", "fs/tsk"]:
        api_file = ApiFile()
        api_file.name = api_file.path = args.file_path
        api_file.is_directory = True
        return ApiGetFileDetailsResult(file=api_file)

      path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

      # TODO(hanuszczak): The tests passed even without support for timestamp
      # filtering. The test suite should be probably improved in that regard.
      path_id = rdf_objects.PathID(components)
      path_info = data_store.REL_DB.FindPathInfoByPathID(
          str(args.client_id), path_type, path_id, timestamp=args.timestamp)

      if path_info:
        stat_entry = path_info.stat_entry
        hash_entry = path_info.hash_entry
      else:
        stat_entry = rdf_client.StatEntry()
        hash_entry = rdf_crypto.Hash()
    else:
      stat_entry = None
      hash_entry = None

    return ApiGetFileDetailsResult(file=ApiFile().InitFromAff4Object(
        file_obj,
        stat_entry=stat_entry,
        hash_entry=hash_entry,
        with_details=True))
Code Example #16
    def testFindPathInfoByPathIDTimestampHashEntry(self):
        client_id = self.InitializeClient()

        path_info = objects.PathInfo.OS(components=["foo"])
        path_id = objects.PathID(["foo"])

        path_info.hash_entry = rdf_crypto.Hash(md5=b"bar")
        self.db.WritePathInfos(client_id, [path_info])
        bar_timestamp = rdfvalue.RDFDatetime.Now()

        path_info.hash_entry = rdf_crypto.Hash(md5=b"baz")
        self.db.WritePathInfos(client_id, [path_info])
        baz_timestamp = rdfvalue.RDFDatetime.Now()

        path_info.hash_entry = rdf_crypto.Hash(md5=b"quux")
        self.db.WritePathInfos(client_id, [path_info])
        quux_timestamp = rdfvalue.RDFDatetime.Now()

        bar_path_info = self.db.FindPathInfoByPathID(
            client_id,
            objects.PathInfo.PathType.OS,
            path_id,
            timestamp=bar_timestamp)
        self.assertEqual(bar_path_info.hash_entry.md5, b"bar")

        baz_path_info = self.db.FindPathInfoByPathID(
            client_id,
            objects.PathInfo.PathType.OS,
            path_id,
            timestamp=baz_timestamp)
        self.assertEqual(baz_path_info.hash_entry.md5, b"baz")

        quux_path_info = self.db.FindPathInfoByPathID(
            client_id,
            objects.PathInfo.PathType.OS,
            path_id,
            timestamp=quux_timestamp)
        self.assertEqual(quux_path_info.hash_entry.md5, b"quux")
Code Example #17
    def Run(self, args):
        hash_types = set()
        for t in args.tuples:
            for hash_name in t.hashers:
                hash_types.add(str(hash_name).lower())

        with vfs.VFSOpen(args.pathspec,
                         progress_callback=self.Progress) as file_obj:
            hashers, bytes_read = self.HashFile(hash_types, file_obj,
                                                args.max_filesize)

        self.SendReply(
            rdf_client.FingerprintResponse(
                pathspec=file_obj.pathspec,
                bytes_read=bytes_read,
                hash=rdf_crypto.Hash(**dict(
                    (k, v.digest()) for k, v in hashers.iteritems()))))
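
The final dict(...) of (name, digest) tuples is a Python 2 idiom (hashers.iteritems()). A rough Python 3 equivalent, sketched here with plain hashlib objects standing in for the example's hashers mapping, builds the keyword arguments with a dict comprehension before constructing rdf_crypto.Hash:

    import hashlib

    # Sketch only: hashers maps algorithm names to hashlib objects, mirroring
    # the example above.
    hashers = {name: hashlib.new(name) for name in ("md5", "sha1", "sha256")}
    for hasher in hashers.values():
        hasher.update(b"example data")

    # items() and a dict comprehension replace iteritems()/dict(...); the
    # result would be passed as rdf_crypto.Hash(**hash_kwargs).
    hash_kwargs = {name: hasher.digest() for name, hasher in hashers.items()}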
Code Example #18
    def _CreateFile(self, path, content, hashing=False):
        with aff4.FACTORY.Create(path, aff4.AFF4MemoryStream,
                                 token=self.token) as fd:
            fd.Write(content)

            if hashing:
                digest = hashlib.sha256(content).digest()
                fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

                if data_store.RelationalDBWriteEnabled():
                    client_id, vfs_path = path.Split(2)
                    path_type, components = rdf_objects.ParseCategorizedPath(
                        vfs_path)

                    path_info = rdf_objects.PathInfo()
                    path_info.path_type = path_type
                    path_info.components = components
                    path_info.hash_entry.sha256 = digest
                    data_store.REL_DB.WritePathInfos(client_id, [path_info])
Code Example #19
    def testWritePathInfosHashAndStatEntry(self):
        client_id = self.InitializeClient()

        stat_entry = rdf_client.StatEntry(st_mode=1337)
        hash_entry = rdf_crypto.Hash(md5=hashlib.md5("foo").digest())

        path_info = objects.PathInfo.OS(components=["foo", "bar", "baz"],
                                        stat_entry=stat_entry,
                                        hash_entry=hash_entry)
        self.db.WritePathInfos(client_id, [path_info])

        result = self.db.FindPathInfoByPathID(
            client_id, objects.PathInfo.PathType.OS,
            objects.PathID(["foo", "bar", "baz"]))

        self.assertEqual(result.components, ["foo", "bar", "baz"])
        self.assertTrue(result.HasField("stat_entry"))
        self.assertTrue(result.HasField("hash_entry"))
        self.assertEqual(result.stat_entry, stat_entry)
        self.assertEqual(result.hash_entry, hash_entry)
Code Example #20
  def testWritePathInfosHashEntry(self):
    client_id = self.InitializeClient()

    hash_entry = rdf_crypto.Hash()
    hash_entry.sha256 = hashlib.sha256("foo").digest()
    hash_entry.md5 = hashlib.md5("foo").digest()
    hash_entry.num_bytes = len("foo")

    path_info = objects.PathInfo.OS(
        components=["foo", "bar", "baz"], hash_entry=hash_entry)
    self.db.WritePathInfos(client_id, [path_info])

    result = self.db.FindPathInfoByPathID(client_id,
                                          objects.PathInfo.PathType.OS,
                                          objects.PathID(["foo", "bar", "baz"]))

    self.assertEqual(result.components, ["foo", "bar", "baz"])
    self.assertTrue(result.HasField("hash_entry"))
    self.assertFalse(result.HasField("stat_entry"))
    self.assertEqual(result.hash_entry.sha256, hashlib.sha256("foo").digest())
    self.assertEqual(result.hash_entry.md5, hashlib.md5("foo").digest())
    self.assertEqual(result.hash_entry.num_bytes, len("foo"))
Code Example #21
 def HashObject(self):
     return crypto.Hash(sha256=self.hashers["sha256"].digest(),
                        sha1=self.hashers["sha1"].digest(),
                        md5=self.hashers["md5"].digest())
Code Example #22
    def ReceiveFileHash(self, responses):
        """Add hash digest to tracker and check with filestore."""
        # Support old clients which may not have the new client action in place yet.
        # TODO(user): Deprecate once all clients have the HashFile action.

        if not responses.success and responses.request.request.name == "HashFile":
            logging.debug(
                "HashFile action not available, falling back to FingerprintFile."
            )
            self.CallClient("FingerprintFile",
                            responses.request.request.payload,
                            next_state="ReceiveFileHash",
                            request_data=responses.request_data)
            return

        index = responses.request_data["index"]
        if not responses.success:
            self.Log("Failed to hash file: %s", responses.status)
            self.state.pending_hashes.pop(index, None)
            self.FileFetchFailed(responses.request.request.payload.pathspec,
                                 responses.request.request.name,
                                 request_data=responses.request_data)
            return

        self.state.files_hashed += 1
        response = responses.First()
        if response.HasField("hash"):
            hash_obj = response.hash
        else:
            # Deprecate this method of returning hashes.
            hash_obj = rdf_crypto.Hash()

            if (len(response.results) < 1
                    or response.results[0]["name"] != "generic"):
                self.Log("Failed to hash file: %s",
                         self.state.indexed_pathspecs[index])
                self.state.pending_hashes.pop(index, None)
                return

            result = response.results[0]

            try:
                for hash_type in ["md5", "sha1", "sha256"]:
                    value = result.GetItem(hash_type)
                    setattr(hash_obj, hash_type, value)
            except AttributeError:
                self.Log("Failed to hash file: %s",
                         self.state.indexed_pathspecs[index])
                self.state.pending_hashes.pop(index, None)
                return

        try:
            tracker = self.state.pending_hashes[index]
        except KeyError:
            # TODO(user): implement a test for this and handle the failure
            # gracefully: i.e. maybe we can continue with an empty StatEntry.
            self.Error(
                "Couldn't stat the file, but got the hash (%s): %s" %
                (utils.SmartStr(index), utils.SmartStr(response.pathspec)))
            return

        tracker.hash_obj = hash_obj
        tracker.bytes_read = response.bytes_read

        self.state.files_hashed_since_check += 1
        if self.state.files_hashed_since_check >= self.MIN_CALL_TO_FILE_STORE:
            self._CheckHashesWithFileStore()
Code Example #23
    def Generate(self, collection, token=None):
        """Generates archive from a given collection.

    Iterates the collection and generates an archive by yielding contents
    of every referenced AFF4Stream.

    Args:
      collection: Iterable with items that point to aff4 paths.
      token: User's ACLToken.

    Yields:
      Binary chunks comprising the generated archive.
    """
        hashes = set()
        for fd_urn_batch in utils.Grouper(self._ItemsToUrns(collection),
                                          self.BATCH_SIZE):

            fds_to_write = {}
            for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=token):
                self.total_files += 1

                if not self.predicate(fd):
                    self.ignored_files.append(utils.SmartUnicode(fd.urn))
                    continue

                # Any file-like object with data in AFF4 should inherit AFF4Stream.
                if isinstance(fd, aff4.AFF4Stream):
                    archive_path = os.path.join(self.prefix, *fd.urn.Split())

                    sha256_hash = fd.Get(fd.Schema.HASH,
                                         rdf_crypto.Hash()).sha256
                    if not sha256_hash:
                        continue
                    self.archived_files += 1

                    content_path = os.path.join(self.prefix, "hashes",
                                                str(sha256_hash))
                    if sha256_hash not in hashes:
                        # Make sure size of the original file is passed. It's required
                        # when output_writer is StreamingTarWriter.
                        st = os.stat_result(
                            (0644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
                        fds_to_write[fd] = (content_path, st)
                        hashes.add(sha256_hash)

                    up_prefix = "../" * len(fd.urn.Split())
                    yield self.archive_generator.WriteSymlink(
                        up_prefix + content_path, archive_path)

            if fds_to_write:
                prev_fd = None
                for fd, chunk, exception in aff4.AFF4Stream.MultiStream(
                        fds_to_write):
                    if exception:
                        logging.exception(exception)

                        self.archived_files -= 1
                        self.failed_files.append(utils.SmartUnicode(fd.urn))
                        continue

                    if prev_fd != fd:
                        if prev_fd:
                            yield self.archive_generator.WriteFileFooter()
                        prev_fd = fd

                        content_path, st = fds_to_write[fd]
                        yield self.archive_generator.WriteFileHeader(
                            content_path, st=st)

                    yield self.archive_generator.WriteFileChunk(chunk)

                if self.archive_generator.is_file_write_in_progress:
                    yield self.archive_generator.WriteFileFooter()

        for chunk in self._WriteDescription():
            yield chunk

        yield self.archive_generator.Close()
Code Example #24
    def testWritePathInfosMetadataTimestampUpdate(self):
        now = rdfvalue.RDFDatetime.Now

        client_id = self.InitializeClient()

        timestamp_0 = now()

        self.db.WritePathInfos(client_id,
                               [objects.PathInfo.OS(components=["foo"])])

        result = self.db.FindPathInfoByPathID(client_id,
                                              objects.PathInfo.PathType.OS,
                                              objects.PathID(["foo"]))
        self.assertEqual(result.components, ["foo"])
        self.assertGreater(result.timestamp, timestamp_0)
        self.assertLess(result.timestamp, now())
        self.assertEqual(result.last_stat_entry_timestamp, None)
        self.assertEqual(result.last_hash_entry_timestamp, None)

        timestamp_1 = now()

        stat_entry = rdf_client.StatEntry(st_mode=42)
        self.db.WritePathInfos(
            client_id,
            [objects.PathInfo.OS(components=["foo"], stat_entry=stat_entry)])

        result = self.db.FindPathInfoByPathID(client_id,
                                              objects.PathInfo.PathType.OS,
                                              objects.PathID(["foo"]))
        self.assertEqual(result.components, ["foo"])
        self.assertEqual(result.stat_entry.st_mode, 42)
        self.assertGreater(result.timestamp, timestamp_1)
        self.assertLess(result.timestamp, now())
        self.assertGreater(result.last_stat_entry_timestamp, timestamp_1)
        self.assertLess(result.last_stat_entry_timestamp, now())

        timestamp_2 = now()

        hash_entry = rdf_crypto.Hash(md5=b"foo")
        self.db.WritePathInfos(
            client_id,
            [objects.PathInfo.OS(components=["foo"], hash_entry=hash_entry)])

        result = self.db.FindPathInfoByPathID(client_id,
                                              objects.PathInfo.PathType.OS,
                                              objects.PathID(["foo"]))
        self.assertEqual(result.components, ["foo"])
        self.assertEqual(result.hash_entry.md5, b"foo")
        self.assertGreater(result.timestamp, timestamp_2)
        self.assertLess(result.timestamp, now())
        self.assertGreater(result.last_hash_entry_timestamp, timestamp_2)
        self.assertLess(result.last_hash_entry_timestamp, now())

        timestamp_3 = now()

        self.db.WritePathInfos(
            client_id,
            [objects.PathInfo.OS(components=["foo"], directory=True)])

        result = self.db.FindPathInfoByPathID(client_id,
                                              objects.PathInfo.PathType.OS,
                                              objects.PathID(["foo"]))
        self.assertEqual(result.components, ["foo"])
        self.assertEqual(result.stat_entry.st_mode, 42)
        self.assertEqual(result.hash_entry.md5, b"foo")
        self.assertTrue(result.directory)
        self.assertGreater(result.timestamp, timestamp_3)
        self.assertLess(result.timestamp, now())
        self.assertGreater(result.last_stat_entry_timestamp, timestamp_1)
        self.assertLess(result.last_stat_entry_timestamp, timestamp_2)
        self.assertGreater(result.last_hash_entry_timestamp, timestamp_2)
        self.assertLess(result.last_hash_entry_timestamp, timestamp_3)

        timestamp_4 = now()

        path_info = objects.PathInfo.OS(components=["foo"])
        path_info.stat_entry.st_mode = 108
        path_info.hash_entry.sha256 = b"norf"
        self.db.WritePathInfos(client_id, [path_info])

        result = self.db.FindPathInfoByPathID(client_id,
                                              objects.PathInfo.PathType.OS,
                                              objects.PathID(["foo"]))
        self.assertEqual(result.components, ["foo"])
        self.assertEqual(result.stat_entry.st_mode, 108)
        self.assertEqual(result.hash_entry.sha256, b"norf")
        self.assertGreater(result.timestamp, timestamp_4)
        self.assertGreater(result.last_stat_entry_timestamp, timestamp_4)
        self.assertGreater(result.last_hash_entry_timestamp, timestamp_4)
        self.assertLess(result.timestamp, now())
        self.assertLess(result.last_stat_entry_timestamp, now())
        self.assertLess(result.last_hash_entry_timestamp, now())