Example #1
    def testLargeNumberOfBlobs(self):
        def Blobs(prefix):
            for idx in range(1337):
                yield prefix + str(idx).encode("ascii")

        foo_blobs = list(Blobs(b"foo"))
        foo_blob_refs = _BlobRefsFromByteArray(foo_blobs)
        foo_blob_ids = [ref.blob_id for ref in foo_blob_refs]
        foo_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(foo_blobs))
        data_store.BLOBS.WriteBlobs(dict(zip(foo_blob_ids, foo_blobs)))

        bar_blobs = list(Blobs(b"bar"))
        bar_blob_refs = _BlobRefsFromByteArray(bar_blobs)
        bar_blob_ids = [ref.blob_id for ref in bar_blob_refs]
        bar_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(bar_blobs))
        data_store.BLOBS.WriteBlobs(dict(zip(bar_blob_ids, bar_blobs)))

        client_id = self.SetupClient(0)
        foo_path = db.ClientPath.OS(client_id=client_id, components=("foo",))
        bar_path = db.ClientPath.OS(client_id=client_id, components=("bar",))

        # Shrink the blob-read batch size so that the 1337 blobs per file are
        # fetched in many small batches.
        with mock.patch.object(file_store, "_BLOBS_READ_BATCH_SIZE", 42):
            hash_ids = file_store.AddFilesWithUnknownHashes({
                foo_path: foo_blob_refs,
                bar_path: bar_blob_refs,
            })
        self.assertLen(hash_ids, 2)
        self.assertEqual(hash_ids[foo_path], foo_hash_id)
        self.assertEqual(hash_ids[bar_path], bar_hash_id)
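Note: the helper _BlobRefsFromByteArray is used above but not shown in these
examples. Judging from its call sites (each returned reference exposes a
blob_id, and BlobReference takes offset, size and blob_id, as in Example #2),
a minimal sketch could look as follows; the actual test helper may differ:

    def _BlobRefsFromByteArray(data_array):
        # Hypothetical reconstruction: one BlobReference per blob, with
        # offsets accumulating over the concatenated data.
        offset = 0
        blob_refs = []
        for data in data_array:
            blob_id = rdf_objects.BlobID.FromBlobData(data)
            blob_refs.append(
                rdf_objects.BlobReference(
                    offset=offset, size=len(data), blob_id=blob_id))
            offset += len(data)
        return blob_refs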
Example #2
    def _WriteFilesContentRel(self, responses):
        """Writes file contents of multiple files to the relational database."""
        client_path_blob_refs = dict()
        client_path_path_info = dict()

        for response in responses:
            path_info = rdf_objects.PathInfo.FromStatEntry(response.stat_entry)

            chunks = response.transferred_file.chunks
            chunks = sorted(chunks, key=lambda chunk: chunk.offset)

            client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
            blob_refs = []
            for c in chunks:
                blob_refs.append(
                    rdf_objects.BlobReference(
                        offset=c.offset,
                        size=c.length,
                        blob_id=rdf_objects.BlobID.FromBytes(c.digest)))

            client_path_path_info[client_path] = path_info
            client_path_blob_refs[client_path] = blob_refs

        if data_store.RelationalDBReadEnabled() and client_path_blob_refs:
            use_external_stores = self.args.action.download.use_external_stores
            client_path_hash_id = file_store.AddFilesWithUnknownHashes(
                client_path_blob_refs, use_external_stores=use_external_stores)
            for client_path, hash_id in iteritems(client_path_hash_id):
                path_info = client_path_path_info[client_path]
                path_info.hash_entry.sha256 = hash_id.AsBytes()

        path_infos = list(itervalues(client_path_path_info))
        data_store.REL_DB.WritePathInfos(self.client_id, path_infos)
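Note: iteritems and itervalues above are Python 2/3 compatibility helpers,
not builtins; the surrounding module presumably imports them along these
lines (the exact provenance is an assumption):

    # Assumed compatibility imports; six.iteritems/itervalues would work too.
    from future.utils import iteritems
    from future.utils import itervalues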
Example #3
    def testSimpleOverlappingBlobIds(self):
        foo_blobs = [b"foo", b"norf", b"quux", b"thud"]
        bar_blobs = [b"bar", b"norf", b"blag", b"thud"]

        foo_blob_refs = _BlobRefsFromByteArray(foo_blobs)
        foo_blob_ids = [ref.blob_id for ref in foo_blob_refs]
        foo_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(foo_blobs))

        bar_blob_refs = _BlobRefsFromByteArray(bar_blobs)
        bar_blob_ids = [ref.blob_id for ref in bar_blob_refs]
        bar_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(bar_blobs))

        data_store.BLOBS.WriteBlobs(dict(zip(foo_blob_ids, foo_blobs)))
        data_store.BLOBS.WriteBlobs(dict(zip(bar_blob_ids, bar_blobs)))

        client_id = self.SetupClient(0)
        foo_path = db.ClientPath.OS(client_id=client_id,
                                    components=("foo", "quux"))
        bar_path = db.ClientPath.OS(client_id=client_id,
                                    components=("bar", "blag"))

        hash_ids = file_store.AddFilesWithUnknownHashes({
            foo_path: foo_blob_refs,
            bar_path: bar_blob_refs,
        })

        self.assertLen(hash_ids, 2)
        self.assertEqual(hash_ids[foo_path], foo_hash_id)
        self.assertEqual(hash_ids[bar_path], bar_hash_id)
Example #4
    def _WriteFilesContentRel(self, responses):
        """Writes file contents of multiple files to the relational database."""
        client_path_blob_ids = dict()
        client_path_path_info = dict()

        for response in responses:
            path_info = rdf_objects.PathInfo.FromStatEntry(response.stat_entry)

            chunks = response.transferred_file.chunks
            chunks = sorted(chunks, key=lambda chunk: chunk.offset)

            client_path = db.ClientPath.FromPathInfo(self.client_id, path_info)
            blob_ids = [rdf_objects.BlobID.FromBytes(c.digest) for c in chunks]

            client_path_blob_ids[client_path] = blob_ids
            client_path_path_info[client_path] = path_info

        if data_store.RelationalDBReadEnabled("filestore"):
            client_path_hash_id = file_store.AddFilesWithUnknownHashes(
                client_path_blob_ids)
            for client_path, hash_id in iteritems(client_path_hash_id):
                path_info = client_path_path_info[client_path]
                path_info.hash_entry.sha256 = hash_id.AsBytes()

        path_infos = list(itervalues(client_path_path_info))
        data_store.REL_DB.WritePathInfos(self.client_id, path_infos)
Example #5
    def testFilesWithOneBlobAreStillReadToEnsureBlobExists(self):
        _, long_blob_refs = vfs_test_lib.GenerateBlobRefs(self.blob_size, "cd")
        _, short_blob_refs1 = vfs_test_lib.GenerateBlobRefs(
            self.blob_size, "a")
        _, short_blob_refs2 = vfs_test_lib.GenerateBlobRefs(
            self.blob_size, "b")

        path1 = db.ClientPath.OS(self.client_id, ["foo"])
        path2 = db.ClientPath.OS(self.client_id, ["bar"])
        path3 = db.ClientPath.OS(self.client_id, ["baz"])

        # One small file, blob is still read.
        with mock.patch.object(data_store.BLOBS,
                               "ReadBlobs",
                               wraps=data_store.BLOBS.ReadBlobs) as p:
            file_store.AddFileWithUnknownHash(path1, short_blob_refs1)
            p.assert_called_once()

        # Same for multiple small files.
        with mock.patch.object(data_store.BLOBS,
                               "ReadBlobs",
                               wraps=data_store.BLOBS.ReadBlobs) as p:
            file_store.AddFilesWithUnknownHashes({
                path1: short_blob_refs1,
                path2: short_blob_refs2
            })
            p.assert_called_once()

        # One large file and two small ones result in a single read for all
        # three blobs.
        with mock.patch.object(data_store.BLOBS,
                               "ReadBlobs",
                               wraps=data_store.BLOBS.ReadBlobs) as p:
            file_store.AddFilesWithUnknownHashes({
                path1: short_blob_refs1,
                path2: short_blob_refs2,
                path3: long_blob_refs
            })
            p.assert_called_once()
            self.assertLen(p.call_args[POSITIONAL_ARGS], 1)
            self.assertEmpty(p.call_args[KEYWORD_ARGS])
            self.assertCountEqual(p.call_args[0][0], [
                r.blob_id for r in itertools.chain(
                    short_blob_refs1, short_blob_refs2, long_blob_refs)
            ])
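Note: POSITIONAL_ARGS and KEYWORD_ARGS are not defined in these snippets.
Since mock.call_args is an (args, kwargs) pair, they are presumably the two
tuple indices; a plausible definition, also covering Example #6, would be:

    # Assumed indices into mock.call_args, which is an (args, kwargs) pair.
    POSITIONAL_ARGS = 0
    KEYWORD_ARGS = 1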
Example #6
    def testOptimizationForSmallFiles(self):
        _, long_blob_refs = _GenerateBlobRefs(self.blob_size, b"ab")
        _, short_blob_refs1 = _GenerateBlobRefs(self.blob_size, b"a")
        _, short_blob_refs2 = _GenerateBlobRefs(self.blob_size, b"b")

        path1 = db.ClientPath.OS(self.client_id, ["foo"])
        path2 = db.ClientPath.OS(self.client_id, ["bar"])
        path3 = db.ClientPath.OS(self.client_id, ["baz"])

        # One small file, no need to read blobs.
        with mock.patch.object(data_store.BLOBS,
                               "ReadBlobs",
                               wraps=data_store.BLOBS.ReadBlobs) as p:
            file_store.AddFileWithUnknownHash(path1, short_blob_refs1)
            p.assert_not_called()

        # Same for multiple small files.
        with mock.patch.object(data_store.BLOBS,
                               "ReadBlobs",
                               wraps=data_store.BLOBS.ReadBlobs) as p:
            file_store.AddFilesWithUnknownHashes({
                path1: short_blob_refs1,
                path2: short_blob_refs2
            })
            p.assert_not_called()

        # One large file and two small ones result in a single read for the two
        # blobs of the large file only.
        with mock.patch.object(data_store.BLOBS,
                               "ReadBlobs",
                               wraps=data_store.BLOBS.ReadBlobs) as p:
            file_store.AddFilesWithUnknownHashes({
                path1: short_blob_refs1,
                path2: short_blob_refs2,
                path3: long_blob_refs
            })
            p.assert_called_once()
            self.assertLen(p.call_args[POSITIONAL_ARGS], 1)
            self.assertEmpty(p.call_args[KEYWORD_ARGS])
            self.assertCountEqual(p.call_args[0][0],
                                  [r.blob_id for r in long_blob_refs])
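Note: _GenerateBlobRefs is likewise not shown. From its call sites (it takes
a blob size and a bytes object and returns blob contents plus references,
one blob per byte), a hedged sketch could be:

    def _GenerateBlobRefs(blob_size, chars):
        # Hypothetical reconstruction: one blob of blob_size bytes per
        # character, e.g. b"ab" -> [b"aa...a", b"bb...b"].
        blobs = [bytes([char]) * blob_size for char in chars]
        return blobs, _BlobRefsFromByteArray(blobs)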
Example #7
  def testDoesNotFailForEmptyFiles(self):
    client_id = self.SetupClient(0)

    paths = []
    for idx in range(100):
      components = ("foo", "bar", str(idx))
      paths.append(db.ClientPath.OS(client_id=client_id, components=components))

    hash_ids = file_store.AddFilesWithUnknownHashes(
        {path: [] for path in paths})

    empty_hash_id = rdf_objects.SHA256HashID.FromData(b"")
    for path in paths:
      self.assertEqual(hash_ids[path], empty_hash_id)
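As a sanity check on the expectation above: the SHA-256 of empty input is a
well-known constant, so SHA256HashID.FromData(b"") should wrap this digest
(assuming SHA256HashID stores a raw SHA-256 digest):

    import hashlib

    # SHA-256 of the empty byte string.
    assert hashlib.sha256(b"").hexdigest() == (
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")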
Example #8
  def testLargeNumberOfPaths(self):
    client_id = self.SetupClient(0).Basename()

    paths = []
    for idx in range(1337):
      components = ("foo", "bar", str(idx))
      paths.append(db.ClientPath.OS(client_id=client_id, components=components))

    blobs = [b"foo", b"bar", b"baz"]
    blob_ids = list(map(rdf_objects.BlobID.FromBlobData, blobs))
    data_store.BLOBS.WriteBlobs(dict(zip(blob_ids, blobs)))

    hash_ids = file_store.AddFilesWithUnknownHashes(
        {path: blob_ids for path in paths})

    expected_hash_id = rdf_objects.SHA256HashID.FromData(b"foobarbaz")
    for path in paths:
      self.assertEqual(hash_ids[path], expected_hash_id)
Example #9
  def testSimpleMultiplePaths(self):
    foo_blobs = [b"foo", b"norf", b"thud"]
    foo_blob_ids = list(map(rdf_objects.BlobID.FromBlobData, foo_blobs))
    foo_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(foo_blobs))
    data_store.BLOBS.WriteBlobs(dict(zip(foo_blob_ids, foo_blobs)))

    bar_blobs = [b"bar", b"quux", b"blargh"]
    bar_blob_ids = list(map(rdf_objects.BlobID.FromBlobData, bar_blobs))
    bar_hash_id = rdf_objects.SHA256HashID.FromData(b"".join(bar_blobs))
    data_store.BLOBS.WriteBlobs(dict(zip(bar_blob_ids, bar_blobs)))

    client_id = self.SetupClient(0).Basename()
    foo_path = db.ClientPath.OS(client_id=client_id, components=("foo",))
    bar_path = db.ClientPath.OS(client_id=client_id, components=("bar",))

    hash_ids = file_store.AddFilesWithUnknownHashes({
        foo_path: foo_blob_ids,
        bar_path: bar_blob_ids,
    })

    self.assertLen(hash_ids, 2)
    self.assertEqual(hash_ids[foo_path], foo_hash_id)
    self.assertEqual(hash_ids[bar_path], bar_hash_id)
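The expected hash ids here are just the SHA-256 of each file's concatenated
blob contents; assuming SHA256HashID wraps a raw digest, the foo case can be
illustrated with hashlib:

    import hashlib

    # b"".join([b"foo", b"norf", b"thud"]) == b"foonorfthud"; presumably
    # equal to foo_hash_id.AsBytes() in the test above.
    digest = hashlib.sha256(b"foonorfthud").digest()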
Example #10
  def testDoesNotFailForEmptyDict(self):
    file_store.AddFilesWithUnknownHashes({})