Esempio n. 1
0
    def upload_tree(self, directory_path, queue=True):
        """Stores a local folder into the remote CAS storage as a :obj:`Tree`.

        If queuing is allowed (`queue=True`), the upload request **may** be
        deferred. An explicit call to :func:`~flush` can force the request to
        be sent immediately (along with the rest of the queued batch).

        Args:
            directory_path (str): absolute or relative path to a local folder.
            queue (bool, optional): whether or not the upload requests may be
                queued and submitted as part of a batch upload request. Defaults
                to True.

        Returns:
            :obj:`Digest`: The digest of the :obj:`Tree`.

        Raises:
            FileNotFoundError: If `directory_path` does not exist.
            PermissionError: If `directory_path` is not readable.
        """
        if not os.path.isabs(directory_path):
            directory_path = os.path.abspath(directory_path)

        directories = []

        # Select the upload strategy once instead of duplicating the whole
        # traversal loop for the queued and non-queued cases:
        send_blob = self._queue_blob if queue else self._send_blob

        for node, blob, _ in merkle_tree_maker(directory_path):
            if node.DESCRIPTOR is remote_execution_pb2.DirectoryNode.DESCRIPTOR:
                # TODO: Get the Directory object from merkle_tree_maker():
                directory = remote_execution_pb2.Directory()
                directory.ParseFromString(blob)
                directories.append(directory)

            send_blob(blob, digest=node.digest)

        # merkle_tree_maker() yields children before parents, so the last
        # collected directory is the root of the tree:
        tree = remote_execution_pb2.Tree()
        tree.root.CopyFrom(directories[-1])
        tree.children.extend(directories[:-1])

        return self.put_message(tree, queue=queue)
Esempio n. 2
0
def _cas_tree_maker(cas, directory_digest):
    # Builds a flattened Tree message for the Directory stored under
    # `directory_digest`. This is very inefficient and only temporary.
    # See https://gitlab.com/BuildStream/buildbox/issues/7.

    def __collect_subtree(cas, parent_directory):
        # Fetch every direct child Directory in a single batched call:
        child_digests = [node.digest for node in parent_directory.directories]
        children = [remote_execution_pb2.Directory() for _ in child_digests]

        cas.get_messages(child_digests, children)

        # Direct children first, then each child's own subtree, preserving
        # the original discovery order:
        subtree = list(children)
        for child in children:
            subtree.extend(__collect_subtree(cas, child))

        return subtree

    root_directory = cas.get_message(directory_digest,
                                     remote_execution_pb2.Directory())

    output_tree = remote_execution_pb2.Tree()
    output_tree.children.extend(__collect_subtree(cas, root_directory))
    output_tree.root.CopyFrom(root_directory)

    return output_tree
Esempio n. 3
0
    def __merkle_tree_maker(directory_path, directory_name):
        """Recursively walks a local folder, yielding (node, blob, path) triples.

        Generator protocol: for every file and sub-directory found it yields a
        ``(node, node_blob, node_path)`` tuple, and its *return* value (consumed
        by the ``yield from`` in the recursive call) is the
        :obj:`DirectoryNode` describing `directory_path` itself, together with
        the serialized :obj:`Directory` blob and the absolute path.

        Note: symlink entries are recorded inside the Directory message but are
        NOT yielded — there is no blob to upload for a symlink.
        """
        if not os.path.isabs(directory_path):
            directory_path = os.path.abspath(directory_path)

        directory = remote_execution_pb2.Directory()

        files, directories, symlinks = [], [], []
        for directory_entry in os.scandir(directory_path):
            node_name, node_path = directory_entry.name, directory_entry.path

            # Regular file (symlinks excluded — they fall through to the
            # islink branch below):
            if directory_entry.is_file(follow_symlinks=False):
                node_blob = read_file(directory_entry.path)
                node_digest = create_digest(node_blob)

                node = remote_execution_pb2.FileNode()
                node.name = node_name
                node.digest.CopyFrom(node_digest)
                node.is_executable = os.access(node_path, os.X_OK)

                files.append(node)

                yield node, node_blob, node_path

            # Sub-directory: recurse, re-yielding everything it yields; the
            # recursion's return value is the child's DirectoryNode:
            elif directory_entry.is_dir(follow_symlinks=False):
                node, node_blob, _ = yield from __merkle_tree_maker(
                    node_path, node_name)

                directories.append(node)

                yield node, node_blob, node_path

            # Symlink: recorded in the Directory message, nothing yielded.
            elif os.path.islink(directory_entry.path):
                node_target = os.readlink(directory_entry.path)

                node = remote_execution_pb2.SymlinkNode()
                node.name = directory_entry.name
                node.target = node_target

                symlinks.append(node)

        # REAPI requires Directory children to be sorted lexicographically by
        # name for the serialization (and hence the digest) to be canonical:
        files.sort(key=attrgetter('name'))
        directories.sort(key=attrgetter('name'))
        symlinks.sort(key=attrgetter('name'))

        directory.files.extend(files)
        directory.directories.extend(directories)
        directory.symlinks.extend(symlinks)

        node_blob = directory.SerializeToString()
        node_digest = create_digest(node_blob)

        node = remote_execution_pb2.DirectoryNode()
        node.name = directory_name
        node.digest.CopyFrom(node_digest)

        # Delivered to the caller's `yield from` expression:
        return node, node_blob, directory_path
Esempio n. 4
0
    def __cas_tree_maker(cas, parent_directory):
        # Returns the flattened list of every Directory reachable from
        # `parent_directory`, in discovery order (children before the
        # grandchildren of any sibling).

        # Fetch all direct child directories in one batched request:
        child_digests = [node.digest for node in parent_directory.directories]
        children = [remote_execution_pb2.Directory() for _ in child_digests]

        cas.get_messages(child_digests, children)

        # Direct children first, then each child's own subtree:
        collected = list(children)
        for child in children:
            collected.extend(__cas_tree_maker(cas, child))

        return collected
Esempio n. 5
0
    def _fetch_directory(self, digest, directory_path):
        """Fetches a remote :obj:`Directory` tree into a local folder.

        Tries the CAS ``GetTree()`` call first (fetching every sub-directory
        in one paged stream); if the server does not implement it, falls back
        to fetching the root :obj:`Directory` blob alone and letting
        :func:`_write_directory` resolve children via ``BatchReadBlobs()`` or
        ``Read()``.

        Args:
            digest (Digest): digest of the root :obj:`Directory` to fetch.
            directory_path (str): local path to write the tree under.

        Raises:
            FileNotFoundError: If the directory does not exist on the remote.
            ConnectionError: If the remote call fails for any other reason.
        """
        # Better fail early if the local root path cannot be created:
        os.makedirs(directory_path, exist_ok=True)

        directories = {}
        directory_fetched = False
        # First, try GetTree() if not known to be unimplemented yet:
        if not _CallCache.unimplemented(self.channel, 'GetTree'):
            tree_request = remote_execution_pb2.GetTreeRequest()
            tree_request.root_digest.CopyFrom(digest)
            tree_request.page_size = MAX_REQUEST_COUNT
            if self.instance_name is not None:
                tree_request.instance_name = self.instance_name

            try:
                for tree_response in self.__cas_stub.GetTree(tree_request):
                    for directory in tree_response.directories:
                        # Index every returned directory by its own digest
                        # hash so _write_directory() can resolve children
                        # without further remote calls:
                        directory_blob = directory.SerializeToString()
                        directory_hash = HASH(directory_blob).hexdigest()

                        directories[directory_hash] = directory

                # The stream has to include the root directory itself:
                assert digest.hash in directories

                directory = directories[digest.hash]
                self._write_directory(directory,
                                      directory_path,
                                      directories=directories,
                                      root_barrier=directory_path)

                directory_fetched = True
            except grpc.RpcError as e:
                status_code = e.code()
                if status_code == grpc.StatusCode.UNIMPLEMENTED:
                    # Remember, so future calls skip straight to the fallback:
                    _CallCache.mark_unimplemented(self.channel, 'GetTree')

                elif status_code == grpc.StatusCode.NOT_FOUND:
                    raise FileNotFoundError(
                        "Requested directory does not exist on the remote.") from e

                else:
                    raise ConnectionError(e.details()) from e

        # If no GetTree(), _write_directory() will use BatchReadBlobs()
        # if available or Read() if not.
        if not directory_fetched:
            directory = remote_execution_pb2.Directory()
            directory.ParseFromString(self._fetch_blob(digest))

            self._write_directory(directory,
                                  directory_path,
                                  root_barrier=directory_path)
Esempio n. 6
0
def test_cas_get_tree(mocked, instance):
    '''Checks that GetTree() pages out the full directory structure:
        |--root
           |--subEmptyDir
           |--subParentDir
              |--subChildDir
    '''
    # NOTE(review): `server` and `context` are not defined in this snippet —
    # presumably module-level fixtures/mocks provided by `mocked`; confirm.
    # NOTE(review): the `name=` kwargs below pass bytes where DirectoryNode
    # declares a string field — this only works against the fake storage;
    # verify this is intended.
    root = re_pb2.Digest(hash=HASH(b'abc').hexdigest(), size_bytes=3)
    rootDir = re_pb2.DirectoryNode(name=b'abc', digest=root)
    digest1 = re_pb2.Digest(hash=HASH(b'def').hexdigest(), size_bytes=3)
    subEmptyDir = re_pb2.DirectoryNode(name=b'def', digest=digest1)
    digest2 = re_pb2.Digest(hash=HASH(b'ghi').hexdigest(), size_bytes=3)
    subParentDir = re_pb2.DirectoryNode(name=b'ghi', digest=digest2)
    digest3 = re_pb2.Digest(hash=HASH(b'xyz').hexdigest(), size_bytes=3)
    subChildDir = re_pb2.DirectoryNode(name=b'xyz', digest=digest3)

    # Fake storage mapping each datum to its child DirectoryNodes:
    storage = SimpleStorage({b'abc': [subEmptyDir, subParentDir], b'def': [],
                            b'ghi': [subChildDir], b'xyz': []})
    cas_instance = ContentAddressableStorageInstance(storage)
    servicer = ContentAddressableStorageService(server)
    servicer.add_instance(instance, cas_instance)

    # Collect every directory streamed back across all response pages:
    request = re_pb2.GetTreeRequest(
        instance_name=instance, root_digest=root)
    result = []
    for response in servicer.GetTree(request, context):
        result.extend(response.directories)

    # Expected stream: root first, then children in discovery order:
    expectedRoot = re_pb2.Directory()
    expectedRoot.directories.extend([subEmptyDir, subParentDir])
    expectedEmpty = re_pb2.Directory()
    expectedParent = re_pb2.Directory()
    expectedParent.directories.extend([subChildDir])
    expectedChild = re_pb2.Directory()

    expected = [expectedRoot, expectedEmpty, expectedParent, expectedChild]
    assert result == expected
Esempio n. 7
0
        def __compare_folders(digest, path):
            # Asserts that the on-disk folder at `path` matches the Directory
            # message stored under `digest`, recursing into sub-directories.
            directory = remote_execution_pb2.Directory()
            directory.ParseFromString(self.__storage.get_blob(digest).read())

            # Classify the local entries by kind (symlinks are excluded from
            # the file/dir buckets by follow_symlinks=False):
            local_files, local_dirs, local_links = [], [], []
            for entry in os.scandir(path):
                if entry.is_file(follow_symlinks=False):
                    local_files.append(entry.name)
                elif entry.is_dir(follow_symlinks=False):
                    local_dirs.append(entry.name)
                elif os.path.islink(entry.path):
                    local_links.append(entry.name)

            # Entry counts must agree in every category:
            assert len(local_files) == len(directory.files)
            assert len(local_dirs) == len(directory.directories)
            assert len(local_links) == len(directory.symlinks)

            # Each FileNode must map to a real, non-link file with matching
            # content and executable bit:
            for file_node in directory.files:
                node_path = os.path.join(path, file_node.name)
                assert file_node.name in local_files
                assert os.path.isfile(node_path)
                assert not os.path.islink(node_path)
                if file_node.is_executable:
                    assert os.access(node_path, os.X_OK)
                assert self.compare_files(file_node.digest, node_path)

            # Each DirectoryNode must map to a real folder; recurse into it:
            for directory_node in directory.directories:
                node_path = os.path.join(path, directory_node.name)
                assert directory_node.name in local_dirs
                assert os.path.exists(node_path)
                assert not os.path.islink(node_path)
                assert __compare_folders(directory_node.digest, node_path)

            # Each SymlinkNode must map to a link with the exact target:
            for symlink_node in directory.symlinks:
                node_path = os.path.join(path, symlink_node.name)
                assert symlink_node.name in local_links
                assert os.path.islink(node_path)
                assert os.readlink(node_path) == symlink_node.target

            return True
Esempio n. 8
0
    def _write_directory(self,
                         root_directory,
                         root_path,
                         directories=None,
                         root_barrier=None):
        """Generates a local directory structure from a :obj:`Directory`.

        Args:
            root_directory (Directory): the directory message to materialize.
            root_path (str): local path to write the structure under.
            directories (dict, optional): already-fetched directories indexed
                by digest hash, used to avoid re-fetching children.
            root_barrier (str, optional): local path symlinks must not point
                outside of; offending links are skipped.
        """

        # i) Files:
        for file_node in root_directory.files:
            file_path = os.path.join(root_path, file_node.name)

            self.download_file(file_node.digest,
                               file_path,
                               is_executable=file_node.is_executable)
        self.flush()

        # ii) Directories:
        pending_directory_digests = []
        pending_directory_paths = {}
        for directory_node in root_directory.directories:
            directory_hash = directory_node.digest.hash

            directory_path = os.path.join(root_path, directory_node.name)
            os.makedirs(directory_path, exist_ok=True)

            if directories and directory_node.digest.hash in directories:
                # We already have the directory; just write it:
                directory = directories[directory_hash]

                self._write_directory(directory,
                                      directory_path,
                                      directories=directories,
                                      root_barrier=root_barrier)
            else:
                # Gather all the directories that we need to get to
                # try fetching them in a single batch request:
                pending_directory_digests.append(directory_node.digest)
                pending_directory_paths[directory_hash] = directory_path

        if pending_directory_paths:
            fetched_blobs = self._fetch_blob_batch(pending_directory_digests)

            for directory_blob in fetched_blobs:
                directory = remote_execution_pb2.Directory()
                directory.ParseFromString(directory_blob)

                # Assuming that the server might not return the blobs in
                # the same order than they were asked for, we read
                # the hashes of the returned blobs:
                directory_hash = create_digest(directory_blob).hash
                # Guarantees for the reply orderings might change in
                # the specification at some point.
                # See: github.com/bazelbuild/remote-apis/issues/52

                directory_path = pending_directory_paths[directory_hash]

                self._write_directory(directory,
                                      directory_path,
                                      directories=directories,
                                      root_barrier=root_barrier)

        # iii) Symlinks:
        for symlink_node in root_directory.symlinks:
            symlink_path = os.path.join(root_path, symlink_node.name)
            if not os.path.isabs(symlink_node.target):
                target_path = os.path.join(root_path, symlink_node.target)
            else:
                target_path = symlink_node.target
            target_path = os.path.normpath(target_path)

            # Do not create links pointing outside the barrier:
            # NOTE(review): commonprefix() is character-based, so a sibling
            # like "/tmp/foobar" shares the "/tmp/foo" prefix with barrier
            # "/tmp/foo" — consider os.path.commonpath(); verify inputs first.
            if root_barrier is not None:
                common_path = os.path.commonprefix([root_barrier, target_path])
                if not common_path.startswith(root_barrier):
                    continue

            # BUG FIX: os.symlink(src, dst) creates a link *named* dst that
            # points to src. The original call had the arguments reversed,
            # creating a file at the target location pointing back at the
            # link path. Use the node's raw target so relative links stay
            # relocatable:
            os.symlink(symlink_node.target, symlink_path)
Esempio n. 9
0
 def get_message(self, digest, message_type):
     # Test-double lookup: resolves `digest` to its stored datum, then wraps
     # the mapped child DirectoryNodes into a fresh Directory message.
     # NOTE(review): `message_type` is ignored — a Directory is returned no
     # matter what type was requested; confirm this is intended for the fake.
     datum = self.data[(digest.hash, digest.size_bytes)]
     message = re_pb2.Directory()
     message.directories.extend(self.map_data[datum])
     return message
Esempio n. 10
0
import os
import tempfile

import grpc
import pytest

from buildgrid.client.cas import download, upload
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid.utils import create_digest

from ..utils.cas import serve_cas
from ..utils.utils import run_in_subprocess

# Instance names exercised by the parametrized tests ('' = default instance).
INTANCES = ['', 'instance']
# NOTE(review): "INTANCES" above is a typo for "INSTANCES". The correctly
# spelled alias below is preferred; the old name is kept so existing
# references keep working.
INSTANCES = INTANCES
# Blob fixtures: empty blob, single blob, and a multi-blob batch:
BLOBS = [(b'', ), (b'test-string', ), (b'test', b'string')]
# Message fixtures covering several distinct protobuf types:
MESSAGES = [(remote_execution_pb2.Directory(), ),
            (remote_execution_pb2.SymlinkNode(name='name', target='target'), ),
            (remote_execution_pb2.Action(do_not_cache=True),
             remote_execution_pb2.ActionResult(exit_code=12))]
# On-disk fixture data shipped next to this test module:
DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
FILES = [(os.path.join(DATA_DIR, 'void'), ),
         (os.path.join(DATA_DIR, 'hello.cc'), ),
         (os.path.join(DATA_DIR, 'hello',
                       'hello.c'), os.path.join(DATA_DIR, 'hello', 'hello.h'),
          os.path.join(DATA_DIR, 'hello', 'hello.sh')),
         (os.path.join(DATA_DIR, 'hello', 'docs', 'reference', 'api.xml'), )]
FOLDERS = [(DATA_DIR, ), (os.path.join(DATA_DIR, 'hello'), ),
           (os.path.join(DATA_DIR, 'hello', 'docs'), ),
           (os.path.join(DATA_DIR, 'hello', 'utils'), ),
           (os.path.join(DATA_DIR, 'hello', 'docs', 'reference'), )]
DIRECTORIES = [(DATA_DIR, ), (os.path.join(DATA_DIR, 'hello'), )]