def test_do_not_cache_no_deduplication(do_not_cache, instance, controller, context):
    scheduler = controller.execution_instance._scheduler

    # The default action already has do_not_cache set, so use that
    job_name1 = scheduler.queue_job_action(action, action_digest, skip_cache_lookup=True)

    message_queue = queue.Queue()
    operation_name1 = controller.execution_instance.register_job_peer(
        job_name1, context.peer(), message_queue)

    action2 = remote_execution_pb2.Action(command_digest=command_digest,
                                          do_not_cache=do_not_cache)
    action_digest2 = create_digest(action2.SerializeToString())

    job_name2 = scheduler.queue_job_action(action2, action_digest2, skip_cache_lookup=True)
    operation_name2 = controller.execution_instance.register_job_peer(
        job_name2, context.peer(), message_queue)

    # The jobs are not deduplicated because of do_not_cache,
    # and two separate operations are created
    assert job_name1 != job_name2
    assert operation_name1 != operation_name2

    if isinstance(scheduler.data_store, SQLDataStore):
        with scheduler.data_store.session() as session:
            job_count = session.query(models.Job).count()
            assert job_count == 2

            operation_count = session.query(models.Operation).count()
            assert operation_count == 2
def test_job_deduplication_in_scheduling(instance, controller, context):
    scheduler = controller.execution_instance._scheduler

    action = remote_execution_pb2.Action(command_digest=command_digest,
                                         do_not_cache=False)
    action_digest = create_digest(action.SerializeToString())

    job_name1 = scheduler.queue_job_action(action, action_digest, skip_cache_lookup=True)
    message_queue = queue.Queue()
    operation_name1 = controller.execution_instance.register_job_peer(
        job_name1, context.peer(), message_queue)

    job_name2 = scheduler.queue_job_action(action, action_digest, skip_cache_lookup=True)
    operation_name2 = controller.execution_instance.register_job_peer(
        job_name2, context.peer(), message_queue)

    # The jobs are deduplicated, but separate operations are created
    assert job_name1 == job_name2
    assert operation_name1 != operation_name2

    if isinstance(scheduler.data_store, SQLDataStore):
        with scheduler.data_store.session() as session:
            query = session.query(models.Job)
            job_count = query.filter_by(name=job_name1).count()
            assert job_count == 1

            query = session.query(models.Operation)
            operation_count = query.filter_by(job_name=job_name1).count()
            assert operation_count == 2
def test_upload_tree(instance, directory_paths):
    # Actual test function, to be run in a subprocess:
    def __test_upload_tree(queue, remote, instance, directory_paths):
        # Open a channel to the remote CAS server:
        channel = grpc.insecure_channel(remote)

        digests = []
        with upload(channel, instance) as uploader:
            if len(directory_paths) > 1:
                for directory_path in directory_paths:
                    digest = uploader.upload_tree(directory_path, queue=True)
                    digests.append(digest.SerializeToString())
            else:
                digest = uploader.upload_tree(directory_paths[0], queue=False)
                digests.append(digest.SerializeToString())

        queue.put(digests)

    # Start a minimal CAS server in a subprocess:
    with serve_cas([instance]) as server:
        digests = run_in_subprocess(__test_upload_tree,
                                    server.remote, instance, directory_paths)

        for directory_path, digest_blob in zip(directory_paths, digests):
            digest = remote_execution_pb2.Digest()
            digest.ParseFromString(digest_blob)

            assert server.has(digest)

            tree = remote_execution_pb2.Tree()
            tree.ParseFromString(server.get(digest))

            directory_digest = create_digest(tree.root.SerializeToString())

            assert server.compare_directories(directory_digest, directory_path)
def store_blob(self, blob):
    digest = create_digest(blob)
    write_buffer = self.__storage.begin_write(digest)
    write_buffer.write(blob)

    self.__storage.commit_write(digest, write_buffer)

    return digest
def store_message(self, message):
    message_blob = message.SerializeToString()
    message_digest = create_digest(message_blob)
    write_buffer = self.__storage.begin_write(message_digest)
    write_buffer.write(message_blob)

    self.__storage.commit_write(message_digest, write_buffer)

    return message_digest
def store_file(self, file_path):
    with open(file_path, 'rb') as file_bytes:
        file_blob = file_bytes.read()
    file_digest = create_digest(file_blob)

    write_buffer = self.__storage.begin_write(file_digest)
    write_buffer.write(file_blob)

    self.__storage.commit_write(file_digest, write_buffer)

    return file_digest
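# Hedged sketch, not part of the sources above: a stand-alone illustration of the
# begin_write / write / commit_write pattern the three store_* helpers rely on,
# assuming an in-memory CAS storage (LRUMemoryCache). Names and sizes here are
# illustrative only.
def _example_blob_roundtrip():
    from buildgrid.utils import create_digest
    from buildgrid.server.cas.storage.lru_memory_cache import LRUMemoryCache

    storage = LRUMemoryCache(1024 * 1024)  # 1 MiB in-memory cache

    blob = b'hello, cas'
    digest = create_digest(blob)

    # Same three-step write sequence as store_blob() above:
    write_buffer = storage.begin_write(digest)
    write_buffer.write(blob)
    storage.commit_write(digest, write_buffer)

    # The blob is now addressable by its digest:
    assert storage.get_blob(digest).read() == blob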
def test_create_digest(blob, digest_hash, digest_size):
    # Generate a Digest message from given blob:
    blob_digest = create_digest(blob)

    assert get_hash_type() == remote_execution_pb2.DigestFunction.SHA256

    assert hasattr(blob_digest, 'DESCRIPTOR')
    assert blob_digest.DESCRIPTOR == remote_execution_pb2.Digest.DESCRIPTOR
    assert blob_digest.hash == digest_hash
    assert blob_digest.size_bytes == digest_size
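# Hedged sketch (illustrative, not from the source): the (blob, digest_hash,
# digest_size) parameters that test_create_digest() expects can be computed by
# hand, since a SHA-256 REAPI Digest is just the hex SHA-256 of the blob plus
# its length in bytes.
def expected_digest_parts(blob):
    """Return the (hash, size_bytes) pair a SHA-256 Digest should carry for `blob`."""
    import hashlib
    return hashlib.sha256(blob).hexdigest(), len(blob)

# Example parametrization: (b'abc',) + expected_digest_parts(b'abc') yields a valid
# (blob, digest_hash, digest_size) triple.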
def get(context, action_digest_string, json):
    """Entry-point of the ``bgd action-cache get`` CLI command.

    Note:
        Digest strings are expected to be like: ``{hash}/{size_bytes}``.
    """
    action_digest = parse_digest(action_digest_string)
    if action_digest is None:
        click.echo("Error: Invalid digest string '{}'.".format(action_digest_string),
                   err=True)
        sys.exit(-1)

    # Simply hit the action cache with the given action digest:
    with query(context.channel, instance=context.instance_name) as action_cache:
        try:
            action_result = action_cache.get(action_digest)
        except ConnectionError as e:
            click.echo('Error: Fetching from the action cache: {}'.format(e), err=True)
            sys.exit(-1)

        if action_result is not None:
            if not json:
                action_result_digest = create_digest(action_result.SerializeToString())

                click.echo("Hit: {}/{}: Result cached with digest=[{}/{}]".format(
                    action_digest.hash[:8], action_digest.size_bytes,
                    action_result_digest.hash, action_result_digest.size_bytes))

                # TODO: Print ActionResult details?
            else:
                click.echo(json_format.MessageToJson(action_result))
        else:
            click.echo("Miss: {}/{}: No associated result found in cache...".format(
                action_digest.hash[:8], action_digest.size_bytes))
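# Hedged sketch, not buildgrid's actual parse_digest(): a minimal parser for the
# "{hash}/{size_bytes}" strings described in the docstring above, returning None on
# malformed input, which is what the error handling in get() expects.
def parse_digest_string(digest_string):
    from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2

    parts = digest_string.split('/')
    if len(parts) != 2 or not parts[0] or not parts[1].isdigit():
        return None
    return remote_execution_pb2.Digest(hash=parts[0], size_bytes=int(parts[1]))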
def download_file(context, digest_path_list, verify):
    # Downloading files:
    downloaded_files = {}
    try:
        with download(context.channel, instance=context.instance_name) as downloader:
            for (digest_string, file_path) in zip(digest_path_list[0::2],
                                                  digest_path_list[1::2]):
                if os.path.exists(file_path):
                    click.echo("Error: Invalid value: "
                               "path=[{}] already exists.".format(file_path), err=True)
                    continue

                digest = parse_digest(digest_string)

                downloader.download_file(digest, file_path)
                downloaded_files[file_path] = digest
    # Handle the more specific error before the catch-all one:
    except FileNotFoundError:
        click.echo('Error: Blob not found in CAS', err=True)
        sys.exit(-1)
    except Exception as e:
        click.echo('Error: Downloading file: {}'.format(e), err=True)
        sys.exit(-1)

    # Verifying:
    for (file_path, digest) in downloaded_files.items():
        if verify:
            file_digest = create_digest(read_file(file_path))
            if file_digest != digest:
                click.echo("Error: Failed to verify path=[{}]".format(file_path), err=True)
                continue

        if os.path.isfile(file_path):
            click.echo("Success: Pulled path=[{}] from digest=[{}/{}]".format(
                file_path, digest.hash, digest.size_bytes))
        else:
            click.echo('Error: Failed pulling "{}"'.format(file_path), err=True)
def test_job_reprioritisation(instance, controller, context):
    scheduler = controller.execution_instance._scheduler

    action = remote_execution_pb2.Action(command_digest=command_digest)
    action_digest = create_digest(action.SerializeToString())

    job_name1 = scheduler.queue_job_action(action, action_digest,
                                           skip_cache_lookup=True, priority=10)
    job = scheduler.data_store.get_job_by_name(job_name1)
    assert job.priority == 10

    job_name2 = scheduler.queue_job_action(action, action_digest,
                                           skip_cache_lookup=True, priority=1)
    assert job_name1 == job_name2

    job = scheduler.data_store.get_job_by_name(job_name1)
    assert job.priority == 1
import queue
from unittest import mock

import grpc
from grpc._server import _Context
import pytest

from buildgrid._enums import LeaseState, OperationStage
from buildgrid._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
from buildgrid.utils import create_digest
from buildgrid.server.controller import ExecutionController
from buildgrid.server.cas.storage import lru_memory_cache
from buildgrid.server.actioncache.instance import ActionCache
from buildgrid.server.execution import service
from buildgrid.server.execution.service import ExecutionService
from buildgrid.server.persistence.mem.impl import MemoryDataStore
from buildgrid.server.persistence.sql import models
from buildgrid.server.persistence.sql.impl import SQLDataStore


server = mock.create_autospec(grpc.server)

command = remote_execution_pb2.Command()
command_digest = create_digest(command.SerializeToString())

action = remote_execution_pb2.Action(command_digest=command_digest,
                                     do_not_cache=True)
action_digest = create_digest(action.SerializeToString())


@pytest.fixture
def context():
    cxt = mock.MagicMock(spec=_Context)
    yield cxt


PARAMS = [(impl, use_cache) for impl in ["sql", "mem"]
          for use_cache in ["action-cache", "no-action-cache"]]
def _write_directory(self, root_directory, root_path, directories=None, root_barrier=None):
    """Generates a local directory structure"""
    # i) Files:
    for file_node in root_directory.files:
        file_path = os.path.join(root_path, file_node.name)

        self.download_file(file_node.digest, file_path,
                           is_executable=file_node.is_executable)
    self.flush()

    # ii) Directories:
    pending_directory_digests = []
    pending_directory_paths = {}
    for directory_node in root_directory.directories:
        directory_hash = directory_node.digest.hash
        directory_path = os.path.join(root_path, directory_node.name)
        os.makedirs(directory_path, exist_ok=True)

        if directories and directory_node.digest.hash in directories:
            # We already have the directory; just write it:
            directory = directories[directory_hash]
            self._write_directory(directory, directory_path,
                                  directories=directories, root_barrier=root_barrier)
        else:
            # Gather all the directories that we still need to fetch, so we can
            # try getting them in a single batch request:
            pending_directory_digests.append(directory_node.digest)
            pending_directory_paths[directory_hash] = directory_path

    if pending_directory_paths:
        fetched_blobs = self._fetch_blob_batch(pending_directory_digests)

        for directory_blob in fetched_blobs:
            directory = remote_execution_pb2.Directory()
            directory.ParseFromString(directory_blob)

            # Assuming that the server might not return the blobs in the same
            # order as they were asked for, we read the hashes of the returned
            # blobs:
            directory_hash = create_digest(directory_blob).hash
            # Guarantees for the reply orderings might change in
            # the specification at some point.
            # See: github.com/bazelbuild/remote-apis/issues/52

            directory_path = pending_directory_paths[directory_hash]
            self._write_directory(directory, directory_path,
                                  directories=directories, root_barrier=root_barrier)

    # iii) Symlinks:
    for symlink_node in root_directory.symlinks:
        symlink_path = os.path.join(root_path, symlink_node.name)
        if not os.path.isabs(symlink_node.target):
            target_path = os.path.join(root_path, symlink_node.target)
        else:
            target_path = symlink_node.target
        target_path = os.path.normpath(target_path)

        # Do not create links pointing outside the barrier:
        if root_barrier is not None:
            common_path = os.path.commonprefix([root_barrier, target_path])
            if not common_path.startswith(root_barrier):
                continue

        # Create the link at `symlink_path`, pointing to `target_path`
        # (os.symlink takes the target first, then the link location):
        os.symlink(target_path, symlink_path)
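# Hedged sketch (illustrative only): the root_barrier check above relies on
# os.path.commonprefix(), which compares paths purely character by character.
# A self-contained version of the same containment test:
def escapes_barrier(target_path, root_barrier):
    """Return True if `target_path` falls outside `root_barrier` (same test as above)."""
    import os
    common_path = os.path.commonprefix([root_barrier, os.path.normpath(target_path)])
    return not common_path.startswith(root_barrier)

# e.g. escapes_barrier('/work/out/../../etc/passwd', '/work') is True,
# while escapes_barrier('/work/out/lib', '/work') is False.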