Ejemplo n.º 1
0
    def register_job_peer(self, job_name, peer, message_queue):
        """Registers a peer as a watcher of a job through a new operation.

        The job lookup and the registration are both performed while holding
        the operation lock.

        Args:
            job_name (str): name of the job to subscribe to.
            peer (str): a unique string identifying the client.
            message_queue (queue.Queue): the event queue to register.

        Returns:
            str: The name of the subscribed :class:`Operation`, as returned
                by the job's ``register_new_operation_peer()``.

        Raises:
            NotFoundError: If no job with `job_name` exists.
        """
        with self.__operation_lock:
            target_job = self.data_store.get_job_by_name(job_name)
            if target_job is None:
                raise NotFoundError(
                    "Job name does not exist: [{}]".format(job_name))

            # The job is handed the scheduler's peer maps so that it can
            # record the new subscription in them.
            return target_job.register_new_operation_peer(
                peer,
                message_queue,
                self.__operations_by_peer,
                self.__peer_message_queues,
                data_store=self.data_store)
Ejemplo n.º 2
0
    def register_operation_peer(self, operation_name, peer, message_queue,
                                operations_by_peer, peer_message_queues, *,
                                data_store):
        """Subscribes to one of the job's :class:`Operation` stage changes.

        The subscription is recorded in the two mappings passed in by the
        caller, then an update is sent to the newly registered peer.

        Args:
            operation_name (str): an existing operation's name to subscribe to.
            peer (str): a unique string identifying the client.
            message_queue (queue.Queue): the event queue to register.
            operations_by_peer (dict): maps each peer to the set of operation
                names it watches. Updated in place.
            peer_message_queues (dict): maps each peer to a dict of operation
                name to message queue. Updated in place.
            data_store: forwarded as-is to ``_send_operations_updates()``.

        Returns:
            str: The name of the subscribed :class:`Operation`.

        Raises:
            NotFoundError: If no operation with `operation_name` exists.
        """
        if operation_name not in self.__operations_by_name:
            raise NotFoundError(
                "Operation name does not exist: [{}]".format(operation_name))

        # Create the per-peer containers on first use, then record the
        # subscription.
        operations_by_peer.setdefault(peer, set()).add(operation_name)
        peer_message_queues.setdefault(peer, {})[operation_name] = message_queue

        self._send_operations_updates(peers=[peer],
                                      operations_by_peer=operations_by_peer,
                                      peer_message_queues=peer_message_queues,
                                      data_store=data_store)

        # Honour the documented contract: hand the subscribed name back.
        return operation_name
Ejemplo n.º 3
0
    def get_action_result(self, action_digest):
        """Looks up the ActionResult cached for an Action digest.

        A result is only returned when the cache holds a digest for the
        action, the corresponding message can be read from storage and
        ``_action_result_blobs_still_exist()`` accepts it. Otherwise, if
        updates are allowed, the key is deleted from the cache before the
        lookup fails.

        Args:
            action_digest: The digest to get the result for

        Returns:
            The cached ActionResult matching the given key.

        Raises:
            NotFoundError: If no usable ActionResult could be returned.
        """
        cached_digest = self._get_digest_from_cache(action_digest)
        if cached_digest:
            cached_result = self.__storage.get_message(
                cached_digest, remote_execution_pb2.ActionResult)

            if (cached_result is not None
                    and self._action_result_blobs_still_exist(cached_result)):
                return cached_result

        # NOTE(review): this cleanup also runs when the digest was not in the
        # cache at all, not only when blobs went missing -- confirm that
        # deleting an absent key is harmless.
        if self._allow_updates:
            self.__logger.debug(
                "Removing {}/{} from cache due to missing "
                "blobs in CAS".format(action_digest.hash,
                                      action_digest.size_bytes))
            self._delete_key_from_cache(action_digest)

        raise NotFoundError("Key not found: {}/{}".format(
            action_digest.hash, action_digest.size_bytes))
Ejemplo n.º 4
0
    def retry_job_lease(self, job_name):
        """Re-queues a job after its lease failed to execute.

        A job that has not yet reached ``MAX_N_TRIES`` attempts is queued
        again and its lease goes back to ``PENDING``. A job that has
        exhausted its attempts is not re-queued; its operations are moved to
        the ``COMPLETED`` stage instead.

        Note:
            This may trigger a job's :class:`Operation` stage transition.

        Args:
            job_name (str): name of the job to retry the lease from.

        Raises:
            NotFoundError: If no job with `job_name` exists.
        """
        job = self.data_store.get_job_by_name(job_name)
        if job is None:
            raise NotFoundError(
                "Job name does not exist: [{}]".format(job_name))

        if job.n_tries < self.MAX_N_TRIES:
            next_stage = OperationStage.QUEUED
            self.data_store.queue_job(job.name)

            job.update_lease_state(LeaseState.PENDING,
                                   data_store=self.data_store)

            # Retries are only counted when instrumentation is enabled.
            if self._is_instrumented:
                self.__retries_count += 1

        else:
            # TODO: Decide what to do with these jobs
            next_stage = OperationStage.COMPLETED
            # TODO: Mark these jobs as done

        self._update_job_operation_stage(job_name, next_stage)
Ejemplo n.º 5
0
    def unregister_job_operation_peer(self, operation_name, peer):
        """Removes a peer's subscription to one of a job's operations.

        After the subscription is dropped, the operation is deleted from the
        data store if no peer watches it any more, and the job itself is
        deleted if it has no peers left, is done and holds no lease. All of
        this happens while holding the operation lock.

        Args:
            operation_name (str): name of the operation to unsubscribe from.
            peer (str): a unique string identifying the client.

        Raises:
            NotFoundError: If no operation with `operation_name` exists.
        """
        with self.__operation_lock:
            job = self.data_store.get_job_by_operation(operation_name)
            if job is None:
                raise NotFoundError(
                    "Operation name does not exist: [{}]".format(
                        operation_name))

            job.unregister_operation_peer(operation_name, peer)

            # Forget the subscription in both scheduler-side maps.
            watched_operations = self.__operations_by_peer[peer]
            watched_operations.remove(operation_name)
            queues_for_peer = self.__peer_message_queues[peer]
            queues_for_peer.pop(operation_name)

            watchers_left = job.n_peers_for_operation(
                operation_name, self.__operations_by_peer)
            if not watchers_left:
                self.data_store.delete_operation(operation_name)

            job_unwatched = not job.n_peers(self.__operations_by_peer)
            if job_unwatched and job.done and not job.lease:
                self.data_store.delete_job(job.name)
Ejemplo n.º 6
0
    def read(self, digest_hash, digest_size, read_offset, read_limit):
        """Streams the content of a stored blob as ``ReadResponse`` messages.

        This is a generator: the validation errors listed below are raised
        when iteration starts, not when the method is called.

        Args:
            digest_hash (str): hash part of the blob's digest.
            digest_size (str): size part of the blob's digest, as a string of
                decimal digits.
            read_offset (int): offset of the first byte to return.
            read_limit (int): maximum number of bytes to return. ``0`` means
                everything from `read_offset` up to the end of the blob.

        Yields:
            bytestream_pb2.ReadResponse: successive chunks holding at most
                ``BLOCK_SIZE`` bytes each.

        Raises:
            InvalidArgumentError: If the digest is malformed or `read_limit`
                is negative.
            OutOfRangeError: If `read_offset` is negative or beyond the size
                of the blob.
            NotFoundError: If the blob is not in storage.
        """
        if len(digest_hash) != HASH_LENGTH or not digest_size.isdigit():
            raise InvalidArgumentError("Invalid digest [{}/{}]"
                                       .format(digest_hash, digest_size))

        digest = re_pb2.Digest(hash=digest_hash, size_bytes=int(digest_size))

        # Check the given read offset and limit.
        if read_offset < 0 or read_offset > digest.size_bytes:
            raise OutOfRangeError("Read offset out of range")

        bytes_available = digest.size_bytes - read_offset
        if read_limit == 0:
            bytes_remaining = bytes_available

        elif read_limit > 0:
            # The limit is an upper bound: never plan to send more than the
            # blob still holds, otherwise the loop below would keep emitting
            # empty responses once the blob is exhausted.
            bytes_remaining = min(read_limit, bytes_available)

        else:
            raise InvalidArgumentError("Negative read_limit is invalid")

        # Read the blob from storage and send its contents to the client.
        result = self.__storage.get_blob(digest)
        if result is None:
            raise NotFoundError("Blob not found")

        if result.seekable():
            result.seek(read_offset)

        else:
            # Non-seekable stream: consume and discard the skipped prefix.
            result.read(read_offset)

        while bytes_remaining > 0:
            chunk = result.read(min(self.BLOCK_SIZE, bytes_remaining))
            if not chunk:
                # The stream ended earlier than the digest size announced.
                break

            # Account for what was actually read, so that a short read does
            # not cut the stream before `bytes_remaining` bytes were sent.
            bytes_remaining -= len(chunk)
            yield bytestream_pb2.ReadResponse(data=chunk)
Ejemplo n.º 7
0
    def unregister_operation_peer(self, operation_name, peer):
        """Unsubscribes a peer from one of the job's :class:`Operation`.

        As written, this only checks that the operation is known to the job;
        no other state is modified here.

        Args:
            operation_name (str): an existing operation's name to unsubscribe from.
            peer (str): a unique string identifying the client.

        Raises:
            NotFoundError: If no operation with `operation_name` exists.
        """
        if operation_name in self.__operations_by_name:
            return

        raise NotFoundError(
            "Operation name does not exist: [{}]".format(operation_name))
Ejemplo n.º 8
0
    def update_job_lease_state(self, job_name, lease):
        """Applies the state carried by `lease` to the job's current lease.

        ``PENDING``, ``ACTIVE`` and ``COMPLETED`` lease states map to the
        ``QUEUED``, ``EXECUTING`` and ``COMPLETED`` operation stages. On
        completion the response is stored and, when an updatable action cache
        is configured and the job allows caching, the action result is cached.
        Any other lease state leaves the job's lease untouched and passes
        ``None`` as the new stage.

        Note:
            This may trigger a job's :class:`Operation` stage transition.

        Args:
            job_name (str): name of the job to update lease state from.
            lease (Lease): the lease holding the new state.

        Raises:
            NotFoundError: If no job with `job_name` exists.
        """
        job = self.data_store.get_job_by_name(job_name)
        if job is None:
            raise NotFoundError(
                "Job name does not exist: [{}]".format(job_name))

        requested_state = LeaseState(lease.state)

        if requested_state == LeaseState.COMPLETED:
            job.update_lease_state(LeaseState.COMPLETED,
                                   status=lease.status,
                                   result=lease.result,
                                   data_store=self.data_store)

            should_cache = (self._action_cache is not None
                            and self._action_cache.allow_updates
                            and not job.do_not_cache)
            if should_cache:
                self._action_cache.update_action_result(
                    job.action_digest, job.action_result)
            self.data_store.store_response(job)

            new_stage = OperationStage.COMPLETED

        elif requested_state == LeaseState.ACTIVE:
            job.update_lease_state(LeaseState.ACTIVE,
                                   data_store=self.data_store)
            new_stage = OperationStage.EXECUTING

        elif requested_state == LeaseState.PENDING:
            job.update_lease_state(LeaseState.PENDING,
                                   data_store=self.data_store)
            new_stage = OperationStage.QUEUED

        else:
            # Unhandled lease states do not map to any stage.
            new_stage = None

        self._update_job_operation_stage(job_name, new_stage)
Ejemplo n.º 9
0
    def get_job_lease_cancelled(self, job_name):
        """Tells whether a job's lease has been cancelled.

        Args:
            job_name (str): name of the job to query the lease state from.

        Returns:
            The job's ``lease_cancelled`` attribute.

        Raises:
            NotFoundError: If no job with `job_name` exists.
        """
        job = self.data_store.get_job_by_name(job_name)
        if job is not None:
            return job.lease_cancelled

        raise NotFoundError(
            "Job name does not exist: [{}]".format(job_name))
Ejemplo n.º 10
0
    def get_job_lease(self, job_name):
        """Fetches the lease attached to a job.

        Args:
            job_name (str): name of the job to query the lease from.

        Returns:
            The job's ``lease`` attribute, whatever its current value.

        Raises:
            NotFoundError: If no job with `job_name` exists.
        """
        job = self.data_store.get_job_by_name(job_name)
        if job is not None:
            return job.lease

        raise NotFoundError(
            "Job name does not exist: [{}]".format(job_name))
Ejemplo n.º 11
0
    def get_job_operation(self, operation_name):
        """Fetches one of a job's :class:`Operation` by name.

        The owning job is looked up in the data store and asked for the
        operation through its ``get_operation()`` method.

        Args:
            operation_name (str): name of the operation to query.

        Returns:
            Whatever the job's ``get_operation()`` returns for that name.

        Raises:
            NotFoundError: If no operation with `operation_name` exists.
        """
        owning_job = self.data_store.get_job_by_operation(operation_name)
        if owning_job is not None:
            return owning_job.get_operation(operation_name)

        raise NotFoundError(
            "Operation name does not exist: [{}]".format(operation_name))
Ejemplo n.º 12
0
    def get_operation(self, operation_name):
        """Returns a copy of one of the job's :class:`Operation`.

        Args:
            operation_name (str): the operation's name.

        Returns:
            The result of ``_copy_operation()`` applied to the stored
            operation.

        Raises:
            NotFoundError: If no operation with `operation_name` exists.
        """
        try:
            stored_operation = self.__operations_by_name[operation_name]

        except KeyError:
            raise NotFoundError(
                "Operation name does not exist: [{}]".format(operation_name))

        else:
            # Callers get a copy rather than the stored object itself.
            return self._copy_operation(stored_operation)
Ejemplo n.º 13
0
    def delete_job_operation(self, operation_name):
        """Deletes the job owning an operation once nothing references it.

        The job is only deleted from the data store when it has no peers
        left, is done and holds no lease; otherwise nothing happens. The
        check and the deletion are performed under the operation lock.

        Args:
            operation_name (str): name of the operation to delete.

        Raises:
            NotFoundError: If no operation with `operation_name` exists.
        """
        with self.__operation_lock:
            job = self.data_store.get_job_by_operation(operation_name)
            if job is None:
                raise NotFoundError(
                    "Operation name does not exist: [{}]".format(
                        operation_name))

            # A job that still has peers must be kept around.
            if job.n_peers(self.__operations_by_peer):
                return

            if job.done and not job.lease:
                self.data_store.delete_job(job.name)
Ejemplo n.º 14
0
    def cancel_job_operation(self, operation_name):
        """Cancels a job's :class:`Operation` by name.

        The owning job is looked up in the data store and the cancellation is
        delegated to its ``cancel_operation()`` method, which is handed the
        scheduler's peer maps and data store.

        Args:
            operation_name (str): name of the operation to cancel.

        Raises:
            NotFoundError: If no operation with `operation_name` exists.
        """
        owning_job = self.data_store.get_job_by_operation(operation_name)
        if owning_job is not None:
            owning_job.cancel_operation(operation_name,
                                        self.__operations_by_peer,
                                        self.__peer_message_queues,
                                        data_store=self.data_store)
            return

        raise NotFoundError(
            "Operation name does not exist: [{}]".format(operation_name))
Ejemplo n.º 15
0
    def delete_job_lease(self, job_name):
        """Drops the lease attached to a job.

        Once the lease is deleted, the job itself is removed from the data
        store if it has no peers left and is done. Everything happens under
        the operation lock.

        Args:
            job_name (str): name of the job to delete the lease from.

        Raises:
            NotFoundError: If no job with `job_name` exists.
        """
        with self.__operation_lock:
            job = self.data_store.get_job_by_name(job_name)
            if job is None:
                raise NotFoundError(
                    "Job name does not exist: [{}]".format(job_name))

            job.delete_lease()

            unwatched = not job.n_peers(self.__operations_by_peer)
            if unwatched and job.done:
                self.data_store.delete_job(job.name)
Ejemplo n.º 16
0
    def get_digest_reference(self, key):
        """Looks up the Digest cached under a key.

        If the key is mapped but the corresponding message cannot be read
        from storage, the mapping is dropped before the lookup fails.

        Args:
            key: key for Digest to query.

        Returns:
            The cached Digest matching the given key.

        Raises:
            NotFoundError: If the key is unknown or its message is missing
                from storage.
        """
        digest_map = self._digest_map
        if key in digest_map:
            stored_reference = self.__storage.get_message(
                digest_map[key], remote_execution_pb2.Digest)

            if stored_reference is None:
                # Stale entry: the map points at something storage lost.
                del digest_map[key]
            else:
                return stored_reference

        raise NotFoundError("Key not found: {}".format(key))
Ejemplo n.º 17
0
    def get_action_reference(self, key):
        """Looks up the ActionResult cached under a key.

        A hit requires the key to be mapped, the message to be readable from
        storage and ``_action_result_blobs_still_exist()`` to accept it; the
        key is then moved to the end of the digest map. A mapped key that
        fails either check is dropped from the map before the lookup fails.

        Args:
            key: key for ActionResult to query.

        Returns:
            The cached ActionResult matching the given key.

        Raises:
            NotFoundError: If no usable ActionResult exists for the key.
        """
        digest_map = self._digest_map
        if key in digest_map:
            cached_result = self.__storage.get_message(
                digest_map[key], remote_execution_pb2.ActionResult)

            usable = (cached_result is not None
                      and self._action_result_blobs_still_exist(cached_result))
            if usable:
                digest_map.move_to_end(key)
                return cached_result

            # Either the message or some of its blobs are gone.
            del digest_map[key]

        raise NotFoundError("Key not found: {}".format(key))
Ejemplo n.º 18
0
    def cancel_operation(self, operation_name, operations_by_peer,
                         peer_message_queues, *, data_store):
        """Triggers a job's :class:`Operation` cancellation.

        The operation is recorded as cancelled. The job itself is flagged as
        cancelled only once every operation it holds has been cancelled; in
        that case its stage is set to ``COMPLETED``, the change is written to
        `data_store` and any :class:`Lease` that may have been issued is
        cancelled. A set of peers is then notified through
        ``_send_operations_updates()``.

        Args:
            operation_name (str): the operation's name.
            operations_by_peer (dict): maps each peer to the operation names
                it watches; iterated here to pick the peers to notify and
                forwarded to ``_send_operations_updates()``.
            peer_message_queues: forwarded as-is to
                ``_send_operations_updates()``.
            data_store: used to persist the job changes and forwarded to
                ``cancel_lease()`` and ``_send_operations_updates()``.

        Raises:
            NotFoundError: If no operation with `operation_name` exists.
        """
        try:
            operation = self.__operations_by_name[operation_name]

        except KeyError:
            raise NotFoundError(
                "Operation name does not exist: [{}]".format(operation_name))

        # Remember that this particular operation has been cancelled.
        self.__operations_cancelled.add(operation.name)

        self.__logger.debug("Operation cancelled for job [%s]: [%s]",
                            self._name, operation.name)

        ongoing_operations = set(self.__operations_by_name.keys())
        # The job is cancelled only if all of its operations are cancelled:
        self.__job_cancelled = ongoing_operations.issubset(
            self.__operations_cancelled)

        if self.__job_cancelled:
            # Mark the job as completed/cancelled, both in memory and in the
            # data store, then cancel its lease if there is one.
            self.__operation_metadata.stage = OperationStage.COMPLETED.value
            changes = {
                "stage": OperationStage.COMPLETED.value,
                "cancelled": True
            }
            data_store.update_job(self.name, changes)
            if self._lease is not None:
                self.cancel_lease(data_store=data_store)

        peers_to_notify = set()
        # If the job is not cancelled, notify all the peers watching the given
        # operation; if the job is cancelled, only notify the peers for which
        # the operation status changed.
        for peer, names in operations_by_peer.items():
            # Only consider the operations that belong to this job.
            relevant_names = [
                n for n in names if n in self.__operations_by_name
            ]
            if self.__job_cancelled:
                # NOTE(review): when the job is cancelled every name in
                # `relevant_names` is in the cancelled set (unless the calls
                # above altered those collections), so this first branch only
                # fires for peers watching none of this job's operations --
                # confirm that notifying them is intended.
                if not any(name in self.__operations_cancelled
                           for name in relevant_names):
                    peers_to_notify.add(peer)
                elif operation_name in relevant_names:
                    peers_to_notify.add(peer)

            else:
                if operation_name in relevant_names:
                    peers_to_notify.add(peer)

        self._send_operations_updates(peers=peers_to_notify,
                                      notify_cancelled=True,
                                      operations_by_peer=operations_by_peer,
                                      peer_message_queues=peer_message_queues,
                                      data_store=data_store)