def test_is_task_done_done(agg):
    """Test that is_task_done returns True in the corresponded case."""
    round_num = 0
    task_name = 'test_task_name'
    col1 = 'one'
    col2 = 'two'
    agg.assigner.get_collaborators_for_task = mock.Mock(return_value=[col1, col2])
    agg.collaborator_tasks_results = {
        TaskResultKey(task_name, col1, round_num): 1,
        TaskResultKey(task_name, col2, round_num): 1
    }
    is_task_done = agg._is_task_done(task_name)

    assert is_task_done is True


def test_collaborator_task_completed_true(agg):
    """Test that _collaborator_task_completed returns True when the task's results are present."""
    round_num = 0
    task_name = 'test_task_name'
    col1 = 'one'
    agg.collaborator_tasks_results = {
        TaskResultKey(task_name, col1, round_num): 1
    }
    is_completed = agg._collaborator_task_completed(
        col1, task_name, round_num)

    assert is_completed is True
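
For contrast, a hypothetical companion test (not in the snippets above, but built from the same fixture and key type) could cover the case where one assigned collaborator has not reported yet:

def test_is_task_done_not_done(agg):
    """Hypothetical test: the task is not done while a collaborator's results are missing."""
    round_num = 0
    task_name = 'test_task_name'
    col1 = 'one'
    col2 = 'two'
    agg.assigner.get_collaborators_for_task = mock.Mock(return_value=[col1, col2])
    # Only col1 has reported results for this round
    agg.collaborator_tasks_results = {
        TaskResultKey(task_name, col1, round_num): 1
    }

    assert agg._is_task_done(task_name) is False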
Example #3
    def _collaborator_task_completed(self, collaborator, task_name, round_num):
        """
        Check if the collaborator has completed the task for the round.

        The aggregator doesn't actually know which tensors should be sent from the collaborator,
        so it must rely on the presence of previous results.

        Args:
            collaborator : str
                collaborator to check if their task has been completed
            task_name : str
                The name of the task (TaskRunner function)
            round_num : int
                The round number for which to check completion

        Returns:
            task_completed : bool
                Whether or not the collaborator has completed the task for this
                round
        """
        task_key = TaskResultKey(task_name, collaborator, round_num)
        return task_key in self.collaborator_tasks_results
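
The membership check above works because TaskResultKey is a hashable value object. A minimal sketch of the key types these snippets assume (field names inferred from how the keys are constructed and unpacked here, not taken from the framework's source):

from collections import namedtuple

# Sketch of the key types assumed throughout these examples; the real
# definitions live in the framework. Field names are inferred from the
# call sites above and may differ from the actual ones.
TaskResultKey = namedtuple('TaskResultKey', ['task_name', 'owner', 'round_number'])
TensorKey = namedtuple('TensorKey',
                       ['tensor_name', 'origin', 'round_number', 'report', 'tags'])

# Named tuples hash and compare by value, so a key rebuilt from the same
# (task, collaborator, round) triple finds the original dict entry:
results = {TaskResultKey('test_task_name', 'one', 0): ['some_tensor_key']}
assert TaskResultKey('test_task_name', 'one', 0) in results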
Example #4
    def _compute_validation_related_task_metrics(self, task_name):
        """
        Compute all validation related metrics.

        Args:
            task_name : str
                The task name to compute metrics for
        """
        self.logger.info('{} task metrics...'.format(task_name))
        # By default, print out all of the metrics that the validation
        # task sent
        # This handles getting the subset of collaborators that may be
        # part of the validation task
        collaborators_for_task = self.assigner.get_collaborators_for_task(
            task_name, self.round_number)
        # The collaborator data sizes for that task
        collaborator_weights_unnormalized = {
            c: self.collaborator_task_weight[TaskResultKey(
                task_name, c, self.round_number)]
            for c in collaborators_for_task
        }
        weight_total = sum(collaborator_weights_unnormalized.values())
        collaborator_weight_dict = {
            k: v / weight_total
            for k, v in collaborator_weights_unnormalized.items()
        }

        # The validation task should have just a couple tensors (i.e.
        # metrics) associated with it. Because each collaborator should
        # have sent the same tensor list, we can use the first
        # collaborator in our subset, and apply the correct
        # transformations to the tensorkey to resolve the aggregated
        # tensor for that round
        agg_functions = self.assigner.get_aggregation_type_for_task(task_name)
        task_key = TaskResultKey(task_name, collaborators_for_task[0],
                                 self.round_number)
        for tensor_key in self.collaborator_tasks_results[task_key]:
            tensor_name, origin, round_number, report, tags = tensor_key
            assert (tags[-1] == collaborators_for_task[0]), \
                'Tensor {} in task {} has not been processed' \
                ' correctly'.format(tensor_key, task_name)
            # Strip the collaborator label, and lookup aggregated tensor
            new_tags = tuple(tags[:-1])
            agg_tensor_key = TensorKey(tensor_name, origin, round_number,
                                       report, new_tags)
            agg_tensor_name, agg_origin, agg_round_number, agg_report, agg_tags = agg_tensor_key
            agg_results, agg_metadata_dict = self.tensor_db.get_aggregated_tensor(
                agg_tensor_key, collaborator_weight_dict, agg_functions)
            if report:
                # Print the aggregated metric
                if agg_results is None:
                    self.logger.warning(
                        'Aggregated metric {} could not be collected for round {}. '
                        'Skipping reporting for this round'.format(
                            agg_tensor_name, self.round_number))
                    continue
                if agg_functions is not None:
                    self.logger.info('{0} {1}:\t{2:.4f}'.format(
                        agg_functions[0], agg_tensor_name, agg_results))
                else:
                    self.logger.info('{0}:\t{1:.4f}'.format(
                        agg_tensor_name, agg_results))
                for met in agg_metadata_dict:
                    self.logger.info('{0} {1}:\t{2:.4f}'.format(
                        met, agg_tensor_name, agg_metadata_dict[met]))
                # TODO Add all of the logic for saving the model based
                #  on best accuracy, lowest loss, etc.
                if 'validate_agg' in tags:
                    # Compare the accuracy of the model, and
                    # potentially save it
                    if self.best_model_score is None or self.best_model_score < agg_results:
                        self.logger.info(
                            'Saved the best model with score {:f}'.format(
                                agg_results))
                        self.best_model_score = agg_results
                        self._save_model(round_number, self.best_state_path)
            if 'trained' in tags:
                self._prepare_trained(tensor_name, origin, round_number,
                                      report, agg_results)
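
The weight normalization near the top of this example divides each collaborator's reported data size by the task total, so aggregated metrics are weighted by data volume. A standalone sketch of that step with made-up sizes:

# Standalone sketch of the weight normalization above (hypothetical sizes,
# not taken from a real run).
collaborator_weights_unnormalized = {'one': 300, 'two': 100}
weight_total = sum(collaborator_weights_unnormalized.values())  # 400
collaborator_weight_dict = {
    k: v / weight_total
    for k, v in collaborator_weights_unnormalized.items()
}
# -> {'one': 0.75, 'two': 0.25}; the weights sum to 1.0 before aggregation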
Example #5
    def send_local_task_results(self, collaborator_name, round_number,
                                task_name, data_size, named_tensors):
        """
        RPC called by collaborator.

        Transmits collaborator's task results to the aggregator.

        Args:
            collaborator_name: str
            round_number: int
            task_name: str
            data_size: int
            named_tensors: list of protobuf NamedTensor
        Returns:
             None
        """
        self.logger.info(
            'Collaborator {} is sending task results for {}, round {}'.format(
                collaborator_name, task_name, round_number))

        # TODO: do we drop these on the floor?
        # if round_number != self.round_number:
        #     return Acknowledgement(header=self.get_header(collaborator_name))

        task_key = TaskResultKey(task_name, collaborator_name, round_number)

        # reject duplicate submissions: results for this task must not already exist
        if self._collaborator_task_completed(collaborator_name, task_name,
                                             round_number):
            raise ValueError(
                "Aggregator already has task results from collaborator {}"
                " for task {}".format(collaborator_name, task_key))

        # initialize the list of tensors that go with this task
        # Setting these incrementally was leading to missing values
        # self.collaborator_tasks_results[task_key] = []
        task_results = []

        # go through the tensors and add them to the tensor dictionary and the
        # task dictionary
        for named_tensor in named_tensors:
            # sanity check that this tensor has been updated
            if named_tensor.round_number != round_number:
                raise ValueError(
                    'Collaborator {} is reporting results for the wrong round.'
                    ' Exiting...'.format(collaborator_name))

            # quite a bit happens in here, including decompression, delta
            # handling, etc...
            tensor_key, nparray = self._process_named_tensor(
                named_tensor, collaborator_name)

            task_results.append(tensor_key)
            # By giving each task_key its own weight, we can support different
            # training/validation weights, as well as eventually supporting
            # weights that change by round (if more data is added)
            self.collaborator_task_weight[task_key] = data_size

        self.collaborator_tasks_results[task_key] = task_results

        self._end_of_task_check(task_name)
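
Taken together with the earlier snippets, each call above records the collaborator's tensors and data size under its TaskResultKey, and the task counts as done once every assigned collaborator has an entry. A hedged, standalone sketch of that bookkeeping (plain dicts in place of the aggregator's state, hypothetical values):

# Plain-dict sketch of the bookkeeping performed by send_local_task_results;
# values are hypothetical.
collaborator_tasks_results = {}
collaborator_task_weight = {}

for col, data_size in [('one', 300), ('two', 100)]:
    task_key = TaskResultKey('test_task_name', col, 0)
    collaborator_tasks_results[task_key] = ['metric_tensor_key']
    collaborator_task_weight[task_key] = data_size

# The task-done check then reduces to: does every assigned collaborator
# have an entry for this task and round?
assigned = ['one', 'two']
assert all(
    TaskResultKey('test_task_name', c, 0) in collaborator_tasks_results
    for c in assigned
)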