def test_is_task_done_done(agg):
    """Test that is_task_done returns True once all assigned collaborators have results."""
    round_num = 0
    task_name = 'test_task_name'
    col1 = 'one'
    col2 = 'two'
    agg.assigner.get_collaborators_for_task = mock.Mock(
        return_value=[col1, col2])
    agg.collaborator_tasks_results = {
        TaskResultKey(task_name, col1, round_num): 1,
        TaskResultKey(task_name, col2, round_num): 1
    }

    is_task_done = agg._is_task_done(task_name)

    assert is_task_done is True
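# A hedged companion sketch (not part of the original suite): the negative
# branch of _is_task_done, where only one of the two assigned collaborators
# has reported. Assumes the same `agg` fixture and module-level imports
# (`mock`, `TaskResultKey`) as the test above.
def test_is_task_done_not_done(agg):
    """Test that is_task_done returns False while results are still missing."""
    round_num = 0
    task_name = 'test_task_name'
    col1 = 'one'
    col2 = 'two'
    agg.assigner.get_collaborators_for_task = mock.Mock(
        return_value=[col1, col2])
    # Only col1 has sent results, so the task cannot be done yet
    agg.collaborator_tasks_results = {
        TaskResultKey(task_name, col1, round_num): 1
    }

    assert agg._is_task_done(task_name) is False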
def test_collaborator_task_completed_true(agg):
    """Test that _collaborator_task_completed returns True if the collaborator's results are present."""
    round_num = 0
    task_name = 'test_task_name'
    col1 = 'one'
    agg.collaborator_tasks_results = {
        TaskResultKey(task_name, col1, round_num): 1
    }

    is_completed = agg._collaborator_task_completed(
        col1, task_name, round_num)

    assert is_completed is True
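# The `agg` fixture used by the tests above sits outside this excerpt. A
# minimal sketch of what such a fixture could look like, assuming `pytest`,
# `unittest.mock`, and the Aggregator class are imported at module scope;
# the construction below is illustrative, not the project's actual fixture.
@pytest.fixture
def agg():
    """Return an Aggregator with its dependencies mocked out."""
    # Bypass __init__ so no real plan, model, or networking is required,
    # then attach only the attributes these tests touch.
    agg = Aggregator.__new__(Aggregator)
    agg.assigner = mock.Mock()
    agg.round_number = 0
    agg.collaborator_tasks_results = {}
    agg.collaborator_task_weight = {}
    return agg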
def _collaborator_task_completed(self, collaborator, task_name, round_num):
    """
    Check if the collaborator has completed the task for the round.

    The aggregator doesn't actually know which tensors should be sent
    from the collaborator, so it must rely on the presence of previous
    results.

    Args:
        collaborator : str
            The collaborator to check for task completion
        task_name : str
            The name of the task (TaskRunner function)
        round_num : int
            The round to check

    Returns:
        task_completed : bool
            Whether or not the collaborator has completed the task for
            this round
    """
    task_key = TaskResultKey(task_name, collaborator, round_num)
    return task_key in self.collaborator_tasks_results
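# TaskResultKey is defined elsewhere in the package; the code in this
# excerpt only needs it to be a hashable triple usable as a dict key. A
# minimal sketch of its shape, assuming a namedtuple whose field order
# matches the call sites above (the field names are assumptions):
from collections import namedtuple

TaskResultKey = namedtuple('TaskResultKey', ['task_name', 'owner', 'round_number'])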
def _compute_validation_related_task_metrics(self, task_name):
    """
    Compute all validation-related metrics.

    Args:
        task_name : str
            The task name to compute metrics for
    """
    self.logger.info('{} task metrics...'.format(task_name))
    # By default, print out all of the metrics that the validation
    # task sent

    # This handles getting the subset of collaborators that may be
    # part of the validation task
    collaborators_for_task = self.assigner.get_collaborators_for_task(
        task_name, self.round_number)

    # The collaborator data sizes for that task
    collaborator_weights_unnormalized = {
        c: self.collaborator_task_weight[TaskResultKey(
            task_name, c, self.round_number)]
        for c in collaborators_for_task
    }
    weight_total = sum(collaborator_weights_unnormalized.values())
    collaborator_weight_dict = {
        k: v / weight_total
        for k, v in collaborator_weights_unnormalized.items()
    }

    # The validation task should have just a couple of tensors (i.e.
    # metrics) associated with it. Because each collaborator should
    # have sent the same tensor list, we can use the first
    # collaborator in our subset, and apply the correct
    # transformations to the tensorkey to resolve the aggregated
    # tensor for that round
    agg_functions = self.assigner.get_aggregation_type_for_task(task_name)
    task_key = TaskResultKey(task_name, collaborators_for_task[0],
                             self.round_number)
    for tensor_key in self.collaborator_tasks_results[task_key]:
        tensor_name, origin, round_number, report, tags = tensor_key
        assert (tags[-1] == collaborators_for_task[0]), (
            'Tensor {} in task {} has not been processed'
            ' correctly'.format(tensor_key, task_name))

        # Strip the collaborator label, and look up the aggregated tensor
        new_tags = tuple(tags[:-1])
        agg_tensor_key = TensorKey(tensor_name, origin, round_number,
                                   report, new_tags)
        agg_tensor_name, agg_origin, agg_round_number, agg_report, agg_tags = agg_tensor_key
        agg_results, agg_metadata_dict = self.tensor_db.get_aggregated_tensor(
            agg_tensor_key, collaborator_weight_dict, agg_functions)

        if report:
            # Print the aggregated metric
            if agg_results is None:
                self.logger.warning(
                    'Aggregated metric {} could not be collected for round {}. '
                    'Skipping reporting for this round'.format(
                        agg_tensor_name, self.round_number))
                continue
            if agg_functions is not None:
                self.logger.info('{0} {1}:\t{2:.4f}'.format(
                    agg_functions[0], agg_tensor_name, agg_results))
            else:
                self.logger.info('{0}:\t{1:.4f}'.format(
                    agg_tensor_name, agg_results))
            for met in agg_metadata_dict:
                self.logger.info('{0} {1}:\t{2:.4f}'.format(
                    met, agg_tensor_name, agg_metadata_dict[met]))

        # TODO Add all of the logic for saving the model based
        #  on best accuracy, lowest loss, etc.
        if 'validate_agg' in tags:
            # Compare the accuracy of the model, and
            # potentially save it
            if self.best_model_score is None or self.best_model_score < agg_results:
                self.logger.info(
                    'Saved the best model with score {:f}'.format(
                        agg_results))
                self.best_model_score = agg_results
                self._save_model(round_number, self.best_state_path)
        if 'trained' in tags:
            self._prepare_trained(tensor_name, origin, round_number,
                                  report, agg_results)
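# A minimal sketch (not the project's implementation) of the weighted
# average that tensor_db.get_aggregated_tensor is expected to apply with
# collaborator_weight_dict when no custom aggregation function is set; the
# function name and dict-of-arrays interface here are assumptions:
import numpy as np

def weighted_average_sketch(tensors, weights):
    """Combine per-collaborator arrays into one weighted average.

    tensors : dict mapping collaborator name -> np.ndarray
    weights : dict mapping collaborator name -> float (values sum to 1)
    """
    return np.sum([tensors[c] * weights[c] for c in tensors], axis=0)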
def send_local_task_results(self, collaborator_name, round_number, task_name,
                            data_size, named_tensors):
    """
    RPC called by a collaborator to transmit its task results to the aggregator.

    Args:
        collaborator_name : str
        round_number : int
        task_name : str
        data_size : int
        named_tensors : protobuf NamedTensor

    Returns:
        None
    """
    self.logger.info(
        'Collaborator {} is sending task results for {}, round {}'.format(
            collaborator_name, task_name, round_number))

    # TODO: do we drop these on the floor?
    # if round_number != self.round_number:
    #     return Acknowledgement(header=self.get_header(collaborator_name))

    task_key = TaskResultKey(task_name, collaborator_name, round_number)

    # we mustn't have results already
    if self._collaborator_task_completed(collaborator_name, task_name,
                                         round_number):
        raise ValueError(
            'Aggregator already has task results from collaborator {}'
            ' for task {}'.format(collaborator_name, task_key))

    # initialize the list of tensors that go with this task
    # Setting these incrementally is leading to missing values
    # self.collaborator_tasks_results[task_key] = []
    task_results = []

    # go through the tensors and add them to the tensor dictionary and the
    # task dictionary
    for named_tensor in named_tensors:
        # sanity check that this tensor has been updated
        if named_tensor.round_number != round_number:
            raise ValueError(
                'Collaborator {} is reporting results for the wrong round.'
                ' Exiting...'.format(collaborator_name))

        # quite a bit happens in here, including decompression, delta
        # handling, etc...
        tensor_key, nparray = self._process_named_tensor(
            named_tensor, collaborator_name)
        task_results.append(tensor_key)

    # By giving task_key its own weight, we can support different
    # training/validation weights, as well as eventually supporting
    # weights that change by round (if more data is added)
    self.collaborator_task_weight[task_key] = data_size
    self.collaborator_tasks_results[task_key] = task_results

    self._end_of_task_check(task_name)
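# _end_of_task_check is called above but sits outside this excerpt. A
# minimal sketch of the expected control flow, assuming an
# _end_of_round_check helper that closes out the round once every task is
# done (the helper's name and existence are assumptions here):
def _end_of_task_check(self, task_name):
    """If all collaborators have reported for task_name, check the round."""
    if self._is_task_done(task_name):
        # all of the results for this task have arrived, so the round
        # as a whole may now be complete as well
        self._end_of_round_check()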