def test_xy_train_volume_by_class_with_federated_dataset():
    # Prepare
    dataset_name = "fashion-mnist-100p-b1_045"
    xy_partitions, _, _ = load_splits(dataset_name)
    num_examples_expected = sum(x.shape[0] for x, _ in xy_partitions)

    # We need to find out which classes are present in our dataset
    # (we actually know, but this keeps the test adaptable in case we parameterize it)
    all_classes = set()
    for _, y_train in xy_partitions:
        all_classes.update(np.unique(y_train))
    num_classes_total = len(all_classes)

    # Execute
    results = []
    for xy_train in xy_partitions:
        r = xy_train_volume_by_class(num_classes=num_classes_total, xy_train=xy_train)
        results.append(r)

    # Assert: the per-class counts of all partitions sum to the total example count
    num_examples_actual = sum(sum(r) for r in results)
    assert num_examples_expected == num_examples_actual
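
# For context, a minimal sketch of the function under test, consistent with the
# contract the test above asserts (one count per class per partition, with the
# counts summing to the partition's example count). The signature matches the
# call site above; the body is an illustrative assumption, not the actual
# implementation.
from typing import List, Tuple

import numpy as np


def xy_train_volume_by_class(
    num_classes: int, xy_train: Tuple[np.ndarray, np.ndarray]
) -> List[int]:
    _, y_train = xy_train
    # minlength guarantees one entry per class, so sum(counts) == y_train.shape[0]
    counts = np.bincount(y_train.astype(int).ravel(), minlength=num_classes)
    return counts.tolist()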
def main(_):
    # Set exit callback
    if FLAGS.push_results:
        atexit.register(
            after_main, group_name=FLAGS.group_name, task_name=FLAGS.task_name
        )

    # Load data
    xy_train_partitions, xy_val, xy_test = load_splits(FLAGS.dataset)

    # Execute training
    start = time.time()
    partition_id = FLAGS.partition_id
    hist_metrics = None  # Stays None for unitary training
    if partition_id is not None:
        # Use only a single partition if required (unitary training)
        hist, loss, acc = run.unitary_training(
            model_name=FLAGS.model,
            xy_train=xy_train_partitions[partition_id],
            xy_val=xy_val,
            xy_test=xy_test,
            E=FLAGS.E,
            B=FLAGS.B,
        )
    else:
        hist, _, hist_metrics, loss, acc = run.federated_training(
            model_name=FLAGS.model,
            xy_train_partitions=xy_train_partitions,
            xy_val=xy_val,
            xy_test=xy_test,
            R=FLAGS.R,
            E=FLAGS.E,
            C=FLAGS.C,
            B=FLAGS.B,
        )
    end = time.time()

    # Write results
    res = {
        "group_name": FLAGS.group_name,
        "task_name": FLAGS.task_name,
        "task_label": FLAGS.task_label,
        "dataset": FLAGS.dataset,
        "model": FLAGS.model,
        "R": FLAGS.R,
        "E": FLAGS.E,
        "C": FLAGS.C,
        "B": FLAGS.B,
        "partition_id": partition_id,
        "start": start,
        "end": end,
        "duration": end - start,
        "loss": float(loss),
        "acc": float(acc),
        "hist": hist,
        "hist_metrics": hist_metrics,
    }
    storage.write_json(res, fname="results.json")
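
# For context, main() reads its configuration from absl flags. A minimal sketch
# of how such flags might be defined and wired up: the flag names mirror the
# FLAGS attributes accessed above, but the defaults and help strings are
# illustrative assumptions, not the real configuration.
from absl import app, flags

FLAGS = flags.FLAGS

flags.DEFINE_string("group_name", None, "Benchmark group name")
flags.DEFINE_string("task_name", None, "Benchmark task name")
flags.DEFINE_string("task_label", None, "Human-readable task label")
flags.DEFINE_string("dataset", "fashion-mnist-100p-b1_045", "Federated dataset name")
flags.DEFINE_string("model", "blog_cnn", "Model name")
flags.DEFINE_integer("R", 50, "Rounds of federated training")
flags.DEFINE_integer("E", 1, "Local epochs per round")
flags.DEFINE_float("C", 0.1, "Fraction of participants selected per round")
flags.DEFINE_integer("B", 64, "Local batch size")
flags.DEFINE_integer("partition_id", None, "If set, train only on this partition")
flags.DEFINE_bool("push_results", False, "Push results after the run finishes")

if __name__ == "__main__":
    app.run(main)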
def benchmark_evolutionary_avg():
    fn_name = benchmark_evolutionary_avg.__name__
    logging.info("Starting {}".format(fn_name))

    # Load dataset
    xy_parts, xy_val, xy_test = load_splits("fashion-mnist-100p-noniid-03cpp")

    # Run federated learning with evolutionary aggregation
    evaluator = Evaluator(orig_cnn_compiled(), xy_val)  # FIXME refactor
    aggregator = EvoAgg(evaluator)
    _, _, _, loss_a, acc_a = run.federated_training(
        "blog_cnn",
        xy_parts,
        xy_val,
        xy_test,
        R=DEFAULT_R,
        E=DEFAULT_E,
        C=DEFAULT_C,
        B=DEFAULT_B,
        aggregator=aggregator,
    )

    # Run federated learning with weighted-average aggregation
    _, _, _, loss_b, acc_b = run.federated_training(
        "blog_cnn",
        xy_parts,
        xy_val,
        xy_test,
        R=DEFAULT_R,
        E=DEFAULT_E,
        C=DEFAULT_C,
        B=DEFAULT_B,
    )

    # Write results JSON
    results = {
        "loss_a": float(loss_a),
        "acc_a": float(acc_a),
        "loss_b": float(loss_b),
        "acc_b": float(acc_b),
    }
    # TODO add histories
    storage.write_json(results, fname="EA-WA-results.json")
def unitary_versus_federated(
    benchmark_name: str,
    model_name: str,
    dataset_name: str,
    R: int = DEFAULT_R,
    E: int = DEFAULT_E,
    C: float = DEFAULT_C,
    B: int = DEFAULT_B,
):
    """Compare unitary training on a single partition against federated
    training on all partitions of the given dataset.

    :param C: Fraction of participants used in each round of training
    """
    logging.info(f"Starting {benchmark_name}")
    xy_train_partitions, xy_val, xy_test = load_splits(dataset_name)

    start = time.time()

    # Train CNN on a single partition ("unitary learning")
    # TODO train n models on all partitions
    partition_id = 0
    xy_train = xy_train_partitions[partition_id]
    logging.info(f"Run unitary training using partition {partition_id}")
    ul_hist, ul_loss, ul_acc = unitary_training(
        model_name, xy_train, xy_val, xy_test, E=R * E, B=B
    )

    # Train CNN using federated learning on all partitions
    logging.info("Run federated learning using all partitions")
    fl_hist, _, _, fl_loss, fl_acc = federated_training(
        model_name, xy_train_partitions, xy_val, xy_test, R=R, E=E, C=C, B=B
    )

    end = time.time()

    # Write results JSON
    results = {
        "name": benchmark_name,
        "start": start,
        "end": end,
        "duration": end - start,
        "R": R,
        "E": E,
        "C": C,
        "B": B,
        "unitary_learning": {
            "loss": float(ul_loss),
            "acc": float(ul_acc),
            "hist": ul_hist,
        },
        "federated_learning": {
            "loss": float(fl_loss),
            "acc": float(fl_acc),
            "hist": fl_hist,
        },
    }
    storage.write_json(results, fname="results.json")

    # Plot results
    # TODO include aggregated participant histories in plot
    plot_data: List[Tuple[str, List[float], Optional[List[int]]]] = [
        (
            "Unitary Learning",
            ul_hist["val_acc"],
            # Unitary training runs for R * E epochs and reports one value per epoch
            list(range(1, len(ul_hist["val_acc"]) + 1)),
        ),
        (
            "Federated Learning",
            fl_hist["val_acc"],
            # Federated training evaluates once per round, i.e. every E epochs
            list(range(E, len(fl_hist["val_acc"]) * E + 1, E)),
        ),
    ]
    # FIXME use different filenames for different datasets
    task_accuracies.plot(plot_data, fname="plot.png")
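
# A hypothetical invocation of the benchmark above: the benchmark name is a
# placeholder, and the dataset name mirrors the federated Fashion-MNIST splits
# used elsewhere in this section.
unitary_versus_federated(
    benchmark_name="unitary-vs-federated-noniid-03cpp",
    model_name="blog_cnn",
    dataset_name="fashion-mnist-100p-noniid-03cpp",
)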