def grid_search(train_loader, test_loader, comm_matrix, num_rounds, epochs,
                num_clients, net='net', optimizer='sgd',
                lrs=np.logspace(-12, -1, 13, base=2.0)):
    """
    Runs a decentralized optimization algorithm once per candidate learning
    rate, for a number of rounds, over some network. Prints and returns the
    resulting accuracies.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matrix modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each
            communication (minimum 1)
        num_clients (int): the number of clients in the network
        net (string): the neural network architecture to use
        optimizer (string): the chosen optimizer, SGD by default
        lrs (array): the list of step sizes to test

    Returns:
        accs (array): the corresponding accuracies, with the same shape as lrs
    """
    accs = []
    for lr in lrs:
        # fresh models and optimizers for each candidate learning rate
        global_model, client_models = model_init(num_clients, net)
        opt = optimizer_init(client_models, lr, optimizer)
        loss, test_loss, acc = 0.0, 0.0, 0.0
        for r in range(num_rounds):
            # local training on every client, then one gossip exchange
            for i in range(num_clients):
                loss += client_update(client_models[i], opt[i],
                                      train_loader[i], epoch=epochs)
            diffuse_params(client_models, comm_matrix)
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)
        # loss was accumulated over all rounds and all clients
        print('lr %g | average train loss %0.3g | test loss %0.3g | test acc: %0.3f'
              % (lr, loss / (num_clients * num_rounds), test_loss, acc))
        accs.append(acc)
    return accs
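
# A minimal usage sketch for grid_search, not part of the original code: it
# assumes `train_loaders` (one DataLoader per client) and `test_loader` were
# built elsewhere, and reuses this module's create_mixing_matrix helper.
def example_grid_search(train_loaders, test_loader, num_clients=8):
    # gossip over a ring of num_clients nodes
    comm_matrix = create_mixing_matrix('ring', num_clients)
    lrs = np.logspace(-6, -1, 6, base=2.0)
    accs = grid_search(train_loaders, test_loader, comm_matrix,
                       num_rounds=10, epochs=1, num_clients=num_clients,
                       lrs=lrs)
    # return the step size with the best final accuracy
    return lrs[int(np.argmax(accs))]
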
def run_probas(train_loader, test_loader, comm_matrix, num_rounds, epochs,
               num_clients, failure_rounds, corr='global', net='net',
               optimizer='sgd', lr=0.1):
    """
    Runs a decentralized optimization algorithm with the given learning rate
    for a number of rounds, over some network. Links may fail for some rounds
    according to a pre-defined probabilistic model. Prints and returns the
    accuracies.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matrix modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each
            communication (minimum 1)
        num_clients (int): the number of clients in the network
        failure_rounds (array): the round indices at which links fail, one
            entry per failing link (a round appearing k times has k failures)
        corr (string): the correction policy, global by default
        net (string): the neural network architecture to use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimization algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging
            all the clients
        client_models (array of Net): the list of all the final client neural
            networks
        accs (array): the accuracy of the global model after each round
    """
    assert corr in ['global', 'local', 'none']
    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)
    test_loss, acc = 0.0, 0.0
    for r in range(num_rounds):
        loss = 0.0  # this round's cumulative train loss
        # number of links failing during this round
        num_failures = np.count_nonzero(failure_rounds == r)
        if corr == 'global':
            actual_comm_matrix = network_failures_global(comm_matrix,
                                                         num_failures)
        elif corr == 'local':
            actual_comm_matrix = network_failures_local(comm_matrix,
                                                        num_failures)
        else:  # corr == 'none'
            actual_comm_matrix = network_failures_no_correction(comm_matrix,
                                                                num_failures)
        for i in range(num_clients):
            loss += client_update(client_models[i], opt[i], train_loader[i],
                                  epoch=epochs)
        diffuse_params(client_models, actual_comm_matrix)
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)
        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f'
              % (loss / num_clients, test_loss, acc))
        accs.append(acc)
    return global_model, client_models, accs
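
# A usage sketch for run_probas, illustrative only: each entry of
# failure_rounds is the index of a round in which one link fails, so drawing
# num_rounds indices uniformly at random schedules one failure per round on
# average. The 'ring' topology and all sizes below are assumptions.
def example_run_probas(train_loaders, test_loader, num_clients=8,
                       num_rounds=20):
    comm_matrix = create_mixing_matrix('ring', num_clients)
    failure_rounds = np.random.randint(0, num_rounds, size=num_rounds)
    return run_probas(train_loaders, test_loader, comm_matrix, num_rounds,
                      epochs=1, num_clients=num_clients,
                      failure_rounds=failure_rounds, corr='local')
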
def run_latency_changing_topo(train_loader, test_loader, num_rounds, epochs,
                              num_clients, latency_nodes, net='net',
                              optimizer='sgd', lr=0.1):
    """
    Runs a decentralized optimization algorithm with the given learning rate
    for a number of rounds. Some nodes send their weights with a one-round
    latency for the entire execution, and the network topology is re-drawn
    at random every round. Prints and returns the accuracies.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each
            communication (minimum 1)
        num_clients (int): the number of clients in the network
        latency_nodes (array): the list of delayed nodes
        net (string): the neural network architecture to use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimization algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging
            all the clients
        client_models (array of Net): the list of all the final client neural
            networks
        accs (array): the accuracy of the global model after each round
    """
    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)
    topos = ['centralized', 'ring', 'grid']
    topo = np.random.choice(topos)
    comm_matrix = create_mixing_matrix(topo, num_clients)
    test_loss, acc = 0.0, 0.0
    for r in range(num_rounds):
        # keep last round's state around for the delayed diffusion below
        # (NB: this binds a reference, not a copy; a true snapshot of the
        # weights would require copy.deepcopy)
        old_client_models = client_models
        old_topo = topo
        old_comm_matrix = comm_matrix
        # re-draw the topology for this round
        topo = np.random.choice(topos)
        comm_matrix = create_mixing_matrix(topo, num_clients)
        loss = 0.0
        # client update
        for i in range(num_clients):
            loss += client_update(client_models[i], opt[i], train_loader[i],
                                  epoch=epochs)
        # diffuse params: non-delayed nodes communicate over the new
        # topology, then the delayed nodes' parameters are diffused over the
        # previous topology (all non-delayed nodes excluded)
        diffuse_params_latency(client_models, comm_matrix, latency_nodes)
        if r > 0:
            diffuse_params_latency(
                old_client_models, old_comm_matrix,
                np.setdiff1d(np.arange(num_clients), latency_nodes))
        print("old topo: {}, new topo: {}".format(old_topo, topo))
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)
        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f'
              % (loss / num_clients, test_loss, acc))
        accs.append(acc)
    return global_model, client_models, accs
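
# An illustrative sketch (not from the original code): the first quarter of
# the nodes are delayed for the whole run; run_latency_changing_topo re-draws
# the topology itself, so no mixing matrix is passed in.
def example_run_latency_changing_topo(train_loaders, test_loader,
                                      num_clients=8, num_rounds=20):
    latency_nodes = np.arange(num_clients // 4)  # nodes 0 .. n/4 - 1 are slow
    return run_latency_changing_topo(train_loaders, test_loader, num_rounds,
                                     epochs=1, num_clients=num_clients,
                                     latency_nodes=latency_nodes)
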
def run_latency_per_round(train_loader, test_loader, comm_matrix, num_rounds,
                          epochs, num_clients, latency_nodes, latency_rounds,
                          net='net', optimizer='sgd', lr=0.1):
    """
    Runs a decentralized optimization algorithm with the given learning rate
    for a number of rounds, over some network. Some nodes send their weights
    with a one-round latency, only during specific rounds. Prints and returns
    the accuracies.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matrix modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each
            communication (minimum 1)
        num_clients (int): the number of clients in the network
        latency_nodes (array): the list of delayed nodes
        latency_rounds (array): the rounds at which latency occurs across the
            network
        net (string): the neural network architecture to use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimization algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging
            all the clients
        client_models (array of Net): the list of all the final client neural
            networks
        accs (array): the accuracy of the global model after each round
    """
    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)
    # ensure a numpy array so that `latency_rounds + 1` below is elementwise
    latency_rounds = np.asarray(latency_rounds)
    test_loss, acc = 0.0, 0.0
    for r in range(num_rounds):
        # keep last round's state around for the delay recovery below
        # (NB: this binds a reference, not a copy; a true snapshot of the
        # weights would require copy.deepcopy)
        old_client_models = client_models
        loss = 0.0
        # client update
        for i in range(num_clients):
            loss += client_update(client_models[i], opt[i], train_loader[i],
                                  epoch=epochs)
        # diffuse params
        if r in latency_rounds:
            # delayed nodes hold back their weights this round
            diffuse_params_latency(client_models, comm_matrix, latency_nodes)
            print("round {}, delay".format(r))
        elif r in latency_rounds + 1:
            # recovery: normal diffusion, then the delayed nodes' parameters
            # are diffused with all non-delayed nodes excluded
            diffuse_params(client_models, comm_matrix)
            diffuse_params_latency(
                old_client_models, comm_matrix,
                np.setdiff1d(np.arange(num_clients), latency_nodes))
            print("round {}, delay recovery".format(r))
        else:
            diffuse_params(client_models, comm_matrix)
            print("round {}, normal".format(r))
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)
        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f'
              % (loss / num_clients, test_loss, acc))
        accs.append(acc)
    return global_model, client_models, accs
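
# An illustrative sketch (not from the original code): the same kind of
# delayed-node set, but the delay only strikes every fifth round.
# latency_rounds is built as a numpy array so the elementwise
# `latency_rounds + 1` recovery test inside run_latency_per_round works.
def example_run_latency_per_round(train_loaders, test_loader, comm_matrix,
                                  num_clients=8, num_rounds=20):
    latency_nodes = np.arange(num_clients // 4)
    latency_rounds = np.arange(0, num_rounds, 5)  # rounds 0, 5, 10, ...
    return run_latency_per_round(train_loaders, test_loader, comm_matrix,
                                 num_rounds, epochs=1,
                                 num_clients=num_clients,
                                 latency_nodes=latency_nodes,
                                 latency_rounds=latency_rounds)
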
def network_model_update(request, received_model):
    """Averages the locally stored model with a model received from a peer."""
    request.actor.logger.info('updating model (averaging)')
    NETWORK_STATE['model'] = average_models([NETWORK_STATE['model'],
                                             received_model])
    return 'ok'

def run_topos(train_loader, test_loader, num_rounds, epochs, num_clients,
              topos, shuffle='random', net='net', optimizer='sgd', lr=0.1):
    """
    Runs a decentralized optimization algorithm with the given learning rate
    for a number of rounds, switching between several network topologies.
    Prints and returns the accuracies.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each
            communication (minimum 1)
        num_clients (int): the number of clients in the network
        topos (array): the list of possible network topologies
        shuffle (string): how the topology evolves over time: 'random'
            (default) draws one uniformly each round, 'modulo' cycles through
            the list, 'fraction' splits the rounds evenly across the list
        net (string): the neural network architecture to use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimization algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging
            all the clients
        client_models (array of Net): the list of all the final client neural
            networks
        accs (array): the accuracy of the global model after each round
    """
    assert shuffle in ['random', 'modulo', 'fraction']
    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)
    test_loss, acc = 0.0, 0.0
    for r in range(num_rounds):
        loss = 0.0
        for i in range(num_clients):
            loss += client_update(client_models[i], opt[i], train_loader[i],
                                  epoch=epochs)
        # pick this round's topology according to the shuffle policy
        if shuffle == 'fraction':
            t = int(r * len(topos) / num_rounds)
        elif shuffle == 'modulo':
            t = r % len(topos)
        else:  # shuffle == 'random'
            t = np.random.choice(range(len(topos)))
        comm_matrix = create_mixing_matrix(topos[t], num_clients)
        diffuse_params(client_models, comm_matrix)
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)
        print('%d-th round, %s topology' % (r, topos[t]))
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f'
              % (loss / num_clients, test_loss, acc))
        accs.append(acc)
    return global_model, client_models, accs
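
# An illustrative sketch (not from the original code): cycling
# deterministically through the three topologies used elsewhere in this
# module, one switch per round, via the 'modulo' policy.
def example_run_topos(train_loaders, test_loader, num_clients=8,
                      num_rounds=20):
    topos = ['centralized', 'ring', 'grid']
    return run_topos(train_loaders, test_loader, num_rounds, epochs=1,
                     num_clients=num_clients, topos=topos, shuffle='modulo')
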