def measure_client_group_diffs(self):
    """Measure how far the clients' models are from their group's model.

    For every group in ``self.group_list`` the L2 distance between each
    client's ``local_model`` and the group's ``latest_model`` (both flattened
    with ``process_grad``) is accumulated. The mean distance of a non-empty
    group is also stored on the group as ``latest_diff``; empty groups get a
    discrepancy of zero and their ``latest_diff`` is left untouched.

    Returns:
        A 1-D array of length ``len(self.group_list) + 1``. Element 0 is the
        distance averaged over the clients of all groups (0.0 when there are
        no clients at all); element ``i + 1`` is the average distance inside
        group ``i``.
    """
    average_group_diffs = np.zeros(len(self.group_list))
    total_group_diff = 0.0
    number_clients = [len(g.get_client_ids()) for g in self.group_list]

    for idx, g in enumerate(self.group_list):
        if number_clients[idx] == 0:
            # The group is empty, the discrepancy is ZERO (already the default)
            continue
        model_g = process_grad(g.latest_model)
        diff = 0.0
        for c in g.clients.values():
            model_c = process_grad(c.local_model)
            diff += np.sum((model_c - model_g)**2)**0.5
        total_group_diff += diff
        average_group_diffs[idx] = diff / float(number_clients[idx])
        g.latest_diff = average_group_diffs[idx]

    total_clients = sum(number_clients)
    # Guard against ZeroDivisionError when no group has any client.
    if total_clients > 0:
        average_total_diff = total_group_diff / total_clients
    else:
        average_total_diff = 0.0

    # Put the overall average discrepancy at the head of the per-group values
    return np.append([average_total_diff], average_group_diffs)
def get_gradients(self, data, model_len):
    """Compute the flattened gradient of the model on ``data``.

    Args:
        data: mapping holding the features under ``'x'`` and the labels
            under ``'y'``; both are fed to the session unchanged.
        model_len: not used by this implementation; kept so the signature
            stays compatible with existing callers.

    Returns:
        ``(num_samples, grads)`` where ``num_samples`` is ``len(data['y'])``
        and ``grads`` is the output of ``self.grads`` flattened by
        ``process_grad``.
    """
    num_samples = len(data['y'])

    with self.graph.as_default():
        model_grads = self.sess.run(self.grads,
                                    feed_dict={
                                        self.features: data['x'],
                                        self.labels: data['y']
                                    })
        grads = process_grad(model_grads)

    return num_samples, grads
def get_gradients(self, data, model_len):
    """Compute the flattened gradient of the model on (part of) ``data``.

    With fewer than 50 samples the whole of ``data`` is evaluated in a single
    session run. Otherwise the first ``min(int(num_samples / 50), 4)``
    consecutive batches of 50 samples are evaluated one by one and their
    flattened gradients are averaged.

    Args:
        data: mapping holding the features under ``'x'`` and the labels
            under ``'y'``; both go through ``process_x`` / ``process_y``
            before being fed to the session.
        model_len: length of the flattened gradient, used to size the
            accumulator for the batched case.

    Returns:
        ``(processed_samples, grads)``: the number of samples that actually
        contributed and the (averaged) flattened gradient.
    """
    grads = np.zeros(model_len)
    num_samples = len(data['y'])

    def _flat_grads(features, labels):
        # One session run over the given slice, flattened by process_grad
        input_data = process_x(features)
        target_data = process_y(labels)
        with self.graph.as_default():
            model_grads = self.sess.run(self.grads,
                                        feed_dict={
                                            self.features: input_data,
                                            self.labels: target_data
                                        })
        return process_grad(model_grads)

    if num_samples < 50:
        return num_samples, _flat_grads(data['x'], data['y'])

    # calculate the grads in a batch size of 50, at most 4 batches
    num_batches = min(int(num_samples / 50), 4)
    for batch_idx in range(num_batches):
        lo, hi = 50 * batch_idx, 50 * (batch_idx + 1)
        grads = np.add(grads, _flat_grads(data['x'][lo:hi], data['y'][lo:hi]))

    # average over the batches that were evaluated
    grads = grads * 1.0 / num_batches
    return num_batches * 50, grads
def get_gradients(self, data, model_len):
    """Compute the flattened gradient of the model on all of ``data``.

    Args:
        data: mapping holding the features under ``'x'`` and the labels
            under ``'y'``; they are converted with ``process_x`` and
            ``process_y`` before being fed to the session.
        model_len: not used by this implementation; kept so the signature
            stays compatible with existing callers.

    Returns:
        ``(processed_samples, grads)`` where ``processed_samples`` is
        ``len(data['y'])`` and ``grads`` is the output of ``self.grads``
        flattened by ``process_grad``.
    """
    num_samples = len(data['y'])

    input_data = process_x(data['x'])
    target_data = process_y(data['y'])
    with self.graph.as_default():
        model_grads = self.sess.run(self.grads,
                                    feed_dict={
                                        self.features: input_data,
                                        self.labels: target_data
                                    })
    grads = process_grad(model_grads)

    return num_samples, grads
def krum_average(self, k, parameters):
    """Krum aggregation.

    Each candidate is scored by the sum of its ``k + 1`` smallest squared
    Euclidean distances to the candidates (its zero distance to itself is
    part of that row, so this covers its ``k`` closest other vectors). The
    candidate with the lowest score is returned unchanged.

    Args:
        k: number of closest vectors that contribute to a score.
        parameters: sequence of candidate updates; each is flattened with
            ``process_grad`` for the distance computation.

    Returns:
        The element of ``parameters`` with the lowest score.
    """
    num_candidates = len(parameters)
    flat = [process_grad(parameters[idx]) for idx in range(num_candidates)]

    # Symmetric matrix of pairwise squared distances (diagonal stays zero)
    sq_dist = np.zeros((num_candidates, num_candidates))
    for a in range(num_candidates):
        for b in range(a + 1, num_candidates):
            sq_dist[a][b] = np.sum(np.square(flat[a] - flat[b]))
            sq_dist[b][a] = sq_dist[a][b]

    score = np.zeros(num_candidates)
    for row_idx, row in enumerate(sq_dist):
        score[row_idx] = np.sum(np.sort(row)[:k + 1])

    best = np.argsort(score)[0]
    return parameters[best]
def __init__(self, params, learner, dataset):
    """Set up the shared client model, the clients and the metrics.

    Args:
        params: dict of settings; every entry is copied onto ``self`` as an
            attribute. ``params['model_params']`` supplies the leading
            positional arguments of ``learner``.
        learner: callable building the client model; it is called with the
            model parameters followed by ``self.inner_opt`` and ``self.seed``.
        dataset: passed through to ``self.setup_clients``.
    """
    # transfer parameters to self
    for attr_name, attr_value in params.items():
        setattr(self, attr_name, attr_value)

    # create worker nodes
    tf.reset_default_graph()
    model_args = (*params['model_params'], self.inner_opt, self.seed)
    self.client_model = learner(*model_args)
    self.clients = self.setup_clients(dataset, self.client_model)
    num_clients = len(self.clients)
    print('{} Clients in Total'.format(num_clients))
    self.latest_model = self.client_model.get_params()

    # initialize system metrics
    self.metrics = Metrics(self.clients, params)

    # Print the number of parameters
    flat_model = process_grad(self.latest_model)
    print('{} Parameters in Total'.format(flat_model.size))
def mkrum_average(self, k, m, parameters):
    """Multi-Krum aggregation.

    Candidates are scored as in Krum: the sum of the ``k + 1`` smallest
    squared Euclidean distances in their row of the pairwise distance matrix
    (the zero self-distance included). The ``m`` candidates with the lowest
    scores are averaged with ``self.simple_average``; ``m = 1`` reduces to
    Krum.

    Args:
        k: number of closest vectors that contribute to a score.
        m: number of best-scored candidates to average.
        parameters: sequence of candidate updates; each is flattened with
            ``process_grad`` for the distance computation.

    Returns:
        Whatever ``self.simple_average`` returns for the selected candidates.
    """
    num_candidates = len(parameters)
    flat = [process_grad(parameters[idx]) for idx in range(num_candidates)]

    # Symmetric matrix of pairwise squared distances (diagonal stays zero)
    sq_dist = np.zeros((num_candidates, num_candidates))
    for a in range(num_candidates):
        for b in range(a + 1, num_candidates):
            sq_dist[a][b] = np.sum(np.square(flat[a] - flat[b]))
            sq_dist[b][a] = sq_dist[a][b]

    score = np.zeros(num_candidates)
    for row_idx, row in enumerate(sq_dist):
        score[row_idx] = np.sum(np.sort(row)[:k + 1])

    # multi-krum keeps the m candidates with the lowest scores
    best_m = np.argsort(score)[:m]
    chosen = [parameters[idx] for idx in best_m]
    return self.simple_average(chosen)
def show_grads(self):
    '''
    Collect the gradient of every client at the latest global model.

    Return:
        list with the gradients of all workers, in the order of
        ``self.clients``, followed by the global gradient (the per-client
        gradients averaged with the clients' sample counts as weights)
    '''
    flat_size = process_grad(self.latest_model).size
    weighted_sum = np.zeros(flat_size)
    per_client = []
    sample_counts = []

    self.client_model.set_params(self.latest_model)
    for client in self.clients:
        count, grad = client.get_grads(self.latest_model)
        sample_counts.append(count)
        weighted_sum = np.add(weighted_sum, grad * count)
        per_client.append(grad)

    total_samples = np.sum(np.asarray(sample_counts))
    global_grad = weighted_sum * 1.0 / total_samples
    per_client.append(global_grad)
    return per_client
def train_grouping(self):
    """Train by sweeping over the clients in consecutive groups.

    Every round walks through ``self.clients`` in slices of
    ``self.clients_per_round``. For each slice: the clients train locally
    from the latest model, their model deltas are flattened and passed
    through ``self.local_process``, then reshaped into a
    ``(dim_x, dim_y)`` array plus the remaining tail, and the result of
    ``self.server_process`` is added to the latest model. The model is
    evaluated whenever the running slice counter is a multiple of
    ``self.eval_every`` and once more after the last round.
    """
    step = 0
    for rnd in range(self.num_rounds):
        # loop through mini-batches of clients
        for start in range(0, len(self.clients), self.clients_per_round):
            if step % self.eval_every == 0:
                self.evaluate(step)

            group = self.clients[start:start + self.clients_per_round]

            # ---------------- local updating ----------------
            raw_updates = []
            for c in group:
                # distribute global model
                c.set_params(self.latest_model)
                # local iteration on full local batch of client c
                soln, stats = c.solve_inner(num_epochs=self.num_epochs,
                                            batch_size=self.batch_size)
                # track computational cost
                self.metrics.update(rnd=rnd, cid=c.id, stats=stats)
                # local update = trained weights minus the global weights
                raw_updates.append(
                    [u - v for (u, v) in zip(soln[1], self.latest_model)])

            # ---------------- local process ----------------
            processed_updates = []
            for update in raw_updates:
                flattened = process_grad(update)
                processed = self.local_process(flattened)
                head = np.reshape(processed[:self.dim_model],
                                  (self.dim_x, self.dim_y))
                tail = processed[self.dim_model:]
                processed_updates.append([head, tail])

            aggregated = self.server_process(processed_updates)
            self.latest_model = [
                u + v for (u, v) in zip(self.latest_model, aggregated)
            ]
            self.client_model.set_params(self.latest_model)
            step += 1

    # final test model
    self.evaluate(step)
def _get_cosine_similarity(self, m1, m2):
    """Return the cosine similarity of two models.

    Both models are flattened with ``process_grad``; the result is their dot
    product divided by the product of their Euclidean norms.
    """
    vec_a = process_grad(m1)
    vec_b = process_grad(m2)
    norm_a = np.sqrt(np.sum(vec_a**2))
    norm_b = np.sqrt(np.sum(vec_b**2))
    return np.dot(vec_a, vec_b) / (norm_a * norm_b)
def train(self):
    """Run federated training with corrupted clients, then fine-tune locally.

    Steps, in order:

    1. Seed NumPy with ``1234567 + self.seed`` and draw ``self.num_corrupted``
       client indices without replacement. For the datasets handled below,
       those clients get their training labels replaced.
    2. For ``self.num_rounds + 1`` rounds: evaluate every ``self.eval_every``
       rounds, let the selected clients run ``self.local_iters`` SGD steps,
       optionally tamper with the updates of corrupted clients, aggregate the
       updates with the configured rule and add the result to
       ``self.latest_model``.
    3. Starting from the final global model, fine-tune a separate model for
       every client with a proximal term weighted by ``self.lam`` and print
       the resulting test accuracies.
    """
    print('Training with {} workers ---'.format(self.clients_per_round))

    # Fixed seed so that the same clients are corrupted for a given self.seed
    np.random.seed(1234567 + self.seed)
    corrupt_id = np.random.choice(range(len(self.clients)),
                                  size=self.num_corrupted,
                                  replace=False)
    print(corrupt_id)

    # One batch iterator per client (consumed with next() below)
    batches = {}
    for idx, c in enumerate(self.clients):
        if idx in corrupt_id:
            # Label poisoning: flip binary labels for celeba, draw random
            # labels for femnist / fmnist
            c.train_data['y'] = np.asarray(c.train_data['y'])
            if self.dataset == 'celeba':
                c.train_data['y'] = 1 - c.train_data['y']
            elif self.dataset == 'femnist':
                c.train_data['y'] = np.random.randint(
                    0, 62, len(c.train_data['y']))  # [0, 62)
            elif self.dataset == 'fmnist':  # fashion mnist
                c.train_data['y'] = np.random.randint(
                    0, 10, len(c.train_data['y']))

        # The last argument is num_rounds * local_iters + 350; presumably the
        # number of batches to generate (extra 350 for fine-tuning) - confirm
        # against gen_batch
        if self.dataset == 'celeba':
            batches[c] = gen_batch_celeba(
                c.train_data, self.batch_size,
                self.num_rounds * self.local_iters + 350)
        else:
            batches[c] = gen_batch(
                c.train_data, self.batch_size,
                self.num_rounds * self.local_iters + 350)

    # Snapshot of the starting weights, used to report how far training moved
    initialization = copy.deepcopy(self.clients[0].get_params())

    for i in range(self.num_rounds + 1):
        if i % self.eval_every == 0:
            num_test, num_correct_test, _ = self.test(
            )  # have set the latest model for all clients
            num_train, num_correct_train, loss_vector = self.train_error()

            # Training loss averaged with the clients' sample counts as weights
            avg_loss = np.dot(loss_vector, num_train) / np.sum(num_train)

            tqdm.write('At round {} training accu: {}, loss: {}'.format(
                i,
                np.sum(num_correct_train) * 1.0 / np.sum(num_train),
                avg_loss))
            tqdm.write('At round {} test accu: {}'.format(
                i,
                np.sum(num_correct_test) * 1.0 / np.sum(num_test)))
            # NOTE: non_corrupt_id is also read after the round loop; it is
            # always bound there because round 0 always takes this branch
            non_corrupt_id = np.setdiff1d(range(len(self.clients)), corrupt_id)
            tqdm.write('At round {} malicious test accu: {}'.format(
                i,
                np.sum(num_correct_test[corrupt_id]) * 1.0 /
                np.sum(num_test[corrupt_id])))
            tqdm.write('At round {} benign test accu: {}'.format(
                i,
                np.sum(num_correct_test[non_corrupt_id]) * 1.0 /
                np.sum(num_test[non_corrupt_id])))
            print(
                "variance of the performance: ",
                np.var(num_correct_test[non_corrupt_id] /
                       num_test[non_corrupt_id]))

        indices, selected_clients = self.select_clients(
            round=i,
            corrupt_id=corrupt_id,
            num_clients=self.clients_per_round)

        csolns = []  # updates received from the clients in this round
        losses = []  # loss of every selected client before its local steps

        for idx in indices:
            c = self.clients[idx]

            # communicate the latest model
            c.set_params(self.latest_model)
            weights_before = copy.deepcopy(self.latest_model)
            loss = c.get_loss()  # compute loss on the whole training data
            losses.append(loss)

            for _ in range(self.local_iters):
                data_batch = next(batches[c])
                _, _, _ = c.solve_sgd(data_batch)

            new_weights = c.get_params()

            # The client's update is the change of its weights
            grads = [(u - v) * 1.0
                     for u, v in zip(new_weights, weights_before)]

            if idx in corrupt_id:
                if self.boosting:  # model replacement
                    # scale the update by the number of clients per round
                    grads = [self.clients_per_round * u for u in grads]
                elif self.random_updates:  # send random updates
                    stdev_ = get_stdev(grads)
                    grads = [
                        np.random.normal(0, stdev_, size=u.shape)
                        for u in grads
                    ]

            if self.q > 0:
                # pair the update with the weight exp(q * loss)
                csolns.append((np.exp(self.q * loss), grads))
            else:
                csolns.append(grads)

        if self.q > 0:
            overall_updates = self.aggregate(csolns)
        else:
            if self.gradient_clipping:
                csolns = l2_clip(csolns)

            # Corrupted clients expected among those selected in a round
            expected_num_mali = int(self.clients_per_round *
                                    self.num_corrupted / len(self.clients))

            # The first enabled aggregation rule wins; plain averaging is
            # the fallback
            if self.median:
                overall_updates = self.median_average(csolns)
            elif self.k_norm:
                overall_updates = self.k_norm_average(
                    self.clients_per_round - expected_num_mali, csolns)
            elif self.k_loss:
                overall_updates = self.k_loss_average(
                    self.clients_per_round - expected_num_mali, losses,
                    csolns)
            elif self.krum:
                overall_updates = self.krum_average(
                    self.clients_per_round - expected_num_mali - 2, csolns)
            elif self.mkrum:
                m = self.clients_per_round - expected_num_mali
                overall_updates = self.mkrum_average(
                    self.clients_per_round - expected_num_mali - 2, m,
                    csolns)
            else:
                overall_updates = self.simple_average(csolns)

        self.latest_model = [
            (u + v) for u, v in zip(self.latest_model, overall_updates)
        ]

        distance = np.linalg.norm(
            process_grad(self.latest_model) - process_grad(initialization))
        if i % self.eval_every == 0:
            print('distance to initialization:', distance)

    # local finetuning
    init_model = copy.deepcopy(self.latest_model)

    after_test_accu = []
    test_samples = []
    for idx, c in enumerate(self.clients):
        c.set_params(init_model)
        local_model = copy.deepcopy(init_model)
        # At least finetune_iters steps, more for clients whose
        # train_samples exceed batch_size
        for _ in range(
                max(
                    int(self.finetune_iters * c.train_samples /
                        self.batch_size), self.finetune_iters)):
            c.set_params(local_model)
            data_batch = next(batches[c])
            _, grads, _ = c.solve_sgd(data_batch)
            for j in range(len(grads[1])):
                # gradient plus the pull towards the global model
                eff_grad = grads[1][j] + self.lam * (local_model[j] -
                                                     init_model[j])
                local_model[j] = local_model[
                    j] - self.learning_rate * self.decay_factor * eff_grad
        c.set_params(local_model)
        tc, _, num_test = c.test()
        after_test_accu.append(tc)
        test_samples.append(num_test)

    after_test_accu = np.asarray(after_test_accu)
    test_samples = np.asarray(test_samples)
    tqdm.write('final test accu: {}'.format(
        np.sum(after_test_accu) * 1.0 / np.sum(test_samples)))
    tqdm.write('final malicious test accu: {}'.format(
        np.sum(after_test_accu[corrupt_id]) * 1.0 /
        np.sum(test_samples[corrupt_id])))
    tqdm.write('final benign test accu: {}'.format(
        np.sum(after_test_accu[non_corrupt_id]) * 1.0 /
        np.sum(test_samples[non_corrupt_id])))
    print(
        "variance of the performance: ",
        np.var(after_test_accu[non_corrupt_id] /
               test_samples[non_corrupt_id]))
def train(self):
    '''Train the global model, dropping the slow devices in every round.

    In every round a subset of the selected clients is sampled as active
    clients (NumPy is re-seeded with the round number first); only those
    train locally and are aggregated into ``self.latest_model``. Every
    ``self.eval_every`` rounds the test accuracy, training accuracy,
    training loss and the mean client/global model discrepancy are logged
    and written through ``self.writer``. After the last round the writer is
    closed and a final evaluation is recorded in ``self.metrics``.
    '''
    print('Training with {} workers ---'.format(self.clients_per_round))

    csolns = []  # buffer for receiving client solutions

    # Evaluate model before training
    for i in range(self.num_rounds):
        indices, selected_clients = self.select_clients(
            i, num_clients=self.clients_per_round)  # uniform sampling
        # Seeding with the round number makes the choice of active clients
        # reproducible
        np.random.seed(i)
        active_clients = np.random.choice(selected_clients,
                                          round(self.clients_per_round *
                                                (1 - self.drop_percent)),
                                          replace=False)

        diffs = [0]  # Record the client diff
        # test model
        if i % self.eval_every == 0:
            stats = self.test(
            )  # have set the latest model for all clients
            stats_train = self.train_error_and_loss(active_clients)

            # stats[2] / stats[3] are used as sample counts / correct counts,
            # stats_train[4] as per-client losses
            test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
            tqdm.write('At round {} accuracy: {}'.format(
                i, test_acc))  # testing accuracy
            train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(
                stats_train[2])
            tqdm.write('At round {} training accuracy: {}'.format(
                i, train_acc))
            train_loss = np.dot(stats_train[4],
                                stats_train[2]) * 1.0 / np.sum(
                                    stats_train[2])
            tqdm.write('At round {} training loss: {}'.format(
                i, train_loss))

            # Write results to a csv file
            self.writer.write_stats(i, 0, test_acc, train_acc, train_loss,
                                    self.clients_per_round)

            # Calculate the client diff and write it to csv file.
            # csolns still holds the solutions of the previous round here;
            # in the first round it is empty and the diff stays 0.
            # NOTE(review): the nesting of this part was reconstructed from
            # whitespace-collapsed code - confirm write_diffs belongs here.
            if csolns:
                flat_cmodels = [process_grad(soln[1]) for soln in csolns]
                flat_global_model = process_grad(self.latest_model)
                # mean L2 distance between the client models and the
                # global model
                diffs[0] = np.sum([
                    np.sum((flat_model - flat_global_model)**2)**0.5
                    for flat_model in flat_cmodels
                ])
                diffs[0] = diffs[0] / len(csolns)
            self.writer.write_diffs(diffs)
            tqdm.write('At round {} Discrepancy: {}'.format(i, diffs[0]))

        csolns = []  # Reset the client solutions buffer
        for idx, c in enumerate(
                active_clients.tolist()):  # simply drop the slow devices
            # communicate the latest model
            c.set_params(self.latest_model)

            # solve minimization locally
            soln, stats = c.solve_inner(num_epochs=self.num_epochs,
                                        batch_size=self.batch_size)

            # gather solutions from client
            csolns.append(soln)

            # track communication cost
            self.metrics.update(rnd=i, cid=c.id, stats=stats)

        # update models
        self.latest_model = self.aggregate(csolns)

    self.writer.close()

    # final test model
    stats = self.test()
    stats_train = self.train_error_and_loss()
    self.metrics.accuracies.append(stats)
    self.metrics.train_accuracies.append(stats_train)
    tqdm.write('At round {} accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
    tqdm.write('At round {} training accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
def train(self):
    '''Train using Federated Averaging

    Every ``self.eval_every`` rounds the model is evaluated, the results are
    appended to ``self.metrics`` and the ``rs_*`` histories, the gradient
    difference between the clients and the sample-weighted global gradient
    is printed, and the server model is saved. In every round the selected
    clients train locally from the latest model and their solutions are
    aggregated (weighted) into ``self.latest_model``. A final evaluation and
    save follow the last round.
    '''
    print("Train using Federated Averaging")
    print('Training with {} workers ---'.format(self.clients_per_round))

    for i in range(self.num_rounds):
        # test model
        # NOTE(review): the extent of this branch was reconstructed from
        # whitespace-collapsed code; confirm that the gradient difference and
        # the save belong to the evaluation step.
        if i % self.eval_every == 0:
            stats = self.test()
            stats_train = self.train_error_and_loss()
            self.metrics.accuracies.append(stats)
            self.metrics.train_accuracies.append(stats_train)

            test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
            train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])
            train_loss = (np.dot(stats_train[4], stats_train[2]) * 1.0 /
                          np.sum(stats_train[2]))
            tqdm.write('At round {} accuracy: {}'.format(i, test_acc))
            tqdm.write('At round {} training accuracy: {}'.format(i, train_acc))
            tqdm.write('At round {} training loss: {}'.format(i, train_loss))
            self.rs_glob_acc.append(test_acc)
            self.rs_train_acc.append(train_acc)
            self.rs_train_loss.append(train_loss)

            model_len = process_grad(self.latest_model).size
            global_grads = np.zeros(model_len)
            num_samples = []
            local_grads = []

            for c in self.clients:
                num, client_grad = c.get_grads(model_len)
                local_grads.append(client_grad)
                num_samples.append(num)
                # Accumulate the client's own gradient weighted by its sample
                # count (an all-zero buffer was accumulated here before, which
                # left the global gradient at zero).
                global_grads = np.add(global_grads, client_grad * num)
            global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples))

            # Mean squared distance of the local gradients to the global one
            difference = 0
            for client_grad in local_grads:
                difference += np.sum(np.square(global_grads - client_grad))
            difference = difference * 1.0 / len(self.clients)
            tqdm.write('-----gradient difference------: {}'.format(difference))

            # save server model
            self.metrics.write()
            self.save()

        # choose K clients prop to data size
        selected_clients = self.select_clients(
            i, num_clients=self.clients_per_round)

        csolns = []  # buffer for receiving client solutions
        for c in tqdm(selected_clients, desc='Client: ', leave=False,
                      ncols=120):
            # communicate the latest model
            c.set_params(self.latest_model)

            # solve minimization locally
            soln, _, stats = c.solve_inner(self.optimizer,
                                           num_epochs=self.num_epochs,
                                           batch_size=self.batch_size)

            # gather solutions from client
            csolns.append(soln)

            # track communication cost
            self.metrics.update(rnd=i, cid=c.id, stats=stats)

        # update model
        self.latest_model = self.aggregate(csolns, weighted=True)

    # final test model
    stats = self.test()
    stats_train = self.train_error_and_loss()
    self.metrics.accuracies.append(stats)
    self.metrics.train_accuracies.append(stats_train)
    tqdm.write('At round {} accuracy: {}'.format(
        self.num_rounds, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
    tqdm.write('At round {} training accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))

    # save server model
    self.metrics.write()
    self.save(learning_rate=self.parameters["learning_rate"])

    print("Test ACC:", self.rs_glob_acc)
    print("Training ACC:", self.rs_train_acc)
    print("Training Loss:", self.rs_train_loss)
def train(self):
    '''Train using Federated Proximal

    In every round a subset of the selected clients is sampled as active
    clients (NumPy is re-seeded with the round number first). Active clients
    train for ``self.num_epochs`` epochs; the remaining selected clients
    (stragglers) train for a random smaller number of epochs. All solutions
    are aggregated into ``self.latest_model``. Every ``self.eval_every``
    rounds the accuracies, the training loss and the mean client/global
    model discrepancy are logged and written through ``self.writer``; the
    gradient difference between the clients and the sample-weighted global
    gradient is printed as well.
    '''
    print('Training with {} workers ---'.format(self.clients_per_round))

    csolns = []  # buffer for receiving client solutions

    for i in range(self.num_rounds):
        _, selected_clients = self.select_clients(
            i, num_clients=self.clients_per_round)  # uniform sampling
        np.random.seed(
            i
        )  # make sure that the stragglers are the same for FedProx and FedAvg
        active_clients = np.random.choice(selected_clients,
                                          round(self.clients_per_round *
                                                (1 - self.drop_percent)),
                                          replace=False)

        diffs = [0]  # Record the client diff
        # test model
        if i % self.eval_every == 0:
            stats = self.test()  # have set the latest model for all clients
            stats_train = self.train_error_and_loss(active_clients)

            test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
            tqdm.write('At round {} accuracy: {}'.format(
                i, test_acc))  # testing accuracy
            train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])
            tqdm.write('At round {} training accuracy: {}'.format(
                i, train_acc))
            train_loss = (np.dot(stats_train[4], stats_train[2]) * 1.0 /
                          np.sum(stats_train[2]))
            tqdm.write('At round {} training loss: {}'.format(i, train_loss))

            # Write results to a csv file
            self.writer.write_stats(i, 0, test_acc, train_acc, train_loss,
                                    self.clients_per_round)

            # Calculate the client diff and write it to csv file.
            # csolns still holds the solutions of the previous round here;
            # in the first round it is empty and the diff stays 0.
            if csolns:
                flat_cmodels = [process_grad(soln[1]) for soln in csolns]
                flat_global_model = process_grad(self.latest_model)
                diffs[0] = np.sum([
                    np.sum((flat_model - flat_global_model)**2)**0.5
                    for flat_model in flat_cmodels
                ])
                diffs[0] = diffs[0] / len(csolns)
            self.writer.write_diffs(diffs)
            tqdm.write('At round {} Discrepancy: {}'.format(i, diffs[0]))

        # NOTE(review): the nesting was reconstructed from whitespace-collapsed
        # code; the gradient difference is computed in every round here -
        # confirm it is not meant to run only on evaluation rounds.
        model_len = process_grad(
            self.latest_model).size  # no equal to model.size
        global_grads = np.zeros(model_len)
        num_samples = []
        local_grads = []

        for c in self.clients:
            num, client_grad = c.get_grads(model_len)
            local_grads.append(client_grad)
            num_samples.append(num)
            global_grads = np.add(global_grads, client_grad * num)
        global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples))

        # Mean squared distance of the local gradients to the global one
        difference = 0
        for client_grad in local_grads:
            difference += np.sum(np.square(global_grads - client_grad))
        difference = difference * 1.0 / len(self.clients)
        tqdm.write('gradient difference: {}'.format(difference))

        csolns = []  # buffer for receiving client solutions

        self.inner_opt.set_params(self.latest_model, self.client_model)

        for c in selected_clients.tolist():
            # communicate the latest model
            c.set_params(self.latest_model)

            # solve minimization locally
            if c in active_clients:
                local_epochs = self.num_epochs
            elif self.num_epochs > 1:
                # stragglers stop early: randint(low, high) draws from
                # [low, high)
                local_epochs = np.random.randint(low=1, high=self.num_epochs)
            else:
                # randint(1, 1) would raise ValueError (low >= high); with a
                # single epoch there is nothing to cut short
                local_epochs = self.num_epochs
            soln, stats = c.solve_inner(num_epochs=local_epochs,
                                        batch_size=self.batch_size)

            # gather solutions from client
            csolns.append(soln)

            # track communication cost
            self.metrics.update(rnd=i, cid=c.id, stats=stats)

        # update models
        self.latest_model = self.aggregate(csolns)
        self.client_model.set_params(self.latest_model)

    self.writer.close()

    # final test model
    stats = self.test()
    stats_train = self.train_error_and_loss()
    self.metrics.accuracies.append(stats)
    self.metrics.train_accuracies.append(stats_train)
    tqdm.write('At round {} accuracy: {}'.format(
        self.num_rounds, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
    tqdm.write('At round {} training accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
def clustering_clients(self, clients, n_clusters=None, max_iter=20):
    """Cluster clients by the direction of their pre-training updates.

    Every client is pre-trained, the flattened updates are decomposed into 3
    components with a truncated SVD, a similarity matrix between the updates
    and those components is obtained from
    ``self.get_ternary_cosine_similarity_matrix`` and K-Means is run on it.
    The execution time of every stage is printed.

    Args:
        clients: indexable collection of clients to cluster.
        n_clusters: number of clusters; defaults to ``self.num_group``.
        max_iter: maximum number of K-Means iterations.

    Returns:
        dict mapping a cluster id to ``(aggregated solution, [clients])``.
        The solutions of a cluster are aggregated with equal weights;
        clusters without any client are reported and left out.
    """
    if n_clusters is None:
        n_clusters = self.num_group

    # Pre-train these clients first
    csolns, cupdates = {}, {}
    tic = time.time()
    for c in clients:
        csolns[c], cupdates[c] = self.pre_train_client(c)
    print("Pre-training takes {}s seconds".format(time.time() - tic))

    flat_updates = [process_grad(update) for update in cupdates.values()]
    delta_w = np.vstack(flat_updates)  # shape=(n_client, n_params)

    # Decompose the updates
    tic = time.time()
    svd = TruncatedSVD(n_components=3, random_state=self.sklearn_seed)
    decomp_updates = svd.fit_transform(delta_w.T)  # shape=(n_params, 3)
    print("SVD takes {}s seconds".format(time.time() - tic))

    # Similarity between every update and the decomposed directions
    tic = time.time()
    diffs = self.get_ternary_cosine_similarity_matrix(delta_w, decomp_updates)
    print("Ternary Cossim Matrix calculation takes {}s seconds".format(
        time.time() - tic))

    # Cluster the clients
    tic = time.time()
    kmeans = KMeans(n_clusters,
                    random_state=self.sklearn_seed,
                    max_iter=max_iter).fit(diffs)
    print("Clustering takes {}s seconds".format(time.time() - tic))
    print('Clustering Results:', Counter(kmeans.labels_))
    print('Clustering Inertia:', kmeans.inertia_)

    # [[c1, c2,...], [c3, c4,...], ...]
    members = [[] for _ in range(n_clusters)]
    for position, label in enumerate(kmeans.labels_):
        members[label].append(clients[position])

    cluster = {}  # {Cluster ID: (cm, [c1, c2, ...])}
    for cluster_id, group in enumerate(members):
        if not group:
            print("Error, cluster is empty")
            continue
        # calculate the means of cluster, all clients have equal weight
        equal_weighted = [(1, csolns[c]) for c in group]
        cluster[cluster_id] = (self.aggregate(equal_weighted), group)

    return cluster
def train(self):
    '''Train using FEDL

    Every ``self.eval_every`` rounds the model is evaluated, the results are
    appended to ``self.metrics`` and the ``rs_*`` histories, the gradient
    difference between the clients and the sample-weighted global gradient
    is printed, and the server model is saved. In every round the selected
    clients receive the latest model (and, from the second round on, the
    aggregated gradient of the previous round together with their own raw
    gradient), train locally and are aggregated (weighted) into
    ``self.latest_model``. The new model is then sent back to the same
    clients and their raw gradients are aggregated into ``self.meanGrads``
    for the next round. A final evaluation and save follow the last round.
    '''
    print("Train using FEDL")
    print('Training with {} workers ---'.format(self.clients_per_round))

    for i in range(self.num_rounds):
        # test model
        # NOTE(review): the extent of this branch was reconstructed from
        # whitespace-collapsed code; confirm that the gradient difference and
        # the save belong to the evaluation step.
        if i % self.eval_every == 0:
            stats = self.test()
            stats_train = self.train_error_and_loss()
            self.metrics.accuracies.append(stats)
            self.metrics.train_accuracies.append(stats_train)

            test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
            train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])
            train_loss = (np.dot(stats_train[4], stats_train[2]) * 1.0 /
                          np.sum(stats_train[2]))
            tqdm.write('At round {} accuracy: {}'.format(i, test_acc))
            tqdm.write('At round {} training accuracy: {}'.format(i, train_acc))
            tqdm.write('At round {} training loss: {}'.format(i, train_loss))
            self.rs_glob_acc.append(test_acc)
            self.rs_train_acc.append(train_acc)
            self.rs_train_loss.append(train_loss)

            model_len = process_grad(self.latest_model).size
            global_grads = np.zeros(model_len)
            num_samples = []
            local_grads = []

            for c in self.clients:
                num, client_grad = c.get_grads(model_len)
                local_grads.append(client_grad)
                num_samples.append(num)
                # Accumulate the client's own gradient weighted by its sample
                # count (an all-zero buffer was accumulated here before, which
                # left the global gradient at zero).
                global_grads = np.add(global_grads, client_grad * num)
            global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples))

            # Mean squared distance of the local gradients to the global one
            difference = 0
            for client_grad in local_grads:
                difference += np.sum(np.square(global_grads - client_grad))
            difference = difference * 1.0 / len(self.clients)
            tqdm.write('gradient difference: {}'.format(difference))

            # save server model
            self.metrics.write()
            self.save()

        # choose K clients prop to data size
        selected_clients = self.select_clients(
            i, num_clients=self.clients_per_round)

        csolns = []  # buffer for receiving client solutions
        cgrads_load = []  # buffer for receiving the clients' gradients

        for c in tqdm(selected_clients, desc='Client: ', leave=False,
                      ncols=120):
            # communicate the latest model
            c.set_params(self.latest_model)
            pregrads = c.get_raw_grads()
            # self.meanGrads is first assigned at the end of round 0, so the
            # gradient parameters are only passed on from round 1 onwards
            if i != 0:
                c.set_gradientParam(self.meanGrads, pregrads)

            # solve minimization locally
            soln, _, stats = c.solve_inner(self.optimizer,
                                           num_epochs=self.num_epochs,
                                           batch_size=self.batch_size)

            # gather solutions from client
            csolns.append(soln)

            # track communication cost
            self.metrics.update(rnd=i, cid=c.id, stats=stats)

        # update model
        self.latest_model = self.aggregate(csolns, weighted=True)

        # broadcast the new model and collect every client's raw gradient,
        # paired with its number of samples
        for c in tqdm(selected_clients, desc='Client: ', leave=False,
                      ncols=120):
            c.set_params(self.latest_model)
            cgrads_load.append((c.num_samples, c.get_raw_grads()))

        # aggregate all derivatives from the users
        self.meanGrads = self.aggregate_derivate(cgrads_load, weighted=True)

    # final test model
    stats = self.test()
    stats_train = self.train_error_and_loss()
    self.metrics.accuracies.append(stats)
    self.metrics.train_accuracies.append(stats_train)
    tqdm.write('At round {} accuracy: {}'.format(
        self.num_rounds, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
    tqdm.write('At round {} training accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))

    # save server model
    self.metrics.write()
    prox = 1 if self.parameters['lamb'] > 0 else 0
    self.save(prox=prox,
              lamb=self.parameters['lamb'],
              learning_rate=self.parameters["learning_rate"],
              data_set=self.dataset,
              num_users=self.clients_per_round,
              batch=self.batch_size)

    print("Test ACC:", self.rs_glob_acc)
    print("Training ACC:", self.rs_train_acc)
    print("Training Loss:", self.rs_train_loss)
def test_ternary_cosine_similariy(self, alpha=20):
    """Compare the decomposed (EDC) and the full (MADC) distance measures.

    All clients are pre-trained. The updates of a random sample of at most
    ``self.num_group * alpha`` clients are decomposed into ``self.num_group``
    components with a truncated SVD. EDC is the matrix of Euclidean distances
    between the clients' cosine similarities to those components; MADC comes
    from ``self._calculate_data_driven_measure`` applied to the full pairwise
    cosine similarity matrix. The upper-triangle entries (diagonal included)
    of both matrices are written side by side to ``cossim.csv``.

    Args:
        alpha: multiplier of ``self.num_group`` for the number of sampled
            clients.

    Returns:
        ``(x, y)``: the upper-triangle entries of EDC and of MADC.
    """
    # Pre-train all clients; only the updates are needed here
    cupdates = {}
    for c in self.clients:
        _, cupdates[c] = self.pre_train_client(c)

    # randomly select alpha * m clients to calculate the direction matrix V
    n_clients = len(self.clients)
    sample_size = min(self.num_group * alpha, n_clients)
    sampled = random.sample(self.clients, k=sample_size)
    sampled_updates = np.vstack([process_grad(cupdates[c]) for c in sampled])

    # Decompose the sampled update vectors into num_group components
    svd = TruncatedSVD(n_components=self.num_group,
                       random_state=self.sklearn_seed)
    # input shape=(n_params, n_sampled), output shape=(n_params, n_groups)
    decomp_updates = svd.fit_transform(sampled_updates.T)

    # Decomposed measure (EDC) for all clients
    delta_w = np.vstack([process_grad(cupdates[c]) for c in self.clients
                         ])  # shape=(n_clients, n_params)
    decomposed_cossim_matrix = cosine_similarity(
        delta_w, decomp_updates.T)  # shape=(n_clients, n_groups)
    print("Cossim_matrix shape:", decomposed_cossim_matrix.shape)
    EDC = euclidean_distances(decomposed_cossim_matrix)

    # Full pairwise measure (MADC) for all clients
    full_cossim_matrix = cosine_similarity(delta_w)
    MADC = self._calculate_data_driven_measure(full_cossim_matrix,
                                               correction=True)

    # Both matrices are indexed with the same (n_clients, n_clients) triangle
    upper = np.triu_indices(n_clients)
    x = EDC[upper]
    y = MADC[upper]
    np.savetxt("cossim.csv", np.vstack((x, y)).T, delimiter="\t")
    return x, y
def freeze(self):
    """Cache the model size and the clients' sample counts of this group.

    Stores the length of the flattened latest model in ``self.model_len``
    and the ``num_samples`` of every client in ``self.num_samples``, then
    prints a warning when the group holds fewer than ``self.min_clients``
    client ids.
    """
    flat_model = process_grad(self.latest_model)
    self.model_len = flat_model.size  # For MNIST, should be 784*10+10
    self.num_samples = [
        client.num_samples for client in self.clients.values()
    ]

    too_few_clients = len(self.client_ids) < self.min_clients
    if too_few_clients:
        print(
            "Warning: This group does not meet the minimum client requirements."
        )
def clustering_clients(self, clients, n_clusters=None, max_iter=20):
    """Pre-train ``clients`` and cluster them by their update directions.

    All clients are pre-trained from the same starting point
    (``self.latest_model``). Their flattened updates are decomposed with a
    truncated SVD, and two measures are computed and timed: the cosine
    similarity against the SVD components (used for the EDC path) and the
    data-driven measure on the full pairwise cosine similarity (MADC).
    When ``self.MADC == True`` the MADC matrix is clustered with
    complete-linkage agglomerative clustering, otherwise the decomposed
    cosine similarities are clustered with KMeans.

    Args:
        clients: Sequence of clients to cluster.
        n_clusters: Number of clusters; defaults to ``self.num_group``.
        max_iter: ``max_iter`` passed to KMeans (EDC path only).

    Returns:
        Dict mapping cluster id to
        ``(aggregated solution, aggregated update, [clients])``. Empty
        clusters are reported on stdout and omitted.
    """
    if n_clusters is None:
        n_clusters = self.num_group

    # The updates for clustering must be computed from one common model, so
    # start every client from the global auxiliary (AVG) model.
    self.client_model.set_params(self.latest_model)

    # --- Pre-training -----------------------------------------------------
    solutions, updates = {}, {}
    started = time.time()
    for client in clients:
        solutions[client], updates[client] = self.pre_train_client(client)
    print("Pre-training takes {}s seconds".format(time.time() - started))

    flat_updates = [process_grad(upd) for upd in updates.values()]
    delta_w = np.vstack(flat_updates)  # shape=(n_clients, n_params)

    # --- SVD: decompose update directions into num_group vectors ---------
    started = time.time()
    decomposer = TruncatedSVD(n_components=self.num_group,
                              random_state=self.sklearn_seed)
    directions = decomposer.fit_transform(
        delta_w.T)  # shape=(n_params, n_groups)
    print("SVD takes {}s seconds".format(time.time() - started))

    # --- EDC features -------------------------------------------------------
    # No normalisation is applied to the similarities here.
    started = time.time()
    decomposed_cossim = cosine_similarity(
        delta_w, directions.T)  # shape=(n_clients, n_groups)
    print("EDC Matrix calculation takes {}s seconds".format(time.time() -
                                                            started))

    # --- MADC (always computed so its cost is reported) -------------------
    started = time.time()
    full_cossim = cosine_similarity(delta_w)  # shape=(n_clients, n_clients)
    madc_matrix = self._calculate_data_driven_measure(
        full_cossim, correction=True)  # shape=(n_clients, n_clients)
    print("MADC Matrix calculation takes {}s seconds".format(time.time() -
                                                             started))

    # --- Clustering -------------------------------------------------------
    started = time.time()
    use_madc = self.MADC == True  # explicit equality kept on purpose
    if use_madc:
        # NOTE(review): `affinity` is the keyword of older scikit-learn
        # releases (later renamed `metric`) - confirm the pinned version.
        clusterer = AgglomerativeClustering(n_clusters,
                                            affinity='precomputed',
                                            linkage='complete')
        result = clusterer.fit(madc_matrix)
    else:
        # EDC is the default.
        clusterer = KMeans(n_clusters,
                           random_state=self.sklearn_seed,
                           max_iter=max_iter)
        result = clusterer.fit(decomposed_cossim)
    print("Clustering takes {}s seconds".format(time.time() - started))
    print('Clustering Results:', Counter(result.labels_))

    # [[c1, c2, ...], [c3, c4, ...], ...] indexed by cluster id.
    members_by_cluster = [[] for _ in range(n_clusters)]
    for label, client in zip(result.labels_, clients):
        members_by_cluster[label].append(client)

    # {cluster id: (avg_soln, avg_update, [c1, c2, ...])}
    clusters = {}
    for cluster_id, members in enumerate(members_by_cluster):
        if not members:
            print("Error, cluster is empty")
            continue
        # Every client carries the same weight in the cluster mean.
        weighted_solns = [(1, solutions[c]) for c in members]
        weighted_updates = [(1, updates[c]) for c in members]
        clusters[cluster_id] = (self.aggregate(weighted_solns),
                                self.aggregate(weighted_updates), members)
    return clusters
def train(self):
    """Train using Federated Proximal with simulated stragglers.

    For each of ``self.num_rounds`` rounds:

    1. Every ``self.eval_every`` rounds, log test accuracy, training
       accuracy and training loss through ``tqdm.write``.
    2. Collect gradients from all clients and log the mean squared distance
       between each client gradient and the sample-weighted average.
    3. Select ``self.clients_per_round`` clients; a fraction
       ``1 - self.drop_percent`` of them are "active" and train for
       ``self.num_epochs`` epochs, the others train for a random smaller
       number of epochs.
    4. Aggregate the client solutions into ``self.latest_model``.

    After the last round the final test/train statistics are appended to
    ``self.metrics`` and logged.
    """
    print('Training with {} workers ---'.format(self.clients_per_round))

    for i in range(self.num_rounds):
        # test model
        if i % self.eval_every == 0:
            stats = self.test()  # have set the latest model for all clients
            # stats_train returns the array
            # (ids, groups, num_samples, tot_correct, losses)
            stats_train = self.train_error_and_loss()

            tqdm.write('At round {} accuracy: {}'.format(
                i,
                np.sum(stats[3]) * 1.0 / np.sum(stats[2])))  # testing accuracy
            tqdm.write('At round {} training accuracy: {}'.format(
                i,
                np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
            tqdm.write('At round {} training loss: {}'.format(
                i,
                np.dot(stats_train[4], stats_train[2]) * 1.0 /
                np.sum(stats_train[2])))
            tqdm.write('At round {} weighted average: {}'.format(
                i,
                np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))

        # Gradient dissimilarity: mean squared distance between each client
        # gradient and the sample-weighted global gradient.
        model_len = process_grad(self.latest_model).size
        global_grads = np.zeros(model_len)
        num_samples = []
        local_grads = []

        for c in self.clients:
            num, client_grad = c.get_grads(model_len)
            local_grads.append(client_grad)
            num_samples.append(num)
            global_grads = np.add(global_grads, client_grad * num)
        global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples))

        difference = 0
        for client_grad in local_grads:
            difference += np.sum(np.square(global_grads - client_grad))
        difference = difference * 1.0 / len(self.clients)
        tqdm.write('gradient difference: {}'.format(difference))

        _, selected_clients = self.select_clients(
            i, num_clients=self.clients_per_round)  # uniform sampling
        # Re-seed per round so that the stragglers are the same for FedProx
        # and FedAvg.
        np.random.seed(i)
        active_clients = np.random.choice(
            selected_clients,
            round(self.clients_per_round * (1 - self.drop_percent)),
            replace=False)

        csolns = []  # buffer for receiving client solutions

        self.inner_opt.set_params(self.latest_model, self.client_model)

        for c in selected_clients.tolist():
            # communicate the latest model
            c.set_params(self.latest_model)

            # solve minimization locally
            if c in active_clients:
                soln, stats = c.solve_inner(num_epochs=self.num_epochs,
                                            batch_size=self.batch_size)
            else:
                # Straggler: train for a random number of epochs in
                # [1, num_epochs).
                # NOTE(review): np.random.randint raises ValueError when
                # num_epochs == 1 (low >= high); left unchanged so the
                # sampled straggler workloads stay reproducible.
                soln, stats = c.solve_inner(
                    num_epochs=np.random.randint(low=1,
                                                 high=self.num_epochs),
                    batch_size=self.batch_size)

            # gather solutions from client
            csolns.append(soln)

            # track communication cost
            self.metrics.update(rnd=i, cid=c.id, stats=stats)

        # update models
        self.latest_model = self.aggregate(csolns)
        self.client_model.set_params(self.latest_model)

    # final test model
    stats = self.test()
    stats_train = self.train_error_and_loss()
    self.metrics.accuracies.append(stats)
    self.metrics.train_accuracies.append(stats_train)
    tqdm.write('At round {} accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
    tqdm.write('At round {} training accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
    # Report against num_rounds like the two lines above; the loop variable
    # would be off by one and is undefined when num_rounds == 0.
    tqdm.write('At round {} weighted average: {}'.format(
        self.num_rounds,
        np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
def train(self):
    """Train using Federated Proximal.

    For each of ``self.num_rounds`` rounds, selects
    ``self.clients_per_round`` clients, lets each solve its local problem
    from ``self.latest_model`` and aggregates the returned solutions into a
    new ``self.latest_model``.

    Every ``self.eval_every`` rounds the test accuracy, training accuracy
    and training loss are logged through ``tqdm.write``, together with the
    mean squared distance between each client's gradient and the
    sample-weighted average gradient. After the last round the final
    statistics are appended to ``self.metrics`` and logged.
    """
    print('Training with {} workers ---'.format(self.clients_per_round))

    for i in range(self.num_rounds):
        # test model
        if i % self.eval_every == 0:
            stats = self.test()  # have set the latest model for all clients
            stats_train = self.train_error_and_loss()

            tqdm.write('At round {} accuracy: {}'.format(
                i,
                np.sum(stats[3]) * 1.0 / np.sum(stats[2])))  # testing accuracy
            tqdm.write('At round {} training accuracy: {}'.format(
                i,
                np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
            tqdm.write('At round {} training loss: {}'.format(
                i,
                np.dot(stats_train[4], stats_train[2]) * 1.0 /
                np.sum(stats_train[2])))

            # Gradient dissimilarity across all clients.
            model_len = process_grad(self.latest_model).size
            global_grads = np.zeros(model_len)
            num_samples = []
            local_grads = []

            for c in self.clients:
                num, client_grad = c.get_grads(model_len)
                local_grads.append(client_grad)
                num_samples.append(num)
                # Accumulate this client's gradient weighted by its sample
                # count. (Adding an all-zero buffer here would leave the
                # global gradient at zero.)
                global_grads = np.add(global_grads, client_grad * num)
            global_grads = global_grads * 1.0 / np.sum(
                np.asarray(num_samples))

            difference = 0
            for client_grad in local_grads:
                difference += np.sum(np.square(global_grads - client_grad))
            difference = difference * 1.0 / len(self.clients)
            tqdm.write('gradient difference: {}'.format(difference))

        selected_clients = self.select_clients(
            i, num_clients=self.clients_per_round)

        csolns = []  # buffer for receiving client solutions

        self.inner_opt.set_params(self.latest_model, self.client_model)

        for c in selected_clients:
            # communicate the latest model
            c.set_params(self.latest_model)

            # solve minimization locally
            soln, stats = c.solve_inner(num_epochs=self.num_epochs,
                                        batch_size=self.batch_size)

            # gather solutions from client
            csolns.append(soln)

            # track communication cost
            self.metrics.update(rnd=i, cid=c.id, stats=stats)

        # update model
        self.latest_model = self.aggregate(csolns)
        self.client_model.set_params(self.latest_model)

    # final test model
    stats = self.test()
    stats_train = self.train_error_and_loss()
    self.metrics.accuracies.append(stats)
    self.metrics.train_accuracies.append(stats_train)
    tqdm.write('At round {} accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
    tqdm.write('At round {} training accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
def train(self):
    """Train using Federated Proximal SGD and save the results.

    For each of ``self.num_rounds`` rounds, selects
    ``self.clients_per_round`` clients, initialises each client's optimizer
    state from ``self.latest_model`` and its raw gradients, lets it solve
    its local problem, and aggregates the solutions (weighted or not,
    according to ``self.parameters['weight']``) into ``self.latest_model``.

    Every ``self.eval_every`` rounds the test accuracy, training accuracy
    and training loss are logged and appended to ``self.rs_glob_acc``,
    ``self.rs_train_acc`` and ``self.rs_train_loss``; the mean squared
    distance between each client's gradient and the sample-weighted average
    gradient is logged as well. After the last round, final statistics are
    recorded, metrics are written and the server model is saved.
    """
    print("Train using Federated Proximal SGD")
    print('Training with {} workers ---'.format(self.clients_per_round))
    model_len = process_grad(self.latest_model).size

    for i in range(self.num_rounds):
        # test model
        if i % self.eval_every == 0:
            stats = self.test()  # have set the latest model for all clients
            stats_train = self.train_error_and_loss()

            tqdm.write('At round {} accuracy: {}'.format(
                i,
                np.sum(stats[3]) * 1.0 / np.sum(stats[2])))  # testing accuracy
            tqdm.write('At round {} training accuracy: {}'.format(
                i,
                np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
            tqdm.write('At round {} training loss: {}'.format(
                i,
                np.dot(stats_train[4], stats_train[2]) * 1.0 /
                np.sum(stats_train[2])))

            self.rs_glob_acc.append(
                np.sum(stats[3]) * 1.0 / np.sum(stats[2]))
            self.rs_train_acc.append(
                np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2]))
            self.rs_train_loss.append(
                np.dot(stats_train[4], stats_train[2]) * 1.0 /
                np.sum(stats_train[2]))

            # Gradient dissimilarity across all clients.
            global_grads = np.zeros(model_len)
            num_samples = []
            local_grads = []

            for c in self.clients:
                num, client_grad = c.get_grads(model_len)
                local_grads.append(client_grad)
                num_samples.append(num)
                # Accumulate this client's gradient weighted by its sample
                # count. (Adding an all-zero buffer here would leave the
                # global gradient at zero.)
                global_grads = np.add(global_grads, client_grad * num)
            global_grads = global_grads * 1.0 / \
                np.sum(np.asarray(num_samples))

            difference = 0
            for client_grad in local_grads:
                difference += np.sum(np.square(global_grads - client_grad))
            difference = difference * 1.0 / len(self.clients)
            tqdm.write('gradient difference: {}'.format(difference))

        selected_clients = self.select_clients(
            i, num_clients=self.clients_per_round)

        csolns = []  # buffer for receiving client solutions

        for c in selected_clients:
            # communicate the latest model
            c.set_params(self.latest_model)
            # Seed the client's optimizer state from the global model and
            # from the client's raw gradients.
            # NOTE(review): presumably the w0 / v0 anchors of a
            # variance-reduced update - confirm against the optimizer.
            self.inner_opt.set_wzero(self.latest_model, c.model)
            grads = c.get_raw_grads()
            c.set_vzero(grads)

            # solve minimization locally
            soln, stats = c.solve_inner(self.optimizer,
                                        num_epochs=self.num_epochs,
                                        batch_size=self.batch_size)

            # gather solutions from client
            csolns.append(soln)

            # track communication cost
            self.metrics.update(rnd=i, cid=c.id, stats=stats)

        # update model
        print(self.parameters['weight'])
        self.latest_model = self.aggregate(
            csolns,
            weighted=self.parameters['weight'])  # Weighted = False / True
        self.client_model.set_params(self.latest_model)

    # final test model
    stats = self.test()
    stats_train = self.train_error_and_loss()
    self.metrics.accuracies.append(stats)
    self.metrics.train_accuracies.append(stats_train)
    tqdm.write('At round {} accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
    tqdm.write('At round {} training accuracy: {}'.format(
        self.num_rounds,
        np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))

    # save server model
    self.metrics.write()
    # prox flags whether a proximal term (lamb > 0) was used.
    prox = 1 if self.parameters['lamb'] > 0 else 0
    self.save(prox=prox,
              lamb=self.parameters['lamb'],
              learning_rate=self.parameters["learning_rate"],
              data_set=self.dataset,
              num_users=self.clients_per_round,
              batch=self.batch_size)

    print("Test ACC:", self.rs_glob_acc)
    print("Training ACC:", self.rs_train_acc)
    print("Training Loss:", self.rs_train_loss)
def ClusterGroups(self, S, round):
    """Partition the clients in ``S`` into at most ``self.num_groups`` groups.

    In round 0 clients are dealt round-robin over the groups and every
    group is returned, even if it received no client. In later rounds each
    client's gradient is fetched, the gradients are reduced with PCA
    (keeping 95% of the variance) and clustered with KMeans; the labels are
    printed and logged via ``csv_log.write_clusters``, and groups that
    received no client are dropped.

    Also resets ``self.groups`` to an empty list.

    Args:
        S: Iterable of clients to assign.
        round: Current round index; ``0`` selects the round-robin path.

    Returns:
        Dict mapping ``"group_<k>"`` to a dict with the keys ``"model"``,
        ``"clients"``, ``"num_samples"`` and ``"id"``.
    """
    self.groups = []

    groups = {}
    for group_id in range(self.num_groups):
        groups["group_" + str(group_id)] = {
            "model": 0,
            "clients": [],
            "num_samples": 0,
            "id": group_id,
        }

    if round == 0:
        # No gradients to compare yet: deal clients out round-robin.
        slot = 0
        for client in S:
            groups["group_" + str(slot)]["clients"].append(client)
            slot = (slot + 1) % self.num_groups
        return groups

    # Later rounds: cluster clients by their (PCA-reduced) gradients.
    model_len = process_grad(self.latest_model).size
    gradients = []
    for client in S:
        _, client_grad = client.get_grads(model_len)
        gradients.append(client_grad)
    X = np.array(gradients)

    reducer = PCA(n_components=0.95, svd_solver='full')
    X_reduced = reducer.fit_transform(X)

    km = KMeans(n_clusters=self.num_groups)
    fitted = km.fit(X_reduced)
    print(km.labels_)
    csv_log.write_clusters(fitted.labels_, self.run_name)

    for client, label in zip(S, fitted.labels_):
        groups["group_" + str(label)]["clients"].append(client)

    # Drop the groups that ended up without any client.
    empty_names = [
        name for name, info in groups.items() if len(info["clients"]) == 0
    ]
    for name in empty_names:
        del groups[name]
    return groups