def __init__(self, params, learner, dataset):
    print('Using Federated prox to Train')
    self.inner_opt = PerturbedGradientDescent(params['learning_rate'],
                                              params['mu'])
    super(Server, self).__init__(params, learner, dataset)
    self.writer = CSVWriter(params['export_filename'],
                            'results/' + params['dataset'])
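# For context: a minimal numpy sketch (an assumption, not the repository's
# actual TensorFlow op) of the proximal update that PerturbedGradientDescent
# is understood to apply in FedProx. Each local step follows the task gradient
# plus a proximal pull mu * (w - w_star) toward the server model w_star, which
# is what set_params(self.latest_model, self.client_model) fixes before the
# clients solve locally.
import numpy as np

def perturbed_gd_step(w, grad, w_star, learning_rate, mu):
    # Hypothetical illustration: w <- w - lr * (grad + mu * (w - w_star))
    return w - learning_rate * (grad + mu * (w - w_star))

# Toy usage: the proximal term pulls the local weights back toward w_star.
w = np.array([1.0, 2.0])
w_star = np.zeros(2)
grad = np.array([0.1, -0.2])
w_next = perturbed_gd_step(w, grad, w_star, learning_rate=0.01, mu=0.01)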
def __init__(self, params, learner, dataset):
    print('Using Federated prox to Train')
    inner_opt = PerturbedGradientDescent(params['lr'], params['mu'])
    append2metric = f'mu{params["mu"]}'
    super(Server, self).__init__(params, learner, dataset,
                                 optimizer=inner_opt,
                                 append2metric=append2metric)
    self.drop_rate = params['drop_rate']
def __init__(self, params, learner, dataset):
    if params['optimizer'] == "fedprox":
        self.alg = "FEDPROX"
        print('Using FedProx to Train')
        mu = 0.005  # 0.005: faster but less smooth vs 0.01: smoother but slower
        # self.inner_opt = PROXSGD(params['learning_rate'], params["lamb"])
        self.inner_opt = PerturbedGradientDescent(params['learning_rate'], mu)
    elif params['optimizer'] == "fedavg":
        self.alg = "FEDAVG"
        print('Using FedAvg to Train')
        self.inner_opt = tf.train.GradientDescentOptimizer(
            params['learning_rate'])
    super(Server, self).__init__(params, learner, dataset)
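# A hypothetical params dict for the optimizer switch above; only the keys
# read by this snippet are shown, and the concrete values are illustrative.
params = {
    'optimizer': 'fedprox',  # or 'fedavg'
    'learning_rate': 0.01,
    'lamb': 0.1,  # only used by the commented-out PROXSGD path
}
# server = Server(params, learner, dataset)  # learner/dataset supplied as in the other snippets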
class Server(BaseFedarated):
    def __init__(self, params, learner, dataset):
        print('Using Federated prox to Train')
        self.inner_opt = PerturbedGradientDescent(params['learning_rate'],
                                                  params['mu'])
        super(Server, self).__init__(params, learner, dataset)

    def train(self):
        '''Train using Federated Proximal'''
        print('Training with {} workers ---'.format(self.clients_per_round))
        for i in range(self.num_rounds):
            # test model
            if i % self.eval_every == 0:
                stats = self.test()  # have set the latest model for all clients
                stats_train = self.train_error_and_loss()
                # stats_train returns (ids, groups, num_samples, tot_correct, losses)
                tqdm.write('At round {} accuracy: {}'.format(
                    i, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))  # testing accuracy
                tqdm.write('At round {} training accuracy: {}'.format(
                    i, np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
                tqdm.write('At round {} training loss: {}'.format(
                    i, np.dot(stats_train[4], stats_train[2]) * 1.0 /
                    np.sum(stats_train[2])))
                tqdm.write('At round {} weighted average: {}'.format(
                    i, np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))

            model_len = process_grad(self.latest_model).size
            global_grads = np.zeros(model_len)
            num_samples = []
            local_grads = []

            for c in self.clients:
                num, client_grad = c.get_grads(model_len)
                local_grads.append(client_grad)
                num_samples.append(num)
                global_grads = np.add(global_grads, client_grad * num)
            global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples))

            difference = 0
            for idx in range(len(self.clients)):
                difference += np.sum(np.square(global_grads - local_grads[idx]))
            difference = difference * 1.0 / len(self.clients)
            tqdm.write('gradient difference: {}'.format(difference))

            indices, selected_clients = self.select_clients(
                i, num_clients=self.clients_per_round)  # uniform sampling
            np.random.seed(i)  # make sure the stragglers are the same for FedProx and FedAvg
            active_clients = np.random.choice(
                selected_clients,
                round(self.clients_per_round * (1 - self.drop_percent)),
                replace=False)

            csolns = []  # buffer for receiving client solutions

            self.inner_opt.set_params(self.latest_model, self.client_model)

            for idx, c in enumerate(selected_clients.tolist()):
                # communicate the latest model
                c.set_params(self.latest_model)
                total_iters = int(self.num_epochs * c.num_samples /
                                  self.batch_size) + 2  # randint(low, high) = [low, high)

                # solve minimization locally
                if c in active_clients:
                    soln, stats = c.solve_inner(num_epochs=self.num_epochs,
                                                batch_size=self.batch_size)
                else:
                    # soln, stats = c.solve_iters(num_iters=np.random.randint(low=1, high=total_iters), batch_size=self.batch_size)
                    soln, stats = c.solve_inner(
                        num_epochs=np.random.randint(low=1, high=self.num_epochs),
                        batch_size=self.batch_size)

                # gather solutions from client
                csolns.append(soln)

                # track communication cost
                self.metrics.update(rnd=i, cid=c.id, stats=stats)

            # update models
            self.latest_model = self.aggregate(csolns)
            self.client_model.set_params(self.latest_model)

        # final test model
        stats = self.test()
        stats_train = self.train_error_and_loss()
        self.metrics.accuracies.append(stats)
        self.metrics.train_accuracies.append(stats_train)
        tqdm.write('At round {} accuracy: {}'.format(
            self.num_rounds, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
        tqdm.write('At round {} training accuracy: {}'.format(
            self.num_rounds,
            np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
        tqdm.write('At round {} weighted average: {}'.format(
            self.num_rounds,
            np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
class Server(BaseFedarated):
    def __init__(self, params, learner, dataset):
        print('Using Federated prox to Train')
        self.inner_opt = PerturbedGradientDescent(params['learning_rate'],
                                                  params['mu'])
        # self.seed = 1
        super(Server, self).__init__(params, learner, dataset)

    def train(self):
        '''Train using Federated Proximal'''
        print('Training with {} workers ---'.format(self.clients_per_round))
        for i in range(self.num_rounds):
            # test model
            if i % self.eval_every == 0:
                stats = self.test()  # have set the latest model for all clients
                stats_train = self.train_error_and_loss()
                tqdm.write('At round {} accuracy: {}'.format(
                    i, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))  # testing accuracy
                tqdm.write('At round {} training accuracy: {}'.format(
                    i, np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
                tqdm.write('At round {} training loss: {}'.format(
                    i, np.dot(stats_train[4], stats_train[2]) * 1.0 /
                    np.sum(stats_train[2])))

            model_len = process_grad(self.latest_model).size
            global_grads = np.zeros(model_len)
            num_samples = []
            local_grads = []

            for c in self.clients:
                num, client_grad = c.get_grads(model_len)
                local_grads.append(client_grad)
                num_samples.append(num)
                global_grads = np.add(global_grads, client_grad * num)
            global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples))

            difference = 0
            for idx in range(len(self.clients)):
                difference += np.sum(np.square(global_grads - local_grads[idx]))
            difference = difference * 1.0 / len(self.clients)
            tqdm.write('gradient difference: {}'.format(difference))

            selected_clients = self.select_clients(
                i, num_clients=self.clients_per_round)

            csolns = []  # buffer for receiving client solutions

            self.inner_opt.set_params(self.latest_model, self.client_model)

            for c in selected_clients:
                # communicate the latest model
                c.set_params(self.latest_model)

                # solve minimization locally
                soln, stats = c.solve_inner(num_epochs=self.num_epochs,
                                            batch_size=self.batch_size)

                # gather solutions from client
                csolns.append(soln)

                # track communication cost
                self.metrics.update(rnd=i, cid=c.id, stats=stats)

            # update model
            self.latest_model = self.aggregate(csolns)
            self.client_model.set_params(self.latest_model)

        # final test model
        stats = self.test()
        stats_train = self.train_error_and_loss()
        self.metrics.accuracies.append(stats)
        self.metrics.train_accuracies.append(stats_train)
        tqdm.write('At round {} accuracy: {}'.format(
            self.num_rounds, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
        tqdm.write('At round {} training accuracy: {}'.format(
            self.num_rounds,
            np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
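# A minimal sketch of the sample-weighted FedAvg aggregation that
# self.aggregate(csolns) is assumed to perform in the trainers above
# (an illustrative reconstruction, not the BaseFedarated source): each
# entry of csolns is (num_samples, list_of_layer_arrays).
import numpy as np

def aggregate_sketch(csolns):
    total = sum(n for n, _ in csolns)
    base = [np.zeros_like(w, dtype=np.float64) for w in csolns[0][1]]
    for n, soln in csolns:
        for k, w in enumerate(soln):
            base[k] += n * w.astype(np.float64)
    return [w / total for w in base]

# Toy usage with two "clients", each holding a single-layer model.
csolns = [(10, [np.array([1.0, 1.0])]), (30, [np.array([2.0, 2.0])])]
avg = aggregate_sketch(csolns)  # -> [array([1.75, 1.75])]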
class Server(BaseFedarated):
    def __init__(self, params, learner, dataset):
        print('Using Group prox to Train')
        self.group_list = []  # list of Group() instances
        self.group_ids = []  # list of group ids
        self.num_group = params['num_group']
        self.prox = params['proximal']
        self.group_min_clients = params['min_clients']
        self.allow_empty = params['allow_empty']
        self.evenly = params['evenly']
        self.sklearn_seed = params['seed']
        self.agg_lr = params['agg_lr']
        self.RAC = params['RAC']  # Randomly Assign Clients
        self.RCC = params['RCC']  # Random Cluster Centers
        if self.prox == True:
            self.inner_opt = PerturbedGradientDescent(params['learning_rate'],
                                                      params['mu'])
        else:
            self.inner_opt = tf.train.GradientDescentOptimizer(
                params['learning_rate'])
        super(Server, self).__init__(params, learner, dataset)
        self.latest_model = self.client_model.get_params()  # the global AVG model
        self.latest_update = self.client_model.get_params()
        self.create_groups()
        self.writer = CSVWriter(params['export_filename'],
                                'results/' + params['dataset'], self.group_ids)

    """ Initialize the Group() instances """

    def create_groups(self):
        self.group_list = [
            Group(gid, self.client_model) for gid in range(self.num_group)
        ]  # 0, 1, ..., num_group - 1
        self.group_ids = [g.get_group_id() for g in self.group_list]
        self.group_cold_start(self.RCC)  # init the latest_model of all groups

    def _get_cosine_similarity(self, m1, m2):
        flat_m1 = process_grad(m1)
        flat_m2 = process_grad(m2)
        cosine = np.dot(flat_m1, flat_m2) / (np.sqrt(np.sum(flat_m1**2)) *
                                             np.sqrt(np.sum(flat_m2**2)))
        return cosine

    """ Measure the difference between client_model and group_model """

    def measure_difference(self, client_model, group_model):
        # Strategy #1: angle (cosine) between the two vectors
        diff = self._get_cosine_similarity(client_model, group_model)
        diff = 1.0 - ((diff + 1.0) / 2.0)  # scale to [0, 1], then flip
        # Strategy #2: Euclidean distance between the two vectors
        # diff = np.sum((client_model - group_model)**2)
        return diff

    def get_ternary_cosine_similarity_matrix(self, w, V):
        # print(type(w), type(V))
        print('Delta w shape:', w.shape, 'Matrix V shape:', V.shape)
        w, V = w.astype(np.float32), V.astype(np.float32)
        left = np.matmul(w, V)  # delta_w (dot) V
        scale = np.reciprocal(
            np.linalg.norm(w, axis=1, keepdims=True) *
            np.linalg.norm(V, axis=0, keepdims=True))
        diffs = left * scale  # element-wise product
        diffs = (-diffs + 1.) / 2.  # normalize to [0, 1]
        return diffs

    def client_cold_start(self, client):
        if client.group is not None:
            print("Warning: Client already has a group: {:2d}.".format(
                client.group))
        # Training is based on the global AVG model
        start_model = self.client_model.get_params()  # back up the model first
        self.client_model.set_params(
            self.latest_model)  # set the training model to the global AVG model
        client_model, client_update = self.pre_train_client(client)
        diff_list = []  # tuples of (group, diff)
        for g in self.group_list:
            diff_g = self.measure_difference(client_update, g.latest_update)
            diff_list.append((g, diff_g))  # w/o sort
        # Update (init) the diff list of the client
        client.update_difference(diff_list)
        # print("client:", client.id, "diff_list:", diff_list)
        assign_group = self.group_list[np.argmin([tup[1] for tup in diff_list])]
        # Only set the group attr of the client; do not actually add the client to the group
        client.set_group(assign_group)
        # Restore the training model
        self.client_model.set_params(start_model)
        return

    """ Deal with the group cold start problem """

    def group_cold_start(self, random_centers=False):
        if random_centers == True:
            # Strategy #1: randomly pre-train num_group clients as cluster centers
            selected_clients = random.sample(self.clients, k=self.num_group)
            for c, g in zip(selected_clients, self.group_list):
                g.latest_model, g.latest_update = self.pre_train_client(c)
                c.set_group(g)
        if random_centers == False:
            # Strategy #2: pre-train, then cluster the directions of clients' weights
            alpha = 20
            selected_clients = random.sample(
                self.clients, k=min(self.num_group * alpha, len(self.clients)))
            for c in selected_clients:
                c.clustering = True  # mark these clients as clustering clients
            cluster = self.clustering_clients(
                selected_clients)  # {Cluster ID: (cm, [c1, c2, ...])}
            # Init groups according to the clustering results
            for g, id in zip(self.group_list, cluster.keys()):
                # Init the group's latest update
                new_model = cluster[id][0]
                g.latest_update = [
                    w1 - w0 for w0, w1 in zip(g.latest_model, new_model)
                ]
                g.latest_model = new_model
                # These clients do not need to be cold-started.
                # Set the "group" attr of the client only; don't add the client to the group
                for c in cluster[id][1]:
                    c.set_group(g)
        return

    """ Cluster clients by K-Means """

    def clustering_clients(self, clients, n_clusters=None, max_iter=20):
        if n_clusters is None:
            n_clusters = self.num_group

        # Pre-train these clients first
        csolns, cupdates = {}, {}
        # Record the execution time
        start_time = time.time()
        for c in clients:
            csolns[c], cupdates[c] = self.pre_train_client(c)
        print("Pre-training takes {} seconds".format(time.time() - start_time))

        update_array = [process_grad(update) for update in cupdates.values()]
        update_array = np.vstack(update_array).T  # shape=(n_params, n_clients)

        # Record the execution time
        start_time = time.time()
        svd = TruncatedSVD(n_components=3, random_state=self.sklearn_seed)
        decomp_updates = svd.fit_transform(update_array)  # shape=(n_params, 3)
        print("SVD takes {} seconds".format(time.time() - start_time))
        n_components = decomp_updates.shape[-1]

        # Record the execution time
        start_time = time.time()
        diffs = []
        delta_w = update_array.T  # shape=(n_clients, n_params)
        diffs = self.get_ternary_cosine_similarity_matrix(delta_w,
                                                          decomp_updates)
        '''
        for dir in decomp_updates.T:
            dir_diff = [self.measure_difference(cupdates[c], dir) for c in clients]
            diffs.append(dir_diff)
        diffs = np.vstack(diffs).T  # shape=(n_clients, 3)
        '''
        print("Ternary Cossim Matrix calculation takes {} seconds".format(
            time.time() - start_time))

        # Record the execution time
        start_time = time.time()
        kmeans = KMeans(n_clusters, random_state=self.sklearn_seed,
                        max_iter=max_iter).fit(diffs)
        print("Clustering takes {} seconds".format(time.time() - start_time))
        print('Clustering Results:', Counter(kmeans.labels_))
        print('Clustering Inertia:', kmeans.inertia_)

        cluster = {}  # {Cluster ID: (cm, [c1, c2, ...])}
        cluster2clients = [[] for _ in range(n_clusters)]  # [[c1, c2,...], [c3, c4,...], ...]
        for idx, cluster_id in enumerate(kmeans.labels_):
            # print(idx, cluster_id, len(cluster2clients), n_clusters)  # debug
            cluster2clients[cluster_id].append(clients[idx])
        for cluster_id, client_list in enumerate(cluster2clients):
            # Calculate the means of the cluster; all clients have equal weight
            weighted_csolns = [(1, csolns[c]) for c in client_list]
            if weighted_csolns:
                # Update the cluster means
                cluster[cluster_id] = (self.aggregate(weighted_csolns),
                                       client_list)
            else:
                print("Error, cluster is empty")
        return cluster

    def measure_group_diffs(self):
        diffs = np.empty(len(self.group_list))
        for idx, g in enumerate(self.group_list):
            # direction
            # diff = self.measure_difference(self.group_list[0].latest_model, g.latest_model)
            # square root
            model_a = process_grad(self.latest_model)
            model_b = process_grad(g.latest_model)
            diff = np.sum((model_a - model_b)**2)**0.5
            diffs[idx] = diff
        diffs = np.append(diffs, np.sum(diffs))  # append the sum of discrepancies to the end
        return diffs

    """ Pre-train the client 1 epoch and return weights """

    def pre_train_client(self, client):
        start_model = client.get_params()  # back up the start model
        if self.prox == True:
            # Set vstar to the client model to remove the proximal term
            self.inner_opt.set_params(self.client_model.get_params(),
                                      self.client_model)
        soln, stat = client.solve_inner()  # pre-train the client only one epoch
        ws = soln[1]  # weights of the model
        updates = [w1 - w0 for w0, w1 in zip(start_model, ws)]
        client.set_params(start_model)  # restore the model
        return ws, updates

    def get_not_empty_groups(self):
        not_empty_groups = [g for g in self.group_list if not g.is_empty()]
        return not_empty_groups

    def group_test(self):
        backup_model = self.latest_model  # back up the global model
        results = []
        for g in self.group_list:
            c_list = [c for c in self.clients if c.group == g]
            num_samples = []
            tot_correct = []
            self.client_model.set_params(g.latest_model)
            for c in c_list:
                ct, ns = c.test()
                tot_correct.append(ct * 1.0)
                num_samples.append(ns)
            ids = [c.id for c in c_list]
            results.append((ids, g, num_samples, tot_correct))
        self.client_model.set_params(backup_model)  # restore the model
        return results

    def group_train_error_and_loss(self):
        backup_model = self.latest_model  # back up the global model
        results = []
        for g in self.group_list:
            c_list = [c for c in self.clients if c.group == g]
            num_samples = []
            tot_correct = []
            losses = []
            self.client_model.set_params(g.latest_model)
            for c in c_list:
                ct, cl, ns = c.train_error_and_loss()
                tot_correct.append(ct * 1.0)
                num_samples.append(ns)
                losses.append(cl * 1.0)
            ids = [c.id for c in c_list]
            results.append((ids, g, num_samples, tot_correct, losses))
        self.client_model.set_params(backup_model)  # restore the model
        return results

    def train(self):
        print('Training with {} workers ---'.format(self.clients_per_round))

        # Client cold start: pre-train all clients
        for c in self.clients:
            if c.is_cold() == True:
                self.client_cold_start(c)

        for i in range(self.num_rounds):
            # Randomly select clients
            indices, selected_clients = self.select_clients(
                i, num_clients=self.clients_per_round)  # uniform sampling
            np.random.seed(i)  # make sure the stragglers are the same for FedProx and FedAvg
            active_clients = np.random.choice(
                selected_clients,
                round(self.clients_per_round * (1 - self.drop_percent)),
                replace=False)

            # Clear all groups; the group attr of each client is retained
            for g in self.group_list:
                g.clear_clients()

            # Client cold start:
            # reschedule selected clients to groups
            self.reschedule_groups(selected_clients, self.allow_empty,
                                   self.evenly, self.RAC)

            # Get the non-empty groups
            handling_groups = self.get_not_empty_groups()

            for g in self.group_list:
                if g in handling_groups:
                    print("Group {}, clients {}".format(g.get_group_id(),
                                                        g.get_client_ids()))
                else:
                    print("Group {} is empty.".format(g.get_group_id()))

            # Freeze these groups before training
            for g in handling_groups:
                g.freeze()

            if i % self.eval_every == 0:
                """
                stats = self.test()  # have set the latest model for all clients
                # Testing on training data is redundant
                stats_train = self.train_error_and_loss()
                """
                group_stats = self.group_test()
                group_stats_train = self.group_train_error_and_loss()
                test_tp, test_tot = 0, 0
                train_tp, train_tot = 0, 0
                for stats, stats_train in zip(group_stats, group_stats_train):
                    tqdm.write('Group {}'.format(stats[1].id))
                    test_tp += np.sum(stats[3])
                    test_tot += np.sum(stats[2])
                    test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
                    tqdm.write('At round {} accuracy: {}'.format(i, test_acc))  # testing accuracy
                    train_tp += np.sum(stats_train[3])
                    train_tot += np.sum(stats_train[2])
                    train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(
                        stats_train[2])
                    tqdm.write('At round {} training accuracy: {}'.format(
                        i, train_acc))  # training accuracy
                    train_loss = np.dot(stats_train[4],
                                        stats_train[2]) * 1.0 / np.sum(
                                            stats_train[2])
                    tqdm.write('At round {} training loss: {}'.format(
                        i, train_loss))
                    # Write results to the csv file
                    self.writer.write_stats(i, stats[1].id, test_acc, train_acc,
                                            train_loss,
                                            len(stats[1].get_client_ids()))
                mean_test_acc = test_tp * 1.0 / test_tot
                mean_train_acc = train_tp * 1.0 / train_tot
                self.writer.write_means(mean_test_acc, mean_train_acc)
                print('At round {} mean test accuracy: {} mean train accuracy: {}'
                      .format(i, mean_test_acc, mean_train_acc))

                diffs = self.measure_group_diffs()
                print("The group differences are:", diffs)
                self.writer.write_diffs(diffs)

            # Broadcast the global model to the clients (groups)
            # self.client_model.set_params(self.latest_model)

            # Train each group sequentially
            for g in handling_groups:
                print("Begin group {:2d} training".format(g.get_group_id()))
                # Each group trains for group_epochs rounds
                for _ in range(g.group_epochs):
                    if self.prox == True:
                        # Update the optimizer; vstar is the latest_model of this group
                        self.inner_opt.set_params(g.latest_model,
                                                  self.client_model)
                    # Set the global model to the group model
                    self.client_model.set_params(g.latest_model)
                    # Begin group training
                    cupdates = g.train()

                # After the clients finish training, update each client's diff list
                for client, update in cupdates.items():
                    diff_list = []
                    for other_g in self.group_list:
                        diff_g = self.measure_difference(update,
                                                         other_g.latest_update)
                        diff_list.append((other_g, diff_g))
                    client.update_difference(diff_list)
                # Restore the client model before the next group trains
                # self.client_model.set_params(self.latest_model)

            # Aggregate the group models and update the global (latest) model
            self.aggregate_groups(self.group_list, agg_lr=self.agg_lr)
            # Refresh the global model and global delta weights (latest_update)
            self.refresh_global_model(self.group_list)

        # Close the writer and end the training
        self.writer.close()

    # Used to maintain the global AVG model and the global latest update
    def refresh_global_model(self, groups):
        start_model = self.latest_model
        # Aggregate the group models
        gsolns = []
        for g in groups:
            gsolns.append((1.0, g.latest_model))  # (n_k, soln)
        new_model = self.aggregate(gsolns)
        self.latest_update = [w1 - w0 for w0, w1 in zip(start_model, new_model)]
        self.latest_model = new_model
        return

    def aggregate_groups(self, groups, agg_lr):
        gsolns = [(sum(g.num_samples), g.latest_model) for g in groups]
        group_num = len(gsolns)
        # Calculate the scale of the group models
        gscale = [0] * group_num
        for i, (_, gsoln) in enumerate(gsolns):
            for v in gsoln:
                gscale[i] += np.sum(v.astype(np.float64)**2)
            gscale[i] = gscale[i]**0.5
        # Aggregate the models of each group separately
        for idx, g in enumerate(groups):
            base = [0] * len(gsolns[idx][1])
            weights = [agg_lr * (1.0 / scale) for scale in gscale]
            weights[idx] = 1  # the weight of the main group is 1
            total_weights = sum(weights)
            for j, (_, gsoln) in enumerate(gsolns):
                for k, v in enumerate(gsoln):
                    base[k] += weights[j] * v.astype(np.float64)
            averaged_soln = [v / total_weights for v in base]
            g.latest_update = [
                w1 - w0 for w0, w1 in zip(g.latest_model, averaged_soln)
            ]
            g.latest_model = averaged_soln
        return

    def reschedule_groups(self, selected_clients, allow_empty=False,
                          evenly=False, randomly=False):

        def _get_even_per_group_num(selected_clients_num, group_num):
            per_group_num = np.array([selected_clients_num // group_num] *
                                     group_num)
            remain = selected_clients_num - sum(per_group_num)
            random_groups = random.sample(range(group_num), remain)
            per_group_num[random_groups] += 1  # add the remainder
            return per_group_num

        selected_clients = selected_clients.tolist()  # convert numpy array to list

        if randomly == True and evenly == False:
            for c in selected_clients:
                if c.is_cold() == False:
                    if c.clustering == False:
                        # Randomly assign the client
                        random.choice(self.group_list).add_client(c)
                    else:
                        # This client is a clustering client
                        c.group.add_client(c)
                else:
                    print('Warning: A newcomer is not pre-trained.')
            return

        if randomly == True and evenly == True:
            """
            # Randomly assign clients, but keep each group even
            per_group_num = _get_even_per_group_num(len(selected_clients), len(self.group_list))
            for g, max_num in zip(self.group_list, per_group_num):
                g.max_clients = max_num
            head_idx, tail_idx = 0, 0
            for group_num, g in zip(per_group_num, self.group_list):
                tail_idx += group_num
                g.add_clients(selected_clients[head_idx:tail_idx])
                head_idx = tail_idx
            """
            print("Experimental setting is invalid.")
            return

        if randomly == False and allow_empty == True:
            # Allocate clients to their first-rank groups; some groups may be empty
            for c in selected_clients:
                if c.is_cold() != True:
                    first_rank_group = c.group
                    first_rank_group.add_client(c)
            return

        if randomly == False and evenly == True:
            """ Strategy #1: calculate the number of clients in each group (evenly) """
            selected_clients_num = len(selected_clients)
            group_num = len(self.group_list)
            per_group_num = np.array([selected_clients_num // group_num] *
                                     group_num)
            remain = selected_clients_num - sum(per_group_num)
            random_groups = random.sample(range(group_num), remain)
            per_group_num[random_groups] += 1  # add the remainder
            for g, max_num in zip(self.group_list, per_group_num):
                g.max_clients = max_num
            """ Allocate clients so the client count of each group is even """
            for c in selected_clients:
                if c.is_cold() != True:
                    first_rank_group = c.group
                    if not first_rank_group.is_full():
                        first_rank_group.add_client(c)
                    else:
                        # The first-rank group is full; choose the next group
                        diff_list = c.difference
                        # Sort diff_list
                        diff_list = sorted(diff_list, key=lambda tup: tup[1])
                        for (group, diff) in diff_list:
                            if not group.is_full():
                                group.add_client(c)
                                break
            return

        if randomly == False and evenly == False:
            """ Strategy #2: allocate clients to meet the minimum client requirements """
            for g in self.group_list:
                g.min_clients = self.group_min_clients
            # First ensure that each group has at least min_clients clients
            diff_list, assigned_clients = [], []
            for c in selected_clients:
                diff_list += [(c, g, diff) for g, diff in c.difference]
            diff_list = sorted(diff_list, key=lambda tup: tup[2])
            for c, g, diff in diff_list:
                if len(g.client_ids) < g.min_clients and c not in assigned_clients:
                    g.add_client(c)
                    assigned_clients.append(c)
            # Then add the remaining clients to their first-rank groups
            for c in selected_clients:
                if c not in assigned_clients:
                    first_rank_group = c.group
                    if c.id not in first_rank_group.client_ids:
                        first_rank_group.add_client(c)
            return
        return

    def test_ternary_cosine_similariy(self, alpha=20):
        ''' Compare the ternary similarity and the cosine similarity '''

        def calculate_cosine_distance(v1, v2):
            cosine = np.dot(v1, v2) / (np.sqrt(np.sum(v1**2)) *
                                       np.sqrt(np.sum(v2**2)))
            return cosine

        # Pre-train all clients
        csolns, cupdates = {}, {}
        for c in self.clients:
            csolns[c], cupdates[c] = self.pre_train_client(c)

        # Randomly select alpha * m clients to calculate the direction matrix V
        n_clients = len(self.clients)
        clustering_clients = random.sample(
            self.clients, k=min(self.num_group * alpha, n_clients))
        clustering_update_array = [
            process_grad(cupdates[c]) for c in clustering_clients
        ]
        clustering_update_array = np.vstack(
            clustering_update_array).T  # shape=(n_params, n_clients)

        svd = TruncatedSVD(n_components=3, random_state=self.sklearn_seed)
        decomp_updates = svd.fit_transform(
            clustering_update_array)  # shape=(n_params, 3)
        n_components = decomp_updates.shape[-1]

        # Calculate the ternary similarity matrix for all clients
        ternary_cossim = []
        update_array = [process_grad(cupdates[c]) for c in self.clients]
        delta_w = np.vstack(update_array)  # shape=(n_clients, n_params)
        ternary_cossim = self.get_ternary_cosine_similarity_matrix(
            delta_w, decomp_updates)

        # Calculate the traditional similarity matrix for all clients
        # old_cossim = np.zeros(shape=(n_clients, n_clients), dtype=np.float32)
        old_cossim = cosine_similarity(delta_w)
        old_cossim = (1.0 - old_cossim) / 2.0  # normalize

        # Calculate the Euclidean distance between every two similarities
        distance_ternary = euclidean_distances(ternary_cossim)
        distance_cossim = euclidean_distances(old_cossim)  # shape=(n_clients, n_clients)
        print(distance_ternary.shape, distance_cossim.shape)  # shape=(n_clients, n_clients)
        iu = np.triu_indices(n_clients)
        x, y = distance_ternary[iu], distance_cossim[iu]
        mesh_points = np.vstack((x, y)).T
        print(x.shape, y.shape)
        np.savetxt("cossim.csv", mesh_points, delimiter="\t")
        return x, y
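# A self-contained toy reproduction of the low-rank cosine-dissimilarity
# computation in get_ternary_cosine_similarity_matrix above. The data is
# synthetic; only the shapes and the [0, 1] normalization are being checked.
import numpy as np
from sklearn.decomposition import TruncatedSVD

rng = np.random.RandomState(0)
delta_w = rng.randn(6, 10)        # 6 "clients", 10 flattened params each

svd = TruncatedSVD(n_components=3, random_state=0)
V = svd.fit_transform(delta_w.T)  # shape=(n_params, 3) directional vectors

left = delta_w @ V                # same computation as the method above
scale = np.reciprocal(np.linalg.norm(delta_w, axis=1, keepdims=True) *
                      np.linalg.norm(V, axis=0, keepdims=True))
diffs = (-(left * scale) + 1.) / 2.  # cosine dissimilarity, scaled to [0, 1]
assert diffs.shape == (6, 3) and (diffs >= 0).all() and (diffs <= 1).all()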
class Server(BaseFedarated):
    def __init__(self, params, learner, dataset):
        print('Using Federated prox to Train')
        self.inner_opt = PerturbedGradientDescent(params['learning_rate'],
                                                  params['mu'])
        # Setup Log
        self.params_log = params
        # self.run_name = str(params["ex_name"])+"_fedprox_"+ str(datetime.datetime.now().strftime("%m%d-%H%M%S"))
        self.run_name = str(params["ex_name"]) + "_fedprox"
        self.log_main = []
        csv_log.log_start('prox', params, 1, self.run_name)
        super(Server, self).__init__(params, learner, dataset)

    def train(self):
        '''Train using Federated Proximal'''
        print('Training with {} workers ---'.format(self.clients_per_round))
        elapsed = []
        for i in range(self.num_rounds):
            # test model
            if i % self.eval_every == 0:
                stats = self.test()  # have set the latest model for all clients
                stats_train = self.train_error_and_loss()
                test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
                train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])
                train_loss = np.dot(stats_train[4], stats_train[2]) * 1.0 / np.sum(stats_train[2])
                tqdm.write('At round {} accuracy: {}'.format(i, test_acc))  # testing accuracy
                tqdm.write('At round {} training accuracy: {}'.format(i, train_acc))
                tqdm.write('At round {} training loss: {}'.format(i, train_loss))
                self.log_main.append([i, train_loss, train_acc, test_acc])

            start_time = time.time()

            model_len = process_grad(self.latest_model).size
            global_grads = np.zeros(model_len)
            num_samples = []
            local_grads = []

            for c in self.clients:
                num, client_grad = c.get_grads(model_len)
                local_grads.append(client_grad)
                num_samples.append(num)
                global_grads = np.add(global_grads, client_grad * num)
            global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples))

            difference = 0
            for idx in range(len(self.clients)):
                difference += np.sum(np.square(global_grads - local_grads[idx]))
            difference = difference * 1.0 / len(self.clients)
            tqdm.write('gradient difference: {}'.format(difference))

            indices, selected_clients = self.select_clients(
                i, num_clients=self.clients_per_round)  # uniform sampling
            np.random.seed(i)  # make sure the stragglers are the same for FedProx and FedAvg
            active_clients = np.random.choice(
                selected_clients,
                round(self.clients_per_round * (1 - self.drop_percent)),
                replace=False)

            csolns = []  # buffer for receiving client solutions

            self.inner_opt.set_params(self.latest_model, self.client_model)

            for idx, c in enumerate(selected_clients.tolist()):
                # communicate the latest model
                c.set_params(self.latest_model)
                total_iters = int(self.num_epochs * c.num_samples /
                                  self.batch_size) + 2  # randint(low, high) = [low, high)

                # solve minimization locally
                if c in active_clients:
                    soln, stats = c.solve_inner(num_epochs=self.num_epochs,
                                                batch_size=self.batch_size)
                else:
                    # soln, stats = c.solve_iters(num_iters=np.random.randint(low=1, high=total_iters), batch_size=self.batch_size)
                    soln, stats = c.solve_inner(
                        num_epochs=np.random.randint(low=1, high=self.num_epochs),
                        batch_size=self.batch_size)

                # gather solutions from client
                csolns.append(soln)

                # track communication cost
                self.metrics.update(rnd=i, cid=c.id, stats=stats)

            # update models
            self.latest_model = self.aggregate(csolns)
            self.client_model.set_params(self.latest_model)

            elapsed_time = time.time() - start_time
            elapsed.append(elapsed_time)

        # final test model
        stats = self.test()
        stats_train = self.train_error_and_loss()
        self.metrics.accuracies.append(stats)
        self.metrics.train_accuracies.append(stats_train)
        test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
        train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])
        train_loss = np.dot(stats_train[4], stats_train[2]) * 1.0 / np.sum(stats_train[2])
        tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, test_acc))
        tqdm.write('At round {} training accuracy: {}'.format(
            self.num_rounds, train_acc))
        self.log_main.append([self.num_rounds, train_loss, train_acc, test_acc])
        csv_log.write_all('prox', self.log_main, [], 1, self.run_name)
        csv_log.graph_print('prox', self.params_log, 1, self.run_name)
        # tqdm.write('At round {} accuracy: {}'.format(self.num_rounds, np.sum(stats[3])*1.0/np.sum(stats[2])))
        # tqdm.write('At round {} training accuracy: {}'.format(self.num_rounds, np.sum(stats_train[3])*1.0/np.sum(stats_train[2])))
        print("Time Taken Each Round: ")
        print(elapsed)
        print(np.mean(elapsed))
        csv_log.write_time_taken(elapsed, self.run_name)
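# The 'gradient difference' printed by the train() loops above is the average
# squared L2 distance between each client's gradient and the sample-weighted
# global gradient. A small self-contained check of that computation with
# synthetic gradients (values are illustrative only):
import numpy as np

local_grads = [np.array([1.0, 0.0]), np.array([0.0, 1.0])]
num_samples = [10, 30]

global_grad = sum(g * n for g, n in zip(local_grads, num_samples)) / sum(num_samples)
difference = np.mean([np.sum(np.square(global_grad - g)) for g in local_grads])
# global_grad = [0.25, 0.75]; per-client terms are 1.125 and 0.125,
# so difference = 0.625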
class Server(BaseFedarated):
    def __init__(self, params, learner, dataset):
        print('Using Group prox to Train')
        self.group_list = []  # list of Group() instances
        self.group_ids = []  # list of group ids

        # These attrs will be set in BaseFedarated.__init__();
        # we repeat the assignment for clarity.
        self.num_group = params['num_group']
        self.prox = params['proximal']
        self.group_min_clients = params['min_clients']
        self.allow_empty = params['allow_empty']
        self.evenly = params['evenly']
        self.seed = params['seed']
        self.sklearn_seed = params['sklearn_seed']
        self.agg_lr = params['agg_lr']
        self.RAC = params['RAC']  # Randomly Assign Clients
        self.RCC = params['RCC']  # Random Cluster Centers
        self.MADC = params['MADC']  # use Mean Absolute Difference of pairwise Cossim
        self.recluster_epoch = params['recluster_epoch']
        self.max_temp = params['client_temp']
        self.temp_dict = {}
        """ We implement THREE run modes of FedGroup:
            1) ours, FedGroup
            2) IFCA: "An Efficient Framework for Clustered Federated Learning"
            3) FeSEM: "Multi-Center Federated Learning"
        """
        self.run_mode = 'FedGroup'
        if params['ifca'] == True:
            self.run_mode = 'IFCA'
        if params['fesem'] == True:
            self.run_mode = 'FeSEM'

        if self.prox == True:
            self.inner_opt = PerturbedGradientDescent(params['learning_rate'],
                                                      params['mu'])
        else:
            self.inner_opt = tf.train.GradientDescentOptimizer(
                params['learning_rate'])
        super(Server, self).__init__(params, learner, dataset)
        self.latest_model = self.client_model.get_params()  # the global AVG model
        self.latest_update = self.client_model.get_params()
        self.create_groups()
        # Record the temperature of clients
        for c in self.clients:
            self.temp_dict[c] = self.max_temp
        self.writer = CSVWriter(params['export_filename'],
                                'results/' + params['dataset'], self.group_ids)

    """ Initialize the Group() instances """

    def create_groups(self):
        self.group_list = [
            Group(gid, self.client_model) for gid in range(self.num_group)
        ]  # 0, 1, ..., num_group - 1
        self.group_ids = [g.get_group_id() for g in self.group_list]
        self.group_cold_start(self.RCC)  # init the latest_model of all groups

    def _get_cosine_similarity(self, m1, m2):
        flat_m1 = process_grad(m1)
        flat_m2 = process_grad(m2)
        cosine = np.dot(flat_m1, flat_m2) / (np.sqrt(np.sum(flat_m1**2)) *
                                             np.sqrt(np.sum(flat_m2**2)))
        return cosine

    """ Measure the difference between a client and a group """

    def measure_difference(self, client, group, run_mode):
        # Strategy #1: angle (cosine) between the client update and the group update.
        # FedGroup uses this.
        if run_mode == 'FedGroup':
            # FedGroup needs to pre-train the client
            cmodel, cupdate = self.pre_train_client(client)
            diff = self._get_cosine_similarity(cupdate, group.latest_update)
            diff = 1.0 - ((diff + 1.0) / 2.0)  # scale to [0, 1], then flip
        # Strategy #2: Euclidean distance between the client model and the group model.
        # FeSEM uses this.
        if run_mode == 'FeSEM':
            cmodel, gmodel = process_grad(client.local_model), process_grad(
                group.latest_model)
            diff = np.sum((cmodel - gmodel)**2)
        # Strategy #3: training loss of the group model.
        # IFCA uses this.
        if run_mode == 'IFCA':
            # The training loss of the group model evaluated on the client's training set
            backup_params = client.get_params()
            # Note: use the group model for evaluation
            client.set_params(group.latest_model)
            _, train_loss, _ = client.train_error_and_loss()
            diff = train_loss
            # Restore the parameters
            client.set_params(backup_params)
        return diff

    def get_ternary_cosine_similarity_matrix(self, w, V):
        # print(type(w), type(V))
        print('Delta w shape:', w.shape, 'Matrix V shape:', V.shape)
        w, V = w.astype(np.float32), V.astype(np.float32)
        left = np.matmul(w, V)  # delta_w (dot) V
        scale = np.reciprocal(
            np.linalg.norm(w, axis=1, keepdims=True) *
            np.linalg.norm(V, axis=0, keepdims=True))
        diffs = left * scale  # element-wise product
        diffs = (-diffs + 1.) / 2.  # normalize to [0, 1]
        return diffs

    def get_assign_group(self, client, run_mode):
        diff_list = []  # tuples of (group, diff)
        for g in self.group_list:
            diff_g = self.measure_difference(client, g, run_mode)
            diff_list.append((g, diff_g))  # w/o sort
        # print("client:", client.id, "diff_list:", diff_list)
        assign_group = self.group_list[np.argmin([tup[1] for tup in diff_list])]
        return diff_list, assign_group

    def client_cold_start(self, client, run_mode):
        if client.group is not None:
            print("Warning: Client already has a group: {:2d}.".format(
                client.group))
        if run_mode == 'FedGroup':
            # Training is based on the global AVG model
            start_model = self.client_model.get_params()  # back up the model first
            self.client_model.set_params(
                self.latest_model)  # set the training model to the global AVG model
            # client_model, client_update = self.pre_train_client(client)
            diff_list, assign_group = self.get_assign_group(client, run_mode)
            # Update (init) the diff list of the client
            client.update_difference(diff_list)
            # Only set the group attr of the client; do not actually add the client to the group
            client.set_group(assign_group)
            # Restore the training model
            self.client_model.set_params(start_model)
            return
        if run_mode == 'IFCA' or run_mode == 'FeSEM':
            pass  # IFCA and FeSEM don't use a cold start strategy
        return

    """ Deal with the group cold start problem """

    def group_cold_start(self, random_centers=False):
        if self.run_mode == 'FedGroup':
            # Strategy #1: randomly pre-train num_group clients as cluster centers.
            # It is an optional strategy of FedGroup, named FedGroup-RCC.
            if random_centers == True:
                selected_clients = random.sample(self.clients, k=self.num_group)
                for c, g in zip(selected_clients, self.group_list):
                    g.latest_model, g.latest_update = self.pre_train_client(c)
                    c.set_group(g)
            # Strategy #2: pre-train, then cluster the directions of clients' weights.
            # <FedGroup> and <FedGroupProx> use this strategy.
            if random_centers == False:
                alpha = 20  ######## Pre-train scaler ###################
                selected_clients = random.sample(
                    self.clients,
                    k=min(self.num_group * alpha, len(self.clients)))
                for c in selected_clients:
                    c.clustering = True  # mark these clients as clustering clients
                cluster = self.clustering_clients(
                    selected_clients)  # {Cluster ID: (cm, [c1, c2, ...])}
                # Init groups according to the clustering results
                for g, id in zip(self.group_list, cluster.keys()):
                    # Init the group's latest update
                    g.latest_update = cluster[id][1]
                    g.latest_model = cluster[id][0]
                    # These clients do not need to be cold-started.
                    # Set the "group" attr of the client only; don't add the client to the group
                    for c in cluster[id][2]:
                        c.set_group(g)
        # Strategy #3: randomly initialize group models as centers.
        # <IFCA> and <FeSEM> use this strategy.
        if self.run_mode == 'IFCA' or self.run_mode == 'FeSEM':
            # Back up the original model params
            backup_params = self.client_model.get_params()
            # Reinitialize num_group client models as center models
            for idx, g in enumerate(self.group_list):
                # Change the seed of tensorflow
                new_seed = (idx + self.seed) * 888
                # Reinitialize the params of the model
                self.client_model.reinitialize_params(new_seed)
                new_params = self.client_model.get_params()
                g.latest_model, g.latest_update = new_params, new_params
            # Restore the seed of tensorflow
            tf.set_random_seed(123 + self.seed)
            # Restore the parameters of the model
            self.client_model.set_params(backup_params)
            """
            # Reinitialize for insurance purposes
            new_params = self.client_model.reinitialize_params(123 + self.seed)
            # Restore the weights of the model
            if np.array_equal(process_grad(backup_params), process_grad(new_params)) == True:
                print('############TRUE############')
            else:
                print('############FALSE############')
            """
        return

    """ Recluster the clients, then refresh the groups' optimization goals.
        It is a dynamic clustering strategy of FedGroup.
        Problem of the recluster strategy: the personality of the group model
        will be weakened.
    """

    def group_recluster(self):
        if self.run_mode != 'FedGroup':
            return
        if self.RCC == True:
            print("Warning: The random cluster center strategy conflicts "
                  "with the dynamic clustering strategy.")
            return

        # Select alpha*num_group warm clients for reclustering
        alpha = 20
        warm_clients = [c for c in self.clients if c.is_cold() == False]
        selected_clients = random.sample(
            warm_clients, k=min(self.num_group * alpha, len(warm_clients)))
        # Clear the clustering flag of warm clients
        for c in warm_clients:
            c.clustering, c.group = False, None
        for c in selected_clients:
            c.clustering = True
        # Recluster the selected clients
        cluster = self.clustering_clients(
            selected_clients)  # {Cluster ID: (cm, [c1, c2, ...])}
        # Init groups according to the clustering results
        for g, id in zip(self.group_list, cluster.keys()):
            # Init the group's latest update
            g.latest_update = cluster[id][1]
            g.latest_model = cluster[id][0]
            # These clients do not need to be cold-started.
            # Set the "group" attr of the client only; don't add the client to the group
            for c in cluster[id][2]:
                c.set_group(g)
        # Refresh the global AVG model
        self.refresh_global_model(self.group_list)
        # *Cold-start the original warm clients for evaluation
        for c in warm_clients:
            if c.is_cold() == True:
                self.client_cold_start(c, run_mode='FedGroup')
        return

    def reassign_warm_clients(self):
        warm_clients = [c for c in self.clients if c.is_cold() == False]
        for c in warm_clients:
            c.group = None
            self.client_cold_start(c, self.run_mode)
        return

    """ Cluster clients by a clustering algorithm """

    def clustering_clients(self, clients, n_clusters=None, max_iter=20):
        if n_clusters is None:
            n_clusters = self.num_group

        # Pre-train these clients first
        csolns, cupdates = {}, {}
        # The updates for clustering must be calculated upon the same model;
        # we use the global auxiliary (AVG) model as the starting point
        self.client_model.set_params(self.latest_model)

        # Record the execution time
        start_time = time.time()
        for c in clients:
            csolns[c], cupdates[c] = self.pre_train_client(c)
        print("Pre-training takes {} seconds".format(time.time() - start_time))

        update_array = [process_grad(update) for update in cupdates.values()]
        delta_w = np.vstack(update_array)  # shape=(n_clients, n_params)

        # Record the execution time
        start_time = time.time()
        # Decompose the directions of updates into num_group directional vectors
        svd = TruncatedSVD(n_components=self.num_group,
                           random_state=self.sklearn_seed)
        decomp_updates = svd.fit_transform(delta_w.T)  # shape=(n_params, n_groups)
        print("SVD takes {} seconds".format(time.time() - start_time))
        n_components = decomp_updates.shape[-1]

        # Record the execution time of the EDC calculation
        start_time = time.time()
        decomposed_cossim_matrix = cosine_similarity(
            delta_w, decomp_updates.T)  # shape=(n_clients, n_groups)
        '''
        There is no need to normalize the data-driven measure
        because it is a dissimilarity measure.
        # Normalize it to dissimilarity [0, 1]
        decomposed_dissim_matrix = (1.0 - decomposed_cossim_matrix) / 2.0
        EDC = decomposed_dissim_matrix
        '''
        # EDC = self._calculate_data_driven_measure(decomposed_cossim_matrix, correction=False)
        print("EDC Matrix calculation takes {} seconds".format(time.time() -
                                                               start_time))

        # Test the execution time of the full cosine dissimilarity
        start_time = time.time()
        full_cossim_matrix = cosine_similarity(delta_w)  # shape=(n_clients, n_clients)
        '''
        # Normalize cossim to [0, 1]
        full_dissim_matrix = (1.0 - full_cossim_matrix) / 2.0
        '''
        MADC = self._calculate_data_driven_measure(
            full_cossim_matrix, correction=True)  # shape=(n_clients, n_clients)
        # MADC = full_dissim_matrix
        print("MADC Matrix calculation takes {} seconds".format(time.time() -
                                                                start_time))
        '''
        Apply an RBF kernel to EDC or MADC
        gamma = 0.2
        if self.MADC == True:
            affinity_matrix = np.exp(- MADC ** 2 / (2. * gamma ** 2))
        else:
            # Use EDC as default
            affinity_matrix = np.exp(- EDC ** 2 / (2. * gamma ** 2))
        '''
        # Record the execution time
        start_time = time.time()
        if self.MADC == True:
            affinity_matrix = MADC
            # affinity_matrix = (1.0 - full_cossim_matrix) / 2.0
            # result = AgglomerativeClustering(n_clusters, affinity='euclidean', linkage='ward').fit(full_cossim_matrix)
            result = AgglomerativeClustering(
                n_clusters, affinity='precomputed',
                linkage='complete').fit(affinity_matrix)
        else:
            # Use EDC as default
            affinity_matrix = decomposed_cossim_matrix
            # result = AgglomerativeClustering(n_clusters, affinity='euclidean', linkage='ward').fit(decomposed_cossim_matrix)
            # result = AgglomerativeClustering(n_clusters, affinity='precomputed', linkage='average').fit(EDC)
            result = KMeans(n_clusters, random_state=self.sklearn_seed,
                            max_iter=max_iter).fit(affinity_matrix)
        # print('EDC', EDC[0][:10], '\nMADC', MADC[0][:10], '\naffinity', affinity_matrix[0][:10])
        # result = SpectralClustering(n_clusters, random_state=self.sklearn_seed, n_init=max_iter, affinity='precomputed').fit(affinity_matrix)
        print("Clustering takes {} seconds".format(time.time() - start_time))
        print('Clustering Results:', Counter(result.labels_))
        # print('Clustering Inertia:', result.inertia_)

        cluster = {}  # {Cluster ID: (avg_soln, avg_update, [c1, c2, ...])}
        cluster2clients = [[] for _ in range(n_clusters)]  # [[c1, c2,...], [c3, c4,...], ...]
        for idx, cluster_id in enumerate(result.labels_):
            # print(idx, cluster_id, len(cluster2clients), n_clusters)  # debug
            cluster2clients[cluster_id].append(clients[idx])
        for cluster_id, client_list in enumerate(cluster2clients):
            # Calculate the means of the cluster; all clients have equal weight
            average_csolns = [(1, csolns[c]) for c in client_list]
            average_updates = [(1, cupdates[c]) for c in client_list]
            if average_csolns:
                # Update the cluster means
                cluster[cluster_id] = (self.aggregate(average_csolns),
                                       self.aggregate(average_updates),
                                       client_list)
            else:
                print("Error, cluster is empty")
        return cluster

    # Measure the discrepancy between the group models and the global model
    def measure_group_diffs(self):
        diffs = np.zeros(len(self.group_list))
        for idx, g in enumerate(self.group_list):
            # direction
            # diff = self.measure_difference(...)
            # square root
            model_a = process_grad(self.latest_model)
            model_b = process_grad(g.latest_model)
            diff = np.sum((model_a - model_b)**2)**0.5
            diffs[idx] = diff
        diffs = np.append(diffs, np.sum(diffs))  # append the sum of discrepancies to the end
        return diffs

    # Measure the discrepancy between the group models and the client models
    def measure_client_group_diffs(self):
        average_group_diffs = np.zeros(len(self.group_list))
        total_group_diff = 0.0
        number_clients = [len(g.get_client_ids()) for g in self.group_list]
        for idx, g in enumerate(self.group_list):
            diff = 0.0
            if number_clients[idx] > 0:
                model_g = process_grad(g.latest_model)
                for c in g.clients.values():
                    model_c = process_grad(c.local_model)
                    diff += np.sum((model_c - model_g)**2)**0.5
                total_group_diff += diff
                average_group_diffs[idx] = diff / float(number_clients[idx])
                g.latest_diff = average_group_diffs[idx]
            else:
                average_group_diffs[idx] = 0  # the group is empty, so the discrepancy is ZERO
        average_total_diff = total_group_diff / sum(number_clients)
        average_diffs = np.append(
            [average_total_diff], average_group_diffs
        )  # prepend the overall average discrepancy to the head
        return average_diffs

    """ Pre-train the client 1 epoch and return the weights.
        Train the client upon the global AVG model.
    """

    def pre_train_client(self, client):
        start_model = self.client_model.get_params()  # back up the start model
        self.client_model.set_params(
            self.latest_model)  # set the run model to the global AVG model
        if self.prox == True:
            # Set vstar to the client model to remove the proximal term
            self.inner_opt.set_params(self.latest_model, self.client_model)
        # Pre-train 1 epoch
        # soln, stat = client.solve_inner()  # pre-train the client only one epoch
        # ...or pre-train 50 iterations
        soln, stat = client.solve_iters(50)
        ws = soln[1]  # weights of the model
        updates = [w1 - w0 for w0, w1 in zip(self.latest_model, ws)]
        self.client_model.set_params(start_model)  # restore the model
        return ws, updates

    def get_not_empty_groups(self):
        not_empty_groups = [g for g in self.group_list if not g.is_empty()]
        return not_empty_groups

    def group_test(self):
        backup_model = self.latest_model  # back up the global model
        results = []
        tot_num_client = 0
        for g in self.group_list:
            c_list = [c for c in self.clients if c.group == g]
            tot_num_client += len(c_list)
            num_samples = []
            tot_correct = []
            self.client_model.set_params(g.latest_model)
            for c in c_list:
                ct, ns = c.test()
                tot_correct.append(ct * 1.0)
                num_samples.append(ns)
            ids = [c.id for c in c_list]
            results.append((ids, g, num_samples, tot_correct))
        self.client_model.set_params(backup_model)  # restore the model
        return tot_num_client, results

    def group_train_error_and_loss(self):
        backup_model = self.latest_model  # back up the global model
        results = []
        for g in self.group_list:
            c_list = [c for c in self.clients if c.group == g]
            num_samples = []
            tot_correct = []
            losses = []
            self.client_model.set_params(g.latest_model)
            for c in c_list:
                ct, cl, ns = c.train_error_and_loss()
                tot_correct.append(ct * 1.0)
                num_samples.append(ns)
                losses.append(cl * 1.0)
            ids = [c.id for c in c_list]
            results.append((ids, g, num_samples, tot_correct, losses))
        self.client_model.set_params(backup_model)  # restore the model
        return results

    """ Main train function """

    def train(self):
        print('Training with {} workers ---'.format(self.clients_per_round))

        # Client cold start: pre-train all clients
        start_time = time.time()
        for c in self.clients:
            if c.is_cold() == True:
                self.client_cold_start(c, self.run_mode)
        print("Cold-starting clients takes {} seconds".format(time.time() -
                                                              start_time))

        for i in range(self.num_rounds):
            # Randomly select clients
            indices, selected_clients = self.select_clients(
                i, num_clients=self.clients_per_round)  # uniform sampling
            np.random.seed(i)  # make sure the stragglers are the same for FedProx and FedAvg
            active_clients = np.random.choice(
                selected_clients,
                round(self.clients_per_round * (1 - self.drop_percent)),
                replace=False)

            # Clear all groups; the group attr of each client is retained
            for g in self.group_list:
                g.clear_clients()

            # Reschedule selected clients to groups; add clients to the groups' client lists
            if self.run_mode == 'FedGroup':
                # Cold-start the newcomers
                for c in selected_clients:
                    if c.is_cold() == True:
                        self.client_cold_start(c, self.run_mode)
                # Reschedule the groups
                self.reschedule_groups(selected_clients, self.allow_empty,
                                       self.evenly, self.RAC)
            else:
                # IFCA and FeSEM need to reschedule clients in each round
                start_time = time.time()
                if self.run_mode == 'IFCA':
                    self.IFCA_reschedule_group(selected_clients)
                if self.run_mode == 'FeSEM':
                    self.FeSEM_reschedule_group(selected_clients)
                print("Scheduling clients takes {} seconds".format(
                    time.time() - start_time))

            # Get the non-empty groups
            handling_groups = self.get_not_empty_groups()

            for g in self.group_list:
                if g in handling_groups:
                    print("Group {}, clients {}".format(g.get_group_id(),
                                                        g.get_client_ids()))
                else:
                    print("Group {} is empty.".format(g.get_group_id()))

            # Freeze these groups before training
            for g in handling_groups:
                g.freeze()

            # Evaluate the group models before training
            if i % self.eval_every == 0:
                """
                stats = self.test()  # have set the latest model for all clients
                # Testing on training data is redundant
                stats_train = self.train_error_and_loss()
                """
                num_test_client, group_stats = self.group_test()
                group_stats_train = self.group_train_error_and_loss()
                test_tp, test_tot = 0, 0
                train_tp, train_tot = 0, 0
                train_loss_list, number_samples_list = [], []
                for stats, stats_train in zip(group_stats, group_stats_train):
                    tqdm.write('Group {}'.format(stats[1].id))
                    test_tp += np.sum(stats[3])
                    test_tot += np.sum(stats[2])
                    test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
                    tqdm.write('At round {} accuracy: {}'.format(i, test_acc))  # testing accuracy
                    train_tp += np.sum(stats_train[3])
                    train_tot += np.sum(stats_train[2])
                    train_loss_list += stats_train[4]
                    number_samples_list += stats_train[2]
                    train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(
                        stats_train[2])
                    tqdm.write('At round {} training accuracy: {}'.format(
                        i, train_acc))  # training accuracy
                    train_loss = np.dot(stats_train[4],
                                        stats_train[2]) * 1.0 / np.sum(
                                            stats_train[2])
                    tqdm.write('At round {} training loss: {}'.format(
                        i, train_loss))
                    # Write results to the csv file
                    self.writer.write_stats(i, stats[1].id, test_acc, train_acc,
                                            train_loss,
                                            len(stats[1].get_client_ids()))
                mean_test_acc = test_tp * 1.0 / test_tot
                mean_train_acc = train_tp * 1.0 / train_tot
                mean_train_loss = np.dot(
                    train_loss_list,
                    number_samples_list) * 1.0 / np.sum(number_samples_list)
                self.writer.write_means(mean_test_acc, mean_train_acc,
                                        mean_train_loss)
                print('At round {} mean test accuracy: {} mean train accuracy: {} '
                      'mean train loss: {} number of test clients: {}'.format(
                          i, mean_test_acc, mean_train_acc, mean_train_loss,
                          num_test_client))

                # diffs = self.measure_group_diffs()
                diffs = self.measure_client_group_diffs()
                print("The client-group discrepancies are:", diffs)
                # The diffs in the first round may not make sense.
                self.writer.write_diffs(diffs)

            # Broadcast the global model to the clients (groups)
            # self.client_model.set_params(self.latest_model)

            # Train each group sequentially
            start_time = time.time()
            for g in handling_groups:
                print("Begin group {:2d} training".format(g.get_group_id()))
                # Each group trains for group_epochs rounds
                for _ in range(g.group_epochs):
                    if self.prox == True:
                        # Update the optimizer; vstar is the latest_model of this group
                        self.inner_opt.set_params(g.latest_model,
                                                  self.client_model)
                    # Set the global model to the group model
                    self.client_model.set_params(g.latest_model)
                    """ Begin group training: call the train() function of the
                        Group object, which returns the update vectors of the clients.
                    """
                    cmodels, cupdates = g.train()
                    # TODO: after the clients finish training, update each client's diff list
            print("Training groups takes {} seconds".format(time.time() -
                                                            start_time))

            # Aggregate the group models and update the global (latest) model.
            # Note: IFCA and FeSEM do not implement inter-group aggregation (agg_lr=0)
            self.aggregate_groups(self.group_list, agg_lr=self.agg_lr)
            # Refresh the global model and global delta weights (latest_update)
            self.refresh_global_model(self.group_list)

            ########## Dynamic Strategy Code Start ##########
            # Recluster the groups (dynamic strategy)
            if self.recluster_epoch:
                if i > 0 and i % self.recluster_epoch == 0:
                    print(f"***** Recluster groups in epoch {i} ******")
                    self.group_recluster()
            # Refresh the temperature of the clients
            if self.max_temp:
                self.refresh_client_temperature(cmodels)
            ########## Dynamic Strategy Code End ##########

        # Close the writer and end the training
        self.writer.close()

    # Used to maintain the global AVG model and the global latest update
    def refresh_global_model(self, groups):
        start_model = self.latest_model
        # Aggregate the group models
        gsolns = []
        for g in groups:
            gsolns.append((1.0, g.latest_model))  # (n_k, soln)
        new_model = self.aggregate(gsolns)
        self.latest_update = [w1 - w0 for w0, w1 in zip(start_model, new_model)]
        self.latest_model = new_model
        return

    def refresh_client_temperature(self, cmodels):
        # Strategy #1: discrepancy-based
        diffs = self.measure_client_group_diffs()
        avg_total_diff = diffs[0]
        avg_group_diff = {
            g: diffs[idx + 1]
            for idx, g in enumerate(self.group_list)
        }
        for c, model in cmodels.items():
            mean_diff = avg_group_diff[c.group]
            model_g = process_grad(c.group.latest_model)
            model_c = process_grad(model)
            client_diff = np.sum((model_c - model_g)**2)**0.5
            if client_diff > mean_diff:
                # This client has a large discrepancy
                self.temp_dict[c] = self.temp_dict[c] - 1
                if self.temp_dict[c] == 0:
                    # Redo the cold start
                    old_group = c.group
                    c.group = None
                    self.client_cold_start(c, run_mode='FedGroup')
                    self.temp_dict[c] = self.max_temp
                    if old_group != c.group:
                        print(f'Client {c.id} migrates from Group '
                              f'{old_group.id} to Group {c.group.id}!')

    def aggregate_groups(self, groups, agg_lr):
        gsolns = [(sum(g.num_samples), g.latest_model) for g in groups]
        group_num = len(gsolns)
        # Calculate the scale of the group models
        gscale = [0] * group_num
        for i, (_, gsoln) in enumerate(gsolns):
            for v in gsoln:
                gscale[i] += np.sum(v.astype(np.float64)**2)
            gscale[i] = gscale[i]**0.5
        # Aggregate the models of each group separately
        for idx, g in enumerate(groups):
            base = [0] * len(gsolns[idx][1])
            weights = [agg_lr * (1.0 / scale) for scale in gscale]
            weights[idx] = 1  # the weight of the main group is 1
            total_weights = sum(weights)
            for j, (_, gsoln) in enumerate(gsolns):
                for k, v in enumerate(gsoln):
                    base[k] += weights[j] * v.astype(np.float64)
            averaged_soln = [v / total_weights for v in base]
            # Note: latest_update is accumulated from the last fedavg training
            inter_aggregation_update = [
                w1 - w0 for w0, w1 in zip(g.latest_model, averaged_soln)
            ]
            g.latest_update = [
                up0 + up1 for up0, up1 in zip(g.latest_update,
                                              inter_aggregation_update)
            ]
            g.latest_model = averaged_soln
        return

    """ Reschedule function of FedGroup: assign selected clients to groups
        according to some additional options.
    """

    def reschedule_groups(self, selected_clients, allow_empty=False,
                          evenly=False, randomly=False):

        # deprecated
        def _get_even_per_group_num(selected_clients_num, group_num):
            per_group_num = np.array([selected_clients_num // group_num] *
                                     group_num)
            remain = selected_clients_num - sum(per_group_num)
            random_groups = random.sample(range(group_num), remain)
            per_group_num[random_groups] += 1  # add the remainder
            return per_group_num

        selected_clients = selected_clients.tolist()  # convert numpy array to list

        if randomly == True and evenly == False:
            for c in selected_clients:
                if c.is_cold() == False:
                    if c.clustering == False:
                        # Randomly assign the client
                        random.choice(self.group_list).add_client(c)
                    else:
                        # This client is a clustering client
                        c.group.add_client(c)
                else:
                    print('Warning: A newcomer is not pre-trained.')
            return

        if randomly == True and evenly == True:
            """
            # Randomly assign clients, but keep each group even
            per_group_num = _get_even_per_group_num(len(selected_clients), len(self.group_list))
            for g, max_num in zip(self.group_list, per_group_num):
                g.max_clients = max_num
            head_idx, tail_idx = 0, 0
            for group_num, g in zip(per_group_num, self.group_list):
                tail_idx += group_num
                g.add_clients(selected_clients[head_idx:tail_idx])
                head_idx = tail_idx
            """
            print("Experimental setting is invalid.")
            return

        if randomly == False and allow_empty == True:
            # Allocate clients to their first-rank groups; some groups may be empty
            for c in selected_clients:
                if c.is_cold() != True:
                    first_rank_group = c.group
                    first_rank_group.add_client(c)
            return

        if randomly == False and evenly == True:
            """ Strategy #1: calculate the number of clients in each group (evenly) """
            selected_clients_num = len(selected_clients)
            group_num = len(self.group_list)
            per_group_num = np.array([selected_clients_num // group_num] *
                                     group_num)
            remain = selected_clients_num - sum(per_group_num)
            random_groups = random.sample(range(group_num), remain)
            per_group_num[random_groups] += 1  # add the remainder
            for g, max_num in zip(self.group_list, per_group_num):
                g.max_clients = max_num
            """ Allocate clients so the client count of each group is even """
            for c in selected_clients:
                if c.is_cold() != True:
                    first_rank_group = c.group
                    if not first_rank_group.is_full():
                        first_rank_group.add_client(c)
                    else:
                        # The first-rank group is full; choose the next group
                        diff_list = c.difference
                        # Sort diff_list
                        diff_list = sorted(diff_list, key=lambda tup: tup[1])
                        for (group, diff) in diff_list:
                            if not group.is_full():
                                group.add_client(c)
                                break
            return

        if randomly == False and evenly == False:
            """ Strategy #2: allocate clients to meet the minimum client requirements """
            for g in self.group_list:
                g.min_clients = self.group_min_clients
            # First ensure that each group has at least min_clients clients
diff_list, assigned_clients = [], [] for c in selected_clients: diff_list += [(c, g, diff) for g, diff in c.difference] diff_list = sorted(diff_list, key=lambda tup: tup[2]) for c, g, diff in diff_list: if len(g.client_ids ) < g.min_clients and c not in assigned_clients: g.add_client(c) assigned_clients.append(c) # Then add the remaining clients to their first rank group for c in selected_clients: if c not in assigned_clients: first_rank_group = c.group if c.id not in first_rank_group.client_ids: first_rank_group.add_client(c) return return """ Reschedule function of IFCA, assign selected client according to training loss """ def IFCA_reschedule_group(self, selected_clients): for c in selected_clients: # IFCA assign client to group with minium training loss diff_list, assign_group = self.get_assign_group(c, run_mode='IFCA') c.set_group(assign_group) c.update_difference(diff_list) # Add client to group's client list assign_group.add_client(c) return """ Similar to IFCA, get_assign_group() can handle well """ def FeSEM_reschedule_group(self, selected_clients): for c in selected_clients: # IFCA assign client to group with minium training loss diff_list, assign_group = self.get_assign_group(c, run_mode='FeSEM') c.set_group(assign_group) c.update_difference(diff_list) # Add client to group's client list assign_group.add_client(c) return def _calculate_data_driven_measure(self, pm, correction=False): ''' calculate the data-driven measure such as MADD''' # Input: pm-> proximity matrix; Output: dm-> data-driven distance matrix # pm.shape=(n_clients, n_dims), dm.shape=(n_clients, n_clients) n_clients, n_dims = pm.shape[0], pm.shape[1] dm = np.zeros(shape=(n_clients, n_clients)) """ Too Slow, and misunderstanding MADD. Deprecated for i in range(n_clients): for j in range(i+1, n_clients): for k in range(n_clients): if k !=i and k != j: dm[i,j] = dm[j,i] = abs(np.sum((pm[i]-pm[k])**2)**0.5 - \ np.sum((pm[j]-pm[k])**2)**0.5) """ # Fast version '''1, Get the repeated proximity matrix. We write Row1 = d11, d12, d13, ... ; and Row2 = d21, d22, d23, ... [ Row1 ] [ Row2 ] [ Rown ] | Row1 | | Row2 | | Rown | | ... | | ... | | ... | [ Row1 ], [ Row2 ], ..., [ Rown ] ''' row_pm_matrix = np.repeat(pm[:, np.newaxis, :], n_clients, axis=1) #print('row_pm', row_pm_matrix[0][0][:5], row_pm_matrix[0][1][:5]) # Get the repeated colum proximity matrix ''' [ Row1 ] [ Row1 ] [ Row1 ] | Row2 | | Row2 | | Row2 | | ... | | ... | | ... | [ Rown ], [ Rown ], ..., [ Rown ] ''' col_pm_matrix = np.tile(pm, (n_clients, 1, 1)) #print('col_pm', col_pm_matrix[0][0][:5], col_pm_matrix[0][1][:5]) # Calculate the absolute difference of two disstance matrix, It is 'abs(||u-z|| - ||v-z||)' in MADD. # d(1,2) = ||w1-z|| - ||w2-z||, shape=(n_clients,); d(x,x) always equal 0 ''' [ d(1,1) ] [ d(1,2) ] [ d(1,n) ] | d(2,1) | | d(2,2) | | d(2,n) | | ... | | ... | | ... 
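    # -----------------------------------------------------------------------
    # Illustration only: a minimal sketch of the assignment rule that
    # get_assign_group() is expected to apply in the two run modes above.
    # `group_models`, `client_model`, `client_data`, and `loss_fn` are
    # hypothetical placeholders, not part of the framework's real API.
    @staticmethod
    def _toy_assign_sketch(group_models, client_model, client_data, loss_fn, run_mode='IFCA'):
        if run_mode == 'IFCA':
            # IFCA: join the group whose model gives the lowest loss on the
            # client's local data
            scores = [loss_fn(m, client_data) for m in group_models]
        else:
            # FeSEM: join the group whose model is nearest in L2 distance
            scores = [np.sum((process_grad(m) - process_grad(client_model))**2)**0.5
                      for m in group_models]
        return int(np.argmin(scores))  # index of the chosen group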
    def _calculate_data_driven_measure(self, pm, correction=False):
        ''' Calculate a data-driven measure such as MADD. '''
        # Input: pm -> proximity matrix; Output: dm -> data-driven distance matrix
        # pm.shape=(n_clients, n_dims), dm.shape=(n_clients, n_clients)
        n_clients, n_dims = pm.shape[0], pm.shape[1]
        dm = np.zeros(shape=(n_clients, n_clients))
        """ Too slow, and it misunderstands MADD. Deprecated.
        for i in range(n_clients):
            for j in range(i+1, n_clients):
                for k in range(n_clients):
                    if k != i and k != j:
                        dm[i,j] = dm[j,i] = abs(np.sum((pm[i]-pm[k])**2)**0.5 -
                                                np.sum((pm[j]-pm[k])**2)**0.5)
        """

        # Fast version
        '''1) Get the row-repeated proximity matrix.
        We write Row1 = d11, d12, d13, ...; and Row2 = d21, d22, d23, ...
            [ Row1 ]  [ Row2 ]        [ Rown ]
            | Row1 |  | Row2 |        | Rown |
            | ...  |  | ...  |        | ...  |
            [ Row1 ], [ Row2 ], ..., [ Rown ]
        '''
        row_pm_matrix = np.repeat(pm[:, np.newaxis, :], n_clients, axis=1)
        #print('row_pm', row_pm_matrix[0][0][:5], row_pm_matrix[0][1][:5])

        '''2) Get the column-repeated proximity matrix.
            [ Row1 ]  [ Row1 ]        [ Row1 ]
            | Row2 |  | Row2 |        | Row2 |
            | ...  |  | ...  |        | ...  |
            [ Rown ], [ Rown ], ..., [ Rown ]
        '''
        col_pm_matrix = np.tile(pm, (n_clients, 1, 1))
        #print('col_pm', col_pm_matrix[0][0][:5], col_pm_matrix[0][1][:5])

        # Calculate the absolute difference of the two distance matrices;
        # this is the 'abs(||u-z|| - ||v-z||)' term in MADD.
        # d(1,2) = ||w1-z|| - ||w2-z||, shape=(n_clients,); d(x,x) always equals 0
        ''' [ d(1,1) ]  [ d(1,2) ]        [ d(1,n) ]
            | d(2,1) |  | d(2,2) |        | d(2,n) |
            |  ...   |  |  ...   |        |  ...   |
            [ d(n,1) ], [ d(n,2) ], ..., [ d(n,n) ]
        '''
        absdiff_pm_matrix = np.abs(col_pm_matrix - row_pm_matrix)  # shape=(n_clients, n_clients, n_clients)

        # Calculate the sum of the absolute differences
        if correction == True:
            # We should mask values like sim(1,2) and sim(2,1) in d(1,2)
            mask = np.zeros(shape=(n_clients, n_clients))
            np.fill_diagonal(mask, 1)  # Mask the whole diagonal
            mask = np.repeat(mask[np.newaxis, :, :], n_clients, axis=0)
            for idx in range(mask.shape[-1]):
                #mask[idx,idx,:] = 1  # Mask all rows d(1,1), d(2,2), ...; actually d(1,1)=d(2,2)=0
                mask[idx, :, idx] = 1  # Mask the idx-th column of the idx-th diff matrix
            dm = np.sum(np.ma.array(absdiff_pm_matrix, mask=mask), axis=-1) / (n_dims - 2.0)
        else:
            dm = np.sum(absdiff_pm_matrix, axis=-1) / n_dims
        #print('absdiff_pm_matrix', absdiff_pm_matrix[0][0][:5])

        return dm  # shape=(n_clients, n_clients)

    def test_ternary_cosine_similariy(self, alpha=20):
        ''' Compare the ternary similarity with the cosine similarity. '''

        def _calculate_cosine_distance(v1, v2):
            cosine = np.dot(v1, v2) / (np.sqrt(np.sum(v1**2)) * np.sqrt(np.sum(v2**2)))
            return cosine

        # Pre-train all clients
        csolns, cupdates = {}, {}
        for c in self.clients:
            csolns[c], cupdates[c] = self.pre_train_client(c)

        # Randomly select alpha * m clients to calculate the direction matrix V
        n_clients = len(self.clients)
        selected_clients = random.sample(self.clients, k=min(self.num_group * alpha, n_clients))
        clustering_update_array = [process_grad(cupdates[c]) for c in selected_clients]
        clustering_update_array = np.vstack(clustering_update_array).T  # shape=(n_params, n_clients)

        # We decompose the update vectors into num_group components.
        svd = TruncatedSVD(n_components=self.num_group, random_state=self.sklearn_seed)
        decomp_updates = svd.fit_transform(clustering_update_array)  # shape=(n_params, n_groups)
        n_components = decomp_updates.shape[-1]

        """
        # Calculate the ternary similarity matrix for all clients
        ternary_cossim = []
        update_array = [process_grad(cupdates[c]) for c in self.clients]
        delta_w = np.vstack(update_array)  # shape=(n_clients, n_params)
        ternary_cossim = self.get_ternary_cosine_similarity_matrix(delta_w, decomp_updates)
        """
        """
        # Calculate the traditional pairwise cosine similarity matrix for all clients
        old_cossim = cosine_similarity(delta_w)
        old_cossim = (1.0 - old_cossim) / 2.0  # Normalize
        """

        # Calculate the data-driven decomposed cosine dissimilarity (EDC) for all clients
        update_array = [process_grad(cupdates[c]) for c in self.clients]
        delta_w = np.vstack(update_array)  # shape=(n_clients, n_params)
        decomposed_cossim_matrix = cosine_similarity(delta_w, decomp_updates.T)  # shape=(n_clients, n_groups)
        print("Cossim_matrix shape:", decomposed_cossim_matrix.shape)
        # Normalize cossim to dissim
        #decomposed_dissim_matrix = (1.0 - decomposed_cossim_matrix) / 2.0
        #EDC = self._calculate_data_driven_measure(decomposed_cossim_matrix, correction=False)
        EDC = euclidean_distances(decomposed_cossim_matrix)

        # Calculate the data-driven full cosine similarity (MADC) for all clients
        full_cossim_matrix = cosine_similarity(delta_w)
        # Normalize
        #full_dissim_matrix = (1.0 - full_cossim_matrix) / 2.0
        MADC = self._calculate_data_driven_measure(full_cossim_matrix, correction=True)

        # Print the shapes of the distance matrices to make sure they are equal
        #print(EDC.shape, MADC.shape)  # shape=(n_clients, n_clients)
        iu = np.triu_indices(n_clients)
        x, y = EDC[iu], MADC[iu]
        mesh_points = np.vstack((x, y)).T
        #print(x.shape, y.shape)
        np.savetxt("cossim.csv", mesh_points, delimiter="\t")
        return x, y
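# ---------------------------------------------------------------------------
# Illustration only: a small self-check (on hypothetical random inputs)
# showing that the broadcast-and-mask trick in _calculate_data_driven_measure()
# above matches the naive MADD definition,
#     MADD(i, j) = mean over k != i, j of |d(i, k) - d(j, k)|,
# when the proximity matrix is square, as it is for the pairwise cosine
# similarity matrix used by the MADC branch.
import numpy as np


def _naive_madd(pm):
    n = pm.shape[0]
    dm = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            ks = [k for k in range(n) if k != i and k != j]
            dm[i, j] = np.mean(np.abs(pm[i, ks] - pm[j, ks]))
    return dm


def _fast_madd(pm):
    n = pm.shape[0]
    rows = np.repeat(pm[:, np.newaxis, :], n, axis=1)  # rows[i, j] = pm[i]
    cols = np.tile(pm, (n, 1, 1))                      # cols[i, j] = pm[j]
    absdiff = np.abs(cols - rows)                      # |d(j, k) - d(i, k)|
    mask = np.repeat(np.eye(n)[np.newaxis, :, :], n, axis=0)  # mask k == j
    for idx in range(n):
        mask[idx, :, idx] = 1                          # also mask k == i
    return np.asarray(np.ma.array(absdiff, mask=mask).sum(axis=-1)) / (n - 2.0)


if __name__ == '__main__':
    _pm = np.random.rand(6, 6)
    assert np.allclose(_naive_madd(_pm), _fast_madd(_pm))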
class Server(BaseFedarated):
    def __init__(self, params, learner, dataset):
        print('Using Federated prox to Train')
        self.inner_opt = PerturbedGradientDescent(params['learning_rate'], params['mu'])
        super(Server, self).__init__(params, learner, dataset)

    def train(self):
        '''Train using Federated Proximal'''
        print('Training with {} workers ---'.format(self.clients_per_round))
        for i in range(self.num_rounds):
            # test model
            if i % self.eval_every == 0:
                stats = self.test()  # have set the latest model for all clients
                stats_train = self.train_error_and_loss()
                tqdm.write('At round {} testing accuracy: {}'.format(
                    i, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
                tqdm.write('At round {} training accuracy: {}'.format(
                    i, np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
                tqdm.write('At round {} training loss: {}'.format(
                    i, np.dot(stats_train[4], stats_train[2]) * 1.0 / np.sum(stats_train[2])))

            selected_clients = self.select_clients(i, num_clients=self.clients_per_round)

            csolns = []  # buffer for receiving client solutions
            self.inner_opt.set_params(self.latest_model, self.client_model)

            for idx, c in enumerate(selected_clients):
                # communicate the latest model
                c.set_params(self.latest_model)
                # solve the minimization locally
                soln, stats = c.solve_inner(num_epochs=self.num_epochs, batch_size=self.batch_size)
                # gather the solution from the client
                csolns.append(soln)

            # update models
            self.latest_model = self.aggregate(csolns)

        # final test model
        stats = self.test()
        stats_train = self.train_error_and_loss()
        self.metrics.accuracies.append(stats)
        self.metrics.train_accuracies.append(stats_train)
        tqdm.write('At round {} accuracy: {}'.format(
            self.num_rounds, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
        tqdm.write('At round {} training accuracy: {}'.format(
            self.num_rounds, np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
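# ---------------------------------------------------------------------------
# A minimal numpy sketch (illustration only) of the proximal update that
# PerturbedGradientDescent applies in FedProx; the function below is a toy
# stand-in, not the optimizer's actual TensorFlow implementation:
#     w <- w - lr * (grad + mu * (w - w_star))
# where w_star is the global model broadcast at the start of the round. The
# mu term pulls the local model back toward w_star, limiting client drift on
# non-IID data.
import numpy as np


def perturbed_gradient_step(w, grad, w_star, lr=0.01, mu=0.1):
    # One proximal SGD step on a flattened weight vector
    return w - lr * (grad + mu * (w - w_star))

# Example with toy values:
#   perturbed_gradient_step(np.ones(5), np.full(5, 0.2), np.zeros(5))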
class Server(BaseFedarated):
    def __init__(self, params, learner, dataset):
        print('Using Federated prox to Train')
        self.inner_opt = PerturbedGradientDescent(params['learning_rate'], params['mu'])
        super(Server, self).__init__(params, learner, dataset)
        self.writer = CSVWriter(params['export_filename'], 'results/' + params['dataset'])

    def train(self):
        '''Train using Federated Proximal'''
        print('Training with {} workers ---'.format(self.clients_per_round))
        csolns = []  # buffer for receiving client solutions
        for i in range(self.num_rounds):
            indices, selected_clients = self.select_clients(i, num_clients=self.clients_per_round)  # uniform sampling
            np.random.seed(i)  # make sure that the stragglers are the same for FedProx and FedAvg
            active_clients = np.random.choice(selected_clients,
                                              round(self.clients_per_round * (1 - self.drop_percent)),
                                              replace=False)
            diffs = [0]  # Record the client diff

            # test model
            if i % self.eval_every == 0:
                stats = self.test()  # have set the latest model for all clients
                stats_train = self.train_error_and_loss(active_clients)
                test_acc = np.sum(stats[3]) * 1.0 / np.sum(stats[2])
                tqdm.write('At round {} accuracy: {}'.format(i, test_acc))  # testing accuracy
                train_acc = np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])
                tqdm.write('At round {} training accuracy: {}'.format(i, train_acc))
                train_loss = np.dot(stats_train[4], stats_train[2]) * 1.0 / np.sum(stats_train[2])
                tqdm.write('At round {} training loss: {}'.format(i, train_loss))
                # Write the results to a csv file
                self.writer.write_stats(i, 0, test_acc, train_acc, train_loss, self.clients_per_round)

                # Calculate the client diff and write it to the csv file
                if csolns:
                    flat_cmodels = [process_grad(soln[1]) for soln in csolns]
                    flat_global_model = process_grad(self.latest_model)
                    diffs[0] = np.sum([np.sum((flat_model - flat_global_model)**2)**0.5
                                       for flat_model in flat_cmodels])
                    diffs[0] = diffs[0] / len(csolns)
                    self.writer.write_diffs(diffs)
                    tqdm.write('At round {} Discrepancy: {}'.format(i, diffs[0]))

                model_len = process_grad(self.latest_model).size  # not equal to model.size
                global_grads = np.zeros(model_len)
                client_grads = np.zeros(model_len)
                num_samples = []
                local_grads = []

                for c in self.clients:
                    num, client_grad = c.get_grads(model_len)
                    local_grads.append(client_grad)
                    num_samples.append(num)
                    global_grads = np.add(global_grads, client_grad * num)
                global_grads = global_grads * 1.0 / np.sum(np.asarray(num_samples))

                difference = 0
                for idx in range(len(self.clients)):
                    difference += np.sum(np.square(global_grads - local_grads[idx]))
                difference = difference * 1.0 / len(self.clients)
                tqdm.write('gradient difference: {}'.format(difference))

            csolns = []  # buffer for receiving client solutions
            self.inner_opt.set_params(self.latest_model, self.client_model)

            for idx, c in enumerate(selected_clients.tolist()):
                # communicate the latest model
                c.set_params(self.latest_model)
                total_iters = int(self.num_epochs * c.num_samples / self.batch_size) + 2  # randint(low, high) = [low, high)

                # solve the minimization locally
                if c in active_clients:
                    soln, stats = c.solve_inner(num_epochs=self.num_epochs, batch_size=self.batch_size)
                else:
                    # Stragglers run fewer local epochs
                    #soln, stats = c.solve_iters(num_iters=np.random.randint(low=1, high=total_iters), batch_size=self.batch_size)
                    soln, stats = c.solve_inner(num_epochs=np.random.randint(low=1, high=self.num_epochs),
                                                batch_size=self.batch_size)
                # print(soln[0])  # DEBUG

                # gather the solution from the client
                csolns.append(soln)

                # track communication cost
                self.metrics.update(rnd=i, cid=c.id, stats=stats)

            # update models
            self.latest_model = self.aggregate(csolns)
            self.client_model.set_params(self.latest_model)

        self.writer.close()

        # final test model
        stats = self.test()
        stats_train = self.train_error_and_loss()
        self.metrics.accuracies.append(stats)
        self.metrics.train_accuracies.append(stats_train)
        tqdm.write('At round {} accuracy: {}'.format(
            self.num_rounds, np.sum(stats[3]) * 1.0 / np.sum(stats[2])))
        tqdm.write('At round {} training accuracy: {}'.format(
            self.num_rounds, np.sum(stats_train[3]) * 1.0 / np.sum(stats_train[2])))
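# ---------------------------------------------------------------------------
# A minimal sketch (illustration only) of the sample-weighted averaging that
# self.aggregate() is assumed to perform over the collected client solutions;
# toy_aggregate below is hypothetical, not the BaseFedarated implementation.
import numpy as np


def toy_aggregate(csolns):
    # csolns: list of (num_samples, list_of_layer_arrays) tuples, one per client
    total_samples = sum(n for n, _ in csolns)
    n_layers = len(csolns[0][1])
    averaged = []
    for k in range(n_layers):
        layer = sum(n * soln[k].astype(np.float64) for n, soln in csolns)
        averaged.append(layer / total_samples)
    return averaged

# Example: two clients with 10 and 30 samples; the larger client dominates.
#   toy_aggregate([(10, [np.zeros(3)]), (30, [np.ones(3)])])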