def Geometric_Median(model_data, args):
    """Aggregate node models into a single model via the geometric median.

    input:  array of (model, no_samples) tuples, one tuple per node
    output: geometric-median aggregated model

    This function only unpacks the per-node weights and sample counts;
    the actual median optimization is delegated to optimizeGM.
    """
    total_no_samples = 0
    # Per-node weight arrays; shape -> no_models * no_layers * dim_of_layer
    node_weights = []
    node_samples = []
    for model, no_samples in model_data:
        node_weights.append(weights_to_array(model))
        node_samples.append(no_samples)
        # calculates the total number of samples
        total_no_samples += no_samples

    # Fresh model of the configured architecture to receive the result.
    # NOTE: removed leftover debug code that cloned the fresh model's
    # parameters and printed torch.equal comparisons after optimization.
    agg_model = getModelArchitecture(args)
    agg_model = optimizeGM(agg_model, node_weights, node_samples,
                           total_no_samples, args)
    return agg_model
def comed_aggregator(model_data, args):
    """Aggregate node models with the coordinate-wise median (COMED).

    input:  array of (model, no_samples) tuples, one tuple per node
    output: COMED aggregated model

    For every layer, the weights of all nodes are flattened, stacked
    column-wise (columns = nodes) and the element-wise median is taken.
    """
    # Per-node weight arrays; shape -> no_models * no_layers * dim_of_layer
    node_weights = [weights_to_array(model) for model, _ in model_data]

    aggregated_weights = []
    for layer_idx in range(len(node_weights[0])):
        layer_shape = node_weights[0][layer_idx].shape
        # Stack all nodes' flattened layers as columns in one call
        # (replaces the old per-node flatten + torch.cat loop and the
        # dead torch.zeros allocation it overwrote).
        stacked = torch.stack(
            [torch.flatten(weights[layer_idx]) for weights in node_weights],
            dim=1)
        # np.median averages the two middle values for an even node
        # count, unlike torch.median, so the numpy round-trip is kept.
        median = np.median(stacked.detach().cpu().numpy(), 1)
        aggregated_weights.append(
            torch.reshape(torch.from_numpy(median), layer_shape))

    # Load the median weights into a fresh model; load_state_dict copies
    # the data onto the model's own parameters (and their device).
    agg_model = getModelArchitecture(args)
    agg_state = OrderedDict()
    for idx, key in enumerate(agg_model.state_dict().keys()):
        agg_state[key] = aggregated_weights[idx]
    agg_model.load_state_dict(agg_state)
    return agg_model
def fed_avg_aggregator(model_data, args):
    """Aggregate node models with federated averaging (FedAvg).

    input:  array of (model, no_samples) tuples, one tuple per node
    output: fed_avg aggregated model

    Each layer of the result is the sample-count-weighted average of the
    corresponding node layers. When SMPC is enabled (args["smpc"]) the
    incoming weights are presumably already secret-shared fractions, so
    they are summed with fraction == 1 — TODO confirm against the SMPC
    client code.
    """
    total_no_samples = 0
    # Per-node weight arrays; shape -> no_models * no_layers * dim_of_layer
    node_weights = []
    node_samples = []
    for model, no_samples in model_data:
        node_weights.append(weights_to_array(model))
        node_samples.append(no_samples)
        # calculates the total number of samples
        total_no_samples += no_samples

    # Per-node weighting factors, hoisted out of the per-layer loop:
    # the smpc flag and sample fractions do not depend on the layer.
    if args["smpc"]:
        fractions = [1] * len(node_weights)
    else:
        fractions = [samples / total_no_samples for samples in node_samples]

    aggregated_weights = []
    for layer_idx in range(len(node_weights[0])):
        temp = torch.zeros(node_weights[0][layer_idx].shape).to(args['device'])
        for node_idx in range(len(node_weights)):
            temp += fractions[node_idx] * node_weights[node_idx][layer_idx]
        aggregated_weights.append(temp)

    # Load the averaged weights into a fresh model.
    agg_model = getModelArchitecture(args)
    agg_state = OrderedDict()
    for idx, key in enumerate(agg_model.state_dict().keys()):
        agg_state[key] = aggregated_weights[idx]
    agg_model.load_state_dict(agg_state)
    return agg_model
def layer_sharing(model_list, serverargs):
    """Shuffle layers between node models (layer sharing).

    input:  model_list -- array of (model, no_samples) tuples -- and serverargs
    output: shuffled model list; each entry is paired with sample count 1

    A shuffling matrix of shape no_of_nodes x no_of_layers is generated;
    entry [node][layer] is the destination node for that node's layer.
    Each column is an independent random permutation, so every destination
    node receives exactly one copy of each layer. Weights are pre-scaled
    by the node's sample fraction (normalize_weights) before routing.
    """
    num_of_nodes = len(model_list)
    num_of_layers = len(model_list[0][0].state_dict())

    # Build the shuffling matrix: one random permutation per layer column.
    shuffling_matrix = [[0] * num_of_layers for _ in range(num_of_nodes)]
    for layer_num in range(num_of_layers):
        model_set = set(range(num_of_nodes))
        for node_num in range(num_of_nodes):
            # BUGFIX: random.sample() on a set is deprecated since
            # Python 3.9 and raises TypeError from 3.11; draw from a
            # list instead (same uniform distribution).
            random_node = random.choice(list(model_set))
            model_set.remove(random_node)
            shuffling_matrix[node_num][layer_num] = random_node

    # Calling the matrix print function
    print_matrix(shuffling_matrix)

    # Scale each node's weights by its share of the total sample count.
    models_array = []
    total_samples = sum(samples for (model, samples) in model_list)
    for model, samples in model_list:
        fraction = samples / total_samples
        models_array.append(
            normalize_weights(weights_to_array(model), fraction))

    # Route every layer to its destination model's weight array.
    shuffling_models_array = deepcopy(models_array)
    for model_num in range(num_of_nodes):
        for layer_num in range(num_of_layers):
            destination_model = shuffling_matrix[model_num][layer_num]
            shuffling_models_array[destination_model][layer_num] = \
                models_array[model_num][layer_num]

    # Rebuild a fresh model from each shuffled weight array.
    shuffling_model_list = []
    for model_num in range(len(shuffling_models_array)):
        model_state = OrderedDict()
        model = getModelArchitecture(serverargs)
        for idx, key in enumerate(model.state_dict().keys()):
            model_state[key] = shuffling_models_array[model_num][idx]
        model.load_state_dict(model_state)
        shuffling_model_list.append((model, 1))
    return shuffling_model_list