def non_iid(model_names, numClasses, numParams, iterations=3000, ideal_attack=False):
    """Federated training of an MNIST CNN across non-IID client shards.

    Each entry of ``model_names`` names one client's data shard. Every round,
    each client's gradient is norm-clipped, aggregated with FoolsGold, and the
    aggregated step is applied locally on every client.

    Args:
        model_names: list of dataset names, one per client shard.
        numClasses: number of output classes.
        numParams: nominal parameter count (overridden below for this CNN).
        iterations: number of federated rounds.
        ideal_attack: unused here — the ideal-vector sybil is commented out.

    Returns:
        The final model weight vector (read from client 0).
    """
    batch_size = 50
    memory_size = 0  # 0 disables the sliding history; running sums are used
    list_of_models = []

    transform = transforms.Compose([transforms.ToTensor()])
    model = MNISTCNNModel
    dataset = MNISTDataset
    # NOTE(review): hard-coded parameter count of the MNIST CNN; this silently
    # overrides the caller-supplied numParams — confirm that is intended.
    numParams = 41386

    train_client = Client("mnist", "mnist_train", batch_size, model(), dataset, transform)
    test_client = Client("mnist", "mnist_test", batch_size, model(), dataset, transform)

    # Every client starts from identical initial weights.
    init_weights = train_client.getModelWeights()
    for dataset_name in model_names:
        list_of_models.append(
            Client("mnist", dataset_name, batch_size, model(), dataset, transform))
        list_of_models[-1].updateModel(init_weights)

    # # Include the model that sends the ideal vector on each iteration
    # if ideal_attack:
    #     list_of_models.append(softmax_model_obj.SoftMaxModelEvil(dataPath +
    #         "_bad_ideal_4_9", numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    # weights = np.random.rand(numParams) / 100.0
    train_progress = []

    delta_memory = np.zeros((numClients, numParams, memory_size))
    summed_deltas = np.zeros((numClients, numParams))

    for i in range(iterations):
        delta = np.zeros((numClients, numParams))

        ##################################
        # Use significant features filter or not
        ##################################
        topk = int(numParams / 2)
        # Significant features filter, the top k biggest weights
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        ##################################
        # Use annealing strategy or not
        ##################################
        if memory_size > 0:
            for k in range(len(list_of_models)):
                delta[k, :] = list_of_models[k].getGrad()
                # Clip each client's update to unit L2 norm.
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])
                delta_memory[k, :, i % memory_size] = delta[k, :]
            # Track the total vector from each client over the history window.
            summed_deltas = np.sum(delta_memory, axis=2)
        else:
            for k in range(len(list_of_models)):
                delta[k, :] = list_of_models[k].getGrad()
                # Clip each client's update to unit L2 norm.
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])
            # Track the running total vector from each individual client.
            summed_deltas = summed_deltas + delta

        ##################################
        # Use FoolsGold or something else
        ##################################
        # Use Foolsgold (can optionally clip gradients via Krum)
        weights = list_of_models[0].getModelWeights()
        this_delta = model_aggregator.foolsgold(
            delta, summed_deltas, sig_features_idx, i, weights, clip=0)
        # Mean
        # this_delta = model_aggregator.average(delta)
        # Krum
        # this_delta = model_aggregator.krum(delta, clip=1)

        # Step every client in the aggregated gradient direction.
        for k in range(len(list_of_models)):
            list_of_models[k].simpleStep(this_delta)

        if i % 20 == 0:
            # BUG FIX: the original inner loop reused `i` (clobbering the round
            # counter) and summed only the first 10 clients while dividing by
            # len(list_of_models); average over all clients instead.
            loss = 0.0
            for client in list_of_models:
                loss += client.getLoss()
            print("Average loss is " + str(loss / len(list_of_models)))

    print("Done iterations!")
    train_client.updateModel(weights)
    test_client.updateModel(weights)
    # BUG FIX: print() was called with a comma instead of %-formatting, which
    # printed the literal "%d" followed by the value.
    print("Train error: %d" % train_client.getTrainErr())
    # NOTE(review): "test error" is measured via getTrainErr() on the client
    # built over mnist_test — presumably intentional; confirm.
    print("Test error: %d" % test_client.getTrainErr())

    return weights
def non_iid(max_similarity, Xtest, ytest, from_class, to_class, model_names,
            numClasses, numParams, softmax_test, topk_prop, iterations=3000,
            numSybils=2, ideal_attack=False, poisoner_indices=None):
    """Federated softmax training with adaptive, decorrelating sybils.

    Sybil clients (rows numClasses..numClasses+numSybils of the delta matrix)
    monitor their pairwise cosine similarity; whenever FoolsGold would flag
    them (similarity above ``max_similarity``) they submit rescaled orthogonal
    noise instead of their poisoned gradient.

    Args:
        max_similarity: similarity threshold above which sybils back off.
        Xtest, ytest: held-out data (used only by the commented probe below).
        from_class, to_class: poisoning source/target labels (commented probe).
        model_names: dataset names, one per client shard.
        numClasses, numParams: model dimensions.
        softmax_test: evaluator exposing train_error(weights).
        topk_prop: unused here — kept for signature compatibility.
        iterations: number of federated rounds.
        numSybils: number of colluding sybil clients.
        ideal_attack: append the ideal-vector attacker model when True.
        poisoner_indices: indices of poisoning clients (unused in live code).

    Returns:
        (final weight vector, fraction of rounds the sybils poisoned).
    """
    # BUG FIX: mutable default argument ([]) replaced with None sentinel.
    if poisoner_indices is None:
        poisoner_indices = []

    numFeatures = int(numParams / numClasses)
    batch_size = 50
    topk = int(numParams / 2)

    list_of_models = []
    for dataset in model_names:
        list_of_models.append(softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    #### Cosine similarity for adversaries ####
    sybil_noise = getOrthogonalNoise(numSybils, numParams)
    Attack_rates = []
    Attack_rates_iter = []
    numPoisonContribution = 0.0

    # BUG FIX: xrange is Python 2-only; the rest of the file targets Python 3.
    for i in range(iterations):
        # if i % 10 == 0 and max_similarity < 0.2:
        #     score = poisoning_compare.eval(Xtest, ytest, weights, int(from_class), int(to_class), numClasses, numFeatures, False)
        #     Attack_rates.append(score[4])
        #     Attack_rates_iter.append(i)
        #     print("Attack rate: " + str(score[4]))
        #     # attack success
        #     if score[4] > 0.8:
        #         pdb.set_trace()
        #         return weights, numPoisonContribution / i
        delta = np.zeros((numClients, numParams))

        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :] = list_of_models[k].privateFun(1, weights, batch_size)
            # Clip each client's update to unit L2 norm.
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise (disabled; noisevec is only defined here)
        # noisevec = rescale(np.random.rand(numParams), np.min(delta), np.max(delta))
        # delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + 10000*noisevec
        # delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - 10000*noisevec

        ### Adaptive poisoning !! use even number sybils ###
        adaptive = True
        if adaptive:
            # sybil_deltas = summed_deltas[10:10+numSybils].copy()
            # sybil_deltas = sybil_deltas + delta[10:10+numSybils]
            # Pairwise cosine similarity of what the sybils' history WOULD be
            # after this round; subtract the identity to ignore self-similarity.
            sybil_cs = smp.cosine_similarity(
                summed_deltas[numClasses:numClasses + numSybils] +
                delta[numClasses:numClasses + numSybils]) - np.eye(numSybils)
            sybil_cs = np.max(sybil_cs, axis=0)
            # max_similarity = 1.0
            if np.any(sybil_cs > max_similarity):
                # Too correlated: submit orthogonal noise to evade FoolsGold.
                delta[numClasses:numClasses + numSybils] = \
                    rescaleOrthogonalNoise(sybil_noise, delta)
            else:
                numPoisonContribution += 1.0
            # delta[10:10+numSybils] = getOrthogonalNoise(numSybils, numParams)
            # pdb.set_trace()
            # pdb:: np.max(smp.cosine_similarity(delta[10:10+numSybils]) - np.eye(numSybils), axis=1)
        ##########################

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta

        # Use Foolsgold
        this_delta = model_aggregator.foolsgold(
            delta, summed_deltas, sig_features_idx, i, weights, 1.0,
            importance=True, importanceHard=False)
        # this_delta = model_aggregator.average(delta)

        weights = weights + this_delta

        if i % 100 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)

    print("Done iterations!")
    # BUG FIX: print() was called with a comma instead of %-formatting.
    print("Train error: %d" % softmax_test.train_error(weights))
    print("Test error: %d" % softmax_test.test_error(weights))

    return weights, numPoisonContribution / iterations
def non_iid(model_names, numClasses, numParams, softmax_test, topk_prop,
            iterations=3000, numSybils=2, ideal_attack=False,
            poisoner_indices=None, solution=None):
    """Compare aggregation defenses under a two-client noise-poisoning attack.

    Clients at ``poisoner_indices[0]`` / ``poisoner_indices[1]`` add and
    subtract the same rescaled random noise vector each round. The aggregation
    rule is selected by ``solution``: 'fg', 'ours', 'krum', 'average',
    'median', 'trimmed_mean'; with no solution, updates are combined as a
    per-client learning-rate weighted sum. Per-round diagnostics are written
    to a CSV named from the global ``argv``.

    Args:
        model_names: dataset names, one per client shard.
        numClasses, numParams: model dimensions.
        softmax_test: evaluator exposing train_error/test_error(weights).
        topk_prop: proportion passed through to the FoolsGold variants.
        iterations: number of federated rounds.
        numSybils: unused here — kept for signature compatibility.
        ideal_attack: append the ideal-vector attacker model when True.
        poisoner_indices: two client indices that inject adversarial noise.
        solution: aggregation defense name, or None/falsy for no defense.

    Returns:
        The final weight vector.

    Raises:
        ValueError: if ``solution`` is truthy but not a recognized name
            (previously this fell through and crashed with NameError).
    """
    # BUG FIX: mutable default argument ([]) replaced with None sentinel.
    if poisoner_indices is None:
        poisoner_indices = []

    batch_size = 50
    topk = int(numParams / 10)

    list_of_models = []
    for dataset in model_names:
        list_of_models.append(softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("\nStart training across " + str(numClients) +
          " clients with solution " + str(solution) + '.')

    weights = np.random.rand(numParams) / 100.0
    lr = np.ones(numClients, )  # per-client learning rates ('ours' updates these)
    acc_in_iterations = []
    delta_all = []
    train_progress = []
    norm_progress = []
    loss_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    for i in range(iterations):
        delta = np.zeros((numClients, numParams))

        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :], _ = list_of_models[k].privateFun(weights, batch_size)
            # Clip each client's update to unit L2 norm.
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise: two colluding clients push opposite noise.
        noisevec = rescale(np.random.rand(numParams), np.min(delta), np.max(delta))
        delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + noisevec
        delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - noisevec

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta

        # BUG FIX: the original used a chain of independent `if` tests with an
        # ambiguously attached `else`; use elif and fail fast on unknown names.
        if solution:
            if solution == 'fg':
                # Use Foolsgold
                this_delta = model_aggregator.foolsgold(
                    delta, summed_deltas, sig_features_idx, i, weights, lr,
                    topk_prop, importance=False, importanceHard=True)
            elif solution == 'ours':
                this_delta, lr = model_aggregator.foolsgold2(
                    delta, summed_deltas, sig_features_idx, i, weights, lr,
                    topk_prop, importance=False, importanceHard=True)
            elif solution == 'krum':
                # Krum
                this_delta = model_aggregator.krum(delta, clip=1)
            elif solution == 'average':
                this_delta = model_aggregator.average(delta)
            elif solution == 'median':
                this_delta = model_aggregator.median(delta)
            elif solution == 'trimmed_mean':
                this_delta = model_aggregator.trimmed_mean(delta, 0.2)
            else:
                raise ValueError("unknown solution: " + str(solution))
        else:
            # No defense: lr-weighted sum of the client updates.
            this_delta = np.dot(delta.T, lr)

        weights = weights + this_delta

        if i % 10 == 0:
            # Record the 20 largest aggregated-delta coordinates plus each
            # client's contribution to them.
            delta_index = heapq.nlargest(20, range(len(this_delta)),
                                         this_delta.take)
            delta_each_client = []
            for idx in delta_index:
                delta_each_client.append(
                    np.hstack(([i, idx], delta[:, idx], this_delta[idx])))
            delta_all.append(delta_each_client)
            norm_progress.append(np.mean(np.linalg.norm(delta, axis=1)))
            test_error = softmax_test.test_error(weights)
            train_progress.append(test_error)
            # NOTE(review): Xtest, ytest, from_class, to_class, numFeatures and
            # argv are not defined in this function — presumably module-level
            # globals; confirm against the enclosing script.
            acc_in_iterations.append([test_error] + list(
                poisoning_compare.eval(Xtest, ytest, weights, int(from_class),
                                       int(to_class), numClasses, numFeatures,
                                       verbose=False)))
        # if i % 100 == 0:
        #     print("Validation error: %.5f" % test_error)

    # 'deltaInxex' typo kept: it is a CSV column name consumers may rely on.
    column = ['iteration', 'deltaInxex'
              ] + ['client{}'.format(i) for i in range(numClients)] + ['combined']
    pd.DataFrame(columns=column,
                 data=np.reshape(delta_all, (-1, len(column)))).to_csv(
                     '_'.join(argv) + '_' + str(solution) + '_delta.csv')

    test_error = softmax_test.test_error(weights)
    acc_in_iterations.append([test_error] + list(
        poisoning_compare.eval(Xtest, ytest, weights, int(from_class),
                               int(to_class), numClasses, numFeatures,
                               verbose=True)))

    # column = ['iteration', 'Test error', 'Accuracy overall', 'Accuracy on other digits',
    #           'Target Accuracy on source label',
    #           'Target Accuracy on target label', 'Target Attack Rate']
    # acc_in_iterations = np.insert(acc_in_iterations, 0, values=np.arange(0, iterations + 1, 10), axis=1)
    # res = pd.DataFrame(columns=column, data=acc_in_iterations)
    # res.to_csv('_'.join(argv) + '_' + str(solution) + '.csv')

    print("Done iterations!")
    print("Train error: {}".format(softmax_test.train_error(weights)))
    print("Test error: {}".format(softmax_test.test_error(weights)))
    # pdb.set_trace()
    # import sklearn.metrics.pairwise as smp
    # cs = smp.cosine_similarity(summed_deltas)
    return weights
def non_iid(model_names, numClasses, numParams, softmax_test, iterations=3000,
            ideal_attack=False):
    """Federated softmax training (FedAvg-style local steps) with FoolsGold.

    Each round every client runs ``fed_avg_size`` local SGD steps from the
    shared weights; the norm-clipped updates are aggregated with FoolsGold.

    Args:
        model_names: dataset names, one per client shard.
        numClasses, numParams: model dimensions.
        softmax_test: evaluator exposing train_error/test_error(weights).
        iterations: number of federated rounds.
        ideal_attack: append the ideal-vector attacker model when True.

    Returns:
        The final weight vector.
    """
    # SGD batch size
    batch_size = 50
    # The number of local steps each client takes
    fed_avg_size = 1

    list_of_models = []
    for dataset in model_names:
        list_of_models.append(softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    # The number of previous iterations to use FoolsGold on (0 = running sums)
    memory_size = 0
    delta_memory = np.zeros((numClients, numParams, memory_size))
    summed_deltas = np.zeros((numClients, numParams))

    # BUG FIX: xrange is Python 2-only; the rest of the file targets Python 3.
    for i in range(iterations):
        delta = np.zeros((numClients, numParams))

        ##################################
        # Use significant features filter or not
        ##################################
        # Significant features filter, the top k biggest weights
        # topk = int(numParams / 2)
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        ##################################
        # Use history or not
        ##################################
        if memory_size > 0:
            for k in range(len(list_of_models)):
                delta[k, :] = list_of_models[k].privateFun(
                    weights, batch_size=batch_size,
                    num_iterations=fed_avg_size)
                # Clip each client's update to unit L2 norm.
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])
                delta_memory[k, :, i % memory_size] = delta[k, :]
            # Track the total vector from each client over the history window.
            summed_deltas = np.sum(delta_memory, axis=2)
        else:
            for k in range(len(list_of_models)):
                delta[k, :] = list_of_models[k].privateFun(
                    weights, batch_size=batch_size,
                    num_iterations=fed_avg_size)
                # Clip each client's update to unit L2 norm.
                if np.linalg.norm(delta[k, :]) > 1:
                    delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])
            # Track the total vector from each individual client
            summed_deltas = summed_deltas + delta

        ##################################
        # Use FoolsGold or something else
        ##################################
        # Use Foolsgold (can optionally clip gradients via Krum)
        this_delta = model_aggregator.foolsgold(
            delta, summed_deltas, sig_features_idx, i, weights, clip=0)
        # Krum
        # this_delta = model_aggregator.krum(delta, clip=1)
        # Simple Average
        # this_delta = model_aggregator.average(delta)

        weights = weights + this_delta

        if i % 200 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)

    print("Done iterations!")
    # BUG FIX: print() was called with a comma instead of %-formatting, which
    # printed the literal "%d" followed by the value.
    print("Train error: %d" % softmax_test.train_error(weights))
    print("Test error: %d" % softmax_test.test_error(weights))

    return weights
def non_iid(model_names, numClasses, numParams, softmax_test, topk_prop,
            iterations=3000, numSybils=2, ideal_attack=False,
            poisoner_indices=None):
    """Federated softmax training under a two-client noise-poisoning attack.

    Clients at ``poisoner_indices[0]`` / ``poisoner_indices[1]`` add and
    subtract the same rescaled random noise vector each round; updates are
    aggregated with FoolsGold.

    Args:
        model_names: dataset names, one per client shard.
        numClasses, numParams: model dimensions.
        softmax_test: evaluator exposing train_error/test_error(weights).
        topk_prop: proportion passed through to FoolsGold.
        iterations: number of federated rounds.
        numSybils: unused here — kept for signature compatibility.
        ideal_attack: append the ideal-vector attacker model when True.
        poisoner_indices: two client indices that inject adversarial noise.

    Returns:
        The final weight vector.
    """
    # BUG FIX: mutable default argument ([]) replaced with None sentinel.
    if poisoner_indices is None:
        poisoner_indices = []

    batch_size = 50
    topk = int(numParams / 10)

    list_of_models = []
    for dataset in model_names:
        list_of_models.append(softmax_model_obj.SoftMaxModel(dataset, numClasses))

    # Include the model that sends the ideal vector on each iteration
    if ideal_attack:
        list_of_models.append(
            softmax_model_obj.SoftMaxModelEvil(dataPath + "_bad_ideal_4_9",
                                               numClasses))

    numClients = len(list_of_models)
    model_aggregator.init(numClients, numParams, numClasses)

    print("Start training across " + str(numClients) + " clients.")

    weights = np.random.rand(numParams) / 100.0
    train_progress = []

    summed_deltas = np.zeros((numClients, numParams))

    # BUG FIX: xrange is Python 2-only; the rest of the file targets Python 3.
    for i in range(iterations):
        delta = np.zeros((numClients, numParams))

        # Significant features filter
        # sig_features_idx = np.argpartition(weights, -topk)[-topk:]
        sig_features_idx = np.arange(numParams)

        for k in range(len(list_of_models)):
            delta[k, :] = list_of_models[k].privateFun(1, weights, batch_size)
            # Clip each client's update to unit L2 norm.
            if np.linalg.norm(delta[k, :]) > 1:
                delta[k, :] = delta[k, :] / np.linalg.norm(delta[k, :])

        # Add adversarial noise: two colluding clients push opposite noise.
        noisevec = rescale(np.random.rand(numParams), np.min(delta), np.max(delta))
        delta[poisoner_indices[0], :] = delta[poisoner_indices[0], :] + noisevec
        delta[poisoner_indices[1], :] = delta[poisoner_indices[1], :] - noisevec

        # Track the total vector from each individual client
        summed_deltas = summed_deltas + delta

        # Use Foolsgold
        this_delta = model_aggregator.foolsgold(
            delta, summed_deltas, sig_features_idx, i, weights, topk_prop,
            importance=False, importanceHard=True)
        # this_delta = model_aggregator.average(delta)

        weights = weights + this_delta

        if i % 100 == 0:
            error = softmax_test.train_error(weights)
            print("Train error: %.10f" % error)
            train_progress.append(error)

    print("Done iterations!")
    # BUG FIX: print() was called with a comma instead of %-formatting, which
    # printed the literal "%d" followed by the value.
    print("Train error: %d" % softmax_test.train_error(weights))
    print("Test error: %d" % softmax_test.test_error(weights))
    # pdb.set_trace()
    # import sklearn.metrics.pairwise as smp
    # cs = smp.cosine_similarity(summed_deltas)
    return weights