def run(size, rank):
    """Federated worker: train a CNN on MNIST and reduce per-step parameter
    deltas to rank 0.

    Args:
        size: world size (number of processes in the reduce group).
        rank: this process's rank.

    Side effects: performs distributed collective ops and prints progress.
    """
    modell = model.CNN()
    optimizer = torch.optim.Adam(modell.parameters(), lr=0.001)
    loss_func = torch.nn.CrossEntropyLoss()
    # Construct the data source once (the original built two Mnist() objects).
    mnist = Mnist()
    train_loader = mnist.get_train_data()
    test_data = mnist.get_test_data()
    # Shape from (2000, 28, 28) to (2000, 1, 28, 28); scale pixels into [0, 1].
    test_x = torch.unsqueeze(test_data.test_data, dim=1).type(
        torch.FloatTensor)[:2000] / 255.
    test_y = test_data.test_labels[:2000]
    group_list = list(range(size))  # every rank participates in the group
    group = torch.distributed.new_group(group_list)
    print(rank, group_list)
    for epoch in range(50):
        for step, (b_x, b_y) in enumerate(train_loader):
            # Pull the freshest global model before each local step, and
            # snapshot it so the post-step delta can be computed below.
            modell = get_new_model(modell)
            current_model = copy.deepcopy(modell)
            output = modell(b_x)[0]
            loss = loss_func(output, b_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            new_model = copy.deepcopy(modell)
            if step % 50 == 0:
                test_output, last_layer = modell(test_x)
                pred_y = torch.max(test_output, 1)[1].data.numpy()
                accuracy = float(
                    (pred_y == test_y.data.numpy()).astype(int).sum()) / float(
                        test_y.size(0))
                print('Epoch: ', epoch,
                      '| train loss: %.4f' % loss.data.numpy(),
                      '| test accuracy: %.2f' % accuracy)
            # Send this step's parameter delta to the aggregator (rank 0).
            # torch.distributed.reduce_op is deprecated; ReduceOp is the
            # supported spelling with identical semantics.
            for param1, param2 in zip(current_model.parameters(),
                                      new_model.parameters()):
                torch.distributed.reduce(param2.data - param1.data, dst=0,
                                         op=torch.distributed.ReduceOp.SUM,
                                         group=group)
def worker_run(size, rank):
    """Federated worker loop: train locally each epoch, then reduce the
    updated parameters to rank 0 for aggregation.

    Args:
        size: world size (number of processes in the reduce group).
        rank: this process's rank; in non-IID mode ranks 1-5 each own a
            dedicated training shard.

    Raises:
        ValueError: in non-IID mode for a rank with no defined shard
            (the original silently left ``train_loader`` undefined and
            crashed later with a NameError).
    """
    modell = model.CNN()
    optimizer = torch.optim.Adam(modell.parameters(), lr=LR)
    loss_func = torch.nn.CrossEntropyLoss()
    test_data = Mnist().get_test_data()
    if IID:
        train_loader = Mnist().get_train_data()
    elif 1 <= rank <= 5:
        # Non-IID: dispatch to the per-rank shard accessor
        # (get_train_data1 .. get_train_data5); replaces a 5-way
        # copy-paste branch.
        train_loader = getattr(Mnist_noniid(), 'get_train_data%d' % rank)()
    else:
        raise ValueError('no non-IID training shard for rank %d' % rank)
    # NOTE: only the LAST batch of the test loader is kept as the test set
    # (original behavior; assumes the loader yields a single batch).
    for b_x, b_y in test_data:
        test_x = b_x
        test_y = b_y
    group = dist.new_group(range(size))
    for epoch in range(MAX_EPOCH):
        # Receive the aggregated global model for this round.
        modell = get_new_model(modell, group)
        for step, (b_x, b_y) in enumerate(train_loader):
            output = modell(b_x)[0]
            loss = loss_func(output, b_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Push the locally-updated parameters to rank 0 once per epoch.
        # NOTE(review): nesting reconstructed from flattened source — the
        # per-epoch reduce matches the per-epoch get_new_model; confirm.
        # dist.reduce_op is deprecated; dist.ReduceOp is the current name.
        for param in modell.parameters():
            dist.reduce(param.data, dst=0, op=dist.ReduceOp.SUM, group=group)
        test_output, last_layer = modell(test_x)
        pred_y = torch.max(test_output, 1)[1].data.numpy()
        accuracy = float(
            (pred_y == test_y.data.numpy()).astype(int).sum()) / float(
                test_y.size(0))
        print('Epoch: ', epoch, ' Rank: ', rank,
              '| train loss: %.4f' % loss.data.numpy(),
              '| test accuracy: %.2f' % accuracy)
def get_testset(rank):
    """Return the test-set loader for the configured DATA_SET.

    IID mode shards by *rank*; the non-IID loaders take no rank argument.

    Raises:
        ValueError: for an unrecognized DATA_SET (the original fell
            through and crashed with UnboundLocalError on return).
    """
    if IID:
        if DATA_SET == 'Mnist':
            return Mnist(rank).get_test_data()
        if DATA_SET == 'Cifar10':
            return Cifar10(rank).get_test_data()
    else:
        if DATA_SET == 'Mnist':
            return Mnist_noniid().get_test_data()
        if DATA_SET == 'Cifar10':
            return Cifar10_noniid().get_test_data()
    raise ValueError('unknown DATA_SET: %r' % (DATA_SET,))
def get_train_loader(world_size, rank, batch_size):
    """Return the training loader for the configured DATASET.

    IID mode shards by *rank*; non-IID mode partitions by *world_size*
    and selects this rank's shard via ``get_train_data(rank)``.

    Raises:
        ValueError: for an unrecognized DATASET (the original fell
            through and crashed with UnboundLocalError on return).
    """
    if IID:
        if DATASET == 'Mnist':
            return Mnist(rank, batch_size).get_train_data()
        if DATASET == 'Cifar10':
            return Cifar10(rank, batch_size).get_train_data()
    else:
        if DATASET == 'Mnist':
            return Mnist_noniid(batch_size, world_size).get_train_data(rank)
        if DATASET == 'Cifar10':
            return Cifar10_noniid(batch_size, world_size).get_train_data(rank)
    raise ValueError('unknown DATASET: %r' % (DATASET,))
def get_local_data(world_size, rank, batch_size):
    """Return this rank's training loader for the configured DATA_SET.

    IID mode shards by *rank*; non-IID mode partitions by *world_size*.
    NOTE: a same-named ``get_local_data(size, rank, batchsize)`` exists
    elsewhere in this file — whichever is defined last wins at import time.

    Raises:
        ValueError: for an unrecognized DATA_SET (the original fell
            through and crashed with UnboundLocalError on return).
    """
    logging('enter get local data')  # project-local logging helper
    if IID:
        if DATA_SET == 'Mnist':
            return Mnist(rank, batch_size).get_train_data()
        if DATA_SET == 'Cifar10':
            return Cifar10(rank, batch_size).get_train_data()
    else:
        if DATA_SET == 'Mnist':
            return Mnist_noniid(batch_size, world_size).get_train_data(rank)
        if DATA_SET == 'Cifar10':
            return Cifar10_noniid(batch_size, world_size).get_train_data(rank)
    raise ValueError('unknown DATA_SET: %r' % (DATA_SET,))
def get_local_data(size, rank, batchsize):
    """Return this rank's training loader for the configured DATA_SET.

    IID mode uses a shared loader (no rank sharding here — unlike the
    other ``get_local_data`` overload in this file); non-IID mode
    partitions by *size* and selects this rank's shard.

    Raises:
        ValueError: for an unrecognized DATA_SET (the original fell
            through and crashed with UnboundLocalError on return).
    """
    if IID:
        if DATA_SET == 'Mnist':
            return Mnist(batchsize).get_train_data()
        if DATA_SET == 'Cifar10':
            return Cifar10(batchsize).get_train_data()
    else:
        if DATA_SET == 'Mnist':
            return Mnist_noniid(batchsize, size).get_train_data(rank)
        if DATA_SET == 'Cifar10':
            return Cifar10_noniid(batchsize, size).get_train_data(rank)
    raise ValueError('unknown DATA_SET: %r' % (DATA_SET,))
def get_testset():
    """Load the configured test set and return it as a single (x, y) batch.

    NOTE: iterates the loader and keeps only the LAST batch — this is only
    a full test set when the loader yields a single batch (original
    behavior preserved).

    Raises:
        ValueError: for an unrecognized DATA_SET (the original fell
            through and crashed with UnboundLocalError).
    """
    if IID:
        if DATA_SET == 'Mnist':
            test_loader = Mnist().get_test_data()
        elif DATA_SET == 'Cifar10':
            test_loader = Cifar10().get_test_data()
        else:
            raise ValueError('unknown DATA_SET: %r' % (DATA_SET,))
    else:
        if DATA_SET == 'Mnist':
            test_loader = Mnist_noniid().get_test_data()
        elif DATA_SET == 'Cifar10':
            test_loader = Cifar10_noniid().get_test_data()
        else:
            raise ValueError('unknown DATA_SET: %r' % (DATA_SET,))
    # enumerate() was used with an unused index in the original; a plain
    # loop expresses the same "keep the last batch" behavior.
    for b_x, b_y in test_loader:
        test_x = b_x
        test_y = b_y
    return test_x, test_y
pass fileName = datetime.datetime.now().strftime('day' + '%Y_%m_%d') sys.stdout = Logger(fileName + '.log', path=path) ############################################################# # 这里输出之后的所有的输出的print 内容即将写入日志 ############################################################# print(fileName.center(60, '*')) if __name__ == '__main__': random.seed(1) np.random.seed(1) make_print_to_file(path='./') Minst_1 = Mnist(20, 10) writers_all = Minst_1.partitioned_by_rows(num_workers=10) # writers = writers_all["train"]#iid数据 writers = Minst_1.client() #noniid数据 # data = datasource.MINSIT_NONIID() train_x, train_y = Minst_1.global_train()[0], Minst_1.global_train()[1] data_test = Minst_1.test_data() x_test, y_test = data_test[0], data_test[1] # writers = data.client() # print(len(writers)) parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=int, default=0) parser.add_argument('--aggregation', type=str, choices=[ "normal_atten", "atten", "rule_out", "TrimmedMean",
def run(size, rank):
    """Federated worker loop: train locally on MNIST and reduce parameters
    to rank 0, logging progress to ./test.txt and stdout.

    In IID mode every rank shares one MNIST split; in non-IID mode ranks
    1-5 each load a dedicated shard.

    Raises:
        ValueError: in non-IID mode for a rank with no defined shard
            (the original silently left train_loader/test_data undefined
            and crashed later with a NameError).
    """
    modell = model.CNN()
    # modell = model.AlexNet()  # alternative backbone
    optimizer = torch.optim.Adam(modell.parameters(), lr=LR)
    loss_func = torch.nn.CrossEntropyLoss()
    if IID:
        train_loader = Mnist().get_train_data()
        test_data = Mnist().get_test_data()
    elif 1 <= rank <= 5:
        # Non-IID: per-rank shard accessors (get_train_data1..5 /
        # get_test_data1..5); replaces a 5-way copy-paste branch.
        noniid = Mnist_noniid()
        train_loader = getattr(noniid, 'get_train_data%d' % rank)()
        test_data = getattr(noniid, 'get_test_data%d' % rank)()
    else:
        raise ValueError('no non-IID data shard for rank %d' % rank)
    # Keep only the LAST batch of the test loader as the evaluation set
    # (original behavior; assumes a single-batch test loader).
    for b_x, b_y in test_data:
        test_x = b_x
        test_y = b_y
    group = dist.new_group(list(range(size)))
    for epoch in range(MAX_EPOCH):
        # Receive the aggregated global model, then evaluate it once per
        # epoch; the per-step prints below reuse this accuracy figure.
        modell = get_new_model(modell, group)
        test_output, last_layer = modell(test_x)
        pred_y = torch.max(test_output, 1)[1].data.numpy()
        accuracy = float(
            (pred_y == test_y.data.numpy()).astype(int).sum()) / float(
                test_y.size(0))
        for step, (b_x, b_y) in enumerate(train_loader):
            output = modell(b_x)[0]
            loss = loss_func(output, b_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Push updated parameters to the aggregator once per epoch.
        # NOTE(review): nesting reconstructed from flattened source — the
        # per-epoch reduce matches the per-epoch get_new_model; confirm.
        # dist.reduce_op is deprecated; ReduceOp has identical semantics.
        for param in modell.parameters():
            dist.reduce(param.data, dst=0, op=dist.ReduceOp.SUM, group=group)
        # 'with' guarantees the log handle is closed even if print raises
        # (the original opened/closed the file manually).
        with open('./test.txt', 'a') as f:
            print('Epoch: ', epoch, ' Rank: ', rank,
                  '| train loss: %.4f' % loss.data.numpy(),
                  '| test accuracy: %.2f' % accuracy, file=f)
        print('Epoch: ', epoch, ' Rank: ', rank,
              '| train loss: %.4f' % loss.data.numpy(),
              '| test accuracy: %.2f' % accuracy)
def get_test_loader(rank):
    """Return the rank-sharded test loader for the configured DATASET.

    Raises:
        ValueError: for an unrecognized DATASET (the original fell
            through and crashed with UnboundLocalError on return).
    """
    if DATASET == 'Mnist':
        return Mnist(rank).get_test_data()
    if DATASET == 'Cifar10':
        return Cifar10(rank).get_test_data()
    raise ValueError('unknown DATASET: %r' % (DATASET,))
def run(size, rank):
    """Federated worker loop: train a CNN on MNIST and reduce the updated
    parameters to rank 0 each epoch.

    In IID mode every rank shares one MNIST split; in non-IID mode ranks
    1-5 each load a dedicated shard.

    Raises:
        ValueError: in non-IID mode for a rank with no defined shard
            (the original silently left the loaders undefined and crashed
            later with a NameError).
    """
    def _test_tensors(test_data):
        # Shape (N, 28, 28) -> (N, 1, 28, 28); scale pixels into [0, 1].
        # The original duplicated these two lines verbatim in six branches.
        test_x = torch.unsqueeze(test_data.test_data, dim=1).type(
            torch.FloatTensor) / 255.
        return test_x, test_data.test_labels

    modell = model.CNN()
    # modell = model.AlexNet()  # alternative backbone
    optimizer = torch.optim.Adam(modell.parameters(), lr=LR)
    loss_func = torch.nn.CrossEntropyLoss()
    if IID:
        train_loader = Mnist().get_train_data()
        test_x, test_y = _test_tensors(Mnist().get_test_data())
    elif 1 <= rank <= 5:
        # Non-IID: per-rank shard accessors (get_train_data1..5 /
        # get_test_data1..5); replaces a 5-way copy-paste branch.
        noniid = Mnist_noniid()
        train_loader = getattr(noniid, 'get_train_data%d' % rank)()
        test_x, test_y = _test_tensors(
            getattr(noniid, 'get_test_data%d' % rank)())
    else:
        raise ValueError('no non-IID data shard for rank %d' % rank)
    group = dist.new_group(list(range(size)))
    for epoch in range(MAX_EPOCH):
        # Receive the aggregated global model for this round.
        modell = get_new_model(modell)
        for step, (b_x, b_y) in enumerate(train_loader):
            output = modell(b_x)[0]
            loss = loss_func(output, b_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Push updated parameters to the aggregator once per epoch.
        # Reduce param.data (not the Parameter itself) for consistency with
        # the sibling run() implementations; dist.reduce_op is deprecated,
        # ReduceOp has identical semantics.
        for param in modell.parameters():
            dist.reduce(param.data, dst=0, op=dist.ReduceOp.SUM, group=group)
        test_output, last_layer = modell(test_x)
        pred_y = torch.max(test_output, 1)[1].data.numpy()
        accuracy = float(
            (pred_y == test_y.data.numpy()).astype(int).sum()) / float(
                test_y.size(0))
        print('Epoch: ', epoch, ' Rank: ', rank,
              '| train loss: %.4f' % loss.data.numpy(),
              '| test accuracy: %.2f' % accuracy)