def __init__(self, embedding_dim=64, embedding_fn=None, classifier=None):
    super().__init__()
    # build fresh CUDA modules unless pre-trained components are injected
    if embedding_fn is None:
        self.embedding_fn = feature_encoder(num_classes=embedding_dim).cuda()
    else:
        self.embedding_fn = embedding_fn
    if classifier is None:
        self.classifier = Classifier(fea_dim=embedding_dim).to(torch.device("cuda"))
        # self.classifier = Classifier(fea_dim=576).to(torch.device("cuda"))
    else:
        self.classifier = classifier
    torch.cuda.empty_cache()
def __init__(self, meta_iterations, embedding_dim=64):
    super().__init__()
    self.embedding_fn = feature_encoder(num_classes=embedding_dim)
    # self.embedding_fn = LoadParameter(self.embedding_fn, torch.load('resnet18_tiered.pth.tar')['state_dict'])
    self.classifier = Classifier(fea_dim=embedding_dim).to(torch.device("cuda"))
    # self.classifier = Classifier(fea_dim=576).to(torch.device("cuda"))
    self.embedding_fn = self.embedding_fn.to(torch.device("cuda"))
    torch.cuda.empty_cache()
    self.optimizer = optim.SGD(self.classifier.parameters(),
                               lr=0.001,
                               momentum=0.9,
                               weight_decay=1e-4,
                               nesterov=False)
    self.lr_schedule = optim.lr_scheduler.MultiStepLR(self.optimizer,
                                                      milestones=[5500],
                                                      gamma=0.1)
    self.count = 0
    self.meta_iterations = meta_iterations
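# --- Hedged example: how the MultiStepLR schedule above behaves ---
# A minimal standalone sketch; the nn.Linear stand-in is hypothetical and not
# part of this repo. The learning rate stays at 1e-3 until the step counter
# passes milestone 5500, then is multiplied by gamma=0.1.
import torch
from torch import nn, optim

toy = nn.Linear(4, 2)  # hypothetical stand-in for the classifier
opt = optim.SGD(toy.parameters(), lr=0.001, momentum=0.9,
                weight_decay=1e-4, nesterov=False)
sched = optim.lr_scheduler.MultiStepLR(opt, milestones=[5500], gamma=0.1)
for step in range(6000):
    opt.step()    # a real run would compute a loss and call backward() first
    sched.step()
print(opt.param_groups[0]['lr'])  # ~1e-4 once past the milestone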
def main(args):
    torch.manual_seed(args.seed)
    if not os.path.exists(args.res_dir):
        os.mkdir(args.res_dir)
    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)

    # load the four-site correlation matrices
    data1 = dd.io.load(os.path.join(args.vec_dir, 'NYU_correlation_matrix.h5'))
    data2 = dd.io.load(os.path.join(args.vec_dir, 'UM_correlation_matrix.h5'))
    data3 = dd.io.load(os.path.join(args.vec_dir, 'USM_correlation_matrix.h5'))
    data4 = dd.io.load(os.path.join(args.vec_dir, 'UCLA_correlation_matrix.h5'))
    x1 = torch.from_numpy(data1['data']).float()
    y1 = torch.from_numpy(data1['label']).long()
    x2 = torch.from_numpy(data2['data']).float()
    y2 = torch.from_numpy(data2['label']).long()
    x3 = torch.from_numpy(data3['data']).float()
    y3 = torch.from_numpy(data3['label']).long()
    x4 = torch.from_numpy(data4['data']).float()
    y4 = torch.from_numpy(data4['label']).long()

    if args.overlap:
        idNYU = dd.io.load('./idx/NYU_sub_overlap.h5')
        idUM = dd.io.load('./idx/UM_sub_overlap.h5')
        idUSM = dd.io.load('./idx/USM_sub_overlap.h5')
        idUCLA = dd.io.load('./idx/UCLA_sub_overlap.h5')
    else:
        idNYU = dd.io.load('./idx/NYU_sub.h5')
        idUM = dd.io.load('./idx/UM_sub.h5')
        idUSM = dd.io.load('./idx/USM_sub.h5')
        idUCLA = dd.io.load('./idx/UCLA_sub.h5')

    # 5-fold CV: fold args.split is held out for testing, the rest for training
    folds = ['0', '1', '2', '3', '4']
    test_fold = folds.pop(args.split)
    tr1 = sum((idNYU[f] for f in folds), [])
    tr2 = sum((idUM[f] for f in folds), [])
    tr3 = sum((idUSM[f] for f in folds), [])
    tr4 = sum((idUCLA[f] for f in folds), [])
    te1 = idNYU[test_fold]
    te2 = idUM[test_fold]
    te3 = idUSM[test_fold]
    te4 = idUCLA[test_fold]

    x1_train, y1_train = x1[tr1], y1[tr1]
    x2_train, y2_train = x2[tr2], y2[tr2]
    x3_train, y3_train = x3[tr3], y3[tr3]
    x4_train, y4_train = x4[tr4], y4[tr4]
    x1_test, y1_test = x1[te1], y1[te1]
    x2_test, y2_test = x2[te2], y2[te2]
    x3_test, y3_test = x3[te3], y3[te3]
    x4_test, y4_test = x4[te4], y4[te4]

    if args.sepnorm:
        # standardize each site with its own training-set statistics
        mean, dev = x1_train.mean(0, keepdim=True), x1_train.std(0, keepdim=True)
        x1_train, x1_test = (x1_train - mean) / dev, (x1_test - mean) / dev
        mean, dev = x2_train.mean(0, keepdim=True), x2_train.std(0, keepdim=True)
        x2_train, x2_test = (x2_train - mean) / dev, (x2_test - mean) / dev
        mean, dev = x3_train.mean(0, keepdim=True), x3_train.std(0, keepdim=True)
        x3_train, x3_test = (x3_train - mean) / dev, (x3_test - mean) / dev
        mean, dev = x4_train.mean(0, keepdim=True), x4_train.std(0, keepdim=True)
        x4_train, x4_test = (x4_train - mean) / dev, (x4_test - mean) / dev
    else:
        # standardize with statistics pooled over all four training sets
        pooled = torch.cat((x1_train, x2_train, x3_train, x4_train), 0)
        mean = pooled.mean(0, keepdim=True)
        dev = pooled.std(0, keepdim=True)
        x1_train, x1_test = (x1_train - mean) / dev, (x1_test - mean) / dev
        x2_train, x2_test = (x2_train - mean) / dev, (x2_test - mean) / dev
        x3_train, x3_test = (x3_train - mean) / dev, (x3_test - mean) / dev
        x4_train, x4_test = (x4_train - mean) / dev, (x4_test - mean) / dev

    train1 = TensorDataset(x1_train, y1_train)
    train_loader1 = DataLoader(train1, batch_size=len(train1) // args.nsteps, shuffle=True)
    train2 = TensorDataset(x2_train, y2_train)
    train_loader2 = DataLoader(train2, batch_size=len(train2) // args.nsteps, shuffle=True)
    train3 = TensorDataset(x3_train, y3_train)
    train_loader3 = DataLoader(train3, batch_size=len(train3) // args.nsteps, shuffle=True)
    train4 = TensorDataset(x4_train, y4_train)
    train_loader4 = DataLoader(train4, batch_size=len(train4) // args.nsteps, shuffle=True)
    train_loaders = [train_loader1, train_loader2, train_loader3, train_loader4]

    test1 = TensorDataset(x1_test, y1_test)
    test2 = TensorDataset(x2_test, y2_test)
    test3 = TensorDataset(x3_test, y3_test)
    test4 = TensorDataset(x4_test, y4_test)
    test_loader1 = DataLoader(test1, batch_size=args.test_batch_size1, shuffle=False)
    test_loader2 = DataLoader(test2, batch_size=args.test_batch_size2, shuffle=False)
    test_loader3 = DataLoader(test3, batch_size=args.test_batch_size3, shuffle=False)
    test_loader4 = DataLoader(test4, batch_size=args.test_batch_size4, shuffle=False)
    tbs = [args.test_batch_size1, args.test_batch_size2,
           args.test_batch_size3, args.test_batch_size4]
    test_loaders = [test_loader1, test_loader2, test_loader3, test_loader4]

    # federated setup: one MoE model per site plus a global model
    model1 = MoE(6105, args.feddim, 2).to(device)
    model2 = MoE(6105, args.feddim, 2).to(device)
    model3 = MoE(6105, args.feddim, 2).to(device)
    model4 = MoE(6105, args.feddim, 2).to(device)
    optimizer1 = optim.Adam(model1.parameters(), lr=args.lr1, weight_decay=1e-3)
    optimizer2 = optim.Adam(model2.parameters(), lr=args.lr2, weight_decay=1e-3)
    optimizer3 = optim.Adam(model3.parameters(), lr=args.lr3, weight_decay=1e-3)
    optimizer4 = optim.Adam(model4.parameters(), lr=args.lr4, weight_decay=1e-3)
    models = [model1, model2, model3, model4]
    optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]
    model = MoE(6105, args.feddim, 2).to(device)
    print('Global Model:', model)

    # local setup; these models do not communicate with the federated model
    model_local1 = Classifier(6105, args.dim, 2).to(device)
    model_local2 = Classifier(6105, args.dim, 2).to(device)
    model_local3 = Classifier(6105, args.dim, 2).to(device)
    model_local4 = Classifier(6105, args.dim, 2).to(device)
    optimizer_local1 = optim.Adam(model_local1.parameters(), lr=args.llr, weight_decay=5e-2)
    optimizer_local2 = optim.Adam(model_local2.parameters(), lr=args.llr, weight_decay=5e-2)
    optimizer_local3 = optim.Adam(model_local3.parameters(), lr=args.llr, weight_decay=5e-2)
    optimizer_local4 = optim.Adam(model_local4.parameters(), lr=args.llr, weight_decay=5e-2)
    models_local = [model_local1, model_local2, model_local3, model_local4]
    optimizers_local = [optimizer_local1, optimizer_local2,
                        optimizer_local3, optimizer_local4]

    nnloss = nn.NLLLoss()

    def train(epoch):
        pace = args.pace
        for i in range(4):
            models[i].train()
            models_local[i].train()
            # halve every learning rate every 20 epochs
            if epoch % 20 == 0:
                for param_group1 in optimizers[i].param_groups:
                    param_group1['lr'] = 0.5 * param_group1['lr']
                for param_group1 in optimizers_local[i].param_groups:
                    param_group1['lr'] = 0.5 * param_group1['lr']

        # aggregation weights; uniform (an alternative is tbs[i] / sum(tbs))
        w = {i: 0.25 for i in range(4)}
        loss_all = {i: 0 for i in range(4)}
        loss_lc = {i: 0 for i in range(4)}
        num_data = {i: 0 for i in range(4)}
        count = 0
        for t in range(args.nsteps):
            for i in range(4):
                optimizers[i].zero_grad()
                a, b = next(iter(train_loaders[i]))
                num_data[i] += b.size(0)
                a = a.to(device)
                b = b.to(device)
                # local model step
                outlocal = models_local[i](a)
                loss_local = nnloss(outlocal, b)
                loss_local.backward(retain_graph=True)
                loss_lc[i] += loss_local.item() * b.size(0)
                optimizers_local[i].step()
                # federated MoE step, gated on the local output
                output, _ = models[i](a, outlocal)
                loss = nnloss(output, b)
                loss.backward()
                loss_all[i] += loss.item() * b.size(0)
                optimizers[i].step()
            count += 1
            if count % pace == 0 or t == args.nsteps - 1:
                with torch.no_grad():
                    for key in model.classifier.state_dict().keys():
                        if models[0].classifier.state_dict()[key].dtype == torch.int64:
                            model.classifier.state_dict()[key].data.copy_(
                                models[0].classifier.state_dict()[key])
                        else:
                            temp = torch.zeros_like(model.classifier.state_dict()[key])
                            # weighted average of the site classifiers, with
                            # Gaussian noise added to each contribution
                            for s in range(4):
                                normal = tdist.Normal(
                                    torch.tensor([0.0]),
                                    args.noise * torch.std(
                                        models[s].classifier.state_dict()[key].detach().cpu()))
                                noise = normal.sample(
                                    models[s].classifier.state_dict()[key].size()).squeeze()
                                noise = noise.to(device)
                                temp += w[s] * (models[s].classifier.state_dict()[key] + noise)
                            # update the global model; only the classifier is shared
                            model.classifier.state_dict()[key].data.copy_(temp)
                            for s in range(4):
                                models[s].classifier.state_dict()[key].data.copy_(
                                    model.classifier.state_dict()[key])
        return (loss_all[0] / num_data[0], loss_all[1] / num_data[1],
                loss_all[2] / num_data[2], loss_all[3] / num_data[3],
                loss_lc[0] / num_data[0], loss_lc[1] / num_data[1],
                loss_lc[2] / num_data[2], loss_lc[3] / num_data[3])

    def test(federated_model, dataloader, train=True):
        federated_model.eval()
        test_loss = 0
        correct = 0
        for data, target in dataloader:
            data = data.to(device)
            target = target.to(device)
            output = federated_model(data)
            test_loss += nnloss(output, target).item() * target.size(0)
            pred = output.data.max(1)[1]
            correct += pred.eq(target.view(-1)).sum().item()
        test_loss /= len(dataloader.dataset)
        correct /= len(dataloader.dataset)
        split_name = 'Train' if train else 'Test'
        print('{} set local: Average loss: {:.4f}, Average acc: {:.4f}'.format(
            split_name, test_loss, correct))
        return test_loss, correct

    def testfed(federated_model, local_model, dataloader, train=True):
        federated_model = federated_model.to(device)
        local_model = local_model.to(device)
        federated_model.eval()
        local_model.eval()
        test_loss = 0
        correct = 0
        outputs, preds, targets, gates = [], [], [], []
        for data, target in dataloader:
            data = data.to(device)
            targets.append(target[0].detach().numpy())
            target = target.to(device)
            local_output = local_model(data)
            output, a = federated_model(data, local_output)
            outputs.append(output.detach().cpu().numpy())
            gates.append(a.detach().cpu().numpy())
            test_loss += nnloss(output, target).item() * target.size(0)
            pred = output.data.max(1)[1]
            preds.append(pred.detach().cpu().numpy())
            correct += pred.eq(target.view(-1)).sum().item()
        test_loss /= len(dataloader.dataset)
        correct /= len(dataloader.dataset)
        split_name = 'Train' if train else 'Test'
        print('{} set fed: Average loss: {:.4f}, Average acc: {:.4f}'.format(
            split_name, test_loss, correct))
        return test_loss, correct, targets, outputs, preds, gates

    best_acc = [0, 0, 0, 0]
    best_epoch = [0, 0, 0, 0]
    for epoch in range(args.epochs):
        start_time = time.time()
        print(f"Epoch Number {epoch + 1}")
        l1, l2, l3, l4, lc1, lc2, lc3, lc4 = train(epoch)
        print("===========================")
        print("L1: {:.7f}, L2: {:.7f}, L3: {:.7f}, L4: {:.7f}, "
              "Lc1: {:.7f}, Lc2: {:.7f}, Lc3: {:.7f}, Lc4: {:.7f}".format(
                  l1, l2, l3, l4, lc1, lc2, lc3, lc4))

        # local model performance
        print("***Local***")
        for i in range(4):
            test(models_local[i], train_loaders[i], train=True)
            test(models_local[i], test_loaders[i], train=False)

        # federated model performance
        print("***Federated***")
        for i in range(4):
            test(model.classifier, train_loaders[i], train=True)
            test(model.classifier, test_loaders[i], train=False)

        # MoE model performance
        print("***MOE***")
        te_accs, targets, outputs, preds, gates = [], [], [], [], []
        for i in range(4):
            testfed(models[i], models_local[i], train_loaders[i], train=True)
            _, te_acc, tar, out, pre, gate = testfed(
                models[i], models_local[i], test_loaders[i], train=False)
            te_accs.append(te_acc)
            targets.append(tar)
            outputs.append(out)
            preds.append(pre)
            gates.append(gate)
        for i in range(4):
            if te_accs[i] > best_acc[i]:
                best_acc[i] = te_accs[i]
                best_epoch[i] = epoch
        total_time = time.time() - start_time
        print('Communication time over the network', round(total_time, 2), 's\n')

    model_wts = copy.deepcopy(model.state_dict())
    torch.save(model_wts, os.path.join(args.model_dir, str(args.split) + '.pth'))
    for i, site_name in enumerate(['NYU', 'UM', 'USM', 'UCLA']):
        dd.io.save(
            os.path.join(args.res_dir, site_name + '_' + str(args.split) + '.h5'), {
                'outputs': outputs[i],
                'preds': preds[i],
                'targets': targets[i],
                'gates': gates[i]
            })
    for i in range(4):
        print('Best Acc:', best_acc[i], 'Best Epoch:', best_epoch[i])
    print('split:', args.split, ' noise:', args.noise, ' pace:', args.pace)
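# --- Hedged example: the noisy weight-averaging step in train() above ---
# A standalone sketch with two toy nn.Linear "sites" (hypothetical, not repo
# modules) and uniform weights, mirroring the aggregation logic: each site's
# parameters get zero-mean Gaussian noise scaled by their own std, the global
# model takes the weighted average, and the result is broadcast back.
import torch
from torch import nn
import torch.distributions as tdist

sites = [nn.Linear(4, 2) for _ in range(2)]
global_model = nn.Linear(4, 2)
w = {s: 0.5 for s in range(2)}   # uniform site weights, as in the code above
noise_scale = 0.01               # stands in for args.noise

with torch.no_grad():
    for key in global_model.state_dict().keys():
        temp = torch.zeros_like(global_model.state_dict()[key])
        for s, site in enumerate(sites):
            param = site.state_dict()[key]
            dist = tdist.Normal(torch.tensor([0.0]), noise_scale * torch.std(param))
            noise = dist.sample(param.size()).squeeze(-1)
            temp += w[s] * (param + noise)
        global_model.state_dict()[key].data.copy_(temp)
        for site in sites:  # every site starts the next round from the average
            site.state_dict()[key].data.copy_(global_model.state_dict()[key])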
def main(args):
    torch.manual_seed(args.seed)
    if not os.path.exists(args.res_dir):
        os.mkdir(args.res_dir)
    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)
    log_dir = os.path.join('./log', 'Align_' + str(args.split))
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    writer = SummaryWriter(log_dir)

    # load the four-site correlation matrices
    data1 = dd.io.load(os.path.join(args.vec_dir, 'NYU_correlation_matrix.h5'))
    data2 = dd.io.load(os.path.join(args.vec_dir, 'UM_correlation_matrix.h5'))
    data3 = dd.io.load(os.path.join(args.vec_dir, 'USM_correlation_matrix.h5'))
    data4 = dd.io.load(os.path.join(args.vec_dir, 'UCLA_correlation_matrix.h5'))
    x1 = torch.from_numpy(data1['data']).float()
    y1 = torch.from_numpy(data1['label']).long()
    x2 = torch.from_numpy(data2['data']).float()
    y2 = torch.from_numpy(data2['label']).long()
    x3 = torch.from_numpy(data3['data']).float()
    y3 = torch.from_numpy(data3['label']).long()
    x4 = torch.from_numpy(data4['data']).float()
    y4 = torch.from_numpy(data4['label']).long()

    if args.overlap:
        idNYU = dd.io.load('./idx/NYU_sub_overlap.h5')
        idUM = dd.io.load('./idx/UM_sub_overlap.h5')
        idUSM = dd.io.load('./idx/USM_sub_overlap.h5')
        idUCLA = dd.io.load('./idx/UCLA_sub_overlap.h5')
    else:
        idNYU = dd.io.load('./idx/NYU_sub.h5')
        idUM = dd.io.load('./idx/UM_sub.h5')
        idUSM = dd.io.load('./idx/USM_sub.h5')
        idUCLA = dd.io.load('./idx/UCLA_sub.h5')

    # 5-fold CV: fold args.split is held out for testing, the rest for training
    folds = ['0', '1', '2', '3', '4']
    test_fold = folds.pop(args.split)
    tr1 = sum((idNYU[f] for f in folds), [])
    tr2 = sum((idUM[f] for f in folds), [])
    tr3 = sum((idUSM[f] for f in folds), [])
    tr4 = sum((idUCLA[f] for f in folds), [])
    te1 = idNYU[test_fold]
    te2 = idUM[test_fold]
    te3 = idUSM[test_fold]
    te4 = idUCLA[test_fold]

    x1_train, y1_train = x1[tr1], y1[tr1]
    x2_train, y2_train = x2[tr2], y2[tr2]
    x3_train, y3_train = x3[tr3], y3[tr3]
    x4_train, y4_train = x4[tr4], y4[tr4]
    x1_test, y1_test = x1[te1], y1[te1]
    x2_test, y2_test = x2[te2], y2[te2]
    x3_test, y3_test = x3[te3], y3[te3]
    x4_test, y4_test = x4[te4], y4[te4]

    if args.sepnorm:
        # standardize each site with its own training-set statistics
        mean, dev = x1_train.mean(0, keepdim=True), x1_train.std(0, keepdim=True)
        x1_train, x1_test = (x1_train - mean) / dev, (x1_test - mean) / dev
        mean, dev = x2_train.mean(0, keepdim=True), x2_train.std(0, keepdim=True)
        x2_train, x2_test = (x2_train - mean) / dev, (x2_test - mean) / dev
        mean, dev = x3_train.mean(0, keepdim=True), x3_train.std(0, keepdim=True)
        x3_train, x3_test = (x3_train - mean) / dev, (x3_test - mean) / dev
        mean, dev = x4_train.mean(0, keepdim=True), x4_train.std(0, keepdim=True)
        x4_train, x4_test = (x4_train - mean) / dev, (x4_test - mean) / dev
    else:
        # standardize with statistics pooled over all four training sets
        pooled = torch.cat((x1_train, x2_train, x3_train, x4_train), 0)
        mean = pooled.mean(0, keepdim=True)
        dev = pooled.std(0, keepdim=True)
        x1_train, x1_test = (x1_train - mean) / dev, (x1_test - mean) / dev
        x2_train, x2_test = (x2_train - mean) / dev, (x2_test - mean) / dev
        x3_train, x3_test = (x3_train - mean) / dev, (x3_test - mean) / dev
        x4_train, x4_test = (x4_train - mean) / dev, (x4_test - mean) / dev

    train1 = TensorDataset(x1_train, y1_train)
    train_loader1 = DataLoader(train1, batch_size=len(train1) // args.nsteps, shuffle=True)
    train2 = TensorDataset(x2_train, y2_train)
    train_loader2 = DataLoader(train2, batch_size=len(train2) // args.nsteps, shuffle=True)
    train3 = TensorDataset(x3_train, y3_train)
    train_loader3 = DataLoader(train3, batch_size=len(train3) // args.nsteps, shuffle=True)
    train4 = TensorDataset(x4_train, y4_train)
    train_loader4 = DataLoader(train4, batch_size=len(train4) // args.nsteps, shuffle=True)
    train_loaders = [train_loader1, train_loader2, train_loader3, train_loader4]
    data_iters = [iter(train_loader1), iter(train_loader2),
                  iter(train_loader3), iter(train_loader4)]

    test1 = TensorDataset(x1_test, y1_test)
    test2 = TensorDataset(x2_test, y2_test)
    test3 = TensorDataset(x3_test, y3_test)
    test4 = TensorDataset(x4_test, y4_test)
    test_loader1 = DataLoader(test1, batch_size=args.test_batch_size1, shuffle=False)
    test_loader2 = DataLoader(test2, batch_size=args.test_batch_size2, shuffle=False)
    test_loader3 = DataLoader(test3, batch_size=args.test_batch_size3, shuffle=False)
    test_loader4 = DataLoader(test4, batch_size=args.test_batch_size4, shuffle=False)
    tbs = [args.test_batch_size1, args.test_batch_size2,
           args.test_batch_size3, args.test_batch_size4]
    test_loaders = [test_loader1, test_loader2, test_loader3, test_loader4]

    # federated setup: one classifier per site plus per-site discriminators
    model1 = Classifier(6105, args.dim, 2).to(device)
    model2 = Classifier(6105, args.dim, 2).to(device)
    model3 = Classifier(6105, args.dim, 2).to(device)
    model4 = Classifier(6105, args.dim, 2).to(device)
    models = [model1, model2, model3, model4]
    optimizer1 = optim.Adam(model1.parameters(), lr=args.lr1, weight_decay=1e-3)
    optimizer2 = optim.Adam(model2.parameters(), lr=args.lr2, weight_decay=1e-3)
    optimizer3 = optim.Adam(model3.parameters(), lr=args.lr3, weight_decay=1e-3)
    optimizer4 = optim.Adam(model4.parameters(), lr=args.lr4, weight_decay=1e-3)
    optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]
    # separate optimizers for the encoders (the "generators" of the alignment)
    optimizerG1 = optim.Adam(model1.encoder.parameters(), lr=args.lr, weight_decay=1e-3)
    optimizerG2 = optim.Adam(model2.encoder.parameters(), lr=args.lr, weight_decay=1e-3)
    optimizerG3 = optim.Adam(model3.encoder.parameters(), lr=args.lr, weight_decay=1e-3)
    optimizerG4 = optim.Adam(model4.encoder.parameters(), lr=args.lr, weight_decay=1e-3)
    optimizerGs = [optimizerG1, optimizerG2, optimizerG3, optimizerG4]
    discriminators = dict()
    optimizerDs = dict()
    for i in range(4):
        discriminators[i] = Discriminator(args.dim).to(device)
        optimizerDs[i] = optim.Adam(discriminators[i].parameters(),
                                    lr=args.lr, weight_decay=1e-3)

    # global model
    model = Classifier(6105, args.dim, 2).to(device)
    print(model)

    # loss functions
    celoss = nn.CrossEntropyLoss()

    def advDloss(d1, d2):
        # discriminator loss: push d1 (source) toward 1 and d2 (target) toward 0
        return -torch.log(d1).mean() - torch.log(1 - d2).mean()

    def advGloss(d1, d2):
        # generator (encoder) loss: push both domains toward 1
        return -torch.log(d1).mean() - torch.log(d2).mean()

    def train(epoch):
        pace = args.pace
        for i in range(4):
            models[i].train()
            discriminators[i].train()
            # halve every learning rate every 20 epochs
            if epoch % 20 == 0:
                for opt in (optimizers[i], optimizerGs[i], optimizerDs[i]):
                    for param_group1 in opt.param_groups:
                        param_group1['lr'] = 0.5 * param_group1['lr']

        # aggregation weights; uniform (an alternative is tbs[i] / np.sum(tbs))
        w = {i: 0.25 for i in range(4)}
        loss_all = {i: 0 for i in range(4)}
        lossG_all = {i: 0 for i in range(4)}
        lossD_all = {i: 0 for i in range(4)}
        num_data = {i: EPS for i in range(4)}
        num_dataG = {i: EPS for i in range(4)}
        num_dataD = {i: EPS for i in range(4)}
        count = 0
        for t in range(args.nsteps):
            fs = []
            # optimize the classifiers
            for i in range(4):
                optimizers[i].zero_grad()
                a, b = next(data_iters[i])
                num_data[i] += b.size(0)
                a = a.to(device)
                b = b.to(device)
                output = models[i](a)
                loss = celoss(output, b)
                loss_all[i] += loss.item() * b.size(0)
                if epoch >= 0:
                    loss.backward(retain_graph=True)
                    optimizers[i].step()
                fs.append(models[i].encoder(a))

            # optimize the alignment: perturb features with small Gaussian noise
            noises = []
            for i in range(4):
                normal = tdist.Normal(torch.tensor([0.0]),
                                      0.001 * torch.std(fs[i].detach().cpu()))
                noises.append(normal.sample(fs[i].size()).squeeze().to(device))
            for i in range(4):
                for j in range(4):
                    if i != j:
                        optimizerDs[i].zero_grad()
                        optimizerGs[i].zero_grad()
                        optimizerGs[j].zero_grad()
                        d1 = discriminators[i](fs[i] + noises[i])
                        d2 = discriminators[i](fs[j] + noises[j])
                        num_dataG[i] += d1.size(0)
                        num_dataD[i] += d1.size(0)
                        lossD = advDloss(d1, d2)
                        lossG = advGloss(d1, d2)
                        lossD_all[i] += lossD.item() * d1.size(0)
                        lossG_all[i] += lossG.item() * d1.size(0)
                        lossG_all[j] += lossG.item() * d2.size(0)
                        lossD = 0.1 * lossD
                        # the discriminators only start updating after a warmup
                        if epoch >= 5:
                            lossD.backward(retain_graph=True)
                            optimizerDs[i].step()
                        lossG.backward(retain_graph=True)
                        optimizerGs[i].step()
                        optimizerGs[j].step()
                        writer.add_histogram(
                            'Hist/hist_' + site[i] + '2' + site[j] + '_source',
                            d1, epoch * args.nsteps + t)
                        writer.add_histogram(
                            'Hist/hist_' + site[i] + '2' + site[j] + '_target',
                            d2, epoch * args.nsteps + t)
            count += 1
            if count % pace == 0 or t == args.nsteps - 1:
                with torch.no_grad():
                    for key in model.state_dict().keys():
                        if models[0].state_dict()[key].dtype == torch.int64:
                            model.state_dict()[key].data.copy_(
                                models[0].state_dict()[key])
                        else:
                            temp = torch.zeros_like(model.state_dict()[key])
                            # weighted average with Gaussian or Laplacian noise
                            for s in range(4):
                                scale = args.noise * torch.std(
                                    models[s].state_dict()[key].detach().cpu())
                                if args.type == 'G':
                                    dist = tdist.Normal(torch.tensor([0.0]), scale)
                                else:
                                    dist = tdist.Laplace(torch.tensor([0.0]), scale)
                                noise = dist.sample(
                                    models[s].state_dict()[key].size()).squeeze()
                                noise = noise.to(device)
                                temp += w[s] * (models[s].state_dict()[key] + noise)
                            # update the global model
                            model.state_dict()[key].data.copy_(temp)
                            # broadcast back to the local models
                            for s in range(4):
                                models[s].state_dict()[key].data.copy_(
                                    model.state_dict()[key])
        return loss_all, lossG_all, lossD_all, num_data, num_dataG, num_dataD

    def test(federated_model, data_loader, train=False):
        federated_model = federated_model.to(device)
        federated_model.eval()
        test_loss = 0
        correct = 0
        outputs, preds, targets = [], [], []
        for data, target in data_loader:
            targets.append(target[0].detach().numpy())
            data = data.to(device)
            target = target.to(device)
            output = federated_model(data)
            outputs.append(output.detach().cpu().numpy())
            test_loss += celoss(output, target).item() * target.size(0)
            pred = output.data.max(1)[1]
            preds.append(pred.detach().cpu().numpy())
            correct += pred.eq(target.view(-1)).sum().item()
        test_loss /= len(data_loader.dataset)
        correct /= len(data_loader.dataset)
        split_name = 'Train' if train else 'Test'
        print('{} set local: Average loss: {:.4f}, Average acc: {:.4f}'.format(
            split_name, test_loss, correct))
        return test_loss, correct, targets, outputs, preds

    best_acc = 0
    best_epoch = 0
    for epoch in range(args.epochs):
        start_time = time.time()
        print(f"Epoch Number {epoch + 1}")
        l, lG, lD, n, nG, nD = train(epoch)
        print("===========================")
        print("L1: {:.7f}, L2: {:.7f}, L3: {:.7f}, L4: {:.7f}".format(
            l[0] / n[0], l[1] / n[1], l[2] / n[2], l[3] / n[3]))
        print("G1: {:.7f}, G2: {:.7f}, G3: {:.7f}, G4: {:.7f}".format(
            lG[0] / nG[0], lG[1] / nG[1], lG[2] / nG[2], lG[3] / nG[3]))
        print("D1: {:.7f}, D2: {:.7f}, D3: {:.7f}, D4: {:.7f}".format(
            lD[0] / nD[0], lD[1] / nD[1], lD[2] / nD[2], lD[3] / nD[3]))
        writer.add_scalars('CEloss', {'l1': l[0] / n[0], 'l2': l[1] / n[1],
                                      'l3': l[2] / n[2], 'l4': l[3] / n[3]}, epoch)
        writer.add_scalars('Gloss', {'gl1': lG[0] / nG[0], 'gl2': lG[1] / nG[1],
                                     'gl3': lG[2] / nG[2], 'gl4': lG[3] / nG[3]}, epoch)
        writer.add_scalars('Dloss', {'dl1': lD[0] / nD[0], 'dl2': lD[1] / nD[1],
                                     'dl3': lD[2] / nD[2], 'dl4': lD[3] / nD[3]}, epoch)

        print('===NYU===')
        test(model, train_loader1, train=True)
        _, acc1, targets1, outputs1, preds1 = test(model, test_loader1, train=False)
        print('===UM===')
        test(model, train_loader2, train=True)
        _, acc2, targets2, outputs2, preds2 = test(model, test_loader2, train=False)
        print('===USM===')
        test(model, train_loader3, train=True)
        _, acc3, targets3, outputs3, preds3 = test(model, test_loader3, train=False)
        print('===UCLA===')
        test(model, train_loader4, train=True)
        _, acc4, targets4, outputs4, preds4 = test(model, test_loader4, train=False)
        if (acc1 + acc2 + acc3 + acc4) / 4 > best_acc:
            best_acc = (acc1 + acc2 + acc3 + acc4) / 4
            best_epoch = epoch
        total_time = time.time() - start_time
        print('Communication time over the network', round(total_time, 2), 's\n')

    model_wts = copy.deepcopy(model.state_dict())
    torch.save(model_wts, os.path.join(args.model_dir, str(args.split) + '.pth'))
    print('Best Acc:', best_acc, 'Best Epoch:', best_epoch)
    print('split:', args.split, ' noise:', args.noise, ' pace:', args.pace)
    dd.io.save(os.path.join(args.res_dir, 'NYU_' + str(args.split) + '.h5'),
               {'outputs': outputs1, 'preds': preds1, 'targets': targets1})
    dd.io.save(os.path.join(args.res_dir, 'UM_' + str(args.split) + '.h5'),
               {'outputs': outputs2, 'preds': preds2, 'targets': targets2})
    dd.io.save(os.path.join(args.res_dir, 'USM_' + str(args.split) + '.h5'),
               {'outputs': outputs3, 'preds': preds3, 'targets': targets3})
    dd.io.save(os.path.join(args.res_dir, 'UCLA_' + str(args.split) + '.h5'),
               {'outputs': outputs4, 'preds': preds4, 'targets': targets4})
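# --- Hedged example: sanity-checking advDloss/advGloss from main() above ---
# A toy evaluation with made-up discriminator outputs (not repo data). The
# discriminator loss rewards scoring source features d1 near 1 and target
# features d2 near 0; the generator loss rewards making both score near 1,
# i.e. making the two sites' features indistinguishable.
import torch

def advDloss(d1, d2):
    return -torch.log(d1).mean() - torch.log(1 - d2).mean()

def advGloss(d1, d2):
    return -torch.log(d1).mean() - torch.log(d2).mean()

d1 = torch.tensor([0.9, 0.8])   # discriminator is confident: "source"
d2 = torch.tensor([0.1, 0.2])   # discriminator is confident: "target"
print(advDloss(d1, d2).item())  # small; the discriminator is winning
print(advGloss(d1, d2).item())  # large; the encoders still have work to do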
#CLASSI = Classifier.CLASSIFIER(args).to(device)
appr = A_VAE(net, args)
#class_appr = classifier_train(CLASSI, args)

seen_acc = []
unseen_acc = []
harmonic_mean = []
accuracy_matrix = [[] for kk in range(args.num_tasks)]
#area_under_curve = []
overall_acc = []
replay_Classes = []

for t in range(args.num_tasks):
    print('Task:', t + 1)
    # a fresh classifier is trained for every task
    CLASSI = Classifier.CLASSIFIER(args).to(device)
    class_appr = classifier_train(CLASSI, args)
    trainData = torch.tensor(trainData1[t], dtype=torch.float32)
    trainLabels = torch.tensor(trainLabels1[t])
    #print(trainLabels.shape, 'shape00')
    trainLabelVectors = torch.tensor(trainLabelsVectors1[t], dtype=torch.float32)
    testData = torch.tensor(testData1[t], dtype=torch.float32)
    testLabels = torch.tensor(testLabels1[t], dtype=torch.int64)
    X_train = torch.cat([trainData, trainLabelVectors], dim=1).to(args.device)
    if t == 0:
        #print(t, trainData.shape, trainLabels.shape, trainLabelVectors.shape)
        appr.train(t, trainData, trainLabels, trainLabelVectors)
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=1,
                                          pin_memory=False)
length_training_dataset = len(training_data)
length_test_dataset = len(test_data)
NUM_CLASS = 20
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

convnet = ResNet50(pretrained=True)
classifier = Classifier(in_features=2048, num_class=NUM_CLASS)
pan = PAN(convnet.blocks[::-1])  # the decoder consumes the backbone blocks top-down
mask_classifier = Mask_Classifier(in_features=256, num_class=(NUM_CLASS + 1))
convnet.to(device)
classifier.to(device)
pan.to(device)
mask_classifier.to(device)

def train(epoch, optimizer, data_loader):
    convnet.train()
    classifier.train()
    pan.train()
    y_true = []
    y_pred = []
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") dataset = TDCCMCDataset( filenames=["./data/6zXXZvVvTFs", "./data/2AYaxTiWKoY", "./data/sYbBgkP9aMo"], trims=[(960, 9960), (550, 9550), (1, 9901)], crops=[(35, 50, 445, 300), (0, 13, 640, 335), (5, 22, 475, 341)], ) loader = DataLoader( dataset, batch_size=args.BATCH_SIZE, num_workers=0, pin_memory=True ) print("[pretrain] Dataset and DataLoader ready.") tdc = TDC().to(device) cmc = CMC().to(device) tdc_classifier = Classifier().to(device) cmc_classifier = Classifier().to(device) print("[pretrain] Neural networks initialized.") # Initialize Optimizer optim_params = ( list(tdc.parameters()) + list(cmc.parameters()) + list(tdc_classifier.parameters()) + list(cmc_classifier.parameters()) ) optimizer = optim.Adam(optim_params, lr=args.LR) print("[pretrain] Optimizer initialized.") # Setup t-SNE datasets tsne_filenames = [
def main(
    no_cuda,
    seed,
    batch_size,
    alpha,
    random_labels,
    classifier_optimizer_args,
    classifier_epochs,
    discriminator_optimizer_args,
    discriminator_epochs,
    discriminator_args,
    classifier_load_path,
    discriminator_load_path,
    log_dir,
    log_interval,
    run_id,
    num_iterations,
    aux_coef,
    simple_dataset_size,
):
    torch.manual_seed(seed)
    use_cuda = not no_cuda and torch.cuda.is_available()
    if use_cuda:
        # spread runs across GPUs, keyed on the trailing digit of the run id
        n_gpu = get_n_gpu()
        try:
            index = int(run_id[-1])
        except ValueError:
            index = random.randrange(0, n_gpu)
        device = torch.device("cuda", index=index % n_gpu)
    else:
        device = "cpu"
    kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True} if use_cuda else {}

    simple = bool(simple_dataset_size)
    if simple:
        # 3/7 train, 3/7 test, remainder validation
        splits = Datasets(
            train=simple_dataset_size * 3 // 7,
            test=simple_dataset_size * 3 // 7,
            valid=None,
        )
        splits = splits._replace(valid=simple_dataset_size - splits.train - splits.test)
        classifier_datasets = Datasets(
            train=Subset(
                AddLabel(SimpleDataset(n=simple_dataset_size, generalization_error=alpha), 0),
                list(range(splits.train)),
            ),
            test=Subset(
                AddLabel(SimpleDataset(n=simple_dataset_size, generalization_error=0), 1),
                list(range(splits.train, splits.train + splits.test)),
            ),
            valid=Subset(
                AddLabel(SimpleDataset(n=simple_dataset_size, generalization_error=0), 2),
                list(range(splits.train + splits.test, simple_dataset_size)),
            ),
        )
    else:
        transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        )
        train_dataset = NoiseDataset(
            "../data", train=True, download=True, transform=transform,
            percent_noise=alpha,
        )
        test_dataset = NoiseDataset(
            "../data", train=False, transform=transform, percent_noise=alpha
        )
        dataset = train_dataset + test_dataset
        size = len(dataset)
        splits = Datasets(train=size * 3 // 7, test=size * 3 // 7, valid=None)
        splits = splits._replace(valid=size - splits.train - splits.test)
        classifier_datasets = Datasets(
            *[
                AddLabel(dataset, label, random_labels=random_labels)
                for label, dataset in enumerate(random_split(dataset, splits))
            ]
        )
    if simple:
        print("train")
        for i in range(len(classifier_datasets.train)):
            print(classifier_datasets.train[i])
        print("test")
        for i in range(len(classifier_datasets.test)):
            print(classifier_datasets.test[i])

    classifier_loaders = Datasets(
        *[
            DataLoader(dataset, batch_size=batch_size, **kwargs)
            for dataset in classifier_datasets
        ]
    )
    # the discriminator tries to tell train-set examples from test-set examples
    discriminator_dataset = Datasets(
        *random_split(
            classifier_datasets.train + classifier_datasets.test,
            [splits.train, splits.test],
        ),
        valid=None,
    )
    discriminator_loaders = Datasets(
        train=DataLoader(discriminator_dataset.train, batch_size=batch_size, **kwargs),
        test=DataLoader(discriminator_dataset.test, batch_size=batch_size, **kwargs),
        valid=None,
    )

    classifier = Classifier(n=simple_dataset_size).to(device)
    classifier_optimizer = optim.SGD(
        classifier.parameters(),
        **{
            k.replace("classifier_", ""): v
            for k, v in classifier_optimizer_args.items()
        },
    )
    discriminator = Discriminator(n=simple_dataset_size, **discriminator_args).to(device)
    discriminator_optimizer = optim.SGD(
        discriminator.parameters(),
        **{
            k.replace("discriminator_", ""): v
            for k, v in discriminator_optimizer_args.items()
        },
    )
    writer = SummaryWriter(str(log_dir))

    if classifier_load_path:
        classifier.load_state_dict(torch.load(classifier_load_path))
        # sanity check to make sure that the classifier was properly loaded
        for k, v in test(
            classifier=classifier, device=device, test_loader=classifier_loaders.train
        ).items():
            writer.add_scalar("sanity_check" + k, v, 0)
        for k, v in test(
            classifier=classifier, device=device, test_loader=classifier_loaders.test
        ).items():
            pass  # test-split metrics are computed but not logged here
    if discriminator_load_path:
        # load into the discriminator (the original loaded this checkpoint into
        # the classifier, which contradicts the sanity check below)
        discriminator.load_state_dict(torch.load(discriminator_load_path))
        # sanity check to make sure that the discriminator was properly loaded
        for k, v in test_discriminator(
            classifier=classifier,
            discriminator=discriminator,
            device=device,
            test_loader=discriminator_loaders.train,
        ).items():
            writer.add_scalar("sanity_check" + k, v, 0)

    iterations = range(num_iterations) if num_iterations else itertools.count()
    batch_count = Counter()
    for i in iterations:
        for k, v in test(
            classifier=classifier, device=device, test_loader=classifier_loaders.valid
        ).items():
            writer.add_scalar(k, v, i)
        for epoch in tqdm(range(1, classifier_epochs + 1), desc="classifier"):
            for counter in train(
                classifier=classifier,
                discriminator=discriminator,
                aux_coef=aux_coef if i > 0 else 0,  # no auxiliary loss on the first pass
                device=device,
                train_loader=classifier_loaders.train,
                optimizer=classifier_optimizer,
                log_interval=log_interval,
            ):
                batch_count.update(classifier=counter["batch"])
                for k, v in counter.items():
                    if k != "batch":
                        writer.add_scalar(k, v, batch_count["classifier"])
        if simple:
            print("classifier weights")
            for p in classifier.parameters():
                print(p)
        for k, v in test_discriminator(
            classifier=classifier,
            discriminator=discriminator,
            device=device,
            test_loader=discriminator_loaders.test,
        ).items():
            writer.add_scalar(k, v, i)
        iterator = (
            itertools.count()
            if discriminator_epochs is None
            else tqdm(range(1, discriminator_epochs + 1), desc="discriminator")
        )
        for epoch in iterator:
            for j, counter in enumerate(
                train_discriminator(
                    classifier=classifier,
                    discriminator=discriminator,
                    device=device,
                    train_loader=discriminator_loaders.train,
                    optimizer=discriminator_optimizer,
                    log_interval=log_interval,
                    use_pbar=discriminator_epochs is None,
                )
            ):
                batch_count.update(discriminator=counter["batch"])
                for k, v in counter.items():
                    if k != "batch":
                        writer.add_scalar(k, v, batch_count["discriminator"])
        if simple:
            print("discriminator weights")
            for p in discriminator.parameters():
                print(p)
        torch.save(classifier.state_dict(), str(Path(log_dir, "classifier.pt")))
        torch.save(discriminator.state_dict(), str(Path(log_dir, "discriminator.pt")))
    tr.ToTensor(),  # must precede Normalize, which expects a tensor, not a PIL image
    tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
test_data = Voc2012('/home/tom/DISK/DISK2/jian/PASCAL/VOC2012', 'val',
                    transform=test_transforms)
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=1,
                                          pin_memory=False)
length_test_dataset = len(test_data)
NUM_CLASS = 20
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

convnet = ResNet50(pretrained=True)
classifier = Classifier(in_features=2048, num_class=NUM_CLASS)
pan = PAN(convnet.blocks[::-1])
mask_classifier = Mask_Classifier(in_features=256, num_class=(NUM_CLASS + 1))
color_classifier = Color_Classifier(in_features=256, num_class=2)
convnet.to(device)
classifier.to(device)
pan.to(device)
mask_classifier.to(device)
color_classifier.to(device)

def test(data_loader):
    global best_acc
    convnet.eval()
    pan.eval()
    all_i_count = []
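# --- Hedged example: why ToTensor must come before Normalize (fixed above) ---
# A quick standalone check with a hypothetical blank test image: ToTensor
# converts a PIL image to a [0, 1] float tensor, and Normalize expects a
# tensor, so composing them in the other order raises a TypeError.
import torchvision.transforms as tr
from PIL import Image

img = Image.new("RGB", (32, 32))
pipeline = tr.Compose([
    tr.ToTensor(),
    tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
print(pipeline(img).shape)  # torch.Size([3, 32, 32])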
    torch.load(os.path.join('checkpoints', FLAGS.decoder_save),
               map_location=lambda storage, loc: storage))

# class labels variable
X = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                      FLAGS.image_size, FLAGS.image_size)
class_labels = torch.LongTensor(FLAGS.batch_size)

# warn if CUDA is available but unused
if torch.cuda.is_available() and not FLAGS.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# load the dataset and create a data loader instance
print('Loading MNIST dataset...')
mnist = datasets.MNIST(root='mnist', download=True, train=True,
                       transform=transform_config)
loader = cycle(DataLoader(mnist, batch_size=FLAGS.batch_size, shuffle=True,
                          num_workers=0, drop_last=True))

style_classifier = Classifier(z_dim=FLAGS.style_dim, num_classes=FLAGS.num_classes)
style_classifier.apply(weights_init)
class_classifier = Classifier(z_dim=FLAGS.class_dim, num_classes=FLAGS.num_classes)
class_classifier.apply(weights_init)

cross_entropy_loss = nn.CrossEntropyLoss()

style_classifier_optimizer = optim.Adam(
    list(style_classifier.parameters()),
    lr=FLAGS.initial_learning_rate,
    betas=(FLAGS.beta_1, FLAGS.beta_2)
)
# the original text truncates here; the arguments are completed to mirror the
# style classifier's optimizer above
class_classifier_optimizer = optim.Adam(
    list(class_classifier.parameters()),
    lr=FLAGS.initial_learning_rate,
    betas=(FLAGS.beta_1, FLAGS.beta_2)
)
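# --- Hedged example: the cycle(DataLoader(...)) idiom used above ---
# A small sketch with toy tensors (not MNIST): wrapping the loader in
# itertools.cycle yields an endless batch stream, so the training loop can
# call next(loader) without any per-epoch bookkeeping; drop_last=True keeps
# every batch the same size.
from itertools import cycle
import torch
from torch.utils.data import DataLoader, TensorDataset

data = TensorDataset(torch.randn(10, 3), torch.arange(10))
loader = cycle(DataLoader(data, batch_size=4, shuffle=True, drop_last=True))
x, y = next(loader)  # first batch of 4
x, y = next(loader)  # keeps yielding past the end of the underlying dataset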