def __init__(self, embedding_dim=64, embedding_fn=None, classifier=None):
    super().__init__()
    if embedding_fn is None:
        self.embedding_fn = feature_encoder(num_classes=embedding_dim)
        self.embedding_fn = self.embedding_fn.cuda()
    else:
        self.embedding_fn = embedding_fn

    if classifier is None:
        self.classifier = Classifier(fea_dim=embedding_dim).to(
            torch.device("cuda"))
        #             self.classifier = Classifier(fea_dim=576).to(torch.device("cuda"))
        self.classifier = self.classifier.cuda()
    else:
        self.classifier = classifier
    torch.cuda.empty_cache()
def __init__(self, meta_iterations, embedding_dim=64):
    super().__init__()
    self.embedding_fn = feature_encoder(num_classes=embedding_dim)
    #         self.embedding_fn = LoadParameter(self.embedding_fn, torch.load('resnet18_tiered.pth.tar')['state_dict'])
    self.classifier = Classifier(fea_dim=embedding_dim).to(
        torch.device("cuda"))
    #         self.classifier = Classifier(fea_dim=576).to(torch.device("cuda"))
    self.embedding_fn = self.embedding_fn.to(torch.device("cuda"))
    torch.cuda.empty_cache()
    self.optimizer = optim.SGD(self.classifier.parameters(),
                               lr=0.001,
                               momentum=0.9,
                               weight_decay=1e-4,
                               nesterov=False)
    self.lr_schedule = optim.lr_scheduler.MultiStepLR(self.optimizer,
                                                      milestones=[5500],
                                                      gamma=0.1)
    self.count = 0
    self.meta_iterations = meta_iterations
Example #3
def main(args):
    torch.manual_seed(args.seed)
    if not os.path.exists(args.res_dir):
        os.mkdir(args.res_dir)

    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)

    data1 = dd.io.load(os.path.join(args.vec_dir, 'NYU_correlation_matrix.h5'))
    data2 = dd.io.load(os.path.join(args.vec_dir, 'UM_correlation_matrix.h5'))
    data3 = dd.io.load(os.path.join(args.vec_dir, 'USM_correlation_matrix.h5'))
    data4 = dd.io.load(os.path.join(args.vec_dir,
                                    'UCLA_correlation_matrix.h5'))

    x1 = torch.from_numpy(data1['data']).float()
    y1 = torch.from_numpy(data1['label']).long()
    x2 = torch.from_numpy(data2['data']).float()
    y2 = torch.from_numpy(data2['label']).long()
    x3 = torch.from_numpy(data3['data']).float()
    y3 = torch.from_numpy(data3['label']).long()
    x4 = torch.from_numpy(data4['data']).float()
    y4 = torch.from_numpy(data4['label']).long()

    if args.overlap:
        idNYU = dd.io.load('./idx/NYU_sub_overlap.h5')
        idUM = dd.io.load('./idx/UM_sub_overlap.h5')
        idUSM = dd.io.load('./idx/USM_sub_overlap.h5')
        idUCLA = dd.io.load('./idx/UCLA_sub_overlap.h5')
    else:
        idNYU = dd.io.load('./idx/NYU_sub.h5')
        idUM = dd.io.load('./idx/UM_sub.h5')
        idUSM = dd.io.load('./idx/USM_sub.h5')
        idUCLA = dd.io.load('./idx/UCLA_sub.h5')

    if args.split == 0:
        tr1 = idNYU['1'] + idNYU['2'] + idNYU['3'] + idNYU['4']
        tr2 = idUM['1'] + idUM['2'] + idUM['3'] + idUM['4']
        tr3 = idUSM['1'] + idUSM['2'] + idUSM['3'] + idUSM['4']
        tr4 = idUCLA['1'] + idUCLA['2'] + idUCLA['3'] + idUCLA['4']
        te1 = idNYU['0']
        te2 = idUM['0']
        te3 = idUSM['0']
        te4 = idUCLA['0']
    elif args.split == 1:
        tr1 = idNYU['0'] + idNYU['2'] + idNYU['3'] + idNYU['4']
        tr2 = idUM['0'] + idUM['2'] + idUM['3'] + idUM['4']
        tr3 = idUSM['0'] + idUSM['2'] + idUSM['3'] + idUSM['4']
        tr4 = idUCLA['0'] + idUCLA['2'] + idUCLA['3'] + idUCLA['4']
        te1 = idNYU['1']
        te2 = idUM['1']
        te3 = idUSM['1']
        te4 = idUCLA['1']
    elif args.split == 2:
        tr1 = idNYU['0'] + idNYU['1'] + idNYU['3'] + idNYU['4']
        tr2 = idUM['0'] + idUM['1'] + idUM['3'] + idUM['4']
        tr3 = idUSM['0'] + idUSM['1'] + idUSM['3'] + idUSM['4']
        tr4 = idUCLA['0'] + idUCLA['1'] + idUCLA['3'] + idUCLA['4']
        te1 = idNYU['2']
        te2 = idUM['2']
        te3 = idUSM['2']
        te4 = idUCLA['2']
    elif args.split == 3:
        tr1 = idNYU['0'] + idNYU['1'] + idNYU['2'] + idNYU['4']
        tr2 = idUM['0'] + idUM['1'] + idUM['2'] + idUM['4']
        tr3 = idUSM['0'] + idUSM['1'] + idUSM['2'] + idUSM['4']
        tr4 = idUCLA['0'] + idUCLA['1'] + idUCLA['2'] + idUCLA['4']
        te1 = idNYU['3']
        te2 = idUM['3']
        te3 = idUSM['3']
        te4 = idUCLA['3']
    elif args.split == 4:
        tr1 = idNYU['0'] + idNYU['1'] + idNYU['2'] + idNYU['3']
        tr2 = idUM['0'] + idUM['1'] + idUM['2'] + idUM['3']
        tr3 = idUSM['0'] + idUSM['1'] + idUSM['2'] + idUSM['3']
        tr4 = idUCLA['0'] + idUCLA['1'] + idUCLA['2'] + idUCLA['3']
        te1 = idNYU['4']
        te2 = idUM['4']
        te3 = idUSM['4']
        te4 = idUCLA['4']

    x1_train = x1[tr1]
    y1_train = y1[tr1]
    x2_train = x2[tr2]
    y2_train = y2[tr2]
    x3_train = x3[tr3]
    y3_train = y3[tr3]
    x4_train = x4[tr4]
    y4_train = y4[tr4]

    x1_test = x1[te1]
    y1_test = y1[te1]
    x2_test = x2[te2]
    y2_test = y2[te2]
    x3_test = x3[te3]
    y3_test = y3[te3]
    x4_test = x4[te4]
    y4_test = y4[te4]

    if args.sepnorm:
        mean = x1_train.mean(0, keepdim=True)
        dev = x1_train.std(0, keepdim=True)
        x1_train = (x1_train - mean) / dev
        x1_test = (x1_test - mean) / dev

        mean = x2_train.mean(0, keepdim=True)
        dev = x2_train.std(0, keepdim=True)
        x2_train = (x2_train - mean) / dev
        x2_test = (x2_test - mean) / dev

        mean = x3_train.mean(0, keepdim=True)
        dev = x3_train.std(0, keepdim=True)
        x3_train = (x3_train - mean) / dev
        x3_test = (x3_test - mean) / dev

        mean = x4_train.mean(0, keepdim=True)
        dev = x4_train.std(0, keepdim=True)
        x4_train = (x4_train - mean) / dev
        x4_test = (x4_test - mean) / dev
    else:
        mean = torch.cat((x1_train, x2_train, x3_train, x4_train),
                         0).mean(0, keepdim=True)
        dev = torch.cat((x1_train, x2_train, x3_train, x4_train),
                        0).std(0, keepdim=True)
        x1_train = (x1_train - mean) / dev
        x1_test = (x1_test - mean) / dev
        x2_train = (x2_train - mean) / dev
        x2_test = (x2_test - mean) / dev
        x3_train = (x3_train - mean) / dev
        x3_test = (x3_test - mean) / dev
        x4_train = (x4_train - mean) / dev
        x4_test = (x4_test - mean) / dev

    train1 = TensorDataset(x1_train, y1_train)
    train_loader1 = DataLoader(train1,
                               batch_size=len(train1) // args.nsteps,
                               shuffle=True)
    train2 = TensorDataset(x2_train, y2_train)
    train_loader2 = DataLoader(train2,
                               batch_size=len(train2) // args.nsteps,
                               shuffle=True)
    train3 = TensorDataset(x3_train, y3_train)
    train_loader3 = DataLoader(train3,
                               batch_size=len(train3) // args.nsteps,
                               shuffle=True)
    train4 = TensorDataset(x4_train, y4_train)
    train_loader4 = DataLoader(train4,
                               batch_size=len(train4) // args.nsteps,
                               shuffle=True)
    train_loaders = [
        train_loader1, train_loader2, train_loader3, train_loader4
    ]

    test1 = TensorDataset(x1_test, y1_test)
    test2 = TensorDataset(x2_test, y2_test)
    test3 = TensorDataset(x3_test, y3_test)
    test4 = TensorDataset(x4_test, y4_test)
    test_loader1 = DataLoader(test1,
                              batch_size=args.test_batch_size1,
                              shuffle=False)
    test_loader2 = DataLoader(test2,
                              batch_size=args.test_batch_size2,
                              shuffle=False)
    test_loader3 = DataLoader(test3,
                              batch_size=args.test_batch_size3,
                              shuffle=False)
    test_loader4 = DataLoader(test4,
                              batch_size=args.test_batch_size4,
                              shuffle=False)
    tbs = [
        args.test_batch_size1, args.test_batch_size2, args.test_batch_size3,
        args.test_batch_size4
    ]
    test_loaders = [test_loader1, test_loader2, test_loader3, test_loader4]

    # federated setup
    model1 = MoE(6105, args.feddim, 2).to(device)
    model2 = MoE(6105, args.feddim, 2).to(device)
    model3 = MoE(6105, args.feddim, 2).to(device)
    model4 = MoE(6105, args.feddim, 2).to(device)
    optimizer1 = optim.Adam(model1.parameters(),
                            lr=args.lr1,
                            weight_decay=1e-3)
    optimizer2 = optim.Adam(model2.parameters(),
                            lr=args.lr2,
                            weight_decay=1e-3)
    optimizer3 = optim.Adam(model3.parameters(),
                            lr=args.lr3,
                            weight_decay=1e-3)
    optimizer4 = optim.Adam(model4.parameters(),
                            lr=args.lr4,
                            weight_decay=1e-3)

    models = [model1, model2, model3, model4]
    optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]

    model = MoE(6105, args.feddim, 2).to(device)
    print('Global Model:', model)

    # local setup; does not communicate with the federated model
    model_local1 = Classifier(6105, args.dim, 2).to(device)
    model_local2 = Classifier(6105, args.dim, 2).to(device)
    model_local3 = Classifier(6105, args.dim, 2).to(device)
    model_local4 = Classifier(6105, args.dim, 2).to(device)
    optimizer_local1 = optim.Adam(model_local1.parameters(),
                                  lr=args.llr,
                                  weight_decay=5e-2)
    optimizer_local2 = optim.Adam(model_local2.parameters(),
                                  lr=args.llr,
                                  weight_decay=5e-2)
    optimizer_local3 = optim.Adam(model_local3.parameters(),
                                  lr=args.llr,
                                  weight_decay=5e-2)
    optimizer_local4 = optim.Adam(model_local4.parameters(),
                                  lr=args.llr,
                                  weight_decay=5e-2)
    models_local = [model_local1, model_local2, model_local3, model_local4]
    optimizers_local = [
        optimizer_local1, optimizer_local2, optimizer_local3, optimizer_local4
    ]

    nnloss = nn.NLLLoss()
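
    # Training procedure implemented below: each of the 4 sites keeps a
    # private local classifier plus a site-specific MoE model. Every `pace`
    # steps the MoE classifier weights are averaged across sites (with
    # Gaussian noise added to each contribution) into the global model, which
    # is then copied back to every site.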

    def train(epoch):
        pace = args.pace
        for i in range(4):
            models[i].train()
            models_local[i].train()
            # halve both learning rates every 20 epochs
            if epoch % 20 == 0:
                for param_group1 in optimizers[i].param_groups:
                    param_group1['lr'] = 0.5 * param_group1['lr']
                for param_group1 in optimizers_local[i].param_groups:
                    param_group1['lr'] = 0.5 * param_group1['lr']

        #define weights
        w = dict()
        denominator = np.sum(np.array(tbs))
        for i in range(4):
            w[i] = 0.25  #tbs[i]/denominator
        loss_all = dict()
        loss_lc = dict()
        num_data = dict()
        for i in range(4):
            loss_all[i] = 0
            loss_lc[i] = 0
            num_data[i] = 0
        count = 0
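        # each step draws one batch per site, updates the site's local
        # classifier, then updates the site's MoE model, feeding it the local
        # classifier's output alongside the features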
        for t in range(args.nsteps):
            for i in range(4):
                optimizers[i].zero_grad()
                a, b = next(iter(train_loaders[i]))
                num_data[i] += b.size(0)
                a = a.to(device)
                b = b.to(device)
                outlocal = models_local[i](a)
                loss_local = nnloss(outlocal, b)
                loss_local.backward(retain_graph=True)
                loss_lc[i] += loss_local.item() * b.size(0)
                optimizers_local[i].step()

                output, _ = models[i](a, outlocal)
                loss = nnloss(output, b)
                loss.backward()
                loss_all[i] += loss.item() * b.size(0)
                optimizers[i].step()
            count += 1
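            # every `pace` steps (and at the final step) average the per-site
            # classifier weights into the global model, adding zero-mean
            # Gaussian noise scaled by args.noise times each site's parameter
            # std; integer buffers are copied from site 0 unchanged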

            if count % pace == 0 or t == args.nsteps - 1:
                with torch.no_grad():
                    for key in model.classifier.state_dict().keys():
                        if models[0].classifier.state_dict(
                        )[key].dtype == torch.int64:
                            model.classifier.state_dict()[key].data.copy_(
                                models[0].classifier.state_dict()[key])
                        else:
                            temp = torch.zeros_like(
                                model.classifier.state_dict()[key])
                            # add noise
                            for s in range(4):
                                noise_dist = tdist.Normal(
                                    torch.tensor([0.0]),
                                    args.noise *
                                    torch.std(models[s].classifier.state_dict(
                                    )[key].detach().cpu()))
                                noise = noise_dist.sample(
                                    models[s].classifier.state_dict()
                                    [key].size()).squeeze()
                                noise = noise.to(device)
                                temp += w[s] * (
                                    models[s].classifier.state_dict()[key] +
                                    noise)
                            # update global model
                            model.classifier.state_dict()[key].data.copy_(temp)
                            # only the classifier gets updated
                            for s in range(4):
                                models[s].classifier.state_dict(
                                )[key].data.copy_(
                                    model.classifier.state_dict()[key])

        return loss_all[0] / num_data[0], loss_all[1] / num_data[1],loss_all[2] / num_data[2],loss_all[3] / num_data[3], \
               loss_lc[0] / num_data[0],loss_lc[1] / num_data[1], loss_lc[2] / num_data[2], loss_lc[3] / num_data[3]

    def test(federated_model, dataloader, train=True):
        federated_model.eval()
        test_loss = 0
        correct = 0
        for data, target in dataloader:
            data = data.to(device)
            target = target.to(device)
            output = federated_model(data)
            test_loss += nnloss(output, target).item() * target.size(0)
            pred = output.data.max(1)[1]
            correct += pred.eq(target.view(-1)).sum().item()

        test_loss /= len(dataloader.dataset)
        correct /= len(dataloader.dataset)
        if train:
            print('Train set local: Average loss: {:.4f}, Average acc: {:.4f}'.
                  format(test_loss, correct))
        else:
            print('Test set local: Average loss: {:.4f}, Average acc: {:.4f}'.
                  format(test_loss, correct))
        return test_loss, correct

    def testfed(federated_model, local_model, dataloader, train=True):
        federated_model = federated_model.to(device)
        local_model = local_model.to(device)
        federated_model.eval()
        local_model.eval()
        test_loss = 0
        correct = 0
        outputs = []
        preds = []
        targets = []
        gates = []
        for data, target in dataloader:
            data = data.to(device)
            targets.append(target[0].detach().numpy())
            target = target.to(device)
            local_output = local_model(data)
            output, a = federated_model(data, local_output)
            outputs.append(output.detach().cpu().numpy())
            gates.append(a.detach().cpu().numpy())
            test_loss += nnloss(output, target).item() * target.size(0)
            pred = output.data.max(1)[1]
            preds.append(pred.detach().cpu().numpy())
            correct += pred.eq(target.view(-1)).sum().item()

        test_loss /= len(dataloader.dataset)
        correct /= len(dataloader.dataset)
        if train:
            print('Train set fed: Average loss: {:.4f}, Average acc: {:.4f}'.
                  format(test_loss, correct))
        else:
            print('Test set fed: Average loss: {:.4f}, Average acc: {:.4f}'.
                  format(test_loss, correct))
        return test_loss, correct, targets, outputs, preds, gates

    best_acc = [0, 0, 0, 0]
    best_epoch = [0, 0, 0, 0]

    for epoch in range(args.epochs):
        start_time = time.time()
        print(f"Epoch Number {epoch + 1}")
        l1, l2, l3, l4, lc1, lc2, lc3, lc4 = train(epoch)
        print("===========================")
        print(
            "L1: {:.7f}, L2: {:.7f}, L3: {:.7f}, L4: {:.7f}, Lc1: {:.7f}, Lc2: {:.7f}, Lc3: {:.7f}, Lc4: {:.7f} "
            .format(l1, l2, l3, l4, lc1, lc2, lc3, lc4))

        #local model performance
        print("***Local***")
        for i in range(4):
            test(models_local[i], train_loaders[i], train=True)
            test(models_local[i], test_loaders[i], train=False)

        #fed model performance
        print("***Federated***")
        for i in range(4):
            test(model.classifier, train_loaders[i], train=True)
            test(model.classifier, test_loaders[i], train=False)
        # moe model performance
        print("***MOE***")
        te_accs = list()
        targets = list()
        outputs = list()
        preds = list()
        gates = list()
        for i in range(4):
            testfed(models[i], models_local[i], train_loaders[i], train=True)
            _, te_acc, tar, out, pre, gate = testfed(models[i],
                                                     models_local[i],
                                                     test_loaders[i],
                                                     train=False)
            te_accs.append(te_acc)
            targets.append(tar)
            outputs.append(out)
            preds.append(pre)
            gates.append(gate)

        for i in range(4):
            if te_accs[i] > best_acc[i]:
                best_acc[i] = te_accs[i]
                best_epoch[i] = epoch

        total_time = time.time() - start_time
        print('Communication time over the network', round(total_time, 2),
              's\n')
    model_wts = copy.deepcopy(model.state_dict())
    torch.save(model_wts, os.path.join(args.model_dir,
                                       str(args.split) + '.pth'))
    dd.io.save(
        os.path.join(args.res_dir, 'NYU_' + str(args.split) + '.h5'), {
            'outputs': outputs[0],
            'preds': preds[0],
            'targets': targets[0],
            'gates': gates[0]
        })
    dd.io.save(
        os.path.join(args.res_dir, 'UM_' + str(args.split) + '.h5'), {
            'outputs': outputs[1],
            'preds': preds[1],
            'targets': targets[1],
            'gates': gates[1]
        })
    dd.io.save(
        os.path.join(args.res_dir, 'USM_' + str(args.split) + '.h5'), {
            'outputs': outputs[2],
            'preds': preds[2],
            'targets': targets[2],
            'gates': gates[2]
        })
    dd.io.save(
        os.path.join(args.res_dir, 'UCLA_' + str(args.split) + '.h5'), {
            'outputs': outputs[3],
            'preds': preds[3],
            'targets': targets[3],
            'gates': gates[3]
        })
    for i in range(4):
        print('Best Acc:', best_acc[i], 'Best Epoch:', best_epoch[i])

    print('split:', args.split, '   noise:', args.noise, '   pace:', args.pace)
Example #4
def main(args):
    torch.manual_seed(args.seed)
    if not os.path.exists(args.res_dir):
        os.mkdir(args.res_dir)
    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)

    log_dir = os.path.join('./log', 'Align_' + str(args.split))
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    writer = SummaryWriter(log_dir)

    data1 = dd.io.load(os.path.join(args.vec_dir, 'NYU_correlation_matrix.h5'))
    data2 = dd.io.load(os.path.join(args.vec_dir, 'UM_correlation_matrix.h5'))
    data3 = dd.io.load(os.path.join(args.vec_dir, 'USM_correlation_matrix.h5'))
    data4 = dd.io.load(os.path.join(args.vec_dir,
                                    'UCLA_correlation_matrix.h5'))

    x1 = torch.from_numpy(data1['data']).float()
    y1 = torch.from_numpy(data1['label']).long()
    x2 = torch.from_numpy(data2['data']).float()
    y2 = torch.from_numpy(data2['label']).long()
    x3 = torch.from_numpy(data3['data']).float()
    y3 = torch.from_numpy(data3['label']).long()
    x4 = torch.from_numpy(data4['data']).float()
    y4 = torch.from_numpy(data4['label']).long()

    if args.overlap:
        idNYU = dd.io.load('./idx/NYU_sub_overlap.h5')
        idUM = dd.io.load('./idx/UM_sub_overlap.h5')
        idUSM = dd.io.load('./idx/USM_sub_overlap.h5')
        idUCLA = dd.io.load('./idx/UCLA_sub_overlap.h5')
    else:
        idNYU = dd.io.load('./idx/NYU_sub.h5')
        idUM = dd.io.load('./idx/UM_sub.h5')
        idUSM = dd.io.load('./idx/USM_sub.h5')
        idUCLA = dd.io.load('./idx/UCLA_sub.h5')

    if args.split == 0:
        tr1 = idNYU['1'] + idNYU['2'] + idNYU['3'] + idNYU['4']
        tr2 = idUM['1'] + idUM['2'] + idUM['3'] + idUM['4']
        tr3 = idUSM['1'] + idUSM['2'] + idUSM['3'] + idUSM['4']
        tr4 = idUCLA['1'] + idUCLA['2'] + idUCLA['3'] + idUCLA['4']
        te1 = idNYU['0']
        te2 = idUM['0']
        te3 = idUSM['0']
        te4 = idUCLA['0']
    elif args.split == 1:
        tr1 = idNYU['0'] + idNYU['2'] + idNYU['3'] + idNYU['4']
        tr2 = idUM['0'] + idUM['2'] + idUM['3'] + idUM['4']
        tr3 = idUSM['0'] + idUSM['2'] + idUSM['3'] + idUSM['4']
        tr4 = idUCLA['0'] + idUCLA['2'] + idUCLA['3'] + idUCLA['4']
        te1 = idNYU['1']
        te2 = idUM['1']
        te3 = idUSM['1']
        te4 = idUCLA['1']
    elif args.split == 2:
        tr1 = idNYU['0'] + idNYU['1'] + idNYU['3'] + idNYU['4']
        tr2 = idUM['0'] + idUM['1'] + idUM['3'] + idUM['4']
        tr3 = idUSM['0'] + idUSM['1'] + idUSM['3'] + idUSM['4']
        tr4 = idUCLA['0'] + idUCLA['1'] + idUCLA['3'] + idUCLA['4']
        te1 = idNYU['2']
        te2 = idUM['2']
        te3 = idUSM['2']
        te4 = idUCLA['2']
    elif args.split == 3:
        tr1 = idNYU['0'] + idNYU['1'] + idNYU['2'] + idNYU['4']
        tr2 = idUM['0'] + idUM['1'] + idUM['2'] + idUM['4']
        tr3 = idUSM['0'] + idUSM['1'] + idUSM['2'] + idUSM['4']
        tr4 = idUCLA['0'] + idUCLA['1'] + idUCLA['2'] + idUCLA['4']
        te1 = idNYU['3']
        te2 = idUM['3']
        te3 = idUSM['3']
        te4 = idUCLA['3']
    elif args.split == 4:
        tr1 = idNYU['0'] + idNYU['1'] + idNYU['2'] + idNYU['3']
        tr2 = idUM['0'] + idUM['1'] + idUM['2'] + idUM['3']
        tr3 = idUSM['0'] + idUSM['1'] + idUSM['2'] + idUSM['3']
        tr4 = idUCLA['0'] + idUCLA['1'] + idUCLA['2'] + idUCLA['3']
        te1 = idNYU['4']
        te2 = idUM['4']
        te3 = idUSM['4']
        te4 = idUCLA['4']

    x1_train = x1[tr1]
    y1_train = y1[tr1]
    x2_train = x2[tr2]
    y2_train = y2[tr2]
    x3_train = x3[tr3]
    y3_train = y3[tr3]
    x4_train = x4[tr4]
    y4_train = y4[tr4]

    x1_test = x1[te1]
    y1_test = y1[te1]
    x2_test = x2[te2]
    y2_test = y2[te2]
    x3_test = x3[te3]
    y3_test = y3[te3]
    x4_test = x4[te4]
    y4_test = y4[te4]

    if args.sepnorm:
        mean = x1_train.mean(0, keepdim=True)
        dev = x1_train.std(0, keepdim=True)
        x1_train = (x1_train - mean) / dev
        x1_test = (x1_test - mean) / dev

        mean = x2_train.mean(0, keepdim=True)
        dev = x2_train.std(0, keepdim=True)
        x2_train = (x2_train - mean) / dev
        x2_test = (x2_test - mean) / dev

        mean = x3_train.mean(0, keepdim=True)
        dev = x3_train.std(0, keepdim=True)
        x3_train = (x3_train - mean) / dev
        x3_test = (x3_test - mean) / dev

        mean = x4_train.mean(0, keepdim=True)
        dev = x4_train.std(0, keepdim=True)
        x4_train = (x4_train - mean) / dev
        x4_test = (x4_test - mean) / dev
    else:
        mean = torch.cat((x1_train, x2_train, x3_train, x4_train),
                         0).mean(0, keepdim=True)
        dev = torch.cat((x1_train, x2_train, x3_train, x4_train),
                        0).std(0, keepdim=True)
        x1_train = (x1_train - mean) / dev
        x1_test = (x1_test - mean) / dev
        x2_train = (x2_train - mean) / dev
        x2_test = (x2_test - mean) / dev
        x3_train = (x3_train - mean) / dev
        x3_test = (x3_test - mean) / dev
        x4_train = (x4_train - mean) / dev
        x4_test = (x4_test - mean) / dev

    train1 = TensorDataset(x1_train, y1_train)
    train_loader1 = DataLoader(train1,
                               batch_size=len(train1) // args.nsteps,
                               shuffle=True)
    train2 = TensorDataset(x2_train, y2_train)
    train_loader2 = DataLoader(train2,
                               batch_size=len(train2) // args.nsteps,
                               shuffle=True)
    train3 = TensorDataset(x3_train, y3_train)
    train_loader3 = DataLoader(train3,
                               batch_size=len(train3) // args.nsteps,
                               shuffle=True)
    train4 = TensorDataset(x4_train, y4_train)
    train_loader4 = DataLoader(train4,
                               batch_size=len(train4) // args.nsteps,
                               shuffle=True)
    train_loaders = [
        train_loader1, train_loader2, train_loader3, train_loader4
    ]
    data_iters = [
        iter(train_loader1),
        iter(train_loader2),
        iter(train_loader3),
        iter(train_loader4)
    ]

    test1 = TensorDataset(x1_test, y1_test)
    test2 = TensorDataset(x2_test, y2_test)
    test3 = TensorDataset(x3_test, y3_test)
    test4 = TensorDataset(x4_test, y4_test)
    test_loader1 = DataLoader(test1,
                              batch_size=args.test_batch_size1,
                              shuffle=False)
    test_loader2 = DataLoader(test2,
                              batch_size=args.test_batch_size2,
                              shuffle=False)
    test_loader3 = DataLoader(test3,
                              batch_size=args.test_batch_size3,
                              shuffle=False)
    test_loader4 = DataLoader(test4,
                              batch_size=args.test_batch_size4,
                              shuffle=False)
    tbs = [
        args.test_batch_size1, args.test_batch_size2, args.test_batch_size3,
        args.test_batch_size4
    ]
    test_loaders = [test_loader1, test_loader2, test_loader3, test_loader4]

    # federated setup
    model1 = Classifier(6105, args.dim, 2).to(device)
    model2 = Classifier(6105, args.dim, 2).to(device)
    model3 = Classifier(6105, args.dim, 2).to(device)
    model4 = Classifier(6105, args.dim, 2).to(device)
    models = [model1, model2, model3, model4]
    optimizer1 = optim.Adam(model1.parameters(),
                            lr=args.lr1,
                            weight_decay=1e-3)
    optimizer2 = optim.Adam(model2.parameters(),
                            lr=args.lr2,
                            weight_decay=1e-3)
    optimizer3 = optim.Adam(model3.parameters(),
                            lr=args.lr3,
                            weight_decay=1e-3)
    optimizer4 = optim.Adam(model4.parameters(),
                            lr=args.lr4,
                            weight_decay=1e-3)
    optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]

    optimizerG1 = optim.Adam(model1.encoder.parameters(),
                             lr=args.lr,
                             weight_decay=1e-3)
    optimizerG2 = optim.Adam(model2.encoder.parameters(),
                             lr=args.lr,
                             weight_decay=1e-3)
    optimizerG3 = optim.Adam(model3.encoder.parameters(),
                             lr=args.lr,
                             weight_decay=1e-3)
    optimizerG4 = optim.Adam(model4.encoder.parameters(),
                             lr=args.lr,
                             weight_decay=1e-3)
    optimizerGs = [optimizerG1, optimizerG2, optimizerG3, optimizerG4]

    discriminators = dict()
    optimizerDs = dict()
    for i in range(4):
        discriminators[i] = Discriminator(args.dim).to(device)
        optimizerDs[i] = optim.Adam(discriminators[i].parameters(),
                                    lr=args.lr,
                                    weight_decay=1e-3)

    #global model
    model = Classifier(6105, args.dim, 2).to(device)
    print(model)

    # loss functions
    celoss = nn.CrossEntropyLoss()
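
    # adversarial losses used for feature alignment: advDloss trains a
    # discriminator to score its own site's features (d1) high and another
    # site's features (d2) low; advGloss pushes both scores high, nudging the
    # encoders toward site-indistinguishable features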

    def advDloss(d1, d2):
        res = -torch.log(d1).mean() - torch.log(1 - d2).mean()
        return res

    def advGloss(d1, d2):
        res = -torch.log(d1).mean() - torch.log(d2).mean()
        return res.mean()
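
    # Training procedure implemented below: each site's classifier is trained
    # with cross-entropy, the site encoders are aligned pairwise through the
    # per-site discriminators (losses above), and every `pace` steps all
    # model weights are averaged into the global model with Gaussian or
    # Laplace noise (selected by args.type) added to each site's contribution.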

    def train(epoch):
        pace = args.pace
        for i in range(4):
            models[i].train()
            discriminators[i].train()
            # halve all three learning rates every 20 epochs
            if epoch % 20 == 0:
                for param_group1 in optimizers[i].param_groups:
                    param_group1['lr'] = 0.5 * param_group1['lr']
                for param_group1 in optimizerGs[i].param_groups:
                    param_group1['lr'] = 0.5 * param_group1['lr']
                for param_group1 in optimizerDs[i].param_groups:
                    param_group1['lr'] = 0.5 * param_group1['lr']

        #define weights
        w = dict()
        denominator = np.sum(np.array(tbs))
        for i in range(4):
            w[i] = 0.25  #tbs[i]/denominator

        loss_all = dict()
        lossD_all = dict()
        lossG_all = dict()
        num_data = dict()
        num_dataG = dict()
        num_dataD = dict()
        for i in range(4):
            loss_all[i] = 0
            num_data[i] = EPS
            num_dataG[i] = EPS
            lossG_all[i] = 0
            lossD_all[i] = 0
            num_dataD[i] = EPS

        count = 0
        for t in range(args.nsteps):
            fs = []

            # optimize classifier

            for i in range(4):
                optimizers[i].zero_grad()
                try:
                    a, b = next(data_iters[i])
                except StopIteration:
                    # re-create the iterator once a site's loader is exhausted
                    data_iters[i] = iter(train_loaders[i])
                    a, b = next(data_iters[i])
                num_data[i] += b.size(0)
                a = a.to(device)
                b = b.to(device)
                output = models[i](a)
                loss = celoss(output, b)
                loss_all[i] += loss.item() * b.size(0)
                if epoch >= 0:
                    loss.backward(retain_graph=True)
                    optimizers[i].step()

                fs.append(models[i].encoder(a))

            # optimize alignment: add a small amount of Gaussian noise to each
            # site's features before passing them to the discriminators

            noises = []
            for i in range(4):
                noise_dist = tdist.Normal(
                    torch.tensor([0.0]),
                    0.001 * torch.std(fs[i].detach().cpu()))
                noises.append(
                    noise_dist.sample(fs[i].size()).squeeze().to(device))

            for i in range(4):
                for j in range(4):
                    if i != j:
                        optimizerDs[i].zero_grad()
                        optimizerGs[i].zero_grad()
                        optimizerGs[j].zero_grad()

                        d1 = discriminators[i](fs[i] + noises[i])
                        d2 = discriminators[i](fs[j] + noises[j])
                        num_dataG[i] += d1.size(0)
                        num_dataD[i] += d1.size(0)
                        lossD = advDloss(d1, d2)
                        lossG = advGloss(d1, d2)
                        lossD_all[i] += lossD.item() * d1.size(0)
                        lossG_all[i] += lossG.item() * d1.size(0)
                        lossG_all[j] += lossG.item() * d2.size(0)
                        lossD = 0.1 * lossD
                        if epoch >= 5:
                            lossD.backward(retain_graph=True)
                            optimizerDs[i].step()
                            lossG.backward(retain_graph=True)
                            optimizerGs[i].step()
                            optimizerGs[j].step()
                        writer.add_histogram(
                            'Hist/hist_' + site[i] + '2' + site[j] + '_source',
                            d1, epoch * args.nsteps + t)
                        writer.add_histogram(
                            'Hist/hist_' + site[i] + '2' + site[j] + '_target',
                            d2, epoch * args.nsteps + t)

            count += 1
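            # every `pace` steps (and at the final step) average the per-site
            # model weights into the global model, adding Gaussian
            # (args.type == 'G') or Laplace noise to each contribution, then
            # copy the result back to every site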
            if count % pace == 0 or t == args.nsteps - 1:
                with torch.no_grad():
                    for key in model.state_dict().keys():
                        if models[0].state_dict()[key].dtype == torch.int64:
                            model.state_dict()[key].data.copy_(
                                models[0].state_dict()[key])
                        else:
                            temp = torch.zeros_like(model.state_dict()[key])
                            # add noise
                            for s in range(4):
                                if args.type == 'G':
                                    noise_dist = tdist.Normal(
                                        torch.tensor([0.0]),
                                        args.noise *
                                        torch.std(models[s].state_dict()
                                                  [key].detach().cpu()))
                                else:
                                    noise_dist = tdist.Laplace(
                                        torch.tensor([0.0]),
                                        args.noise *
                                        torch.std(models[s].state_dict()
                                                  [key].detach().cpu()))
                                noise = noise_dist.sample(
                                    models[s].state_dict()[key].size()).squeeze()
                                noise = noise.to(device)
                                temp += w[s] * (models[s].state_dict()[key] +
                                                noise)
                            # update global model
                            model.state_dict()[key].data.copy_(temp)
                            # update local models
                            for s in range(4):
                                models[s].state_dict()[key].data.copy_(
                                    model.state_dict()[key])

        return loss_all, lossG_all, lossD_all, num_data, num_dataG, num_dataD

    def test(federated_model, data_loader, train=False):
        federated_model = federated_model.to(device)
        federated_model.eval()
        test_loss = 0
        correct = 0
        outputs = []
        preds = []
        targets = []
        for data, target in data_loader:
            targets.append(target[0].detach().numpy())
            data = data.to(device)
            target = target.to(device)
            output = federated_model(data)
            outputs.append(output.detach().cpu().numpy())
            test_loss += celoss(output, target).item() * target.size(0)
            pred = output.data.max(1)[1]
            preds.append(pred.detach().cpu().numpy())
            correct += pred.eq(target.view(-1)).sum().item()

        test_loss /= len(data_loader.dataset)
        correct /= len(data_loader.dataset)
        if train:
            print('Train set local: Average loss: {:.4f}, Average acc: {:.4f}'.
                  format(test_loss, correct))
        else:
            print('Test set local: Average loss: {:.4f}, Average acc: {:.4f}'.
                  format(test_loss, correct))
        return test_loss, correct, targets, outputs, preds

    best_acc = 0
    best_epoch = 0
    for epoch in range(args.epochs):
        start_time = time.time()
        print(f"Epoch Number {epoch + 1}")
        l, lG, lD, n, nG, nD = train(epoch)
        print("===========================")
        print("L1: {:.7f}, L2: {:.7f}, L3: {:.7f}, L4: {:.7f} ".format(
            l[0] / n[0], l[1] / n[1], l[2] / n[2], l[3] / n[3]))
        print("G1: {:.7f}, G2: {:.7f}, G3: {:.7f}, G4: {:.7f} ".format(
            lG[0] / nG[0], lG[1] / nG[1], lG[2] / nG[2], lG[3] / nG[3]))
        print("D1: {:.7f}, D2: {:.7f}, D3: {:.7f}, D4: {:.7f} ".format(
            lD[0] / nD[0], lD[1] / nD[1], lD[2] / nD[2], lD[3] / nD[3]))
        writer.add_scalars(
            'CEloss', {
                'l1': l[0] / n[0],
                'l2': l[1] / n[1],
                'l3': l[2] / n[2],
                'l4': l[3] / n[3]
            }, epoch)
        writer.add_scalars(
            'Gloss', {
                'gl1': lG[0] / nG[0],
                'gl2': lG[1] / nG[1],
                'gl3': lG[2] / nG[2],
                'gl4': lG[3] / nG[3]
            }, epoch)
        writer.add_scalars(
            'Dloss', {
                'dl1': lD[0] / nD[0],
                'dl2': lD[1] / nD[1],
                'dl3': lD[2] / nD[2],
                'dl4': lD[3] / nD[3]
            }, epoch)

        print('===NYU===')
        test(model, train_loader1, train=True)
        _, acc1, targets1, outputs1, preds1 = test(model,
                                                   test_loader1,
                                                   train=False)
        print('===UM===')
        test(model, train_loader2, train=True)
        _, acc2, targets2, outputs2, preds2 = test(model,
                                                   test_loader2,
                                                   train=False)
        print('===USM===')
        test(model, train_loader3, train=True)
        _, acc3, targets3, outputs3, preds3 = test(model,
                                                   test_loader3,
                                                   train=False)
        print('===UCLA===')
        test(model, train_loader4, train=True)
        _, acc4, targets4, outputs4, preds4 = test(model,
                                                   test_loader4,
                                                   train=False)
        if (acc1 + acc2 + acc3 + acc4) / 4 > best_acc:
            best_acc = (acc1 + acc2 + acc3 + acc4) / 4
            best_epoch = epoch
        total_time = time.time() - start_time
        print('Communication time over the network', round(total_time, 2),
              's\n')
    model_wts = copy.deepcopy(model.state_dict())
    torch.save(model_wts, os.path.join(args.model_dir,
                                       str(args.split) + '.pth'))
    print('Best Acc:', best_acc, 'Best Epoch:', best_epoch)
    print('split:', args.split, '   noise:', args.noise, '   pace:', args.pace)
    dd.io.save(os.path.join(args.res_dir, 'NYU_' + str(args.split) + '.h5'), {
        'outputs': outputs1,
        'preds': preds1,
        'targets': targets1
    })
    dd.io.save(os.path.join(args.res_dir, 'UM_' + str(args.split) + '.h5'), {
        'outputs': outputs2,
        'preds': preds2,
        'targets': targets2
    })
    dd.io.save(os.path.join(args.res_dir, 'USM_' + str(args.split) + '.h5'), {
        'outputs': outputs3,
        'preds': preds3,
        'targets': targets3
    })
    dd.io.save(os.path.join(args.res_dir, 'UCLA_' + str(args.split) + '.h5'), {
        'outputs': outputs4,
        'preds': preds4,
        'targets': targets4
    })
Example #5
#CLASSI = Classifier.CLASSIFIER(args).to(device)

appr = A_VAE(net, args)
#class_appr = classifier_train(CLASSI, args)

seen_acc = []
unseen_acc = []
harmonic_mean = []
accuracy_matrix = [[] for kk in range(args.num_tasks)]
#area_under_curve = []
overall_acc = []
replay_Classes = []
for t in range(args.num_tasks):
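    # each task gets a freshly initialized classifier head; the task's data,
    # labels, and label vectors are converted to tensors before training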

    print('Task:', t + 1)
    CLASSI = Classifier.CLASSIFIER(args).to(device)
    class_appr = classifier_train(CLASSI, args)

    trainData = torch.tensor(trainData1[t], dtype=torch.float32)
    trainLabels = torch.tensor(trainLabels1[t])
    #print(trainLabels.shape, 'shape00')
    trainLabelVectors = torch.tensor(trainLabelsVectors1[t],
                                     dtype=torch.float32)

    testData = torch.tensor(testData1[t], dtype=torch.float32)
    testLabels = torch.tensor(testLabels1[t], dtype=torch.int64)
    X_train = torch.cat([trainData, trainLabelVectors], dim=1).to(args.device)

    if t == 0:
        #print(t, trainData.shape, trainLabels.shape, trainLabelVectors.shape, )
        appr.train(t, trainData, trainLabels, trainLabelVectors)
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=1,
                                          pin_memory=False)

length_training_dataset = len(training_data)
length_test_dataset = len(test_data)

NUM_CLASS = 20

device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

convnet = ResNet50(pretrained=True)
classifier = Classifier(in_features=2048, num_class=NUM_CLASS)
pan = PAN(convnet.blocks[::-1])
mask_classifier = Mask_Classifier(in_features=256, num_class=(NUM_CLASS + 1))

convnet.to(device)
classifier.to(device)
pan.to(device)
mask_classifier.to(device)


def train(epoch, optimizer, data_loader):
    convnet.train()
    classifier.train()
    pan.train()
    y_true = []
    y_pred = []
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    dataset = TDCCMCDataset(
        filenames=["./data/6zXXZvVvTFs", "./data/2AYaxTiWKoY", "./data/sYbBgkP9aMo"],
        trims=[(960, 9960), (550, 9550), (1, 9901)],
        crops=[(35, 50, 445, 300), (0, 13, 640, 335), (5, 22, 475, 341)],
    )
    loader = DataLoader(
        dataset, batch_size=args.BATCH_SIZE, num_workers=0, pin_memory=True
    )
    print("[pretrain] Dataset and DataLoader ready.")

    tdc = TDC().to(device)
    cmc = CMC().to(device)
    tdc_classifier = Classifier().to(device)
    cmc_classifier = Classifier().to(device)
    print("[pretrain] Neural networks initialized.")

    # Initialize Optimizer
    optim_params = (
        list(tdc.parameters())
        + list(cmc.parameters())
        + list(tdc_classifier.parameters())
        + list(cmc_classifier.parameters())
    )
    optimizer = optim.Adam(optim_params, lr=args.LR)
    print("[pretrain] Optimizer initialized.")

    # Setup t-SNE datasets
    tsne_filenames = [
Example #8
def main(
    no_cuda,
    seed,
    batch_size,
    alpha,
    random_labels,
    classifier_optimizer_args,
    classifier_epochs,
    discriminator_optimizer_args,
    discriminator_epochs,
    discriminator_args,
    classifier_load_path,
    discriminator_load_path,
    log_dir,
    log_interval,
    run_id,
    num_iterations,
    aux_coef,
    simple_dataset_size,
):
    torch.manual_seed(seed)
    use_cuda = not no_cuda and torch.cuda.is_available()
    if use_cuda:
        n_gpu = get_n_gpu()
        try:
            index = int(run_id[-1])
        except ValueError:
            index = random.randrange(0, n_gpu)
        device = torch.device("cuda", index=index % n_gpu)
    else:
        device = "cpu"
    kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True} if use_cuda else {}

    simple = bool(simple_dataset_size)
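    # dataset setup below: either a small synthetic SimpleDataset (when
    # simple_dataset_size is nonzero) or MNIST train+test wrapped in
    # NoiseDataset with percent_noise=alpha; AddLabel tags each split with an
    # index, which the discriminator training below appears to rely on to
    # tell the splits apart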
    if simple:
        splits = Datasets(
            train=simple_dataset_size * 3 // 7,
            test=simple_dataset_size * 3 // 7,
            valid=None,
        )
        splits = splits._replace(valid=simple_dataset_size - splits.train - splits.test)
        classifier_datasets = Datasets(
            train=Subset(
                AddLabel(
                    SimpleDataset(n=simple_dataset_size, generalization_error=alpha), 0
                ),
                list(range(splits.train)),
            ),
            test=Subset(
                AddLabel(
                    SimpleDataset(n=simple_dataset_size, generalization_error=0), 1
                ),
                list(range(splits.train, splits.train + splits.test)),
            ),
            valid=Subset(
                AddLabel(
                    SimpleDataset(n=simple_dataset_size, generalization_error=0), 2
                ),
                list(range(splits.train + splits.test, simple_dataset_size)),
            ),
        )
    else:
        train_dataset = NoiseDataset(
            "../data",
            train=True,
            download=True,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
            percent_noise=alpha,
        )
        test_dataset = NoiseDataset(
            "../data",
            train=False,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
            percent_noise=alpha,
        )
        dataset = train_dataset + test_dataset
        size = len(dataset)
        splits = Datasets(train=size * 3 // 7, test=size * 3 // 7, valid=None)
        splits = splits._replace(valid=size - splits.train - splits.test)
        classifier_datasets = Datasets(
            *[
                AddLabel(dataset, label, random_labels=random_labels)
                for label, dataset in enumerate(random_split(dataset, splits))
            ]
        )
    if simple:
        print("train")
        for i in range(len(classifier_datasets.train)):
            print(classifier_datasets.train[i])
        print("test")
        for i in range(len(classifier_datasets.test)):
            print(classifier_datasets.test[i])
    classifier_loaders = Datasets(
        *[
            DataLoader(dataset, batch_size=batch_size, **kwargs)
            for dataset in classifier_datasets
        ]
    )
    discriminator_dataset = Datasets(
        *random_split(
            classifier_datasets.train + classifier_datasets.test,
            [splits.train, splits.test],
        ),
        valid=None,
    )
    discriminator_loaders = Datasets(
        train=DataLoader(discriminator_dataset.train, batch_size=batch_size, **kwargs),
        test=DataLoader(discriminator_dataset.test, batch_size=batch_size, **kwargs),
        valid=None,
    )
    classifier = Classifier(n=simple_dataset_size).to(device)
    classifier_optimizer = optim.SGD(
        classifier.parameters(),
        **{
            k.replace("classifier_", ""): v
            for k, v in classifier_optimizer_args.items()
        },
    )
    discriminator = Discriminator(n=simple_dataset_size, **discriminator_args).to(
        device
    )
    discriminator_optimizer = optim.SGD(
        discriminator.parameters(),
        **{
            k.replace("discriminator_", ""): v
            for k, v in discriminator_optimizer_args.items()
        },
    )
    writer = SummaryWriter(str(log_dir))
    if classifier_load_path:
        classifier.load_state_dict(torch.load(classifier_load_path))
        # sanity check to make sure that classifier was properly loaded
        for k, v in test(
            classifier=classifier, device=device, test_loader=classifier_loaders.train
        ).items():
            writer.add_scalar("sanity_check" + k, v, 0)
        for k, v in test(
            classifier=classifier, device=device, test_loader=classifier_loaders.test
        ).items():
            pass
    if discriminator_load_path:
        discriminator.load_state_dict(torch.load(discriminator_load_path))
        # sanity check to make sure that discriminator was properly loaded
        for k, v in test_discriminator(
            classifier=classifier,
            discriminator=discriminator,
            device=device,
            test_loader=discriminator_loaders.train,
        ).items():
            writer.add_scalar("sanity_check" + k, v, 0)
    iterations = range(num_iterations) if num_iterations else itertools.count()
    batch_count = Counter()
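
    # main loop below: evaluate on the validation split, train the classifier
    # (adding the discriminator-based auxiliary loss, weighted by aux_coef,
    # after the first iteration), then train the discriminator; both models
    # are checkpointed to log_dir at the end of each iteration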

    for i in iterations:
        for k, v in test(
            classifier=classifier, device=device, test_loader=classifier_loaders.valid
        ).items():
            writer.add_scalar(k, v, i)
        for epoch in tqdm(range(1, classifier_epochs + 1), desc="classifier"):
            for counter in train(
                classifier=classifier,
                discriminator=discriminator,
                aux_coef=aux_coef if i > 0 else 0,
                device=device,
                train_loader=classifier_loaders.train,
                optimizer=classifier_optimizer,
                log_interval=log_interval,
            ):
                batch_count.update(classifier=counter["batch"])
                for k, v in counter.items():
                    if k != "batch":
                        writer.add_scalar(k, v, batch_count["classifier"])
        if simple:
            print("classifier weights")
            for p in classifier.parameters():
                print(p)
        for k, v in test_discriminator(
            classifier=classifier,
            discriminator=discriminator,
            device=device,
            test_loader=discriminator_loaders.test,
        ).items():
            writer.add_scalar(k, v, i)
        iterator = (
            itertools.count()
            if discriminator_epochs is None
            else tqdm(range(1, discriminator_epochs + 1), desc="discriminator")
        )
        for epoch in iterator:
            for j, counter in enumerate(
                train_discriminator(
                    classifier=classifier,
                    discriminator=discriminator,
                    device=device,
                    train_loader=discriminator_loaders.train,
                    optimizer=discriminator_optimizer,
                    log_interval=log_interval,
                    use_pbar=discriminator_epochs is None,
                )
            ):
                batch_count.update(discriminator=counter["batch"])
                for k, v in counter.items():
                    if k != "batch":
                        writer.add_scalar(k, v, batch_count["discriminator"])
        if simple:
            print("discriminator weights")
            for p in discriminator.parameters():
                print(p)
        torch.save(classifier.state_dict(), str(Path(log_dir, "classifier.pt")))
        torch.save(discriminator.state_dict(), str(Path(log_dir, "discriminator.pt")))
                                      tr.ToTensor(),
                                      tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])

test_data = Voc2012('/home/tom/DISK/DISK2/jian/PASCAL/VOC2012',
                    'val', transform=test_transforms)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=1, pin_memory=False)

length_test_dataset = len(test_data)

NUM_CLASS = 20

device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

convnet = ResNet50(pretrained=True)
classifier = Classifier(in_features=2048, num_class=NUM_CLASS)
pan = PAN(convnet.blocks[::-1])
mask_classifier = Mask_Classifier(in_features=256, num_class=(NUM_CLASS+1))
color_classifier = Color_Classifier(in_features=256, num_class=2)

convnet.to(device)
classifier.to(device)
pan.to(device)
mask_classifier.to(device)
color_classifier.to(device)

def test(data_loader):
    global best_acc
    convnet.eval()
    pan.eval()
    all_i_count = []
        torch.load(os.path.join('checkpoints', FLAGS.decoder_save), map_location=lambda storage, loc: storage))

    # class labels variable
    X = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    class_labels = torch.LongTensor(FLAGS.batch_size)

    # test
    if torch.cuda.is_available() and not FLAGS.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    # load data set and create data loader instance
    print('Loading MNIST dataset...')
    mnist = datasets.MNIST(root='mnist', download=True, train=True, transform=transform_config)
    loader = cycle(DataLoader(mnist, batch_size=FLAGS.batch_size, shuffle=True, num_workers=0, drop_last=True))

    style_classifier = Classifier(z_dim=FLAGS.style_dim, num_classes=FLAGS.num_classes)
    style_classifier.apply(weights_init)

    class_classifier = Classifier(z_dim=FLAGS.class_dim, num_classes=FLAGS.num_classes)
    class_classifier.apply(weights_init)

    cross_entropy_loss = nn.CrossEntropyLoss()

    style_classifier_optimizer = optim.Adam(
        list(style_classifier.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    class_classifier_optimizer = optim.Adam(
        list(class_classifier.parameters()),