def cross_validation_for_clustered_data(num_labels, num_cluster, args):
    """Run leave-one-cluster-out cross validation and collect per-epoch results.

    For every fold ``i`` in ``range(num_cluster)`` cluster ``i`` is passed to
    ``prepare_data`` as the test list and all remaining cluster indices as the
    train/validation list. A network is then built (or restored from a
    checkpoint when ``args.resume`` is set) and ``train`` / ``test`` are
    called once per epoch on the train, validation and test splits.

    Args:
        num_labels: Not used by this function; kept so callers need not change.
        num_cluster: Number of clusters, which is also the number of folds.
        args: Option namespace. This function reads ``resume``, ``dataset``,
            ``net_type``, ``cv_type`` and ``lr`` and forwards ``args`` to
            ``prepare_data`` and ``getNetwork``.

    Returns:
        dict: ``str(i)`` -> ``{"train": ..., "test": ..., "val": ...}`` where
        each value is the per-epoch return values of ``train`` / ``test``
        accumulated with ``np.append`` (an empty list if no epoch ran).

    Raises:
        AssertionError: If ``args.resume`` is set and no ``checkpoint``
            directory exists.
    """
    print("cross validation for clustered data")
    results = {}
    for cv_idx in range(num_cluster):
        # Hold out cluster ``cv_idx``; all other clusters form the
        # train/validation pool.
        test_list = [cv_idx]
        train_eval_list = [x for x in range(num_cluster) if x != cv_idx]
        print(test_list, train_eval_list)

        # Hyper parameter settings. Read once per fold so that values
        # restored from a checkpoint in one fold do not carry over.
        best_acc = 0
        resize = cf.resize
        start_epoch, num_epochs, batch_size, optim_type = (
            cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type)

        trainset, evalset, testset, inputs, outputs = prepare_data(
            args, train_eval_list, test_list, resize)

        # The configuration decides whether CUDA is used. The same truthiness
        # check guards both device selection and ``net.cuda()`` below.
        use_cuda = cf.use_cuda()
        if use_cuda:
            torch.cuda.set_device(0)

        # Only the training loader shuffles; evaluation order stays fixed.
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=4)
        evalloader = torch.utils.data.DataLoader(evalset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=4)
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=4)

        # Model
        print('\n[Phase 2] : Model setup')
        if args.resume:
            # Load checkpoint
            print('| Resuming from checkpoint...')
            assert os.path.isdir(
                'checkpoint'), 'Error: No checkpoint directory found!'
            # Only the file name is needed here; the network itself comes
            # from the checkpoint.
            _, file_name = getNetwork(args, inputs, outputs)
            # NOTE: torch.load unpickles the file, so only load checkpoints
            # from a trusted source.
            checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep +
                                    file_name + args.cv_type + str(cv_idx) +
                                    '.t7')
            net = checkpoint['net']
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']
        else:
            print('| Building net type [' + args.net_type + ']...')
            net, file_name = getNetwork(args, inputs, outputs)

        if use_cuda:
            net.cuda()

        vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

        # One diagnostics file per split and fold, relative to the current
        # working directory.
        log_template = 'diagnostics_Bayes{}_{}_cv{}_{}_vgmm.txt'
        logfile_train = log_template.format(args.net_type, args.dataset,
                                            cv_idx, 'train')
        logfile_test = log_template.format(args.net_type, args.dataset,
                                           cv_idx, 'test')
        logfile_eval = log_template.format(args.net_type, args.dataset,
                                           cv_idx, 'val')

        print('\n[Phase 3] : Training model with validation')
        print('| Training Epochs = ' + str(num_epochs))
        print('| Initial Learning Rate = ' + str(args.lr))
        print('| Optimizer = ' + str(optim_type))

        elapsed_time = 0
        train_return = []
        eval_return = []
        test_return = []
        for epoch in range(start_epoch, start_epoch + num_epochs):
            start_time = time.time()

            temp_train_return = train(epoch, trainset, inputs, net, batch_size,
                                      trainloader, resize, num_epochs,
                                      use_cuda, vi, logfile_train)
            # The validation pass receives the network's file name, the test
            # pass the literal "test" as its last argument.
            temp_eval_return = test(epoch, evalset, inputs, batch_size,
                                    evalloader, net, use_cuda, num_epochs,
                                    resize, vi, logfile_eval, file_name)
            temp_test_return = test(epoch, testset, inputs, batch_size,
                                    testloader, net, use_cuda, num_epochs,
                                    resize, vi, logfile_test, "test")

            train_return = np.append(train_return, temp_train_return)
            eval_return = np.append(eval_return, temp_eval_return)
            test_return = np.append(test_return, temp_test_return)

            print(temp_train_return)
            print(temp_eval_return)
            print(temp_test_return)
            elapsed_time += time.time() - start_time
            print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

        print('\n[Phase 4] : Testing model')
        # NOTE(review): best_acc is only ever 0 or the value stored in the
        # checkpoint; nothing in this function updates it during training.
        print('* Test results : Acc@1 = %.2f%%' % (best_acc))
        results[str(cv_idx)] = {
            "train": train_return,
            "test": test_return,
            "val": eval_return
        }
        print(results)

    return results
def cross_validation(num_labels,num_cluster,args):
    """Run ``num_cluster``-fold cross validation with the scheme in ``args.cv_type``.

    Two schemes are supported:

    * ``"rand"``: a shuffled ``KFold`` split over the ``fashion-mnist``
      ``InputDataset``; the index lists of fold ``i`` are passed to
      ``prepare_data_for_normal_cv``.
    * ``"vgmm"``: cluster ``i`` is held out as the test list and the other
      cluster indices are passed to ``prepare_data`` as the train/validation
      list.

    For every fold a network is built (or restored from a checkpoint when
    ``args.resume`` is set) and ``train`` / ``test`` are called once per
    epoch on the train, validation and test splits.

    Args:
        num_labels: Not used by this function; kept so callers need not change.
        num_cluster: Number of folds (and, for ``"vgmm"``, of clusters).
        args: Option namespace. This function reads ``cv_type``, ``resume``,
            ``dataset``, ``net_type`` and ``lr`` and forwards ``args`` to the
            data preparation helpers and ``getNetwork``.

    Returns:
        dict: ``str(i)`` -> ``{"train": ..., "test": ..., "eval": ...}`` where
        each value is the per-epoch return values of ``train`` / ``test``
        accumulated with ``np.append`` (an empty list if no epoch ran).

    Raises:
        NotImplementedError: If ``args.cv_type`` is neither ``"rand"`` nor
            ``"vgmm"`` (raised when the first fold is prepared).
        AssertionError: If ``args.resume`` is set and no ``checkpoint``
            directory exists.
    """
    method = args.cv_type
    print("cross validation for random resampling")
    results = {}

    # NOTE(review): the K-fold split is computed for every method, although
    # only the "rand" branch below consumes it.
    ds = mdataset_class.InputDataset("fashion-mnist", -1, 10)
    X, y = ds.data_X, ds.data_y
    kf = KFold(n_splits=num_cluster, shuffle = True)
    mlist = list(kf.split(X,y))

    for i in range(num_cluster):
        cv_idx = i

        # Hyper parameter settings. Read once per fold so that values
        # restored from a checkpoint in one fold do not carry over.
        best_acc = 0
        resize = cf.resize
        start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

        if method == "rand":
            # mlist[i] is the (train indices, test indices) pair of fold i.
            train_eval_idx = list(mlist[i][0])
            test_idx = list(mlist[i][1])
            trainset, evalset, testset, inputs, outputs = prepare_data_for_normal_cv(args, train_eval_idx, test_idx, resize)
        elif method == "vgmm":
            # Hold out cluster i; every other cluster is train/validation.
            test_list = [i]
            train_eval_list = [x for x in range(num_cluster) if x != i]
            print(test_list,train_eval_list)
            trainset, evalset, testset,inputs,outputs = prepare_data(args,train_eval_list,test_list,resize, method = "vgmm")
        else:
            raise NotImplementedError

        # The configuration decides whether CUDA is used. The same truthiness
        # check guards both device selection and ``net.cuda()`` below.
        # GPUIndex is not defined in this function; presumably a module-level
        # setting -- verify.
        use_cuda = cf.use_cuda()
        if use_cuda:
            torch.cuda.set_device(GPUIndex)

        # Only the training loader shuffles; evaluation order stays fixed.
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
        evalloader = torch.utils.data.DataLoader(evalset, batch_size=batch_size, shuffle=False, num_workers=4)
        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

        # Model
        print('\n[Phase 2] : Model setup')
        if args.resume:
            # Load checkpoint
            print('| Resuming from checkpoint...')
            assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
            # Only the file name is needed here; the network itself comes
            # from the checkpoint.
            _, file_name = getNetwork(args, inputs, outputs)

            # NOTE: torch.load unpickles the file, so only load checkpoints
            # from a trusted source.
            checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep + file_name+ args.cv_type + str(cv_idx)  + '.t7')
            net = checkpoint['net']
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']
        else:
            print('| Building net type [' + args.net_type + ']...')
            net, file_name = getNetwork(args, inputs, outputs)

        if use_cuda:
            net.cuda()

        vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

        # The file name suffix follows the active method, so "vgmm" runs are
        # no longer written to files labelled "rand". For method == "rand"
        # the names are unchanged. rstfolder is not defined in this function;
        # presumably a module-level output directory -- verify.
        log_template = 'diagnostics_Bayes{}_{}_cv{}_{}_{}.txt'
        logfile_train = os.path.join(rstfolder, log_template.format(args.net_type, args.dataset, i, 'train', method))
        logfile_test = os.path.join(rstfolder, log_template.format(args.net_type, args.dataset, i, 'test', method))
        logfile_eval = os.path.join(rstfolder, log_template.format(args.net_type, args.dataset, i, 'val', method))

        print('\n[Phase 3] : Training model')
        print('| Training Epochs = ' + str(num_epochs))
        print('| Initial Learning Rate = ' + str(args.lr))
        print('| Optimizer = ' + str(optim_type))

        elapsed_time = 0

        train_return = []
        test_return = []
        eval_return = []

        for epoch in range(start_epoch, start_epoch + num_epochs):
            start_time = time.time()

            temp_train_return = train(epoch, trainset, inputs, net, batch_size, trainloader, resize, num_epochs, use_cuda, vi, logfile_train)
            # The validation pass receives the network's file name, the test
            # pass the literal "test" as its last argument.
            temp_eval_return = test(epoch, evalset, inputs, batch_size, evalloader, net, use_cuda, num_epochs, resize, vi, logfile_eval,file_name)
            temp_test_return = test(epoch, testset, inputs, batch_size, testloader, net, use_cuda, num_epochs, resize, vi, logfile_test, "test")

            train_return = np.append(train_return,temp_train_return)
            eval_return = np.append(eval_return,temp_eval_return)
            test_return = np.append(test_return, temp_test_return)

            print(temp_train_return)
            print(temp_eval_return)
            print(temp_test_return)

            elapsed_time += time.time() - start_time
            print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

        print('\n[Phase 4] : Testing model')
        # NOTE(review): best_acc is only ever 0 or the value stored in the
        # checkpoint; nothing in this function updates it during training.
        print('* Test results : Acc@1 = %.2f%%' % (best_acc))
        results[str(i)] = {"train": train_return, "test": test_return, "eval": eval_return}
        print(results)
    return results
Ejemplo n.º 3
0
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        save_point = './checkpoint/' + args.dataset + os.sep
        if not os.path.isdir(save_point):
            os.mkdir(save_point)
        torch.save(state, save_point + file_name + '.t7')
        best_acc = acc


# Phase 3: report the run configuration, then call train() and test() once
# per epoch while keeping a running total of the wall-clock time.
print('\n[Phase 3] : Training model')
print('| Training Epochs = {}'.format(num_epochs))
print('| Initial Learning Rate = {}'.format(args.lr))
print('| Optimizer = {}'.format(optim_type))

elapsed_time = 0
for epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()

    train(epoch)
    test(epoch)

    # Time spent on this epoch is added to the running total.
    epoch_time = time.time() - start_time
    elapsed_time = elapsed_time + epoch_time
    print('| Elapsed time : %d:%02d:%02d' % cf.get_hms(elapsed_time))

# Phase 4: report the value held in best_acc after the last epoch.
print('\n[Phase 4] : Testing model')
print('* Test results : Acc@1 = %.2f%%' % best_acc)
def tr_val_te(ds, num_labels, num_cluster, args, cv_idx, config_parent):
    """Train, validate and test one cross-validation fold.

    The splits are obtained from ``ds.prepare_data``. A network is built (or
    restored from a checkpoint when ``args.resume`` is set) and ``train`` /
    ``test`` are called once per epoch on the train, validation and test
    splits. Diagnostics go to one file per split under ``args.rst_dir``.

    Args:
        ds: Dataset object providing ``prepare_data``.
        num_labels: Not used by this function; kept so callers need not change.
        num_cluster: Forwarded to ``ds.prepare_data``.
        args: Option namespace. This function reads ``dataset``, ``g``,
            ``resume``, ``net_type``, ``cv_type``, ``rst_dir`` and ``lr`` and
            forwards ``args`` to ``ds.prepare_data`` and ``getNetwork``.
        cv_idx: Index of the fold; used for the data split and for the
            checkpoint and log file names.
        config_parent: Forwarded unchanged to ``ds.prepare_data``.

    Returns:
        dict: ``{"train": ..., "test": ..., "eval": ...}`` where each value
        is the per-epoch return values of ``train`` / ``test`` accumulated
        with ``np.append`` (an empty list if no epoch ran).

    Raises:
        AssertionError: If ``args.resume`` is set and no ``checkpoint``
            directory exists.
    """
    # NOTE(review): this message is printed for every args.cv_type, not only
    # for random resampling.
    print("cross validation for random resampling")

    # Hyper parameter settings, read once from the configuration module.
    best_acc = 0
    resize = cf.resize
    start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

    # Train and test currently use the same pipeline: resize, convert to a
    # tensor, then normalise with the per-dataset mean/std from the config.
    transform_train = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])  # meanstd transformation

    transform_test = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])

    print('\n[Phase 1] : Data Preparation')
    trainset, evalset, testset, inputs, outputs = ds.prepare_data(config_parent, args, transform_train, transform_test, cv_idx, num_cluster)

    # The configuration decides whether CUDA is used. The same truthiness
    # check guards both device selection and ``net.cuda()`` below.
    use_cuda = cf.use_cuda()
    if use_cuda:
        torch.cuda.set_device(args.g)
        print("*** using gpu ind", args.g)

    # Only the training loader shuffles; evaluation order stays fixed.
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
    evalloader = torch.utils.data.DataLoader(evalset, batch_size=batch_size, shuffle=False, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Model
    print('\n[Phase 2] : Model setup')
    if args.resume:
        # Load checkpoint
        print('| Resuming from checkpoint...')
        assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
        # Only the file name is needed here; the network itself comes from
        # the checkpoint.
        _, file_name = getNetwork(args, inputs, outputs)

        # NOTE: torch.load unpickles the file, so only load checkpoints from
        # a trusted source.
        checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep + file_name+ args.cv_type + str(cv_idx)  + '.t7')
        net = checkpoint['net']
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
    else:
        print('| Building net type [' + args.net_type + ']...')
        net, file_name = getNetwork(args, inputs, outputs)

    if use_cuda:
        net.cuda()

    vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

    # One diagnostics file per split, tagged with the fold and the CV type.
    rstfolder = args.rst_dir
    log_template = 'diagnostics_Bayes{}_{}_cv{}_{}_{}.txt'
    logfile_train = os.path.join(rstfolder, log_template.format(args.net_type, args.dataset, cv_idx, 'train', args.cv_type))
    logfile_test = os.path.join(rstfolder, log_template.format(args.net_type, args.dataset, cv_idx, 'test', args.cv_type))
    logfile_eval = os.path.join(rstfolder, log_template.format(args.net_type, args.dataset, cv_idx, 'val', args.cv_type))

    print('\n[Phase 3] : Training model')
    print('| Training Epochs = ' + str(num_epochs))
    print('| Initial Learning Rate = ' + str(args.lr))
    print('| Optimizer = ' + str(optim_type))

    elapsed_time = 0

    train_return = []
    test_return = []
    eval_return = []

    for epoch in range(start_epoch, start_epoch + num_epochs):
        start_time = time.time()

        temp_train_return = train(epoch, trainset, inputs, net, batch_size, trainloader, resize, num_epochs, use_cuda, vi, logfile_train)
        # The validation pass receives the network's file name, the test
        # pass the literal "test" as its last argument.
        temp_eval_return = test(epoch, evalset, inputs, batch_size, evalloader, net, use_cuda, num_epochs, resize, vi, logfile_eval, file_name)
        temp_test_return = test(epoch, testset, inputs, batch_size, testloader, net, use_cuda, num_epochs, resize, vi, logfile_test, "test")

        train_return = np.append(train_return,temp_train_return)
        eval_return = np.append(eval_return,temp_eval_return)
        test_return = np.append(test_return, temp_test_return)

        print(temp_train_return)
        print(temp_eval_return)
        print(temp_test_return)

        elapsed_time += time.time() - start_time
        print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

    print('\n[Phase 4] : Testing model')
    # NOTE(review): best_acc is only ever 0 or the value stored in the
    # checkpoint; nothing in this function updates it during training.
    print('* Test results : Acc@1 = %.2f%%' % (best_acc))
    rst = {"train": train_return, "test": test_return, "eval": eval_return}
    return rst