Exemple #1
0
def train(epoch):
    """Train the network for one epoch and append diagnostics to the log.

    The function depends on names that are not passed in and must exist in
    the enclosing module: ``net``, ``trainset``, ``trainloader``,
    ``batch_size``, ``num_epochs``, ``inputs``, ``resize``, ``use_cuda``,
    ``vi``, ``logfile``, ``args`` and ``cf``.

    Args:
        epoch: Current epoch number. It is fed to ``cf.learning_rate``, to the
            "Soenderby" beta schedule and to the progress/log output.
    """
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    # Number of mini-batches per epoch; the beta schedules below depend on it.
    m = math.ceil(len(trainset) / batch_size)
    # The optimizer is rebuilt on every call with the learning rate that
    # cf.learning_rate returns for this epoch.
    optimizer = optim.Adam(net.parameters(),
                           lr=cf.learning_rate(args.lr, epoch),
                           weight_decay=args.weight_decay)

    print('\n=> Training Epoch #%d, LR=%.4f' %
          (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs_value, targets) in enumerate(trainloader):
        # Repeat every sample args.num_samples times along the batch axis;
        # the targets are repeated the same way so they stay aligned.
        x = inputs_value.view(-1, inputs, resize,
                              resize).repeat(args.num_samples, 1, 1, 1)
        print(x.shape)
        y = targets.repeat(args.num_samples)
        if use_cuda:
            x, y = x.cuda(), y.cuda()  # GPU settings

        # Strings must be compared with ==, not `is`: a value parsed from the
        # command line is a different object than the literal, so an identity
        # test would silently fall through to beta = 0.
        if args.beta_type == "Blundell":
            beta = 2**(m - (batch_idx + 1)) / (2**m - 1)
        elif args.beta_type == "Soenderby":
            beta = min(epoch / (num_epochs // 4), 1)
        elif args.beta_type == "Standard":
            beta = 1 / m
        else:
            beta = 0
        # Forward Propagation
        x, y = Variable(x), Variable(y)
        outputs, kl = net.probforward(x)
        # beta is handed to vi together with the KL term (presumably as its
        # weight -- confirm against GaussianVariationalInference).
        loss = vi(outputs, y, kl, beta)  # Loss
        optimizer.zero_grad()
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update
        train_loss += loss.data
        _, predicted = torch.max(outputs.data, 1)
        # total counts the un-repeated targets while correct counts hits over
        # all repeated copies; the accuracy below divides by num_samples to
        # compensate.
        total += targets.size(0)
        correct += predicted.eq(y.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write(
            '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %
            (epoch, num_epochs, batch_idx + 1,
             (len(trainset) // batch_size) + 1, loss.data,
             (100 * correct / total) / args.num_samples))
        sys.stdout.flush()

    # Only the loss of the last mini-batch is logged, not an epoch average.
    diagnostics_to_write = {
        'Epoch': epoch,
        'Loss': loss.data,
        'Accuracy': (100 * correct / total) / args.num_samples
    }
    # The dict is appended as-is; no newline separates successive entries.
    with open(logfile, 'a') as lf:
        lf.write(str(diagnostics_to_write))
Exemple #2
0
def train(epoch):
    """Train the network for one epoch and append diagnostics to the log.

    The function depends on names that are not passed in and must exist in
    the enclosing module: ``net``, ``trainset``, ``trainloader``,
    ``batch_size``, ``num_epochs``, ``inputs``, ``resize``, ``use_cuda``,
    ``vi``, ``logfile``, ``args`` and ``cf``.

    Args:
        epoch: Current epoch number. It is fed to ``cf.learning_rate``, to the
            "Soenderby" beta schedule and to the progress/log output.
    """
    net.train()  # torch.nn.Module.train: sets the module in training mode
    train_loss = 0
    correct = 0
    total = 0
    # Number of mini-batches per epoch; the beta schedules below depend on it.
    m = math.ceil(len(trainset) / batch_size)
    # The optimizer is rebuilt on every call with the learning rate that
    # cf.learning_rate returns for this epoch.
    optimizer = optim.Adam(net.parameters(), lr=cf.learning_rate(args.lr, epoch), weight_decay=args.weight_decay)

    print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs_value, targets) in enumerate(trainloader):
        # After repeat, the first dimension of x is args.num_samples times
        # its original size; the targets are repeated the same way.
        x = inputs_value.view(-1, inputs, resize, resize).repeat(args.num_samples, 1, 1, 1)
        y = targets.repeat(args.num_samples)
        if use_cuda:
            x, y = x.cuda(), y.cuda() # GPU settings

        # Strings must be compared with ==, not `is`: a value parsed from the
        # command line is a different object than the literal, so an identity
        # test would silently fall through to beta = 0.
        if args.beta_type == "Blundell":
            beta = 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
        elif args.beta_type == "Soenderby":
            beta = min(epoch / (num_epochs // 4), 1)
        elif args.beta_type == "Standard":
            beta = 1 / m
        else:
            beta = 0
        # Forward Propagation
        x, y = Variable(x), Variable(y)
        # probforward is called directly rather than through net(...); per the
        # original author's note it is not torch.nn.Module.forward.
        outputs, kl = net.probforward(x)
        # Debug output kept from the original author, who marked the outputs
        # shape as the place where a bug showed up and expected kl to be a
        # scalar (empty shape).
        print(x.shape)
        print(outputs.shape)
        print(kl.shape)
        loss = vi(outputs, y, kl, beta)  # Loss; calling vi(...) runs its forward
        optimizer.zero_grad()  # Clears the gradients of all optimized tensors
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update
        train_loss += loss.data
        # torch.max along dim 1 returns (max values, argmax indices); only the
        # indices are needed as predicted classes.
        _, predicted = torch.max(outputs.data, dim = 1)
        # total counts the un-repeated targets while correct counts hits over
        # all repeated copies; the accuracy below divides by num_samples to
        # compensate.
        total += targets.size(0)
        correct += predicted.eq(y.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' %(epoch, num_epochs, batch_idx+1,
                    (len(trainset)//batch_size)+1, loss.data, (100*correct.to(dtype=torch.float)/float(total))/args.num_samples))
        sys.stdout.flush()

    # Only the loss of the last mini-batch is logged, not an epoch average.
    diagnostics_to_write = {'Epoch': epoch, 'Loss': loss.data, 'Accuracy': (100*correct.to(dtype=torch.float)/float(total))/args.num_samples}
    # The dict is appended as-is; no newline separates successive entries.
    with open(logfile, 'a') as lf:
        lf.write(str(diagnostics_to_write))
def cross_validation_for_clustered_data(num_labels, num_cluster, args):
    """Run leave-one-cluster-out cross validation.

    For each fold ``i`` in ``range(num_cluster)``, cluster ``i`` is handed to
    ``prepare_data`` as the test list and all other clusters as the
    train/eval list. A network is built (or restored from a checkpoint when
    ``args.resume`` is set) and ``train``/``test`` are called once per epoch.

    Args:
        num_labels: Not used by this function.
        num_cluster: Number of clusters, which is also the number of folds.
        args: Parsed options. This function reads ``resume``, ``dataset``,
            ``cv_type``, ``net_type`` and ``lr`` and forwards the object to
            ``prepare_data`` and ``getNetwork``.

    Returns:
        dict: Maps ``str(i)`` to a dict with keys ``"train"``, ``"test"`` and
        ``"val"``; each value is built with ``np.append`` from the per-epoch
        return values of ``train``/``test``.
    """
    print("cross validation for clustered data")
    results = {}
    for i in range(num_cluster):
        # Configuration is re-read for every fold: resuming from a checkpoint
        # overwrites best_acc/start_epoch and that must not leak into the
        # next fold.
        best_acc = 0
        resize = cf.resize
        start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

        cv_idx = i
        test_list = [i]
        train_eval_list = [x for x in range(num_cluster) if x != i]
        print(test_list, train_eval_list)
        trainset, evalset, testset, inputs, outputs = prepare_data(
            args, train_eval_list, test_list, resize)

        # Hyper Parameter settings
        use_cuda = cf.use_cuda()
        if use_cuda is True:
            torch.cuda.set_device(0)

        # Only the training data is shuffled; eval/test keep dataset order.
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=4)
        evalloader = torch.utils.data.DataLoader(evalset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=4)
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=4)

        # Model
        print('\n[Phase 2] : Model setup')
        if args.resume:
            # Load checkpoint
            print('| Resuming from checkpoint...')
            assert os.path.isdir(
                'checkpoint'), 'Error: No checkpoint directory found!'
            # Only the file name is needed here; the network itself comes
            # from the checkpoint.
            _, file_name = getNetwork(args, inputs, outputs)
            checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep +
                                    file_name + args.cv_type + str(cv_idx) +
                                    '.t7')
            net = checkpoint['net']
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']
        else:
            print('| Building net type [' + args.net_type + ']...')
            net, file_name = getNetwork(args, inputs, outputs)

        if use_cuda:
            net.cuda()

        vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

        # One diagnostics file per fold and per split, written to the
        # current working directory.
        logfile_train = 'diagnostics_Bayes{}_{}_cv{}_train_vgmm.txt'.format(
            args.net_type, args.dataset, i)
        logfile_test = 'diagnostics_Bayes{}_{}_cv{}_test_vgmm.txt'.format(
            args.net_type, args.dataset, i)
        logfile_eval = 'diagnostics_Bayes{}_{}_cv{}_val_vgmm.txt'.format(
            args.net_type, args.dataset, i)

        print('\n[Phase 3] : Training model with validation')
        print('| Training Epochs = ' + str(num_epochs))
        print('| Initial Learning Rate = ' + str(args.lr))
        print('| Optimizer = ' + str(optim_type))

        elapsed_time = 0
        train_return = []
        eval_return = []
        test_return = []
        for epoch in range(start_epoch, start_epoch + num_epochs):

            start_time = time.time()

            temp_train_return = train(epoch, trainset, inputs, net, batch_size,
                                      trainloader, resize, num_epochs,
                                      use_cuda, vi, logfile_train)
            # The evaluation pass receives the model file name while the test
            # pass receives the literal "test" as its last argument.
            temp_eval_return = test(epoch, evalset, inputs, batch_size,
                                    evalloader, net, use_cuda, num_epochs,
                                    resize, vi, logfile_eval, file_name)
            temp_test_return = test(epoch, testset, inputs, batch_size,
                                    testloader, net, use_cuda, num_epochs,
                                    resize, vi, logfile_test, "test")

            train_return = np.append(train_return, temp_train_return)
            eval_return = np.append(eval_return, temp_eval_return)
            test_return = np.append(test_return, temp_test_return)

            print(temp_train_return)
            print(temp_eval_return)
            print(temp_test_return)
            epoch_time = time.time() - start_time
            elapsed_time += epoch_time
            print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

        print('\n[Phase 4] : Testing model')
        # NOTE(review): best_acc is only ever set from a checkpoint in this
        # function, so without --resume this prints 0 -- confirm intent.
        print('* Test results : Acc@1 = %.2f%%' % (best_acc))
        results[str(i)] = {
            "train": train_return,
            "test": test_return,
            "val": eval_return
        }
        print(results)

    return results
Exemple #4
0
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        save_point = './checkpoint/' + args.dataset + os.sep
        if not os.path.isdir(save_point):
            os.mkdir(save_point)
        torch.save(state, save_point + file_name + '.t7')
        best_acc = acc


# Script-level training driver: report the run configuration, then run one
# train/test pass per epoch while accumulating wall-clock time.
print('\n[Phase 3] : Training model')
print('| Training Epochs = ' + str(num_epochs))
print('| Initial Learning Rate = ' + str(args.lr))
print('| Optimizer = ' + str(optim_type))

elapsed_time = 0  # seconds spent in train() + test(), summed over epochs
for epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()

    train(epoch)
    test(epoch)

    epoch_time = time.time() - start_time
    elapsed_time += epoch_time
    # cf.get_hms must yield three values to fill the format string below.
    print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

# NOTE(review): best_acc is not modified in this loop; presumably test()
# updates it as a module-level global -- confirm.
print('\n[Phase 4] : Testing model')
print('* Test results : Acc@1 = %.2f%%' % (best_acc))
def cross_validation(num_labels,num_cluster,args):
    """Run ``num_cluster``-fold cross validation in "rand" or "vgmm" mode.

    The mode is taken from ``args.cv_type``:

    * ``"rand"``: folds come from a shuffled ``KFold`` split of the
      "fashion-mnist" ``InputDataset`` and are passed to
      ``prepare_data_for_normal_cv`` as index lists.
    * ``"vgmm"``: fold ``i`` uses cluster ``i`` as the test list and all other
      clusters as the train/eval list, passed to ``prepare_data``.

    Besides its arguments the function reads the module-level names
    ``GPUIndex`` and ``rstfolder``.

    Args:
        num_labels: Not used by this function.
        num_cluster: Number of folds (and number of ``KFold`` splits).
        args: Parsed options. This function reads ``cv_type``, ``resume``,
            ``dataset``, ``net_type`` and ``lr`` and forwards the object to
            the data-preparation helpers and ``getNetwork``.

    Returns:
        dict: Maps ``str(i)`` to a dict with keys ``"train"``, ``"test"`` and
        ``"eval"``; each value is built with ``np.append`` from the per-epoch
        return values of ``train``/``test``.

    Raises:
        NotImplementedError: If ``args.cv_type`` is neither ``"rand"`` nor
            ``"vgmm"`` (raised when the first fold is prepared).
    """
    method = args.cv_type
    print("cross validation for random resampling")
    results = {}
    ds = mdataset_class.InputDataset("fashion-mnist", -1, 10)
    X, y = ds.data_X, ds.data_y
    # shuffle=True without a fixed random_state: the folds differ between runs.
    kf = KFold(n_splits=num_cluster, shuffle = True)
    # Materialize the (train_eval_idx, test_idx) pairs so they can be indexed
    # by fold number.
    mlist = list(kf.split(X,y))
    for i in range(num_cluster):
        # Configuration is re-read for every fold: resuming from a checkpoint
        # overwrites best_acc/start_epoch and that must not leak into the
        # next fold.
        best_acc = 0
        resize = cf.resize
        start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

        cv_idx = i
        if method == "rand":
            train_eval_idx = list(mlist[i][0])
            test_idx = list(mlist[i][1])
            trainset, evalset, testset, inputs, outputs = prepare_data_for_normal_cv(args, train_eval_idx, test_idx, resize)
        elif method == "vgmm":
            test_list = [i]
            train_eval_list = [x for x in range(num_cluster) if x != i]
            print(test_list,train_eval_list)
            trainset, evalset, testset,inputs,outputs = prepare_data(args,train_eval_list,test_list,resize, method = "vgmm")
        else:
            raise NotImplementedError

        # Hyper Parameter settings
        use_cuda = cf.use_cuda()
        if use_cuda is True:
            torch.cuda.set_device(GPUIndex)

        # Only the training data is shuffled; eval/test keep dataset order.
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
        evalloader = torch.utils.data.DataLoader(evalset, batch_size=batch_size, shuffle=False, num_workers=4)
        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

        # Model
        print('\n[Phase 2] : Model setup')
        if args.resume:
            # Load checkpoint
            print('| Resuming from checkpoint...')
            assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
            # Only the file name is needed here; the network itself comes
            # from the checkpoint.
            _, file_name = getNetwork(args, inputs, outputs)

            checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep + file_name+ args.cv_type + str(cv_idx)  + '.t7')
            net = checkpoint['net']
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']
        else:
            print('| Building net type [' + args.net_type + ']...')
            net, file_name = getNetwork(args, inputs, outputs)

        if use_cuda:
            net.cuda()

        vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

        # The file suffix follows the active mode. It used to be hard-coded to
        # "rand", so "vgmm" runs were written into the "rand" files.
        logfile_train = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_train_{}.txt'.format(args.net_type, args.dataset, i, method))
        logfile_test = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_test_{}.txt'.format(args.net_type, args.dataset, i, method))
        logfile_eval = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_val_{}.txt'.format(args.net_type, args.dataset, i, method))

        print('\n[Phase 3] : Training model')
        print('| Training Epochs = ' + str(num_epochs))
        print('| Initial Learning Rate = ' + str(args.lr))
        print('| Optimizer = ' + str(optim_type))

        elapsed_time = 0

        train_return = []
        test_return = []
        eval_return = []

        for epoch in range(start_epoch, start_epoch + num_epochs):
            start_time = time.time()

            temp_train_return = train(epoch, trainset, inputs, net, batch_size, trainloader, resize, num_epochs, use_cuda, vi, logfile_train)
            # The evaluation pass receives the model file name while the test
            # pass receives the literal "test" as its last argument.
            temp_eval_return = test(epoch, evalset, inputs, batch_size, evalloader, net, use_cuda, num_epochs, resize, vi, logfile_eval,file_name)
            temp_test_return = test(epoch, testset, inputs, batch_size, testloader, net, use_cuda, num_epochs, resize, vi, logfile_test, "test")

            train_return = np.append(train_return,temp_train_return)
            eval_return = np.append(eval_return,temp_eval_return)
            test_return = np.append(test_return, temp_test_return)

            print(temp_train_return)
            print(temp_eval_return)
            print(temp_test_return)

            epoch_time = time.time() - start_time
            elapsed_time += epoch_time
            print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

        print('\n[Phase 4] : Testing model')
        # NOTE(review): best_acc is only ever set from a checkpoint in this
        # function, so without --resume this prints 0 -- confirm intent.
        print('* Test results : Acc@1 = %.2f%%' % (best_acc))
        results[str(i)] = {"train": train_return, "test": test_return, "eval": eval_return}
        print(results)
    return results
Exemple #6
0
# Command-line options (the parser object is created earlier in the script).
parser.add_argument('--lr', default=0.0001, type=float, help='learning_rate')
parser.add_argument('--net_type', default='3conv3fc', type=str, help='model')
# Disabled options, kept for reference:
#parser.add_argument('--depth', default=28, type=int, help='depth of model') #parser.add_argument('--widen_factor', default=10, type=int, help='width of model')
parser.add_argument('--num_samples', default=10, type=int, help='Number of samples')
parser.add_argument('--beta_type', default="Blundell", type=str, help='Beta type')
# NOTE(review): both logvar options are parsed as int, so fractional values
# cannot be given on the command line -- confirm that is intended.
parser.add_argument('--p_logvar_init', default=0, type=int, help='p_logvar_init')
parser.add_argument('--q_logvar_init', default=-10, type=int, help='q_logvar_init')
parser.add_argument('--weight_decay', default=0.0005, type=float, help='weight_decay')
parser.add_argument('--dataset', default='mnist', type=str, help='dataset = [mnist/cifar10/cifar100]')
parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
parser.add_argument('--testOnly', '-t', action='store_true', help='Test mode with the saved model')
args = parser.parse_args()

# Hyper Parameter settings
# CUDA usage is decided by the project config rather than by
# torch.cuda.is_available() (kept below, commented out).
# use_cuda = torch.cuda.is_available()
use_cuda = cf.use_cuda()
if use_cuda is True:
    torch.cuda.set_device(0)
best_acc = 0
resize = cf.resize
start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

# Data Upload
print('\n[Phase 1] : Data Preparation')

# Training-time preprocessing: resize to (resize, resize), convert to a
# tensor, then normalize with the per-dataset mean/std from the config.
transform_train = transforms.Compose([
    transforms.Resize((resize, resize)),
    transforms.ToTensor(),
    transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
])  # meanstd transformation
def tr_val_te(ds, num_labels, num_cluster, args, cv_idx, config_parent):
    """Train, validate and test a network for one cross-validation fold.

    The data for fold ``cv_idx`` comes from ``ds.prepare_data``. A network is
    built (or restored from a checkpoint when ``args.resume`` is set) and
    ``train``/``test`` are called once per epoch.

    Args:
        ds: Dataset object; only its ``prepare_data`` method is used here.
        num_labels: Not used by this function.
        num_cluster: Forwarded to ``ds.prepare_data``.
        args: Parsed options. This function reads ``dataset``, ``g`` (GPU
            index), ``resume``, ``cv_type``, ``net_type``, ``rst_dir`` and
            ``lr`` and forwards the object to ``ds.prepare_data`` and
            ``getNetwork``.
        cv_idx: Fold index; used for data preparation and in the checkpoint
            and log file names.
        config_parent: Forwarded to ``ds.prepare_data``.

    Returns:
        dict: Keys ``"train"``, ``"test"`` and ``"eval"``; each value is built
        with ``np.append`` from the per-epoch return values of
        ``train``/``test``.
    """
    print("cross validation for random resampling")
    best_acc = 0
    resize = cf.resize
    start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

    # Train and test use the same pipeline: resize, convert to a tensor,
    # normalize with the per-dataset mean/std from the config.
    transform_train = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])  # meanstd transformation

    transform_test = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])

    print('\n[Phase 1] : Data Preparation')
    trainset, evalset, testset, inputs, outputs = ds.prepare_data(config_parent, args, transform_train, transform_test, cv_idx, num_cluster)

    # Hyper Parameter settings
    use_cuda = cf.use_cuda()
    if use_cuda is True:
        torch.cuda.set_device(args.g)
        print("*** using gpu ind", args.g)

    # Only the training data is shuffled; eval/test keep dataset order.
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
    evalloader = torch.utils.data.DataLoader(evalset, batch_size=batch_size, shuffle=False, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Model
    print('\n[Phase 2] : Model setup')
    if args.resume:
        # Load checkpoint
        print('| Resuming from checkpoint...')
        assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
        # Only the file name is needed here; the network itself comes from
        # the checkpoint.
        _, file_name = getNetwork(args, inputs, outputs)

        checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep + file_name+ args.cv_type + str(cv_idx)  + '.t7')
        net = checkpoint['net']
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
    else:
        print('| Building net type [' + args.net_type + ']...')
        net, file_name = getNetwork(args, inputs, outputs)

    if use_cuda:
        net.cuda()

    vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

    # One diagnostics file per split, tagged with the fold index and cv type.
    rstfolder = args.rst_dir
    logfile_train = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_train_{}.txt'.format(args.net_type, args.dataset, cv_idx, args.cv_type))
    logfile_test = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_test_{}.txt'.format(args.net_type, args.dataset, cv_idx, args.cv_type))
    logfile_eval = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_val_{}.txt'.format(args.net_type, args.dataset, cv_idx, args.cv_type))

    print('\n[Phase 3] : Training model')
    print('| Training Epochs = ' + str(num_epochs))
    print('| Initial Learning Rate = ' + str(args.lr))
    print('| Optimizer = ' + str(optim_type))

    elapsed_time = 0

    train_return = []
    test_return = []
    eval_return = []

    for epoch in range(start_epoch, start_epoch + num_epochs):
        start_time = time.time()

        temp_train_return = train(epoch, trainset, inputs, net, batch_size, trainloader, resize, num_epochs, use_cuda, vi, logfile_train)
        # The evaluation pass receives the model file name while the test
        # pass receives the literal "test" as its last argument.
        temp_eval_return = test(epoch, evalset, inputs, batch_size, evalloader, net, use_cuda, num_epochs, resize, vi, logfile_eval, file_name)
        temp_test_return = test(epoch, testset, inputs, batch_size, testloader, net, use_cuda, num_epochs, resize, vi, logfile_test, "test")

        train_return = np.append(train_return,temp_train_return)
        eval_return = np.append(eval_return,temp_eval_return)
        test_return = np.append(test_return, temp_test_return)

        print(temp_train_return)
        print(temp_eval_return)
        print(temp_test_return)

        epoch_time = time.time() - start_time
        elapsed_time += epoch_time
        print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

    print('\n[Phase 4] : Testing model')
    # NOTE(review): best_acc is only ever set from a checkpoint in this
    # function, so without --resume this prints 0 -- confirm intent.
    print('* Test results : Acc@1 = %.2f%%' % (best_acc))
    rst = {"train": train_return, "test": test_return, "eval": eval_return}
    return rst