Code example #1
    def initsol(self):
        # generate data (by adding noise to noise-free data)
        torch.manual_seed(time.time())
        # one observation
        n = torch.randn(self.N,
                        dtype=torch.float32,
                        requires_grad=False,
                        device=mydevice)
        self.y = self.y0 + self.SNR * torch.norm(self.y0) / torch.norm(n) * n
        # parameters, initialized to zero
        x = torch.zeros(self.M, requires_grad=True, device=mydevice)

        def lossfunction(A, y, x, alpha=self.rho[0], beta=self.rho[1]):
            Ax = torch.matmul(A, x)
            err = y - Ax
            return torch.norm(
                err,
                2)**2 + alpha * torch.norm(x, 2)**2 + beta * torch.norm(x, 1)

        opt = LBFGSNew([x],
                       history_size=7,
                       max_iter=10,
                       line_search_fn=True,
                       batch_mode=False)

        # find solution x
        for nepoch in range(0, 20):

            def closure():
                if torch.is_grad_enabled():
                    opt.zero_grad()
                loss = lossfunction(self.A, self.y, x, self.rho[0],
                                    self.rho[1])
                if loss.requires_grad:
                    loss.backward()
                    #print(loss.data.item())
                return loss

            opt.step(closure)

        self.x = x
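
The snippet above relies on class attributes (self.A, self.y0, self.SNR, self.rho) that are set elsewhere. Below is a minimal, self-contained sketch of the same closure pattern with made-up problem dimensions and regularization weights; it only assumes that LBFGSNew can be imported from lbfgsnew, as in the other examples on this page.

import torch
from lbfgsnew import LBFGSNew  # custom optimizer, as in the examples below

M, N = 10, 50
A = torch.randn(N, M)
y = torch.randn(N)
x = torch.zeros(M, requires_grad=True)
alpha, beta = 0.01, 0.001  # illustrative regularization weights

opt = LBFGSNew([x], history_size=7, max_iter=10,
               line_search_fn=True, batch_mode=False)

def closure():
    if torch.is_grad_enabled():
        opt.zero_grad()
    err = y - torch.matmul(A, x)
    # squared residual + L2 + L1 penalties, as in lossfunction() above
    loss = torch.norm(err, 2)**2 + alpha * torch.norm(x, 2)**2 + beta * torch.norm(x, 1)
    if loss.requires_grad:
        loss.backward()
    return loss

for epoch in range(20):
    opt.step(closure)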
Code example #2
        # set up primal,dual variables
        y1 = torch.empty(N, dtype=torch.float, requires_grad=False)
        y2 = torch.empty(N, dtype=torch.float, requires_grad=False)
        y3 = torch.empty(N, dtype=torch.float, requires_grad=False)
        y1.fill_(0.0)
        y2.fill_(0.0)
        y3.fill_(0.0)
        z = torch.empty(N, dtype=torch.float, requires_grad=False)
        z.fill_(0.0)

        #opt1=optim.Adam(filter(lambda p: p.requires_grad, net1.parameters()),lr=0.001)
        #opt2=optim.Adam(filter(lambda p: p.requires_grad, net2.parameters()),lr=0.001)
        #opt3=optim.Adam(filter(lambda p: p.requires_grad, net3.parameters()),lr=0.001)
        opt1 = LBFGSNew(filter(lambda p: p.requires_grad, net1.parameters()),
                        history_size=10,
                        max_iter=4,
                        line_search_fn=True,
                        batch_mode=True)
        opt2 = LBFGSNew(filter(lambda p: p.requires_grad, net2.parameters()),
                        history_size=10,
                        max_iter=4,
                        line_search_fn=True,
                        batch_mode=True)
        opt3 = LBFGSNew(filter(lambda p: p.requires_grad, net3.parameters()),
                        history_size=10,
                        max_iter=4,
                        line_search_fn=True,
                        batch_mode=True)

        # need to scale rho down when starting from scratch
        rho = 0.001
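
The dual variables y1, y2, y3 and the consensus variable z above belong to a consensus-ADMM / federated averaging scheme whose update steps are not part of this snippet. The sketch below is a hedged illustration of one such round, not the project's code: get_trainable_values() is a hypothetical helper that flattens a network's trainable parameters into a length-N vector, and the local training of each net with opt1, opt2, opt3 is elided.

import torch

def get_trainable_values(net):
    # hypothetical helper: flatten all trainable parameters into one vector
    return torch.cat([p.detach().view(-1) for p in net.parameters() if p.requires_grad])

for admm_round in range(10):  # illustrative number of ADMM rounds
    # ... local training of net1, net2, net3 with opt1, opt2, opt3 goes here ...
    w1, w2, w3 = (get_trainable_values(n) for n in (net1, net2, net3))
    # consensus (averaging) step over the local models and scaled duals
    z = ((w1 + y1 / rho) + (w2 + y2 / rho) + (w3 + y3 / rho)) / 3
    # dual updates
    y1 += rho * (w1 - z)
    y2 += rho * (w2 - z)
    y3 += rho * (w3 - z)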
Code example #3
def hyperparameters_tuning_LBFGS_new_minibatch2(trainset, valset,
                                                batchsize_grid, max_iter_grid,
                                                epochs, model_NN):

    training_loss = []
    test_loss = []
    training_accuracy = []
    test_accuracy = []
    times = []
    parameters = []
    results = []
    Names = [
        "training_loss", "training_accuracy", "test_loss", "test_accuracy",
        "times", "parameters: batch iter"
    ]
    results.append(Names)

    for i in range(len(batchsize_grid)):
        bs = batchsize_grid[i]
        max_iter_ = max_iter_grid[i]

        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=bs,
                                                  shuffle=True)
        valloader = torch.utils.data.DataLoader(valset,
                                                batch_size=bs,
                                                shuffle=True)
        dataiter = iter(trainloader)
        images, _ = next(dataiter)
        image_size = images[0].shape[1]
        input_size = int(image_size**2)
        output_size = 10

        print("Minibatch size: ", bs)
        print("History size: ", max_iter_)
        parameter = []
        if model_NN == "FCNN":
            sizes = [input_size, 128, 64, output_size]
            model = fully_connected_NN(sizes)
            criterion = nn.NLLLoss()
            optimizer = LBFGSNew(model.parameters(),
                                 max_iter=max_iter_,
                                 history_size=max_iter_,
                                 line_search_fn=True,
                                 batch_mode=True)

        elif model_NN == "CNN":
            model = ConvNet(image_size)
            criterion = nn.CrossEntropyLoss()
            optimizer = LBFGSNew(model.parameters(),
                                 max_iter=max_iter_,
                                 history_size=max_iter_,
                                 line_search_fn=True,
                                 batch_mode=True)

        if model_NN == "FCNN":
            train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize(
                optimizer,
                epochs,
                trainloader,
                valloader,
                model,
                criterion,
                method="LBFGS")
        elif model_NN == "CNN":
            train_losses, test_losses, train_accuracies, test_accuracies, train_time = optimize_CNN(
                optimizer,
                epochs,
                trainloader,
                valloader,
                model,
                criterion,
                method="LBFGS")

        # save the parameters
        parameter = []
        parameter.append(bs)
        parameter.append(max_iter_)

        parameters.append(parameter)

        training_loss.append(train_losses)
        test_loss.append(test_losses)
        training_accuracy.append(train_accuracies)
        test_accuracy.append(test_accuracies)
        times.append(train_time)

    results.append(training_loss)
    results.append(training_accuracy)
    results.append(test_loss)
    results.append(test_accuracy)
    results.append(times)

    results.append(parameters)

    return results
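
A hypothetical call to the tuning function above (the grids, epoch count and dataset variables are illustrative only):

batchsize_grid = [128, 256, 512]
max_iter_grid = [4, 6, 8]
results = hyperparameters_tuning_LBFGS_new_minibatch2(trainset, valset,
                                                      batchsize_grid, max_iter_grid,
                                                      epochs=10, model_NN="FCNN")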
Code example #4
            # copy the slice X[cnt:cnt + numel] of the flat parameter vector
            # back into the corresponding parameter of each of the three networks
            param1.data.copy_(X[cnt:cnt + numel].view_as(param1.data))
            param2.data.copy_(X[cnt:cnt + numel].view_as(param2.data))
            param3.data.copy_(X[cnt:cnt + numel].view_as(param3.data))
        cnt += numel

from lbfgsnew import LBFGSNew  # custom optimizer
import torch.optim as optim
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.CrossEntropyLoss()
criterion3 = nn.CrossEntropyLoss()
#optimizer1=optim.Adam(net1.parameters(), lr=0.001)
#optimizer2=optim.Adam(net2.parameters(), lr=0.001)
#optimizer3=optim.Adam(net3.parameters(), lr=0.001)
optimizer1 = LBFGSNew(net1.parameters(),
                      history_size=10,
                      max_iter=4,
                      line_search_fn=True,
                      batch_mode=True)
optimizer2 = LBFGSNew(net2.parameters(),
                      history_size=10,
                      max_iter=4,
                      line_search_fn=True,
                      batch_mode=True)
optimizer3 = LBFGSNew(net3.parameters(),
                      history_size=10,
                      max_iter=4,
                      line_search_fn=True,
                      batch_mode=True)

start_time = time.time()
# train network (12 epochs for LBFGS, 60 for the other optimizers)
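
The training loop itself is not included in this snippet. A hedged sketch of one mini-batch step for net1 follows; the same pattern would apply to net2 and net3. trainloader and mydevice are assumptions, not names from the original file.

for inputs, labels in trainloader:
    inputs, labels = inputs.to(mydevice), labels.to(mydevice)

    def closure():
        if torch.is_grad_enabled():
            optimizer1.zero_grad()
        loss = criterion1(net1(inputs), labels)
        if loss.requires_grad:
            loss.backward()
        return loss

    optimizer1.step(closure)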
Code example #5
                unfreeze_one_layer(net_dict[ck], ci)
            else:
                unfreeze_one_block(net_dict[ck], ci)
        trainable = filter(lambda p: p.requires_grad, net_dict[0].parameters())
        params_vec1 = torch.cat([x.view(-1) for x in list(trainable)])

        # number of parameters trained
        N = params_vec1.numel()
        z = torch.empty(N, dtype=torch.float, requires_grad=False)
        z.fill_(0.0)

        opt_dict = {}
        for ck in range(K):
            opt_dict[ck] = LBFGSNew(filter(lambda p: p.requires_grad,
                                           net_dict[ck].parameters()),
                                    history_size=10,
                                    max_iter=4,
                                    line_search_fn=True,
                                    batch_mode=True)
            #opt_dict[ck]=optim.Adam(filter(lambda p: p.requires_grad, net_dict[ck].parameters()),lr=0.001)

        ############# loop 1 (Federated averaging for subset of model)
        for nadmm in range(Nadmm):
            ##### loop 2 (data) (all network updates are done per epoch, because K is large
            ##### and data per host is assumed to be small)
            for epoch in range(Nepoch):

                #### loop 3 (models)
                for ck in range(K):
                    running_loss = 0.0

                    for i, data1 in enumerate(trainloader_dict[ck], 0):
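                        # (hedged continuation sketch, not the original loop body:
                        #  the standard LBFGSNew closure step for host ck; criterion
                        #  is assumed to be a loss such as nn.CrossEntropyLoss and
                        #  mydevice the torch device used elsewhere in this project)
                        inputs, labels = data1[0].to(mydevice), data1[1].to(mydevice)

                        def closure():
                            if torch.is_grad_enabled():
                                opt_dict[ck].zero_grad()
                            loss = criterion(net_dict[ck](inputs), labels)
                            if loss.requires_grad:
                                loss.backward()
                            return loss

                        opt_dict[ck].step(closure)
                        with torch.no_grad():
                            running_loss += criterion(net_dict[ck](inputs),
                                                      labels).item()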
Code example #6
File: run.py  Project: ShijianXu/OptML_Project
def train(model_name,
          model,
          trainloader,
          testloader,
          device,
          opt,
          nb_epochs,
          lr=0.001):
    history_loss = []
    history_acc = []

    criterion = nn.CrossEntropyLoss()
    print("Using optimizer: ", opt)

    #TODO adjust optimizer hyperparameters
    if opt == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    elif opt == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif opt == 'lbfgs':
        optimizer = LBFGSNew(model.parameters(),
                             history_size=7,
                             max_iter=2,
                             line_search_fn=True,
                             batch_mode=True)
        #optimizer = optim.LBFGS(model.parameters())
    else:
        raise NotImplementedError

    for epoch in range(nb_epochs):
        # Train for each epoch
        model.train()

        running_loss = 0.0
        for batch_idx, data in enumerate(trainloader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)

            if opt == 'lbfgs':
                # Def Closure
                def closure():
                    if torch.is_grad_enabled():
                        optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    if loss.requires_grad:
                        loss.backward()
                    return loss

                optimizer.step(closure)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            else:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            #if batch_idx % 100 == 99:    # print every 100 mini-batches
            #    print('[{}, {}] loss: {}'.format(epoch + 1, batch_idx + 1, running_loss / 100))
            #    running_loss = 0.0

        # Test for each epoch
        epoch_loss = running_loss / (batch_idx + 1)
        epoch_acc = test(model, testloader, device)

        print("Epoch {} train loss: {}, test acc: {}".format(
            epoch + 1, epoch_loss, epoch_acc))
        history_loss.append(epoch_loss)
        history_acc.append(epoch_acc)

    print('Finished Training')
    with open('history_loss_mnist' + '_' + model_name + '_' + opt + '.json',
              'w') as f:
        json.dump(history_loss, f)
    with open('history_acc_mnist' + '_' + model_name + '_' + opt + '.json',
              'w') as f:
        json.dump(history_acc, f)
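
A hypothetical invocation of train() above. MyNet, trainloader and testloader are placeholders for whatever model class and DataLoaders the project actually defines.

import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MyNet().to(device)  # placeholder model class, not defined in this snippet
train('mynet', model, trainloader, testloader, device, opt='lbfgs', nb_epochs=10)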
Code example #7
    def step(self, action, keepnoise=False):
        done = False  # make sure to return True at some point
        # update state based on the action  rho = scale*(action)
        self.rho = action * (HIGH - LOW) / 2 + (HIGH + LOW) / 2
        penalty = 0
        # make sure rho stays within limits, if this happens, add a penalty
        for ci in range(self.K):
            if self.rho[ci] < LOW:
                self.rho[ci] = LOW
                penalty += -0.1
            if self.rho[ci] > HIGH:
                self.rho[ci] = HIGH
                penalty += -0.1

        # generate data (by adding noise to noise-free data)
        if not keepnoise:
            torch.manual_seed(time.time())
            n = torch.randn(self.N,
                            dtype=torch.float32,
                            requires_grad=False,
                            device=mydevice)
            y = self.y0 + self.SNR * torch.norm(self.y0) / torch.norm(n) * n
        else:
            y = self.y
        # parameters, initialized to zero
        x = torch.zeros(self.M, requires_grad=True, device=mydevice)

        def lossfunction(A, y, x, alpha=self.rho[0], beta=self.rho[1]):
            Ax = torch.matmul(A, x)
            err = y - Ax
            return torch.norm(
                err,
                2)**2 + alpha * torch.norm(x, 2)**2 + beta * torch.norm(x, 1)

        opt = LBFGSNew([x],
                       history_size=7,
                       max_iter=10,
                       line_search_fn=True,
                       batch_mode=False)

        # find solution x
        for nepoch in range(0, 20):

            def closure():
                if torch.is_grad_enabled():
                    opt.zero_grad()
                loss = lossfunction(self.A, y, x, self.rho[0], self.rho[1])
                if loss.requires_grad:
                    loss.backward()
                    #print(loss.data.item())
                return loss

            opt.step(closure)

        # Jacobian of model = A
        jac = jacobian(torch.matmul(self.A, x), x).to(mydevice)

        # right hand term = -2 A^T
        df_dx = (lambda yi: gradient(
            lossfunction(self.A, yi, x, self.rho[0], self.rho[1]), x))
        # no need to pass one-hot vectors, because we calculate d( )/dy^T in one go
        e = torch.ones_like(y)  # all ones
        ll = torch.autograd.functional.jacobian(df_dx, e)

        mm = torch.zeros_like(ll).to(mydevice)
        # copy ll because it is modified
        for i in range(self.N):
            ll2 = ll[:, i].clone().detach()
            mm[:, i] = inv_hessian_mult(opt, ll2)

        # multiply by Jacobian of model
        B = torch.matmul(jac, mm).to('cpu')
        #print(B)
        # eigenvalues
        E, _ = torch.linalg.eig(B)
        # 1+eigenvalues (only real part), sorted in ascending order
        EE = E.real + 1
        # remember this for rendering later
        self.x = x

        observation = {'A': self.A.view(-1).cpu(), 'eig': EE}
        # final error ||Ax-y||
        final_err = torch.norm(torch.matmul(self.A, x) - y, 2).detach()
        # reward : penalize by adding -penalty
        # residual: normalize by data power, eigenvalues = normalize by min/max
        reward = torch.norm(
            y, 2) / final_err + torch.min(EE) / torch.max(EE) + penalty
        #reward.clamp_(-1,1) # clip to [-1,1] - only useful for multiple environments, not here

        # info : meta details {}
        info = {}
        return observation, reward, done, info
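
The least intuitive part of step() is differentiating the gradient of the loss with respect to the data y via torch.autograd.functional.jacobian. Below is a small self-contained illustration of that trick, using torch.autograd.grad in place of the project's gradient() helper (which is assumed to behave similarly).

import torch
from torch.autograd.functional import jacobian

A = torch.randn(5, 3)
x = torch.randn(3, requires_grad=True)

def df_dx(y):
    # gradient of ||y - A x||^2 with respect to x, kept differentiable in y
    loss = torch.norm(y - torch.matmul(A, x), 2)**2
    return torch.autograd.grad(loss, x, create_graph=True)[0]

y0 = torch.randn(5)
ll = jacobian(df_dx, y0)   # d(dloss/dx)/dy, here equal to -2 A^T
print(ll.shape)            # torch.Size([3, 5])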
Code example #8
File: rica_lofar.py  Project: SarodYatawatta/LSHM
  for i in range(Niter):
    # get the inputs
    patchx, patchy, inputs, uvcoords = get_data_minibatch(
        file_list, sap_list, batch_size=default_batch, patch_size=patch_size,
        normalize_data=True, num_channels=num_in_channels, uvdist=True)
    # wrap them in variable
    x = Variable(inputs).to(mydevice)
    uv = Variable(uvcoords).to(mydevice)
    (nbatch, nchan, nx, ny) = inputs.shape
    # nbatch = patchx x patchy x default_batch
    # i.e., one baseline (per polarization, real,imag) will create patchx x patchy batches
    batch_per_bline = patchx * patchy

    X = torch.transpose(x.view(-1, L), 0, 1)
    # setup S for this data batch
    S = torch.rand((M, nbatch), requires_grad=True, dtype=torch.float32, device=mydevice)
    # setup optimizer
    optimizer = LBFGSNew([S], history_size=7, max_iter=10, line_search_fn=True, batch_mode=True)

    def closure():
      if torch.is_grad_enabled():
        optimizer.zero_grad()
      # loss
      loss = criterion(X, torch.matmul(A, S)) / (nbatch * L) + lambda1 * torch.linalg.norm(S, 1) / S.numel()
      if loss.requires_grad:
        #print('%d %d %e'%(epoch,i,loss.data.item()))
        loss.backward()
      return loss

    optimizer.step(closure)

    with torch.no_grad():
      # now update A
      E = X - torch.matmul(A, S)
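      # (hedged continuation sketch, not the original update: one plausible next step
      #  is a gradient move on the dictionary A followed by column normalization;
      #  the step size mu_A is a hypothetical hyperparameter, not from the project)
      mu_A = 0.01
      A = A + mu_A * torch.matmul(E, S.t()) / nbatch
      A = A / torch.linalg.norm(A, dim=0, keepdim=True)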
Code example #9
def main():
    best_prec1 = 0

    # Check the save_dir exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    trainable = args.trainable
    if trainable == "none":
        trainable = "freeze"

    if args.use_lbfgs:
        opt = "LBFGS"
    else:
        opt = "SGD"

    if args.wide_resnet:
        wide = "wide"
    else:
        wide = ""

    datetime = date.today().strftime("%b-%d-%Y")

    exp_name = "{}{}{}{}_{}".format(args.arch, wide, opt, trainable, datetime)
    logfile = open(os.path.join(args.save_dir, "{}.txt".format(exp_name)), "a")

    if args.wide_resnet:
        # use wide residual net https://arxiv.org/abs/1605.07146
        model = torchvision.models.resnet.wide_resnet50_2()
    else:
        model = resnet.__dict__[args.arch]()
    if trainable == "freeze":
        freeze_model(model)
    elif trainable == "bn":
        freeze_model(model)
        unfreeze_model(model, ["gamma", "beta"])

    print(test(model), file=logfile)

    model.cuda()

    if args.use_lbfgs:
        optimizer = LBFGSNew(model.parameters(),
                             history_size=7,
                             max_iter=2,
                             line_search_fn=True,
                             batch_mode=True)

    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        if args.arch in ['resnet1202', 'resnet110']:
            # for resnet1202 original paper uses lr=0.01 for first 400 minibatches for warm-up
            # then switch back. In this setup it will correspond for first epoch.
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr * 0.1

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[100, 150], last_epoch=args.start_epoch - 1)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    torch.manual_seed(0)
    train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        root='./data',
        train=True,
        transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transforms.ToTensor(),
            normalize,
        ]),
        download=True),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    torch.manual_seed(0)
    val_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        root='./data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=128,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    if args.half:
        model.half()
        criterion.half()

    if args.evaluate:
        validate(val_loader, model, criterion, logfile)
        return

    for epoch in range(args.start_epoch, args.epochs):

        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        train(train_loader, model, criterion, optimizer, epoch, logfile)
        lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, logfile)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if epoch > 0 and epoch % args.save_every == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                },
                is_best,
                filename=os.path.join(args.save_dir, 'checkpoint.th'))

        save_checkpoint(
            {
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            },
            is_best,
            filename=os.path.join(args.save_dir, 'model.th'))
    logfile.close()
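
train() and validate() are defined elsewhere in this project. When args.use_lbfgs is set, the training step has to use the closure pattern shown in the other examples; the sketch below is a hedged illustration of what that branch might look like, not the project's actual train() (half-precision handling and the epoch/logfile arguments are omitted).

def train_lbfgs_sketch(train_loader, model, criterion, optimizer):
    model.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.cuda(), targets.cuda()

        def closure():
            if torch.is_grad_enabled():
                optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            if loss.requires_grad:
                loss.backward()
            return loss

        optimizer.step(closure)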
Code example #10
File: cifar10_resnet.py  Project: nlesc-dirac/pytorch
     correct += (predicted==labels.to(mydevice)).sum()
     total += labels.size(0)

   return 100*correct//total
#####################################################

lambda1=0.000001
lambda2=0.001

# loss function and optimizer
import torch.optim as optim
from lbfgsnew import LBFGSNew # custom optimizer
criterion=nn.CrossEntropyLoss()
#optimizer=optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
#optimizer=optim.Adam(net.parameters(), lr=0.001)
optimizer = LBFGSNew(net.parameters(), history_size=7, max_iter=2, line_search_fn=True,batch_mode=True)


load_model=False
# update from a saved model 
if load_model:
  checkpoint=torch.load('./res18.model',map_location=mydevice)
  net.load_state_dict(checkpoint['model_state_dict'])
  net.train() # initialize for training (BN,dropout)

start_time=time.time()
use_lbfgs=True
# train network
for epoch in range(20):
  running_loss=0.0
  for i,data in enumerate(trainloader,0):
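    # (hedged continuation sketch, not the original loop body: the standard closure
    #  step for LBFGSNew in batch mode; the l1/l2 regularization implied by lambda1
    #  and lambda2 above is omitted here for brevity)
    inputs, labels = data[0].to(mydevice), data[1].to(mydevice)

    if use_lbfgs:
      def closure():
        if torch.is_grad_enabled():
          optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        if loss.requires_grad:
          loss.backward()
        return loss

      optimizer.step(closure)
    else:
      optimizer.zero_grad()
      outputs = net(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

    with torch.no_grad():
      running_loss += criterion(net(inputs), labels).item()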
Code example #11
            params_vec1 = torch.cat([x.view(-1) for x in list(trainable)])
            N = params_vec1.numel()
            del trainable, params_vec1

            z = torch.zeros(N, dtype=torch.float,
                            requires_grad=False).to(mydevice,
                                                    non_blocking=True)

            opt_dict = {}
            for ck in range(K):
                if mdl == 0:
                    #opt_dict[ck]=optim.Adam(filter(lambda p: p.requires_grad, encoder_dict[ck].parameters()),lr=0.0001)
                    opt_dict[ck] = LBFGSNew(filter(
                        lambda p: p.requires_grad,
                        encoder_dict[ck].parameters()),
                                            history_size=7,
                                            max_iter=2,
                                            line_search_fn=True,
                                            batch_mode=True)
                elif mdl == 1:
                    #opt_dict[ck]=optim.Adam(filter(lambda p: p.requires_grad, contextgen_dict[ck].parameters()),lr=0.0001)
                    opt_dict[ck] = LBFGSNew(filter(
                        lambda p: p.requires_grad,
                        contextgen_dict[ck].parameters()),
                                            history_size=7,
                                            max_iter=2,
                                            line_search_fn=True,
                                            batch_mode=True)
                else:
                    #opt_dict[ck]=optim.Adam(filter(lambda p: p.requires_grad, predictor_dict[ck].parameters()),lr=0.0001)
                    opt_dict[ck] = LBFGSNew(filter(
                        lambda p: p.requires_grad,
                        predictor_dict[ck].parameters()),
                                            history_size=7,
                                            max_iter=2,
                                            line_search_fn=True,
                                            batch_mode=True)