# Example no. 1 (scraped snippet header; original score: 0)
def main():
    """Train masked two-spiral classifiers and record the masked-subnetwork
    Hessian spectrum before and after training.

    For each trial: build a `hess.nets.MaskedNet`, apply a random mask keeping
    `pct_keep` of the weights, compute the top Hessian eigenvalues of the
    masked parameters at initialization, train with Adam on BCE-with-logits,
    then recompute the spectrum from the dense Hessian restricted to the
    surviving weights.  Results are pickled / saved under `fpath`.

    Fixes: losses stored via ``.item()`` so the bookkeeping tensors do not
    retain each iteration's autograd graph; test loss evaluated under
    ``torch.no_grad()``; output directory created before saving; the
    test-loss curve (previously computed but never written) is saved too.
    """
    import os

    X, Y = twospirals(500, noise=1.3)
    train_x = torch.FloatTensor(X)
    train_y = torch.FloatTensor(Y).unsqueeze(-1)

    X, Y = twospirals(100, noise=1.3)
    test_x = torch.FloatTensor(X)
    test_y = torch.FloatTensor(Y).unsqueeze(-1)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(0)
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
        train_x, train_y = train_x.cuda(), train_y.cuda()
        test_x, test_y = test_x.cuda(), test_y.cuda()

    loss_func = torch.nn.BCEWithLogitsLoss()
    lr = 0.01

    n_trials = 2
    n_iters = 1000
    losses = torch.zeros(n_trials, n_iters)
    test_losses = torch.zeros(n_trials, n_iters)
    init_eigs = []
    final_eigs = []
    pct_keep = 0.4
    optim = torch.optim.Adam

    for trial in range(n_trials):
        model = hess.nets.MaskedNet(train_x,
                                    train_y,
                                    bias=True,
                                    n_hidden=5,
                                    hidden_size=10,
                                    activation=torch.nn.ELU(),
                                    pct_keep=pct_keep)
        if use_cuda:
            model = model.cuda()

        # Randomly zero out (1 - pct_keep) of the weights; `keepers` are the
        # flat indices of the weights that survive the mask.
        mask, perm = hess.utils.mask_model(model, pct_keep, use_cuda)
        keepers = np.array(np.where(mask.cpu() == 1))[0]

        ## Hessian spectrum at initialization (masked parameters only) ##
        initial_evals = utils.get_hessian_eigs(train_x,
                                               train_y,
                                               loss=loss_func,
                                               model=model,
                                               mask=mask,
                                               use_cuda=use_cuda,
                                               n_eigs=100)
        init_eigs.append(initial_evals)

        ## train ##
        optimizer = optim(model.parameters(), lr=lr)

        for step in range(n_iters):
            optimizer.zero_grad()
            outputs = model(train_x)

            loss = loss_func(outputs, train_y)
            # .item() detaches the scalar — otherwise every iteration's graph
            # stays reachable through the `losses` tensor.
            losses[trial, step] = loss.item()

            # Evaluation only: no gradients needed for the test loss.
            with torch.no_grad():
                test_out = model(test_x)
                test_losses[trial, step] = loss_func(test_out, test_y).item()

            loss.backward()
            optimizer.step()

        ## Final spectrum: dense Hessian restricted to the surviving weights ##
        hessian = utils.get_hessian(train_x,
                                    train_y,
                                    loss=loss_func,
                                    model=model,
                                    use_cuda=use_cuda)
        sub_hess = hessian[np.ix_(keepers, keepers)]
        e_val, _ = np.linalg.eig(sub_hess.cpu().detach())
        final_eigs.append(e_val.real)

        print("model ", trial, " done")

    fpath = "../saved-experiments/"
    os.makedirs(fpath, exist_ok=True)  # torch.save/open do not create dirs

    torch.save(losses, os.path.join(fpath, "losses.pt"))
    torch.save(test_losses, os.path.join(fpath, "test_losses.pt"))

    with open(os.path.join(fpath, "init_eigs.P"), 'wb') as fp:
        pickle.dump(init_eigs, fp)

    with open(os.path.join(fpath, "final_eigs.P"), 'wb') as fp:
        pickle.dump(final_eigs, fp)
def main():
    """Train an edge-popup subnetwork on the two-spiral task, periodically
    snapshotting the masked-model Hessian spectrum.

    A `SubNetLinear` (trainable mask scores, frozen weights) is trained with
    Adam; its frozen weights and current mask are mirrored into a
    `MaskedNetLinear` twin so the Hessian can be computed on an ordinary
    masked model.  The spectrum is recomputed whenever the loss is within
    0.01 of its running minimum and at least `eigs_every` steps have passed
    since the last computation.  Results are pickled / saved under `fpath`.

    Fixes: removed the per-step ``print(loss)`` debug leftover (printed a raw
    tensor 1000 times) and a large block of commented-out dead code; the
    output directory is created before saving.
    """
    import os

    torch.random.manual_seed(88)
    X, Y = twospirals(500, noise=1.3)
    train_x = torch.FloatTensor(X)
    train_y = torch.FloatTensor(Y).unsqueeze(-1)

    ###################################
    ## Set up nets and match weights ##
    ###################################

    n_hidden = 5
    width = 1024

    subnet_model = SubNetLinear(in_dim=2, out_dim=1, n_layers=n_hidden, k=width)
    masked_model = MaskedNetLinear(in_dim=2, out_dim=1, n_layers=n_hidden, k=width)

    hess.net_utils.set_model_prune_rate(subnet_model, 0.5)
    hess.net_utils.freeze_model_weights(subnet_model)

    # Mirror the frozen weights and current mask into the masked twin so both
    # models represent the same function.
    weights = net_utils.get_weights_from_subnet(subnet_model)
    net_utils.apply_weights(masked_model, weights)
    mask = net_utils.get_mask_from_subnet(subnet_model)
    net_utils.apply_mask(masked_model, mask)
    mask = utils.flatten(mask)
    print(mask)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print('using cuda')
        torch.cuda.set_device(0)
        train_x, train_y = train_x.cuda(), train_y.cuda()
        subnet_model = subnet_model.cuda()
        masked_model = masked_model.cuda()

    ######################
    ## Train the Subnet ##
    ######################

    optimizer = torch.optim.Adam(subnet_model.parameters(), lr=0.001)
    loss_func = torch.nn.BCEWithLogitsLoss()
    n_eigs = 200
    n_iters = 1000
    eigs_out = []
    eig_steps = []
    losses = torch.zeros(n_iters)
    eigs_every = 5  # minimum gap (in steps) between Hessian computations
    min_loss = 1.
    prev_computed_step = -eigs_every - 1  # permits a computation at step 0

    for step in range(n_iters):
        optimizer.zero_grad()
        outputs = subnet_model(train_x)

        loss = loss_func(outputs, train_y)
        losses[step] = loss.item()
        loss.backward()
        optimizer.step()

        # Snapshot the spectrum only near the best loss seen so far, rate
        # limited to once per `eigs_every` steps.
        if losses[step] < min_loss + 0.01:
            min_loss = losses[step]
            if step > prev_computed_step + eigs_every:
                # Re-sync the (possibly changed) mask into the masked twin.
                mask = net_utils.get_mask_from_subnet(subnet_model)
                net_utils.apply_mask(masked_model, mask)
                mask = utils.flatten(mask)

                eigs = utils.get_hessian_eigs(loss_func, masked_model, mask=mask,
                                              n_eigs=n_eigs, train_x=train_x,
                                              train_y=train_y)

                eigs_out.append(eigs)
                eig_steps.append(step)

                prev_computed_step = step
                print("step ", step, " done")

    fpath = "./saved-subnet-hessian_0116/"
    os.makedirs(fpath, exist_ok=True)  # pickle/torch.save do not create dirs

    subnet_eigs = [ee.cpu() for ee in eigs_out]
    with open(os.path.join(fpath, "subnet_eigs.pkl"), 'wb') as f:
        pickle.dump(subnet_eigs, f)

    with open(os.path.join(fpath, "eig_steps.pkl"), 'wb') as f:
        pickle.dump(eig_steps, f)

    torch.save(subnet_model.state_dict(), os.path.join(fpath, "subnet_model.pt"))
    torch.save(masked_model.state_dict(), os.path.join(fpath, "masked_model.pt"))
    torch.save(losses, os.path.join(fpath, "losses.pt"))
# Example no. 3 (scraped snippet header; original score: 0)
def main():
    """Run `n_trials` trainings of a masked model, recording the masked
    Hessian spectrum before and after training for each trial.

    Each trial gets its own directory `<args.dir>/trial_<i>` containing the
    launching command, training checkpoints, and the pickled eigenvalue lists.

    Fixes: the original built save paths as ``fpath + fname`` where `fpath`
    had no trailing separator, so pickles were written as e.g.
    ``<args.dir>/trial_0init_eigs.P`` in the parent directory; paths now use
    ``os.path.join``.  ``torch.cuda.manual_seed`` is only called when CUDA is
    actually available.
    """
    args = parser()
    args.device = None

    if torch.cuda.is_available():
        args.device = torch.device("cuda")
        args.cuda = True
    else:
        args.device = torch.device("cpu")
        args.cuda = False

    n_trials = 10
    init_eigs = []
    final_eigs = []

    print("Preparing base directory %s" % args.dir)
    os.makedirs(args.dir, exist_ok=True)

    for trial in range(n_trials):
        # Per-trial output directory: command log, checkpoints, eig pickles.
        trial_dir = os.path.join(args.dir, "trial_" + str(trial))
        print("Preparing directory %s" % trial_dir)
        os.makedirs(trial_dir, exist_ok=True)
        with open(os.path.join(trial_dir, "command.sh"), "w") as f:
            f.write(" ".join(sys.argv))
            f.write("\n")

        torch.backends.cudnn.benchmark = True
        torch.manual_seed(args.seed)
        if args.cuda:
            torch.cuda.manual_seed(args.seed)

        print("Using model %s" % args.model)
        model_cfg = getattr(models, args.model)

        print("Loading dataset %s from %s" % (args.dataset, args.data_path))
        loaders, num_classes = data.loaders(
            args.dataset,
            args.data_path,
            args.batch_size,
            args.num_workers,
            model_cfg.transform_train,
            model_cfg.transform_test,
            use_validation=not args.use_test,
            split_classes=args.split_classes,
        )

        print("Preparing model")
        print(*model_cfg.args)
        model = model_cfg.base(*model_cfg.args,
                               num_classes=num_classes,
                               **model_cfg.kwargs,
                               use_masked=True)
        model.to(args.device)

        # Move each layer's mask onto the same device as its weights —
        # model.to() does not relocate masks stored as plain attributes.
        for m in model.modules():
            if isinstance(m, (hess.nets.MaskedConv2d, hess.nets.MaskedLinear)):
                if m.mask is not None and m.weight is not None:
                    m.mask = m.mask.to(m.weight.device)
                if m.has_bias:
                    if m.bias_mask is not None and m.bias is not None:
                        m.bias_mask = m.bias_mask.to(m.bias.device)

        mask = hess.utils.get_mask(model)

        criterion = torch.nn.functional.cross_entropy

        ## compute hessian pre-training ##
        initial_evals = utils.get_hessian_eigs(loss=criterion,
                                               model=model,
                                               mask=mask,
                                               use_cuda=args.cuda,
                                               n_eigs=100,
                                               loader=loaders['train'])
        init_eigs.append(initial_evals)

        ## train ##
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr_init,
                                    momentum=args.momentum,
                                    weight_decay=args.wd)

        for epoch in range(args.epochs):
            train_epoch(model,
                        loaders,
                        swag.losses.cross_entropy,
                        optimizer,
                        epoch=epoch,
                        end_epoch=args.epochs,
                        eval_freq=args.eval_freq,
                        save_freq=args.save_freq,
                        output_dir=trial_dir,
                        lr_init=args.lr_init)

        ## compute final hessian ##
        final_evals = utils.get_hessian_eigs(loss=criterion,
                                             model=model,
                                             use_cuda=args.cuda,
                                             n_eigs=100,
                                             mask=mask,
                                             loader=loaders['train'])
        final_eigs.append(final_evals)

        print("model ", trial, " done")

        # Lists accumulate across trials, so each trial's pickle also holds
        # all earlier trials' results (kept from the original behavior).
        with open(os.path.join(trial_dir, "init_eigs.P"), 'wb') as fp:
            pickle.dump(init_eigs, fp)

        with open(os.path.join(trial_dir, "final_eigs.P"), 'wb') as fp:
            pickle.dump(final_eigs, fp)
# Example no. 4 (scraped snippet header; original score: 0)
    trainset = torchvision.datasets.CIFAR10(root='/datasets/cifar10/', train=True,
                                            download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='/datasets/cifar10/', train=False,
                                           download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                             shuffle=False, num_workers=2)

    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


    dataiter = iter(testloader)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    evals, evecs = get_hessian_eigs(loss=criterion,
                         model=model, use_cuda=True, n_eigs=200,
                         loader=trainloader, evals=True)


    fpath = "./"

    fname = "cifar_evals_200.pt"
    torch.save(evals, fpath + fname)

    fname = "cifar_evecs_200.pt"
    torch.save(evecs, fpath + fname)
def main():
    """Train a CNN on CIFAR-10 for 30 epochs, save the model, then compute
    and save the top-200 Hessian eigenvalues/eigenvectors.

    Fixes: the output directory is created before saving (torch.save raises
    if it is missing); the mini-batch logging comment said "every 2000" while
    the code prints every 100.
    """
    import os

    use_cuda = torch.cuda.is_available()

    model = Net()
    criterion = torch.nn.CrossEntropyLoss()

    if use_cuda:
        model = model.cuda()

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(root='/datasets/cifar10/',
                                            train=True,
                                            download=True,
                                            transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=128,
                                              shuffle=True,
                                              num_workers=2)

    ## Super Trainer ##
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(30):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            if use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics every 100 mini-batches
            running_loss += loss.item()
            if i % 100 == 99:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0

    fpath = "./outputs/"
    os.makedirs(fpath, exist_ok=True)  # torch.save does not create dirs

    torch.save(model.state_dict(), os.path.join(fpath, "saved_model.pt"))

    evals, evecs = get_hessian_eigs(loss=criterion,
                                    model=model,
                                    use_cuda=use_cuda,
                                    n_eigs=200,
                                    loader=trainloader,
                                    evals=True)

    print("positive evals = ", evals)
    # Drop placeholder entries: presumably get_hessian_eigs pads unused slots
    # with exactly 1.0 — TODO confirm against its implementation.
    keep = np.where(evals.cpu() != 1)
    evals = evals[keep].squeeze()
    evecs = evecs[:, keep].squeeze()

    torch.save(evecs, os.path.join(fpath, "top_evecs.pt"))
    torch.save(evals, os.path.join(fpath, "top_evals.pt"))