Example #1
def str2optim(optimiser: Optimiserlike, model: Module, lr: float) -> Optimiser:
    if not isinstance(optimiser, str):
        return optimiser
    elif optimiser == 'adam':
        return adam.Adam(model.parameters(), lr=lr)
    elif optimiser == 'adadelta':
        return adadelta.Adadelta(model.parameters(), lr=lr)
    elif optimiser == 'adagrad':
        return adagrad.Adagrad(model.parameters(), lr=lr)
    elif optimiser == 'adamw':
        return adamw.AdamW(model.parameters(), lr=lr)
    elif optimiser == 'sparse_adam':
        return sparse_adam.SparseAdam(model.parameters(), lr=lr)
    elif optimiser == 'adamax':
        return adamax.Adamax(model.parameters(), lr=lr)
    elif optimiser == 'rmsprop':
        return rmsprop.RMSprop(model.parameters(), lr=lr)
    elif optimiser == 'sgd':
        return sgd.SGD(model.parameters(), lr=lr)
    else:
        raise RuntimeError(f'Optimiser {optimiser} not found.')
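
A minimal usage sketch for the helper above; the tiny torch.nn.Linear model and the chosen learning rate are illustrative assumptions, not part of the original example:

import torch

model = torch.nn.Linear(10, 2)            # stand-in model (assumption)
opt = str2optim('adam', model, lr=1e-3)   # a string is resolved to the matching optimizer class
opt = str2optim(opt, model, lr=1e-3)      # an already-built optimizer is returned unchanged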
Example #2
    def forward(self, x):
        return self.activation(self.lin(x))


num_layers = 10

# stack num_layers highway blocks
highway_layers = [HighwayLayer(64, nn.Linear, 64, 64) for _ in range(num_layers)]
first_layer = LinWActivation(784, 64)

model = nn.Sequential(first_layer, *highway_layers, nn.Linear(64, 10))

loss = nn.CrossEntropyLoss()

data = DataLoader(myMNIST(), shuffle=True, batch_size=256)
optimizer = sgd.SGD(list(model.parameters()), lr=0.001)

writer = SummaryWriter()

model = model.float()

c = 0
for step in range(100):
    print(step)
    for i, (x, y) in enumerate(data):
        x = x.type(torch.FloatTensor)
        y = y.type(torch.LongTensor)
        ychap = model(x)

        l = loss(ychap, y)
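
Example #2 is truncated right after the loss computation; a typical continuation of the inner loop, assuming the usual backward/step pattern and using the c counter defined above as the logging step (an assumption), would be:

        l.backward()           # backpropagate through the highway network
        optimizer.step()       # one SGD update
        optimizer.zero_grad()  # reset gradients for the next minibatch
        writer.add_scalar("Loss/CrossEntropy", l.item(), c)
        c += 1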
Example #3
n = data.shape[0]

regling = linear1()
mse = MSE()


learning_rate = 0.01
minibatch_size = 64

w = torch.rand(13, requires_grad=True, dtype=torch.double)
b = torch.rand(1, requires_grad=True, dtype=torch.double)

writer = SummaryWriter()

# We define an optimizer and give it the parameters it should optimize
optimizer = sgd.SGD(params=(w,b), lr=learning_rate)

for step in range(100):
    idx = np.random.randint(0, n)

    indices = np.random.choice(data.shape[0], minibatch_size)

    l = None

    for i in indices:

        x = torch.DoubleTensor(data[i, :-1]/100)
        y = torch.DoubleTensor([data[i, -1] / 100])
        y.requires_grad = False
        x.requires_grad = True
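
Example #3 is also cut off inside the per-sample loop. A plausible continuation, which accumulates the per-sample losses into l and then takes one SGD step, is sketched below; the call signatures of regling and mse are assumptions, since their definitions are not shown:

        # hypothetical forward pass and per-sample loss (regling/mse signatures assumed)
        yhat = regling(x, w, b)
        li = mse(yhat, y)
        l = li if l is None else l + li

    # average over the minibatch, backpropagate, and take one SGD step
    l = l / minibatch_size
    l.backward()
    optimizer.step()
    optimizer.zero_grad()
    writer.add_scalar("Loss/MSE", l.item(), step)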
Example #4
def optimize_state(state, ctm_env_init, loss_fn, obs_fn=None, post_proc=None,
    main_args=cfg.main_args, opt_args=cfg.opt_args, ctm_args=cfg.ctm_args, 
    global_args=cfg.global_args):
    r"""
    :param state: initial wavefunction
    :param ctm_env_init: initial environment corresponding to ``state``
    :param loss_fn: loss function
    :param model: model with definition of observables
    :param local_args: parsed command line arguments
    :param opt_args: optimization configuration
    :param ctm_args: CTM algorithm configuration
    :param global_args: global configuration
    :type state: IPEPS
    :type ctm_env_init: ENV
    :type loss_fn: function(IPEPS,ENV,CTMARGS,OPTARGS,GLOBALARGS)->torch.tensor
    :type model: TODO Model base class
    :type local_args: argparse.Namespace
    :type opt_args: OPTARGS
    :type ctm_args: CTMARGS
    :type global_args: GLOBALARGS

    Optimizes initial wavefunction ``state`` with respect to ``loss_fn`` using 
    :class:`optim.lbfgs_modified.SGD_MOD` optimizer.
    The main parameters influencing the optimization process are given in :class:`config.OPTARGS`.
    Calls to functions ``loss_fn``, ``obs_fn``, and ``post_proc`` pass the current configuration
    as dictionary ``{"ctm_args":ctm_args, "opt_args":opt_args}``.
    """
    verbosity = opt_args.verbosity_opt_epoch
    checkpoint_file = main_args.out_prefix+"_checkpoint.p" 
    outputstatefile= main_args.out_prefix+"_state.json"
    t_data = dict({"loss": [], "min_loss": 1.0e+16, "loss_ls": [], "min_loss_ls": 1.0e+16})
    current_env=[ctm_env_init]
    context= dict({"ctm_args":ctm_args, "opt_args":opt_args, "loss_history": t_data})
    epoch= 0

    parameters= state.get_parameters()
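    # enable gradient tracking on the on-site tensors so autograd can differentiate the loss w.r.t. them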
    for A in parameters: A.requires_grad_(True)

    # optimizer = sgd_modified.SGD_MOD(parameters, lr=opt_args.lr, momentum=opt_args.momentum, \
    #     line_search_fn=opt_args.line_search, line_search_eps=opt_args.line_search_tol)
    optimizer = sgd.SGD(parameters, lr=opt_args.lr, momentum=opt_args.momentum)

    # TODO test opt_resume
    if main_args.opt_resume is not None:
        print(f"INFO: resuming from check point. resume = {main_args.opt_resume}")
        checkpoint = torch.load(main_args.opt_resume)
        epoch0 = checkpoint["epoch"]
        loss0 = checkpoint["loss"]
        cp_state_dict= checkpoint["optimizer_state_dict"]
        cp_opt_params= cp_state_dict["param_groups"][0]
        if main_args.opt_resume_override_params:
            cp_opt_params['lr']= opt_args.lr
            cp_opt_params['momentum']= opt_args.momentum
            cp_opt_params['dampening']= opt_args.dampening
            cp_opt_params['line_search_fn']= opt_args.line_search
            cp_opt_params['line_search_eps']= opt_args.line_search_tol
        cp_state_dict["param_groups"][0]= cp_opt_params
        optimizer.load_state_dict(cp_state_dict)
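        # load_state_dict also restores per-parameter optimizer state such as momentum buffers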
        print(f"checkpoint.loss = {loss0}")

    #@profile
    def closure(linesearching=False):
        context["line_search"]=linesearching
        optimizer.zero_grad()

        # 1) evaluate loss and compute the gradient
        loss, ctm_env, history, t_ctm, t_check = loss_fn(state, current_env[0], 
            context)

        t_grad0= time.perf_counter()
        loss.backward()
        t_grad1= time.perf_counter()

        # 2) detach the current environment from the autograd graph
        ctm_env.detach_()
        current_env[0] = ctm_env

        # 3) record loss and store the current state if the loss improves
        if linesearching:
            t_data["loss_ls"].append(loss.item())
            if t_data["min_loss_ls"] > t_data["loss_ls"][-1]:
                t_data["min_loss_ls"]= t_data["loss_ls"][-1]
        else:  
            t_data["loss"].append(loss.item())
            if t_data["min_loss"] > t_data["loss"][-1]:
                t_data["min_loss"]= t_data["loss"][-1]
                state.write_to_file(outputstatefile, normalize=True)

        # 4) log CTM metrics for debugging
        if opt_args.opt_logging:
            log_entry=dict({"id": epoch, "loss": t_data["loss"][-1], "t_ctm": t_ctm, \
                    "t_check": t_check})
            if linesearching:
                log_entry["LS"]=len(t_data["loss_ls"])
                log_entry["loss"]=t_data["loss_ls"]
            log.info(json.dumps(log_entry))

        # 5) compute desired observables
        if obs_fn is not None:
            obs_fn(state, current_env[0], context)

        # 6) log gradient metrics
        if opt_args.opt_logging:
            log_entry=dict({"id": epoch})
            if linesearching: log_entry["LS"]=len(t_data["loss_ls"])
            else: 
                log_entry["t_grad"]=t_grad1-t_grad0
                # log just l2 and l\infty norm of the full grad
                # log_entry["grad_mag"]= [p.grad.norm().item() for p in parameters]
                flat_grad= torch.cat(tuple(p.grad.view(-1) for p in parameters))
                log_entry["grad_mag"]= [flat_grad.norm().item(), flat_grad.norm(p=float('inf')).item()]
                if opt_args.opt_log_grad: log_entry["grad"]= [p.grad.tolist() for p in parameters]
            log.info(json.dumps(log_entry))

        return loss

    # closure for a derivative-free line search; intended to be called
    # within a torch.no_grad() context
    @torch.no_grad()
    def closure_linesearch(linesearching):
        context["line_search"]=linesearching

        # 1) evaluate loss
        loc_opt_args= copy.deepcopy(opt_args)
        loc_opt_args.opt_ctm_reinit= opt_args.line_search_ctm_reinit
        loc_ctm_args= copy.deepcopy(ctm_args)

        if opt_args.line_search_svd_method != 'DEFAULT':
            loc_ctm_args.projector_svd_method= opt_args.line_search_svd_method
        ls_context= dict({"ctm_args":loc_ctm_args, "opt_args":loc_opt_args, "loss_history": t_data,
            "line_search": linesearching})

        loss, ctm_env, history, t_ctm, t_check = loss_fn(state, current_env[0],\
            ls_context)
        current_env[0] = ctm_env

        # 2) store current state if the loss improves
        t_data["loss_ls"].append(loss.item())
        if t_data["min_loss_ls"] > t_data["loss_ls"][-1]:
            t_data["min_loss_ls"]= t_data["loss_ls"][-1]

        # 3) log metrics for debugging
        if opt_args.opt_logging:
            log_entry=dict({"id": epoch, "LS": len(t_data["loss_ls"]), \
                "loss": t_data["loss_ls"], "t_ctm": t_ctm, "t_check": t_check})
            log.info(json.dumps(log_entry))

        # 4) compute desired observables
        if obs_fn is not None:
            obs_fn(state, current_env[0], context)

        return loss

    for epoch in range(main_args.opt_max_iter):
        # checkpoint the optimizer
        # checkpointing before the step guarantees that the wavefunction corresponds
        # to the last computed loss value t_data["loss"][-1]
        if epoch>0:
            store_checkpoint(checkpoint_file, state, optimizer, epoch, t_data["loss"][-1])
        
        # After the closure executes, ``current_env`` does NOT correspond to ``state``,
        # since the on-site tensors of ``state`` have been modified by the gradient step.
        # optimizer.step_2c(closure, closure_linesearch)
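        # SGD.step(closure) evaluates the closure once (recomputing loss and gradients) and applies one update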
        optimizer.step(closure)

        # reset line search history
        t_data["loss_ls"]=[]
        t_data["min_loss_ls"]=1.0e+16

        # if post_proc is not None:
        #     post_proc(state, current_env[0], context)

        # termination condition
        if len(t_data["loss"])>1 and \
            abs(t_data["loss"][-1]-t_data["loss"][-2])<opt_args.tolerance_change:
            break

    # optimization is over, store the last checkpoint
    store_checkpoint(checkpoint_file, state, optimizer, \
        main_args.opt_max_iter, t_data["loss"][-1])
Example #5
def main():
    epochs = 5
    batch_size = 1000

    training_data = torchvision.datasets.CIFAR10(
        root="./data",
        train=True,
        download=True,
        transform=transforms.ToTensor())
    test_data = torchvision.datasets.CIFAR10(root="./data",
                                             train=False,
                                             download=True,
                                             transform=transforms.ToTensor())
    train_loader = dataloader.DataLoader(training_data,
                                         shuffle=True,
                                         batch_size=batch_size)
    test_loader = dataloader.DataLoader(test_data,
                                        shuffle=True,
                                        batch_size=len(test_data))

    num_classes = len(training_data.classes)

    model = CNN(num_classes)
    model.apply(weight_init)
    # print(model)

    reuse = False
    if not reuse:

        optimizer = sgd.SGD(model.parameters(), lr=1e-2, momentum=0.9)
        loss_function = nn.NLLLoss()
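        # NLLLoss expects log-probabilities, so the CNN is assumed to end in a LogSoftmax layer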

        for e in range(epochs):

            for i, data in enumerate(train_loader):

                image, label = data
                # print("Image shape: ", image.shape)
                # print("Labels shape: ", label.shape)

                model.zero_grad()

                prediction = model(image)
                # print("Prediction shape: ", prediction.shape)

                loss = loss_function(prediction, label)
                loss.backward()

                optimizer.step()

                print("[%d/%d][%d/%d] %.4f" %
                      (e + 1, epochs, i + 1, len(train_loader),
                       loss.mean().item()))

        torch.save(model.state_dict(), "CNN.model")

    else:
        model_dict = torch.load("CNN.model")
        model = CNN(num_classes)
        model.load_state_dict(model_dict)

    for b in test_loader:

        image, label = b

        with torch.no_grad():
            output = model(image)
            # torch.max returns (values, indices); the indices are the predicted classes
            predicted = torch.max(output, 1)

            # count correct predictions as a plain integer
            correct = (predicted.indices == label).sum().item()

            print("%d/%d = %.2f%%" % (correct, len(test_data),
                                      correct / len(test_data) * 100))
Example #6
        self.b2 = torch.nn.Parameter(torch.zeros(28 * 28), requires_grad=True)

    def encoder(self, x):
        return x.matmul(self.W) + self.b1

    def decoder(self, x):
        return x.matmul(self.W.t()) + self.b2

    def forward(self, x):
        return self.decoder(self.encoder(x))


data = DataLoader(myMNIST(), shuffle=True, batch_size=256)

auc = Autoencoder()
optimizer = sgd.SGD(list(auc.parameters()), lr=learning_rate)

writer = SummaryWriter()

auc = auc.to(device)

for step in range(100):
    print(step)
    for x in data:
        x = x.type(torch.FloatTensor)
        x = x.to(device)
        xchap = auc(x)

        l = nn.MSELoss()(xchap, x)

        l.backward()
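
The autoencoder example stops right after the backward pass; the usual continuation (an assumption about the omitted lines) applies the SGD update, clears the gradients, and logs the reconstruction loss:

        optimizer.step()       # one SGD update on the tied-weight autoencoder
        optimizer.zero_grad()  # reset gradients for the next batch
    writer.add_scalar("Loss/MSE", l.item(), step)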
Example #7
    if argl.reuse_model and os.path.exists("GAN.model"):
        model_dict = torch.load('GAN.model')

        D_dict = model_dict["Discriminator"]
        G_dict = model_dict["Generator"]

        # print(D_dict)
        # print("Model's state_dict:")
        # for param_tensor in model.D.state_dict():
        #    print(param_tensor, "\t", model.D.state_dict()[param_tensor].size())

        model.D.load_state_dict(D_dict)
        model.G.load_state_dict(G_dict)

    else:
        D_optimizer = sgd.SGD(model.D.parameters(), lr=1e-3, momentum=0.9)
        G_optimizer = sgd.SGD(model.G.parameters(), lr=1e-3, momentum=0.9)

        # D_optimizer = adam.Adam(model.D.parameters(), lr=1e-3)
        # G_optimizer = adam.Adam(model.G.parameters(), lr=1e-3)
        torch.autograd.set_detect_anomaly(True)

        # float targets, as required by BCE-style losses
        real = torch.full((argl.batch_size, ), 1.0)
        fake = torch.full((argl.batch_size, ), 0.0)

        for e in range(argl.epochs):

            for i, data in enumerate(train_loader):

                # Zero the gradients
                model.D.zero_grad()
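
Example #7 stops right after zeroing the discriminator gradients. The sketch below shows how a standard BCE-based GAN update typically continues from this point; criterion, the latent size of 100, and the (image, label) unpacking of data are illustrative assumptions, not part of the original snippet:

                # illustrative continuation; names below are assumptions noted above
                criterion = nn.BCELoss()
                x_real, _ = data                          # assumes (image, label) batches

                # discriminator loss on real images
                d_real = model.D(x_real).view(-1)
                loss_real = criterion(d_real, real)

                # discriminator loss on generated images (detach so G is not updated here)
                z = torch.randn(argl.batch_size, 100)     # latent size 100 is an assumption
                x_fake = model.G(z)
                d_fake = model.D(x_fake.detach()).view(-1)
                loss_fake = criterion(d_fake, fake)

                (loss_real + loss_fake).backward()
                D_optimizer.step()

                # generator update: try to make D classify fakes as real
                model.G.zero_grad()
                g_out = model.D(x_fake).view(-1)
                loss_g = criterion(g_out, real)
                loss_g.backward()
                G_optimizer.step()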
Example #8
n = data.shape[0]

learning_rate = 0.01
minibatch_size = 64


writer = SummaryWriter()

# Define the different layers of our model
f1 = nn.Linear(data.shape[1]-1, 10)
f2 = nn.Linear(10, 1)

loss = nn.MSELoss()

# Gather the parameters of the different modules
optimizer = sgd.SGD([*f1.parameters(), *f2.parameters()], lr=learning_rate)

for step in range(10000):
    idx = np.random.choice(data.shape[0], minibatch_size)

    x = torch.FloatTensor(data[idx, :-1] / 100)
    y = torch.FloatTensor(data[idx, -1] / 100).unsqueeze(1)  # shape (minibatch, 1) to match the output

    output = f2(nn.Tanh()(f1(x)))
    l = loss(output, y)
    l.backward()
    optimizer.step()
    optimizer.zero_grad()

    writer.add_scalar("Modules/sgd/Loss/MSE", l, step)
Example #9
fields, data = ds.files.data()
n = data.shape[0]

# hyperparameters (same values as in the previous example)
learning_rate = 0.01
minibatch_size = 64

writer = SummaryWriter()

f1 = nn.Linear(data.shape[1] - 1, 10)
f2 = nn.Linear(10, 1)

# Use a container; it aggregates the parameters of the different modules
network = nn.Sequential(f1, nn.Tanh(), f2)

# Loss op
loss = nn.MSELoss()

# Our optimizer works on the parameters of the network model
optimizer = sgd.SGD(list(network.parameters()), lr=learning_rate)

for step in range(1000):
    # Sample a minibatch
    idx = np.random.choice(data.shape[0], minibatch_size)

    x = torch.FloatTensor(data[idx, :-1] / 100)
    y = torch.FloatTensor(data[idx, -1] / 100).unsqueeze(1)  # shape (minibatch, 1) to match the output

    # Use the container to generate the output
    output = network(x)

    l = loss(output, y)
    # Differentiate (backprop)
    l.backward()
    # One gradient descent step
    optimizer.step()
    optimizer.zero_grad()