def str2optim(optimiser: Optimiserlike, model: Module, lr: float) -> Optimiser:
    if not isinstance(optimiser, str):
        return optimiser
    elif optimiser == 'adam':
        return adam.Adam(model.parameters(), lr=lr)
    elif optimiser == 'adadelta':
        return adadelta.Adadelta(model.parameters(), lr=lr)
    elif optimiser == 'adagrad':
        return adagrad.Adagrad(model.parameters(), lr=lr)
    elif optimiser == 'adamw':
        return adamw.AdamW(model.parameters(), lr=lr)
    elif optimiser == 'sparse_adam':
        return sparse_adam.SparseAdam(model.parameters(), lr=lr)
    elif optimiser == 'adamax':
        return adamax.Adamax(model.parameters(), lr=lr)
    elif optimiser == 'rmsprop':
        return rmsprop.RMSprop(model.parameters(), lr=lr)
    elif optimiser == 'sgd':
        return sgd.SGD(model.parameters(), lr=lr)
    else:
        raise RuntimeError(f'Optimiser {optimiser} not found.')
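# Usage sketch (assumption): `str2optim` accepts either a name from the table above or an
# already-constructed optimizer; `mlp` and the learning rates below are made up for illustration.
from torch import nn

mlp = nn.Sequential(nn.Linear(784, 64), nn.ReLU(), nn.Linear(64, 10))
opt_by_name = str2optim('rmsprop', mlp, lr=1e-3)        # resolved via the name table
opt_passthrough = str2optim(opt_by_name, mlp, lr=1e-3)  # non-string input is returned unchanged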
    def forward(self, x):
        return self.activation(self.lin(x))


num_layers = 10
highway_layers = [HighwayLayer(64, nn.Linear, 64, 64)]
first_layer = LinWActivation(784, 64)
model = nn.Sequential(first_layer, *highway_layers, nn.Linear(64, 10))
loss = nn.CrossEntropyLoss()
data = DataLoader(myMNIST(), shuffle=True, batch_size=256)
optimizer = sgd.SGD(list(model.parameters()), lr=0.001)
writer = SummaryWriter()
model = model.float()
c = 0
for step in range(100):
    print(step)
    for i, (x, y) in enumerate(data):
        x = x.type(torch.FloatTensor)
        y = y.type(torch.LongTensor)
        ychap = model(x)
        l = loss(ychap, y)
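        # Sketch (assumption): the excerpt above stops mid-loop; a typical continuation
        # back-propagates, applies the SGD update, resets gradients, and logs the loss
        # (the TensorBoard tag below is illustrative).
        l.backward()
        optimizer.step()
        optimizer.zero_grad()
        writer.add_scalar("Highway/Loss/CrossEntropy", l.item(), step)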
n = data.shape[0]
regling = linear1()
mse = MSE()
learning_rate = 0.01
minibatch_size = 64
w = torch.rand(13, requires_grad=True, dtype=torch.double)
b = torch.rand(1, requires_grad=True, dtype=torch.double)
writer = SummaryWriter()
# We define an optimizer and give it the parameters it should optimize
optimizer = sgd.SGD(params=(w, b), lr=learning_rate)
for step in range(100):
    idx = np.random.randint(0, n)
    indices = np.random.choice(data.shape[0], minibatch_size)
    l = None
    for i in indices:
        x = torch.DoubleTensor(data[i, :-1] / 100)
        y = torch.DoubleTensor([data[i, -1] / 100])
        y.requires_grad = False
        x.requires_grad = True
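        # Sketch (assumption): the excerpt is cut off inside the per-sample loop; a typical
        # continuation accumulates the minibatch loss and then takes one SGD step on (w, b).
        # The explicit forward pass and squared error below stand in for the snippet's
        # `regling`/`mse` helpers, whose call signatures are not shown; the TensorBoard tag
        # is illustrative.
        yhat = torch.dot(x, w) + b
        sample_loss = ((yhat - y) ** 2).mean()
        l = sample_loss if l is None else l + sample_loss
    l = l / minibatch_size
    l.backward()
    optimizer.step()
    optimizer.zero_grad()
    writer.add_scalar("Manual/sgd/Loss/MSE", l.item(), step)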
def optimize_state(state, ctm_env_init, loss_fn, obs_fn=None, post_proc=None,
                   main_args=cfg.main_args, opt_args=cfg.opt_args, ctm_args=cfg.ctm_args,
                   global_args=cfg.global_args):
    r"""
    :param state: initial wavefunction
    :param ctm_env_init: initial environment corresponding to ``state``
    :param loss_fn: loss function
    :param obs_fn: optional function computing observables, called after each loss evaluation
    :param post_proc: optional post-processing function
    :param main_args: parsed command line arguments
    :param opt_args: optimization configuration
    :param ctm_args: CTM algorithm configuration
    :param global_args: global configuration
    :type state: IPEPS
    :type ctm_env_init: ENV
    :type loss_fn: function(IPEPS,ENV,dict)->torch.Tensor
    :type obs_fn: function(IPEPS,ENV,dict)
    :type post_proc: function(IPEPS,ENV,dict)
    :type main_args: argparse.Namespace
    :type opt_args: OPTARGS
    :type ctm_args: CTMARGS
    :type global_args: GLOBALARGS

    Optimizes the initial wavefunction ``state`` with respect to ``loss_fn`` using the
    :class:`torch.optim.SGD` optimizer. The main parameters influencing the optimization
    process are given in :class:`config.OPTARGS`. Calls to ``loss_fn``, ``obs_fn``, and
    ``post_proc`` pass the current configuration as the dictionary
    ``{"ctm_args": ctm_args, "opt_args": opt_args}``.
    """
    verbosity = opt_args.verbosity_opt_epoch
    checkpoint_file = main_args.out_prefix + "_checkpoint.p"
    outputstatefile = main_args.out_prefix + "_state.json"
    t_data = dict({"loss": [], "min_loss": 1.0e+16, "loss_ls": [], "min_loss_ls": 1.0e+16})
    current_env = [ctm_env_init]
    context = dict({"ctm_args": ctm_args, "opt_args": opt_args, "loss_history": t_data})
    epoch = 0

    parameters = state.get_parameters()
    for A in parameters:
        A.requires_grad_(True)

    # optimizer = sgd_modified.SGD_MOD(parameters, lr=opt_args.lr, momentum=opt_args.momentum,
    #     line_search_fn=opt_args.line_search, line_search_eps=opt_args.line_search_tol)
    optimizer = sgd.SGD(parameters, lr=opt_args.lr, momentum=opt_args.momentum)

    # TODO test opt_resume
    if main_args.opt_resume is not None:
        print(f"INFO: resuming from checkpoint. resume = {main_args.opt_resume}")
        checkpoint = torch.load(main_args.opt_resume)
        epoch0 = checkpoint["epoch"]
        loss0 = checkpoint["loss"]
        cp_state_dict = checkpoint["optimizer_state_dict"]
        cp_opt_params = cp_state_dict["param_groups"][0]
        if main_args.opt_resume_override_params:
            cp_opt_params['lr'] = opt_args.lr
            cp_opt_params['momentum'] = opt_args.momentum
            cp_opt_params['dampening'] = opt_args.dampening
            cp_opt_params['line_search_fn'] = opt_args.line_search
            cp_opt_params['line_search_eps'] = opt_args.line_search_tol
        cp_state_dict["param_groups"][0] = cp_opt_params
        optimizer.load_state_dict(cp_state_dict)
        print(f"checkpoint.loss = {loss0}")

    #@profile
    def closure(linesearching=False):
        context["line_search"] = linesearching
        optimizer.zero_grad()

        # 1) evaluate loss and the gradient
        loss, ctm_env, history, t_ctm, t_check = loss_fn(state, current_env[0], context)
        t_grad0 = time.perf_counter()
        loss.backward()
        t_grad1 = time.perf_counter()

        # 2) detach current environment from autograd graph
        ctm_env.detach_()
        current_env[0] = ctm_env

        # 3) record loss and store current state if the loss improves
        if linesearching:
            t_data["loss_ls"].append(loss.item())
            if t_data["min_loss_ls"] > t_data["loss_ls"][-1]:
                t_data["min_loss_ls"] = t_data["loss_ls"][-1]
        else:
            t_data["loss"].append(loss.item())
            if t_data["min_loss"] > t_data["loss"][-1]:
                t_data["min_loss"] = t_data["loss"][-1]
                state.write_to_file(outputstatefile, normalize=True)

        # 4) log CTM metrics for debugging
        if opt_args.opt_logging:
            log_entry = dict({"id": epoch, "loss": t_data["loss"][-1], "t_ctm": t_ctm,
                              "t_check": t_check})
            if linesearching:
                log_entry["LS"] = len(t_data["loss_ls"])
                log_entry["loss"] = t_data["loss_ls"]
            log.info(json.dumps(log_entry))

        # 5) compute desired observables
        if obs_fn is not None:
            obs_fn(state, current_env[0], context)

        # 6) log grad metrics
        if opt_args.opt_logging:
            log_entry = dict({"id": epoch})
            if linesearching:
                log_entry["LS"] = len(t_data["loss_ls"])
            else:
                log_entry["t_grad"] = t_grad1 - t_grad0
                # log just the l2 and l-infinity norm of the full gradient
                # log_entry["grad_mag"] = [p.grad.norm().item() for p in parameters]
                flat_grad = torch.cat(tuple(p.grad.view(-1) for p in parameters))
                log_entry["grad_mag"] = [flat_grad.norm().item(),
                                         flat_grad.norm(p=float('inf')).item()]
                if opt_args.opt_log_grad:
                    log_entry["grad"] = [p.grad.tolist() for p in parameters]
            log.info(json.dumps(log_entry))

        return loss

    # closure for derivative-free line search. This closure
    # is to be called within a torch.no_grad context
    @torch.no_grad()
    def closure_linesearch(linesearching):
        context["line_search"] = linesearching

        # 1) evaluate loss
        loc_opt_args = copy.deepcopy(opt_args)
        loc_opt_args.opt_ctm_reinit = opt_args.line_search_ctm_reinit
        loc_ctm_args = copy.deepcopy(ctm_args)
        if opt_args.line_search_svd_method != 'DEFAULT':
            loc_ctm_args.projector_svd_method = opt_args.line_search_svd_method
        ls_context = dict({"ctm_args": loc_ctm_args, "opt_args": loc_opt_args,
                           "loss_history": t_data, "line_search": linesearching})
        loss, ctm_env, history, t_ctm, t_check = loss_fn(state, current_env[0], ls_context)
        current_env[0] = ctm_env

        # 2) store current state if the loss improves
        t_data["loss_ls"].append(loss.item())
        if t_data["min_loss_ls"] > t_data["loss_ls"][-1]:
            t_data["min_loss_ls"] = t_data["loss_ls"][-1]

        # 3) log metrics for debugging
        if opt_args.opt_logging:
            log_entry = dict({"id": epoch, "LS": len(t_data["loss_ls"]),
                              "loss": t_data["loss_ls"], "t_ctm": t_ctm, "t_check": t_check})
            log.info(json.dumps(log_entry))

        # 4) compute desired observables
        if obs_fn is not None:
            obs_fn(state, current_env[0], context)

        return loss

    for epoch in range(main_args.opt_max_iter):
        # Checkpoint the optimizer. Checkpointing before the step guarantees the correspondence
        # between the wavefunction and the last computed loss t_data["loss"][-1].
        if epoch > 0:
            store_checkpoint(checkpoint_file, state, optimizer, epoch, t_data["loss"][-1])

        # After the closure has executed, ``current_env`` does NOT correspond to ``state``,
        # since the ``state`` on-site tensors have been modified by the gradient step.
        # optimizer.step_2c(closure, closure_linesearch)
        optimizer.step(closure)

        # reset line search history
        t_data["loss_ls"] = []
        t_data["min_loss_ls"] = 1.0e+16

        # if post_proc is not None:
        #     post_proc(state, current_env[0], context)

        # termination condition
        if len(t_data["loss"]) > 1 and \
                abs(t_data["loss"][-1] - t_data["loss"][-2]) < opt_args.tolerance_change:
            break

    # optimization is over, store the last checkpoint
    store_checkpoint(checkpoint_file, state, optimizer,
                     main_args.opt_max_iter, t_data["loss"][-1])
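# Usage sketch (assumption): `state` is an IPEPS and `ctm_env_init` an ENV prepared by the
# surrounding peps-torch code; `dummy_loss_fn` is a hypothetical stand-in whose return tuple
# simply mirrors what `closure` above unpacks, i.e. (loss, ctm_env, history, t_ctm, t_check).
def dummy_loss_fn(state, ctm_env_in, context):
    # a real loss function would re-converge the CTM environment here and evaluate the
    # energy from it; this placeholder only returns a differentiable scalar
    loss = sum(A.norm() ** 2 for A in state.get_parameters())
    return loss, ctm_env_in, [], 0.0, 0.0

optimize_state(state, ctm_env_init, dummy_loss_fn)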
def main():
    epochs = 5
    batch_size = 1000
    training_data = torchvision.datasets.CIFAR10(
        root="./data", train=True, download=True, transform=transforms.ToTensor())
    test_data = torchvision.datasets.CIFAR10(
        root="./data", train=False, download=True, transform=transforms.ToTensor())
    train_loader = dataloader.DataLoader(training_data, shuffle=True, batch_size=batch_size)
    test_loader = dataloader.DataLoader(test_data, shuffle=True, batch_size=len(test_data))
    num_classes = len(training_data.classes)
    model = CNN(num_classes)
    model.apply(weight_init)
    # print(model)

    reuse = False
    if not reuse:
        optimizer = sgd.SGD(model.parameters(), lr=1e-2, momentum=0.9)
        # NLLLoss expects log-probabilities, so CNN is assumed to end in a LogSoftmax layer
        loss_function = nn.NLLLoss()
        for e in range(epochs):
            for i, data in enumerate(train_loader):
                image, label = data
                # print("Image shape: ", image.shape)
                # print("Labels shape: ", label.shape)
                model.zero_grad()
                prediction = model(image)
                # print("Prediction shape: ", prediction.shape)
                loss = loss_function(prediction, label)
                loss.backward()
                optimizer.step()
                print("[%d/%d][%d/%d] %.4f" % (e + 1, epochs, i + 1, len(train_loader),
                                               loss.mean().item()))
        torch.save(model.state_dict(), "CNN.model")
    else:
        model_dict = torch.load("CNN.model")
        model = CNN(num_classes)
        model.load_state_dict(model_dict)

    for b in test_loader:
        image, label = b
        with torch.no_grad():
            output = model(image)
        predicted = torch.max(output, 1)
        correct = (predicted.indices == label).nonzero().squeeze()
        # print(correct.shape)
        print("%d/%d = %.2f%%" % (len(correct), len(test_data),
                                  len(correct) / len(test_data) * 100))
        self.b2 = torch.nn.Parameter(torch.zeros(28 * 28), requires_grad=True)

    def encoder(self, x):
        return x.matmul(self.W) + self.b1

    def decoder(self, x):
        return x.matmul(self.W.t()) + self.b2

    def forward(self, x):
        return self.decoder(self.encoder(x))


data = DataLoader(myMNIST(), shuffle=True, batch_size=256)
auc = Autoencoder()
optimizer = sgd.SGD(list(auc.parameters()), lr=learning_rate)
writer = SummaryWriter()
auc = auc.to(device)
for step in range(100):
    print(step)
    for x in data:
        x = x.type(torch.FloatTensor)
        x = x.to(device)
        xchap = auc(x)
        l = nn.MSELoss()(xchap, x)
        l.backward()
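        # Sketch (assumption): the excerpt stops after backward(); the usual continuation
        # applies the SGD update, clears the gradients, and logs the reconstruction loss
        # (the TensorBoard tag below is illustrative).
        optimizer.step()
        optimizer.zero_grad()
        writer.add_scalar("Autoencoder/Loss/MSE", l.item(), step)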
if argl.reuse_model and os.path.exists("GAN.model"):
    model_dict = torch.load('GAN.model')
    D_dict = model_dict["Discriminator"]
    G_dict = model_dict["Generator"]
    # print(D_dict)
    # print("Model's state_dict:")
    # for param_tensor in model.D.state_dict():
    #     print(param_tensor, "\t", model.D.state_dict()[param_tensor].size())
    model.D.load_state_dict(D_dict)
    model.G.load_state_dict(G_dict)
else:
    D_optimizer = sgd.SGD(model.D.parameters(), lr=1e-3, momentum=0.9)
    G_optimizer = sgd.SGD(model.G.parameters(), lr=1e-3, momentum=0.9)
    # D_optimizer = adam.Adam(model.D.parameters(), lr=1e-3)
    # G_optimizer = adam.Adam(model.G.parameters(), lr=1e-3)
    torch.autograd.set_detect_anomaly(True)
    real = torch.full((argl.batch_size,), 1)
    fake = torch.full((argl.batch_size,), 0)
    for e in range(argl.epochs):
        for i, data in enumerate(train_loader):
            # Zero the gradients
            model.D.zero_grad()
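            # Sketch (assumption): the excerpt stops right after zeroing the discriminator's
            # gradients. A standard continuation alternates a discriminator update on real and
            # generated batches with a generator update; `criterion`, the noise size 100, and
            # the exact shapes returned by `model.D` / `model.G` are hypothetical here.
            images, _ = data
            criterion = nn.BCELoss()

            # discriminator update on real and fake batches
            d_loss_real = criterion(model.D(images).view(-1), real.float())
            noise = torch.randn(argl.batch_size, 100)
            generated = model.G(noise)
            d_loss_fake = criterion(model.D(generated.detach()).view(-1), fake.float())
            (d_loss_real + d_loss_fake).backward()
            D_optimizer.step()

            # generator update: push D's output on generated samples towards "real"
            model.G.zero_grad()
            g_loss = criterion(model.D(generated).view(-1), real.float())
            g_loss.backward()
            G_optimizer.step()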
n = data.shape[0]
learning_rate = 0.01
minibatch_size = 64
writer = SummaryWriter()

# Define the different layers of our model
f1 = nn.Linear(data.shape[1] - 1, 10)
f2 = nn.Linear(10, 1)
loss = nn.MSELoss()

# Gather all the parameters of the different modules
optimizer = sgd.SGD([*list(f1.parameters()), *list(f2.parameters())], lr=learning_rate)

for step in range(10000):
    idx = np.random.choice(data.shape[0], minibatch_size)
    x = torch.FloatTensor(data[idx, :-1] / 100)
    # reshape the targets into a (minibatch_size, 1) column so they match the output's shape
    y = torch.FloatTensor(data[idx, -1] / 100).unsqueeze(1)
    output = f2(nn.Tanh()(f1(x)))
    l = loss(output, y)
    l.backward()
    optimizer.step()
    optimizer.zero_grad()
    writer.add_scalar("Modules/sgd/Loss/MSE", l, step)
fields, data = ds.files.data()
n = data.shape[0]
writer = SummaryWriter()

f1 = nn.Linear(data.shape[1] - 1, 10)
f2 = nn.Linear(10, 1)
# Use a container; it aggregates the parameters of the different modules
network = nn.Sequential(f1, nn.Tanh(), f2)

# Loss op
loss = nn.MSELoss()

# Our optimizer works on the parameters of the `network` model
optimizer = sgd.SGD(list(network.parameters()), lr=learning_rate)

for step in range(1000):
    # Sample a minibatch
    idx = np.random.choice(data.shape[0], minibatch_size)
    x = torch.FloatTensor(data[idx, :-1] / 100)
    # reshape the targets into a (minibatch_size, 1) column so they match the output's shape
    y = torch.FloatTensor(data[idx, -1] / 100).unsqueeze(1)
    # Use the container to produce the output
    output = network(x)
    l = loss(output, y)
    # Differentiate
    l.backward()
    # One gradient-descent step
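    # Sketch (assumption): the excerpt is cut off at the comment above; the step it announces
    # and the usual gradient reset and logging would follow, mirroring the previous example
    # (the TensorBoard tag is illustrative).
    optimizer.step()
    optimizer.zero_grad()
    writer.add_scalar("Containers/sgd/Loss/MSE", l, step)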