    opt.hidden_size,
    opt.decoder_hidden_layers,
    opt.batch_size,
    device,
)

pc_prior = sum(param.numel() for param in prior_net.parameters())
print(prior_net)
print("Total parameters of prior net: {}".format(pc_prior))

pc_posterior = sum(param.numel() for param in posterior_net.parameters())
print(posterior_net)
print("Total parameters of posterior net: {}".format(pc_posterior))

pc_decoder = sum(param.numel() for param in decoder.parameters())
print(decoder)
print("Total parameters of decoder: {}".format(pc_decoder))

pc_project_net = sum(param.numel() for param in project_net.parameters())
print(project_net)
print("Total parameters of project net: {}".format(pc_project_net))

if opt.use_lie:
    # Use the Lie-algebra (rotation-based) motion representation
    trainer = TrainerLie(motion_loader, action_embed_dict, opt, device, raw_offsets, kinematic_chain)
else:
    # Use the raw 3D joint-coordinate representation
    trainer = Trainer(motion_loader, action_embed_dict, opt, device)

logs = trainer.trainIters(prior_net, posterior_net, project_net, decoder)
plot_loss(logs, os.path.join(opt.save_root, "loss_curve.png"), opt.plot_every)
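
# plot_loss is called above but defined elsewhere in the project. A minimal
# sketch of what it might look like, assuming `logs` maps each loss name to a
# list of values recorded every `intervals` iterations (only the call
# signature comes from the code above; the internals are an assumption):
import matplotlib
matplotlib.use("Agg")  # headless-safe backend for saving figures to disk
import matplotlib.pyplot as plt

def plot_loss(logs, save_path, intervals):
    plt.figure(figsize=(10, 5))
    for name, values in logs.items():
        # x-axis in training iterations, one point per logging interval
        steps = [i * intervals for i in range(1, len(values) + 1)]
        plt.plot(steps, values, label=name)
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig(save_path)
    plt.close()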
import sys

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

# MLP and NormalPolicyLoss are assumed to be defined/imported elsewhere in the project.


class NormalPolicy:
    def __init__(self, layers, sigma, activation=F.relu):
        # note: the `sigma` argument is unused; a second MLP predicts the std-dev
        self.mu_net = MLP(layers, activation)
        # softplus keeps the predicted standard deviation positive
        self.sigma = MLP(layers, activation=F.softplus)

    def get_mu(self, states):
        return self.mu_net.forward(states)

    def get_sigma(self, states):
        return self.sigma.forward(states)

    def get_action(self, state):
        # sample an action from the state-conditioned normal distribution
        if state.dim() < 2:
            state.unsqueeze_(0)
        mean = self.get_mu(state).squeeze()
        std_dev = self.get_sigma(state).squeeze()
        m = torch.normal(mean, std_dev)
        return m.data

    def optimize(self, max_epochs_opt, train_dataset, val_dataset, batch_size, learning_rate, verbose=False):
        # init data loader
        train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        # one optimizer over both the mean and the std-dev networks
        optimizer = optim.Adagrad([{'params': self.mu_net.parameters()},
                                   {'params': self.sigma.parameters()}],
                                  lr=learning_rate)
        # train on batches, with early stopping on the validation loss
        best_model = None
        last_loss_opt = None
        epochs_opt_no_decrease = 0
        epoch_opt = 0
        while (epoch_opt < max_epochs_opt) and (epochs_opt_no_decrease < 3):
            for batch_idx, batch in enumerate(train_data_loader):
                optimizer.zero_grad()
                # forward pass
                mu = self.get_mu(batch[0])
                sigma = self.get_sigma(batch[0])
                loss = NormalPolicyLoss(mu, sigma, batch[1], batch[2])
                # backpropagate
                loss.backward()
                optimizer.step()
            # calculate loss on validation data
            with torch.no_grad():
                mu = self.get_mu(val_dataset[0])
                sigma = self.get_sigma(val_dataset[0])
                cur_loss_opt = NormalPolicyLoss(mu, sigma, val_dataset[1], val_dataset[2])
            # evaluate optimization iteration
            if verbose:
                sys.stdout.write('\r[policy] epoch: %d | loss: %f' % (epoch_opt + 1, cur_loss_opt))
                sys.stdout.flush()
            if (last_loss_opt is None) or (cur_loss_opt < last_loss_opt - 1e-3):
                # keep the best mean-network weights seen so far
                best_model = self.mu_net.state_dict()
                epochs_opt_no_decrease = 0
                last_loss_opt = cur_loss_opt
            else:
                epochs_opt_no_decrease += 1
            epoch_opt += 1
        self.mu_net.load_state_dict(best_model)
        if verbose:
            sys.stdout.write('\r[policy] training complete (%d epochs, %f best loss)'
                             % (epoch_opt, last_loss_opt) + ' ' * (len(str(epoch_opt)) * 2) + '\n')
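
# NormalPolicyLoss is imported from elsewhere and not defined in this file.
# A minimal sketch, assuming it is a weighted negative log-likelihood of the
# observed actions under N(mu, sigma^2), with batch[2] acting as per-sample
# weights (e.g. advantages). Only the name and call signature come from the
# code above; the weighting interpretation is an assumption:
def NormalPolicyLoss(mu, sigma, actions, weights):
    dist = torch.distributions.Normal(mu, sigma)
    # sum log-probabilities over action dimensions, then weight per sample
    log_prob = dist.log_prob(actions).sum(dim=-1)
    return -(weights * log_prob).mean()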
plt.semilogy(vals, np.ones(len(vals)), '--', label='AutoLip')
plt.semilogy(vals, [1 / math.pi ** (x - 1) for x in vals], '--', label='theoretical limit')

for epsilon in [0.01]:  # [0.5, 0.1, 0.01]
    res = np.ones(len(vals))
    for i in range(len(vals)):
        input_size = 2
        output_size = 1
        layer_size = 100
        n_layers = vals[i]
        layers = [layer_size] * n_layers
        model = MLP(input_size, output_size, layers)

        # Replace every weight matrix by a random matrix with a fixed spectrum
        for p in model.parameters():
            if len(p.shape) > 1:
                p.data = random_matrix_fixed_spectrum(p.shape, epsilon)

        # Earlier experiments, kept disabled:
        # dataset = create_dataset('RANDOM', 1000)
        # data_train, data_test = gp.make_dataset(2000, 500, dimension=input_size, scale=2)
        # model = gp.train_model(model, data_train, data_test, n_epochs=1000)
        # plot_model(model, window_size=10, num_points=1000)
        # compute_lipschitz_approximations(model, random_dataset([input_size], 100, scale=2))

        # Freeze the network: we only estimate its Lipschitz constant
        for p in model.parameters():
            p.requires_grad = False

        # Compute input sizes for all modules of the model
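
# random_matrix_fixed_spectrum is not defined in this snippet. A minimal
# sketch, assuming it builds a random-orientation matrix with a prescribed
# spectrum: leading singular value 1 and all remaining singular values set to
# epsilon (the role of epsilon here is an assumption; only the name and
# signature come from the loop above):
import numpy as np
import torch

def random_matrix_fixed_spectrum(shape, epsilon):
    rows, cols = int(shape[0]), int(shape[1])
    k = min(rows, cols)
    # random orthogonal bases via QR decompositions
    u, _ = np.linalg.qr(np.random.randn(rows, rows))
    v, _ = np.linalg.qr(np.random.randn(cols, cols))
    s = np.full(k, epsilon)
    s[0] = 1.0
    # assemble U diag(s) V^T with the prescribed singular values
    m = u[:, :k] @ np.diag(s) @ v.T[:k, :]
    return torch.from_numpy(m).float()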