Code example #1
            opt.hidden_size,
            opt.decoder_hidden_layers,
            opt.batch_size,
            device,
        )

    pc_prior = sum(param.numel() for param in prior_net.parameters())
    print(prior_net)
    print("Total parameters of prior net: {}".format(pc_prior))
    pc_posterior = sum(param.numel() for param in posterior_net.parameters())
    print(posterior_net)
    print("Total parameters of posterior net: {}".format(pc_posterior))
    pc_decoder = sum(param.numel() for param in decoder.parameters())
    print(decoder)
    print("Total parameters of decoder: {}".format(pc_decoder))
    pc_project_net = sum(param.numel() for param in project_net.parameters())
    print(project_net)
    print("Total parameters of decoder: {}".format(pc_project_net))

    if opt.use_lie:
        # Use Lie representation
        trainer = TrainerLie(motion_loader, action_embed_dict, opt, device,
                             raw_offsets, kinematic_chain)
    else:
        # Use 3d coordinates representation
        trainer = Trainer(motion_loader, action_embed_dict, opt, device)

    logs = trainer.trainIters(prior_net, posterior_net, project_net, decoder)

    plot_loss(logs, os.path.join(opt.save_root, "loss_curve.png"),
              opt.plot_every)
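The plot_loss helper called above is project-specific and not shown in this snippet. A minimal sketch of what such a helper might look like, assuming logs maps each loss name to a list of values sampled every plot_every iterations (the signature and behavior here are assumptions, not the project's actual code):

import matplotlib.pyplot as plt

def plot_loss(logs, save_path, plot_every):
    # Hypothetical reimplementation; the project's own helper may differ.
    plt.figure()
    for name, values in logs.items():
        xs = [i * plot_every for i in range(len(values))]  # iteration index of each sample
        plt.plot(xs, values, label=name)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig(save_path)
    plt.close()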
Code example #2
File: normal.py Project: stefanknegt/REPS
import copy
import sys

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

# MLP and NormalPolicyLoss are defined elsewhere in the project.


class NormalPolicy:
    def __init__(self, layers, sigma, activation=F.relu):
        # Two separate networks: one predicts the mean, the other the
        # standard deviation (softplus keeps the predicted sigma positive).
        self.mu_net = MLP(layers, activation)
        self.sigma = MLP(layers, activation=F.softplus)

        # self.mu_net.fc1.weight.data = torch.zeros(self.mu_net.fc1.weight.data.shape)
        # self.mu_net.eta.data = torch.ones(1) * 2

    def get_mu(self, states):
        return self.mu_net.forward(states)

    def get_sigma(self, states):
        return self.sigma.forward(states)

    def get_action(self, state):
        # random action if untrained
        # if self.initial_policy is not None:
        #     return self.initial_policy.get_action(state)
        # sample from normal otherwise
        if state.dim() < 2:
            state.unsqueeze_(0)
        mean = self.get_mu(state)
        std_dev = self.get_sigma(state)
        # reassign: squeeze() is not in-place, so the results must be kept
        mean = mean.squeeze()
        std_dev = std_dev.squeeze()
        m = torch.normal(mean, std_dev)
        return m.data

    def optimize(self, max_epochs_opt, train_dataset, val_dataset, batch_size, learning_rate, verbose=False):
        # init data loader
        train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        # init optimizer (despite the name, it updates both the mu and sigma networks)
        optimizer_mu = optim.Adagrad([{'params': self.mu_net.parameters()}, {'params': self.sigma.parameters()}], lr=learning_rate)
        # train on batches
        best_model = None
        last_loss_opt = None
        epochs_opt_no_decrease = 0
        epoch_opt = 0
        while (epoch_opt < max_epochs_opt) and (epochs_opt_no_decrease < 3):
            for batch_idx, batch in enumerate(train_data_loader):
                optimizer_mu.zero_grad()

                # forward pass
                mu = self.mu_net(batch[0])
                sigma = self.get_sigma(batch[0])
                loss = NormalPolicyLoss(mu, sigma, batch[1], batch[2])
                # backpropagate
                loss.backward()
                optimizer_mu.step()
            # calculate loss on validation data (no gradients needed here)
            with torch.no_grad():
                mu = self.get_mu(val_dataset[0])
                sigma = self.get_sigma(val_dataset[0])
                cur_loss_opt = NormalPolicyLoss(mu, sigma, val_dataset[1], val_dataset[2])
            # evaluate optimization iteration

            if verbose:
                sys.stdout.write('\r[policy] epoch: %d | loss: %f' % (epoch_opt+1, cur_loss_opt))
                sys.stdout.flush()
            if (last_loss_opt is None) or (cur_loss_opt < last_loss_opt - 1e-3):
                # deep-copy the snapshot; a plain state_dict() holds live
                # references and would silently track later updates
                best_model = copy.deepcopy(self.mu_net.state_dict())
                epochs_opt_no_decrease = 0
                last_loss_opt = cur_loss_opt
            else:
                epochs_opt_no_decrease += 1
            epoch_opt += 1
        self.mu_net.load_state_dict(best_model)
        if verbose:
            sys.stdout.write('\r[policy] training complete (%d epochs, %f best loss)\n' % (epoch_opt, last_loss_opt))
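For context, a usage sketch of NormalPolicy. The layer sizes below are hypothetical, and MLP here is assumed to take a list of layer sizes, as the constructor suggests; the unused sigma argument is passed as None:

# Hypothetical sizes: 4-dimensional states, 2-dimensional actions.
policy = NormalPolicy(layers=[4, 64, 2], sigma=None)  # sigma arg is unused by __init__

state = torch.randn(4)             # a single unbatched state
action = policy.get_action(state)  # sample from N(mu(state), softplus-sigma(state))
print(action.shape)                # torch.Size([2]) after the squeeze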
Code example #3
    # snippet assumes: import math; import numpy as np; import matplotlib.pyplot as plt; import torch
    plt.semilogy(vals, np.ones(len(vals)), '--', label='AutoLip')
    plt.semilogy(vals, [1 / math.pi**(x - 1) for x in vals],
                 '--',
                 label='theoretical limit')
    for epsilon in [0.01]:  #[0.5, 0.1, 0.01]:
        res = np.ones(len(vals))
        for i in range(len(vals)):
            input_size = 2
            output_size = 1
            layer_size = 100
            n_layers = vals[i]  #5
            layers = [layer_size] * n_layers
            model = MLP(input_size, output_size, layers)
            #print(model)

            for p in model.parameters():
                if len(p.shape) > 1:
                    p.data = random_matrix_fixed_spectrum(p.shape, epsilon)

            #dataset = create_dataset('RANDOM', 1000)
            #data_train, data_test = gp.make_dataset(2000, 500, dimension=input_size, scale=2)

            #model = gp.train_model(model, data_train, data_test, n_epochs=1000)
            #plot_model(model, window_size=10, num_points=1000)

            #compute_lipschitz_approximations(model, random_dataset([input_size], 100, scale=2))

            for p in model.parameters():
                p.requires_grad = False

            # Compute input sizes for all modules of the model
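random_matrix_fixed_spectrum is not defined in this snippet. Purely as an assumption from the name, it might build a random matrix whose singular values are all pinned to a constant, e.g. 1 - epsilon, which would fix each layer's spectral norm in the Lipschitz-bound experiment above:

import torch

def random_matrix_fixed_spectrum(shape, epsilon):
    # Hypothetical sketch: random matrix with every singular value fixed
    # to 1 - epsilon; the project's actual definition may differ.
    m = torch.randn(shape)
    u, s, vh = torch.linalg.svd(m, full_matrices=False)
    s_fixed = torch.full_like(s, 1.0 - epsilon)
    return u @ torch.diag(s_fixed) @ vh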