Example #1
 def fit(self,
         experience_dataset,
         batch_size=100,
         iterations=2000,
         optimizer=None):
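     # convert stored experience into (inputs, state-delta) training pairs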
     X, Y = experience_dataset.get_dynmodel_dataset(
         deltas=True, return_costs=self.learn_reward)
     self.set_dataset(
         torch.tensor(X).to(self.X.device, self.X.dtype),
         torch.tensor(Y).to(self.X.device, self.X.dtype))
     if optimizer is None:
         optimizer = torch.optim.Adam(self.parameters())
     utils.train_regressor(self,
                           iterations,
                           batch_size,
                           optimizer=optimizer,
                           log_likelihood=self.log_likelihood_loss)
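All of these examples funnel their optimization through utils.train_regressor. Its implementation is not shown on this page, but the call sites suggest a loop roughly like the following. This is a minimal sketch, not the library's actual code: it assumes the regressor stores its dataset in model.X / model.Y (as set_dataset implies), that calling the model returns a tuple of density parameters, and that log_likelihood(y, *outs) returns per-sample log densities.

import torch

def train_regressor_sketch(model, iterations, batch_size, optimizer,
                           log_likelihood):
    # minimal minibatch maximum-likelihood loop (hypothetical)
    N = model.X.shape[0]
    for _ in range(iterations):
        # draw a random minibatch from the stored dataset
        idx = torch.randint(N, (min(batch_size, N),))
        x, y = model.X[idx], model.Y[idx]
        optimizer.zero_grad()
        outs = model(x)  # e.g. (mean, log_variance)
        loss = -log_likelihood(y, *outs).mean()
        loss.backward()
        optimizer.step()

The real train_regressor clearly supports more than this: resampling of dropout masks, prioritized sampling, and TensorBoard summaries all appear as keyword arguments in the examples below.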
Example #2
                                         stop_when_done=args.stop_when_done,
                                         callback=render_fn)
            exp.append_episode(*ret, policy_params=[])
        exp.policy_parameters[-1] = copy.deepcopy(pol.state_dict())
        exp.save(results_filename)

        # train dynamics
        X, Y = exp.get_dynmodel_dataset(deltas=True,
                                        return_costs=args.learn_reward)
        dyn.set_dataset(X.to(dyn.X.device, dyn.X.dtype),
                        Y.to(dyn.X.device, dyn.X.dtype))
        utils.train_regressor(dyn,
                              args.dyn_opt_iters,
                              args.dyn_batch_size,
                              True,
                              opt1,
                              log_likelihood=dyn.output_density.log_prob,
                              prioritized_sampling=args.prioritized_replay,
                              summary_writer=writer,
                              summary_scope='model_learning/episode_%d' %
                              ps_it)
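        # checkpoint the dynamics model after this round of training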
        torch.save(dyn.state_dict(),
                   os.path.join(results_folder, 'latest_dynamics.pth.tar'))

        # sample initial states for policy optimization
        x0 = exp.sample_states(args.pol_batch_size,
                               timestep=0).to(dyn.X.device,
                                              dyn.X.dtype).detach()

        if args.plot_level > 0:
            utils.plot_rollout(x0[:25], dyn, pol, args.pred_H * 2)
Example #3
                env,
                pol,
                min(control_H, new_exp - exp.n_samples() + 1),
                callback=lambda *args, **kwargs: env.render())
            params_ = [p.clone() for p in list(pol.parameters())]
            exp.append_episode(*ret, policy_params=params_)
            exp.save(results_filename)

        # train dynamics
        X, Y = exp.get_dynmodel_dataset(deltas=True, return_costs=learn_reward)
        dyn.set_dataset(X.to(dyn.X.device).float(), Y.to(dyn.X.device).float())
        utils.train_regressor(dyn,
                              N_dynopt,
                              dyn_batch_size,
                              True,
                              opt1,
                              log_likelihood=dyn.output_density.log_prob,
                              summary_writer=writer,
                              summary_scope='model_learning/episode_%d' %
                              ps_it)

        # sample initial states for policy optimization
        x0 = exp.sample_states(pol_batch_size,
                               timestep=0).to(dyn.X.device).float()
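        # jitter the sampled states with noise proportional to their spread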
        x0 = x0 + 1e-2 * x0.std(0) * torch.randn_like(x0)
        x0 = x0.detach()

        utils.plot_rollout(x0[:25], dyn, pol, pred_H * 2)

        # train policy
        def on_iteration(i, loss, states, actions, rewards, discount):
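The snippet ends at the definition of the per-iteration callback. A plausible body, given that a tensorboardX-style SummaryWriter named writer is in scope in this script, might simply log training progress; the following is a hypothetical illustration, not the original code:

        def on_iteration(i, loss, states, actions, rewards, discount):
            # log the policy-optimization loss and the mean sampled return
            writer.add_scalar('policy_learning/loss', loss.item(), i)
            writer.add_scalar('policy_learning/mean_return',
                              rewards.sum(0).mean().item(), i)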
Example #4
def main():
    # model parameters
    n_layers = 4
    layer_width = 200
    drop_rate = 0.25
    odims = 1
    n_components = 5
    N_batch = 100
    use_cuda = False

    # single gaussian output model
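    # (2 * odims outputs: presumably a mean and a log-variance per output dim)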
    mlp = models.mlp(
        1,
        2 * odims, [layer_width] * n_layers,
        nonlin=torch.nn.ReLU,
        weights_initializer=partial(torch.nn.init.xavier_normal_,
                                    gain=torch.nn.init.calculate_gain('relu')),
        biases_initializer=partial(torch.nn.init.uniform_, a=-1.0, b=1.0),
        dropout_layers=[
            models.CDropout(drop_rate, temperature=.1) for i in range(n_layers)
        ])
    model = models.Regressor(mlp,
                             output_density=models.DiagGaussianDensity(odims))

    # mixture density network
    mlp2 = models.mlp(
        1,
        2 * n_components * odims + n_components + 1, [layer_width] * n_layers,
        nonlin=torch.nn.ReLU,
        weights_initializer=partial(torch.nn.init.xavier_normal_,
                                    gain=torch.nn.init.calculate_gain('relu')),
        biases_initializer=partial(torch.nn.init.uniform_, a=-1.0, b=1.0),
        dropout_layers=[
            models.CDropout(drop_rate, temperature=.1) for i in range(n_layers)
        ])
    mmodel = models.Regressor(mlp2,
                              output_density=models.GaussianMixtureDensity(
                                  odims, n_components))

    # optimizer for single gaussian model
    opt1 = torch.optim.Adam(model.parameters(), 1e-3)

    # optimizer for mixture density network
    opt2 = torch.optim.Adam(mmodel.parameters(), 1e-3)

    # create training dataset
    train_x = np.concatenate([
        np.arange(-0.6, -0.25, 0.01),
        np.arange(0.1, 0.25, 0.01),
        np.arange(0.65, 1.0, 0.01)
    ])
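    # evaluate the target function f (defined elsewhere in the script)
    # and corrupt its outputs with small observation noise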
    train_y = f(train_x)
    train_y += 0.01 * np.random.randn(*train_y.shape)
    X = torch.from_numpy(train_x[:, None]).float()
    Y = torch.from_numpy(train_y[:, None]).float()

    model.set_dataset(X, Y)
    mmodel.set_dataset(X, Y)

    model = model.float()
    mmodel = mmodel.float()

    if use_cuda and torch.cuda.is_available():
        X = X.cuda()
        Y = Y.cuda()
        model = model.cuda()
        mmodel = mmodel.cuda()

    print('Dataset size:', train_x.shape[0], 'samples')

    utils.train_regressor(model,
                          iters=4000,
                          batchsize=N_batch,
                          resample=True,
                          optimizer=opt1)
    utils.train_regressor(
        mmodel,
        iters=4000,
        batchsize=N_batch,
        resample=True,
        optimizer=opt2,
        log_likelihood=losses.gaussian_mixture_log_likelihood)

    # evaluate single gaussian model
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    model.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(model.X.device)
        outs = model(x.expand((N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -1)
        ret.append(y.cpu().detach().numpy())
        torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-2]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    for i in range(len(ret)):
        m, logS = ret[i, :, 0], ret[i, :, 1]
        samples = gaussian_sample(m, logS)
        plt.scatter(test_x, m, c=colors[0:1], s=1)
        plt.scatter(test_x, samples, c=colors[0:1] * 0.5, s=1)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)

    print(model)

    # evaluate mixture density network
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    logit_weights = []
    mmodel.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(mmodel.X.device)
        outs = mmodel(x.expand((N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -2)
        ret.append(y.cpu().detach().numpy())
        logit_weights.append(outs[2].cpu().detach().numpy())
        torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2, 3)
    logit_weights = np.stack(logit_weights)
    logit_weights = logit_weights.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-1]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    total_samples = []
    for i in range(len(ret)):
        m, logS = ret[i, :, 0, :], ret[i, :, 1, :]
        samples, c = mixture_sample(m, logS, logit_weights[i], colors)
        plt.scatter(test_x, samples, c=c * 0.5, s=1)
        samples, c = mixture_sample(m,
                                    logS,
                                    logit_weights[i],
                                    colors,
                                    noise=False)
        plt.scatter(test_x, samples, c=c, s=1)
        total_samples.append(samples)
    total_samples = np.array(total_samples)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)

    print(mmodel)

    plt.show()
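Example #4 calls three helpers that are defined elsewhere in the same script: the target function f (elided here), gaussian_sample, and mixture_sample. Their definitions can be reconstructed only approximately from how they are used above; the sketch below treats logS as a log-variance and picks one mixture component per test point, both of which are assumptions rather than the script's verbatim code.

import numpy as np

def gaussian_sample(m, logS):
    # draw one sample per point from N(m, exp(logS))
    return m + np.exp(0.5 * logS) * np.random.randn(*m.shape)

def mixture_sample(m, logS, logit_weights, colors, noise=True):
    # m, logS, logit_weights: arrays of shape [n_points, n_components]
    p = np.exp(logit_weights - logit_weights.max(-1, keepdims=True))
    p = p / p.sum(-1, keepdims=True)
    # sample a component index for every test point
    k = np.array([np.random.choice(p.shape[-1], p=pi) for pi in p])
    idx = np.arange(m.shape[0])
    samples = m[idx, k]
    if noise:
        samples = samples + np.exp(0.5 * logS[idx, k]) * np.random.randn(
            *samples.shape)
    # return per-point colors so each component gets its own hue
    return samples, colors[k]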
Example #5
                states = states.transpose(0, 1).cpu().detach().numpy()
                actions = actions.transpose(0, 1).cpu().detach().numpy()
                rewards = rewards.transpose(0, 1).cpu().detach().numpy()
                utils.plot_trajectories(states,
                                        actions,
                                        rewards,
                                        plot_samples=False)

        # train dynamics
        X, Y = exp.get_dynmodel_dataset(deltas=True, return_costs=learn_reward)
        dyn.set_dataset(
            torch.tensor(X).to(dyn.X.device).float(),
            torch.tensor(Y).to(dyn.X.device).float())
        utils.train_regressor(dyn,
                              2000,
                              N_particles,
                              True,
                              opt1,
                              log_likelihood=log_likelihood_loss)

        # sample initial states for policy optimization
        x0 = torch.tensor(exp.sample_states(N_particles, timestep=0)).to(
            dyn.X.device).float()
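        # jitter sampled initial states so rollouts don't all start
        # exactly at previously visited states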
        x0 = x0 + 1e-1 * x0.std(0) * torch.randn_like(x0)
        x0 = x0.detach()
        utils.plot_rollout(x0, dyn, pol, H)

        # train policy
        print("Policy search iteration %d" % (ps_it + 1))
        algorithms.mc_pilco(x0,
                            dyn,
                            pol,
Example #6
mmodel.set_dataset(X, Y)

model = model.float()
mmodel = mmodel.float()

if use_cuda and torch.cuda.is_available():
    X = X.cuda()
    Y = Y.cuda()
    model = model.cuda()
    mmodel = mmodel.cuda()

print('Dataset size:', train_x.shape[0], 'samples')

utils.train_regressor(model,
                      iters=10000,
                      batchsize=N_ensemble,
                      resample=True,
                      optimizer=opt1)
utils.train_regressor(mmodel,
                      iters=10000,
                      batchsize=N_ensemble,
                      resample=True,
                      optimizer=opt2,
                      log_likelihood=losses.gaussian_mixture_log_likelihood)

# evaluate single gaussian model
test_x = np.arange(-1.0, 1.5, 0.005)
ret = []
# model.model.resample()
for i, x in enumerate(test_x):
    x = torch.tensor(x[None]).float().to(model.X.device)
Example #7
def main():
    # model parameters
    parser = argparse.ArgumentParser("BNN regression example")
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_threads', type=int, default=1)
    parser.add_argument('--net_shape',
                        type=lambda s: [int(d) for d in s.split(',')],
                        default=[200, 200])
    parser.add_argument('--drop_rate', type=float, default=0.1)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_components', type=int, default=5)
    parser.add_argument('--N_batch', type=int, default=100)
    parser.add_argument('--train_iters', type=int, default=10000)
    parser.add_argument('--noise_level', type=float, default=1e-1)
    parser.add_argument('--resample', action='store_true')
    parser.add_argument('--use_cuda', action='store_true')
    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_num_threads(args.num_threads)

    idims, odims = 1, 1
    # single gaussian output model
    mlp = models.mlp(idims,
                     2 * odims,
                     args.net_shape,
                     dropout_layers=[
                         models.CDropout(args.drop_rate * np.ones(hid))
                         for hid in args.net_shape
                     ])
    model = models.Regressor(mlp,
                             output_density=models.DiagGaussianDensity(odims))

    # mixture density network
    mlp2 = models.mlp(idims,
                      2 * args.n_components * odims + args.n_components + 1,
                      args.net_shape,
                      dropout_layers=[
                          models.CDropout(args.drop_rate * np.ones(hid))
                          for hid in args.net_shape
                      ])
    mmodel = models.Regressor(mlp2,
                              output_density=models.GaussianMixtureDensity(
                                  odims, args.n_components))

    # optimizer for single gaussian model
    opt1 = torch.optim.Adam(model.parameters(), args.lr)

    # optimizer for mixture density network
    opt2 = torch.optim.Adam(mmodel.parameters(), args.lr)

    # create training dataset
    train_x = np.concatenate([
        np.linspace(-0.6, -0.25, 100),
        np.linspace(0.1, 0.25, 100),
        np.linspace(0.65, 1.0, 100)
    ])
    train_y = f(train_x)
    train_y += args.noise_level * np.random.randn(*train_y.shape)
    X = torch.from_numpy(train_x[:, None]).float()
    Y = torch.from_numpy(train_y[:, None]).float()

    model.set_dataset(X, Y)
    mmodel.set_dataset(X, Y)

    model = model.float()
    mmodel = mmodel.float()

    if args.use_cuda and torch.cuda.is_available():
        X = X.cuda()
        Y = Y.cuda()
        model = model.cuda()
        mmodel = mmodel.cuda()

    print('Dataset size:', train_x.shape[0], 'samples')
    # train unimodal regressor
    utils.train_regressor(model,
                          iters=args.train_iters,
                          batchsize=args.N_batch,
                          resample=args.resample,
                          optimizer=opt1,
                          log_likelihood=model.output_density.log_prob)

    # evaluate single gaussian model
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    if args.resample:
        model.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(model.X.device)
        outs = model(x.expand((2 * args.N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -1)
        ret.append(y.cpu().detach().numpy())
        torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-2]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    for i in range(len(ret)):
        m, logS = ret[i, :, 0], ret[i, :, 1]
        samples = gaussian_sample(m, logS)
        plt.scatter(test_x, m, c=colors[0:1], s=1)
        plt.scatter(test_x, samples, c=colors[0:1] * 0.5, s=1)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)

    print(model)

    # train mixture regressor
    utils.train_regressor(mmodel,
                          iters=args.train_iters,
                          batchsize=args.N_batch,
                          resample=args.resample,
                          optimizer=opt2,
                          log_likelihood=mmodel.output_density.log_prob)

    # evaluate mixture density network
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    logit_weights = []
    if args.resample:
        mmodel.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(mmodel.X.device)
        outs = mmodel(x.expand((2 * args.N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -2)
        ret.append(y.cpu().detach().numpy())
        logit_weights.append(outs[2].cpu().detach().numpy())
        torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2, 3)
    logit_weights = np.stack(logit_weights)
    logit_weights = logit_weights.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-1]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    total_samples = []
    for i in range(len(ret)):
        m, logS = ret[i, :, 0, :], ret[i, :, 1, :]
        samples, c = mixture_sample(m, logS, logit_weights[i], colors)
        plt.scatter(test_x, samples, c=c * 0.5, s=1)
        samples, c = mixture_sample(m,
                                    logS,
                                    logit_weights[i],
                                    colors,
                                    noise=False)
        plt.scatter(test_x, samples, c=c, s=1)
        total_samples.append(samples)
    total_samples = np.array(total_samples)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)

    print(mmodel)

    plt.show()
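Because Example #7 exposes everything through argparse, the same script doubles as a small command-line experiment runner. Assuming the file is saved as bnn_regression.py (the filename here is hypothetical), a typical invocation could look like:

python bnn_regression.py --seed 1 --net_shape 200,200,200 --drop_rate 0.1 --train_iters 5000 --resample --use_cuda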