def fit(self, experience_dataset, batch_size=100, iterations=2000,
        optimizer=None):
    X, Y = experience_dataset.get_dynmodel_dataset(
        deltas=True, return_costs=self.learn_reward)
    self.set_dataset(
        torch.tensor(X).to(self.X.device, self.X.dtype),
        torch.tensor(Y).to(self.X.device, self.X.dtype))
    if optimizer is None:
        optimizer = torch.optim.Adam(self.parameters())
    utils.train_regressor(self,
                          iterations,
                          batch_size,
                          optimizer=optimizer,
                          log_likelihood=self.log_likelihood_loss)
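
# Usage sketch (illustrative; `dyn` and `exp` are assumed names, not from
# this fragment): given `dyn`, an instance of the class defining `fit`
# above, and `exp`, an experience dataset exposing `get_dynmodel_dataset`,
# fitting the dynamics model reduces to:
#
#     dyn.fit(exp, batch_size=100, iterations=2000)
#
# Leaving `optimizer=None` makes `fit` build a fresh Adam optimizer over
# the model parameters with its default learning rate.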
                              stop_when_done=args.stop_when_done,
                              callback=render_fn)
        exp.append_episode(*ret, policy_params=[])
        exp.policy_parameters[-1] = copy.deepcopy(pol.state_dict())
        exp.save(results_filename)

        # train dynamics
        X, Y = exp.get_dynmodel_dataset(deltas=True,
                                        return_costs=args.learn_reward)
        dyn.set_dataset(X.to(dyn.X.device, dyn.X.dtype),
                        Y.to(dyn.X.device, dyn.X.dtype))
        utils.train_regressor(dyn,
                              args.dyn_opt_iters,
                              args.dyn_batch_size,
                              True,
                              opt1,
                              log_likelihood=dyn.output_density.log_prob,
                              prioritized_sampling=args.prioritized_replay,
                              summary_writer=writer,
                              summary_scope='model_learning/episode_%d' %
                              ps_it)
        torch.save(dyn.state_dict(),
                   os.path.join(results_folder, 'latest_dynamics.pth.tar'))

        # sample initial states for policy optimization
        x0 = exp.sample_states(args.pol_batch_size,
                               timestep=0).to(dyn.X.device,
                                              dyn.X.dtype).detach()

        if args.plot_level > 0:
            utils.plot_rollout(x0[:25], dyn, pol, args.pred_H * 2)
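
# `render_fn`, passed as the rollout callback above, is not defined in this
# fragment. A minimal sketch, assuming it only renders the environment `env`
# at every step (mirroring the inline lambda used in the next fragment):


def render_fn(*args, **kwargs):
    # ignore the rollout arguments and just draw the current frame
    return env.render()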
            env, pol,
            min(control_H, new_exp - exp.n_samples() + 1),
            callback=lambda *args, **kwargs: env.render())
        params_ = [p.clone() for p in list(pol.parameters())]
        exp.append_episode(*ret, policy_params=params_)
        exp.save(results_filename)

        # train dynamics
        X, Y = exp.get_dynmodel_dataset(deltas=True,
                                        return_costs=learn_reward)
        dyn.set_dataset(X.to(dyn.X.device).float(),
                        Y.to(dyn.X.device).float())
        utils.train_regressor(dyn,
                              N_dynopt,
                              dyn_batch_size,
                              True,
                              opt1,
                              log_likelihood=dyn.output_density.log_prob,
                              summary_writer=writer,
                              summary_scope='model_learning/episode_%d' %
                              ps_it)

        # sample initial states for policy optimization
        x0 = exp.sample_states(pol_batch_size,
                               timestep=0).to(dyn.X.device).float()
        # add a small amount of noise so the particles don't all start
        # from identical states
        x0 = x0 + 1e-2 * x0.std(0) * torch.randn_like(x0)
        x0 = x0.detach()
        utils.plot_rollout(x0[:25], dyn, pol, pred_H * 2)

        # train policy
        def on_iteration(i, loss, states, actions, rewards, discount):
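            # (sketch) the callback body is not included in this fragment;
            # a per-iteration hook like this would typically log progress,
            # e.g. to the same SummaryWriter used for model learning above:
            if i % 100 == 0:
                writer.add_scalar('policy_learning/episode_%d/loss' % ps_it,
                                  loss, i)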
def main():
    # model parameters
    n_layers = 4
    layer_width = 200
    drop_rate = 0.25
    odims = 1
    n_components = 5
    N_batch = 100
    use_cuda = False

    # single gaussian output model
    mlp = models.mlp(
        1,
        2 * odims,
        [layer_width] * n_layers,
        nonlin=torch.nn.ReLU,
        weights_initializer=partial(
            torch.nn.init.xavier_normal_,
            gain=torch.nn.init.calculate_gain('relu')),
        biases_initializer=partial(torch.nn.init.uniform_, a=-1.0, b=1.0),
        dropout_layers=[
            models.CDropout(drop_rate, temperature=.1)
            for i in range(n_layers)
        ])
    model = models.Regressor(mlp,
                             output_density=models.DiagGaussianDensity(odims))

    # mixture density network
    mlp2 = models.mlp(
        1,
        2 * n_components * odims + n_components + 1,
        [layer_width] * n_layers,
        nonlin=torch.nn.ReLU,
        weights_initializer=partial(
            torch.nn.init.xavier_normal_,
            gain=torch.nn.init.calculate_gain('relu')),
        biases_initializer=partial(torch.nn.init.uniform_, a=-1.0, b=1.0),
        dropout_layers=[
            models.CDropout(drop_rate, temperature=.1)
            for i in range(n_layers)
        ])
    mmodel = models.Regressor(mlp2,
                              output_density=models.GaussianMixtureDensity(
                                  odims, n_components))

    # optimizer for single gaussian model
    opt1 = torch.optim.Adam(model.parameters(), 1e-3)
    # optimizer for mixture density network
    opt2 = torch.optim.Adam(mmodel.parameters(), 1e-3)

    # create training dataset: three disjoint input clusters with additive
    # observation noise
    train_x = np.concatenate([
        np.arange(-0.6, -0.25, 0.01),
        np.arange(0.1, 0.25, 0.01),
        np.arange(0.65, 1.0, 0.01)
    ])
    train_y = f(train_x)
    train_y += 0.01 * np.random.randn(*train_y.shape)
    X = torch.from_numpy(train_x[:, None]).float()
    Y = torch.from_numpy(train_y[:, None]).float()
    model.set_dataset(X, Y)
    mmodel.set_dataset(X, Y)
    model = model.float()
    mmodel = mmodel.float()

    if use_cuda and torch.cuda.is_available():
        X = X.cuda()
        Y = Y.cuda()
        model = model.cuda()
        mmodel = mmodel.cuda()

    print('Dataset size:', train_x.shape[0], 'samples')

    utils.train_regressor(model,
                          iters=4000,
                          batchsize=N_batch,
                          resample=True,
                          optimizer=opt1)
    utils.train_regressor(
        mmodel,
        iters=4000,
        batchsize=N_batch,
        resample=True,
        optimizer=opt2,
        log_likelihood=losses.gaussian_mixture_log_likelihood)

    # evaluate single gaussian model
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    model.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(model.X.device)
        outs = model(x.expand((N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -1)
        ret.append(y.cpu().detach().numpy())
    torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-2]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    for i in range(len(ret)):
        m, logS = ret[i, :, 0], ret[i, :, 1]
        samples = gaussian_sample(m, logS)
        plt.scatter(test_x, m, c=colors[0:1], s=1)
        plt.scatter(test_x, samples, c=colors[0:1] * 0.5, s=1)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    print(model)

    # evaluate mixture density network
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    logit_weights = []
    mmodel.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(mmodel.X.device)
        outs = mmodel(x.expand((N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -2)
        ret.append(y.cpu().detach().numpy())
        logit_weights.append(outs[2].cpu().detach().numpy())
    torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2, 3)
    logit_weights = np.stack(logit_weights)
    logit_weights = logit_weights.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-1]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    total_samples = []
    for i in range(len(ret)):
        m, logS = ret[i, :, 0, :], ret[i, :, 1, :]
        samples, c = mixture_sample(m, logS, logit_weights[i], colors)
        plt.scatter(test_x, samples, c=c * 0.5, s=1)
        samples, c = mixture_sample(m, logS, logit_weights[i], colors,
                                    noise=False)
        plt.scatter(test_x, samples, c=c, s=1)
        total_samples.append(samples)
    total_samples = np.array(total_samples)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    print(mmodel)
    plt.show()
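
# The helpers `f`, `gaussian_sample` and `mixture_sample` are used above but
# not defined in this fragment. The sketches below are illustrative stand-ins
# under the stated assumptions, not the original definitions.


def f(x):
    # placeholder target: any nonlinear 1D function works for this demo
    return x + 0.3 * np.sin(2 * np.pi * x)


def gaussian_sample(m, logS):
    # one sample per point from N(m, exp(logS)), assuming logS is the
    # predicted log-variance
    return m + np.exp(0.5 * logS) * np.random.randn(*np.shape(m))


def mixture_sample(m, logS, logit_weights, colors, noise=True):
    # m, logS, logit_weights: arrays of shape [n_points, n_components].
    # Pick one component per point from softmax(logit_weights), then return
    # that component's mean (optionally with Gaussian noise added) and a
    # matching plot color per point.
    probs = np.exp(logit_weights - logit_weights.max(-1, keepdims=True))
    probs /= probs.sum(-1, keepdims=True)
    idx = np.array([np.random.choice(probs.shape[-1], p=p) for p in probs])
    rows = np.arange(len(idx))
    samples = m[rows, idx]
    if noise:
        samples = samples + np.exp(
            0.5 * logS[rows, idx]) * np.random.randn(*samples.shape)
    return samples, colors[idx]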
    states = states.transpose(0, 1).cpu().detach().numpy()
    actions = actions.transpose(0, 1).cpu().detach().numpy()
    rewards = rewards.transpose(0, 1).cpu().detach().numpy()
    utils.plot_trajectories(states, actions, rewards, plot_samples=False)

    # train dynamics
    X, Y = exp.get_dynmodel_dataset(deltas=True, return_costs=learn_reward)
    dyn.set_dataset(
        torch.tensor(X).to(dyn.X.device).float(),
        torch.tensor(Y).to(dyn.X.device).float())
    utils.train_regressor(dyn,
                          2000,
                          N_particles,
                          True,
                          opt1,
                          log_likelihood=log_likelihood_loss)

    # sample initial states for policy optimization
    x0 = torch.tensor(exp.sample_states(N_particles,
                                        timestep=0)).to(dyn.X.device).float()
    # add a small amount of noise so the particles don't all start from
    # identical states
    x0 = x0 + 1e-1 * x0.std(0) * torch.randn_like(x0)
    x0 = x0.detach()
    utils.plot_rollout(x0, dyn, pol, H)

    # train policy
    print("Policy search iteration %d" % (ps_it + 1))
    algorithms.mc_pilco(x0,
                        dyn,
                        pol,
    mmodel.set_dataset(X, Y)
    model = model.float()
    mmodel = mmodel.float()

    if use_cuda and torch.cuda.is_available():
        X = X.cuda()
        Y = Y.cuda()
        model = model.cuda()
        mmodel = mmodel.cuda()

    print('Dataset size:', train_x.shape[0], 'samples')

    utils.train_regressor(model,
                          iters=10000,
                          batchsize=N_ensemble,
                          resample=True,
                          optimizer=opt1)
    utils.train_regressor(
        mmodel,
        iters=10000,
        batchsize=N_ensemble,
        resample=True,
        optimizer=opt2,
        log_likelihood=losses.gaussian_mixture_log_likelihood)

    # evaluate single gaussian model
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    # model.model.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(model.X.device)
def main():
    # command line arguments
    parser = argparse.ArgumentParser("BNN regression example")
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_threads', type=int, default=1)
    parser.add_argument('--net_shape',
                        type=lambda s: [int(d) for d in s.split(',')],
                        default=[200, 200])
    parser.add_argument('--drop_rate', type=float, default=0.1)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_components', type=int, default=5)
    parser.add_argument('--N_batch', type=int, default=100)
    parser.add_argument('--train_iters', type=int, default=10000)
    parser.add_argument('--noise_level', type=float, default=1e-1)
    parser.add_argument('--resample', action='store_true')
    parser.add_argument('--use_cuda', action='store_true')
    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_num_threads(args.num_threads)

    idims, odims = 1, 1

    # single gaussian output model
    mlp = models.mlp(idims,
                     2 * odims,
                     args.net_shape,
                     dropout_layers=[
                         models.CDropout(args.drop_rate * np.ones(hid))
                         for hid in args.net_shape
                     ])
    model = models.Regressor(mlp,
                             output_density=models.DiagGaussianDensity(odims))

    # mixture density network
    mlp2 = models.mlp(idims,
                      2 * args.n_components * odims + args.n_components + 1,
                      args.net_shape,
                      dropout_layers=[
                          models.CDropout(args.drop_rate * np.ones(hid))
                          for hid in args.net_shape
                      ])
    mmodel = models.Regressor(mlp2,
                              output_density=models.GaussianMixtureDensity(
                                  odims, args.n_components))

    # optimizer for single gaussian model
    opt1 = torch.optim.Adam(model.parameters(), args.lr)
    # optimizer for mixture density network
    opt2 = torch.optim.Adam(mmodel.parameters(), args.lr)

    # create training dataset
    train_x = np.concatenate([
        np.linspace(-0.6, -0.25, 100),
        np.linspace(0.1, 0.25, 100),
        np.linspace(0.65, 1.0, 100)
    ])
    train_y = f(train_x)
    train_y += args.noise_level * np.random.randn(*train_y.shape)
    X = torch.from_numpy(train_x[:, None]).float()
    Y = torch.from_numpy(train_y[:, None]).float()
    model.set_dataset(X, Y)
    mmodel.set_dataset(X, Y)
    model = model.float()
    mmodel = mmodel.float()

    if args.use_cuda and torch.cuda.is_available():
        X = X.cuda()
        Y = Y.cuda()
        model = model.cuda()
        mmodel = mmodel.cuda()

    print('Dataset size:', train_x.shape[0], 'samples')

    # train unimodal regressor
    utils.train_regressor(model,
                          iters=args.train_iters,
                          batchsize=args.N_batch,
                          resample=args.resample,
                          optimizer=opt1,
                          log_likelihood=model.output_density.log_prob)

    # evaluate single gaussian model
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    if args.resample:
        model.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(model.X.device)
        outs = model(x.expand((2 * args.N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -1)
        ret.append(y.cpu().detach().numpy())
    torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-2]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    for i in range(len(ret)):
        m, logS = ret[i, :, 0], ret[i, :, 1]
        samples = gaussian_sample(m, logS)
        plt.scatter(test_x, m, c=colors[0:1], s=1)
        plt.scatter(test_x, samples, c=colors[0:1] * 0.5, s=1)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    print(model)

    # train mixture regressor
    utils.train_regressor(mmodel,
                          iters=args.train_iters,
                          batchsize=args.N_batch,
                          resample=args.resample,
                          optimizer=opt2,
                          log_likelihood=mmodel.output_density.log_prob)

    # evaluate mixture density network
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    logit_weights = []
    if args.resample:
        mmodel.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(mmodel.X.device)
        outs = mmodel(x.expand((2 * args.N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -2)
        ret.append(y.cpu().detach().numpy())
        logit_weights.append(outs[2].cpu().detach().numpy())
    torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2, 3)
    logit_weights = np.stack(logit_weights)
    logit_weights = logit_weights.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-1]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    total_samples = []
    for i in range(len(ret)):
        m, logS = ret[i, :, 0, :], ret[i, :, 1, :]
        samples, c = mixture_sample(m, logS, logit_weights[i], colors)
        plt.scatter(test_x, samples, c=c * 0.5, s=1)
        samples, c = mixture_sample(m, logS, logit_weights[i], colors,
                                    noise=False)
        plt.scatter(test_x, samples, c=c, s=1)
        total_samples.append(samples)
    total_samples = np.array(total_samples)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)
    print(mmodel)
    plt.show()
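
# standard script entry point (assumed; the fragment above defines main()
# but never calls it)
if __name__ == '__main__':
    main()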