Example #1
    def __init__(self,
                 state_dim,
                 action_dim,
                 reward_func=None,
                 dyn_components=1,
                 dyn_hidden=[200] * 2):
        self.learn_reward = reward_func is None
        dynE = 2 * (state_dim + 1) if self.learn_reward else 2 * state_dim
        if dyn_components > 1:
            output_density = models.GaussianMixtureDensity(
                dynE // 2, dyn_components)
            dynE = (dynE + 1) * dyn_components
            self.log_likelihood_loss = losses.gaussian_mixture_log_likelihood
        else:
            output_density = models.DiagGaussianDensity(dynE // 2)
            self.log_likelihood_loss = losses.gaussian_log_likelihood

        dyn_model = models.mlp(state_dim + action_dim,
                               dynE,
                               dyn_hidden,
                               dropout_layers=[
                                   models.modules.CDropout(0.5, 0.1)
                                   for i in range(len(dyn_hidden))
                               ],
                               nonlin=torch.nn.ReLU)
        super(DynModel, self).__init__(dyn_model,
                                       reward_func=reward_func,
                                       output_density=output_density)
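
The `dynE` bookkeeping above fixes how many raw outputs the dynamics MLP must produce: a mean and a log-variance per predicted target, times the number of mixture components when a mixture head is used. A minimal sketch of that arithmetic, with hypothetical dimensions:

# output-size bookkeeping for the dynamics head (hypothetical dimensions)
state_dim, dyn_components = 3, 5
# single Gaussian, reward learned jointly: mean + log-variance for each
# of the state_dim + 1 targets
dynE = 2 * (state_dim + 1)                  # -> 8
# mixture head: per-component parameters plus one mixing logit each,
# matching dynE = (dynE + 1) * dyn_components above
dynE_mixture = (dynE + 1) * dyn_components  # -> 45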
Example #2
    def __init__(self, state_dim, action_dim, critic_hidden=[200] * 2):
        critic_model = models.mlp(
            state_dim + action_dim,
            1,
            critic_hidden,
            dropout_layers=[
                models.modules.CDropout(0.1) for i in range(len(critic_hidden))
            ],
            nonlin=torch.nn.ReLU,
            weights_initializer=torch.nn.init.xavier_normal_,
            biases_initializer=None)
        super(Critic, self).__init__(critic_model, None)
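
For intuition, here is a plain-PyTorch stand-in for the critic above, with the concrete-dropout layers and custom initializers omitted; this is a simplified sketch, not the `models.mlp` implementation itself:

import torch

state_dim, action_dim = 3, 1  # hypothetical dimensions
critic = torch.nn.Sequential(
    torch.nn.Linear(state_dim + action_dim, 200), torch.nn.ReLU(),
    torch.nn.Linear(200, 200), torch.nn.ReLU(),
    torch.nn.Linear(200, 1),  # scalar Q-value per (state, action) pair
)
q = critic(torch.randn(32, state_dim + action_dim))  # shape (32, 1)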
Example #3
    def __init__(self, state_dim, action_dim, max_action, pol_shape=[200] * 2):
        pol_model = models.mlp(
            state_dim,
            action_dim,
            pol_shape,
            dropout_layers=[
                models.modules.BDropout(0.1) for i in range(len(pol_shape))
            ],
            nonlin=torch.nn.ReLU,
            weights_initializer=torch.nn.init.xavier_normal_,
            biases_initializer=None,
            output_nonlin=torch.nn.Tanh)

        self.expl_noise = 0.0
        super(Actor, self).__init__(pol_model, max_action)
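
The `Tanh` output nonlinearity squashes the raw policy output into (-1, 1), which the `Actor` wrapper presumably rescales by `max_action`. A simplified plain-PyTorch sketch of that pattern (the rescaling step is an assumption about what `Actor` does with `max_action`):

import torch

state_dim, action_dim, max_action = 3, 1, 2.0  # hypothetical values
actor = torch.nn.Sequential(
    torch.nn.Linear(state_dim, 200), torch.nn.ReLU(),
    torch.nn.Linear(200, 200), torch.nn.ReLU(),
    torch.nn.Linear(200, action_dim), torch.nn.Tanh(),  # squash to (-1, 1)
)
actions = max_action * actor(torch.randn(32, state_dim))  # within action bounds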
Example #4
            args.discount_factor = float(args.discount_factor)

    # initialize dynamics model
    dynE = 2 * (D + 1) if args.learn_reward else 2 * D
    if args.dyn_components > 1:
        output_density = models.GaussianMixtureDensity(dynE // 2,
                                                       args.dyn_components)
        dynE = (dynE + 1) * args.dyn_components + 1
    else:
        output_density = models.DiagGaussianDensity(dynE // 2)

    dyn_model = models.mlp(
        D + U,
        dynE,
        args.dyn_shape,
        dropout_layers=[
            models.modules.CDropout(args.dyn_drop_rate * np.ones(hid))
            if args.dyn_drop_rate > 0 else None for hid in args.dyn_shape
        ],
        nonlin=torch.nn.ReLU)
    dyn = models.DynamicsModel(dyn_model,
                               reward_func=reward_func,
                               output_density=output_density).float()

    # initialize policy
    pol_model = models.mlp(D,
                           2 * U,
                           args.pol_shape,
                           dropout_layers=[
                               models.modules.BDropout(args.pol_drop_rate)
                               if args.pol_drop_rate > 0 else None
Example #5
            control_H = env.spec.max_episode_steps
    initial_experience = control_H * n_initial_epi

    # initialize dynamics model
    dynE = 2 * (D + 1) if learn_reward else 2 * D
    if dyn_components > 1:
        output_density = models.GaussianMixtureDensity(dynE // 2,
                                                       dyn_components)
        dynE = (dynE + 1) * dyn_components + 1
    else:
        output_density = models.DiagGaussianDensity(dynE // 2)

    dyn_model = models.mlp(D + U,
                           dynE,
                           dyn_hidden,
                           dropout_layers=[
                               models.modules.CDropout(0.1, 0.1)
                               for i in range(len(dyn_hidden))
                           ],
                           nonlin=torch.nn.ReLU)
    dyn = models.DynamicsModel(dyn_model,
                               reward_func=reward_func,
                               output_density=output_density).float()

    # initialize policy
    pol_model = models.mlp(D,
                           2 * U,
                           pol_hidden,
                           dropout_layers=[
                               models.modules.BDropout(0.1)
                               for i in range(len(pol_hidden))
                           ],
Example #6
    if hasattr(env, 'spec'):
        if hasattr(env.spec, 'max_episode_steps'):
            args.control_H = env.spec.max_episode_steps
            args.stop_when_done = True
    D = env.observation_space.shape[0]
    U = env.action_space.shape[0]
    maxU = env.action_space.high
    minU = env.action_space.low

    # initialize policy
    pol_model = models.mlp(D,
                           2 * U,
                           args.pol_shape,
                           dropout_layers=[
                               models.modules.BDropout(args.pol_drop_rate)
                               if args.pol_drop_rate > 0 else None
                               for hid in args.pol_shape
                           ],
                           biases_initializer=None,
                           nonlin=torch.nn.ReLU,
                           output_nonlin=partial(models.DiagGaussianDensity,
                                                 U))

    pol = models.Policy(pol_model, maxU, minU).float()

    # load experience dataset
    exp = utils.ExperienceDataset()
    exp_path = os.path.join(loaded_from, 'experience.pth.tar')
    exp.load(exp_path)

    # evaluate policy
    trajectories = []
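
The `D`, `U`, `maxU`, and `minU` values above come straight from the Gym space API. For a concrete environment (a hypothetical choice, not one named in the snippet):

import gym

env = gym.make('Pendulum-v1')        # hypothetical example environment
D = env.observation_space.shape[0]   # state dimensionality, 3 for Pendulum
U = env.action_space.shape[0]        # action dimensionality, 1 for Pendulum
maxU = env.action_space.high         # array([2.], dtype=float32)
minU = env.action_space.low          # array([-2.], dtype=float32)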
Example #7
            control_H = env.spec.max_episode_steps
    initial_experience = control_H * n_initial_epi

    # initialize dynamics model
    dynE = 2 * (D + 1) if learn_reward else 2 * D
    if dyn_components > 1:
        output_density = models.GaussianMixtureDensity(dynE // 2,
                                                       dyn_components)
        dynE = (dynE + 1) * dyn_components + 1
    else:
        output_density = models.DiagGaussianDensity(dynE // 2)

    dyn_model = models.mlp(D + U,
                           dynE,
                           dyn_hidden,
                           dropout_layers=[
                               models.modules.CDropout(0.5, 0.1)
                               for i in range(len(dyn_hidden))
                           ],
                           nonlin=torch.nn.ReLU)
    dyn = models.DynamicsModel(dyn_model,
                               reward_func=reward_func,
                               output_density=output_density).float()

    # initialize policy
    pol_model = models.mlp(D,
                           2 * U,
                           pol_hidden,
                           dropout_layers=[
                               models.modules.BDropout(0.1)
                               for i in range(len(pol_hidden))
                           ],
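
Judging by the keyword form used in Example #8 below (`models.CDropout(drop_rate, temperature=.1)`), the two positional arguments to `CDropout` here are most likely the initial drop rate (0.5) and the concrete-dropout temperature (0.1); this reading is an inference from the other snippets, not something documented here.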
Example #8
# imports needed by this snippet (package layout assumed from the identifiers)
import gc
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import torch

from prob_mbrl import losses, models, utils

def main():
    # model parameters
    n_layers = 4
    layer_width = 200
    drop_rate = 0.25
    odims = 1
    n_components = 5
    N_batch = 100
    use_cuda = False

    # single gaussian output model
    mlp = models.mlp(
        1,
        2 * odims, [layer_width] * n_layers,
        nonlin=torch.nn.ReLU,
        weights_initializer=partial(torch.nn.init.xavier_normal_,
                                    gain=torch.nn.init.calculate_gain('relu')),
        biases_initializer=partial(torch.nn.init.uniform_, a=-1.0, b=1.0),
        dropout_layers=[
            models.CDropout(drop_rate, temperature=.1) for i in range(n_layers)
        ])
    model = models.Regressor(mlp,
                             output_density=models.DiagGaussianDensity(odims))

    # mixture density network
    mlp2 = models.mlp(
        1,
        2 * n_components * odims + n_components + 1, [layer_width] * n_layers,
        nonlin=torch.nn.ReLU,
        weights_initializer=partial(torch.nn.init.xavier_normal_,
                                    gain=torch.nn.init.calculate_gain('relu')),
        biases_initializer=partial(torch.nn.init.uniform_, a=-1.0, b=1.0),
        dropout_layers=[
            models.CDropout(drop_rate, temperature=.1) for i in range(n_layers)
        ])
    mmodel = models.Regressor(mlp2,
                              output_density=models.GaussianMixtureDensity(
                                  odims, n_components))

    # optimizer for single gaussian model
    opt1 = torch.optim.Adam(model.parameters(), 1e-3)

    # optimizer for mixture density network
    opt2 = torch.optim.Adam(mmodel.parameters(), 1e-3)

    # create training dataset
    train_x = np.concatenate([
        np.arange(-0.6, -0.25, 0.01),
        np.arange(0.1, 0.25, 0.01),
        np.arange(0.65, 1.0, 0.01)
    ])
    train_y = f(train_x)
    train_y += 0.01 * np.random.randn(*train_y.shape)
    X = torch.from_numpy(train_x[:, None]).float()
    Y = torch.from_numpy(train_y[:, None]).float()

    model.set_dataset(X, Y)
    mmodel.set_dataset(X, Y)

    model = model.float()
    mmodel = mmodel.float()

    if use_cuda and torch.cuda.is_available():
        X = X.cuda()
        Y = Y.cuda()
        model = model.cuda()
        mmodel = mmodel.cuda()

    print('Dataset size:', train_x.shape[0], 'samples')

    utils.train_regressor(model,
                          iters=4000,
                          batchsize=N_batch,
                          resample=True,
                          optimizer=opt1)
    utils.train_regressor(
        mmodel,
        iters=4000,
        batchsize=N_batch,
        resample=True,
        optimizer=opt2,
        log_likelihood=losses.gaussian_mixture_log_likelihood)

    # evaluate single gaussian model
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    model.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(model.X.device)
        outs = model(x.expand((N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -1)
        ret.append(y.cpu().detach().numpy())
        torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-2]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    for i in range(len(ret)):
        m, logS = ret[i, :, 0], ret[i, :, 1]
        samples = gaussian_sample(m, logS)
        plt.scatter(test_x, m, c=colors[0:1], s=1)
        plt.scatter(test_x, samples, c=colors[0:1] * 0.5, s=1)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)

    print(model)

    # evaluate mixture density network
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    logit_weights = []
    mmodel.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(mmodel.X.device)
        outs = mmodel(x.expand((N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -2)
        ret.append(y.cpu().detach().numpy())
        logit_weights.append(outs[2].cpu().detach().numpy())
        torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2, 3)
    logit_weights = np.stack(logit_weights)
    logit_weights = logit_weights.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-1]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    total_samples = []
    for i in range(len(ret)):
        m, logS = ret[i, :, 0, :], ret[i, :, 1, :]
        samples, c = mixture_sample(m, logS, logit_weights[i], colors)
        plt.scatter(test_x, samples, c=c * 0.5, s=1)
        samples, c = mixture_sample(m,
                                    logS,
                                    logit_weights[i],
                                    colors,
                                    noise=False)
        plt.scatter(test_x, samples, c=c, s=1)
        total_samples.append(samples)
    total_samples = np.array(total_samples)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)

    print(mmodel)

    plt.show()
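
This example calls `f`, `gaussian_sample`, and `mixture_sample`, which are defined elsewhere in the source script. Plausible stand-ins are sketched below so the snippet can be exercised; these are assumptions about the helpers' behavior, not the authors' definitions:

import numpy as np

def f(x):
    # an assumed wiggly 1-D target function for the regression demo
    return np.sin(4 * np.pi * x) * x

def gaussian_sample(m, logS):
    # one draw per (mean, log-variance) pair
    return m + np.exp(0.5 * logS) * np.random.randn(*np.shape(m))

def mixture_sample(m, logS, logit_w, colors, noise=True):
    # m, logS, logit_w: arrays of shape (N, n_components).
    # pick a component per point from the softmax of the logits, then
    # sample that component (or return its mean when noise=False)
    p = np.exp(logit_w - logit_w.max(-1, keepdims=True))
    p /= p.sum(-1, keepdims=True)
    k = np.array([np.random.choice(p.shape[-1], p=pi) for pi in p])
    idx = np.arange(len(k))
    s = m[idx, k]
    if noise:
        s = s + np.exp(0.5 * logS[idx, k]) * np.random.randn(len(k))
    return s, colors[k]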
Example #9
        reward_func = env.reward_func

    # initialize dynamics model
    dynE = 2 * (D + 1) if learn_reward else 2 * D
    if dyn_components > 1:
        output_density = models.MixtureDensity(dynE // 2, dyn_components)
        dynE = (dynE + 1) * dyn_components
        log_likelihood_loss = losses.gaussian_mixture_log_likelihood
    else:
        output_density = models.DiagGaussianDensity(dynE // 2)
        log_likelihood_loss = losses.gaussian_log_likelihood

    dyn_model = models.mlp(D + U,
                           dynE,
                           dyn_hidden,
                           dropout_layers=[
                               models.modules.CDropout(0.1, 0.1)
                               for i in range(len(dyn_hidden))
                           ],
                           nonlin=torch.nn.Tanh)
    dyn = models.DynamicsModel(dyn_model,
                               reward_func=reward_func,
                               output_density=output_density).float()

    # initialize policy
    pol_model = models.mlp(D,
                           U,
                           pol_hidden,
                           dropout_layers=[
                               models.modules.BDropout(0.1)
                               for i in range(len(pol_hidden))
                           ],
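
Note that this snippet uses `models.MixtureDensity` where the other examples use `models.GaussianMixtureDensity`; these are presumably the same component under different versions of the library.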
Example #10
# model parameters
n_layers = 4
layer_width = 200
drop_rate = 0.25
odims = 1
n_components = 5
N_ensemble = 100
use_cuda = False

# single gaussian output model
model = models.Regressor(models.mlp(
    1,
    2 * odims, [layer_width] * n_layers,
    nonlin=torch.nn.ReLU,
    weights_initializer=partial(torch.nn.init.xavier_normal_,
                                gain=torch.nn.init.calculate_gain('relu')),
    biases_initializer=partial(torch.nn.init.uniform_, a=-1.0, b=1.0),
    dropout_layers=[
        models.CDropout(drop_rate, temperature=.1) for i in range(n_layers)
    ]),
                         output_density=models.DiagGaussianDensity(odims))

# mixture density network
mmodel = models.Regressor(models.mlp(
    1,
    2 * n_components * odims + n_components, [layer_width] * n_layers,
    nonlin=torch.nn.ReLU,
    weights_initializer=partial(torch.nn.init.xavier_normal_,
                                gain=torch.nn.init.calculate_gain('relu')),
    biases_initializer=partial(torch.nn.init.uniform_, a=-1.0, b=1.0),
    dropout_layers=[
Example #11
# imports needed by this snippet (package layout assumed from the identifiers);
# the helpers f, gaussian_sample and mixture_sample are defined elsewhere in
# the source script (plausible stand-ins are sketched after Example #8 above)
import argparse
import gc

import matplotlib.pyplot as plt
import numpy as np
import torch

from prob_mbrl import models, utils

def main():
    # model parameters
    parser = argparse.ArgumentParser("BNN regression example")
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_threads', type=int, default=1)
    parser.add_argument('--net_shape',
                        type=lambda s: [int(d) for d in s.split(',')],
                        default=[200, 200])
    parser.add_argument('--drop_rate', type=float, default=0.1)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_components', type=int, default=5)
    parser.add_argument('--N_batch', type=int, default=100)
    parser.add_argument('--train_iters', type=int, default=10000)
    parser.add_argument('--noise_level', type=float, default=1e-1)
    parser.add_argument('--resample', action='store_true')
    parser.add_argument('--use_cuda', action='store_true')
    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_num_threads(args.num_threads)

    idims, odims = 1, 1
    # single gaussian output model
    mlp = models.mlp(idims,
                     2 * odims,
                     args.net_shape,
                     dropout_layers=[
                         models.CDropout(args.drop_rate * np.ones(hid))
                         for hid in args.net_shape
                     ])
    model = models.Regressor(mlp,
                             output_density=models.DiagGaussianDensity(odims))

    # mixture density network
    mlp2 = models.mlp(idims,
                      2 * args.n_components * odims + args.n_components + 1,
                      args.net_shape,
                      dropout_layers=[
                          models.CDropout(args.drop_rate * np.ones(hid))
                          for hid in args.net_shape
                      ])
    mmodel = models.Regressor(mlp2,
                              output_density=models.GaussianMixtureDensity(
                                  odims, args.n_components))

    # optimizer for single gaussian model
    opt1 = torch.optim.Adam(model.parameters(), args.lr)

    # optimizer for mixture density network
    opt2 = torch.optim.Adam(mmodel.parameters(), args.lr)

    # create training dataset
    train_x = np.concatenate([
        np.linspace(-0.6, -0.25, 100),
        np.linspace(0.1, 0.25, 100),
        np.linspace(0.65, 1.0, 100)
    ])
    train_y = f(train_x)
    train_y += args.noise_level * np.random.randn(*train_y.shape)
    X = torch.from_numpy(train_x[:, None]).float()
    Y = torch.from_numpy(train_y[:, None]).float()

    model.set_dataset(X, Y)
    mmodel.set_dataset(X, Y)

    model = model.float()
    mmodel = mmodel.float()

    if args.use_cuda and torch.cuda.is_available():
        X = X.cuda()
        Y = Y.cuda()
        model = model.cuda()
        mmodel = mmodel.cuda()

    print('Dataset size:', train_x.shape[0], 'samples')
    # train unimodal regressor
    utils.train_regressor(model,
                          iters=args.train_iters,
                          batchsize=args.N_batch,
                          resample=args.resample,
                          optimizer=opt1,
                          log_likelihood=model.output_density.log_prob)

    # evaluate single gaussian model
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    if args.resample:
        model.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(model.X.device)
        outs = model(x.expand((2 * args.N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -1)
        ret.append(y.cpu().detach().numpy())
        torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-2]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    for i in range(len(ret)):
        m, logS = ret[i, :, 0], ret[i, :, 1]
        samples = gaussian_sample(m, logS)
        plt.scatter(test_x, m, c=colors[0:1], s=1)
        plt.scatter(test_x, samples, c=colors[0:1] * 0.5, s=1)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)

    print(model)

    # train mixture regressor
    utils.train_regressor(mmodel,
                          iters=args.train_iters,
                          batchsize=args.N_batch,
                          resample=args.resample,
                          optimizer=opt2,
                          log_likelihood=mmodel.output_density.log_prob)

    # evaluate mixture density network
    test_x = np.arange(-1.0, 1.5, 0.005)
    ret = []
    logit_weights = []
    if args.resample:
        mmodel.resample()
    for i, x in enumerate(test_x):
        x = torch.tensor(x[None]).float().to(mmodel.X.device)
        outs = mmodel(x.expand((2 * args.N_batch, 1)), resample=False)
        y = torch.cat(outs[:2], -2)
        ret.append(y.cpu().detach().numpy())
        logit_weights.append(outs[2].cpu().detach().numpy())
        torch.cuda.empty_cache()
    ret = np.stack(ret)
    ret = ret.transpose(1, 0, 2, 3)
    logit_weights = np.stack(logit_weights)
    logit_weights = logit_weights.transpose(1, 0, 2)
    torch.cuda.empty_cache()
    for i in range(3):
        gc.collect()

    plt.figure(figsize=(16, 9))
    nc = ret.shape[-1]
    colors = np.array(list(plt.cm.rainbow_r(np.linspace(0, 1, nc))))
    total_samples = []
    for i in range(len(ret)):
        m, logS = ret[i, :, 0, :], ret[i, :, 1, :]
        samples, c = mixture_sample(m, logS, logit_weights[i], colors)
        plt.scatter(test_x, samples, c=c * 0.5, s=1)
        samples, c = mixture_sample(m,
                                    logS,
                                    logit_weights[i],
                                    colors,
                                    noise=False)
        plt.scatter(test_x, samples, c=c, s=1)
        total_samples.append(samples)
    total_samples = np.array(total_samples)
    plt.plot(test_x, f(test_x), linestyle='--', label='true function')
    plt.scatter(X.cpu().numpy().flatten(), Y.cpu().numpy().flatten())
    plt.xlabel('$x$', fontsize=18)
    plt.ylabel('$y$', fontsize=18)

    print(mmodel)

    plt.show()
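
A small note on the `--net_shape` flag above: its `type` is a lambda that splits a comma-separated string into a list of hidden-layer widths, so for instance:

parse = lambda s: [int(d) for d in s.split(',')]
parse('200,200')      # -> [200, 200], the default two hidden layers
parse('400,400,400')  # -> [400, 400, 400], a deeper network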