Example #1
    # initialize the dynamics model's output density
    if args.dyn_components > 1:
        output_density = models.GaussianMixtureDensity(dynE / 2,
                                                       args.dyn_components)
        # widen the output to cover all components plus mixing weights
        dynE = (dynE + 1) * args.dyn_components + 1
    else:
        output_density = models.DiagGaussianDensity(dynE / 2)

    dyn_model = models.mlp(
        D + U,
        dynE,
        args.dyn_shape,
        dropout_layers=[
            models.modules.CDropout(args.dyn_drop_rate * np.ones(hid))
            if args.dyn_drop_rate > 0 else None for hid in args.dyn_shape
        ],
        nonlin=torch.nn.ReLU)
    dyn = models.DynamicsModel(dyn_model,
                               reward_func=reward_func,
                               output_density=output_density).float()

    # initialize policy
    pol_model = models.mlp(D,
                           2 * U,
                           args.pol_shape,
                           dropout_layers=[
                               models.modules.BDropout(args.pol_drop_rate)
                               if args.pol_drop_rate > 0 else None
                               for hid in args.pol_shape
                           ],
                           nonlin=torch.nn.ReLU,
                           output_nonlin=partial(models.DiagGaussianDensity,
                                                 U))
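
The policy above ends with output_nonlin=partial(models.DiagGaussianDensity, U), which is why its MLP emits 2 * U values: a diagonal Gaussian density consumes one half as the mean and the other half as the per-dimension (log) variance. Below is a minimal sketch of that convention in plain PyTorch; the class name DiagGaussianHead and the splitting order are assumptions for illustration, not the actual models.DiagGaussianDensity implementation.

import torch


class DiagGaussianHead(torch.nn.Module):
    """Illustrative density head: reads a 2*out_dim feature vector as
    per-dimension mean and log standard deviation."""

    def __init__(self, out_dim):
        super().__init__()
        self.out_dim = out_dim

    def forward(self, x):
        mean, log_std = x.split(self.out_dim, dim=-1)
        return torch.distributions.Normal(mean, log_std.exp())


# usage: a policy network with 2 * U outputs induces a distribution
# over U-dimensional actions
U, D = 2, 4
net = torch.nn.Linear(D, 2 * U)
dist = DiagGaussianHead(U)(net(torch.randn(10, D)))
actions = dist.rsample()  # differentiable action samples, shape (10, U)
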
Example #2
# init reward/cost function
if learn_reward:
    dynE = 2 * (D + 1)
    reward_func = None
else:
    dynE = 2 * D
    reward_func = partial(reward_func,
                          target=target,
                          Q=Q,
                          angle_dims=angle_dims)

# init dynamics model (heteroscedastic noise)
dyn = models.DynamicsModel(
    models.dropout_mlp(D + U,
                       dynE,
                       [200] * 2,
                       dropout_layers=[models.modules.CDropout(0.1)] * 2,
                       nonlin=torch.nn.ReLU),
    reward_func=reward_func).float()
forward_fn = partial(forward, dynamics=dyn)

# init policy
pol = models.Policy(
    models.dropout_mlp(D,
                       U,
                       output_nonlin=torch.nn.Tanh,
                       dropout_layers=[models.modules.BDropout(0.1)] * 2),
    maxU).float()
randpol = RandPolicy(maxU)

# init experience dataset
exp = ExperienceDataset()
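
Both examples keep their dropout layers active at prediction time, so repeated forward passes sample different plausible networks: BDropout (binary dropout) draws Bernoulli masks at a fixed rate, while CDropout (concrete dropout) uses a relaxed Bernoulli mask whose drop rate is learned jointly with the weights. Below is a minimal sketch of the concrete-dropout idea in plain PyTorch, assuming a scalar drop rate and a fixed relaxation temperature; the class name and default values are illustrative, not the library's API.

import torch


class ConcreteDropout(torch.nn.Module):
    """Illustrative concrete dropout: a relaxed Bernoulli drop mask whose
    drop rate is a trainable parameter."""

    def __init__(self, init_rate=0.1, temperature=0.1):
        super().__init__()
        rate = torch.tensor(float(init_rate))
        self.logit_p = torch.nn.Parameter(torch.log(rate / (1 - rate)))
        self.temperature = temperature

    def forward(self, x):
        p = torch.sigmoid(self.logit_p)  # current drop probability
        u = torch.rand_like(x).clamp(1e-6, 1 - 1e-6)
        # differentiable approximation of a Bernoulli(p) drop decision
        drop = torch.sigmoid(
            (p.log() - (1 - p).log() + u.log() - (1 - u).log())
            / self.temperature)
        return x * (1 - drop) / (1 - p)  # mask and rescale as in dropout

Because the mask is a smooth function of logit_p, gradients reach the drop rate during training, letting the model calibrate how much uncertainty it injects into each forward pass.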