Example #1
def get_model_params():
    return d(
        cls=LatentModel,
        params=d(
            device=DEVICE,
            preproc_fn=preproc_fn,
            postproc_fn=postproc_fn,
            loss_fn=loss_fn,
            num_nets=NUM_NETS,
            is_probabilistic=PROBABILISTIC,
            # default sigma_obs uncertainty multiplier
            deterministic_sigma_multiplier=0.01,
            network=SequentialParams([
                LayerParams("linear", in_features=MODEL_IN, out_features=200),
                LayerParams('relu'),
                LayerParams("linear", in_features=200, out_features=200),
                LayerParams('relu'),
                LayerParams("linear", in_features=200, out_features=200),
                LayerParams('relu'),
                LayerParams("linear",
                            in_features=200,
                            out_features=NUM_NETS * MODEL_OUT),
            ]),
            latent_object=d(
                device=DEVICE,
                num_latent_classes=NUM_LATENT_CLASSES,
                latent_dim=LATENT_DIM,
                known_latent_default_mu=DEFAULT_LATENT_MU,
                known_latent_default_log_sigma=DEFAULT_LATENT_LOG_SIGMA,
                # weighting of the KL term relative to logprob in inference
                beta_kl=0.1,
            ),
        ))
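The final linear layer above emits NUM_NETS * MODEL_OUT features, i.e. a flat concatenation of one prediction per ensemble member. A minimal sketch of how such a flat head is typically split per network and averaged; the sizes are placeholders, this is not the LatentModel implementation, and the .mean(dim=-2) simply mirrors the ensemble average used later in latent_advance_obs_fn:

import torch

BATCH, NUM_NETS, MODEL_OUT = 4, 5, 6  # placeholder sizes for illustration only

flat_head = torch.randn(BATCH, NUM_NETS * MODEL_OUT)  # output of the final linear layer
per_net = flat_head.view(BATCH, NUM_NETS, MODEL_OUT)  # one MODEL_OUT-dim prediction per ensemble member
ensemble_mean = per_net.mean(dim=-2)  # (BATCH, MODEL_OUT), averaged over the ensemble axis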
Example #2
def get_policy_params():
    policy_params = d(cls=LatentMPCPolicy,
                      params=d(num_particles=None,
                               horizon=HORIZON,
                               cost_function=mpc_cost_fn,
                               advance_obs_function=advance_obs_fn,
                               optimizer_cls=CEM,
                               optimizer_params=d(
                                   popsize=50,
                                   horizon=HORIZON,
                                   act_dim=act_dim,
                                   max_iters=3,
                                   num_elites=10,
                                   epsilon=0.001,
                                   alpha=0.25,
                               )))

    if OFFLINE:
        # offline action reader
        policy_params = d(cls=RosPolicy,
                          params=d(
                              ros_action_topic="/cf/0/motion",
                              ros_action_type=CFMotion,
                              msg_to_numpy_fn=lambda msg: np.array(
                                  [msg.x, msg.y, msg.dz], dtype=np.float32),
                              background_policy_cls=policy_params.cls,
                              background_policy_params=policy_params.params,
                          ))

    # otherwise, online actions come from the MPC policy defined above
    return policy_params
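The CEM hyperparameters above (popsize, max_iters, num_elites, epsilon, alpha) are passed straight through to the optimizer. As rough orientation for what each knob controls, a small self-contained cross-entropy-method sketch; this is not the project's CEM class, and the quadratic cost below is only a placeholder:

import numpy as np

def cem_sketch(cost_fn, act_dim, horizon, popsize=50, max_iters=3, num_elites=10,
               epsilon=0.001, alpha=0.25):
    mean = np.zeros(horizon * act_dim)
    var = np.ones(horizon * act_dim)
    for _ in range(max_iters):
        if var.max() < epsilon:  # stop once the sampling distribution has collapsed
            break
        samples = mean + np.sqrt(var) * np.random.randn(popsize, horizon * act_dim)
        costs = np.array([cost_fn(s) for s in samples])
        elites = samples[np.argsort(costs)[:num_elites]]  # keep the lowest-cost action sequences
        mean = alpha * mean + (1 - alpha) * elites.mean(axis=0)  # smoothed distribution update
        var = alpha * var + (1 - alpha) * elites.var(axis=0)
    return mean.reshape(horizon, act_dim)

# toy usage with a placeholder quadratic cost
best_plan = cem_sketch(lambda s: np.sum(s ** 2), act_dim=3, horizon=5)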
Example #3
def get_dataset_params(input_file):
    return d(cls=MatDataset,
             params=d(
                 input_file=input_file,
                 output_file='',
                 batch_size=100,
                 planning_horizon=HORIZON,
                 obs_history_length=OBS_HISTORY_LENGTH,
                 acs_history_length=ACT_HISTORY_LENGTH,
             ))
Example #4
def get_inference_trainer_params():
    return d(cls=LatentInferenceTrainer,
             params=d(
                 train_every_n_steps=1 if USE_LATENT else 0,
                 latent_learning_rate=1e-3,
                 log_every_n_steps=1e2,
                 save_every_n_steps=0,
                 train_min_buffer_size=2,
                 obs_to_output_obs_fn=obs_to_output_obs_fn,
             ))
Example #5
def get_inference_dataset_params():
    return d(
        cls=MatDataset,
        params=d(
            input_file=None,  # empty datadict
            output_file=None,  # no save
            batch_size=16,
            planning_horizon=HORIZON,
            obs_history_length=OBS_HISTORY_LENGTH,
            acs_history_length=ACT_HISTORY_LENGTH,
        ))
Example #6
def get_env_params():
    return d(cls=CFSanityEnv,
             params=d(
                 dt=DT,
                 step_dt=STEP_DT,
                 ros_prefix="cf/0/",
                 lag=LAG,
                 use_random_goal=True,
                 num_latent=NUM_LATENT_CLASSES,
                 obs_hist_len=OBS_HISTORY_LENGTH,
                 act_hist_len=ACT_HISTORY_LENGTH,
                 horizon=HORIZON,
             ))
Example #7
def get_env_params():
    return d(cls=TelloPendulumController,
             params=d(control=CONTROL,
                      use_data_capture=DATA_CAPTURE,
                      use_future_goals=True,
                      copter_params=d(dt=DT,
                                      horizon=HORIZON,
                                      ros_prefix="/cf/0/",
                                      offline=OFFLINE,
                                      normalize=False,
                                      initial_goal_pos=np.array(
                                          [0.5, 0.5,
                                           0.0005]).astype(np.float32))))
Example #8
def get_env_spec_params():
    return d(cls=LatentEnvSpec,
             params=d(names_shapes_limits_dtypes=[
                 ('obs', (obs_dim, ), (0, 1), np.float32),
                 ('prev_obs', (OBS_HISTORY_LENGTH, obs_dim), (0, 1),
                  np.float32),
                 ('prev_act', (ACT_HISTORY_LENGTH, act_dim), (0, 1),
                  np.float32),
                 ('latent', (1, ), (0, NUM_LATENT_CLASSES - 1), int),
                 ('next_obs', (obs_dim, ), (0, 1), np.float32),
                 ('next_obs_sigma', (obs_dim, ), (0, np.inf), np.float32),
                 ('goal_obs', (HORIZON + 1, obs_dim), (0, 1), np.float32),
                 ('act', (act_dim, ), (-1, 1), np.float32),
             ]))
Example #9
def get_env_spec_params():
    obs_range = (np.zeros(3), np.array([1, 1, 0.5]))
    act_range = (np.array([-0.5, -0.5, -0.4]), np.array([0.5, 0.5, 0.4]))
    return d(
        cls=LatentEnvSpec,
        params=d(names_shapes_limits_dtypes=[
            ('obs', (obs_dim, ), obs_range, np.float32),
            ('prev_obs', (OBS_HISTORY_LENGTH, obs_dim), obs_range, np.float32),
            ('prev_act', (ACT_HISTORY_LENGTH, act_dim), act_range, np.float32),
            ('latent', (LATENT_DIM, ), (0, NUM_LATENT_CLASSES - 1), int),
            ('next_obs', (obs_dim, ), obs_range, np.float32),
            ('next_obs_sigma', (obs_dim, ), (0, np.inf), np.float32),
            ('goal_obs', (HORIZON + 1, obs_dim), obs_range, np.float32),
            ('act', (act_dim, ), act_range, np.float32),
        ]))
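The limits in each (name, shape, limits, dtype) entry above are what env_spec.clip applies in latent_advance_obs_fn below. A minimal illustration of that kind of range clipping in plain NumPy, with placeholder values; this is not the LatentEnvSpec implementation:

import numpy as np

obs_range = (np.zeros(3), np.array([1, 1, 0.5]))  # (low, high), as in the spec above
raw_next_obs = np.array([1.3, -0.2, 0.4], dtype=np.float32)  # placeholder model prediction
clipped_next_obs = np.clip(raw_next_obs, obs_range[0], obs_range[1])  # -> [1.0, 0.0, 0.4]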
Example #10
def get_policy_params():
    return d(cls=LatentMPCPolicy,
             params=d(num_particles=None,
                      horizon=HORIZON,
                      cost_function=mpc_cost_fn,
                      advance_obs_function=advance_obs_fn,
                      optimizer_cls=CEM,
                      optimizer_params=d(
                          popsize=50,
                          horizon=HORIZON,
                          act_dim=act_dim,
                          max_iters=3,
                          num_elites=10,
                          epsilon=0.001,
                          alpha=0.25,
                      )))
Example #11
def latent_advance_obs_fn(inputs: d, model_outputs: d, env_spec: EnvSpec) -> d:
    env_spec.clip(model_outputs, ["next_obs"])
    return d(
        obs=model_outputs.next_obs.mean(dim=-2),  # mean over all models
        prev_obs=advance_history(inputs.prev_obs, inputs.obs),
        prev_act=advance_history(inputs.prev_act, inputs.act),
        latent=inputs.latent,
    )
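advance_history is used above but not defined in these examples. A plausible minimal sketch, assuming it drops the oldest entry and appends the newest along the history axis; the actual helper may differ:

import torch

def advance_history(history, new_entry):
    # history: (..., hist_len, dim), new_entry: (..., dim)
    # drop the oldest step and append the newest along the history axis
    return torch.cat([history[..., 1:, :], new_entry.unsqueeze(-2)], dim=-2)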
Example #12
def get_trainer_params():
    return d(
        cls=LatentTrainer,
        params=d(
            dynamics_learning_rate=1e-4,
            latent_learning_rate=5e-4,
            # 0 for fixed latent
            latent_train_every_n_steps=LATENT_TRAIN_EVERY_N,
            sample_every_n_steps=0,
            train_every_n_steps=1,
            holdout_every_n_steps=500,
            max_steps=1e5,
            max_train_data_steps=0,
            max_holdout_data_steps=0,
            log_every_n_steps=1e3,
            save_every_n_steps=1e3,
            checkpoint_model_file=MODEL_FILE,
            save_checkpoints=True,
        ))
Example #13
def rollout(env_spec, model, start_obs, action_seq, advance_obs_fn):
    curr_obs = start_obs
    all_obs = [start_obs]
    all_mouts = []

    for i in range(action_seq.act.shape[1]):
        inputs = d()
        for name in env_spec.observation_names:
            inputs[name] = curr_obs[name]

        # TODO allow for only a subset of actions to be optimized
        for name in env_spec.action_names:
            inputs[name] = action_seq[name][:, i]

        model_outputs = model(inputs)
        next_obs = advance_obs_fn(inputs, model_outputs,
                                  env_spec)  # should do any clipping
        all_obs.append(next_obs)
        all_mouts.append(model_outputs)
        curr_obs = next_obs

    return all_obs, all_mouts
Example #14
def get_policy_params():
    return d(cls=LatentMPCPolicy,
             params=d(num_particles=None,
                      horizon=HORIZON,
                      cost_function=mpc_cost_fn,
                      advance_obs_function=advance_obs_fn,
                      optimizer_cls=CEM,
                      optimizer_params=d(
                          popsize=50,
                          horizon=HORIZON,
                          act_dim=act_dim,
                          max_iters=3,
                          num_elites=10,
                          epsilon=0.001,
                          alpha=0.25,
                      )))


params = d(
    exp_name='cf_sanity/lag1/KL_unknown_latent_2class',
    env_spec=get_env_spec_params(),
    env=get_env_params(),
    dataset_train=get_dataset_params(DATA_INPUT_TRAIN),
    dataset_holdout=get_dataset_params(DATA_INPUT_HOLDOUT),
    dataset_inference=get_inference_dataset_params(),
    model=get_model_params(),
    trainer=get_trainer_params(),
    inference_trainer=get_inference_trainer_params(),
    policy=get_policy_params(),
)
Example #15
def latent_obs_to_output_obs_fn(obs: d) -> d:
    return d(next_obs=obs.obs, next_obs_sigma=1e-20 * torch.ones_like(obs.obs))
Example #16
def get_policy_params():
    policy_params = d(cls=LatentMPCPolicy,
                      params=d(num_particles=None,
                               horizon=HORIZON,
                               cost_function=mpc_cost_fn,
                               advance_obs_function=advance_obs_fn,
                               optimizer_cls=CEM,
                               optimizer_params=d(
                                   popsize=50,
                                   horizon=HORIZON,
                                   act_dim=act_dim,
                                   max_iters=3,
                                   num_elites=10,
                                   epsilon=0.001,
                                   alpha=0.25,
                               )))

    if OFFLINE:
        # offline action reader
        policy_params = d(cls=RosPolicy,
                          params=d(
                              ros_action_topic="/cf/0/motion",
                              ros_action_type=CFMotion,
                              msg_to_numpy_fn=lambda msg: np.array(
                                  [msg.x, msg.y, msg.dz], dtype=np.float32),
                              background_policy_cls=policy_params.cls,
                              background_policy_params=policy_params.params,
                          ))

    # otherwise, online actions come from the MPC policy defined above
    return policy_params


params = d(
    exp_name=EXPERIMENT_NAME,
    env_spec=get_env_spec_params(),
    env=get_env_params(),
    dataset_train=get_dataset_params(DATA_INPUT_TRAIN),
    dataset_holdout=get_dataset_params(DATA_INPUT_HOLDOUT),
    dataset_inference=get_inference_dataset_params(),
    model=get_model_params(),
    trainer=get_trainer_params(),
    inference_trainer=get_inference_trainer_params(),
    policy=get_policy_params(),
)