def get_model_params():
    return d(
        cls=LatentModel,
        params=d(
            device=DEVICE,
            preproc_fn=preproc_fn,
            postproc_fn=postproc_fn,
            loss_fn=loss_fn,
            num_nets=NUM_NETS,
            is_probabilistic=PROBABILISTIC,
            deterministic_sigma_multiplier=0.01,  # default sigma_obs uncertainty multiplier
            network=SequentialParams([
                LayerParams("linear", in_features=MODEL_IN, out_features=200),
                LayerParams("relu"),
                LayerParams("linear", in_features=200, out_features=200),
                LayerParams("relu"),
                LayerParams("linear", in_features=200, out_features=200),
                LayerParams("relu"),
                LayerParams("linear", in_features=200, out_features=NUM_NETS * MODEL_OUT),
            ]),
            latent_object=d(
                device=DEVICE,
                num_latent_classes=NUM_LATENT_CLASSES,
                latent_dim=LATENT_DIM,
                known_latent_default_mu=DEFAULT_LATENT_MU,
                known_latent_default_log_sigma=DEFAULT_LATENT_LOG_SIGMA,
                beta_kl=0.1,  # weighting of the KL term relative to logprob in inference
            ),
        ))
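# The final linear layer emits NUM_NETS * MODEL_OUT values per input, i.e. one
# prediction head per ensemble member sharing a trunk. A minimal sketch of how
# such a flat output is typically split per-network; the reshape below is an
# assumption about LatentModel's internals, not its actual code:
import torch

def split_ensemble_output(flat_out: torch.Tensor, num_nets: int, model_out: int) -> torch.Tensor:
    # (batch, num_nets * model_out) -> (batch, num_nets, model_out)
    return flat_out.view(flat_out.shape[0], num_nets, model_out)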
def get_policy_params():
    policy_params = d(
        cls=LatentMPCPolicy,
        params=d(
            num_particles=None,
            horizon=HORIZON,
            cost_function=mpc_cost_fn,
            advance_obs_function=advance_obs_fn,
            optimizer_cls=CEM,
            optimizer_params=d(
                popsize=50,
                horizon=HORIZON,
                act_dim=act_dim,
                max_iters=3,
                num_elites=10,
                epsilon=0.001,
                alpha=0.25,
            )))
    if OFFLINE:  # offline action reader; otherwise actions come from the policy online
        policy_params = d(
            cls=RosPolicy,
            params=d(
                ros_action_topic="/cf/0/motion",
                ros_action_type=CFMotion,
                msg_to_numpy_fn=lambda msg: np.array([msg.x, msg.y, msg.dz], dtype=np.float32),
                background_policy_cls=policy_params.cls,
                background_policy_params=policy_params.params,
            ))
    return policy_params
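# The CEM settings above (popsize=50, num_elites=10, alpha=0.25, max_iters=3)
# drive a standard cross-entropy-method loop. A minimal, self-contained sketch
# of that loop under those assumptions; cost_fn and the Gaussian sampling
# below are illustrative, not the repo's CEM implementation:
import numpy as np

def cem_sketch(cost_fn, act_dim, horizon, popsize=50, num_elites=10, max_iters=3, alpha=0.25):
    mu = np.zeros((horizon, act_dim))
    sigma = np.ones((horizon, act_dim))
    for _ in range(max_iters):
        # sample candidate action sequences: (popsize, horizon, act_dim)
        samples = mu + sigma * np.random.randn(popsize, horizon, act_dim)
        costs = np.array([cost_fn(s) for s in samples])
        elites = samples[np.argsort(costs)[:num_elites]]  # lowest-cost candidates
        # smooth the distribution update by alpha
        mu = alpha * mu + (1 - alpha) * elites.mean(axis=0)
        sigma = alpha * sigma + (1 - alpha) * elites.std(axis=0)
    return mu  # mean of the final distribution is the planned action sequence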
def get_dataset_params(input_file):
    return d(
        cls=MatDataset,
        params=d(
            input_file=input_file,
            output_file='',
            batch_size=100,
            planning_horizon=HORIZON,
            obs_history_length=OBS_HISTORY_LENGTH,
            acs_history_length=ACT_HISTORY_LENGTH,
        ))
def get_inference_trainer_params():
    return d(
        cls=LatentInferenceTrainer,
        params=d(
            train_every_n_steps=1 if USE_LATENT else 0,
            latent_learning_rate=1e-3,
            log_every_n_steps=1e2,
            save_every_n_steps=0,
            train_min_buffer_size=2,
            obs_to_output_obs_fn=obs_to_output_obs_fn,
        ))
def get_inference_dataset_params():
    return d(
        cls=MatDataset,
        params=d(
            input_file=None,   # empty datadict
            output_file=None,  # no save
            batch_size=16,
            planning_horizon=HORIZON,
            obs_history_length=OBS_HISTORY_LENGTH,
            acs_history_length=ACT_HISTORY_LENGTH,
        ))
def get_env_params():
    return d(
        cls=CFSanityEnv,
        params=d(
            dt=DT,
            step_dt=STEP_DT,
            ros_prefix="cf/0/",
            lag=LAG,
            use_random_goal=True,
            num_latent=NUM_LATENT_CLASSES,
            obs_hist_len=OBS_HISTORY_LENGTH,
            act_hist_len=ACT_HISTORY_LENGTH,
            horizon=HORIZON,
        ))
def get_env_params():
    return d(
        cls=TelloPendulumController,
        params=d(
            control=CONTROL,
            use_data_capture=DATA_CAPTURE,
            use_future_goals=True,
            copter_params=d(
                dt=DT,
                horizon=HORIZON,
                ros_prefix="/cf/0/",
                offline=OFFLINE,
                normalize=False,
                initial_goal_pos=np.array([0.5, 0.5, 0.0005], dtype=np.float32),
            )))
def get_env_spec_params():
    return d(
        cls=LatentEnvSpec,
        params=d(names_shapes_limits_dtypes=[
            ('obs', (obs_dim,), (0, 1), np.float32),
            ('prev_obs', (OBS_HISTORY_LENGTH, obs_dim), (0, 1), np.float32),
            ('prev_act', (ACT_HISTORY_LENGTH, act_dim), (0, 1), np.float32),
            ('latent', (1,), (0, NUM_LATENT_CLASSES - 1), np.int64),  # np.int was removed in NumPy 1.24
            ('next_obs', (obs_dim,), (0, 1), np.float32),
            ('next_obs_sigma', (obs_dim,), (0, np.inf), np.float32),
            ('goal_obs', (HORIZON + 1, obs_dim), (0, 1), np.float32),
            ('act', (act_dim,), (-1, 1), np.float32),
        ]))
def get_env_spec_params():
    obs_range = (np.zeros(3), np.array([1, 1, 0.5]))
    act_range = (np.array([-0.5, -0.5, -0.4]), np.array([0.5, 0.5, 0.4]))
    return d(
        cls=LatentEnvSpec,
        params=d(names_shapes_limits_dtypes=[
            ('obs', (obs_dim,), obs_range, np.float32),
            ('prev_obs', (OBS_HISTORY_LENGTH, obs_dim), obs_range, np.float32),
            ('prev_act', (ACT_HISTORY_LENGTH, act_dim), act_range, np.float32),
            ('latent', (LATENT_DIM,), (0, NUM_LATENT_CLASSES - 1), np.int64),  # np.int was removed in NumPy 1.24
            ('next_obs', (obs_dim,), obs_range, np.float32),
            ('next_obs_sigma', (obs_dim,), (0, np.inf), np.float32),
            ('goal_obs', (HORIZON + 1, obs_dim), obs_range, np.float32),
            ('act', (act_dim,), act_range, np.float32),
        ]))
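# Each (name, shape, limits, dtype) entry above defines per-key bounds that
# the spec can enforce (see env_spec.clip in latent_advance_obs_fn below).
# A minimal sketch of that clipping, assuming plain numpy arrays rather than
# the repo's actual LatentEnvSpec internals:
import numpy as np

def clip_to_spec(values, names_shapes_limits_dtypes, name):
    limits = {n: lim for n, _, lim, _ in names_shapes_limits_dtypes}
    low, high = limits[name]
    # scalar or per-dimension bounds both broadcast through np.clip
    return np.clip(values, low, high)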
def get_policy_params():
    return d(
        cls=LatentMPCPolicy,
        params=d(
            num_particles=None,
            horizon=HORIZON,
            cost_function=mpc_cost_fn,
            advance_obs_function=advance_obs_fn,
            optimizer_cls=CEM,
            optimizer_params=d(
                popsize=50,
                horizon=HORIZON,
                act_dim=act_dim,
                max_iters=3,
                num_elites=10,
                epsilon=0.001,
                alpha=0.25,
            )))
def latent_advance_obs_fn(inputs: d, model_outputs: d, env_spec: EnvSpec) -> d:
    env_spec.clip(model_outputs, ["next_obs"])
    return d(
        obs=model_outputs.next_obs.mean(dim=-2),  # mean over all models
        prev_obs=advance_history(inputs.prev_obs, inputs.obs),
        prev_act=advance_history(inputs.prev_act, inputs.act),
        latent=inputs.latent,
    )
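# advance_history maintains a fixed-length FIFO over past observations/actions:
# drop the oldest entry and append the newest. A minimal sketch under that
# assumption (the repo's advance_history may differ in layout):
import torch

def advance_history_sketch(history: torch.Tensor, latest: torch.Tensor) -> torch.Tensor:
    # history: (..., hist_len, dim); latest: (..., dim)
    return torch.cat([history[..., 1:, :], latest.unsqueeze(-2)], dim=-2)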
def get_trainer_params():
    return d(
        cls=LatentTrainer,
        params=d(
            dynamics_learning_rate=1e-4,
            latent_learning_rate=5e-4,
            latent_train_every_n_steps=LATENT_TRAIN_EVERY_N,  # 0 for fixed latent
            sample_every_n_steps=0,
            train_every_n_steps=1,
            holdout_every_n_steps=500,
            max_steps=1e5,
            max_train_data_steps=0,
            max_holdout_data_steps=0,
            log_every_n_steps=1e3,
            save_every_n_steps=1e3,
            checkpoint_model_file=MODEL_FILE,
            save_checkpoints=True,
        ))
def rollout(env_spec, model, start_obs, action_seq, advance_obs_fn):
    curr_obs = start_obs
    all_obs = [start_obs]
    all_mouts = []
    for i in range(action_seq.act.shape[1]):
        inputs = d()
        for name in env_spec.observation_names:
            inputs[name] = curr_obs[name]
        # TODO allow for only a subset of actions to be optimized
        for name in env_spec.action_names:
            inputs[name] = action_seq[name][:, i]
        model_outputs = model(inputs)
        # advance_obs_fn should do any clipping
        next_obs = advance_obs_fn(inputs, model_outputs, env_spec)
        all_obs.append(next_obs)
        all_mouts.append(model_outputs)
        curr_obs = next_obs
    return all_obs, all_mouts
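# Usage sketch: unroll the learned dynamics over each candidate action
# sequence, then score the trajectory; this is how an MPC cost evaluation
# typically consumes rollout. The per-step cost reduction below is an
# assumption about mpc_cost_fn's signature, not the repo's actual interface.
def evaluate_action_sequences(env_spec, model, start_obs, action_seq):
    all_obs, all_mouts = rollout(env_spec, model, start_obs, action_seq,
                                 latent_advance_obs_fn)
    # sum hypothetical per-step costs over the horizon, one per candidate
    return sum(mpc_cost_fn(obs, mout) for obs, mout in zip(all_obs[1:], all_mouts))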
params = d(
    exp_name='cf_sanity/lag1/KL_unknown_latent_2class',
    env_spec=get_env_spec_params(),
    env=get_env_params(),
    dataset_train=get_dataset_params(DATA_INPUT_TRAIN),
    dataset_holdout=get_dataset_params(DATA_INPUT_HOLDOUT),
    dataset_inference=get_inference_dataset_params(),
    model=get_model_params(),
    trainer=get_trainer_params(),
    inference_trainer=get_inference_trainer_params(),
    policy=get_policy_params(),
)
def latent_obs_to_output_obs_fn(obs: d) -> d:
    # treat the current obs as a near-deterministic next-obs target
    return d(next_obs=obs.obs, next_obs_sigma=1e-20 * torch.ones_like(obs.obs))
params = d(
    exp_name=EXPERIMENT_NAME,
    env_spec=get_env_spec_params(),
    env=get_env_params(),
    dataset_train=get_dataset_params(DATA_INPUT_TRAIN),
    dataset_holdout=get_dataset_params(DATA_INPUT_HOLDOUT),
    dataset_inference=get_inference_dataset_params(),
    model=get_model_params(),
    trainer=get_trainer_params(),
    inference_trainer=get_inference_trainer_params(),
    policy=get_policy_params(),
)