policy_hparam = dict(
    dt=dt,
    activation_nonlin=to.tanh,
    potentials_dyn_fcn=pd_linear,
    obs_layer=None,
    tau_init=10.0 if "oscillation" in data_set_name else 1.0,
    tau_learnable=True,
    kappa_init=1e-3,
    kappa_learnable=True,
    capacity_learnable=True,
    potential_init_learnable=True,
    init_param_kwargs=None,
    use_cuda=False,
)
policy = ADNPolicy(spec=EnvSpec(act_space=InfBoxSpace(shape=1), obs_space=InfBoxSpace(shape=1)), **policy_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=1000,
    windowed=False,
    cascaded=True,
    optim_class=optim.Adam,
    optim_hparam=dict(lr=1e-1, eps=1e-8, weight_decay=1e-4),  # momentum=0.7
    loss_fcn=nn.MSELoss(),
    lr_scheduler=lr_scheduler.ExponentialLR,
    lr_scheduler_hparam=dict(gamma=0.995),
)
algo = TSPred(ex_dir, dataset, policy, **algo_hparam)
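# A minimal usage sketch (not part of the original snippet): train the predictor and evaluate it on the
# held-out test split. The calls mirror the ones used in the hyper-parameter optimization objective below;
# the seed value is an arbitrary choice for illustration.
algo.train(snapshot_mode="latest", seed=0)
_, loss_tst = TSPred.evaluate(
    policy,
    dataset.data_tst_inp,
    dataset.data_tst_targ,
    windowed=algo.windowed,
    num_init_samples=dataset.window_size,
    cascaded=False,
)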
if policy_type == "RNN":
    net = RNNPolicy(
        EnvSpec(
            BoxSpace(-1, 1, 4),
            BoxSpace(-1, 1, 2),
        ),
        hidden_size=10,
        num_recurrent_layers=2,
    )
elif policy_type == "ADN":
    net = ADNPolicy(
        EnvSpec(
            BoxSpace(-1, 1, 4),
            BoxSpace(-1, 1, 2),
        ),
        dt=0.01,
        activation_nonlin=to.sigmoid,
        potentials_dyn_fcn=pd_capacity_21,
    )
else:
    raise NotImplementedError

# Trace the policy
# traced_net = trace(net, (to.from_numpy(net.env_spec.obs_space.sample_uniform()), net.init_hidden()))
# print(traced_net.graph)
# print(traced_net(to.from_numpy(net.env_spec.obs_space.sample_uniform()), None))

# Script the policy wrapped in a StatefulRecurrentNetwork and inspect the resulting graphs
stateful_net = script(StatefulRecurrentNetwork(net))
print(stateful_net.graph)
print(stateful_net.reset.graph)
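# Optional follow-up (a sketch, not part of the original snippet): a scripted module can be serialized
# with TorchScript's regular save/load API, e.g. to run it later without the Python class definitions.
# The file name is an arbitrary choice for illustration.
stateful_net.save("stateful_net.pt")
loaded_net = to.jit.load("stateful_net.pt")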
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int):
    """
    Objective function for the Optuna `Study` to minimize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param study_dir: the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Load the data
    data_set_name = "oscillation_50Hz_initpos-0.5"
    data = pd.read_csv(osp.join(pyrado.PERMA_DIR, "misc", f"{data_set_name}.csv"))
    if data_set_name == "daily_min_temperatures":
        data = to.tensor(data["Temp"].values, dtype=to.get_default_dtype()).view(-1, 1)
    elif data_set_name == "monthly_sunspots":
        data = to.tensor(data["Sunspots"].values, dtype=to.get_default_dtype()).view(-1, 1)
    elif "oscillation" in data_set_name:
        data = to.tensor(data["Positions"].values, dtype=to.get_default_dtype()).view(-1, 1)
    else:
        raise pyrado.ValueErr(
            given=data_set_name,
            eq_constraint="'daily_min_temperatures', 'monthly_sunspots', "
            "'oscillation_50Hz_initpos-0.5', or 'oscillation_100Hz_initpos-0.4'",
        )

    # Dataset
    data_set_hparam = dict(
        name=data_set_name,
        ratio_train=0.7,
        window_size=trial.suggest_int("dataset_window_size", 1, 100),
        standardize_data=False,
        scale_min_max_data=True,
    )
    dataset = TimeSeriesDataSet(data, **data_set_hparam)

    # Policy
    policy_hparam = dict(
        dt=0.02 if "oscillation" in data_set_name else 1.0,
        obs_layer=None,
        activation_nonlin=to.tanh,
        potentials_dyn_fcn=fcn_from_str(
            trial.suggest_categorical("policy_potentials_dyn_fcn", ["pd_linear", "pd_cubic"])
        ),
        tau_init=trial.suggest_loguniform("policy_tau_init", 1e-2, 1e3),
        tau_learnable=True,
        kappa_init=trial.suggest_categorical("policy_kappa_init", [0, 1e-4, 1e-2]),
        kappa_learnable=True,
        capacity_learnable=True,
        potential_init_learnable=trial.suggest_categorical("policy_potential_init_learnable", [True, False]),
        init_param_kwargs=trial.suggest_categorical("policy_init_param_kwargs", [None]),
        use_cuda=False,
    )
    policy = ADNPolicy(spec=EnvSpec(act_space=InfBoxSpace(shape=1), obs_space=InfBoxSpace(shape=1)), **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        windowed=trial.suggest_categorical("algo_windowed", [True, False]),
        max_iter=1000,
        optim_class=optim.Adam,
        optim_hparam=dict(
            lr=trial.suggest_uniform("optim_lr", 5e-4, 5e-2),
            eps=trial.suggest_uniform("optim_eps", 1e-8, 1e-5),
            weight_decay=trial.suggest_uniform("optim_weight_decay", 5e-5, 5e-3),
        ),
        loss_fcn=nn.MSELoss(),
    )
    csv_logger = create_csv_step_logger(osp.join(study_dir, f"trial_{trial.number}"))
    algo = TSPred(study_dir, dataset, policy, **algo_hparam, logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode="latest", seed=seed)

    # Evaluate on the training and the test split
    num_init_samples = dataset.window_size
    _, loss_trn = TSPred.evaluate(
        policy,
        dataset.data_trn_inp,
        dataset.data_trn_targ,
        windowed=algo.windowed,
        num_init_samples=num_init_samples,
        cascaded=False,
    )
    _, loss_tst = TSPred.evaluate(
        policy,
        dataset.data_tst_inp,
        dataset.data_tst_targ,
        windowed=algo.windowed,
        num_init_samples=num_init_samples,
        cascaded=False,
    )

    # Return the loss on the training set as the objective value
    return loss_trn
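# Sketch of how the objective above is typically handed to Optuna, as hinted at in the docstring:
# the extra arguments are bound via functools.partial. The study directory, seed, and number of
# trials are illustrative assumptions, not values taken from the original script.
if __name__ == "__main__":
    import functools

    study = optuna.create_study(direction="minimize")
    study.optimize(functools.partial(train_and_eval, study_dir="hopt_tspred", seed=0), n_trials=100)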
policy_hparam = dict(
    obs_layer=FNN(
        input_size=env.obs_space.flat_dim,
        output_size=env.act_space.flat_dim,
        hidden_sizes=[32, 32],
        hidden_nonlin=to.tanh,
        dropout=0.0,
    ),
    tau_init=10.0,
    tau_learnable=True,
    kappa_init=0.02,
    kappa_learnable=True,
    activation_nonlin=to.sigmoid,
    potentials_dyn_fcn=pd_cubic,
)
policy = ADNPolicy(spec=env.spec, **policy_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=5000,
    pop_size=None,
    num_init_states_per_domain=1,
    num_domains=8,
    eta_mean=1.0,
    eta_std=None,
    expl_std_init=1.0,
    symm_sampling=False,
    transform_returns=True,
    num_workers=8,
)
algo = NES(ex_dir, env, policy, **algo_hparam)
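# Minimal training call (a sketch, not taken from the original snippet); the snapshot mode and seed
# follow the pattern used elsewhere in this collection.
algo.train(snapshot_mode="latest", seed=0)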
def create_adn_setup(dt, max_steps, max_dist_force, physics_engine, normalize_obs=True, obsnorm_cpp=True):
    """
    Set up a `Planar3LinkTASim` environment and a randomly initialized `ADNPolicy`, then simulate one rollout
    and plot the potentials.

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps per rollout
    :param max_dist_force: maximum disturbance force
    :param physics_engine: name of the physics engine passed to the simulation
    :param normalize_obs: if `True`, normalize the observations
    :param obsnorm_cpp: if `True`, normalize the observations on the C++ side, else use `ObsNormWrapper`
    :return: the recorded rollout
    """
    pyrado.set_seed(0)

    # Explicit normalization bounds
    elb = {
        "EffectorLoadCell_Fx": -100.0,
        "EffectorLoadCell_Fz": -100.0,
        "Effector_Xd": -1,
        "Effector_Zd": -1,
        "GD_DS0d": -1,
        "GD_DS1d": -1,
        "GD_DS2d": -1,
    }
    eub = {
        "GD_DS0": 3.0,
        "GD_DS1": 3,
        "GD_DS2": 3,
        "EffectorLoadCell_Fx": 100.0,
        "EffectorLoadCell_Fz": 100.0,
        "Effector_Xd": 0.5,
        "Effector_Zd": 0.5,
        "GD_DS0d": 0.5,
        "GD_DS1d": 0.5,
        "GD_DS2d": 0.5,
        "PredCollCost_h50": 1000.0,
    }

    extra_kwargs = {}
    if normalize_obs and obsnorm_cpp:
        extra_kwargs["normalizeObservations"] = True
        extra_kwargs["obsNormOverrideLower"] = elb
        extra_kwargs["obsNormOverrideUpper"] = eub

    # Set up environment
    env = Planar3LinkTASim(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        positionTasks=True,
        collisionAvoidanceIK=True,
        taskCombinationMethod="sum",
        observeTaskSpaceDiscrepancy=True,
        **extra_kwargs,
    )
    if normalize_obs and not obsnorm_cpp:
        env = ObsNormWrapper(env, explicit_lb=elb, explicit_ub=eub)

    # Set up random policy
    policy_hparam = dict(
        tau_init=10.0,
        activation_nonlin=to.sigmoid,
        potentials_dyn_fcn=pd_cubic,
    )
    policy = ADNPolicy(spec=env.spec, **policy_hparam)
    print_cbt("Running ADNPolicy with random initialization", "c", bright=True)

    # Simulate and plot potentials
    ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
    draw_potentials(ro)

    return ro
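# Hypothetical invocation (a sketch with assumed argument values; in particular the physics engine
# name "Bullet" and the step size are illustrative, not taken from the function above).
ro = create_adn_setup(dt=0.01, max_steps=1800, max_dist_force=None, physics_engine="Bullet")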
def adn_policy(env: Env):
    return ADNPolicy(
        env.spec,
        activation_nonlin=to.sigmoid,
        potentials_dyn_fcn=pd_cubic,
        potential_init_learnable=False,
    )
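# Hypothetical usage of the factory above (a sketch): build the policy for a given simulation
# environment `env` and record a single rollout, reusing the rollout helper from the snippet above.
policy = adn_policy(env)
ro = rollout(env, policy, render_mode=RenderMode(), stop_on_done=True)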