Example #1
    policy_hparam = dict(
        dt=dt,
        activation_nonlin=to.tanh,
        potentials_dyn_fcn=pd_linear,
        obs_layer=None,
        tau_init=10.0 if "oscillation" in data_set_name else 1.0,
        tau_learnable=True,
        kappa_init=1e-3,
        kappa_learnable=True,
        capacity_learnable=True,
        potential_init_learnable=True,
        init_param_kwargs=None,
        use_cuda=False,
    )
    policy = ADNPolicy(spec=EnvSpec(act_space=InfBoxSpace(shape=1),
                                    obs_space=InfBoxSpace(shape=1)),
                       **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=1000,
        windowed=False,
        cascaded=True,
        optim_class=optim.Adam,
        optim_hparam=dict(lr=1e-1, eps=1e-8,
                          weight_decay=1e-4),  # momentum=0.7
        loss_fcn=nn.MSELoss(),
        lr_scheduler=lr_scheduler.ExponentialLR,
        lr_scheduler_hparam=dict(gamma=0.995),
    )
    algo = TSPred(ex_dir, dataset, policy, **algo_hparam)
Example #2
    if policy_type == "RNN":
        net = RNNPolicy(
            EnvSpec(
                BoxSpace(-1, 1, 4),
                BoxSpace(-1, 1, 2),
            ),
            hidden_size=10,
            num_recurrent_layers=2,
        )
    elif policy_type == "ADN":
        net = ADNPolicy(
            EnvSpec(
                BoxSpace(-1, 1, 4),
                BoxSpace(-1, 1, 2),
            ),
            dt=0.01,
            activation_nonlin=to.sigmoid,
            potentials_dyn_fcn=pd_capacity_21,
        )
    else:
        raise NotImplementedError

    # Trace the policy
    #     traced_net = trace(net, (to.from_numpy(net.env_spec.obs_space.sample_uniform()), net.init_hidden()))
    #     print(traced_net.graph)
    #     print(traced_net(to.from_numpy(net.env_spec.obs_space.sample_uniform()), None))

    stateful_net = script(StatefulRecurrentNetwork(net))
    print(stateful_net.graph)
    print(stateful_net.reset.graph)
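The scripted module can also be serialized with TorchScript so it can be reloaded later, e.g. from C++, without the Python class definitions. A minimal sketch, not part of the original snippet; the file name is an assumption:

# Hypothetical export of the scripted policy (torch is imported as `to` in these examples)
stateful_net.save("stateful_adn_policy.pt")  # equivalent to to.jit.save(stateful_net, "stateful_adn_policy.pt")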
Example #3
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param study_dir: the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Load the data
    data_set_name = "oscillation_50Hz_initpos-0.5"
    data = pd.read_csv(osp.join(pyrado.PERMA_DIR, "misc", f"{data_set_name}.csv"))
    if data_set_name == "daily_min_temperatures":
        data = to.tensor(data["Temp"].values, dtype=to.get_default_dtype()).view(-1, 1)
    elif data_set_name == "monthly_sunspots":
        data = to.tensor(data["Sunspots"].values, dtype=to.get_default_dtype()).view(-1, 1)
    elif "oscillation" in data_set_name:
        data = to.tensor(data["Positions"].values, dtype=to.get_default_dtype()).view(-1, 1)
    else:
        raise pyrado.ValueErr(
            given=data_set_name,
            eq_constraint="'daily_min_temperatures', 'monthly_sunspots', "
            "'oscillation_50Hz_initpos-0.5', or 'oscillation_100Hz_initpos-0.4'",
        )

    # Dataset
    data_set_hparam = dict(
        name=data_set_name,
        ratio_train=0.7,
        window_size=trial.suggest_int("dataset_window_size", 1, 100),
        standardize_data=False,
        scale_min_max_data=True,
    )
    dataset = TimeSeriesDataSet(data, **data_set_hparam)

    # Policy
    policy_hparam = dict(
        dt=0.02 if "oscillation" in data_set_name else 1.0,
        obs_layer=None,
        activation_nonlin=to.tanh,
        potentials_dyn_fcn=fcn_from_str(
            trial.suggest_categorical("policy_potentials_dyn_fcn", ["pd_linear", "pd_cubic"])
        ),
        tau_init=trial.suggest_loguniform("policy_tau_init", 1e-2, 1e3),
        tau_learnable=True,
        kappa_init=trial.suggest_categorical("policy_kappa_init", [0, 1e-4, 1e-2]),
        kappa_learnable=True,
        capacity_learnable=True,
        potential_init_learnable=trial.suggest_categorical("policy_potential_init_learnable", [True, False]),
        init_param_kwargs=trial.suggest_categorical("policy_init_param_kwargs", [None]),
        use_cuda=False,
    )
    policy = ADNPolicy(spec=EnvSpec(act_space=InfBoxSpace(shape=1), obs_space=InfBoxSpace(shape=1)), **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        windowed=trial.suggest_categorical("algo_windowed", [True, False]),
        max_iter=1000,
        optim_class=optim.Adam,
        optim_hparam=dict(
            lr=trial.suggest_uniform("optim_lr", 5e-4, 5e-2),
            eps=trial.suggest_uniform("optim_eps", 1e-8, 1e-5),
            weight_decay=trial.suggest_uniform("optim_weight_decay", 5e-5, 5e-3),
        ),
        loss_fcn=nn.MSELoss(),
    )
    csv_logger = create_csv_step_logger(osp.join(study_dir, f"trial_{trial.number}"))
    algo = TSPred(study_dir, dataset, policy, **algo_hparam, logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode="latest", seed=seed)

    # Evaluate
    num_init_samples = dataset.window_size
    _, loss_trn = TSPred.evaluate(
        policy,
        dataset.data_trn_inp,
        dataset.data_trn_targ,
        windowed=algo.windowed,
        num_init_samples=num_init_samples,
        cascaded=False,
    )
    _, loss_tst = TSPred.evaluate(
        policy,
        dataset.data_tst_inp,
        dataset.data_tst_targ,
        windowed=algo.windowed,
        num_init_samples=num_init_samples,
        cascaded=False,
    )

    return loss_trn
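As the docstring notes, Optuna only passes the `trial` argument, so `study_dir` and `seed` have to be bound via `functools.partial`. A minimal sketch of how this objective could be handed to a study; the directory, seed, number of trials, and optimization direction are assumptions, not part of the original snippet:

import functools
import optuna

# Direction assumed to be "minimize" here, since the objective returns a loss value
study = optuna.create_study(direction="minimize")
study.optimize(functools.partial(train_and_eval, study_dir="/tmp/adn_study", seed=1001), n_trials=100)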
Example #4
    policy_hparam = dict(
        obs_layer=FNN(
            input_size=env.obs_space.flat_dim,
            output_size=env.act_space.flat_dim,
            hidden_sizes=[32, 32],
            hidden_nonlin=to.tanh,
            dropout=0.0,
        ),
        tau_init=10.0,
        tau_learnable=True,
        kappa_init=0.02,
        kappa_learnable=True,
        activation_nonlin=to.sigmoid,
        potentials_dyn_fcn=pd_cubic,
    )
    policy = ADNPolicy(spec=env.spec, **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=5000,
        pop_size=None,
        num_init_states_per_domain=1,
        num_domains=8,
        eta_mean=1.0,
        eta_std=None,
        expl_std_init=1.0,
        symm_sampling=False,
        transform_returns=True,
        num_workers=8,
    )
    algo = NES(ex_dir, env, policy, **algo_hparam)
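Presumably the experiment is then launched via the algorithm's `train` method, analogous to Example #3; the snapshot mode and seed value below are assumptions:

    # Hypothetical call to start training; not part of the original snippet
    algo.train(snapshot_mode="latest", seed=1001)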
Example #5
def create_adn_setup(dt,
                     max_steps,
                     max_dist_force,
                     physics_engine,
                     normalize_obs=True,
                     obsnorm_cpp=True):
    pyrado.set_seed(0)

    # Explicit normalization bounds
    elb = {
        'EffectorLoadCell_Fx': -100.,
        'EffectorLoadCell_Fz': -100.,
        'Effector_Xd': -1,
        'Effector_Zd': -1,
        'GD_DS0d': -1,
        'GD_DS1d': -1,
        'GD_DS2d': -1,
    }
    eub = {
        'GD_DS0': 3.,
        'GD_DS1': 3,
        'GD_DS2': 3,
        'EffectorLoadCell_Fx': 100.,
        'EffectorLoadCell_Fz': 100.,
        'Effector_Xd': .5,
        'Effector_Zd': .5,
        'GD_DS0d': .5,
        'GD_DS1d': .5,
        'GD_DS2d': .5,
        'PredCollCost_h50': 1000.
    }

    extra_kwargs = {}
    if normalize_obs and obsnorm_cpp:
        extra_kwargs['normalizeObservations'] = True
        extra_kwargs['obsNormOverrideLower'] = elb
        extra_kwargs['obsNormOverrideUpper'] = eub

    # Set up environment
    env = Planar3LinkTASim(physicsEngine=physics_engine,
                           dt=dt,
                           max_steps=max_steps,
                           max_dist_force=max_dist_force,
                           positionTasks=True,
                           collisionAvoidanceIK=True,
                           taskCombinationMethod='sum',
                           observeTaskSpaceDiscrepancy=True,
                           **extra_kwargs)

    if normalize_obs and not obsnorm_cpp:
        env = ObsNormWrapper(env, explicit_lb=elb, explicit_ub=eub)

    # Set up random policy
    policy_hparam = dict(
        tau_init=10.,
        activation_nonlin=to.sigmoid,
        potentials_dyn_fcn=pd_cubic,
    )
    policy = ADNPolicy(spec=env.spec, **policy_hparam)
    print_cbt('Running ADNPolicy with random initialization', 'c', bright=True)

    # Simulate and plot potentials
    ro = rollout(env,
                 policy,
                 render_mode=RenderMode(video=True),
                 stop_on_done=True)
    draw_potentials(ro)

    return ro
Example #6
def adn_policy(env: Env):
    return ADNPolicy(
        env.spec, activation_nonlin=to.sigmoid, potentials_dyn_fcn=pd_cubic, potential_init_learnable=False
    )
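A minimal usage sketch of this factory, mirroring the rollout call from Example #5; the environment instance and render settings are assumptions:

# Hypothetical usage; `env` stands for any Pyrado environment instance
policy = adn_policy(env)
ro = rollout(env, policy, render_mode=RenderMode(), stop_on_done=True)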