Example #1
def test_save_and_load_yaml():
    ex_dir = setup_experiment('testenv',
                              'testalgo',
                              'testinfo',
                              base_dir=TEMP_DIR)

    # Save test data to a YAML file (ndarrays should be converted to lists)
    save_list_of_dicts_to_yaml(
        [dict(a=1),
         dict(b=2.0),
         dict(c=np.array([1., 2.]).tolist())], ex_dir, 'testfile')

    data = load_dict_from_yaml(osp.join(ex_dir, 'testfile.yaml'))
    assert isinstance(data, dict)

    # Delete the created folder recursively
    shutil.rmtree(osp.join(TEMP_DIR, 'testenv'),
                  ignore_errors=True)  # also deletes read-only files
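For context, here is a minimal standalone sketch of the round trip that the two helpers above wrap, written with plain PyYAML for illustration. Merging the list of dicts into a single mapping is an assumption inferred from the assert in the test, not pyrado's actual implementation.

# Standalone sketch (assumption): roughly what save_list_of_dicts_to_yaml and
# load_dict_from_yaml provide, using plain PyYAML instead of pyrado's helpers.
import os.path as osp
import tempfile

import numpy as np
import yaml

save_dir = tempfile.mkdtemp()

# Save: merge the list of dicts into one mapping; ndarrays must be converted to lists first
list_of_dicts = [dict(a=1), dict(b=2.0), dict(c=np.array([1., 2.]).tolist())]
merged = {k: v for d in list_of_dicts for k, v in d.items()}
with open(osp.join(save_dir, 'testfile.yaml'), 'w') as f:
    yaml.safe_dump(merged, f)

# Load: the file comes back as a single dict, matching the assert in the test above
with open(osp.join(save_dir, 'testfile.yaml')) as f:
    data = yaml.safe_load(f)
assert isinstance(data, dict) and data['c'] == [1.0, 2.0]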
Example #2
    algo_hparam = dict(
        max_iter=15,
        num_eval_rollouts=5,
        warmstart=True,
        thold_succ_subrtn=100,
        subrtn_snapshot_mode='latest',
    )
    algo = SimOpt(ex_dir, env_sim, env_real, subrtn_policy, subrtn_distr,
                  **algo_hparam)

    # Save the environments and the hyper-parameters
    save_list_of_dicts_to_yaml(
        [
            dict(env=env_hparams, seed=args.seed),
            dict(behav_policy=behav_policy_hparam),
            # dict(critic=critic_hparam, vfcn=vfcn_hparam),
            dict(ddp_policy=ddp_policy_hparam,
                 subrtn_distr_name=ddp_policy.name),
            dict(subrtn_distr=subrtn_distr_hparam,
                 subrtn_distr_name=subrtn_distr.name),
            dict(subsubrtn_distr=subsubrtn_distr_hparam,
                 subsubrtn_distr_name=subsubrtn_distr.name),
            dict(subrtn_policy=subrtn_policy_hparam,
                 subrtn_policy_name=subrtn_policy.name),
            dict(algo=algo_hparam, algo_name=algo.name)
        ],
        ex_dir)

    # Jeeeha
    algo.train(seed=args.seed)
Example #3
            rollout(env_real,
                    policy,
                    eval=True,
                    max_steps=args.max_steps,
                    render_mode=RenderMode()))

    # Print and save results
    avg_return = np.mean([ro.undiscounted_return() for ro in ros])
    print_cbt(f'Average return: {avg_return}', 'g', bright=True)
    save_dir = setup_experiment('evaluation',
                                'qbb_experiment',
                                ex_tag,
                                base_dir=pyrado.TEMP_DIR)
    joblib.dump(ros, osp.join(save_dir, 'experiment_rollouts.pkl'))
    save_list_of_dicts_to_yaml([
        dict(ex_dir=ex_dir,
             init_state=init_state,
             avg_return=avg_return,
             num_runs=len(ros))
    ],
                               save_dir,
                               file_name='experiment_summary')

    # Stabilize at the end
    pdctrl.reset(state_des=np.zeros(2))
    rollout(env_real,
            pdctrl,
            eval=True,
            max_steps=1000,
            render_mode=RenderMode(text=True))
Example #4
        lr_scheduler_hparam=dict(gamma=0.99)
    )
    critic = GAE(vfcn, **critic_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=500,
        min_steps=10000,
        num_workers=4,
        vfcn_coeff=0.7,
        entropy_coeff=4e-5,
        batch_size=100,
        std_init=0.8,
        lr=2e-3,
        lr_scheduler=lr_scheduler.ExponentialLR,
        lr_scheduler_hparam=dict(gamma=0.99)
    )
    algo = A2C(ex_dir, env, policy, critic, **algo_hparam)

    # Save the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams, seed=args.seed),
        dict(policy=policy_hparam),
        dict(critic=critic_hparam, vfcn=vfcn_hparam),
        dict(algo=algo_hparam, algo_name=algo.name)],
        ex_dir
    )

    # Jeeeha
    algo.train(seed=args.seed)
Example #5
    # Set the seed
    pyrado.set_seed(1001, verbose=True)

    # Set the hyper-parameters of SysIdViaEpisodicRL
    num_eval_rollouts = 5
    algo_hparam = dict(
        metric=None,
        std_obs_filt=5,
        obs_dim_weight=[1, 1, 1, 1, 10, 10.],
        num_rollouts_per_distr=len(dp_map) * 10,  # former 50
        num_workers=subrtn_hparam['num_workers'])

    # Save the environments and the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams),
        dict(subrtn=subrtn_hparam, subrtn_name=subrtn.name),
        dict(
            algo=algo_hparam, algo_name=SysIdViaEpisodicRL.name, dp_map=dp_map)
    ], ex_dir)

    algo = SysIdViaEpisodicRL(subrtn, behavior_policy, **algo_hparam)

    # Jeeeha
    while algo.curr_iter < algo.max_iter and not algo.stopping_criterion_met():
        algo.logger.add_value(algo.iteration_key, algo.curr_iter)

        # Create fake real-world data
        ro_real = []
        for _ in range(num_eval_rollouts):
            ro_real.append(rollout(env_real, behavior_policy, eval=True))

        algo.step(snapshot_mode='latest',
                  meta_info=dict(rollouts_real=ro_real))  # closing argument assumed; the call was truncated in the source
Example #6
    # Domain parameter space (the opening of this statement was lost in extraction; BoxSpace is assumed)
    ddp_space = BoxSpace(
        bound_lo=np.array([0.8*dp_nom['Mp'], 1e-12, 0.8*dp_nom['Mr'], 1e-12]),
        bound_up=np.array([1.2*dp_nom['Mp'], 1e-11, 1.2*dp_nom['Mr'], 1e-11])
    )

    # Algorithm
    bayrn_hparam = dict(
        max_iter=15,
        acq_fc='UCB',
        acq_param=dict(beta=0.25),
        acq_restarts=500,
        acq_samples=1000,
        num_init_cand=4,
        warmstart=False,
        num_eval_rollouts_real=10,  # sim-2-sim
        # thold_succ_subrtn=300,
    )

    # Save the environments and the hyper-parameters (do it before the init routine of BayRn)
    save_list_of_dicts_to_yaml([
        dict(env_sim=env_sim_hparams, env_real=env_real_hparams, seed=args.seed),
        dict(policy=policy_hparam),
        dict(subrtn=subrtn_hparam, subrtn_name=PoWER.name),
        dict(algo=bayrn_hparam, algo_name=BayRn.name, dp_map=dp_map)],
        ex_dir
    )

    algo = BayRn(ex_dir, env_sim, env_real, subrtn, ddp_space, **bayrn_hparam)

    # Jeeeha
    algo.train(snapshot_mode='latest', seed=args.seed)
Example #7
    # Subroutine
    algo_hparam = dict(
        max_iter=1000,
        min_steps=30 * env.max_steps,
        num_sampler_envs=20,
        num_epoch=5,
        eps_clip=0.1,
        batch_size=100,
        std_init=0.8,
        lr=2e-4,
        # max_grad_norm=1.,
    )
    ppo = PPO(ex_dir, env, policy, critic, **algo_hparam)

    # Meta-Algorithm
    epopt_hparam = dict(skip_iter=100, epsilon=0.2, gamma=critic.gamma)
    algo = EPOpt(ppo, **epopt_hparam)

    # Save the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams, seed=ex_dir.seed),
        dict(policy=policy_hparam),
        dict(critic=critic_hparam, value_fcn=value_fcn_hparam),
        dict(algo=algo_hparam, algo_name=algo.name),
        dict(EPOpt=epopt_hparam)
    ], ex_dir)

    # Jeeeha
    algo.train(seed=ex_dir.seed)
Example #8
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=20, min_rollouts=min_rollouts)
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    ex_dir = setup_experiment('hyperparams', QBallBalancerSim.name, 'ppo_250Hz_actnorm', seed=args.seed)

    # Run hyper-parameter optimization
    name = f'{ex_dir.algo_name}_{ex_dir.add_info}'  # e.g. qbb_ppo_fnn_actnorm
    study = optuna.create_study(
        study_name=name,
        storage=f"sqlite:////{osp.join(pyrado.TEMP_DIR, ex_dir, f'{name}.db')}",
        direction='maximize',
        pruner=MedianPruner(),
        load_if_exists=True
    )
    study.optimize(functools.partial(train_and_eval, ex_dir=ex_dir, seed=args.seed), n_trials=100, n_jobs=6)

    # Save the best hyper-parameters
    save_list_of_dicts_to_yaml([study.best_params, dict(seed=args.seed)], ex_dir, 'best_hyperparams')
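A possible follow-up, not part of the original script: the tuned values written to best_hyperparams.yaml can be read back with the YAML loader shown in Example #1, e.g. to configure a final training run. The import path of load_dict_from_yaml is an assumption and may differ between pyrado versions.

    # Hypothetical continuation: reload the best hyper-parameters saved above
    from pyrado.logger.experiment import load_dict_from_yaml  # import path assumed

    best_hparams = load_dict_from_yaml(osp.join(ex_dir, 'best_hyperparams.yaml'))
    print(best_hparams)  # merged dict of study.best_params plus the seed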
Example #9
    ros = []
    for r in range(args.num_runs):
        if args.mode == 'wo':
            ro = experiment_wo_distruber(env_real, env_sim)
        elif args.mode == 'w':
            ro = experiment_w_distruber(env_real, env_sim)
        else:
            raise pyrado.ValueErr(
                given=args.mode,
                eq_constraint="without (wo), or with (w) disturber")
        ros.append(ro)

    env_real.close()

    # Print and save results
    avg_return = np.mean([ro.undiscounted_return() for ro in ros])
    print_cbt(f'Average return: {avg_return}', 'g', bright=True)
    save_dir = setup_experiment('evaluation',
                                'qcp-st_experiment',
                                ex_tag,
                                base_dir=pyrado.TEMP_DIR)
    joblib.dump(ros, osp.join(save_dir, 'experiment_rollouts.pkl'))
    save_list_of_dicts_to_yaml([
        dict(ex_dir=ex_dir,
             avg_return=avg_return,
             num_runs=len(ros),
             steps_disturb=steps_disturb)
    ],
                               save_dir,
                               file_name='experiment_summary')
Example #10
        env = wrap_like_other_env(env, env_sim)

        # Sample rollouts
        ros = eval_randomized_domain(pool, env, pert, policy, init_state_list)  # internally calls DomainRandWrapperLive

        # Compute results metrics
        rets = [ro.undiscounted_return() for ro in ros]
        lengths = [float(ro.length) for ro in ros]  # int values are not numeric in pandas
        df = df.append(pd.DataFrame(dict(policy=ex_labels[i], ret=rets, len=lengths)), ignore_index=True)

    metrics = dict(
        avg_len=df.groupby('policy').mean()['len'].to_dict(),
        avg_ret=df.groupby('policy').mean()['ret'].to_dict(),
        median_ret=df.groupby('policy').median()['ret'].to_dict(),
        min_ret=df.groupby('policy').min()['ret'].to_dict(),
        max_ret=df.groupby('policy').max()['ret'].to_dict(),
        std_ret=df.groupby('policy').std()['ret'].to_dict()
    )
    pprint(metrics, indent=4)

    # Create subfolder and save
    save_dir = setup_experiment('multiple_policies', args.env_name, 'randomized', base_dir=pyrado.EVAL_DIR)

    save_list_of_dicts_to_yaml(
        [{'ex_dirs': ex_dirs},
         {'num_rpp': args.num_ro_per_config, 'seed': args.seed},
         dict_arraylike_to_float(metrics)],
        save_dir, file_name='summary'
    )
    df.to_pickle(osp.join(save_dir, 'df_dr_mp.pkl'))
Example #11
        varied_param_values = [ro.rollout_info['domain_param'][varied_param_key] for ro in ros]
        varied_param = {varied_param_key: varied_param_values}
        df = df.append(pd.DataFrame(dict(policy=exp_labels[i], ret=rets, len=lengths, **varied_param)),
                       ignore_index=True)

    metrics = dict(
        avg_len=df.groupby('policy').mean()['len'].to_dict(),
        avg_ret=df.groupby('policy').mean()['ret'].to_dict(),
        median_ret=df.groupby('policy').median()['ret'].to_dict(),
        min_ret=df.groupby('policy').min()['ret'].to_dict(),
        max_ret=df.groupby('policy').max()['ret'].to_dict(),
        std_ret=df.groupby('policy').std()['ret'].to_dict(),
        quantile5_ret=df.groupby('policy').quantile(q=0.05)['ret'].to_dict(),
        quantile95_ret=df.groupby('policy').quantile(q=0.95)['ret'].to_dict()
    )
    pprint.pprint(metrics)

    # Create subfolder and save
    save_dir = setup_experiment('multiple_policies', args.env_name, varied_param_key, base_dir=pyrado.EVAL_DIR)

    save_list_of_dicts_to_yaml([
        {'ex_dirs': ex_dirs},
        {
            'varied_param': varied_param_key,
            'num_rpp': args.num_ro_per_config, 'seed': args.seed, 'dt': args.dt, 'max_steps': args.max_steps
        },
        dict_arraylike_to_float(metrics)],
        save_dir, file_name='summary'
    )
    df.to_pickle(osp.join(save_dir, 'df_mp_grid_1d.pkl'))
Example #12
        num_epoch=5,
        lr=1e-3,
        standardize_adv=False,
        max_grad_norm=5.,
    )
    particle_hparam = dict(actor=actor_hparam,
                           vfcn=vfcn_hparam,
                           critic=critic_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=200,
        min_steps=30 * env.max_steps,
        num_particles=3,
        temperature=1,
        lr=1e-3,
        std_init=1.0,
        horizon=50,
        num_workers=12,
    )
    algo = SVPG(ex_dir, env, particle_hparam, **algo_hparam)

    # Save the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams, seed=args.seed),
        dict(algo=algo_hparam, algo_name=algo.name)
    ], ex_dir)

    # Jeeeha
    algo.train(seed=args.seed)
Example #13
            pass
        lod.append(d)

    df = pd.DataFrame(lod)
    metrics = dict(avg_len=df['len'].mean(),
                   avg_ret=df['ret'].mean(),
                   median_ret=df['ret'].median(),
                   min_ret=df['ret'].min(),
                   max_ret=df['ret'].max(),
                   std_ret=df['ret'].std())
    pprint.pprint(metrics)

    # Create subfolder and save
    timestamp = datetime.datetime.now()
    add_info = timestamp.strftime(timestamp_format) + '--' + add_info
    save_dir = osp.join(ex_dir, 'eval_domain_grid', add_info)
    os.makedirs(save_dir, exist_ok=True)

    save_list_of_dicts_to_yaml([{
        'ex_dir': str(ex_dir)
    }, {
        'varied_params': list(param_spec.keys())
    }, {
        'num_rpp': args.num_ro_per_config,
        'seed': args.seed
    },
                                dict_arraylike_to_float(metrics)],
                               save_dir,
                               file_name='summary')
    df.to_pickle(osp.join(save_dir, 'df_sp_grid_nd.pkl'))
Example #14
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    policy = to.load(osp.join(ref_ex_dir, 'policy.pt'))
    policy.init_param()

    # Critic
    vfcn = to.load(osp.join(ref_ex_dir, 'valuefcn.pt'))
    vfcn.init_param()
    critic = GAE(vfcn, **hparams['critic'])

    # Algorithm
    algo_hparam = hparams['subrtn']
    algo_hparam.update({'num_workers': 1})  # should be equivalent to the number of cores per job
    # algo_hparam.update({'max_iter': 300})
    # algo_hparam.update({'max_iter': 600})
    # algo_hparam.update({'min_steps': 3*algo_hparam['min_steps']})
    algo = PPO(ex_dir, env, policy, critic, **algo_hparam)

    # Save the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams, seed=args.seed),
        dict(policy=hparams['policy']),
        dict(critic=hparams['critic']),
        dict(algo=algo_hparam, algo_name=algo.name)],
        ex_dir
    )

    # Jeeeha
    algo.train(seed=args.seed, snapshot_mode='latest')
Example #15
        max_iter=10,
        alpha=0.05,
        beta=0.1,
        nG=20,
        nJ=120,
        ntau=5,
        nc_init=5,
        nr_init=1,
        sequence_cand=sequence_add_init,
        sequence_refs=sequence_const,
        warmstart_cand=True,
        warmstart_refs=True,
        cand_policy_param_init=init_policy_param_values,
        num_bs_reps=1000,
        studentized_ci=False,
    )
    algo = SPOTA(ex_dir, env, sr_cand, sr_refs, **algo_hparam)

    # Save the environments and the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams, seed=args.seed),
        dict(policy=policy_hparam),
        dict(subrtn_name=sr_cand.name,
             subrtn_cand=subrtn_hparam_cand,
             subrtn_refs=subrtn_hparam_refs),
        dict(algo=algo_hparam, algo_name=algo.name)
    ], ex_dir)

    # Jeeeha
    algo.train(seed=args.seed)
Example #16
    env = QCartPoleStabSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(feats=FeatureStack([identity_feat]))
    policy = LinearPolicy(spec=env.spec, **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=30,
        pop_size=50,
        num_rollouts=8,
        num_is_samples=10,
        expl_std_init=2.,
        expl_std_min=0.02,
        full_cov=True,
        symm_sampling=False,
        num_sampler_envs=8,
    )
    algo = CEM(ex_dir, env, policy, **algo_hparam)

    # Save the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams, seed=ex_dir.seed),
        dict(policy=policy_hparam),
        dict(algo=algo_hparam, algo_name=algo.name)
    ], ex_dir)

    # Jeeeha
    algo.train(seed=ex_dir.seed)
Example #17
        init_param_kwargs=None,
        use_cuda=False)
    policy = ADNPolicy(spec=EnvSpec(act_space=InfBoxSpace(shape=1),
                                    obs_space=InfBoxSpace(shape=1)),
                       **policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=1000,
        windowed=False,
        cascaded=True,
        optim_class=optim.Adam,
        optim_hparam=dict(lr=1e-1, eps=1e-8,
                          weight_decay=1e-4),  # momentum=0.7
        loss_fcn=nn.MSELoss(),
        lr_scheduler=lr_scheduler.ExponentialLR,
        lr_scheduler_hparam=dict(gamma=0.995))
    algo = TSPred(ex_dir, dataset, policy, **algo_hparam)

    # Save the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(data_set=data_set_hparam,
             data_set_name=data_set_name,
             seed=args.seed),
        dict(policy=policy_hparam),
        dict(algo=algo_hparam, algo_name=algo.name)
    ], ex_dir)

    # Jeeeha
    algo.train()
Example #18
    spota_hparam = dict(
        max_iter=10,
        alpha=0.05,
        beta=0.1,
        nG=20,
        nJ=120,
        ntau=5,
        nc_init=5,
        nr_init=1,
        sequence_cand=sequence_add_init,
        sequence_refs=sequence_const,
        warmstart_cand=True,
        warmstart_refs=True,
        cand_policy_param_init=init_policy_param_values,
        num_bs_reps=1000,
        studentized_ci=False,
    )
    algo = SPOTA(ex_dir, env, sr_cand, sr_refs, **spota_hparam)

    # Save the environments and the hyper-parameters
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams, seed=ex_dir.seed),
        dict(policy=policy_hparam),
        dict(subroutine_cand=subrtn_hparam_cand, subroutine_refs=subrtn_hparam_cand, subroutine_name=HCNormal.name),
        dict(algo=spota_hparam, algo_name=SPOTA.name)],
        ex_dir
    )

    # Jeeeha
    algo.train(seed=ex_dir.seed)
Example #19
    # Algorithm
    bayrn_hparam = dict(
        max_iter=15,
        acq_fc='UCB',
        acq_param=dict(beta=0.25),
        acq_restarts=500,
        acq_samples=1000,
        num_init_cand=2,
        warmstart=False,
        num_eval_rollouts_real=100 if isinstance(env_real, QQubeSim) else 5,
    )

    # Save the environments and the hyper-parameters (do it before the init routine of BDR)
    save_list_of_dicts_to_yaml([
        dict(env=env_hparams, seed=ex_dir.seed),
        dict(policy=policy_hparam),
        dict(subroutine=subroutine_hparam, subroutine_name=PoWER.name),
        dict(algo=bayrn_hparam, algo_name=BayRn.name, dp_map=dp_map)
    ], ex_dir)

    algo = BayRn(ex_dir,
                 env_sim,
                 env_real,
                 subroutine=power,
                 bounds=bounds,
                 **bayrn_hparam)

    # Jeeeha
    algo.train(snapshot_mode='best', seed=ex_dir.seed)