Ejemplo n.º 1
0
        # feats=FeatureStack([RandFourierFeat(env.obs_space.flat_dim, num_feat=20, bandwidth=env.obs_space.bound_up)])
        feats=FeatureStack([const_feat, identity_feat, sign_feat, abs_feat, squared_feat, cubic_feat, ATan2Feat(1, 2),
                            MultFeat([3, 4])])
    )
    # Instantiate the linear policy from the hyper-parameters collected above
    policy = LinearPolicy(spec=env.spec, **policy_hparam)

    # Hyper-parameters for REPS; the population size grows with the number of policy parameters
    algo_hparam = {
        "max_iter": 500,
        "eps": 1.0,
        "pop_size": 20 * policy.num_param,
        "num_rollouts": 4,
        "expl_std_init": 0.2,
        "expl_std_min": 0.02,
        "use_map": True,
        "optim_mode": "scipy",
        "num_workers": 12,
    }
    algo = REPS(ex_dir, env, policy, **algo_hparam)

    # Store the environment, policy, and algorithm settings (plus seed and algorithm name) in the experiment directory
    save_list_of_dicts_to_yaml(
        [
            {"env": env_hparams, "seed": args.seed},
            {"policy": policy_hparam},
            {"algo": algo_hparam, "algo_name": algo.name},
        ],
        ex_dir,
    )

    # Start the training run
    algo.train(snapshot_mode="best", seed=args.seed)
Ejemplo n.º 2
0
        # feats=FeatureStack(RBFFeat(num_feat_per_dim=20, bounds=env.obs_space.bounds, scale=0.8)),
        feats=FeatureStack(identity_feat, sin_feat))
    # Instantiate the linear policy from the hyper-parameters collected above
    policy = LinearPolicy(spec=env.spec, **policy_hparam)

    # Hyper-parameters for REPS; the population size grows with the number of policy parameters
    algo_hparam = {
        "max_iter": 500,
        "eps": 0.2,
        "pop_size": 10 * policy.num_param,
        "num_init_states_per_domain": 10,
        "expl_std_init": 0.2,
        "expl_std_min": 0.02,
        "num_epoch_dual": 1000,
        "optim_mode": "scipy",
        "lr_dual": 1e-3,
        "use_map": True,
        "num_workers": 8,
    }
    algo = REPS(ex_dir, env, policy, **algo_hparam)

    # Store the environment, policy, and algorithm settings (plus seed and algorithm name) in the experiment directory
    save_dicts_to_yaml(
        {"env": env_hparams, "seed": args.seed},
        {"policy": policy_hparam},
        {"algo": algo_hparam, "algo_name": algo.name},
        save_dir=ex_dir,
    )

    # Start the training run
    algo.train(seed=args.seed)
Ejemplo n.º 3
0
            cubic_feat,
            ATan2Feat(1, 2),
            MultFeat((3, 4)),
        ]))
    # Instantiate the linear policy from the hyper-parameters collected above
    policy = LinearPolicy(spec=env.spec, **policy_hparam)

    # Hyper-parameters for REPS; the population size grows with the number of policy parameters
    algo_hparam = {
        "max_iter": 500,
        "eps": 1.0,
        "pop_size": 20 * policy.num_param,
        "num_init_states_per_domain": 4,
        "expl_std_init": 0.2,
        "expl_std_min": 0.02,
        "use_map": True,
        "optim_mode": "scipy",
        "num_workers": 12,
    }
    algo = REPS(ex_dir, env, policy, **algo_hparam)

    # Store the environment, policy, and algorithm settings (plus seed and algorithm name) in the experiment directory
    save_dicts_to_yaml(
        {"env": env_hparams, "seed": args.seed},
        {"policy": policy_hparam},
        {"algo": algo_hparam, "algo_name": algo.name},
        save_dir=ex_dir,
    )

    # Start the training run
    algo.train(snapshot_mode="best", seed=args.seed)