Example #1
import copy
import os

import numpy as np

# Assumed module-level imports from the surrounding repo (not shown in the
# original snippet): task, config, ENV_NAME, and the SAC MlpPolicy.


def get_task_config():
    wann_param_config = task.get_default_wann_hyperparams()
    wann_param_config['task'] = ENV_NAME
    wann_param_config['maxGen'] = 1
    wann_param_config['popSize'] = 100
    wann_param_config['alg_nReps'] = 1

    task_config = dict(
        WANN_ENV_ID='wann-humanoid-v0',
        NUM_WORKERS=5,
        DEVICE='cuda:0',
        GAME_CONFIG=task.Game(env_name=ENV_NAME,
                              actionSelect='all',  # OPTIONS: soft, all, hard
                              input_size=44,
                              output_size=44,
                              time_factor=0,
                              layers=[40, 40],
                              i_act=np.full(44, 1),
                              h_act=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                              o_act=np.full(44, 1),
                              weightCap=2.0,
                              noise_bias=0.0,
                              output_noise=[False, False, False],
                              max_episode_length=1000,
                              n_critic_bootstrap=5,
                              alg_type=task.ALG.SAC,
                              artifacts_path=f'{task.RESULTS_PATH}artifact{os.sep}{config.EXPERIMENT_ID}{os.sep}',
                              in_out_labels=[]),
        AGENT=dict(
            datprep=None,
            policy=MlpPolicy,
            mini_mem_size=int(1e6//10),
            wann_batch_size=1024,
            wann_bootstrap_default=-100,
            learn_params=dict(
                gamma=0.99,
                tau=5e-3,
                learn_rate=1e-4,
                mem_size=int(1e6),
                target_entropy='auto',
                timesteps=1000,
                train_batch_size=100,  # batch buffer size
                episode_len=-1,  # entire episode length
                eval_episode_len=-1,
                alg_checkpoint_interval=500,
                start_steps=100,
                n_trains_per_step=1,  # soft target updates should use 1, try 5 for hard target updates
                gradient_steps_per_step=1,
                eval_interval=1500,
                log_interval=10,
                log_verbose=1,
                replay_sample_ratio=1,  # 4:1 or .25 replay buffer sample to gradient update ratio
            )
        ),
        WANN_PARAM_CONFIG=wann_param_config,
        VIDEO_LENGTH=1500,
        RESULTS_PATH=task.RESULTS_PATH
    )

    return copy.deepcopy(task_config)
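
Usage note (a minimal sketch, not taken from the source repo): get_task_config() returns a plain dict, so callers can read the nested keys directly; only the key layout comes from the example above.

# Illustrative consumption of the returned config; the accessor lines are assumptions.
cfg = get_task_config()

print(cfg['WANN_ENV_ID'])                     # -> 'wann-humanoid-v0'
print(cfg['AGENT']['learn_params']['gamma'])  # -> 0.99
print(cfg['WANN_PARAM_CONFIG']['popSize'])    # -> 100

# GAME_CONFIG holds the task.Game instance built above; if Game behaves like a
# namedtuple, its fields are also reachable as attributes (assumption):
# cfg['GAME_CONFIG'].env_name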
Example #2
def get_task_config():
    wann_param_config = task.get_default_wann_hyperparams()
    wann_param_config['task'] = ENV_NAME
    wann_param_config['maxGen'] = 1
    wann_param_config['popSize'] = 20
    wann_param_config['alg_nReps'] = 1

    task_config = dict(
        WANN_ENV_ID='wann-cartpole',
        NUM_WORKERS=5,  # mp.cpu_count() for all cores
        GAME_CONFIG=task.Game(
            env_name='CartPole-v1',
            actionSelect='all',  # all, soft, hard
            input_size=4,  # CartPole-v1 observation is a 4-dimensional state vector
            output_size=4,
            time_factor=0,
            layers=[25, 25],
            i_act=np.full(4, 1),
            h_act=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            o_act=np.full(4, 1),
            weightCap=2.0,
            noise_bias=0.0,
            output_noise=[False, False, False],
            max_episode_length=300,  # use the full episode length or a reasonable trajectory length here
            n_critic_bootstrap=5,
            alg_type=task.ALG.SAC,
            artifacts_path=f'{task.RESULTS_PATH}artifact{os.sep}{config.EXPERIMENT_ID}{os.sep}',
            in_out_labels=[]),
        AGENT=dict(
            datprep=None,
            policy=MlpPolicy,
            mini_mem_size=int(1e6 // 10),
            wann_batch_size=1024,
            wann_bootstrap_default=-100,
            learn_params=dict(
                gamma=0.99,
                tau=5e-3,
                learn_rate=1e-4,
                mem_size=int(1e6),
                target_entropy='auto',
                timesteps=300,  # for baseline SAC, allow enough timesteps for at least 500 episodes
                train_batch_size=100,  # batch buffer size
                episode_len=-1,  # entire episode length
                eval_episode_len=-1,
                alg_checkpoint_interval=500,
                start_steps=10,
                n_trains_per_step=1,  # soft target updates should use 1; try 5 for hard target updates
                gradient_steps_per_step=1,
                eval_interval=1500,
                log_interval=10,
                log_verbose=1,
                replay_sample_ratio=1,  # 1:1 or 4:1 for 1e6 replay mem size
            )),
        WANN_PARAM_CONFIG=wann_param_config,
        VIDEO_LENGTH=1500,
        RESULTS_PATH=task.RESULTS_PATH)

    return copy.deepcopy(task_config)
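
Both examples follow the same setup pattern: fetch the repo's default WANN hyperparameters, override a few keys, then embed the result in the task config. A hedged sketch of that shared pattern (the helper name make_wann_params is hypothetical; the override keys are taken from the snippets above):

def make_wann_params(env_name, pop_size):
    # Start from the repo defaults, then apply the per-task overrides used in
    # both examples above (maxGen and alg_nReps are fixed at 1 in each case).
    params = task.get_default_wann_hyperparams()
    params['task'] = env_name      # environment the WANN search targets
    params['maxGen'] = 1           # generations per outer training loop
    params['popSize'] = pop_size   # 100 for the Humanoid example, 20 for CartPole
    params['alg_nReps'] = 1        # evaluation repetitions per individual
    return params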