Example #1
    # only the tail of this factory survives in the snippet: it hands back
    # the optimizer's task/result queues, the optimizer agent itself, and
    # the initial policy weights shared with the rollout workers
    return optimizer_tasks, optimizer_results, optimizer_agent, starting_weights


def make_rollout_agent(ob_size, action_size, starting_weights, args):
    # build the parallel rollout master and seed its workers with the
    # optimizer's initial policy weights so sampling starts in sync
    rollout_agent = rollout_master_agent.parallel_rollout_master_agent(
        args, ob_size, action_size
    )
    rollout_agent.set_policy_weights(starting_weights)
    return rollout_agent


if __name__ == '__main__':

    # get the configuration
    logger.info('New environments available : {}'.format(
        register.get_name_list()))
    args = get_config()
    # args.use_nervenet = 0

    if args.write_log:
        logger.set_file_handler(
            path=args.output_dir,
            prefix='mujoco_' + args.task, time_str=args.time_id
        )

    if args.task in dm_control_util.DM_ENV_INFO:
        args.dm = 1

    # optional visdom plotting
    if args.viz:
        viz_item = ['avg_reward', 'entropy', 'kl', 'surr_loss',
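Example #1 is cut off while still assembling `viz_item`, but the intent is visible: one visdom line plot per tracked training statistic (average reward, entropy, KL, surrogate loss, and so on). As a rough, self-contained sketch of that pattern only, assuming a visdom server is already running (`python -m visdom.server`) and using made-up data:

import numpy as np
import visdom

vis = visdom.Visdom()  # assumes a server on the default localhost:8097

for step in range(5):
    avg_reward = float(np.random.rand())  # placeholder statistic
    vis.line(
        X=np.array([step]),
        Y=np.array([avg_reward]),
        win='avg_reward',                       # fixed window name per item
        update='append' if step > 0 else None,  # create once, then extend
        opts=dict(title='avg_reward'),
    )

Reusing a fixed `win` name with `update='append'` keeps each statistic in a single window across training iterations, which is presumably why the script collects the names in `viz_item` up front.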
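Stepping back to the two factory helpers at the top of the example: `make_rollout_agent` hands the optimizer's starting weights to a pool of sampler workers through `set_policy_weights`, so rollouts are collected under the same policy the optimizer starts from. The repo's `parallel_rollout_master_agent` is not shown here; the following is a toy, hypothetical stand-in (every name in it is invented) that mimics only that broadcast-then-sample handshake over `multiprocessing` pipes:

import multiprocessing as mp

import numpy as np


def _worker(conn, ob_size, action_size):
    # each worker keeps a local copy of a linear policy and answers
    # rollout requests with one fake "action" per request
    weights = np.zeros((ob_size, action_size))
    while True:
        cmd, payload = conn.recv()
        if cmd == 'set_weights':
            weights = payload          # adopt the broadcast weights
        elif cmd == 'rollout':
            ob = np.random.randn(ob_size)
            conn.send(ob @ weights)    # stand-in for a sampled trajectory
        elif cmd == 'close':
            conn.close()
            break


class ToyRolloutMaster(object):
    # hypothetical stand-in for parallel_rollout_master_agent
    def __init__(self, num_workers, ob_size, action_size):
        self._conns, self._procs = [], []
        for _ in range(num_workers):
            parent, child = mp.Pipe()
            proc = mp.Process(target=_worker,
                              args=(child, ob_size, action_size))
            proc.start()
            self._conns.append(parent)
            self._procs.append(proc)

    def set_policy_weights(self, weights):
        # broadcast the latest weights, mirroring the call in the snippet
        for conn in self._conns:
            conn.send(('set_weights', weights))

    def rollout(self):
        for conn in self._conns:
            conn.send(('rollout', None))
        return [conn.recv() for conn in self._conns]

    def end(self):
        for conn in self._conns:
            conn.send(('close', None))
        for proc in self._procs:
            proc.join()


if __name__ == '__main__':
    master = ToyRolloutMaster(num_workers=2, ob_size=4, action_size=2)
    master.set_policy_weights(np.ones((4, 2)))
    print(master.rollout())
    master.end()

The real agent steps MuJoCo environments rather than multiplying random vectors, but this push-weights-down, pull-samples-back control flow is the contract `make_rollout_agent` appears to rely on.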
Example #2
import gym
import environments.register as register

# smoke-test every registered environment: construct it, reset it, and step
# it with random actions for a few rendered frames
for env_name in register.get_name_list():
    print(env_name)
    env = gym.make(env_name)
    env.reset()

    for _ in range(10):
        env.render()
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        # print(state.shape, action.shape, reward, done, info)
    env.close()
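
# A hypothetical follow-up (not part of the original script): run one bounded
# random-policy episode on the first registered environment and report its
# return, using the same four-tuple `env.step` API as the loop above.
env = gym.make(register.get_name_list()[0])
env.reset()
total_reward, done = 0.0, False
for _ in range(1000):  # hard cap in case the time limit is long
    _, reward, done, _ = env.step(env.action_space.sample())
    total_reward += reward
    if done:
        break
env.close()
print('random-policy episode return: {:.2f}'.format(total_reward))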
"""
ALL_ENV_LIST = [
    WalkersHopperone-v1
    WalkersHalfhumanoidone-v1
    WalkersHalfcheetahone-v1
    WalkersFullcheetahone-v1
    WalkersOstrichone-v1
    WalkersHoppertwo-v1
    WalkersHalfhumanoidtwo-v1
    WalkersHalfcheetahtwo-v1
    WalkersFullcheetahtwo-v1
    WalkersOstrichtwo-v1
    WalkersHopperthree-v1
    WalkersHalfhumanoidthree-v1
    WalkersHalfcheetahthree-v1
    WalkersFullcheetahthree-v1
    WalkersOstrichthree-v1
    WalkersHopperfour-v1