Ejemplo n.º 1
0
def run_forward_model(config, trial, env_name):
    """Train a PPO forward-model agent on an Atari environment.

    Builds one wrapped environment (or config.n_env of them when running
    in parallel), wires up the matching experiment, trains the agent for
    the given trial, and closes every environment afterwards.
    """
    env = WrapperAtari(gym.make(env_name))
    input_shape = env.observation_space.shape
    action_dim = env.action_space.n

    multi_env = config.n_env > 1
    if multi_env:
        print('Creating {0:d} environments'.format(config.n_env))
        env_list = [WrapperAtari(gym.make(env_name)) for _ in range(config.n_env)]

        print('Start training')
        experiment = ExperimentNEnvPPO(env_name, env_list, config, input_shape, action_dim)
    else:
        experiment = ExperimentPPO(env_name, env, config)
        experiment.add_preprocess(encode_state)

    agent = PPOAtariForwardModelAgent(input_shape, action_dim, config, TYPE.discrete)
    experiment.run_forward_model(agent, trial)

    env.close()

    if multi_env:
        for extra_env in env_list:
            extra_env.close()
Ejemplo n.º 2
0
def run_dop_ref_model(env_name, config, i):
    """Run the PPO DOP reference-model baseline on an Aeris environment."""
    env = create_env(env_name)
    observation_shape = env.observation_space.shape
    n_actions = env.action_space.shape[0]

    experiment = ExperimentPPO(env_name, env, config)
    agent = PPOAerisDOPRefAgent(observation_shape, n_actions, config, TYPE.continuous)
    experiment.run_baseline(agent, i)

    env.close()
Ejemplo n.º 3
0
def run_baseline(config, i):
    """Run the continuous-action PPO baseline on MountainCarContinuous-v0."""
    env_id = 'MountainCarContinuous-v0'
    env = gym.make(env_id)
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    experiment = ExperimentPPO(env_id, env, config)
    agent = PPOSimpleAgent(obs_dim, act_dim, config, TYPE.continuous)
    experiment.run_baseline(agent, i)

    env.close()
Ejemplo n.º 4
0
def run_baseline(config, i):
    """Run the continuous-action PPO baseline on Pendulum-v0."""
    env_id = 'Pendulum-v0'
    env = gym.make(env_id)
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    experiment = ExperimentPPO(env_id, env, config)
    experiment.add_preprocess(encode)

    agent = PPOAgent(obs_dim, act_dim, config, TYPE.continuous)
    experiment.run_baseline(agent, i)

    env.close()
Ejemplo n.º 5
0
def test(config, path, env_name):
    """Load a trained PPO Atari agent from *path* and evaluate it."""
    env = WrapperAtari(gym.make(env_name))
    obs_shape = env.observation_space.shape
    n_actions = env.action_space.n

    experiment = ExperimentPPO(env_name, env, config)
    experiment.add_preprocess(encode_state)

    agent = PPOAtariAgent(obs_shape, n_actions, config, TYPE.discrete)
    agent.load(path)
    experiment.test(agent)

    env.close()
Ejemplo n.º 6
0
def run_rnd_model(config, trial, env_name):
    """Train a PPO RND agent on an Atari environment.

    Uses MultiEnvParallel when config.n_env > 1, a single wrapped
    environment otherwise, and picks the matching experiment type.
    """
    multi_env = config.n_env > 1
    if multi_env:
        print('Creating {0:d} environments'.format(config.n_env))
        workers = [WrapperAtari(gym.make(env_name)) for _ in range(config.n_env)]
        env = MultiEnvParallel(workers, config.n_env, config.num_threads)
    else:
        env = WrapperAtari(gym.make(env_name))

    input_shape = env.observation_space.shape
    action_dim = env.action_space.n

    print('Start training')
    if multi_env:
        experiment = ExperimentNEnvPPO(env_name, env, config)
    else:
        experiment = ExperimentPPO(env_name, env, config)

    experiment.add_preprocess(encode_state)
    agent = PPOAtariRNDAgent(input_shape, action_dim, config, TYPE.discrete)
    experiment.run_rnd_model(agent, trial)

    env.close()
Ejemplo n.º 7
0
def run_baseline(env_name, config, trial):
    """Run the continuous-action PPO baseline on an Aeris environment.

    Uses MultiEnvParallel when config.n_env > 1, a single environment
    otherwise, and picks the matching experiment type.
    """
    multi_env = config.n_env > 1
    if multi_env:
        print('Creating {0:d} environments'.format(config.n_env))
        workers = [create_env(env_name) for _ in range(config.n_env)]
        env = MultiEnvParallel(workers, config.n_env, config.num_threads)
    else:
        env = create_env(env_name)

    input_shape = env.observation_space.shape
    action_dim = env.action_space.shape[0]

    print('Start training')
    if multi_env:
        experiment = ExperimentNEnvPPO(env_name, env, config)
    else:
        experiment = ExperimentPPO(env_name, env, config)

    agent = PPOAerisAgent(input_shape, action_dim, config, TYPE.continuous)
    experiment.run_baseline(agent, trial)

    env.close()
Ejemplo n.º 8
0
def run_baseline(config, trial):
    """Run the discrete-action PPO baseline on CartPole-v0.

    Creates one CartPole-v0 environment (or config.n_env of them when
    running in parallel), trains a PPOSimpleAgent for the given trial,
    and closes every environment afterwards.

    Fix: the experiment was labelled 'Pitfall-v0' in both branches even
    though the environment created is CartPole-v0 — a copy-paste error
    that mislabels any logs/checkpoints keyed by environment name.
    """
    env = gym.make('CartPole-v0')
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n

    if config.n_env > 1:
        print('Creating {0:d} environments'.format(config.n_env))
        env_list = [gym.make('CartPole-v0') for _ in range(config.n_env)]

        print('Start training')
        # Label must match the environment actually created (was 'Pitfall-v0').
        experiment = ExperimentNEnvPPO('CartPole-v0', env_list, config)
    else:
        experiment = ExperimentPPO('CartPole-v0', env, config)

    agent = PPOSimpleAgent(state_dim, action_dim, config, TYPE.discrete)
    experiment.run_baseline(agent, trial)

    env.close()

    if config.n_env > 1:
        for worker in env_list:
            worker.close()