コード例 #1
0
def eval_time(args):
    """Replay one evaluation episode with a saved checkpoint, recording video.

    Loads the "<save_path>_vs_time_pt_check.zip" model for ``args.agent``
    from the JSON config at ``args.config``, then plays a single episode
    on ``config.eval_state[1]`` while recording to ``./videos/<agent>``.
    """
    with open(args.config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    video_path = os.path.join("./videos", args.agent)
    config.agents_config[args.agent]["save_path"] += "_vs_time_pt_check.zip"

    # A throwaway env is created only so the loader can restore the model.
    env = DummyVecEnv(
        [lambda: retro.make(config.game_name, state=config.eval_state[1])])
    agent = AgentLoader.get_agent(args.agent,
                                  config.agents_config,
                                  env,
                                  load=True)
    env.close()

    # Recreate the same env, this time with recording enabled.
    env = DummyVecEnv([
        lambda: retro.make(
            config.game_name, state=config.eval_state[1], record=video_path)
    ])
    obs = env.reset()
    finished = False
    while not finished:
        actions, _ = agent.agent.predict(obs)
        obs, _, finished, _ = env.step(actions)

    env.close()
コード例 #2
0
def main(config: str, agent: str):
    """Build a monitored Super Mario Bros env and construct the named agent.

    Args:
        config: Path to a JSON game-configuration file.
        agent: Key into ``config.agents_config`` naming the agent to build.
    """
    with open(config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    # Monitor logs go next to the agent's checkpoints.
    log_dir = config.agents_config[agent]["save_path"]

    env = gym_super_mario_bros.make(config.game_name)
    # Restrict the controller to the simplified discrete action set.
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    env = Monitor(env, log_dir)

    agent = AgentLoader.get_agent(agent, config.agents_config, env)
コード例 #3
0
def eval_100_trials(args):
    """Evaluate a saved agent over 100 episodes and print the win rate.

    An episode counts as a win when its accumulated reward is positive.
    """
    with open(args.config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    config.agents_config[args.agent]["save_path"] += "_vs_time_pt.zip"
    env = DummyVecEnv(
        [lambda: retro.make(config.game_name, state=config.eval_state[1])])
    agent = AgentLoader.get_agent(args.agent,
                                  config.agents_config,
                                  env,
                                  load=True)

    trials = 100
    episode_rewards = []
    for _ in tqdm(range(trials)):
        obs = env.reset()
        total = 0
        finished = False
        while not finished:
            actions, _ = agent.agent.predict(obs)
            obs, step_rew, finished, _ = env.step(actions)
            total += step_rew
        episode_rewards.append(total)

    env.close()
    wins = sum(r > 0 for r in episode_rewards)

    print("win percentage = {}%".format(wins / trials * 100))
コード例 #4
0
def main_vs_5(config: str):
    """Train an A2C agent sequentially on every configured training state.

    Each state gets 10000 timesteps; the model is saved after each state.

    Args:
        config: Path to a JSON game-configuration file.
    """
    with open(config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    config.agents_config["A2C"]["save_path"] += "_vs_5"
    config.agents_config["A2C"]["tensorboard"] += "_vs_5"

    # Initial env only exists so the agent can be constructed.
    env = DummyVecEnv(
        [lambda: retro.make(config.game_name, state=config.train_states[0])])
    agent = AgentLoader.get_agent("A2C", config.agents_config, env)
    env.close()

    start_time = time.time()
    for st in tqdm(config.train_states, desc='Main Loop'):
        print(st)
        # Bind ``st`` as a default argument so the closure captures the
        # current state instead of the loop variable (late-binding pitfall;
        # it only worked before because DummyVecEnv calls the fn eagerly).
        env = DummyVecEnv([
            lambda st=st: retro.make(
                config.game_name, state=st, scenario='scenario')
        ])
        agent.agent.set_env(env)
        agent.agent.learn(total_timesteps=10000)
        agent.save()
        env.close()
    end_time = time.time() - start_time
    print(f'\n The Training Took {end_time} seconds')
コード例 #5
0
def main(config: str, agent: str):
    """Train the named agent on an Atari env for 10M steps.

    Args:
        config: Path to a JSON game-configuration file.
        agent: Key into ``config.agents_config`` ("DQN", "PPO", or other).
    """
    with open(config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    log_dir = config.agents_config[agent]["save_path"]

    # The number of parallel envs depends on the algorithm:
    # DQN is off-policy (1 env), PPO uses 8, everything else 16.
    n_envs = {"DQN": 1, "PPO": 8}.get(agent, 16)
    env = make_atari_env(config.game_name, n_envs=n_envs,
                         seed=0, monitor_dir=log_dir)
    env = VecFrameStack(env, n_stack=4)

    agent = AgentLoader.get_agent(agent, config.agents_config, env)

    reward_callback = SaveOnBestTrainingRewardCallback(
        check_freq=100, log_dir=log_dir)

    start_time = time.time()
    steps = 10_000_000
    with ProgressBarManager_new(steps) as progress_callback:
        agent.agent.learn(total_timesteps=steps, callback=[
                          reward_callback, progress_callback])
        env.close()

    elapsed = time.time() - start_time
    print(f'\n The Training Took {elapsed} seconds')
コード例 #6
0
def main(config, agent):
    """Record one Pong episode played by a saved agent.

    Fixes a bug where the body referenced an undefined ``args`` variable
    (``args.agent``) instead of the ``agent`` parameter, which would raise
    ``NameError`` unless a global ``args`` happened to exist.

    Args:
        config: Path to a JSON game-configuration file.
        agent: Key into ``config.agents_config`` naming the agent to load.
    """
    with open(config) as fp:
        json_data = json.load(fp)

    video_path = os.path.join("./videos", agent, "pong")
    config = GameConfig.deserialize(json_data)
    config.agents_config[agent]["save_path"] += "best_model.zip"
    print(config.agents_config[agent]["save_path"])

    # Throwaway env so the loader can restore the model weights.
    env = gym.make("PongNoFrameskip-v4")
    agent = AgentLoader.get_agent(agent,
                                  config.agents_config,
                                  env,
                                  load=True)
    env.close()

    # Name the inner env explicitly so the closure does not capture the
    # rebound ``env`` variable (late-binding pitfall).
    inner_env = gym.make("PongNoFrameskip-v4")
    env = DummyVecEnv([lambda: inner_env])
    env = VecVideoRecorder(
        env,
        video_path,
        record_video_trigger=lambda x: x == 0,  # record from the first step
    )

    obs = env.reset()
    done = False
    while not done:
        actions, _ = agent.agent.predict(obs)
        obs, rew, done, info = env.step(actions)

    env.close()
コード例 #7
0
def main_vs_time(config: str):
    """Train an A2C agent for 40000 timesteps on the evaluation state.

    Args:
        config: Path to a JSON game-configuration file.
    """
    with open(config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    config.agents_config["A2C"]["save_path"] += "_vs_time_pt"
    config.agents_config["A2C"]["tensorboard"] += "_vs_time"

    env = DummyVecEnv(
        [lambda: retro.make(config.game_name, state=config.eval_state[0])])
    agent = AgentLoader.get_agent("A2C", config.agents_config, env)

    total_steps = 40000
    started = time.time()
    with ProgressBarManager_new(total_steps) as callback:
        agent.agent.learn(total_timesteps=total_steps, callback=callback)
        agent.save()
        env.close()

    elapsed = time.time() - started
    print(f'\n The Training Took {elapsed} seconds')
コード例 #8
0
def main_vs_time(config: str):
    """Run a short (1000-step) PPO training check on the evaluation state.

    Args:
        config: Path to a JSON game-configuration file.
    """
    with open(config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    config.agents_config["PPO"]["save_path"] += "_vs_time_pt_check"
    config.agents_config["PPO"]["tensorboard"] += "_vs_time_check"

    def make_env():
        # Factory shared by both env constructions below.
        return retro.make(config.game_name, state=config.eval_state[0])

    # The first env only exists so the agent can be constructed; it is
    # closed and replaced by a fresh one before training starts.
    env = DummyVecEnv([make_env])
    agent = AgentLoader.get_agent("PPO", config.agents_config, env)
    env.close()

    env = DummyVecEnv([make_env])
    agent.agent.set_env(env)

    with ProgressBarManager_new(1000) as callback:
        agent.agent.learn(1000, callback=callback)
        agent.save()
        env.close()