Example #1
def main(arguments: argparse.Namespace) -> None:
    """
    Main training loop.
    :param arguments: Parsed command-line arguments
    :return: None
    """
    n_steps = arguments.steps
    n_agents = arguments.envs

    print(f'Training {arguments.game} using {"cpu" if arguments.cpu else "gpu"}')
    print(f'Number of concurrent environments: {arguments.envs}')
    print(f'Number of steps per batch: {arguments.steps}')

    if arguments.model:
        print(f'Using existing model {arguments.model}')

    env = SubprocVecEnv(
        [make_env(env_id=arguments.game, rank=i) for i in range(n_agents)])
    agent = DeepLearningAgent(observation_space=env.observation_space,
                              action_space=int(env.action_space.n),
                              n_envs=n_agents,
                              n_steps=n_steps,
                              model_path=arguments.model,
                              use_cpu=arguments.cpu)

    # This is the current state (or observation)
    observations = reshape_observations(env.reset())
    actions = agent.get_action(observations)
    initial_training_time = time.time()

    for ep in range(EPISODES):
        # The step counter i restarts from zero once a full batch of n_steps has been collected
        for i in range(n_steps):
            new_observations, rewards, done, info = env.step(
                actions.cpu().numpy())
            new_observations = reshape_observations(new_observations)

            agent.train(s=observations,
                        r=rewards,
                        s_next=new_observations,
                        a=actions,
                        done=done,
                        step=i)

            actions = agent.get_action(new_observations)
            observations = new_observations

        if ep % 100 == 0:
            fps = ((ep + 1) * n_steps * n_agents) / (time.time() -
                                                     initial_training_time)
            print(f'FPS {fps}')

    env.close()
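
This main function expects an argparse.Namespace carrying game, envs, steps, cpu, and model attributes. Below is a minimal sketch of a matching entry point; the flag names and defaults are inferred from the attribute accesses above and are not part of the original example.

import argparse

def parse_arguments() -> argparse.Namespace:
    # Flags mirror the attributes read in main(); the defaults are illustrative only.
    parser = argparse.ArgumentParser(description='Train an agent with vectorized environments.')
    parser.add_argument('--game', type=str, default='SonicTheHedgehog-Genesis',
                        help='Environment id passed to make_env')
    parser.add_argument('--envs', type=int, default=8,
                        help='Number of concurrent environments')
    parser.add_argument('--steps', type=int, default=128,
                        help='Number of steps per training batch')
    parser.add_argument('--cpu', action='store_true',
                        help='Train on the CPU instead of the GPU')
    parser.add_argument('--model', type=str, default=None,
                        help='Path to an existing model to resume from')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_arguments())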
Example #2
def main():
    # Alter reward in scenario.json (C:\Users\Fergus\Anaconda3\envs\AIGym\Lib\site-packages\retro\data\stable\SonicTheHedgehog-Genesis)
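    # (Hedged note: a gym-retro scenario.json typically defines its reward per variable
    # from data.json, roughly "reward": {"variables": {"score": {"reward": 1.0}}},
    # so editing that block changes the reward returned by env.step() below.)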

    env = SubprocVecEnv([make_env_3])
    obs = env.reset()
    # env = make_env_3()
    # env2 = make_env_4()
    print(env.observation_space)
    print(env.action_space.n)
    print(obs.shape)
    print(obs[0].shape)
    # obs = env2.reset()
    rew_mb = []
    dones_mb = []
    obs_mb = []
    step = 0
    while True:
        # The randomly sampled action is unused; the single worker env is always
        # stepped with action index 0. Swap in [action] to use the random sample.
        action = env.action_space.sample()
        obs, rew, done, info = env.step([0])
        print(f"Step {step} Reward: {rew}, Done: {done}")
        rew_mb.append(rew)
        dones_mb.append(done)
        obs_mb.append(obs)
        env.render()

        step += 1
        # obs = obs[1] / 255.
        # for i in range(4):
        #     cv2.imshow('GrayScale'+str(i), np.squeeze(obs[:,:,i]))
        #     cv2.waitKey(1)
        if done[0]:
            env.close()
            break
    rew_mb = np.array(rew_mb)
    dones_mb = np.array(dones_mb)
    obs_mb = np.array(obs_mb)
    print("Rewards: ", rew_mb)
    print(rew_mb.shape)
    print(dones_mb)
    print(dones_mb.shape)
    print(obs_mb.shape)
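
make_env_3 is not shown in this example. SubprocVecEnv needs a zero-argument callable that constructs a fresh environment inside each worker process; the sketch below is a hypothetical factory, assuming gym-retro plus the standard baselines Atari wrappers (the commented-out cv2 code above hints that observations are a stack of four grayscale frames).

import retro
from baselines.common.atari_wrappers import WarpFrame, FrameStack

def make_env_3():
    # Hypothetical factory: each SubprocVecEnv worker calls this to build its own env.
    env = retro.make(game='SonicTheHedgehog-Genesis')
    env = WarpFrame(env)      # resize to 84x84 and convert to grayscale
    env = FrameStack(env, 4)  # keep the last four frames as channels
    return env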
Example #3
def main7():
    retro.data.add_custom_integration("custom")

    def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
        env = MaxAndSkipEnv(env, skip=4)
        env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
        env = FrameStack(env, frame_stack)
        env = ScaledFloatFrame(env)
        env = RewardScaler(env, scale=reward_scale)
        return env

    def make_env():
        retro.data.add_custom_integration("custom")
        env = retro.n64_env.N64Env(game="SuperSmashBros-N64",
                                   use_restricted_actions=retro.Actions.MULTI_DISCRETE,
                                   inttype=retro.data.Integrations.CUSTOM,
                                   obs_type=retro.Observations.IMAGE)
        env = wrap_deepmind_n64(env)
        return env

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    nenvs = 2
    # env = DummyVecEnv([make_env] * nenvs)
    env = SubprocVecEnv([make_env] * nenvs)
    network_name = "impala_cnn_lstm"
    policy = build_policy(env, network_name)
    recurrent = "lstm" in network_name
    ob_space = env.observation_space
    ac_space = env.action_space
    nsteps = 10
    nminibatches = 2
    nbatch = nenvs * nsteps
    nbatch_train = nbatch // nminibatches
    model = Model(policy=policy,
                  ob_space=ob_space,
                  ac_space=ac_space,
                  nbatch_act=nenvs,
                  nbatch_train=nbatch_train,
                  nsteps=nsteps,
                  ent_coef=0.01,
                  vf_coef=0.5,
                  max_grad_norm=0.5,
                  comm=None,
                  mpi_rank_weight=1)
    runner = Runner(env=env, model=model, nsteps=10, gamma=.99, lam=.95)

    env.reset()
    num_steps = 20000
    start = time.time()  # used to report the total elapsed time after the loop
    action = [np.array([0, 0, 0]), np.array([0, 0, 0])]
    for i in range(num_steps):
        sys.stdout.write(f"\r{i+1} / {num_steps}")
        action = [env.action_space.sample() for _ in range(nenvs)]
        obs, reward, dones, info = env.step(action)
        # env.reset(dones)
        # env.render()

        if i % 50 == 0:
            if recurrent:
                fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(20, 12))
            else:
                fig, axs = plt.subplots(nrows=4, ncols=2, figsize=(20, 12))
            for env_index in range(nenvs):
                if recurrent:
                    axs[env_index].imshow(obs[env_index, :, :, :])
                else:
                    for j in range(4):
                        row = env_index * 2 + j // 2
                        col = j % 2
                        print(row)
                        print(col)
                        axs[row, col].imshow(obs[env_index, :, :, j])
            plt.show()
            plt.close()
    end = time.time()
    print(f"\n{end - start:.1f} seconds elapsed")

    return env
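
The Model and Runner built above are never exercised by the loop, which only steps the environments with random actions and plots the observations. For reference, here is a hedged sketch of how such a pair is typically driven in baselines' ppo2 training loop; the run()/train() argument order follows my reading of the baselines PPO2 API and should be checked against the installed version.

def run_ppo_updates(model, runner, nupdates=10, lr=2.5e-4, cliprange=0.1):
    # Hedged sketch: collect a rollout with the runner, then take a gradient step.
    for update in range(nupdates):
        # runner.run() gathers nsteps of experience from every environment.
        obs, returns, masks, actions, values, neglogpacs, states, epinfos = runner.run()
        # The real ppo2.learn() shuffles this batch into nminibatches and trains
        # for several epochs; a single full-batch step is shown here for brevity.
        model.train(lr, cliprange, obs, returns, masks, actions, values, neglogpacs)

Recurrent ("lstm") policies additionally need the states tensor passed through to train(), which the full baselines implementation handles by mini-batching whole environment trajectories.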
Example #4
def main6():
    retro.data.add_custom_integration("custom")

    def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1):
        env = MaxAndSkipEnv(env, skip=4)
        env = WarpFrame(env, width=450, height=300, grayscale=False)
        env = FrameStack(env, frame_stack)
        env = ScaledFloatFrame(env)
        env = RewardScaler(env, scale=reward_scale)
        return env

    def make_env():
        retro.data.add_custom_integration("custom")
        state = "ssb64.pikachu.level9dk.dreamland.state"
        env = retro.n64_env.N64Env(game="SuperSmashBros-N64",
                                   use_restricted_actions=retro.Actions.MULTI_DISCRETE,
                                   inttype=retro.data.Integrations.CUSTOM,
                                   obs_type=retro.Observations.IMAGE,
                                   state=state)
        env = wrap_deepmind_n64(env)
        return env

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # env = make_env()
    env = SubprocVecEnv([make_env] * 1)
    # env = DummyVecEnv([make_env] * 1)

    env.reset()
    num_steps = 20000
    start = time.time()  # used to report the total elapsed time after the loop
    # action = [np.array([0, 0, 0])]
    # action = [env.action_space.sample() for _ in range(2)]
    for i in range(num_steps):
        sys.stdout.write(f"\r{i+1} / {num_steps}")
        # action = env.action_space.sample()
        action = [env.action_space.sample() for _ in range(1)]
        obs, reward, done, info = env.step(action)

        print(f"\nreward: {reward} done: {done}")
        # input()
        # np.all handles both a scalar done flag and the array returned by a vec env.
        if np.all(done):
            env.reset()
        # env.render()

        if i % 50 == 0:
            image = Image.fromarray((obs[0] * 255).astype(np.uint8))
            image.save("/home/wulfebw/Desktop/color.png")

            plt.imshow(obs[0, :, :, 0])

            # fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
            # for j in range(1):
            #     row = j // 2
            #     col = j % 2
            #     print(row)
            #     print(col)
            #     axs[row, col].imshow(obs[:, :])
            plt.show()
            plt.close()
    end = time.time()
    print(f"\n{end - start:.1f} seconds elapsed")

    return env
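
Both wrap_deepmind_n64 helpers rely on a RewardScaler wrapper. baselines.common.retro_wrappers ships an equivalent class, but if it is unavailable in your install, a minimal sketch looks like this:

import gym

class RewardScaler(gym.RewardWrapper):
    """Scale rewards by a constant factor to keep gradient magnitudes reasonable."""

    def __init__(self, env, scale=0.01):
        super().__init__(env)
        self.scale = scale

    def reward(self, reward):
        return reward * self.scale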