        while not done:
            if i < render_episodes:
                # render only the first few episodes for viewing
                env.render()

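            # shift the observation history down one row, store the newest
            # observation in row 0, and flatten the buffer into a single
            # (1, history_len * obs_len) input vector for the model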
            obs_img = np.roll(obs_img, 1, axis=0)
            obs_img[0, :] = observation
            reshaped = obs_img.reshape((1, history_len * obs_len))

            action = model.predict_move(reshaped, train=False)

            # if the model is not initialized, take a random action instead
            if action is None:
                action = env.action_space.sample()

            # use action to make a move
            observation, reward, done, info = env.step(action)
            cumulative_reward += reward

        print('current average: {} in {} games'.format(
            cumulative_reward / (i + 1), (i + 1)))

    print('average score: {}'.format(cumulative_reward / eval_episodes))
    return cumulative_reward / eval_episodes


if __name__ == "__main__":
    # create model
    model = nn.Control_Model()
    EvalModel(model)
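
Both examples call into nn.Control_Model, but its definition is not shown here. The following is a minimal sketch of the interface they appear to assume (the class name and predict_move come from the calls above; the method bodies are illustrative placeholders, not the original implementation):

class Control_Model:
    def __init__(self, input_len=None, output_len=None):
        self.input_len = input_len
        self.output_len = output_len
        self.trained = False  # flips to True once the model has been fit

    def predict_move(self, obs, train=False):
        # until the model has been trained at least once, return None so
        # the caller falls back to env.action_space.sample()
        if not self.trained:
            return None
        # placeholder: a real implementation would run a forward pass on
        # obs and return the index of the best-scoring action
        return 0

Returning None before the first training pass is what the None check in EvalModel relies on.
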
Example #2
train_episodes = 200  # number of episodes to run per training segment
eval_episodes = 10  # number of episodes to run when evaluating
render_episodes = 1  # number of episodes to render when evaluating

env = gym.make(env_name)
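# optionally record evaluation videos to vid_dir using the gym Monitor
# wrapper; force=False with resume=True keeps any existing recordings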
if enable_video:
    env = gym.wrappers.Monitor(env,
                               directory=vid_dir,
                               force=False,
                               resume=True)

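# size of one observation vector and number of discrete actions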
obs_len = len(env.observation_space.low)
act_len = env.action_space.n

# create model
model = nn.Control_Model(obs_len * historic_data_len, act_len)

train_count = 0

# keep training until the evaluation average clears the threshold
while (em.EvalModel(model, env, eval_episodes, render_episodes,
                    historic_data_len, obs_len) < trained_threshold):
    max_reward = -np.inf

    # run a segment of train_episodes games and train on the best-scoring one
    for i in range(train_episodes):
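        # per-episode state: running score, observation/action logs for
        # training, and a zeroed observation-history buffer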
        cumulative_reward = 0
        obs_log = []
        action_log = []
        done = False
        obs_img = np.zeros((historic_data_len, obs_len))