Example #1
import time
from statistics import mean

# Benchmarks raw game throughput: plays 100 games and reports games per
# second. SnakeEnv, get_input, and settings come from the surrounding project.
def main():
    times = []
    env = SnakeEnv()
    for i in range(100):
        st = time.time()
        done = False
        env.reset()
        score = 0
        food = 0
        while not done:
            info = {"Food": (food, (10, 30))}
            state, reward, done = env.step(get_input(), info=info)
            score += reward
            if reward == settings.FOOD_REWARD:
                food += 1

            env.render(sleep=False)
        times.append(time.time() - st)

    print(1 / mean(times), "games per second")
    print(1 / max(times), "slowest games per second")
    print(1 / min(times), "fastest games per second")
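The benchmark only touches a small slice of the environment API: reset(), step(action, info=...) returning (state, reward, done), and render(sleep=...). A minimal stand-in with that surface, purely illustrative (the real SnakeEnv lives in the project; StubSnakeEnv and its fake rewards are hypothetical), could look like:

import random

# Hypothetical stub showing only the interface the benchmark relies on.
class StubSnakeEnv:
    def reset(self):
        self.steps_left = random.randint(10, 200)  # fake episode length

    def step(self, action, info=None):
        self.steps_left -= 1
        done = self.steps_left <= 0
        reward = 1 if random.random() < 0.1 else 0  # stand-in food reward
        return None, reward, done  # (state, reward, done)

    def render(self, sleep=True):
        pass  # no-op; rendering is irrelevant for a throughput test

Dropping such a stub in lets the timing harness run end to end without the actual game logic.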
Example #2
    # Body of the per-episode training loop. env, agent, epsilon, episode,
    # ISRENDER, GET_STATS, MODEL_SAVE, and ep_rewards come from the
    # surrounding script; numpy is imported as np.
    episode_reward = 0
    step = 1
    current_state = env.reset()
    done = False

    while not done:
        # Epsilon-greedy strategy (exploitation vs. exploration): choose the
        # greedy action with probability 1 - epsilon, otherwise a random one.
        if np.random.random() > epsilon:
            action = np.argmax(agent.get_qs(current_state))
        else:
            action = np.random.randint(0, env.ACTION_SPACE_SIZE)

        new_state, reward, done = env.move(action)
        episode_reward += reward

        if ISRENDER and episode % GET_STATS == 0:  # render periodically while training
            env.render()

        # Append the transition to the experience replay memory, then train.
        agent.update_replay_memory((current_state, action, reward, new_state, done))
        agent.train(done)

        current_state = new_state
        step += 1  # increment the time step

    ep_rewards.append(episode_reward)
    if MODEL_SAVE and episode % GET_STATS == 0:
        # Reward statistics over the last GET_STATS episodes.
        recent = ep_rewards[-GET_STATS:]
        average_reward = sum(recent) / len(recent)
        min_reward = min(recent)
        max_reward = max(recent)
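The loop above leans on three agent methods: get_qs() for greedy action selection, update_replay_memory() for storing transitions, and train() for learning. A sketch of what that surface commonly looks like in a DQN agent, with hypothetical names and constants (the project's real agent, including its Q-network, is defined elsewhere):

import random
from collections import deque

import numpy as np

# Hypothetical sketch of the agent surface used above; not the project's agent.
class SketchDQNAgent:
    def __init__(self, memory_size=50_000, batch_size=64, gamma=0.99):
        self.replay_memory = deque(maxlen=memory_size)
        self.batch_size = batch_size
        self.gamma = gamma

    def get_qs(self, state):
        # Placeholder: the real agent predicts Q-values with a neural network.
        return np.zeros(4)

    def update_replay_memory(self, transition):
        # transition = (current_state, action, reward, new_state, done)
        self.replay_memory.append(transition)

    def train(self, terminal_state):
        # Skip training until enough transitions have accumulated.
        if len(self.replay_memory) < self.batch_size:
            return
        batch = random.sample(self.replay_memory, self.batch_size)
        for state, action, reward, new_state, done in batch:
            # Bellman target: r if terminal, else r + gamma * max_a' Q(s', a').
            target = reward if done else reward + self.gamma * np.max(self.get_qs(new_state))
            # ...fit the network so Q(state)[action] moves toward target...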
Example #3
File: dqn.py  Project: dilithjay/SnakeRL
    # Tail of a checkpoint helper (the enclosing def is elided above): when a
    # new best running reward is reached, save both networks and pickle the
    # accumulated statistics. pickle, time, numpy (np), and tensorflow (tf)
    # are imported at the top of dqn.py.
    global max_reward, model, model_target
    max_reward = running_reward
    model.save(model_name)
    model_target.save(target_model_name)

    with open(result_data_loc, "wb") as fp:
        pickle.dump([
            score_list, max_score_list, running_reward_list, num_episodes_list
        ], fp)


# Main training loop.
t0 = t = time.time()
while True:
    state = np.array(snake.reset())
    if not explore:
        snake.render(1 / FPS)
    episode_reward = 0
    for _ in range(max_steps_per_episode):
        frame_count += 1

        # Epsilon-greedy exploration: act randomly during the warm-up frames
        # or with probability epsilon, otherwise act greedily on Q-values.
        if test_ep_count == 0 and explore and (
                frame_count < epsilon_random_frames
                or epsilon > np.random.rand(1)[0]):
            # Take a random action (4 possible moves)
            action = np.random.choice(4)
        else:
            # Predict action Q-values from the environment state
            state_tensor = tf.convert_to_tensor(state)
            state_tensor = tf.expand_dims(state_tensor, 0)
            # (The excerpt ends here; presumably followed by something like
            #  action = tf.argmax(model(state_tensor, training=False)[0]).numpy())
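The condition frame_count < epsilon_random_frames forces pure exploration during a warm-up period, after which epsilon itself governs exploration; the decay step falls outside this excerpt. A sketch of the linear annealing schedule this structure suggests, following the common Keras DQN recipe (all constants and the decay_epsilon helper are assumptions, not taken from dqn.py):

# Hypothetical linear epsilon schedule; constants assumed, not from dqn.py.
epsilon = 1.0
epsilon_min = 0.1
epsilon_random_frames = 50_000     # pure-random warm-up frames
epsilon_greedy_frames = 1_000_000  # frames over which epsilon anneals

def decay_epsilon(epsilon):
    # Call once per frame after the action is chosen.
    epsilon -= (1.0 - epsilon_min) / epsilon_greedy_frames
    return max(epsilon, epsilon_min)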