def main():
    """Benchmark SnakeEnv throughput by timing 100 complete games.

    Plays 100 games driven by ``get_input()``, rendering without frame
    delay, and prints mean / slowest / fastest games-per-second rates.
    Relies on module-level names: ``SnakeEnv``, ``get_input``, ``settings``,
    ``time`` and ``mean`` (presumably ``statistics.mean`` — confirm import).
    """
    times = []
    env = SnakeEnv()
    for _ in range(100):
        st = time.time()
        done = False
        env.reset()
        food = 0  # food pellets eaten this game; shown in the render HUD
        while not done:
            # Overlay text: label -> (value, (x, y) screen position).
            info = {"Food": (food, (10, 30))}
            state, reward, done = env.step(get_input(), info=info)
            if reward == settings.FOOD_REWARD:
                food += 1
            # sleep=False: render as fast as possible so we measure env speed.
            env.render(sleep=False)
        # The loop exits exactly when the game ends, so the per-game
        # duration can be recorded here instead of inside the loop.
        times.append(time.time() - st)
    print(1 / (mean(times)), end=" games per second\n")
    print(1 / (max(times)), end=" slowest games per second\n")
    print(1 / (min(times)), end=" fastest games per second\n")
# --- One training episode of the DQN agent (enclosing per-episode loop is
# outside this chunk; `agent`, `env`, `epsilon`, `episode`, `ep_rewards`,
# `ISRENDER`, `GET_STATS`, `MODEL_SAVE` are defined elsewhere in the file). ---
episode_reward = 0
step = 1
current_state = env.reset()
done = False
while not done:
    # Exploitation vs exploration (epsilon-greedy): with probability
    # (1 - epsilon) take the greedy action from the Q-network, otherwise
    # sample a uniformly random action from the environment's action space.
    if np.random.random() > epsilon:
        action = np.argmax(agent.get_qs(current_state))
    else:
        action = np.random.randint(0, env.ACTION_SPACE_SIZE)
    new_state, reward, done = env.move(action)
    episode_reward += reward
    # Render only every GET_STATS-th episode to keep training fast.
    if ISRENDER and episode % GET_STATS == 0:
        # code to render while training
        env.render()
    # Append the transition to the experience-replay memory.
    agent.update_replay_memory(
        (current_state, action, reward, new_state, done))
    agent.train(done)
    current_state = new_state
    step += 1  # increment time-step
ep_rewards.append(episode_reward)
# Every GET_STATS episodes, compute reward statistics over the most
# recent window (used for model saving / logging further down the file).
if MODEL_SAVE and episode % GET_STATS == 0:
    average_reward = sum(ep_rewards[-GET_STATS:]) / len(
        ep_rewards[-GET_STATS:])  # stats storage
    min_reward = min(ep_rewards[-GET_STATS:])
    max_reward = max(ep_rewards[-GET_STATS:])
# NOTE(review): this chunk is a fragment. The `global` statement implies the
# first section lives inside a checkpoint-saving function whose `def` line is
# outside this view, and the `while True:` training loop below is truncated
# mid-iteration; relative indentation between the two sections is ambiguous
# in the collapsed source — confirm against the full file.
global max_reward, model, model_target
max_reward = running_reward
# Persist both online and target networks, then pickle the run statistics.
model.save(model_name)
model_target.save(target_model_name)
with open(result_data_loc, "wb") as fp:
    pickle.dump([
        score_list, max_score_list, running_reward_list, num_episodes_list
    ], fp)
t0 = t = time.time()
while True:
    state = np.array(snake.reset())
    # Only throttle/render when not in pure exploration mode.
    if not explore:
        snake.render(1 / FPS)
    episode_reward = 0
    for _ in range(max_steps_per_episode):
        frame_count += 1
        # Use epsilon-greedy for exploration: act randomly during the warm-up
        # frames or with probability epsilon (and only while exploring,
        # never during test episodes — test_ep_count == 0).
        if test_ep_count == 0 and explore and (
                frame_count < epsilon_random_frames
                or epsilon > np.random.rand(1)[0]):
            # Take random action — presumably 4 discrete moves (up/down/
            # left/right); verify against the environment's action space.
            action = np.random.choice(4)
        else:
            # Predict action Q-values from the environment state:
            # add a leading batch dimension before feeding the network.
            state_tensor = tf.convert_to_tensor(state)
            state_tensor = tf.expand_dims(state_tensor, 0)