def testRunHitSnakeAndDie(self, mock_random):
    """Steer the snake into its own body and verify the losing end state."""
    input_mock = Mock()
    output_fake = FakeOutput()
    # Scripted moves that curl the snake back onto itself on a 3x3 board.
    input_mock.get_next_action.side_effect = [
        Direction.Y_NEGATIVE,
        Direction.X_NEGATIVE,
        Direction.Y_POSITIVE,
        Direction.Y_POSITIVE,
        Direction.X_POSITIVE,
        Direction.Y_NEGATIVE,
        Direction.X_NEGATIVE,
    ]
    # Deterministic coordinates consumed by the patched random source.
    mock_random.side_effect = [2, 1, 2, 0, 1, 0, 1, 1, 1, 2, 2, 2, 0, 0]

    SnakeGame.run(input_mock, output_fake, 3, 3)

    self.assertEqual(output_fake.game_results, [False])
    output_fake.verify_game_map(
        self,
        len(output_fake.drawn_maps) - 1,
        {
            (1, 1): Snake,
            (2, 1): Snake,
            (2, 0): Snake,
            (1, 0): Snake,
            (1, 2): Snake,
            (2, 2): Snake,
            (0, 0): Food,
        })
def train(iters, warm_start=False, verbose=False, learning_rate=0.8,
          gamma=0.8, epsilon=0.2, dont_repeat=False, name="snake_ai.pkl",
          checkpoint_every=100):
    """Train a QLearn agent in the Snake environment.

    Args:
        iters: number of training games to play.
        warm_start: if True, resume from the agent pickled under ``name``.
        verbose: forwarded to the agent for extra logging.
        learning_rate, gamma, epsilon: Q-learning hyperparameters.
        dont_repeat: forwarded to the agent as ``no_repeat``.
        name: path used to load/save the pickled agent.
        checkpoint_every: save a checkpoint every this many games
            (previously a hard-coded local constant of 100).
    """
    if warm_start:
        ai = joblib.load(name)
    else:
        ai = QLearn([0, 1, 2, 3])
    ai.learning_rate = learning_rate
    ai.gamma = gamma
    ai.epsilon = epsilon
    ai.verbose = verbose
    ai.no_repeat = dont_repeat

    evals = []
    for i in range(1, iters + 1):
        game = SnakeGame()
        ai = game.demo(ai, light_mode=True)
        # Total mass of the Q-table as a rough progress signal.
        # (np.sum over the values directly; the old [v for v in ...]
        # list comprehension was a redundant copy.)
        evals.append(np.sum(list(ai.memory.values())))
        plt.plot(evals, c="b")
        plt.pause(0.05)
        # Periodic checkpoint so long runs can be resumed after a crash.
        if not i % checkpoint_every:
            joblib.dump(ai, name)
    # Final save (also covers runs shorter than one checkpoint interval).
    joblib.dump(ai, name)
def initial_population(self):
    """Collect labelled (observation, label) pairs from random play.

    Labels: -1 for the move that ended the game, 1 for a move that
    scored or reduced the distance to the food, 0 otherwise.
    """
    samples = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, last_score, snake, food = game.start()
        last_obs = self.generate_observation(snake, food)
        last_dist = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            if done:
                # Death ends the episode: label the final move -1.
                samples.append(
                    [self.add_action_to_observation(last_obs, action), -1])
                break
            dist = self.get_food_distance(snake, food)
            # Reward moves that score or close in on the food.
            label = 1 if (score > last_score or dist < last_dist) else 0
            samples.append(
                [self.add_action_to_observation(last_obs, action), label])
            last_obs = self.generate_observation(snake, food)
            last_dist = dist
    return samples
def initial_population(self):
    """Generate survival-labelled training samples from random games."""
    samples = []
    print('Creating initial population out of %s games...' %
          self.initial_games)
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, _, snake, _ = game.start()
        last_obs = self.generate_observation(snake)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, _, snake, _ = game.step(game_action)
            if done:
                # The move that killed the snake is labelled 0.
                samples.append(
                    [self.add_action_to_observation(last_obs, action), 0])
                break
            # Surviving moves are labelled 1.
            samples.append(
                [self.add_action_to_observation(last_obs, action), 1])
            last_obs = self.generate_observation(snake)
    print('Training data size: %s' % len(samples))
    return samples
def test_model(self, model):
    """Run the model for test_games episodes and report survival steps."""
    step_counts = []
    for _ in range(self.test_games):
        survived = 0
        history = []
        game = SnakeGame()
        _, _, snake, _ = game.start()
        last_obs = self.generate_observation(snake)
        for _ in range(self.goal_steps):
            # Score each candidate relative turn (-1 left, 0 ahead, 1 right).
            scores = [
                model.predict(
                    self.add_action_to_observation(last_obs, turn)
                    .reshape(-1, 4, 1))
                for turn in range(-1, 2)
            ]
            action = np.argmax(np.array(scores))
            game_action = self.get_game_action(snake, action - 1)
            done, _, snake, _ = game.step(game_action)
            history.append([last_obs, action])
            if done:
                break
            last_obs = self.generate_observation(snake)
            survived += 1
        step_counts.append(survived)
    print('Average steps:', mean(step_counts))
    print(Counter(step_counts))
def __init__(self, step_limit=None):
    """Configure specs and per-episode bookkeeping.

    Args:
        step_limit: optional cap on consecutive steps without a reward;
            ``None`` disables the cap.
    """
    # Four discrete actions (one per movement direction).
    self._action_spec = array_spec.BoundedArraySpec(
        shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
    # Flattened 10x10 board as the observation.
    self._observation_spec = array_spec.BoundedArraySpec(
        shape=(100,), dtype=np.int32, minimum=0, name="observation")
    self._game = SnakeGame(size=10)
    self._episode_ended = False
    self._reward_count = 0
    self._step_count = 0
    self._step_limit = step_limit
def testRunOutOfSpace(self):
    """Filling the whole 2x1 board should end the game as a win."""
    input_mock = Mock()
    output_fake = FakeOutput()
    # Always move right: one step fills the board.
    input_mock.get_next_action.return_value = Direction.X_POSITIVE

    SnakeGame.run(input_mock, output_fake, 2, 1)

    self.assertEqual(output_fake.game_results, [True])
    self.assertEqual(2, len(output_fake.drawn_maps))
    output_fake.verify_game_map(self, 1, {(0, 0): Snake, (1, 0): Snake})
def test(self):
    """Play ``self.test_games`` games with the current policy and print
    running and final step/score statistics.

    Side effects only (prints to stdout); returns None.
    """
    print('--- test ---')
    start = time.time()
    # Seeded with 0 so mean()/max()/min() never see an empty list.
    steps_arr = [0]
    scores_arr = [0]
    for i in range(self.test_games):
        steps = 0
        game = SnakeGame()
        _, score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            game_action = self.get_game_action(game)
            done, score, snake, food = game.step(game_action)
            if done:
                # Log the fatal move only when at least one of the first
                # three observation entries was not 1 — i.e. not every
                # direction was flagged (presumably these are the obstacle
                # flags for left/front/right; confirm in
                # generate_observation).
                if prev_observation[0] != 1 or prev_observation[
                        1] != 1 or prev_observation[2] != 1:
                    action_str = 'UP'
                    if game_action == 1:
                        action_str = 'RIGHT'
                    elif game_action == 2:
                        action_str = 'DOWN'
                    elif game_action == 3:
                        action_str = 'LEFT'
                    print(
                        str(i) + '/' + str(self.test_games) + ' ' +
                        str(prev_observation) + ' ' + action_str + ' [' +
                        str(round(mean(steps_arr), 2)) + ', ' +
                        str(round(mean(scores_arr), 2)) + ']')
                break
            else:
                prev_observation = self.generate_observation(snake, food)
                steps += 1
        # Per-game progress line, rewritten in place via '\r'.
        print('game: ' + str(i + 1) + '/' + str(self.test_games) + ' [' +
              str(round(((i + 1) / self.test_games) * 100, 1)) + '%]' +
              ' goal_steps: ' + str(self.goal_steps) + ' time: ' +
              str(round(time.time() - start, 3)) + 's',
              end='\r')
        steps_arr.append(steps)
        scores_arr.append(score)
    end = time.time()
    avg_steps = mean(steps_arr)
    avg_score = mean(scores_arr)
    # Final (non-overwritten) progress line plus summary statistics.
    print('game: ' + str(i + 1) + '/' + str(self.test_games) + ' [' +
          str(round(((i + 1) / self.test_games) * 100, 1)) + '%]' +
          ' goal_steps: ' + str(self.goal_steps) + ' time: ' +
          str(round(end - start, 3)) + 's')
    print('steps: avg=' + str(round(avg_steps, 2)) + ' max=' +
          str(max(steps_arr)) + ' min=' + str(min(steps_arr)))
    print('score: avg=' + str(round(avg_score, 2)) + ' max=' +
          str(max(scores_arr)) + ' min=' + str(min(scores_arr)))
    print(
        time.strftime("Total time elapsed: %H:%M:%S",
                      time.gmtime(end - start)))
class SnakeEnv(py_environment.PyEnvironment):
    """TF-Agents environment wrapping a 10x10 SnakeGame."""

    def __init__(self, step_limit=None):
        # One of four movement directions per step.
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
        # Flattened 10x10 board.
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(100,), dtype=np.int32, minimum=0, name="observation")
        self._game = SnakeGame(size=10)
        self._episode_ended = False
        self._reward_count = 0
        self._step_limit = step_limit
        self._step_count = 0

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        self._episode_ended = False
        self._step_count = 0
        self._reward_count = 0
        board = self._game.reset()
        return ts.restart(board.flatten())

    def _step(self, action):
        if self._episode_ended:
            # Previous step finished the episode; start a new one.
            return self.reset()

        self._step_count += 1
        board, reward, terminal = self._game.step(action)
        flat = board.flatten()
        if terminal:
            self._episode_ended = True

        self._reward_count += reward
        # Stop if we have gotten 1000 treats.
        if self._reward_count >= 1000:
            return ts.termination(flat, reward)

        # A treat resets the starvation counter.
        if reward != 0:
            self._step_count = 0
        if self._step_limit is not None and self._step_count > self._step_limit:
            self._episode_ended = True

        if self._episode_ended:
            return ts.termination(flat, reward)
        return ts.transition(flat, reward, discount=1.0)
def testRunHitWallAndDie(self, mock_random):
    """Walking straight into the wall ends the game as a loss."""
    input_mock = Mock()
    input_mock.get_next_action.return_value = Direction.X_POSITIVE
    output_fake = FakeOutput()
    # Deterministic food placement at (0, 0).
    mock_random.side_effect = [0, 0]

    SnakeGame.run(input_mock, output_fake, 3, 3)

    self.assertEqual(output_fake.game_results, [False])
    self.assertEqual(2, len(output_fake.drawn_maps))
    output_fake.verify_game_map(self, 0, {(1, 1): Snake, (0, 0): Food})
    output_fake.verify_game_map(self, 1, {(2, 1): Snake, (0, 0): Food})
def initial_population(self, num_games):
    """Play ``num_games`` random games and return labelled samples.

    Each sample is [action + observation, label] where the label is
    -15 for a fatal move, 1 for a move that scored or approached the
    food, and 0 for any other surviving move.
    """
    print('--- initial_population ---')
    start = time.time()
    samples = []
    for i in range(num_games):
        game = SnakeGame()
        _, last_score, snake, food = game.start()
        # Observation: obstacle left/front/right flags plus food angle.
        last_obs = self.generate_observation(snake, food)
        last_dist = self.get_food_distance(snake, food)
        for j in range(self.max_steps):
            # action is relative (-1/0/1); game_action absolute (0..3).
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            if done:
                # Heavy penalty (-15) for the move that killed the snake.
                samples.append(
                    [self.add_action_to_observation(action, last_obs), -15])
                break
            dist = self.get_food_distance(snake, food)
            # 1: scored or moved closer to food; 0: survived but drifted.
            label = 1 if (score > last_score or dist < last_dist) else 0
            samples.append(
                [self.add_action_to_observation(action, last_obs), label])
            last_obs = self.generate_observation(snake, food)
            last_dist = dist
            last_score = score
        # Progress line, rewritten in place via '\r'.
        print(' game: ' + str(i + 1) + '/' + str(num_games) + ' [' +
              str(round(((i + 1) / num_games) * 100, 1)) + '%]' +
              ' max_steps: ' + str(self.max_steps) + ' time: ' +
              str(round(time.time() - start, 3)) + 's',
              end='\r')
    end = time.time()
    # Final (non-overwritten) progress line.
    print(' game: ' + str(i + 1) + '/' + str(num_games) + ' [' +
          str(round(((i + 1) / num_games) * 100, 1)) + '%]' +
          ' max_steps: ' + str(self.max_steps) + ' time: ' +
          str(round(time.time() - start, 3)) + 's')
    return samples
def visualise_game(self, model):
    """Render one model-driven game in the GUI."""
    game = SnakeGame(gui=True)
    _, _, snake, food = game.start()
    last_obs = self.generate_observation(snake, food)
    for _ in range(self.goal_steps):
        # Evaluate all three relative turns and take the best-scoring one.
        scores = [
            model.predict(
                self.add_action_to_observation(last_obs, turn)
                .reshape(-1, 5, 1))
            for turn in range(-1, 2)
        ]
        best = np.argmax(np.array(scores))
        game_action = self.get_game_action(snake, best - 1)
        done, _, snake, food = game.step(game_action)
        if done:
            break
        last_obs = self.generate_observation(snake, food)
def test_model(self, model, print_stats, save_obs, print_avrg=True):
    """Evaluate the model over ``self.tests`` games.

    Optionally prints per-game stats, saves the visited observations to
    ``self.test_filename``, and prints aggregate step/score statistics.
    """
    step_counts = []
    score_counts = []
    seen = []
    # One iteration per test game.
    for i in range(self.tests):
        steps = 0
        game = SnakeGame()
        _isdone, score, snake, food = game.start()
        last_obs = self.generate_observation(snake, food)
        # Play until death or the step budget runs out.
        for j in range(self.max_steps):
            # Predict a value for each candidate relative turn.
            step_predictions = []
            for turn in range(-1, 2):
                step_predictions.append(model.predict(
                    self.merge_action_and_observation(last_obs, turn)
                    .reshape(-1, 5, 1)))
            # Greedy choice: the highest-valued action.
            action = np.argmax(np.array(step_predictions))
            game_action = self.get_game_action(snake, action - 1)
            isdone, score, snake, food = game.step(game_action)
            # Record the chosen (observation, value) pair if requested.
            if save_obs:
                seen.append([
                    self.merge_action_and_observation(last_obs, action),
                    float(step_predictions[action])])
            if isdone:
                if print_stats:
                    print('#####################################')
                    print('steps:' + str(steps))
                    print('snake length:' + str(len(snake)))
                break
            last_obs = self.generate_observation(snake, food)
            steps += 1
        step_counts.append(steps)
        score_counts.append(score)
    if save_obs:
        save_list(observations=seen, filename=self.test_filename)
    # Print averages across all games.
    if print_avrg:
        print('Average steps:', mean(step_counts))
        print(Counter(step_counts))
        print('Average score:', mean(score_counts))
        print(Counter(score_counts))
def new_game():
    """Reset the finished game's screen and start a fresh game."""
    global new_snake_game
    # Clear the old game's screen before replacing the instance.
    new_snake_game.screen.resetscreen()
    new_snake_game = SnakeGame()
def visual_test_model(self, model):
    """Play a single GUI game driven by the model's greedy policy."""
    game = SnakeGame(gui=True)
    isdone, score, snake, food = game.start()
    last_obs = self.generate_observation(snake, food)
    for i in range(self.max_steps):
        # Value each relative turn, then act greedily.
        scores = []
        for turn in range(-1, 2):
            scores.append(model.predict(
                self.merge_action_and_observation(last_obs, turn)
                .reshape(-1, 5, 1)))
        action = np.argmax(np.array(scores))
        game_action = self.get_game_action(snake, action - 1)
        done, score, snake, food = game.step(game_action)
        if done:
            break
        last_obs = self.generate_observation(snake, food)
def __init__(self, size, max_without_eating=300, mode='standard'):
    """Set up action/observation spaces and the underlying game.

    Args:
        size: (width, height) of the board.
        max_without_eating: steps without an apple before the episode ends.
        mode: 'standard' (4 absolute directions) or 'pov' (3 relative turns).

    Raises:
        ValueError: if ``mode`` is not a supported control scheme.
    """
    super(SnakeEnv, self).__init__()
    self.max_without_eating = max_without_eating
    self.steps_without_apple = 0
    if mode == 'standard':
        self.action_space = spaces.Discrete(4)
    elif mode == 'pov':
        self.action_space = spaces.Discrete(3)
    else:
        # Fixed typo in the user-facing error message ("Uknown" -> "Unknown").
        raise ValueError('Unknown mode: ' + str(mode))
    # RGB board image observation.
    self.observation_space = spaces.Box(low=0,
                                        high=1,
                                        shape=(size[0], size[1], 3),
                                        dtype=np.uint8)
    self.game = SnakeGame(size, controls=mode)
def test_model(self, model, n):
    """Evaluate ``model`` over ``test_games / n`` games.

    Args:
        model: predictor with a ``predict`` method over
            (action + observation) inputs reshaped to (-1, 5, 1).
        n: divisor applied to ``self.test_games`` to shrink the run.

    Returns:
        (avg_steps, avg_score) across the played games.
    """
    print('--- test_model ---')
    start = time.time()
    # Seeded with 0 so mean()/max()/min() never see an empty list.
    steps_arr = [0]
    scores_arr = [0]
    for i in range(round(self.test_games / n)):
        steps = 0
        game = SnakeGame()
        _, score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        for _ in range(self.max_steps):
            # Value each relative turn (-1/0/1) and act greedily.
            predictions = []
            for action in range(-1, 2):
                predictions.append(
                    model.predict(
                        self.add_action_to_observation(
                            action, prev_observation).reshape(-1, 5, 1)))
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            if done:
                break
            else:
                prev_observation = self.generate_observation(snake, food)
                steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
        # Progress line, rewritten in place via '\r'.
        print('game: ' + str(i + 1) + '/' + str(round(self.test_games / n)) +
              ' steps_avg=' + str(round(mean(steps_arr), 2)) + ' score_avg=' +
              str(round(mean(scores_arr), 2)),
              end='\r')
    end = time.time()
    avg_steps = mean(steps_arr)
    avg_score = mean(scores_arr)
    # Final (non-overwritten) progress line plus summary statistics.
    print('game: ' + str(i + 1) + '/' + str(round(self.test_games / n)) +
          ' steps_avg=' + str(round(mean(steps_arr), 2)) + ' score_avg=' +
          str(round(mean(scores_arr), 2)))
    print('steps: avg=' + str(round(avg_steps, 2)) + ' max=' +
          str(max(steps_arr)) + ' min=' + str(min(steps_arr)))
    print('score: avg=' + str(round(avg_score, 2)) + ' max=' +
          str(max(scores_arr)) + ' min=' + str(min(scores_arr)))
    print(time.strftime("Time elapsed: %H:%M:%S", time.gmtime(end - start)))
    return avg_steps, avg_score
def main():
    """Build the configured I/O interfaces and launch the snake game."""
    chosen_input = settings.input_interface
    if chosen_input == 'Keyboard':
        reader = Keyboard(settings.time_step_seconds)
    elif chosen_input == 'ML':
        reader = MLInput()
    else:
        raise RuntimeError(f'Unknown input interface {chosen_input}')

    chosen_output = settings.output_interface
    if chosen_output == 'Text':
        writer = TextOutput()
    elif chosen_output == 'Pygame':
        writer = PygameOutput()
    else:
        raise RuntimeError(f'Unknown output interface {chosen_output}')

    SnakeGame.run(reader, writer, settings.dim_x, settings.dim_y)
def initial_population(self):
    """Build the initial training set from randomly played games.

    Labels: -1 for the move that ended the game, 1 for a move that
    scored or reduced the distance to the food, 0 otherwise.
    """
    samples = []
    for _ in range(self.initial_games):
        # Fresh game per episode; see snake_game.py start() for setup.
        game = SnakeGame()
        _, last_score, snake, food = game.start()
        last_obs = self.generate_observation(snake, food)
        last_dist = self.get_food_distance(snake, food)
        # Play until goal_steps moves or a failure state is reached.
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            if done:
                # Terminal move: record it as a failure (-1).
                samples.append(
                    [self.add_action_to_observation(last_obs, action), -1])
                break
            dist = self.get_food_distance(snake, food)
            # Optimal move (1) if it scored or approached the food,
            # otherwise suboptimal (0).
            label = 1 if (score > last_score or dist < last_dist) else 0
            samples.append(
                [self.add_action_to_observation(last_obs, action), label])
            # Roll forward the observation and food distance.
            last_obs = self.generate_observation(snake, food)
            last_dist = dist
    return samples
class SnakeEnv(gym.Env):
    """Open AI Snake Environment.

    Wraps SnakeGame as a Gym environment. Rewards: +1 for eating an
    apple, -1 for dying, 0 otherwise; an episode also ends after
    ``max_without_eating`` consecutive steps without an apple.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, size, max_without_eating=300, mode='standard'):
        """Set up action/observation spaces and the underlying game.

        Args:
            size: (width, height) of the board.
            max_without_eating: steps without an apple before the episode
                ends.
            mode: 'standard' (4 absolute directions) or 'pov' (3 relative
                turns).

        Raises:
            ValueError: if ``mode`` is not a supported control scheme.
        """
        super(SnakeEnv, self).__init__()
        self.max_without_eating = max_without_eating
        self.steps_without_apple = 0
        if mode == 'standard':
            self.action_space = spaces.Discrete(4)
        elif mode == 'pov':
            self.action_space = spaces.Discrete(3)
        else:
            # Fixed typo in the error message ("Uknown" -> "Unknown").
            raise ValueError('Unknown mode: ' + str(mode))
        # RGB board image observation.
        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(size[0], size[1], 3),
                                            dtype=np.uint8)
        self.game = SnakeGame(size, controls=mode)

    def step(self, action):
        """Advance one frame; return (obs, reward, done, info)."""
        self.game.update(action)
        if self.game.ate_apple:
            reward = 1
            self.steps_without_apple = 0
        elif self.game.snake.dead:
            reward = -1
        else:
            reward = 0
            self.steps_without_apple += 1
        done = (self.steps_without_apple > self.max_without_eating
                or self.game.snake.dead)
        return self.game.get_state(), reward, done, {}

    def reset(self):
        """Reset the underlying game and starvation counter.

        BUG FIX: the previous implementation executed
        ``self.explored = np.zeros_like(self.explored)``, but ``explored``
        was never initialized anywhere, so the first reset() raised
        AttributeError. The attribute is unused elsewhere in this class,
        so that line is removed.
        """
        self.game.reset()
        self.steps_without_apple = 0
        return self.game.get_state()

    def render(self, mode='human', close=False):
        """Render the board; in 'human' mode throttle to ~10 FPS."""
        if mode == 'human':
            time.sleep(0.1)
        self.game.render(mode=mode)

    def seed(self, seed=None):
        """Seed numpy's global RNG (the game draws from np.random)."""
        np.random.seed(seed)
def visualise_game(self, model):
    """Play one GUI game with the model, pausing between moves."""
    game = SnakeGame(gui=True)
    _, _, snake, _ = game.start()
    last_obs = self.generate_observation(snake)
    for _ in range(self.goal_steps):
        # Score each relative turn and act greedily.
        scores = [
            model.predict(
                self.add_action_to_observation(last_obs, turn)
                .reshape(-1, 4, 1))
            for turn in range(-1, 2)
        ]
        best = np.argmax(np.array(scores))
        game_action = self.get_game_action(snake, best - 1)
        done, _, snake, _ = game.step(game_action)
        if done:
            break
        last_obs = self.generate_observation(snake)
        # Slow the loop down so a human can follow the game.
        time.sleep(0.2)
def main(n, generations_num, load_and_play, model_filename, elite_size,
         debug_mode=False):
    """Either train over generations or load a saved model and play it."""
    if load_and_play == 0:
        run_generations(n, generations_num, model_filename, elite_size)
        return

    # Load the saved network and watch it play one GUI game.
    parent = SnakeNetwork()
    parent.model = load_saved_model(model_filename)
    game = SnakeGame(gui=True)
    x = game.start()
    prev_score = score = 0
    prev_j = 0
    x_array = []
    x1_array = []
    end = 0
    for j in range(400):
        [x, score, end] = game.step(parent.predict_action(x))
        if score > prev_score:
            # Remember when the score last improved.
            prev_score = score
            prev_j = j
        # Stop on game over or after 100 moves without score progress.
        if end > 0 or j - 100 > prev_j:
            break
        if debug_mode:
            x_array.append(x)
            x1_array.append(parent.model.predict(x))
    if end == 0:
        game.end_game()
    if debug_mode:
        print(x_array)
        print(x1_array)
def test_model(self, model):
    """Evaluate the model over test_games games and print statistics."""
    step_counts = []
    score_counts = []
    print('Testing model on %s test games' % self.test_games)
    for _ in range(self.test_games):
        survived = 0
        history = []
        game = SnakeGame()
        _, score, snake, food = game.start()
        last_obs = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            # Score each relative turn and act greedily.
            predictions = [
                model.predict(
                    self.add_action_to_observation(last_obs, turn)
                    .reshape(-1, 5, 1))
                for turn in range(-1, 2)
            ]
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            history.append([last_obs, action])
            if done:
                # One dot per finished game as a progress indicator.
                uprint('.', end='')
                break
            last_obs = self.generate_observation(snake, food)
            survived += 1
        step_counts.append(survived)
        score_counts.append(score)
    print('\nAverage steps:', mean(step_counts))
    print(Counter(step_counts))
    print('Average score:', mean(score_counts))
    print(Counter(score_counts))
def generate_train_data(self, add_test=False):
    """Generate labelled training data by playing random games.

    Labels: -1 for a fatal move, 1 when the score rose or the food got
    closer, 0 otherwise. When ``add_test`` is True, observations saved
    from earlier test runs are appended to the fresh data.

    Returns:
        The list of [observation-with-action, label] pairs.
    """
    training_data = []
    for i in range(self.trains):
        game = SnakeGame()
        done, prev_score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        prev_food_distance = self.get_food_distance(snake, food)
        for j in range(self.max_steps):
            # Generate a (relative, absolute) action pair.
            action, game_action = self.generate_action(snake)
            # Advance the game state with the chosen action.
            done, score, snake, food = game.step(game_action)
            if done:
                # Game over (the snake died): label this move -1.
                training_data.append([self.merge_action_and_observation(prev_observation, action), -1])
                break
            else:
                food_distance = self.get_food_distance(snake, food)
                # Score increased or the food got closer: label 1.
                if score > prev_score or food_distance < prev_food_distance:
                    training_data.append([self.merge_action_and_observation(prev_observation, action), 1])
                else:
                    # Survived but moved the wrong way: label 0.
                    training_data.append([self.merge_action_and_observation(prev_observation, action), 0])
                prev_observation = self.generate_observation(snake, food)
                prev_food_distance = food_distance
    if add_test:
        # Additionally merge in data saved from previous test runs.
        X = np.load(file=self.test_filename + ' x.npy')
        Y = np.load(file=self.test_filename + ' y.npy')
        # NOTE(review): ndarray.reshape returns a new array; these two
        # calls discard their result, so they are no-ops as written.
        X.reshape(-1, 5, 1)
        Y.reshape(-1, 1)
        # NOTE(review): range(0, len(X) - 1) skips the last element —
        # looks like an off-by-one; confirm whether the final sample
        # should be included.
        for i in range(0, len(X) - 1):
            training_data.append([X[i], Y[i]])
    save_list(observations=training_data, filename='train_set')
    return training_data
def play_test_games(self, model):
    """Play test games with the trained model and print statistics."""
    step_counts = []
    score_counts = []
    for _ in range(self.test_games):
        survived = 0
        history = []
        game = SnakeGame()
        _, score, snake, food = game.start()
        last_obs = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            # Score each relative turn and act greedily.
            predictions = [
                model.predict(
                    self.add_action_to_observation(last_obs, turn)
                    .reshape(-1, 5, 1))
                for turn in range(-1, 2)
            ]
            action = np.argmax(np.array(predictions))
            game_action = self.generate_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            history.append([last_obs, action])
            if done:
                # Dump the final state for post-mortem inspection.
                print('-----')
                print("Steps: " + str(survived))
                print("Snake: " + str(snake))
                print("Food: " + str(food))
                print("Previous Observation: " + str(last_obs))
                print("Predictions: " + str(predictions))
                break
            last_obs = self.generate_observation(snake, food)
            survived += 1
        step_counts.append(survived)
        score_counts.append(score)
    print('Average steps:', mean(step_counts))
    print(Counter(step_counts))
    print('Average score:', mean(score_counts))
    print(Counter(score_counts))
    print('Highest Score:', max(score_counts))
def visualise(self):
    """Play one GUI game with the current policy and print the outcome."""
    game = SnakeGame(gui=True)
    game.start()
    for _ in range(self.goal_steps):
        move = self.get_game_action(game)
        done, score, snake, food = game.step(move)
        if done:
            break
    game.end_game()
    print('-----')
    print('snake: ' + str(snake))
    print('food: ' + str(food))
    print('score: ' + str(score))
def visualise_game(self, model):
    """Play a single GUI game with the model and print the final state."""
    game = SnakeGame(gui=True)
    _, score, snake, food = game.start()
    last_obs = self.generate_observation(snake, food)
    for _ in range(self.max_steps):
        # Score each relative turn and act greedily.
        scores = []
        for turn in range(-1, 2):
            scores.append(model.predict(
                self.add_action_to_observation(turn, last_obs)
                .reshape(-1, 5, 1)))
        action = np.argmax(np.array(scores))
        game_action = self.get_game_action(snake, action - 1)
        done, score, snake, food = game.step(game_action)
        if done:
            break
        last_obs = self.generate_observation(snake, food)
    game.end_game()
    print('snake: ' + str(snake))
    print('food: ' + str(food))
    print('prev_obs: ' + str(last_obs))
    print('score: ' + str(score))
def play_game(z, n, snakes_population):
    """Run one game per snake in ``snakes_population`` and assign fitness.

    Args:
        z: unused in this body — presumably a generation/worker index;
            confirm at the caller.
        n: number of snakes (length of the fitness array).
        snakes_population: iterable of networks exposing
            predict_action() and set_fitness().

    Returns:
        [fitness_array, best_score]: per-snake fitness values plus the
        best score seen across the population.
    """
    fitness_array = np.zeros(n)
    best_score = 0
    for i, snake in enumerate(snakes_population):
        game = SnakeGame()
        x = game.start()
        prev_score = score = steps = 0
        prev_j = 0
        # NOTE(review): the range bound is evaluated once while score is
        # still 0, so the loop allows at most 100 moves regardless of any
        # later score — the (score + 1) factor has no effect here.
        for j in range((score + 1) * 100):
            [x, score, end] = game.step(snake.predict_action(x))
            if score > prev_score:
                # Score improved: remember when, and restart the step count.
                prev_score = score
                prev_j = j
                steps = 0
            steps += 1
            # Stop on game over, or after too long without progress
            # (threshold scales with the current score).
            if end > 0 or j - (score + 1) * 50 > prev_j:
                if end == 0:
                    game.end_game()
                break
        if score > best_score:
            best_score = score
        fitness_array[i] = calculate_fitness(score, steps)
        snake.set_fitness(fitness_array[i])
        # print_progress("Snake game", i, len(snakes_population), start)
    return [fitness_array, best_score]
# board coordinates- (0, 0) at the top left import copy import os import random as rand import neat from snake_game import SnakeGame seed = rand.random y = 6 x = 6 snake_game = SnakeGame(y, x, seed) generations = 1000 def eval_genomes(genomes, config): for genome_id, genome in genomes: game = copy.deepcopy(snake_game) net = neat.nn.FeedForwardNetwork.create(genome, config) total_moves = 0 cont = True # moves_since_apple = 0 # last_score = 0 # moves_since_apple > y * x * 2 while cont and (game.score / 100 + 1) * x * y * 2 > total_moves: # last_score = game.score output = net.activate(game.get_board().flat) index_of_max = output.index(max(output)) # print('move=' + str(index_of_max))
def train(num_trials=40): score1 = 0 score2 = 0 player1 = 0 player2 = 0 for trial in range(num_trials): game = SnakeGame(board_size=(20, 25)) state = game.start_state() game.print_board(state) while True: action = minimax_agent_first_index(game, state) state = game.successor(state, action, True) if game.is_end(state)[0] == True: reward = game.is_end(state)[2] - state[3][1] incorporateFeedback(game, state, action, reward, succ) break game.print_board(state) current_dir = state[2][1] actions = get_valid(current_dir, game.actions()) action = get_QL_Action(game, state, actions) succ = game.successor(state, action) snake = succ[1][1] food = state[4] reward = succ[3][1] - state[3][1] #reward = 100*(succ[3][1]- state[3][1]) -((snake[0][0] - food[0])**2 + (snake[0][1] - food[1])**2) result = game.is_end(succ) state[0].addstr(28, 10, ' Reward: ' + str(reward) + ' ') state[0].addstr(29, 10, ' ScoreNow: ' + str(succ[3][1]) + ' ') state[0].addstr(30, 10, ' ScorePrev: ' + str(state[3][1]) + ' ') incorporateFeedback(game, state, action, reward, succ) game.print_board(state) state = succ if game.is_end(state)[0] == True: break global explorationProb explorationProb = explorationProb / 2 curses.endwin() '''