def visualise_game(self, model):
    """Play one GUI game, scoring all four relative moves with the model
    and choosing by left-hand-rule priority: left (0), forward (3),
    right (2), then turn back (1)."""
    game = LaberintGame(gui=True)
    _, _, board, x, y, lastAction = game.start()
    prev_observation = self.generate_observation(x, y, board, lastAction)
    for _ in range(self.goal_steps):
        # One model prediction per candidate relative action (0..3).
        scores = [
            model.predict(
                self.add_action_to_observation(prev_observation, candidate)
                .reshape(-1, 8, 1))
            for candidate in range(4)
        ]
        # Left-hand rule: take the first candidate the model rates > 0.5,
        # in priority order left, forward, right; otherwise turn back.
        for action in (0, 3, 2):
            if scores[action] > 0.5:
                break
        else:
            action = 1
        game_action = self.get_game_action(action, lastAction)
        done, _, board, x, y, lastAction = game.step(game_action)
        if done:
            break
        prev_observation = self.generate_observation(x, y, board, lastAction)
def initial_population(self):
    """Generate labelled training data by playing `initial_games` games.

    The first `manualGames` games are played by a human through the GUI
    (actions read from `manual.get()`); the rest use `generate_action`.
    Each [observation_with_action, label] pair is labelled:
       1 -> the game was won, or the score did not drop by more than 2,
       0 -> the score dropped by more than 2 (bad direction),
      -1 -> the game ended with a score drop (death).

    Returns:
        list: entries of the form [observation_with_action, label].
    """
    training_data = []
    for number in range(self.initial_games):
        # Human-played games get a GUI window; automated ones run headless.
        if number < self.manualGames:
            game = LaberintGame(gui=True)
        else:
            game = LaberintGame()
        self.positionDic.clear()
        done, prev_score, board = game.start()
        self.maxBoardDistance = self.get_max_board_distance(board)
        self.init_exit_position(board)
        self.update_avatar_position(board)
        # NOTE(review): the original also called self.get_exit_distance()
        # before and inside the loop but never read the result; removed as
        # dead code (assumes get_exit_distance is side-effect free — confirm).
        for _ in range(self.goal_steps * 10):
            if number < self.manualGames:
                game_action = manual.get()
            else:
                game_action = self.generate_action()
            prev_observation = self.generate_observation(board, prev_score)
            done, score, board = game.step(game_action)
            self.update_avatar_position(board)
            if done:
                # Terminal step: -1 for dying (score dropped), +1 for winning.
                if score < prev_score:
                    label = -1
                else:
                    print("win")
                    label = 1
                training_data.append([
                    self.add_action_to_observation(prev_observation, game_action),
                    label,
                ])
                break
            # Non-terminal step: a drop of up to 2 points still counts as a
            # good direction (label 1); larger drops are bad (label 0).
            label = 1 if score + 2 >= prev_score else 0
            training_data.append([
                self.add_action_to_observation(prev_observation, game_action),
                label,
            ])
            prev_score = score
    return training_data
def visualize_game(self, model):
    """Run a single GUI game, stepping with the model's chosen action
    until the game ends or `goal_steps` is reached."""
    game = LaberintGame(gui=True)
    _, _, board = game.start()
    observation = self.generate_observation(board)
    last_action = self.highest_action(model, observation)
    for _ in range(self.goal_steps):
        chosen = self.next_action(model, observation, last_action)
        done, _, board = game.step(chosen)
        if done:
            break
        last_action = chosen
        observation = self.generate_observation(board)
def test_model(self, model):
    """Evaluate the model over `test_games` headless games.

    At every step the model scores all four relative actions and the agent
    follows the left-hand rule: left (0), then forward (3), then right (2),
    otherwise turn back (1).  Prints the average number of steps survived
    and the full step-count distribution.
    """
    steps_arr = []
    for _ in range(self.test_games):
        steps = 0
        game = LaberintGame()
        _, _, board, x, y, lastAction = game.start()
        prev_observation = self.generate_observation(x, y, board, lastAction)
        for _ in range(self.goal_steps):
            # One model prediction per candidate relative action (0..3).
            predictions = [
                model.predict(
                    self.add_action_to_observation(prev_observation, candidate)
                    .reshape(-1, 8, 1))
                for candidate in range(4)
            ]
            # Left-hand rule priority over the predicted "good move" scores.
            if predictions[0] > 0.5:
                action = 0  # go left
            elif predictions[3] > 0.5:
                action = 3  # go forward
            elif predictions[2] > 0.5:
                action = 2  # go right
            else:
                action = 1  # turn back
            game_action = self.get_game_action(action, lastAction)
            done, _, board, x, y, lastAction = game.step(game_action)
            if done:
                break
            prev_observation = self.generate_observation(x, y, board, lastAction)
            steps += 1
        steps_arr.append(steps)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
def visualise_game(self, model):
    """Play one GUI game using the model's predicted actions, marking each
    visited cell in `positionDic` before moving on."""
    game = LaberintGame(gui=True)
    _, score, board = game.start()
    self.init_exit_position(board)
    self.update_avatar_position(board)
    self.positionDic.clear()
    observation = self.generate_observation(board, score)
    last_action = -1  # sentinel: no previous action on the first step
    for _ in range(self.goal_steps_train):
        chosen = self.get_predicted_action(model, observation, last_action)
        # Record the current cell as visited before stepping away from it.
        key = self.get_position_string(self.avatarX, self.avatarY)
        self.positionDic[key] = 1
        done, score, board = game.step(chosen)
        self.update_avatar_position(board)
        if done:
            break
        observation = self.generate_observation(board, score)
        last_action = chosen
def initial_population(self):
    """Build binary-labelled training data from `initial_games` random games.

    Labels (kept to {0, 1} to simplify the model's target):
      0 -> bad step: the game ended on a trap tile "t", or the avatar did
           not move (walked into a wall),
      1 -> good step: the avatar moved, or the game ended on a non-trap
           tile (presumably the exit — confirm against LaberintGame).

    Returns:
        list: entries of the form [observation_with_action, 0_or_1].
    """
    training_data = []
    for _ in range(self.initial_games):
        game = LaberintGame()
        _, _, board, x, y, lastAction = game.start()
        prev_observation = self.generate_observation(x, y, board, lastAction)
        old_x, old_y = x, y
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(x, y, lastAction)
            done, _, board, x, y, lastAction = game.step(game_action)
            if done:
                # Terminal: landing on a trap ("t") is bad, anything else
                # is good.
                label = 0 if board[x][y] == "t" else 1
                training_data.append([
                    self.add_action_to_observation(prev_observation, action),
                    label,
                ])
                break
            # Non-terminal: standing still is bad.  (Original used bitwise
            # `&` between the comparisons; `and` is the correct boolean
            # operator and equivalent here.)
            label = 0 if (x == old_x and y == old_y) else 1
            training_data.append([
                self.add_action_to_observation(prev_observation, action),
                label,
            ])
            prev_observation = self.generate_observation(x, y, board, lastAction)
            old_x, old_y = x, y
    print(len(training_data))
    return training_data
def test_model(self, model):
    """Run `test_games` headless games with the model and print the average
    and distribution of steps survived."""
    steps_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = LaberintGame()
        _, _, board = game.start()
        observation = self.generate_observation(board)
        last_action = self.highest_action(model, observation)
        for _ in range(self.goal_steps):
            chosen = self.next_action(model, observation, last_action)
            done, _, board = game.step(chosen)
            game_memory.append([observation, chosen])
            if done:
                break
            last_action = chosen
            observation = self.generate_observation(board)
            steps += 1
        steps_arr.append(steps)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
def test_model(self, model):
    """Evaluate the model over `test_games` games and print step-count and
    score statistics (average plus full distribution for each)."""
    steps_arr = []
    scores_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = LaberintGame(gui=False)
        done, score, board = game.start()
        self.maxBoardDistance = self.get_max_board_distance(board)
        self.init_exit_position(board)
        self.update_avatar_position(board)
        self.positionDic.clear()
        observation = self.generate_observation(board, score)
        last_action = -1  # sentinel: no previous action on the first step
        for _ in range(self.goal_steps_train):
            chosen = self.get_predicted_action(model, observation, last_action)
            # Record the current cell as visited before stepping away.
            key = self.get_position_string(self.avatarX, self.avatarY)
            self.positionDic[key] = 1
            done, score, board = game.step(chosen)
            self.update_avatar_position(board)
            game_memory.append([observation, chosen])
            if done:
                # Dump a little context about the terminal step for debugging.
                print('-----')
                print(steps)
                print(observation)
                break
            observation = self.generate_observation(board, score)
            last_action = chosen
            steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
    print('Average score:', mean(scores_arr))
    print(Counter(scores_arr))
def initial_population(self):
    """Build training data from `initial_games` random games.

    Labels:
      -1    -> step ended the game with a big score drop (score < -99),
       1    -> step ended the game with a big score gain (score > 99),
      -0.5  -> step left the observation unchanged (e.g. bumped a wall),
       0.85 -> step changed the observation (made progress).

    Returns:
        list: entries of the form [observation_with_action, label].
    """
    training_data = []
    for _ in range(self.initial_games):
        game = LaberintGame()
        _, _, board = game.start()
        prev_observation = self.generate_observation(board)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(prev_observation)
            done, score, board = game.step(game_action)
            if done and score < -99:
                training_data.append([
                    self.add_action_to_observation(prev_observation, action),
                    -1,
                ])
                break
            if done and score > 99:
                training_data.append([
                    self.add_action_to_observation(prev_observation, action),
                    1,
                ])
                break
            # NOTE(review): if `done` is True with -99 <= score <= 99 the
            # loop keeps stepping a finished game — confirm terminal scores
            # always fall outside that band.
            # Compute the new observation once (the original regenerated it
            # twice per step).
            new_observation = self.generate_observation(board)
            if self.same_observation(prev_observation, new_observation):
                label = -0.5
            else:
                label = 0.85
            training_data.append([
                self.add_action_to_observation(prev_observation, action),
                label,
            ])
            prev_observation = new_observation
    print(len(training_data))
    return training_data