class Player(AbstractPlayer):
    """Alpha-beta player that mirrors the board as a graph.

    States are scored with a weighted mix of fruit proximity, collected
    scores and a Voronoi (territory) term; see `utility`.
    """

    # Relative weight of every heuristic term combined in `utility`.
    _HEURISTIC_WEIGHTS = {
        'fruit_util': 0.2,
        'opponent_fruits_util': 0.2,
        'our_score': 0.2,
        'opponent_score': 0.2,
        'voronoi': 0.2
    }

    def __init__(self, game_time, penalty_score):
        # keep the inheritance of the parent's (AbstractPlayer) __init__()
        AbstractPlayer.__init__(self, game_time, penalty_score)
        # TODO: initialize more fields, if needed, and the wanted algorithm from SearchAlgos.py
        self.max_fruit_turn = None
        self.penalty_score = penalty_score
        self.directions = utils.get_directions()
        self.game_time = game_time
        self.turn_time = None
        # TODO: Remember update this
        self.current_turn = 0
        self.current_player_score = 0
        self.opponent_player_score = 0
        self.board = None
        self.graph = None
        self.pos = None
        self.minimax_algo = AlphaBeta(self.utility, self.succ, None,
                                      self.is_goal)
        self.initialized_fruits_already = False
        # Start with an empty dict so make_move() works even when
        # update_fruits() has not been called yet.
        self.fruits_on_board_dict = {}
        self.opponent_pos = None
        self.total_fruit_amount = None

    def set_game_params(self, board):
        """Set the game parameters needed for this player.
        This function is called before the game starts.
        (See GameWrapper.py for more info where it is called)
        input:
            - board: np.array, a 2D matrix of the board.
        No output is expected.
        """
        self.board = board
        self.creating_initial_graph(board)
        rows, cols = len(board), len(board[0])
        self.max_fruit_turn = 2 * min(rows, cols)
        # Per-turn search budget derived from the total game time.
        self.turn_time = 2 * self.game_time / (rows * cols)

    def creating_initial_graph(self, board):
        """Build the board graph and read both start positions from it."""
        self.graph, self.pos, self.opponent_pos = create_graph_of_board(board)

    def make_move(self, time_limit, players_score):
        """Make move with this Player.

        Runs the search with increasing depth until it times out, reports
        a decisive value (-1 / 1), or returns the root direction (0, 0).
        input:
            - time_limit: float, time limit for a single turn.
        output:
            - direction: tuple, specifying the Player's movement, chosen from self.directions
        raises:
            - ValueError: when a fallback move is needed but no neighbouring
              cell is free.
        """
        # Never plan to search longer than the limit the caller allows.
        finish_time = time.time() + min(self.turn_time, time_limit)
        depth = 1
        initial_state = utils.State(self.board, self.graph, (0, 0), self.pos,
                                    self.opponent_pos, self.current_turn,
                                    self.fruits_on_board_dict, finish_time,
                                    None, self.current_player_score,
                                    self.opponent_player_score)
        # Result of the last search that finished; used as (value, direction).
        best_move = None
        while True:
            try:
                best_move = self.minimax_algo.search(initial_state, depth,
                                                     True)
            except TimeoutError:
                # Keep the result of the previous (shallower) iteration.
                break
            if best_move[1] == (0, 0) or best_move[0] in (-1, 1):
                break
            depth += 1

        if best_move is None or best_move[1] == (0, 0):
            # No usable direction came out of the search: take any free
            # neighbour instead of moving blindly.
            value = -np.inf if best_move is None else best_move[0]
            best_move = (value, self._first_legal_direction(initial_state))

        # Always record the move so pos/board/graph stay in sync with the game.
        self.finish_turn(best_move, depth)
        return best_move[1]

    def _first_legal_direction(self, state):
        """Return the first direction leading from self.pos to a free cell."""
        for direction in self.directions:
            row, col = utils.tup_add(direction, self.pos)
            if self.is_move_legal(state, row, col):
                return direction
        raise ValueError("No valid moves")

    def finish_turn(self, best_move, depth):
        """Apply the chosen move to the score, fruit dict, board and graph.

        input:
            - best_move: (value, direction) pair of the chosen move.
            - depth: search depth reached, only used for the log line.
        """
        new_pos = utils.tup_add(self.pos, best_move[1])
        if new_pos in self.fruits_on_board_dict:
            self.current_player_score += self.fruits_on_board_dict[new_pos]
            del self.fruits_on_board_dict[new_pos]
        ## Add reference to lose
        self.current_turn += 1
        # The cell we leave can never be entered again.
        self.board[self.pos[0]][self.pos[1]] = -1
        self.graph.remove_node(self.pos)
        print(
            ' depth: {} my last pos : {} best move is to move to : {} with grade of : {} nodes in graph: {}'
            .format(depth, self.pos, new_pos, best_move[0],
                    len(fetch_connected_nodes(self.graph, new_pos))))
        self.pos = new_pos
        self.board[self.pos[0]][self.pos[1]] = 1

    def set_rival_move(self, pos):
        """Update your info, given the new position of the rival.
        input:
            - pos: tuple, the new position of the rival.
        No output is expected
        """
        if pos in self.fruits_on_board_dict:
            self.opponent_player_score += self.fruits_on_board_dict[pos]
            del self.fruits_on_board_dict[pos]
        self.board[self.opponent_pos[0]][self.opponent_pos[1]] = -1
        self.graph.remove_node(self.opponent_pos)
        self.board[pos[0]][pos[1]] = 2
        self.current_turn += 1
        self.opponent_pos = pos

    def update_fruits(self, fruits_on_board_dict):
        """Update your info on the current fruits on board (if needed).

        Only the first call is recorded; afterwards the player maintains
        its own dict as fruits get eaten.
        input:
            - fruits_on_board_dict: dict of {pos: value}
                                    where 'pos' is a tuple describing the fruit's position on board,
                                    'value' is the value of this fruit.
        No output is expected.
        """
        if self.initialized_fruits_already:
            return
        # Work on a private copy: entries are deleted as fruits are eaten
        # and the caller's dict must not be modified behind its back.
        self.fruits_on_board_dict = dict(fruits_on_board_dict)
        self.total_fruit_amount = sum(self.fruits_on_board_dict.values())
        self.initialized_fruits_already = True

    def is_goal(self, state):
        """Return True when, on an even turn, either player has no free neighbour."""
        if state.turn % 2 != 0:
            return False
        return self.is_all_sides_blocked(state, state.pos) or \
            self.is_all_sides_blocked(state, state.opponent_pos)

    def is_all_sides_blocked(self, state, pos):
        """Return True when no direction from pos leads to a free cell."""
        return not any(
            self.is_move_legal(state, pos[0] + direction[0],
                               pos[1] + direction[1])
            for direction in self.directions)

    def utility(self, state, is_father_max_node):
        """Score a state: +1 / -1 for goal states, a weighted heuristic otherwise."""
        if self.is_goal(state):
            my_score = state.current_player_score
            opponent_score = state.opponent_player_score
            # A blocked player pays the penalty before scores are compared.
            if self.is_all_sides_blocked(state, state.pos):
                my_score -= self.penalty_score
            if self.is_all_sides_blocked(state, state.opponent_pos):
                opponent_score -= self.penalty_score
            return 1 if my_score > opponent_score else -1

        weights = self._HEURISTIC_WEIGHTS
        # With no fruit value recorded the score terms carry no information;
        # use 0 instead of dividing by zero / None.
        total = self.total_fruit_amount
        our_score_util = state.current_player_score / total if total else 0
        opponent_score_util = \
            -state.opponent_player_score / total if total else 0
        fruit_util_val = self.fruit_util(state, True)
        fruit_util_opponent = self.fruit_util(state, False)
        voronoi_util = self.voronoi_util(state)
        # TODO: Change to converted value (grade rescaled to 0..100)
        return weights['fruit_util'] * fruit_util_val + \
            weights['our_score'] * our_score_util + \
            weights['opponent_score'] * opponent_score_util + \
            weights['opponent_fruits_util'] * fruit_util_opponent + \
            weights['voronoi'] * voronoi_util

    def succ(self, state, is_father_max_player):
        """Return the list of successor states reachable by one legal move."""
        successors = []
        for direction in self.directions:
            changed_son_pos, my_son_player_pos, opponent_son_pos = \
                self.calculate_new_poses(direction, is_father_max_player,
                                         state)
            i, j = changed_son_pos
            if self.is_move_legal(state, i, j):
                self.add_succesor_to_list(changed_son_pos, direction, i,
                                          is_father_max_player, j,
                                          my_son_player_pos,
                                          opponent_son_pos, state, successors)
        return successors

    def add_succesor_to_list(self, changed_son_pos, direction, i,
                             is_father_max_player, j, my_son_player_pos,
                             opponent_son_pos, state, successors):
        """Build the successor state for one move and append it to successors."""
        new_player_score = state.current_player_score
        new_opponent_player_score = state.opponent_player_score
        fruit_value = state.fruits_on_board_dictionary.get(changed_son_pos)
        if fruit_value is not None:
            if not is_father_max_player:  # Our move
                new_player_score += fruit_value
            else:
                new_opponent_player_score += fruit_value
        new_board, new_graph, fruits_on_board_real_dict = \
            self.update_graph_and_board(changed_son_pos, i,
                                        is_father_max_player, j, state)
        successors.append(
            utils.State(new_board, new_graph, direction, my_son_player_pos,
                        opponent_son_pos, state.turn + 1,
                        fruits_on_board_real_dict, state.finish_time,
                        state.pos, new_player_score,
                        new_opponent_player_score))

    def is_move_legal(self, state, i, j):
        """Return True when (i, j) is inside the board and not blocked/occupied."""
        return 0 <= i < len(state.board) and 0 <= j < len(
            state.board[0]) and (state.board[i][j] not in [-1, 1, 2])

    def calculate_new_poses(self, direction, is_father_max_player, state):
        """Return (moved position, our position, opponent position) after a step.

        When is_father_max_player is true the opponent takes the step,
        otherwise we do.
        """
        if is_father_max_player:
            my_son_player_pos = state.pos
            opponent_son_pos = (state.opponent_pos[0] + direction[0],
                                state.opponent_pos[1] + direction[1])
            return opponent_son_pos, my_son_player_pos, opponent_son_pos
        my_son_player_pos = (state.pos[0] + direction[0],
                             state.pos[1] + direction[1])
        return my_son_player_pos, my_son_player_pos, state.opponent_pos

    def update_graph_and_board(self, changed_son_pos, i, is_father_max_player,
                               j, state):
        """Return the (board, graph, fruit dict) triple of the successor state."""
        pos_to_remove = state.opponent_pos if is_father_max_player else state.pos
        fruits_on_board_real_dict = self.update_fruit_dict(
            state, changed_son_pos)
        new_graph = self.update_graph(changed_son_pos, is_father_max_player,
                                      pos_to_remove, state)
        new_board = self.update_board(i, is_father_max_player, j,
                                      pos_to_remove, state)
        return new_board, new_graph, fruits_on_board_real_dict

    def update_fruit_dict(self, state, changed_son_pos):
        """Return the successor's fruit dict: empty once fruits expire, else minus the eaten one."""
        if state.turn + 1 >= self.max_fruit_turn:
            return {}
        # A shallow copy is enough: keys are position tuples and values are
        # the fruit values, neither is modified in place.
        remaining = dict(state.fruits_on_board_dictionary)
        remaining.pop(changed_son_pos, None)
        return remaining

    def update_graph(self, changed_son_pos, is_father_max_player,
                     pos_to_remove, state):
        """Return a copy of the state's graph without the cell the mover just left."""
        new_graph = state.graph.copy()
        new_graph.remove_node(
            state.opponent_pos if is_father_max_player else state.pos)
        return new_graph

    def update_board(self, i, is_father_max_player, j, pos_to_remove, state):
        """Return a new board with the move applied; the parent board is left untouched."""
        new_board = np.copy(state.board)
        new_board[pos_to_remove[0]][pos_to_remove[1]] = -1
        new_board[i][j] = 2 if is_father_max_player else 1
        if state.turn + 1 == self.max_fruit_turn:
            # Fruits disappear on this turn: clear every remaining fruit cell.
            for pos in self.fruits_on_board_dict:
                if new_board[pos[0]][pos[1]] not in [-1, 1, 2]:
                    new_board[pos[0]][pos[1]] = 0
        return new_board

    ########## helper functions for the search algorithm ##########

    def fruit_util(self, state, my_player_heurisitic):
        """Sum value/distance over fruits still reachable in time, normalised.

        The value is positive for our player and negated for the opponent.
        """
        if not state.fruits_on_board_dictionary or not self.total_fruit_amount:
            return 0
        if my_player_heurisitic:
            pos_to_remove, path_beginning_pos = state.opponent_pos, state.pos
        else:
            pos_to_remove, path_beginning_pos = state.pos, state.opponent_pos

        # Temporarily delete the other player from the graph so paths
        # cannot run through its cell.
        edges = list(state.graph.edges(pos_to_remove))
        state.graph.remove_node(pos_to_remove)
        try:
            weighted_sum = 0
            turns_left = self.max_fruit_turn - state.turn
            for fruit_pos, val in state.fruits_on_board_dictionary.items():
                d_i = self.calc_dist_to_pos(state, path_beginning_pos,
                                            fruit_pos)
                p_i = self.calc_prize(fruit_pos, val, state)
                # Both players move, so reaching the fruit costs 2 * d_i turns.
                if 0 < d_i and d_i * 2 <= turns_left:
                    weighted_sum += p_i / d_i
        finally:
            # Restore graph even if the computation is interrupted.
            state.graph.add_node(pos_to_remove)
            state.graph.add_edges_from(edges)

        ret_val = 0 if weighted_sum == 0 else weighted_sum / self.total_fruit_amount
        return ret_val if my_player_heurisitic else -ret_val

    def voronoi_util(self, state):
        """Return (our Voronoi cell size - opponent's cell size) / number of graph nodes."""
        center_nodes = {state.pos, state.opponent_pos}
        cells = nx.voronoi_cells(state.graph, center_nodes)
        return (len(cells[state.pos]) -
                len(cells[state.opponent_pos])) / len(state.graph.nodes)

    def calc_dist_to_pos(self, state, my_pos, fruit_pos):
        """Return the number of steps from my_pos to fruit_pos, or np.inf if unreachable."""
        try:
            return nx.shortest_path_length(state.graph,
                                           source=my_pos,
                                           target=fruit_pos)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            return np.inf

    def calc_prize(self, pos, prize, state):
        """Return the prize of a fruit (currently its raw value)."""
        return prize  # todo: find better way to classify better prizes

    def curr_score_util(self, state):
        """Placeholder; not implemented."""
        pass
class Player(AbstractPlayer):
    """Alpha-beta player that keeps its own board copy and a fruit time-to-live counter."""

    def __init__(self, game_time, penalty_score):
        # keep the inheritance of the parent's (AbstractPlayer) __init__()
        AbstractPlayer.__init__(self, game_time, penalty_score)
        self.penalty_score = penalty_score
        self.alphabeta = AlphaBeta(utility, succ, None, goal=is_goal_state)
        self.fruits_poses = None
        self.fruits_on_board_dict = {}
        # Indexed with the PLAYER / RIVAL constants.
        self.locations = [None, None, None]

    def set_game_params(self, board):
        """Set the game parameters needed for this player.
        This function is called before the game starts.
        (See GameWrapper.py for more info where it is called)
        input:
            - board: np.array, a 2D matrix of the board.
        No output is expected.
        """
        self.board = board.copy()
        # NOTE(review): the attribute names are swapped relative to what they
        # hold; kept as-is because only their minimum is used here.
        self.n_rows = len(self.board[0])  # cols number
        self.n_cols = len(self.board)  # rows number
        self.fruits_ttl = min(self.n_rows, self.n_cols) + 1
        player_pos = np.where(board == 1)
        rival_pos = np.where(board == 2)
        self.locations[PLAYER] = tuple(ax[0] for ax in player_pos)
        self.locations[RIVAL] = tuple(ax[0] for ax in rival_pos)
        self.turns = 0

    def make_move(self, time_limit, players_score):
        """Make move with this Player.

        Deepens the alpha-beta search until it reports that the end was
        reached or it raises (presumably on timeout - verify against
        AlphaBeta.search), then plays the last direction found.
        input:
            - time_limit: float, time limit for a single turn.
        output:
            - direction: tuple, specifing the Player's movement, chosen from self.directions
        """
        start_time = time.time()
        depth = 1
        reach_the_end = False
        best_direction = None
        # Fraction of the remaining time handed to each search iteration.
        time_fraction = 0.9 if time_limit >= 5 else 0.85

        while not reach_the_end:
            iter_time_limit = time_fraction * (
                time_limit - (time.time() - start_time))
            state = State(get_directions(), self.board, self.locations,
                          self.fruits_on_board_dict, PLAYER, players_score,
                          self.penalty_score, self.fruits_ttl, self.turns)
            try:
                _, best_direction, reach_the_end, _ = self.alphabeta.search(
                    state,
                    depth,
                    True,
                    iter_time_limit,
                    alpha=float('-inf'),
                    beta=float('inf'))
            except Exception:
                # Deliberate best effort: any failure of this iteration ends
                # the deepening and the previous result is played.
                break
            depth += 1

        # Set new location
        if best_direction is None:
            best_direction = self.get_random_move()
        self.set_player_location(best_direction)
        self.turns += 1
        return best_direction

    def set_rival_move(self, pos):
        """Update your info, given the new position of the rival.
        input:
            - pos: tuple, the new position of the rival.
        No output is expected
        """
        self.board[self.locations[RIVAL]] = -1
        self.locations[RIVAL] = pos
        self.board[pos] = 2
        # Check for fruit:
        self.fruits_on_board_dict.pop(pos, None)

    def update_fruits(self, fruits_on_board_dict):
        """Update your info on the current fruits on board (if needed).
        input:
            - fruits_on_board_dict: dict of {pos: value}
                                    where 'pos' is a tuple describing the fruit's position on board,
                                    'value' is the value of this fruit.
        No output is expected.
        """
        self.fruits_on_board_dict = deepcopy(fruits_on_board_dict)
        if self.fruits_ttl <= 0:
            return
        self.fruits_ttl -= 1
        # Remove all fruits if their TTL expired
        if self.fruits_ttl <= 0:
            self.board[self.board > 2] = 0

    ########## helper functions in class ##########

    def estimate_next_time(self, cu_time: float) -> float:
        """Return cu_time unchanged."""
        return cu_time

    def set_player_location(self, best_direction):
        """Move our player on the board by best_direction and eat a fruit there, if any."""
        current = self.locations[PLAYER]
        self.board[current] = -1
        best_new_location = (current[X] + best_direction[X],
                             current[Y] + best_direction[Y])
        self.board[best_new_location] = 1
        self.locations[PLAYER] = best_new_location
        # Check for fruit:
        self.fruits_on_board_dict.pop(best_new_location, None)

    def get_random_move(self):
        """Return the first direction whose target cell is on the board and free.

        Raises Exception when no such direction exists.
        """
        current = self.locations[PLAYER]
        for d in self.directions:
            # Test the neighbouring cell, not the cell we are standing on.
            (i, j) = (current[X] + d[X], current[Y] + d[Y])
            if 0 <= i < len(self.board) and 0 <= j < len(
                    self.board[0]) and self.board[i][j] not in [-1, 1, 2]:
                return d
        raise Exception('No legal moves left')
class Player(AbstractPlayer):
    """Minimax/alpha-beta player scoring states by free squares and distances."""

    def __init__(self, game_time, penalty_score):
        # keep the inheritance of the parent's (AbstractPlayer) __init__()
        AbstractPlayer.__init__(self, game_time, penalty_score)
        # Might need to add here usage of minimax.
        self.max_fruit_turn = None
        self.penalty_score = penalty_score
        self.directions = utils.get_directions()
        # TODO: Remember update this
        self.current_turn = 0
        self.board = None
        self.pos = None
        # Last known rival cell, so it can be blocked once the rival leaves it.
        self.rival_pos = None
        self.fruits_on_board_dict = {}
        self.minimax_algo = AlphaBeta(self.utility, self.succ, None)

    def set_game_params(self, board):
        """Set the game parameters needed for this player.
        This function is called before the game starts.
        (See GameWrapper.py for more info where it is called)
        input:
            - board: np.array, a 2D matrix of the board.
        No output is expected.
        """
        self.board = board
        for i, row in enumerate(board):
            for j, square in enumerate(row):
                if square == 1:
                    self.pos = (i, j)
                elif square == 2:
                    self.rival_pos = (i, j)
        self.max_fruit_turn = min(len(board), len(board[0]))

    def make_move(self, time_limit, players_score):
        """Make move with this Player.

        Searches with increasing depth until the search raises
        TimeoutError, then plays the best move of the deepest fully
        searched depth (or of the current depth when none has completed).
        input:
            - time_limit: float, time limit for a single turn.
        output:
            - direction: tuple, specifying the Player's movement, chosen from self.directions
        raises:
            - ValueError: when no neighbouring cell is free.
        """
        finish_time = time.time() + time_limit
        depth = 1
        best_move = None  # best (value, direction) of the last completed depth
        while True:
            depth_best = None
            for direction in self.directions:
                initial_state = utils.State(self.board, direction, self.pos,
                                            self.current_turn,
                                            self.fruits_on_board_dict,
                                            finish_time)
                try:
                    outcome = self.minimax_algo.search(initial_state, depth,
                                                       True)
                except TimeoutError:
                    chosen = best_move if best_move is not None else depth_best
                    return self._commit_move(
                        None if chosen is None else chosen[1])
                if depth_best is None or outcome[0] > depth_best[0]:
                    depth_best = outcome
            best_move = depth_best
            depth += 1

    def _is_free(self, board, i, j):
        """Return True when (i, j) is inside board and neither blocked nor occupied."""
        return 0 <= i < len(board) and 0 <= j < len(
            board[0]) and board[i][j] not in [-1, 1, 2]

    def _commit_move(self, direction):
        """Apply direction to our position on the board and return the direction played.

        Falls back to the first legal direction when direction is missing
        or does not lead to a free cell.
        """
        candidates = [] if direction is None else [direction]
        candidates.extend(self.directions)
        for d in candidates:
            target = (self.pos[0] + d[0], self.pos[1] + d[1])
            if self._is_free(self.board, target[0], target[1]):
                self.board[self.pos[0]][self.pos[1]] = -1
                self.pos = target
                self.board[target[0]][target[1]] = 1
                return d
        raise ValueError("No valid moves")

    def set_rival_move(self, pos):
        """Update your info, given the new position of the rival.
        input:
            - pos: tuple, the new position of the rival.
        No output is expected
        """
        # Block the cell the rival just left so only one rival marker remains.
        if self.rival_pos is not None:
            self.board[self.rival_pos[0]][self.rival_pos[1]] = -1
        self.board[pos[0]][pos[1]] = 2
        self.rival_pos = pos

    def update_fruits(self, fruits_on_board_dict):
        """Update your info on the current fruits on board (if needed).
        input:
            - fruits_on_board_dict: dict of {pos: value}
                                    where 'pos' is a tuple describing the fruit's position on board,
                                    'value' is the value of this fruit.
        No output is expected.
        """
        self.fruits_on_board_dict = fruits_on_board_dict

    ########## helper functions in class ##########
    # TODO: add here helper functions in class, if needed

    ########## helper functions for MiniMax algorithm ##########
    # TODO: add here the utility, succ, and perform_move functions used in MiniMax algorithm

    def utility(self, state, max_player):
        """Heuristic value of a state.

        Sums the number of free squares with the inverse Manhattan
        distances from our current position to the rival marker and to
        the nearest fruit cell (a free square whose value is not 0).
        """
        enemy_pos = None
        available_squares = 0
        fruit_dist = np.inf
        my_pos = self.pos
        for i, row in enumerate(state.board):
            for j, square in enumerate(row):
                if square == 2:
                    enemy_pos = (i, j)
                if square not in [-1, 1, 2]:
                    available_squares += 1
                    if square != 0:
                        fruit_dist = min(
                            fruit_dist,
                            abs(my_pos[0] - i) + abs(my_pos[1] - j))

        score = available_squares
        if enemy_pos is not None:
            distance_from_opponent = abs(my_pos[0] - enemy_pos[0]) + \
                abs(my_pos[1] - enemy_pos[1])
            if distance_from_opponent:
                score += 1 / distance_from_opponent
        if fruit_dist != np.inf and fruit_dist:
            score += 1 / fruit_dist
        # The wall-clock time is intentionally not part of the score: it made
        # the value depend on when a node was evaluated rather than on the state.
        return score

    def succ(self, state, max_player):
        """Return the successor states of state, one per legal direction."""
        lst = []
        for d in self.directions:
            i, j = state.pos[0] + d[0], state.pos[1] + d[1]
            # Legality is judged on the state's own board so moves made
            # earlier in the search are taken into account.
            if not self._is_free(state.board, i, j):
                continue
            new_board = np.copy(state.board)
            new_board[state.pos[0]][state.pos[1]] = -1
            new_board[i][j] = 1 if max_player else 2
            if state.turn + 1 == self.max_fruit_turn:
                # Fruits disappear on this turn: clear remaining fruit cells.
                for pos in self.fruits_on_board_dict:
                    if new_board[pos[0]][pos[1]] not in [-1, 1, 2]:
                        new_board[pos[0]][pos[1]] = 0
            lst.append(
                utils.State(new_board, d, (i, j), state.turn + 1,
                            self.fruits_on_board_dict, state.finish_time))
        return lst
class Player(AbstractPlayer):
    """Time-budgeted player that delegates scoring and move generation to an AlphabetaPlayer."""

    # Seconds added to time_limit when deliberately stalling (see make_move).
    _STALL_MARGIN_SECONDS = 8

    def __init__(self, game_time, penalty_score):
        # keep the inheritance of the parent's (AbstractPlayer) __init__()
        AbstractPlayer.__init__(self, game_time, penalty_score)
        #TODO: initialize more fields, if needed, and the AlphaBeta algorithm from SearchAlgos.py
        self.game_time = game_time
        self.penalty_score = penalty_score
        self.turn = 0
        self.total_time = game_time
        self.time_left = game_time
        # Time pool the per-turn budget is computed from; refreshed from
        # time_left in make_move.
        self.time_tmp = game_time
        self.ab = players.AlphabetaPlayer.Player(game_time, penalty_score)
        self.player = AlphaBeta(utility=self.ab.calc_score,
                                succ=self.ab.sorted_moves,
                                perform_move=self.ab.preform_move)

    def set_game_params(self, board):
        """Set the game parameters needed for this player.
        This function is called before the game starts.
        (See GameWrapper.py for more info where it is called)
        input:
            - board: np.array, a 2D matrix of the board.
        No output is expected.
        """
        self.cur_fruits = None
        self.board = board
        for i, row in enumerate(board):
            for j, square in enumerate(row):
                if square == 1:
                    self.self_pos = (i, j)
                if square == 2:
                    self.enemy_pos = (i, j)

    def make_move(self, time_limit, players_score):
        """Make move with this Player.
        input:
            - time_limit: float, time limit for a single turn.
        output:
            - direction: tuple, specifing the Player's movement, chosen from self.directions
        """
        have_same_time = min(len(self.board), len(self.board[0])) / 2
        time_per_turn = self.time_tmp * 2 // (3 * have_same_time)
        print("Turn ", self.turn, " time: ", time_per_turn)
        time_per_turn = min(time_per_turn, time_limit)
        start_time = time.time()
        state = State(self.board, self.penalty_score, players_score[0],
                      players_score[1], self.cur_fruits, self.turn)
        location = state.my_location

        if players_score[0] - players_score[1] > self.penalty_score:
            # If it is worthy to end the game: we lead by more than the
            # penalty, so run past the time limit on purpose and still win.
            # NOTE(review): reconstructed from a listing whose indentation was
            # lost; assumes the original loop only stalled - confirm.
            target = self.ab.get_legal_moves(state.board, location)[0]
            direction = self.ab.calc_direction(location, target)
            remaining = time_limit + self._STALL_MARGIN_SECONDS - (
                time.time() - start_time)
            if remaining > 0:
                time.sleep(remaining)
            self._apply_move(location, direction)
            return direction

        moves = get_legal_moves(state.board, location)
        if len(moves) == 1:
            # A forced move needs no search.
            direction = calc_direction(location, moves[0])
        else:
            depth = 1
            while True:
                # Always complete at least one iteration so there is a move
                # to return even when the time budget rounds down to zero.
                start_iteration = time.time()
                minimax_ret = self.player.search(state=state,
                                                 depth=depth,
                                                 maximizing_player=True)
                iteration_time = time.time() - start_iteration
                depth += 1
                elapsed = time.time() - start_time
                # total time = iter_time + 3*iter_time (the upper bound of the running time)
                if not (4 * iteration_time < time_limit - elapsed
                        and elapsed < time_per_turn):
                    break
            direction = minimax_ret[1]

        self._apply_move(location, direction)
        self.time_left -= time.time() - start_time
        if (1 + self.turn) % have_same_time == 0:
            self.time_tmp = self.time_left
        return direction

    def _apply_move(self, location, direction):
        """Mark location as blocked, place our marker one step in direction, count the turn."""
        new_pos = (location[0] + direction[0], location[1] + direction[1])
        self.board[location[0]][location[1]] = -1
        self.board[new_pos[0]][new_pos[1]] = 1
        self.self_pos = new_pos
        self.turn += 1

    def set_rival_move(self, pos):
        """Update your info, given the new position of the rival.
        input:
            - pos: tuple, the new position of the rival.
        No output is expected
        """
        self.board[self.enemy_pos[0]][self.enemy_pos[1]] = -1
        self.board[pos[0]][pos[1]] = 2
        self.enemy_pos = pos

    def update_fruits(self, fruits_on_board_dict):
        """Update your info on the current fruits on board (if needed).

        Writes fruit values onto free board cells and clears cells whose
        fruit is no longer listed.
        input:
            - fruits_on_board_dict: dict of {pos: value}
                                    where 'pos' is a tuple describing the fruit's position on board,
                                    'value' is the value of this fruit.
        No output is expected.
        """
        occupied = [-1, 1, 2]
        if self.cur_fruits is not None:
            for pos in self.cur_fruits:  # Remove old fruits
                if pos not in fruits_on_board_dict and self.board[pos[0]][
                        pos[1]] not in occupied:
                    self.board[pos[0]][pos[1]] = 0
        for pos, val in fruits_on_board_dict.items():  # Update new fruits
            if self.board[pos[0]][pos[1]] not in occupied:
                self.board[pos[0]][pos[1]] = val
        # Keep a snapshot: if the caller later mutates its dict in place, the
        # old-vs-new comparison above must still see the previous fruits.
        self.cur_fruits = dict(fruits_on_board_dict)