def calculate_ranges_for_player_action(
        initial_state: PlayerState,
        player_action: PlayerAction,
        game_board: Board,
        enemy_probability: np.ndarray,
        enemy_min_steps: np.ndarray,
        lookup_round_count: int = -1) -> np.ndarray:
    """Rate each board cell by the best success probability of reaching it.

    ``player_action`` is applied to a copy of ``initial_state``. If the
    resulting move fails ``verify_move`` an all-zero array is returned.
    Otherwise the follow-up states are expanded round by round with
    ``calculate_next_states``. A state's success probability is the
    probability of its predecessor (``state.previous[-1]``) multiplied by
    ``1 - risk``; ``risk`` is the highest ``enemy_probability`` among the
    cells in ``state.steps_to_this_point`` whose ``enemy_min_steps`` value
    does not exceed the step limit of the round (1 for the first move,
    ``round index + 2`` afterwards). Every cell keeps the highest value
    seen for any state ending on it.

    :param lookup_round_count: number of expansion rounds after the first
        move. The default ``-1`` never equals the round counter, so the
        expansion only stops once no states are left.
    :return: array of shape ``(game_board.height, game_board.width)``.
    """
    # NOTE(review): the returned ranges are discarded here; the call is
    # kept unchanged because its side effects are not visible from this
    # function.
    no_risk_full_range.calculate_ranges_for_player(game_board, initial_state)

    def highest_enemy_risk(state, step_limit):
        # The trailing 0 is the floor when no cell passes the filter.
        risks = [
            enemy_probability[y, x]
            for x, y in state.steps_to_this_point
            if enemy_min_steps[y, x] <= step_limit
        ]
        return max(risks + [0])

    success_map = np.zeros((game_board.height, game_board.width))

    acting_state = initial_state.copy()
    acting_state.do_action(player_action)
    opening_state = acting_state.do_move()
    if not opening_state.verify_move(game_board):
        return success_map

    opening_state.success_probability = 1 - highest_enemy_risk(opening_state, 1)
    success_map[opening_state.position_y, opening_state.position_x] = \
        opening_state.success_probability

    expansion_cache = {}
    frontier = [opening_state]
    rounds_done = 0
    while lookup_round_count != rounds_done and len(frontier) > 0:
        frontier = calculate_next_states(game_board, frontier, expansion_cache)
        for state in frontier:
            risk = highest_enemy_risk(state, rounds_done + 2)
            state.success_probability = \
                state.previous[-1].success_probability * (1 - risk)
            row, column = state.position_y, state.position_x
            success_map[row, column] = max(state.success_probability,
                                           success_map[row, column])
        rounds_done += 1

    return success_map
def __init__(self, width: int, height: int, player_count: int):
    """Create the board and place ``player_count`` players on random cells.

    Start cells are drawn without replacement from the product of every
    second inner x coordinate (2, 4, ... below ``board.width - 1``) and
    every second inner y coordinate (2, 4, ... below ``board.height - 1``).
    Each player gets a random direction and speed 1, and its id is written
    to the board at its start cell.

    :param width: board width passed to ``Board``.
    :param height: board height passed to ``Board``.
    :param player_count: number of players; ids run from 1 to this value.
    :raises IndexError: from ``list.pop`` when there are fewer candidate
        start cells than players.
    """
    self.board = Board(width, height)
    self.players = []

    # Init Player
    # x candidates come from the width and y candidates from the height.
    # Each start cell is (x, y), matching the PlayerState argument order
    # (direction, speed, x, y) used below.
    x_start_positions = range(2, self.board.width - 1, 2)
    y_start_positions = range(2, self.board.height - 1, 2)
    start_positions = list(
        itertools.product(x_start_positions, y_start_positions))
    random.shuffle(start_positions)

    for player_id in range(1, player_count + 1):
        start_x, start_y = start_positions.pop()
        player = Player(
            player_id,
            PlayerState(random.choice(list(PlayerDirection)), 1,
                        start_x, start_y))
        self.board.set_cell(player.current_state.position_x,
                            player.current_state.position_y,
                            player.player_id)
        self.players.append(player)

    self.is_started = False
    self.deadline = None
    self.on_round_start = Event()
    self.all_player_moved = False
def calculate_probabilities_for_player(
        board: Board,
        player_state: PlayerState,
        depth: int,
        step_offset: int = 1,
        probability_cutoff: float = 0,
        global_probability_factor: float = 1.
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Returns tuple of numpy arrays:
    - probability of reaching the given cells in the [depth] next steps
    - minimum amount of steps needed for the given player to reach the cell

    Every action whose move passes ``verify_move`` is weighted equally:
    ``global_probability_factor`` divided by the number of valid actions.
    That weight is added to each on-board cell of the move, and the move
    is explored recursively while ``depth > 1`` and the weight is above
    ``probability_cutoff``. Cells never touched keep the module's initial
    step value.
    """
    grid_shape = (board.height, board.width)
    probabilities = np.zeros(grid_shape)
    min_player_steps = np.ones(grid_shape) * __INIT_VALUE_PLAYER_STEPS

    # Collect the state after each action that results in a valid move.
    reachable_states = []
    for action in PlayerAction:
        acting_state = player_state.copy()
        acting_state.do_action(action)
        moved_state = acting_state.do_move()
        if moved_state.verify_move(board):
            reachable_states.append(moved_state)

    if len(reachable_states) == 0:
        return probabilities, min_player_steps

    local_probability_factor = (
        1 / len(reachable_states)) * global_probability_factor
    explore_deeper = depth > 1 and local_probability_factor > probability_cutoff

    for moved_state in reachable_states:
        for cell_x, cell_y in moved_state.steps_to_this_point:
            if not board.point_is_on_board(cell_x, cell_y):
                continue
            probabilities[cell_y, cell_x] += local_probability_factor
            min_player_steps[cell_y, cell_x] = step_offset

        if explore_deeper:
            deeper_probabilities, deeper_min_steps = \
                calculate_probabilities_for_player(
                    board,
                    moved_state,
                    depth=depth - 1,
                    step_offset=step_offset + 1,
                    probability_cutoff=probability_cutoff,
                    global_probability_factor=local_probability_factor)
            probabilities += deeper_probabilities
            min_player_steps = np.minimum(min_player_steps, deeper_min_steps)

    return probabilities, min_player_steps
def determine_cutting_and_fill_values(
        player_state: PlayerState, board: Board, search_length: int
) -> Tuple[Dict[PlayerAction, float], Dict[PlayerAction, float]]:
    """Compute a cutting value and a fill value for every action.

    For an action whose move fails ``verify_move`` the cutting value stays
    1.0 and the fill value 0.0. For a valid move:

    - the cutting value becomes 0.0 when marking the move's cells on a copy
      of the board yields more safe-area labels than the untouched board;
    - otherwise up to ``search_length`` cells are probed from the pre-move
      position along the post-move direction. At the first unavailable cell
      (loop index ``i``) the fill value becomes
      ``1 - (i - 1) / search_length``.

    :return: ``(cutting_values, fill_values)``, both keyed by action.
    """
    baseline_labels = get_safe_areas_labels(np.array(board.cells))
    baseline_label_count = get_safe_areas_label_count(baseline_labels)

    result_fill_values = {}
    result_cutting_values = {}
    for action in PlayerAction:
        base_state = player_state.copy()
        base_state.do_action(action)
        next_state = base_state.do_move()

        # Defaults; they remain in place when the move is invalid.
        result_fill_values[action] = 0.
        result_cutting_values[action] = 1.
        if not next_state.verify_move(board):
            continue

        # Mark the cells of this move on a copy of the board.
        occupied_after_move = np.array(board.cells)
        for x, y in next_state.steps_to_this_point:
            occupied_after_move[y, x] = 1.
        label_count_after_move = get_safe_areas_label_count(
            get_safe_areas_labels(occupied_after_move))

        if label_count_after_move > baseline_label_count:
            result_cutting_values[action] = 0.
            continue

        probe_x, probe_y = base_state.position_x, base_state.position_y
        step_x, step_y = next_state.direction.to_direction_tuple()
        for distance_idx in range(search_length):
            probe_x += step_x
            probe_y += step_y
            if board.point_is_available(probe_x, probe_y):
                continue
            result_fill_values[action] = \
                1 - ((distance_idx - 1) / search_length)
            break

    return result_cutting_values, result_fill_values
def determine_fill_values(player_state: PlayerState,
                          board: Board) -> Dict[PlayerAction, float]:
    """Rate every action with a fill value of 0.0, 0.5 or 1.0.

    - 0.0 when the move fails ``verify_move``, or when marking the move's
      cells on a copy of the board yields more safe areas than the
      untouched board has;
    - 0.5 when the corridor sub map around the new position contains at
      least one non-zero entry;
    - 1.0 otherwise.
    """
    baseline_area_count = safe_area_detection.count_safe_areas(
        np.array(board.cells))

    def rate(action):
        base_state = player_state.copy()
        base_state.do_action(action)
        next_state = base_state.do_move()
        if not next_state.verify_move(board):
            return 0.

        # Mark the cells of this move on a copy of the board.
        occupied_after_move = np.array(board.cells)
        for x, y in next_state.steps_to_this_point:
            occupied_after_move[y, x] = 1.

        area_count_after_move = safe_area_detection.count_labels(
            safe_area_detection.get_labels(occupied_after_move))
        if area_count_after_move > baseline_area_count:
            return 0.

        corridor_sub_map = __CORRIDOR_DETECTION.get_corridor_sub_map(
            occupied_after_move, next_state.position_x, next_state.position_y)
        if np.count_nonzero(corridor_sub_map) > 0:
            return 0.5
        return 1.

    return {action: rate(action) for action in PlayerAction}
def __init__(self, player_id: int, direction: PlayerDirection, speed: int,
             x_position: int, y_position: int,
             board_width: int, board_height: int):
    """Initialise the player and the statistics tracked about its behavior.

    The base class receives ``player_id`` and a ``PlayerState`` built from
    the direction, speed and position. The board dimensions are stored on
    the instance and every statistic starts from the values set below.
    """
    initial_state = PlayerState(direction, speed, x_position, y_position)
    super().__init__(player_id, initial_state)

    self.board_width, self.board_height = board_width, board_height

    # Properties for the analytic evaluation of opponents' behavior
    # The speed statistics all start at the initial speed.
    self.min_speed = self.max_speed = self.avg_speed = speed

    self.walked_cells = 1
    self.jumped_cells = 1

    self.radius = 0.5
    start_cell = (x_position, y_position)
    self.center_cell_per_round = [start_cell]
    self.center_cell_differences = []
    self.median_per_round = [start_cell]
    self.avg_distance_to_median = 0

    self.prevent_potential_collisions = 0
    self.taken_potential_collisions = 0
    self.aggressiveness = 0
def handle_step(self, step_info, slice_viewer):
    """Pick the next action by following a randomly chosen full-range path.

    The enemy tracker and the local board are updated from ``step_info``.
    When a full-range result from an earlier round exists it is recycled
    through ``update_full_range_result`` and shown in the viewer. A fresh
    full range (lookup depth 8) is then calculated, and the action is taken
    from a random reachable state's path. If no state is reachable a random
    action is used. The chosen action is applied to the local player state
    and returned.
    """
    def flatten(full_range):
        # position -> direction -> speed -> state; collect the states
        states = []
        for directions in full_range.values():
            for speeds in directions.values():
                states.extend(speeds.values())
        return states

    def rounds_needed_grid(options):
        # Per cell: smallest number of rounds (relative to the current one)
        # after which any option ends there; 0 marks "no value yet".
        grid = np.zeros((step_info["height"], step_info["width"]))
        for option in options:
            column, row = option.position_x, option.position_y
            known = grid[row, column]
            candidate = option.game_round - self.roundCounter
            grid[row, column] = candidate if known == 0 else min(known, candidate)
        return grid

    new_occupied_cells = self.enemies.update(step_info)
    self.roundCounter += 1
    own_player = step_info["players"][str(step_info["you"])]

    # init on first step info
    if self.roundCounter == 1:
        self.board = Board(step_info["width"], step_info["height"])
        self.playerState = PlayerState(
            PlayerDirection[own_player["direction"].upper()],
            own_player["speed"], own_player["x"], own_player["y"],
            self.roundCounter)

    # update cells
    self.board.cells = step_info["cells"]

    if self.full_range_result:
        # recycle the last full_range result, ignoring the cells the own
        # player occupied itself
        own_steps = self.playerState.steps_to_this_point
        new_enemies_occupied_cells = [
            cell for cell in new_occupied_cells if cell not in own_steps
        ]
        recycled_result = update_full_range_result(
            self.playerState.game_round,
            self.playerState.get_position_tuple(),
            self.full_range_result,
            new_enemies_occupied_cells)

        # add new path options to viewer
        slice_viewer.add_data("recycled_full_range_steps",
                              rounds_needed_grid(flatten(recycled_result)))

    # calculate action
    self.full_range_result = no_risk_full_range.calculate_ranges_for_player(
        self.board, self.playerState, 8)
    path_options = flatten(self.full_range_result)

    if len(path_options) == 0:
        # random action if no way to survive
        action = random.choice(list(PlayerAction))
    else:
        target_state = random.choice(path_options)
        path = target_state.previous + [target_state]
        action = path[self.roundCounter - 1].action

    # add path options to viewer
    slice_viewer.add_data("full_range_steps", rounds_needed_grid(path_options))

    # apply action to local model
    self.playerState.do_action(action)
    self.playerState = self.playerState.do_move()

    return action
def handle_step(self, step_info, slice_viewer):
    """Choose the action with the highest weighted evaluation.

    Each action is rated by four terms, multiplied by the class weights
    ``REACHABLE_POINT_WEIGHT``, ``CUTTING_WEIGHT``, ``FILL_WEIGHT`` and
    ``SLOW_FORCE_WEIGHT``:

    - reachable: sum of the action's full-range probability map, divided
      by the largest such sum (at least 1);
    - cutting / fill: values from
      ``basic_cut_fill_area_detection.determine_cutting_and_fill_values``;
    - slow: 1.0 for ``SLOW_DOWN`` when that move is valid, else 0.0.

    Enemy predictions, safe areas, risk areas and the chosen action's
    probability map are sent to ``slice_viewer``. The chosen action is
    applied to the local player state and returned.
    """
    self.roundCounter += 1
    own_id = str(step_info["you"])
    own_player = step_info["players"][own_id]

    # init on first step info
    if self.roundCounter == 1:
        self.board = Board(step_info["width"], step_info["height"])
        self.playerState = PlayerState(
            PlayerDirection[own_player["direction"].upper()],
            own_player["speed"], own_player["x"], own_player["y"],
            self.roundCounter)

    # update cells
    self.board.cells = step_info["cells"]

    # build enemy player states
    enemy_player_states = []
    for player_id, player in step_info["players"].items():
        if own_id != player_id and player["active"]:
            enemy_player_states.append(
                PlayerState(PlayerDirection[player["direction"].upper()],
                            player["speed"], player["x"], player["y"],
                            self.roundCounter))

    # calculate enemy prediction
    enemy_probabilities, enemy_min_steps = \
        probability_based_prediction.calculate_probabilities_for_players(
            self.board, enemy_player_states, depth=7)

    # add enemy prediction to viewer
    slice_viewer.add_data("enemy_probability", enemy_probabilities,
                          normalize=False)
    slice_viewer.add_data("enemy_min_steps", enemy_min_steps, normalize=True)

    # add safe_area sizes to viewer
    safe_areas, safe_area_labels = get_risk_evaluated_safe_areas(self.board)
    safe_area_sizes = np.zeros(safe_area_labels.shape)
    for area in safe_areas:
        for point in area.points:
            safe_area_sizes[point[1], point[0]] = area.risk
    slice_viewer.add_data("safe_area_sizes", safe_area_sizes, normalize=False)

    # add risk_area to viewer
    slice_viewer.add_data("risk_evaluation",
                          risk_area_calculation.calculate_risk_areas(self.board),
                          normalize=False)

    # get full range result for each possible action
    player_action_array = list(PlayerAction)
    input_array = [(self.playerState, player_action, self.board,
                    enemy_probabilities, enemy_min_steps)
                   for player_action in player_action_array]
    # The with-block tears the worker processes down even when a worker
    # raises; starmap blocks until all results are available, so nothing
    # is lost when the pool is terminated on exit.
    with mp.Pool(mp.cpu_count()) as pool:
        path_option_results = pool.starmap(
            enemy_probability_full_range.calculate_ranges_for_player_action,
            input_array)
    # starmap keeps input order, so results pair up with their actions.
    full_range_results = dict(zip(player_action_array, path_option_results))

    # calculate reachable points for full range results
    # (the floor of 1 avoids a division by zero when nothing is reachable)
    max_reachable_points_value = max(
        max(np.sum(full_range_result)
            for full_range_result in full_range_results.values()),
        1)
    reachable_points = {
        player_action: np.sum(full_range_result) / max_reachable_points_value
        for player_action, full_range_result in full_range_results.items()
    }

    # calculate action distribution for full range results
    cutting_distribution, fill_distribution = \
        basic_cut_fill_area_detection.determine_cutting_and_fill_values(
            self.playerState, self.board, 4)

    # build slow down force
    slow_force = {player_action: 0. for player_action in PlayerAction}
    slow_base_state = self.playerState.copy()
    slow_base_state.do_action(PlayerAction.SLOW_DOWN)
    slow_next_state = slow_base_state.do_move()
    if slow_next_state.verify_move(self.board):
        slow_force[PlayerAction.SLOW_DOWN] = 1.

    # calculate weighted evaluation for each possible action
    print(f"\t\t\treachable:\t\t{reachable_points}")
    print(f"\t\t\tcutting:\t\t{cutting_distribution}")
    print(f"\t\t\tfill:\t\t\t{fill_distribution}")
    print(f"\t\t\tslow:\t\t\t{slow_force}")
    weighted_action_evaluation = {
        action:
            reachable_points[action] * self.REACHABLE_POINT_WEIGHT
            + cutting_distribution[action] * self.CUTTING_WEIGHT
            + fill_distribution[action] * self.FILL_WEIGHT
            + slow_force[action] * self.SLOW_FORCE_WEIGHT
        for action in PlayerAction
    }
    print(f"\t\t\tevaluation:\t\t{weighted_action_evaluation}")

    # choose the action with the highest value
    action = max(weighted_action_evaluation,
                 key=weighted_action_evaluation.get)

    # add reachable points to viewer
    slice_viewer.add_data("full_range_probability",
                          full_range_results[action], normalize=False)

    # apply action to local model
    self.playerState.do_action(action)
    self.playerState = self.playerState.do_move()

    return action
def calculate_ranges_for_player(board: Board, initial_state: PlayerState,
                                lookup_round_count: int = -1,
                                updated_last_result=None) \
        -> Dict[Tuple[int, int], Dict[PlayerDirection, Dict[int, PlayerState]]]:
    """Expand the reachable states of a player round by round.

    The expansion starts from ``initial_state`` plus every state already
    stored in ``updated_last_result`` and calls ``calculate_next_states``
    once per round, which receives the result mapping.

    :param lookup_round_count: number of rounds to expand. The default
        ``-1`` never equals the round counter, so the expansion only stops
        once no states are left.
    :param updated_last_result: optional earlier result to continue from;
        this very object is returned. A new dict is used when omitted.
    :return: the result mapping, nested as position -> direction -> speed
        -> state according to the return annotation.
    """
    result_data = {} if updated_last_result is None else updated_last_result

    frontier = [initial_state]
    for directions in result_data.values():
        for speeds in directions.values():
            frontier.extend(speeds.values())

    rounds_done = 0
    while lookup_round_count != rounds_done and len(frontier) > 0:
        frontier = calculate_next_states(board, frontier, result_data)
        rounds_done += 1

    return result_data


if __name__ == "__main__":
    # Timing run: full range on an empty 64x64 board.
    start = time.time()
    print(F"start full_range @{datetime.now().time()}")
    full_range = calculate_ranges_for_player(
        Board(64, 64), PlayerState(PlayerDirection.DOWN, 1, 4, 4))
    print(len(full_range))
    end = time.time()
    print(F"total seconds: {end - start}")
    print(F"end full_range @{datetime.now().time()}")
def handle_step(self, step_info, slice_viewer):
    """Pick the action that starts the most paths of the full-range result.

    The enemy probabilities are thresholded in place (values above 0.19
    become 1, everything else 0) and written to the board as its cells
    before the full range is calculated. Each reachable state votes for the
    action its path takes in the current round, and the action with the
    most votes wins. If no state is reachable a random action is used. The
    chosen action is applied to the local player state and returned.
    """
    self.roundCounter += 1
    own_id = str(step_info["you"])
    own_player = step_info["players"][own_id]

    # init on first step info
    if self.roundCounter == 1:
        self.board = Board(step_info["width"], step_info["height"])
        own_direction = PlayerDirection[own_player["direction"].upper()]
        self.playerState = PlayerState(
            own_direction, own_player["speed"],
            own_player["x"], own_player["y"], self.roundCounter)

    # update cells
    self.board.cells = step_info["cells"]

    # build enemy player states
    enemy_player_states = [
        PlayerState(PlayerDirection[player["direction"].upper()],
                    player["speed"], player["x"], player["y"],
                    self.roundCounter)
        for player_id, player in step_info["players"].items()
        if own_id != player_id and player["active"]
    ]

    # calculate enemy prediction
    enemy_probabilities, enemy_min_steps = \
        probability_based_prediction.calculate_probabilities_for_players(
            self.board, enemy_player_states, depth=7)

    # add enemy prediction to viewer
    slice_viewer.add_data("enemy_probability", enemy_probabilities,
                          normalize=False)
    slice_viewer.add_data("enemy_min_steps", enemy_min_steps, normalize=True)

    # add safe_area sizes to viewer
    safe_areas, safe_area_labels = get_risk_evaluated_safe_areas(self.board)
    safe_area_sizes = np.zeros(safe_area_labels.shape)
    for area in safe_areas:
        for point_x, point_y, *_ in map(tuple, area.points):
            safe_area_sizes[point_y, point_x] = area.risk
    slice_viewer.add_data("safe_area_sizes", safe_area_sizes, normalize=False)

    # add risk_area to viewer
    risk_areas = risk_area_calculation.calculate_risk_areas(self.board)
    slice_viewer.add_data("risk_evaluation", risk_areas, normalize=False)

    # apply threshold to probabilities (in place)
    enemy_probabilities[enemy_probabilities > 0.19] = 1
    enemy_probabilities[enemy_probabilities != 1] = 0

    # update board with probabilities
    self.board.cells = enemy_probabilities.tolist()

    # calculate action
    full_range_result = no_risk_full_range.calculate_ranges_for_player(
        self.board, self.playerState)
    path_options = []
    for directions in full_range_result.values():
        for speeds in directions.values():
            path_options.extend(speeds.values())

    if len(path_options) == 0:
        # random action if no way to survive
        action = random.choice(list(PlayerAction))
    else:
        # determine action with highest amount of reachable points
        action_histogram = dict.fromkeys(PlayerAction, 0)
        current_index = self.roundCounter - 1
        for path_option in path_options:
            path = path_option.previous + [path_option]
            action_histogram[path[current_index].action] += 1
        action = max(action_histogram, key=action_histogram.get)

    # add path options to viewer
    path_steps_array = np.zeros((step_info["height"], step_info["width"]))
    for option in path_options:
        column, row = option.position_x, option.position_y
        known = path_steps_array[row, column]
        candidate = option.game_round - self.roundCounter
        # 0 marks "no value yet"; otherwise keep the smaller round count
        path_steps_array[row, column] = \
            candidate if known == 0 else min(known, candidate)
    slice_viewer.add_data("full_range_steps", path_steps_array)

    # apply action to local model
    self.playerState.do_action(action)
    self.playerState = self.playerState.do_move()

    return action
def handle_step(self, step_info, slice_viewer):
    """Choose the action with the highest weighted path-finder evaluation.

    Each action is rated by three terms, multiplied by the class weights
    ``REACHABLE_POINT_WEIGHT``, ``FILL_WEIGHT`` and ``SLOW_FORCE_WEIGHT``:

    - reachable: sum of the action's path-finder rating map, divided by the
      largest such sum (at least 1);
    - fill: value from ``corridor_fill_detection.determine_fill_values``;
    - slow: 1.0 for ``SLOW_DOWN`` when that move is valid, else 0.0.

    The path finder is created in the first round and updated every round.
    The chosen action is applied to the local player state and returned.
    """
    self.roundCounter += 1
    own_id = str(step_info["you"])
    own_player = step_info["players"][own_id]

    # init on first step info
    if self.roundCounter == 1:
        self.board = Board(step_info["width"], step_info["height"])
        own_direction = PlayerDirection[own_player["direction"].upper()]
        self.playerState = PlayerState(
            own_direction, own_player["speed"],
            own_player["x"], own_player["y"], self.roundCounter)
        self.pathFinder = BidirectionalPathFinder(self.board, 5, 2)

    # update cells
    self.board.cells = step_info["cells"]

    # build enemy player states
    enemy_player_states = [
        PlayerState(PlayerDirection[player["direction"].upper()],
                    player["speed"], player["x"], player["y"],
                    self.roundCounter)
        for player_id, player in step_info["players"].items()
        if own_id != player_id and player["active"]
    ]

    # calculate enemy prediction
    enemy_probabilities, enemy_min_steps = \
        probability_based_prediction.calculate_probabilities_for_players(
            self.board, enemy_player_states, depth=7)

    # add enemy prediction to viewer
    slice_viewer.add_data("enemy_probability", enemy_probabilities,
                          normalize=False)
    slice_viewer.add_data("enemy_min_steps", enemy_min_steps, normalize=True)

    # get path finder results for each possible action
    self.pathFinder.update(self.board, self.playerState,
                           enemy_probabilities, enemy_min_steps)
    rating_maps = self.pathFinder.get_result_rating_map()
    steps_maps = self.pathFinder.get_result_steps_map()

    # calculate reachable points for full range results
    # (the floor of 1 avoids a division by zero when every sum is 0)
    rating_sums = [np.sum(result) for result in rating_maps.values()]
    max_reachable_points_value = max(max(rating_sums), 1)
    reachable_points = {}
    for player_action, result in rating_maps.items():
        reachable_points[player_action] = \
            np.sum(result) / max_reachable_points_value

    # calculate action distribution for full range results
    fill_distribution = corridor_fill_detection.determine_fill_values(
        self.playerState, self.board)

    # build slow down force
    slow_force = dict.fromkeys(PlayerAction, 0.)
    slowed_state = self.playerState.copy()
    slowed_state.do_action(PlayerAction.SLOW_DOWN)
    if slowed_state.do_move().verify_move(self.board):
        slow_force[PlayerAction.SLOW_DOWN] = 1.

    # calculate weighted evaluation for each possible action
    print(f"\t\t\treachable:\t\t{reachable_points}")
    print(f"\t\t\tfill:\t\t\t{fill_distribution}")
    print(f"\t\t\tslow:\t\t\t{slow_force}")
    weighted_action_evaluation = {}
    for action in PlayerAction:
        weighted_action_evaluation[action] = (
            reachable_points[action] * self.REACHABLE_POINT_WEIGHT
            + fill_distribution[action] * self.FILL_WEIGHT
            + slow_force[action] * self.SLOW_FORCE_WEIGHT)
    print(f"\t\t\tevaluation:\t\t{weighted_action_evaluation}")

    # choose the action with the highest value
    action = max(weighted_action_evaluation,
                 key=weighted_action_evaluation.get)

    # add reachable points to viewer
    slice_viewer.add_data("reachable_points_rating", rating_maps[action],
                          normalize=False)
    slice_viewer.add_data("reachable_points_steps", steps_maps[action],
                          normalize=True)

    # apply action to local model
    self.playerState.do_action(action)
    self.playerState = self.playerState.do_move()

    return action
def handle_step(self, step_info, slice_viewer):
    """Choose the action whose weighted reachable points sum is highest.

    Enemy probabilities are predicted with depth 15 and a probability
    cutoff of 0.001. ``self.get_weighted_points_for_action`` is then run
    for every action in a process pool, and the action with the largest
    result sum is selected. Enemy predictions, safe areas, risk areas and
    the winning action's weighted points are sent to ``slice_viewer``. The
    chosen action is applied to the local player state and returned.
    """
    self.roundCounter += 1
    own_id = str(step_info["you"])
    own_player = step_info["players"][own_id]

    # init on first step info
    if self.roundCounter == 1:
        self.board = Board(step_info["width"], step_info["height"])
        self.playerState = PlayerState(
            PlayerDirection[own_player["direction"].upper()],
            own_player["speed"], own_player["x"], own_player["y"],
            self.roundCounter)

    # update cells
    self.board.cells = step_info["cells"]

    # build enemy player states
    enemy_player_states = []
    for player_id, player in step_info["players"].items():
        if own_id != player_id and player["active"]:
            enemy_player_states.append(
                PlayerState(PlayerDirection[player["direction"].upper()],
                            player["speed"], player["x"], player["y"],
                            self.roundCounter))

    # calculate enemy prediction
    enemy_probabilities, enemy_min_steps = \
        probability_based_prediction.calculate_probabilities_for_players(
            self.board, enemy_player_states, depth=15,
            probability_cutoff=0.001)

    # add enemy prediction to viewer
    slice_viewer.add_data("enemy_probability", enemy_probabilities,
                          normalize=False)
    slice_viewer.add_data("enemy_min_steps", enemy_min_steps, normalize=True)

    # add safe_area sizes to viewer
    safe_areas, safe_area_labels = get_risk_evaluated_safe_areas(self.board)
    safe_area_sizes = np.zeros(safe_area_labels.shape)
    for area in safe_areas:
        for point in area.points:
            safe_area_sizes[point[1], point[0]] = area.risk
    slice_viewer.add_data("safe_area_sizes", safe_area_sizes, normalize=False)

    # add risk_area to viewer
    slice_viewer.add_data("risk_evaluation",
                          risk_area_calculation.calculate_risk_areas(self.board),
                          normalize=False)

    # determine amount of reachable points for each action
    pool_input_array = [(player_action, enemy_probabilities, enemy_min_steps)
                        for player_action in PlayerAction]
    # The with-block tears the worker processes down even when a worker
    # raises; starmap blocks until all results are available, so nothing
    # is lost when the pool is terminated on exit.
    with mp.Pool(mp.cpu_count()) as pool:
        weighted_points_results = pool.starmap(
            self.get_weighted_points_for_action, pool_input_array)

    # starmap keeps input order, so results pair up with their inputs.
    action_rating = {}
    action_weight_mapping = {}
    for pool_input, weighted_points_result in zip(pool_input_array,
                                                  weighted_points_results):
        player_action = pool_input[0]
        action_weight_mapping[player_action] = weighted_points_result
        action_rating[player_action] = np.sum(weighted_points_result)

    # choose the action with the highest value
    action = max(action_rating, key=action_rating.get)

    # add weighted points to viewer
    slice_viewer.add_data("weighted_points", action_weight_mapping[action],
                          normalize=True)

    # apply action to local model
    self.playerState.do_action(action)
    self.playerState = self.playerState.do_move()

    return action
result_data) # Weight next_status for state in next_states: step_risk_sum = 0 step_count = 0 for step in state.steps_to_this_point: step_risk_sum += game_board[step[1]][step[0]] step_count += 1 state_risk = step_risk_sum / step_count prev_length = len(state.previous) - 1 prev_risk = state.previous[-1].optional_risk state.optional_risk = (state_risk + prev_risk * prev_length) / (prev_length + 1) current_round += 1 return result_data if __name__ == "__main__": board = Board(5, 10) for i in range(0, board.height): for j in range(0, board.width): board[i][j] = random() calculate_ranges_for_player(board, PlayerState(PlayerDirection.RIGHT, 1, 0, 0))
def handle_step(self, step_info, slice_viewer):
    """Choose the action with the most path options, discounted by risk.

    The enemy probabilities are thresholded in place (values above 0.19
    become 1, everything else 0) and written to the board as its cells.
    ``self.get_full_range_path_options_for_action`` is run for every action
    in a process pool. The number of path options of an action is
    multiplied by ``1 - p``, where ``p`` is the highest thresholded enemy
    probability on the cells of the action's move, counting only cells
    whose ``enemy_min_steps`` value is 1. The action with the largest
    product is applied to the local player state and returned.
    """
    self.roundCounter += 1
    own_id = str(step_info["you"])
    own_player = step_info["players"][own_id]

    # init on first step info
    if self.roundCounter == 1:
        self.board = Board(step_info["width"], step_info["height"])
        self.playerState = PlayerState(
            PlayerDirection[own_player["direction"].upper()],
            own_player["speed"], own_player["x"], own_player["y"],
            self.roundCounter)

    # update cells
    self.board.cells = step_info["cells"]

    # build enemy player states
    enemy_player_states = []
    for player_id, player in step_info["players"].items():
        if own_id != player_id and player["active"]:
            enemy_player_states.append(
                PlayerState(PlayerDirection[player["direction"].upper()],
                            player["speed"], player["x"], player["y"],
                            self.roundCounter))

    # calculate enemy prediction
    enemy_probabilities, enemy_min_steps = \
        probability_based_prediction.calculate_probabilities_for_players(
            self.board, enemy_player_states, depth=7)

    # add enemy prediction to viewer
    slice_viewer.add_data("enemy_probability", enemy_probabilities,
                          normalize=False)
    slice_viewer.add_data("enemy_min_steps", enemy_min_steps, normalize=True)

    # add safe_area sizes to viewer
    safe_areas, safe_area_labels = get_risk_evaluated_safe_areas(self.board)
    safe_area_sizes = np.zeros(safe_area_labels.shape)
    for area in safe_areas:
        for point in area.points:
            safe_area_sizes[point[1], point[0]] = area.risk
    slice_viewer.add_data("safe_area_sizes", safe_area_sizes, normalize=False)

    # add risk_area to viewer
    slice_viewer.add_data("risk_evaluation",
                          risk_area_calculation.calculate_risk_areas(self.board),
                          normalize=False)

    # apply threshold to probabilities (in place)
    enemy_probabilities[enemy_probabilities > 0.19] = 1
    enemy_probabilities[enemy_probabilities != 1] = 0

    # update board with probabilities
    self.board.cells = enemy_probabilities.tolist()

    # determine amount of reachable points for each action
    player_action_array = list(PlayerAction)
    # The with-block tears the worker processes down even when a worker
    # raises; map blocks until all results are available, so nothing is
    # lost when the pool is terminated on exit.
    with mp.Pool(mp.cpu_count()) as pool:
        path_option_results = pool.map(
            self.get_full_range_path_options_for_action, player_action_array)
    # map keeps input order, so results pair up with their actions.
    path_options_by_action = dict(zip(player_action_array,
                                      path_option_results))
    action_histogram = {
        player_action: len(options)
        for player_action, options in path_options_by_action.items()
    }

    # apply inverse weight based on probability of next possible enemy step
    probabilities_in_next_step = np.copy(enemy_probabilities)
    probabilities_in_next_step[enemy_min_steps != 1] = 0
    # Only values of existing keys are replaced, which is safe while
    # iterating over items().
    for action, possible_points_count in action_histogram.items():
        if possible_points_count > 0:
            current_player_state = self.playerState.copy()
            current_player_state.do_action(action)
            possible_next_player_state = current_player_state.do_move()
            max_probability_of_steps = 0
            for x, y in possible_next_player_state.steps_to_this_point:
                max_probability_of_steps = max(
                    probabilities_in_next_step[y, x],
                    max_probability_of_steps)
            action_histogram[action] = \
                (1 - max_probability_of_steps) * possible_points_count

    # choose the action with the highest value
    action = max(action_histogram, key=action_histogram.get)

    # add path options to viewer
    path_steps_array = np.zeros((step_info["height"], step_info["width"]))
    for option in path_options_by_action[action]:
        x = option.position_x
        y = option.position_y
        current_value = path_steps_array[y, x]
        new_value = option.game_round - self.roundCounter
        # 0 marks "no value yet"; otherwise keep the smaller round count
        path_steps_array[y, x] = \
            new_value if current_value == 0 else min(current_value, new_value)
    slice_viewer.add_data("full_range_steps", path_steps_array)

    # apply action to local model
    self.playerState.do_action(action)
    self.playerState = self.playerState.do_move()

    return action