def get_next_vertex(current_vertex: Vertex, edge_name: str, step_cost: Callable,
                    env_config: EnvironmentConfiguration) -> Vertex:
    """
    :param current_vertex: the current vertex
    :param edge_name: name of the edge from the current vertex to the next vertex
    :param step_cost: function that receives parent_vertex, action, new_node and returns the step cost
    :param env_config: environment configuration
    :return: the new vertex
    """
    current_state = current_vertex.get_state()
    current_vertex_name = current_vertex.get_vertex_name()
    edges_dict = env_config.get_edges()
    vertexes_dict = env_config.get_vertexes()
    if edge_name not in edges_dict:
        # Unknown edge: treat the move as a no-op and charge only the cost of staying put.
        current_vertex.set_state(current_state)
        print("No operation for this agent")
        current_vertex.set_cost(
            current_vertex.get_cost()
            + step_cost(current_vertex, Edge("", 0, ("", "")), current_vertex))
        return current_vertex  # No operation
    edge = edges_dict[edge_name]
    first_vertex, sec_vertex = edge.get_vertex_names()
    next_vertex_name = first_vertex if sec_vertex == current_vertex_name else sec_vertex
    next_vertex = vertexes_dict[next_vertex_name]
    next_state = State(next_vertex_name,
                       copy.deepcopy(current_state.get_required_vertexes()))
    if next_vertex_name in current_state.get_required_vertexes():
        next_state.set_visited_vertex(next_vertex_name)
    next_vertex.set_state(next_state)
    people_in_next_vertex = next_vertex.get_people_num()
    new_next_vertex = Vertex(people_in_next_vertex, next_state, next_vertex.get_edges(),
                             current_vertex, edge.get_edge_name(), current_vertex.get_depth(),
                             EnvironmentUtils.g(current_vertex, env_config)
                             + step_cost(current_vertex, edge, next_vertex))
    return new_next_vertex
def __make_node(self, state: State, env_conf: EnvironmentConfiguration):
    name = state.get_current_vertex_name()
    vertex = env_conf.get_vertexes()[name]
    vertex.set_state(state)
    # Cost of the node is the number of required vertexes that are still unvisited.
    vertex.set_cost(len(state.get_required_vertexes())
                    - sum(state.get_required_vertexes().values()))
    return vertex
def terminate_eval(state: State, mode: str, is_max_player: bool) -> Tuple[int, int]:
    if mode == 'adversarial':
        utility = TerminalEvaluator.adversarial_utility_eval(state.get_scores_of_agents())
        print("terminated state, utility= ", utility)
        return utility
    if mode == "semi-cooperative":
        return TerminalEvaluator.semi_cooperative_utility_eval(state.get_scores_of_agents())
    else:  # cooperative mode
        return TerminalEvaluator.full_cooperative_utility_eval(state.get_scores_of_agents(),
                                                               is_max_player)
def cut_off_utility_eval(state: State, is_max_player: bool,
                         vertexes_dict: Dict[str, Vertex]) -> Tuple[int, int]:
    # Vertexes that still need to be visited (their people are not yet saved).
    left_vertexes_to_visit = [state_name for state_name in state.get_required_vertexes().keys()
                              if not state.get_required_vertexes()[state_name]]
    left_people_to_visit = 0
    for left_vertex_to_visit in left_vertexes_to_visit:
        left_people_to_visit += vertexes_dict[left_vertex_to_visit].get_people_num()
    max_player_score, min_player_score = state.get_scores_of_agents()
    # Optimistically credit all remaining people to the player whose turn it is.
    if is_max_player:
        max_player_score += left_people_to_visit
    else:
        min_player_score += left_people_to_visit
    print("cut_off= ", str((max_player_score, min_player_score)))
    return max_player_score, min_player_score
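# Worked illustration of the cut-off estimate (hedged; plain values stand in for the real
# State/Vertex objects): with agent scores (3, 0), required vertexes {"V2": False, "V3": True}
# and 5 people at V2, only V2 is still unvisited, so it contributes 5 and the max player's
# estimate is (3 + 5, 0) = (8, 0). All remaining people are credited to the player on move,
# which makes this an optimistic bound rather than an exact utility.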
def get_goal_state(env_config: EnvironmentConfiguration) -> State:
    temp_dict = EnvironmentUtils.get_required_vertexes(env_config)
    # In the goal state every required vertex is marked as visited.
    goal_dict = {k: True for k in temp_dict}
    goal_state = State("", goal_dict)
    return goal_state
def calc_estimation_from_goal(self, current_state: State, goal_state: State):
    # Heuristic estimate: the number of required vertexes that have not been visited yet.
    vertex_to_is_visited = current_state.get_required_vertexes()
    counter = 0
    for _, was_visited in vertex_to_is_visited.items():
        if not was_visited:
            counter += 1
    return counter
def get_saved_people_num(state: State, current_traveled_states,
                         env_conf: EnvironmentConfiguration) -> int:
    score = 0
    # Only count vertexes that were not already credited in current_traveled_states.
    traveled_vertexes = [vertex_name
                         for vertex_name in StateUtils.get_state_traveled_vertexes(state)
                         if vertex_name not in current_traveled_states]
    current_traveled_states.append(state.get_current_vertex_name())
    vertexes_dict = env_conf.get_vertexes()
    for vertex in traveled_vertexes:
        score += vertexes_dict[vertex].get_people_num()
    return score
def get_possible_moves(current_state: State,
                       env_config: EnvironmentConfiguration) -> List[Edge]:
    current_vertex_name = current_state.get_current_vertex_name()
    vertexes_dict = env_config.get_vertexes()
    # Blocked edges are excluded both from the edge dict and from the vertex's edge list.
    edges_dict = {k: v for k, v in env_config.get_edges().items()
                  if k not in env_config.get_blocked_edges()}
    current_vertex = vertexes_dict[current_vertex_name]
    names_of_edges = [edge for edge in current_vertex.get_edges()
                      if edge not in env_config.get_blocked_edges()]
    possible_edges = [edges_dict[edge_name] for edge_name in names_of_edges]
    return possible_edges
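# Usage sketch (hedged): given a state whose current vertex is known to env_config, the
# legal actions are the names of the unblocked edges touching that vertex, e.g.
#   edges = EnvironmentUtils.get_possible_moves(state, env_config)
#   actions = [edge.get_edge_name() for edge in edges]
# This mirrors how minimax() below builds its possible_actions list.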
def update_func(self, agent: IAgent, action: str, current_state: State,
                costs_info: Tuple[List, int], env_conf: EnvironmentConfiguration):
    vertex = env_conf.get_vertexes()[current_state.get_current_vertex_name()]
    vertex.set_state(current_state)
    new_state = EnvironmentUtils.get_next_vertex(vertex, action, agent.step_cost,
                                                 env_conf).get_state()
    new_state.set_visited_vertex(new_state.get_current_vertex_name())
    costs, agent_num = costs_info
    edges_dict = env_conf.get_edges()
    if action in edges_dict.keys():
        costs[agent_num] += edges_dict[action].get_weight()
    return new_state
def create_vertex(input_line: str) -> Optional[Tuple[str, Vertex]]:
    parts = input_line.split(ConfigurationReader.SPACE_SEPARATOR)
    parts_length = len(parts)
    peoples_in_vertex = 0
    if parts_length > 2 or parts_length == 0:
        print(f'input line: {input_line} is invalid. Correct format: #V4 P2 or #V4')
        return None
    if parts_length == 2:
        peoples_in_vertex = int(parts[1].replace("P", ""))
    name = parts[0].replace("#V", "")
    return name, Vertex(peoples_in_vertex, State(name, (0, 0)), [])
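# Example input lines this parser accepts (matching the error message above):
#   "#V4 P2" -> ("4", a Vertex with 2 people)
#   "#V4"    -> ("4", a Vertex with 0 people)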
def __result(self, action: str, state: State, is_max: bool,
             env_config: EnvironmentConfiguration) -> State:
    """
    :param action: edge name
    :param state: current state
    :param is_max: True if the moving agent is the max player
    :return: next state after moving on edge action from the given state
    """
    next_vertex = env_config.get_vertexes()[state.get_current_vertex_name()]
    next_vertex.set_state(state)
    return EnvironmentUtils.get_next_vertex(next_vertex, action, self.step_cost,
                                            env_config, is_max).get_state()
def get_next_vertex(current_vertex: Vertex, edge_name: str, step_cost: Callable,
                    env_config: EnvironmentConfiguration,
                    is_max_player: bool = True) -> Vertex:
    """
    :param current_vertex: the current vertex
    :param edge_name: name of the edge from the current vertex to the next vertex
    :param step_cost: function that receives parent_vertex, action, new_node and returns the step cost
    :param is_max_player: True if this is the max player, False otherwise
    :param env_config: environment configuration
    :return: the new vertex
    """
    current_state = current_vertex.get_state()
    current_vertex_name = current_vertex.get_vertex_name()
    edges_dict = env_config.get_edges()
    vertexes_dict = env_config.get_vertexes()
    if edge_name not in edges_dict:
        # Unknown edge: treat the move as a no-op and charge only the cost of staying put.
        current_vertex.set_state(current_state)
        print("edge_name= ", edge_name)
        print("No operation for this agent")
        current_vertex.set_cost(
            current_vertex.get_cost()
            + step_cost(current_vertex, Edge("", 0, ("", "")), current_vertex))
        return current_vertex  # No operation
    edge = edges_dict[edge_name]
    first_vertex, sec_vertex = edge.get_vertex_names()
    next_vertex_name = first_vertex if sec_vertex == current_vertex_name else sec_vertex
    next_vertex = vertexes_dict[next_vertex_name]
    scores_of_agents = current_state.get_scores_of_agents()
    # Credit the people at the next vertex to the moving player if it is a required
    # vertex that has not been visited yet.
    if (next_vertex_name in current_state.get_required_vertexes()
            and not current_state.get_required_vertexes()[next_vertex_name]):
        scores_of_agents = (
            (scores_of_agents[0] + next_vertex.get_people_num(), scores_of_agents[1])
            if is_max_player
            else (scores_of_agents[0], scores_of_agents[1] + next_vertex.get_people_num()))
    next_state = State(next_vertex_name, scores_of_agents,
                       copy.deepcopy(current_state.get_required_vertexes()),
                       current_state.get_cost() + step_cost(current_vertex, edge, next_vertex))
    if next_vertex_name in current_state.get_required_vertexes():
        next_state.set_visited_vertex(next_vertex_name)
    next_vertex.set_state(next_state)
    people_in_next_vertex = next_vertex.get_people_num()
    next_state.set_parent_state(current_state)
    new_next_vertex = Vertex(people_in_next_vertex, next_state, next_vertex.get_edges(),
                             current_vertex, edge.get_edge_name(), current_vertex.get_depth(),
                             EnvironmentUtils.g(current_vertex, env_config)
                             + step_cost(current_vertex, edge, next_vertex))
    return new_next_vertex
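# Usage sketch (hedged, mirroring __result and update_func above): to expand a state, look up
# its vertex, attach the state, and ask for the successor along an edge name, e.g.
#   vertex = env_config.get_vertexes()[state.get_current_vertex_name()]
#   vertex.set_state(state)
#   next_state = EnvironmentUtils.get_next_vertex(vertex, "E1", step_cost,
#                                                 env_config, is_max_player=True).get_state()
# "E1" is a placeholder edge name; a name that is not in the edge dict falls into the
# no-op branch above.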
def __print_final_scores(self, game_number, scores_of_agents):
    game_score = None
    is_max_player = True
    if len(scores_of_agents) != 2:
        return
    first_agent_score, second_agent_score = scores_of_agents
    temp = State("", (first_agent_score, second_agent_score))
    if game_number == 1:
        game_score = TerminalEvaluator.terminate_eval(temp, MiniMaxAgent.ADVERSARIAL_MODE,
                                                      is_max_player)
    elif game_number == 2:
        game_score = TerminalEvaluator.terminate_eval(temp, MiniMaxAgent.COOPERATIVE_MODE,
                                                      is_max_player)
    elif game_number == 3:
        game_score = TerminalEvaluator.terminate_eval(temp, MiniMaxAgent.SEMI_COOPERATIVE_MODE,
                                                      is_max_player)
    print("Final Game Score: ", game_score)
def run(self, env_config: EnvironmentConfiguration):
    chosen_agents = []
    states = []
    output_msg = "Choose Agent: \n 1) Adversarial Agent\n 2) Full Cooperative Agent\n 3) Semi Cooperative agent\n"
    num_of_agent = int(input("Enter number of agents\n"))
    game_number = int(input(output_msg))
    for i in range(num_of_agent):
        # Only game numbers 1-3 are listed in the menu above.
        while game_number > 3 or game_number < 1:
            print("Invalid game number")
            game_number = int(input(output_msg))
        chosen_agents.append(self.__get_agent(game_number))
        EnvironmentUtils.print_environment(env_config)
        initial_state_name = input("Choose initial state for agent{0}:\n".format(i + 1))
        states.append(State(initial_state_name, (0, 0),
                            EnvironmentUtils.get_required_vertexes(env_config)))
    simulator = Simulator()
    scores = simulator.run_simulate(chosen_agents, simulator.update_func,
                                    simulator.terminate_func, simulator.performance_func,
                                    env_config, states)
    self.__print_final_scores(game_number, scores)
def run(self, env_config: EnvironmentConfiguration):
    chosen_agents = []
    states = []
    output_msg = "Choose Agent: \n 1) Greedy Agent\n 2) A* Agent\n 3) RTA* agent\n 4) Bonus: SaboteurAgent\n"
    num_of_agent = int(input("Enter number of agents\n"))
    for _ in range(num_of_agent):
        agent_num = int(input(output_msg))
        while agent_num > 4 or agent_num < 1:
            print("Invalid agent number")
            agent_num = int(input(output_msg))
        chosen_agents.append(self.__get_agent(agent_num))
        EnvironmentUtils.print_environment(env_config)
        initial_state_name = input("Choose initial state\n")
        states.append(State(initial_state_name,
                            EnvironmentUtils.get_required_vertexes(env_config)))
    simulator = Simulator()
    simulator.run_simulate(chosen_agents, simulator.update_func, simulator.terminate_func,
                           simulator.performance_func, env_config, states)
def are_no_more_people(state: State):
    # True when every required vertex has already been visited.
    has_unvisited_state = False in state.get_required_vertexes().values()
    return not has_unvisited_state
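# Illustration (plain dict in place of the State's required-vertexes map):
#   {"V1": True, "V2": True}  -> no False values -> no more people to save
#   {"V1": True, "V2": False} -> V2 still unvisited -> people remain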
def get_state_traveled_vertexes(state: State) -> List[str]:
    required_vertexes_dict = state.get_required_vertexes()
    return [vertex_name for vertex_name in required_vertexes_dict.keys()
            if required_vertexes_dict[vertex_name]]
def minimax(self, state: State, action_to_state: str, depth: int, alpha: int, beta: int,
            is_max_player: bool, env_config: EnvironmentConfiguration):
    if TerminalEvaluator.was_deadline_passed(state, env_config.get_deadline()):
        return None, TerminalEvaluator.terminate_eval(state.get_parent_state(),
                                                      self.__mode, is_max_player)
    if TerminalEvaluator.are_no_more_people(state):
        return action_to_state, TerminalEvaluator.terminate_eval(state, self.__mode,
                                                                 is_max_player)
    if depth == 0:
        return action_to_state, TerminalEvaluator.cut_off_utility_eval(
            state, is_max_player, env_config.get_vertexes())
    possible_edges = EnvironmentUtils.get_possible_moves(state, env_config)
    possible_actions = [edge.get_edge_name() for edge in possible_edges]
    if is_max_player:  # Max player
        best_action = None
        max_utility_value = -10000000
        max_opponent_utility = -10000000
        best_score = None
        for action in possible_actions:
            possible_next_state = self.__result(action, copy.deepcopy(state),
                                                is_max_player, env_config)
            # Only the adversarial mode alternates between max and min players;
            # in the cooperative modes both agents maximize.
            is_max_next_player = False if self.__mode == MiniMaxAgent.ADVERSARIAL_MODE else True
            new_action, scores = self.minimax(copy.deepcopy(possible_next_state), action,
                                              depth - 1, alpha, beta, is_max_next_player,
                                              env_config)
            print("cost of possible_next_state = ", possible_next_state.get_cost())
            current_utility, opponent_utility = scores
            if self.__is_better_score(max_utility_value, current_utility,
                                      max_opponent_utility, opponent_utility):
                max_utility_value = current_utility
                max_opponent_utility = opponent_utility
                best_score = scores
                best_action = action
            alpha = max(alpha, current_utility)
            # Alpha-beta pruning is only valid in the adversarial (zero-sum) mode.
            if self.__mode == MiniMaxAgent.ADVERSARIAL_MODE and beta <= alpha:
                break
        return best_action, best_score
    else:  # Min player
        min_utility_value = 10000000
        best_action = None
        best_score = None
        for action in possible_actions:
            possible_next_state = self.__result(action, state, is_max_player, env_config)
            _, scores = self.minimax(copy.deepcopy(possible_next_state), action, depth - 1,
                                     alpha, beta, True, env_config)
            current_utility = scores[1]  # score of the min player
            if current_utility < min_utility_value:
                min_utility_value = current_utility
                best_score = scores
                best_action = action
            beta = min(beta, current_utility)
            if self.__mode == MiniMaxAgent.ADVERSARIAL_MODE and beta <= alpha:
                break
        return best_action, best_score
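# Invocation sketch (hedged; the depth value and exact call site are assumptions, not taken
# from this code): a move is typically chosen from the root state with the full depth budget
# and open alpha/beta bounds, e.g.
#   action, scores = self.minimax(copy.deepcopy(state), None, depth,
#                                 -10000000, 10000000, True, env_config)
# The sentinel bounds match the ones used inside minimax itself.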
def goal_test(self, problem: Tuple[State, State, EnvironmentConfiguration],
              current_state: State):
    _, goal_state, _ = problem
    return goal_state.get_required_vertexes() == current_state.get_required_vertexes()
def was_deadline_passed(state: State, deadline):
    return state.get_cost() > deadline
def goal_test(self, problem: Tuple[State, State, EnvironmentConfiguration],
              current_state: State):
    # Flag termination once the expansion limit has been reached.
    if self._expansions_num >= self.__limit:
        self._was_terminate = True
    _, goal_state, _ = problem
    return goal_state.get_required_vertexes() == current_state.get_required_vertexes()