def terminate(self):
    """
    Shut the agent down where it currently stands.

    Flags the agent's state as terminated, empties its load and hands back
    the number of people it was carrying at that moment.

    :return: value of ``p_carrying`` before it was reset to 0
    """
    state = self.curr_state
    where = str(state.curr_location)
    print_debug("AGENT " + str(self) + " IS TERMINATING ON " + where)
    state.is_terminated = True
    # Read the load and clear it in one step.
    ppl_on_agent, state.p_carrying = state.p_carrying, 0
    return ppl_on_agent
def initializeStatesDict(self):
    """
    Build the ordered list of states and seed the utility/policy table.

    The states returned by ``env_state.getAllPossibleStates()`` are sorted by
    ``(ag_loc, time)`` and stored in ``self.all_possible_states``. Every state
    then gets the entry ``(0, "")`` in ``self.stateUtilityAndPolicyDict``,
    keyed by the state's string form.
    """
    reachable = list(self.env_state.getAllPossibleStates())
    reachable.sort(key=attrgetter('ag_loc', 'time'))
    self.all_possible_states = reachable

    how_many = str(len(reachable))
    print_debug("Number of possible (reachable) states: " + how_many)

    self.stateUtilityAndPolicyDict.update(
        (str(state), (0, "")) for state in reachable)
def traverse_update(self):
    """
    Log the agent's traversal progress for the current time step.

    While ``traverse_timer`` is non-zero only a progress message is emitted.
    Once it has reached 0 the arrival is logged and the state's
    ``state_traverse_update()`` is invoked.
    """
    state = self.curr_state
    me = str(self)
    now = str(state.time)

    if state.traverse_timer != 0:
        # Still on the road.
        print_debug("AGENT " + me + " TRAVERSING ON " +
                    str(state.curr_location) + " TOWARDS " +
                    str(state.traverse_dest) + " AT TIME " + now)
        return

    # Arrived
    print_debug("AGENT " + me + " ARRIVED AT " + str(state.traverse_dest) +
                " AT TIME " + now)
    state.state_traverse_update()
def __init__(self, config_file_path):
    """
    Set up the environment from a configuration file.

    Builds the ``Graph`` from ``config_file_path``, creates a ``bayesNetwork``
    over that graph, zeroes the time and dead-people counters and records
    ``graph.get_ppl2save()`` as ``total_ppl``.

    :param config_file_path: path passed straight to ``Graph``
    """
    graph = Graph(config_file_path)
    self.graph = graph
    self.env_time = 0
    self.bayesNet = bayesNetwork(graph)
    self.dead_ppl = 0
    self.total_ppl = graph.get_ppl2save()

    vertex_total = str(graph.num_of_vertices())
    road_total = str(graph.num_of_roads())
    print_debug("CREATED ENVIRONMENT WITH " + vertex_total +
                " VERTICES, AND " + road_total + " ROADS.")
def at_vertex_auto_actions(self):
    """
    Perform the automatic action tied to the vertex the agent stands on.

    At a shelter, the people the agent carries are dropped off at the vertex
    and added to ``p_saved``. Anywhere else, whatever ``pick_up()`` returns
    for the vertex is added to the load the agent carries.
    """
    state = self.curr_state
    here = state.curr_location

    if not here.is_shelter():
        # Picking up
        boarded = here.pick_up()
        if boarded > 0:
            print_debug("AGENT " + str(self) + " PICKING UP " + str(boarded) +
                        " PEOPLE FROM " + str(here))
            state.p_carrying = state.p_carrying + boarded
        return

    # Dropping off
    unloading = state.p_carrying
    if unloading > 0:
        print_debug("AGENT " + str(self) + " DROPPING OFF " + str(unloading) +
                    " PEOPLE AT " + str(here))
        here.drop_off(unloading)
        state.p_saved = state.p_saved + unloading
        state.p_carrying = 0
# Advance ``self.agent`` by one environment step.
#
# * Returns immediately when the agent's state is already terminated.
# * Refreshes ``agent.curr_state.v_people`` from
#   ``get_people_array_considering_deadlines()``.
# * If ``agent.hurricane_check()`` is truthy, the agent is terminated and the
#   score is reduced by the number of people it carried plus ``self.k_value``.
# * Otherwise vertex auto-actions run for an agent that is not traversing,
#   the score is set from ``p_saved`` and the agent is asked for an action:
#     - "TERMINATE": the agent terminates; the penalty is carried people plus
#       ``k_value`` when it carried anyone, else ``k_value`` alone when its
#       location is not a shelter.
#     - any other truthy action is handled as a traverse request. A leading
#       'E' names an edge (destination is the endpoint that is not the
#       current location); a leading 'V' names a vertex (the edge is looked up
#       between the current location and that vertex).
# * ``agent.curr_state.time_update()`` is called to advance the agent's clock.
#
# NOTE(review): an action string starting with neither 'E' nor 'V' appears to
# leave ``dest_e``/``dest_v`` as None before ``agent.traverse`` is called -
# confirm the agent can never produce such an action.
# NOTE(review): statement nesting cannot be read off this single-line form;
# verify against the formatted original which statements are skipped while
# the agent is traversing and whether ``time_update()`` also runs after a
# hurricane hit.
def update(self): agent = self.agent if agent.curr_state.is_terminated: return ag_location = agent.curr_state.curr_location agent.curr_state.v_people = self.get_people_array_considering_deadlines( ) if agent.hurricane_check(): print_debug("AGENT " + str(agent) + " GOT HIT BY HURRICANE AT " + str(ag_location)) ppl_on_agent = agent.terminate() self.reduce_agent_score(ppl_on_agent + self.k_value) else: if not agent.is_traversing(): agent.at_vertex_auto_actions() agent.curr_state.v_people = self.get_people_array_considering_deadlines( ) self.set_agent_score(agent.curr_state.p_saved) agent_action = agent.action(self) # self.env_state = self.env_state.successor_fn_with_action(agent_action) if agent_action: if agent_action == "TERMINATE": ppl_on_agent = agent.terminate() if ppl_on_agent > 0: self.reduce_agent_score(ppl_on_agent + self.k_value) elif not agent.curr_state.curr_location.is_shelter(): self.reduce_agent_score(self.k_value) else: # "TRAVERSE" dest_e = None dest_v = None if agent_action[0] == 'E': dest_e = self.graph.get_edge_from_string( agent_action) if dest_e.vertex_1 == agent.curr_state.curr_location: dest_v = dest_e.vertex_2 else: dest_v = dest_e.vertex_1 elif agent_action[0] == 'V': dest_v = self.graph.get_vertex_from_string( agent_action) dest_e = self.graph.get_edge( agent.curr_state.curr_location, dest_v) agent.traverse(dest_e, dest_v) agent.curr_state.time_update()
def runValueIteration(self, delta):
    """
    Repeat value-iteration sweeps until the utilities settle.

    Each pass snapshots ``self.stateUtilityAndPolicyDict``, calls
    ``self.ValueIteration()`` and measures the largest absolute change in
    utility (element 0 of each entry) over ``self.all_possible_states``.
    Sweeping stops once that change drops below ``delta``; at least one sweep
    always runs. The sweep count, the elapsed time and the resulting table
    are then reported via ``print_debug`` and ``self.printStatesDict()``.

    :param delta: convergence threshold for the largest per-sweep utility
        change. It should be positive: the measured change is never
        negative, so a ``delta`` of 0 or less can never be undercut and the
        loop would not end.
    """
    print_debug("RUNNING VALUE ITERATION ALGORITHM:")
    start = timeit.default_timer()

    # The table is keyed by the string form of each state; build keys once.
    state_keys = [str(s) for s in self.all_possible_states]

    iterations = 0
    while True:
        iterations += 1
        prev_dict = deepcopy(self.stateUtilityAndPolicyDict)
        self.ValueIteration()
        current = self.stateUtilityAndPolicyDict
        # Compare magnitudes: a utility that moved downwards has changed just
        # as much as one that moved upwards, so it must not count as settled.
        max_change = max(
            (abs(current[key][0] - prev_dict[key][0]) for key in state_keys),
            default=0,
        )
        if max_change < delta:
            break

    stop = timeit.default_timer()
    print_debug("Took " + str(iterations) + " iterations, " +
                str(round(stop - start, 2)) + " seconds.")
    print_debug("Utilities and best policy:")
    self.printStatesDict()
# Compute the policy once, then play simulation rounds interactively.
#
# Setup: ``initializeStatesDict()`` followed by ``runValueIteration`` with
# ``Environment.VALUE_ITERATION_DELTA``; the resulting
# ``stateUtilityAndPolicyDict`` is deep-copied so each round can start from
# the same table.
#
# Each round: restore the table from that copy, call ``setRealEdgesStatus()``,
# add an agent at ``env_state.ag_loc`` and print the environment. While the
# agent is not terminated, a step consists of: ``traverse_update()`` for a
# traversing agent; moving ``ppl_count`` of every non-shelter vertex whose
# ``deadline`` is below ``env_state.time`` into ``self.dead_ppl`` (and zeroing
# it); ``print_changes()``; ``update()``; and ``env_state.time += 1``.
#
# After a round the user is prompted; the graph is replaced by a deep copy of
# ``self.grapCopy`` and ``initEnvironmentVariables()`` is called.
#
# NOTE(review): the reply is compared with 'Y' exactly, so any other input
# (including lower-case 'y') ends the loop.
# NOTE(review): ``grapCopy`` is spelled without the 'h' - confirm it matches
# the attribute name used where the copy is created.
# NOTE(review): statement nesting cannot be read off this single-line form;
# verify against the formatted original where the "GAME OVER" reporting sits
# relative to the ``if not self.agent`` / ``else`` branches.
def simulation(self): print("------------------------------------------------") self.initializeStatesDict() self.runValueIteration(Environment.VALUE_ITERATION_DELTA) dictCopy = deepcopy(self.stateUtilityAndPolicyDict) play = 'Y' while play == 'Y': self.stateUtilityAndPolicyDict = deepcopy(dictCopy) print_info("STARTING SIMULATION:") self.setRealEdgesStatus() self.add_agent(self.env_state.ag_loc) print_info("CREATED RANDOM INSTANCE OF ENVIRONMENT:") self.print_env() if not self.agent: self.print_env() else: ag = self.agent while not ag.curr_state.is_terminated: if ag.is_traversing(): ag.traverse_update() for v in self.graph.vertices: if not v.is_shelter( ) and v.deadline < self.env_state.time: self.dead_ppl += v.ppl_count v.ppl_count = 0 print_debug("PRINTING ENVIRONMENT STATUS:") self.print_changes() print_debug("AGENTS OPERATING IN ENVIRONMENT:") self.update() print_debug("DONE WITH AGENTS OPERATING IN ENVIRONMENT.") self.env_state.time += 1 print("------------------------------------------------") print_debug("GAME OVER") print_info("PRINTING ENVIRONMENT STATUS:") self.print_env() print_query("Play again? (Y/N)") play = input() self.graph = deepcopy(self.grapCopy) self.initEnvironmentVariables()
def traverse(self, e, dest_v):
    """
    Put the agent on edge ``e`` heading for ``dest_v``.

    A blocked edge is only reported and the agent's state stays as it was.
    Otherwise the agent's location becomes the edge itself, it is flagged as
    traversing, and the traverse timer is loaded with the edge weight.

    :type e: graph.Edge
    :type dest_v: graph.Vertex
    """
    if e.is_blocked:
        print_debug("TRIED TO TRAVERSE A BLOCKED ROAD " + str(e))
        return

    print_debug("AGENT " + str(self) + " TRAVERSING TO " + str(dest_v) +
                " THROUGH " + str(e) + ".")
    print_debug("WILL TAKE " + str(e.weight) + " TIME.")

    state = self.curr_state
    state.curr_location = e
    state.is_traversing = True
    state.traverse_dest = dest_v
    state.traverse_timer = e.weight
def printStatesDict(self):
    """
    Log every known state next to its utility/policy table entry.

    States are listed in the order of ``self.all_possible_states``; each line
    is the state's string form, a tab-separated ``=`` and the string form of
    its entry in ``self.stateUtilityAndPolicyDict``.
    """
    table = self.stateUtilityAndPolicyDict
    for state in self.all_possible_states:
        label = str(state)
        print_debug(label + "\t=\t" + str(table[label]))
from environment import Environment as Env
from helper_funcs import print_debug, print_query, print_info

# Path handed to the Environment constructor.
ENVIRONMENT_SETTINGS_FILE = "environment_example_forgrading.txt"

# Build the environment from the settings file and show its graph.
env = Env(ENVIRONMENT_SETTINGS_FILE)
print_info("OUR GRAPH:")
print_info(env.graph)
print("------------------------------------------------")

# Hand control over to the environment's simulation loop.
print_debug("STARTING SIMULATION:")
env.simulation()