def __init__(self, env, max_layer):
    self.env = env
    self.modifications = []
    self.counter = 0
    self.nodes = []

    # Train a Q-learning agent on the original environment and use the wall /
    # cell heuristics to generate the candidate modifications.
    agent = QAgent(env)
    agent.qlearn(600, show=False)
    wall_dict = wall_interference(agent)
    cell_dict = cell_frequency(agent)

    # Category 0: remove the wall at (row, col); category 1: make (row, col) a special cell.
    for element in wall_dict:
        self.modifications.append((0, element[0][0], element[0][1]))
    for element in cell_dict[0:14]:
        self.modifications.append((1, element[0][0], element[0][1]))

    self.num_nodes = 0
    self.root = None
    self.max_layer = max_layer
    self.threshold = 8

    # Storing best reward and corresponding environment
    self.max_reward = float("-inf")
    self.opt_env = None
def path_based(env, num_mods):
    # Train agent in original environment
    agent = QAgent(env)
    agent.qlearn(600, render=False)
    cell_dict = cell_frequency(agent)
    wall_dict = wall_interference(agent)

    opt_seq = None
    opt_val = float("-inf")

    for k in range(num_mods):
        seq = []
        ref = copy.deepcopy(env)

        # Pick k modifications on walls
        walls_to_remove = [wall_dict[i][0] for i in range(k)]
        for wall in walls_to_remove:
            ref = ref.transition([wall])
            seq.append((0, wall[0], wall[1]))

        # Spend the remaining budget on special cells
        num_special_cells = num_mods - k
        cells_to_assign = [cell_dict[i][0] for i in range(num_special_cells)]
        for cell in cells_to_assign:
            ref.special.append(cell)
            seq.append((1, cell[0], cell[1]))

        # Retrain on the modified environment and keep the best-scoring split
        agent_k = QAgent(ref)
        print(colored("Iteration {} begins!".format(k), "red"))
        print(ref.walls, ref.special)
        agent_k.qlearn(600, render=False)
        rews = utility(agent_k)
        if rews > opt_val:
            opt_val = rews
            opt_seq = seq

    return (opt_seq, opt_val)
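# Hypothetical helper (a sketch, not part of the original code): replays a sequence of
# (category, row, col) tuples, as produced by path_based above or greedy below, onto a
# copy of an environment. It mirrors how those functions apply modifications: category 0
# removes the wall at (row, col) via env.transition, category 1 marks (row, col) as special.
def apply_modifications(env, seq):
    ref = copy.deepcopy(env)
    for category, row, col in seq:
        if category == 0:
            ref = ref.transition([(row, col)])
        else:
            ref.special.append((row, col))
    return ref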
def greedy(env, num_mods):
    # This function returns the sequence of modifications based on the wall and cell heuristics
    # Parameters
    # ===============================================================
    # env: the original environment
    # num_mods: the number of modifications
    # ===============================================================
    greedy_seq = []
    ref = copy.deepcopy(env)
    agent = None

    for i in range(num_mods):
        # For each iteration, find out the wall that most interferes and the cell
        # that is crossed the most. Try out all options.
        if i == 0:
            agent = QAgent(ref)
            agent.qlearn(600, render=False)
        else:
            agent = connected_qlearn(agent, ref, 300)

        # Take out the lists from the heuristics.
        wall_dict = wall_interference(agent)
        cell_dict = cell_frequency(agent)

        # Take out the max values, and the options to try out.
        wall_nums = [elem[1] for elem in wall_dict]
        max_wall = max(wall_nums)
        cell_nums = [elem[1] for elem in cell_dict]
        max_cell = max(cell_nums)
        wall_options = [elem[0] for elem in wall_dict if elem[1] == max_wall]
        cell_options = [elem[0] for elem in cell_dict if elem[1] == max_cell]

        # Test out all the options, get optimal modification
        opt_value = float("-inf")
        opt_choice = None
        category = -1

        for wall in wall_options:
            print(colored("Testing environment", "red"))
            e = ref.transition([wall])
            new_agent = connected_qlearn(agent, e, 300)
            # Get utility
            val = utility(new_agent)
            if val > opt_value:
                opt_value = val
                opt_choice = wall
                category = 0

        for cell in cell_options:
            print(colored("Testing environment", "red"))
            e = copy.deepcopy(ref)
            e.special.append(cell)
            new_agent = connected_qlearn(agent, e, 300)
            # Get utility
            val = utility(new_agent)
            if val > opt_value:
                opt_value = val
                opt_choice = cell
                category = 1

        assert (category != -1)

        # Store found modification and change the reference environment
        if category == 0:
            mod = (0, opt_choice[0], opt_choice[1])
            greedy_seq.append(mod)
            ref = ref.transition([opt_choice])
        elif category == 1:
            mod = (1, opt_choice[0], opt_choice[1])
            greedy_seq.append(mod)
            ref.special.append(opt_choice)

    # Evaluate utility
    total_agent = QAgent(ref)
    total_agent.qlearn(600, render=False)
    result = utility(total_agent)
    # print(colored(result, "red"))
    return greedy_seq, result
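# Illustrative only: a minimal, hypothetical driver comparing the two strategies above on
# a small modification budget. The layout below is the standard 5x5 Taxi map and is used
# purely as a placeholder; substitute whatever map this project actually targets.
if __name__ == "__main__":
    example_map = [
        "+---------+",
        "|R: | : :G|",
        "| : | : : |",
        "| : : : : |",
        "| | : | : |",
        "|Y| : |B: |",
        "+---------+",
    ]
    example_env = TaxiEnv(np.asarray(example_map, dtype="c"))
    pb_seq, pb_val = path_based(example_env, 2)
    gr_seq, gr_val = greedy(example_env, 2)
    print("path_based:", pb_seq, pb_val)
    print("greedy:", gr_seq, gr_val)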
processes = []
manager = Manager()
agents = manager.list()
for i in range(rounds * num_processes):
    agents.append(0)  # keeper
categories = []
num_mods = 1

map_to_numpy = np.asarray(map, dtype="c")
env = TaxiEnv(map_to_numpy)  # reference environment

orig_agent = QAgent(env)
orig_agent.qlearn(600, show=False)
cell_dict = cell_frequency(orig_agent)
wall_dict = wall_interference(orig_agent)

modifications = []
for element in wall_dict:
    modifications.append((0, element[0]))
for element in cell_dict[0:14]:
    row, col = element[0]
    modifications.append((1, (row, col)))

for iter in range(rounds):
    print(colored("Data addition round {} begins!".format(iter), "red"))
    for i in range(num_processes):
        results = simulate_env(env, num_mods)
        modified = results[0]
        categories.append(results[1])
        res.append(list[index])
    return res


if num_mods == 6:
    num_trials = int(2e+6)
else:
    num_trials = int(1e+6)

agent = QAgent(env)
agent.qlearn(600, show=False)
cell_dict = cell_frequency(agent)
wall_dict = wall_interference(agent)

modifications = []
for element in wall_dict:
    modifications.append((0, element[0][0], element[0][1]))
for element in cell_dict[0:14]:
    row, col = element[0]
    modifications.append((1, row, col))

# Initialize and build heap
sz = min(12 * num_mods, len(x_test))
h = Heap(model, x_test[0:sz], sz)
h.build_heap()

ls = []
for i in range(num_trials):