def run(global_env):
    """Train the HAM matching ``global_env`` and dump its episode rewards.

    The inner machine class is chosen from the concrete environment type
    (``M1`` for ``ArmEnvArticle``, ``M2`` for ``MazeEnvArticle``, ``M3`` for
    ``MazeEnvArticleSpecial``).  It is wrapped as
    ``RootMachine(LoopInvokerMachine(super_runner(...)))`` and run for
    ``global_env.episodes_count`` episodes.  The graph of the inner machine
    is drawn under the file name ``"maze_env_special"`` for every
    environment type.

    When the logged rewards are not ``None`` they are written, one value per
    line, to ``"_<EnvClass> cumulative_reward.txt"`` and
    ``"_<EnvClass> reward.txt"``.

    Raises:
        KeyError: ``global_env`` is none of the three supported types.
    """
    # Kept as an ordered chain: the first matching isinstance check wins.
    if isinstance(global_env, ArmEnvArticle):
        machine_cls = M1
    elif isinstance(global_env, MazeEnvArticle):
        machine_cls = M2
    elif isinstance(global_env, MazeEnvArticleSpecial):
        machine_cls = M3
    else:
        raise KeyError

    env = global_env.env
    inner = machine_cls(env=env)
    root = RootMachine(
        LoopInvokerMachine(super_runner(call_me_maybe=inner, env=env)))
    draw_graph(
        file_name="maze_env_special",
        graph=inner.get_graph_to_draw(
            action_to_name_mapping=env.get_actions_as_dict()))
    params = HAMParamsCommon(env)
    ham_runner(ham=root, num_episodes=global_env.episodes_count, env=env,
               params=params)

    rewards = params.logs["ep_rewards"]
    if rewards is None:
        return

    prefix = "_" + global_env.__class__.__name__
    with open(prefix + " cumulative_reward.txt", "w") as sink:
        sink.writelines(
            str(value) + '\n'
            for value in get_cumulative_rewards(rewards=rewards))
    with open(prefix + " reward.txt", "w") as sink:
        sink.writelines(str(value) + '\n' for value in rewards)
def run(global_env):
    """Train the hand-written HAM for ``global_env`` and save its rewards.

    The environment type selects the inner machine and the graph file name:

    * ``ArmEnvArticle``         -> ``PullUpMachine``, ``"arm_env"``
    * ``MazeEnvArticle``        -> ``InterestingMachine``, ``"maze_env"``
    * ``MazeEnvArticleSpecial`` -> ``InterestingMachineLeftUpInteresting``,
      ``"maze_env_special"``

    The machine is wrapped in ``RootMachine(LoopInvokerMachine(...))`` and run
    for ``global_env.episodes_count`` episodes.  The per-episode rewards are
    written, one per line, to ``"<name>_<EnvClass> reward.txt"``.

    Raises:
        KeyError: ``global_env`` is none of the supported environment types.
    """
    # Ordered chain: the first matching isinstance check wins.
    if isinstance(global_env, ArmEnvArticle):
        machine_cls, graph_file = PullUpMachine, "arm_env"
    elif isinstance(global_env, MazeEnvArticle):
        machine_cls, graph_file = InterestingMachine, "maze_env"
    elif isinstance(global_env, MazeEnvArticleSpecial):
        machine_cls, graph_file = (InterestingMachineLeftUpInteresting,
                                   "maze_env_special")
    else:
        # Same exception type as before, now saying what was rejected.
        raise KeyError("unsupported environment type: "
                       + global_env.__class__.__name__)

    env = global_env.env
    internal_machine = machine_cls(env=env)
    machine = RootMachine(machine_to_invoke=LoopInvokerMachine(
        machine_to_invoke=internal_machine))
    draw_graph(
        file_name=graph_file,
        graph=internal_machine.get_graph_to_draw(
            action_to_name_mapping=env.get_actions_as_dict()))
    params = HAMParamsCommon(env)
    ham_runner(ham=machine, num_episodes=global_env.episodes_count, env=env,
               params=params)
    rewards = params.logs["ep_rewards"]

    # NOTE(review): ``name`` is not defined in this function; it is expected
    # to come from the enclosing/module scope -- confirm.
    full_name = name + "_" + global_env.__class__.__name__
    with open(full_name + " reward.txt", "w") as w:
        w.writelines(str(out) + '\n' for out in rewards)
def part_two(env):
    """Filter the stored machines down to those that earn reward on ``env``.

    Steps:

    1. Load the machine dicts from ``machines_part_one.json`` and prepend a
       stored copy of ``AutoMachineSimple(env)``.
    2. Run ``AutoMachineSimple`` for 2000 episodes with an empty
       ``on_model_mapping`` and keep the resulting ``params.q_value``.
    3. For every model part (in reversed ``env.get_all_on_model()`` order) and
       every machine, run a single episode with that machine mapped to that
       part, starting from the kept ``q_value`` object.
    4. Keep each machine whose single-episode reward sum exceeds 10 and write
       the compressed graph dicts to ``machines_part_two.json``.

    A machine is appended once per model part on which it qualifies, so the
    saved list may contain the same machine several times.
    """
    # Read the input and release the handle before the long training run.
    with open("machines_part_one.json") as json_file:
        stored_dicts = json.load(json_file)

    machines = [MachineStored.ms_from_machine(AutoMachineSimple(env), env)]
    machines.extend(
        MachineStored.from_dict(graph_dict=ms_dict, env=env)
        for ms_dict in stored_dicts)

    params = HAMParamsCommon(env)
    am = AutoMachineSimple(env)
    runner(
        ham=am,
        num_episodes=2000,
        env=env,
        params=params,
        on_model_mapping={},
    )
    # The very same q_value object is handed to every probe run below.
    # NOTE(review): presumably runner updates it in place, so later probes
    # see what earlier probes learnt -- confirm this is intended.
    qv = params.q_value

    machines_to_save = []
    for on_model_part in reversed(env.get_all_on_model()):
        for ms in machines:
            machine = ms.get_machine()
            params = HAMParamsCommon(env)
            params.q_value = qv
            runner(
                ham=am,
                num_episodes=1,
                env=env,
                params=params,
                on_model_mapping={on_model_part: machine},
            )
            total_reward = sum(params.logs["ep_rewards"])
            print("rewards sum:", total_reward)
            if total_reward > 10:
                machines_to_save.append(ms)

    # Serialise first so that a failure here does not leave a truncated file.
    t = compress_graphs_dicts([ms.to_dict() for ms in machines_to_save])
    with open("machines_part_two.json", "w") as out_f:
        json.dump(obj=t, fp=out_f, sort_keys=True, indent=4)
def go(transitions, brute_force, index_):
    """Evaluate one brute-forced machine and draw it if it earns reward.

    A machine is built from ``transitions`` and wrapped in
    ``RootMachine(LoopInvokerMachine(...))``.  If it is runnable it is first
    probed for 2 episodes on ``environments[0]``; a ``ChildProcessError``
    during the probe counts as zero reward.  Only when the probe reward is
    positive is the machine run for 600 episodes on every environment in
    ``environments``, after which its graph is drawn to a file named
    ``"<total reward>__<brute_force>_<index_>"``.

    ``environments`` and ``env`` are not parameters; they are read from the
    enclosing/module scope.
    """
    machine = AbstractMachine(MachineGraph(transitions=transitions))
    am = RootMachine(LoopInvokerMachine(machine))
    if not is_it_machine_runnable(machine):
        return

    # Cheap probe before committing to the long evaluation.
    sum_rew = 0
    try:
        probe_params = HAMParamsCommon(environments[0])
        ham_runner(ham=am, num_episodes=2, env=environments[0],
                   params=probe_params)
        sum_rew = sum(probe_params.logs["ep_rewards"])
    except ChildProcessError:
        pass
    if not sum_rew > 0:
        return

    rew = None
    print("\n\n\n")
    for e in environments:
        params = HAMParamsCommon(e)
        ham_runner(ham=am, num_episodes=600, env=e, params=params)
        gained = sum(params.logs["ep_rewards"])
        if rew is None:
            rew = 0
        rew += gained
        print("to_add:", gained)

    # rew stays None only when ``environments`` is empty.
    if rew is None:
        return
    draw_graph(
        "{rew}__{brute_force}_{index_}".format(
            rew=rew, brute_force=brute_force, index_=index_),
        am.get_graph_to_draw(
            action_to_name_mapping=env.get_actions_as_dict()))
def part_seven(env):
    """Compare the saved cluster machines against an empty mapping.

    Loads ``machines_part_six.json`` (string keys that look like tuples,
    values are stored-machine dicts), rebuilds one machine per key and draws
    each stored machine to ``"ololo<key>"``.  ``AutoMachineSimple`` is then
    run for 2000 episodes twice: once with the rebuilt machines as
    ``on_model_mapping`` (curve label ``"clustering"``) and once with an
    empty mapping (label ``"q-learning"``).  The last logged GIF frames of
    the first run are saved as ``"olololo"`` and both curves are plotted to
    ``"ololo_result"``.
    """
    with open("machines_part_six.json") as json_file:
        stored_by_key = json.load(json_file)

    machine_for_cluster = {}
    for key in stored_by_key:
        # Keys look like "(3, True)": drop the parentheses and evaluate each
        # comma-separated element.
        # NOTE(review): eval() executes arbitrary expressions; this is only
        # acceptable while the JSON file is produced by this pipeline.
        bare = key.replace("(", "").replace(")", "")
        cluster = tuple(map(eval, bare.split(",")))
        machine_for_cluster[cluster] = MachineStored.from_dict(
            stored_by_key[key], env=env).get_machine()
        MachineStored.from_dict(stored_by_key[key],
                                env=env).draw("ololo" + str(key))

    clustered_params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=2000,
        env=env,
        params=clustered_params,
        on_model_mapping=machine_for_cluster,
    )
    to_plot = [
        PlotParams(curve_to_draw=clustered_params.logs["ep_rewards"],
                   label="clustering")
    ]
    save_to_gif("olololo", clustered_params.logs["gif"][-1])

    baseline_params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=2000,
        env=env,
        params=baseline_params,
        on_model_mapping={},
    )
    to_plot.append(
        PlotParams(curve_to_draw=baseline_params.logs["ep_rewards"],
                   label="q-learning"))
    plot_multi(to_plot, filename="ololo_result")
def test_draw_gid():
    """Run one episode on a small arm environment and save it as a GIF.

    Four helper methods are attached to ``ArmEnvToggleTopOnly`` (the class,
    so the already created ``env`` sees them too):

    * ``get_arm_x``        -- returns ``_arm_x``
    * ``is_cube_graped``   -- magnet on and a cube directly in the DOWN cell
    * ``get_on_model``     -- ``(get_arm_x(), is_cube_graped())``
    * ``get_all_on_model`` -- every ``(height, graped)`` pair except
      ``(_size_x - 1, True)``

    ``AutoMachineSimple`` is then run for a single episode, the first logged
    GIF is saved as ``"olololo"`` and the process exits with status 0.

    Raises:
        SystemExit: always, with code 0, once the GIF has been saved.
    """
    env = ArmEnvToggleTopOnly(size_x=5,
                              size_y=4,
                              cubes_cnt=4,
                              episode_max_length=50,
                              finish_reward=100,
                              action_minus_reward=-0.001,
                              tower_target_size=4)

    def get_on_model(self):
        return self.get_arm_x(), self.is_cube_graped()

    def get_all_on_model(self):
        # (lowest row, grabbed) is excluded from the enumeration.
        return [(height, graped)
                for height in range(self._size_x)
                for graped in (True, False)
                if not (height == self._size_x - 1 and graped)]

    def get_arm_x(self):
        # An unreachable second return (``_size_x - _arm_x``) was dropped;
        # the value actually returned is unchanged.
        return self._arm_x

    def is_cube_graped(self):
        cube_dx, cube_dy = self.MOVE_ACTIONS[self.ACTIONS.DOWN]
        cube_x, cube_y = self._arm_x + cube_dx, self._arm_y + cube_dy
        return self._magnet_toggle and self.ok(
            cube_x, cube_y) and self._grid[cube_x][cube_y] == 1

    ArmEnvToggleTopOnly.get_arm_x = get_arm_x
    ArmEnvToggleTopOnly.get_all_on_model = get_all_on_model
    ArmEnvToggleTopOnly.is_cube_graped = is_cube_graped
    ArmEnvToggleTopOnly.get_on_model = get_on_model

    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=1,
        env=env,
        params=params,
        on_model_mapping={},
        no_output=True,
    )
    save_to_gif("olololo", params.logs["gif"][0])

    # The ``exit`` helper is injected by the ``site`` module for interactive
    # use; raising SystemExit directly gives the same exit status.
    raise SystemExit(0)
def run(global_env):
    """Run the shared machine ``am`` and write its rewards to a text file.

    ``global_env`` is accepted but not used.  ``name``, ``environments``,
    ``am``, ``global_episodes`` and ``env`` are all read from the
    enclosing/module scope.  The per-episode rewards are written, one per
    line, to ``"<name> reward.txt"``.
    """
    prefix = name
    # NOTE(review): params are built from environments[0] while the run
    # itself uses ``env`` -- confirm these are the same environment.
    params = HAMParamsCommon(environments[0])
    ham_runner(ham=am, num_episodes=global_episodes, env=env, params=params)
    episode_rewards = params.logs["ep_rewards"]

    with open(prefix + " reward.txt", "w") as sink:
        sink.writelines(str(value) + '\n' for value in episode_rewards)
def _step(self, action):
    """Append ``action`` to the machine under construction and score it.

    The action is appended to ``self.state`` and ``self.ham`` is rebuilt
    before anything else, so both happen even when ``action`` is ``None``.
    One of the six known actions adds an ``Action`` vertex via ``self.add``.
    If the resulting machine is runnable its reward is taken from the
    ``self.dp`` cache, or computed with ``ham_runner`` over
    ``self.num_of_episodes`` episodes and cached, and the machine graph is
    drawn under ``"pics/"``.

    Returns:
        ``(observation, reward, done, None)`` where ``observation`` is the
        updated state tuple and ``reward`` is ``None`` for a machine that is
        not runnable.

    Raises:
        KeyError: ``action`` is ``None``.
    """
    self.state = self.state + (action,)
    self.ham = RootMachine(
        LoopInvokerMachine(
            machine_to_invoke=super_runner(self.machine, self.env)))

    if action is None:
        raise KeyError
    known = self.ACTIONS
    if (action == known.ACTION_01 or action == known.ACTION_02
            or action == known.ACTION_03 or action == known.ACTION_04
            or action == known.ACTION_05 or action == known.ACTION_06):
        self.add(Action(action=action))

    reward = None
    if is_it_machine_runnable(self.machine):
        if self.state not in self.dp:
            params = HAMParamsCommon(self.env)
            ham_runner(ham=self.ham,
                       num_episodes=self.num_of_episodes,
                       env=self.env,
                       params=params,
                       no_output=True)
            self.dp[self.state] = sum(params.logs["ep_rewards"])
        reward = self.dp[self.state]
        # NOTE(review): drawn for cached results as well; confirm that the
        # graph is meant to be redrawn on a cache hit.
        draw_graph(
            "pics/" + str(reward).rjust(10, "0") + str(self.state) + " ",
            self.machine.get_graph_to_draw(
                action_to_name_mapping=self.env.get_actions_as_dict()))

    if len(self.state) >= self.max_size:
        self._done = True
    return self.state, reward, self._done, None
def main(begin_seed=0):
    """Score 5000 randomly generated machines on the article maze.

    For every seed in ``[begin_seed, begin_seed + 5000)`` a fresh
    ``MazeEnvArticle`` is created and a random machine (at most 6 vertices
    and 6 edges) is generated.  Runnable machines are run twice with the
    same ``params``: first wrapped in ``super_runner`` and then directly.
    The graph is drawn to ``"laby/<reward sum>:::<seed>"``.  ``KeyError``
    and ``AssertionError`` raised while running or drawing are reported on
    stdout and the seed is skipped.

    Finally ``plot_multi`` is called with ``to_plot``, which is read from the
    enclosing/module scope and is not modified here.
    """
    for seed in range(begin_seed, begin_seed + 5000):
        # Other configurations used with this script:
        #   MazeEnvArticleSpecial(), "laby_spec/"
        #   ArmEnvArticle(), "arm/"
        global_env = MazeEnvArticle()
        save_folder = "laby/"
        env = global_env.env
        num_episodes = global_env.episodes_count

        candidate = create_random_machine(maximal_number_of_vertex=6,
                                          maximal_number_of_edges=6,
                                          random_seed=seed,
                                          env=env)
        if not is_it_machine_runnable(candidate):
            continue

        params = HAMParamsCommon(env)
        try:
            ham_runner(
                ham=RootMachine(
                    LoopInvokerMachine(
                        machine_to_invoke=super_runner(candidate, env))),
                num_episodes=num_episodes,
                env=env,
                params=params,
                no_output=True)
            ham_runner(
                ham=RootMachine(
                    machine_to_invoke=LoopInvokerMachine(candidate)),
                num_episodes=num_episodes,
                env=env,
                params=params,
                no_output=True)
            reward = sum(params.logs["ep_rewards"])
            draw_graph(
                save_folder + str(reward) + ":::" + str(seed),
                candidate.get_graph_to_draw(
                    action_to_name_mapping=env.get_actions_as_dict()))
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")
    plot_multi(to_plot)
def main():
    """Run ``AutoMachineNoLoop`` on a 5x5 arm environment and plot rewards.

    Three helper methods are attached to ``ArmEnvToggleTopOnly`` first:
    ``get_arm_x`` (``_size_x - _arm_x``), ``is_cube_graped`` (magnet on and
    a cube in the DOWN cell) and ``get_on_model`` (the pair of both).  The
    machine is then run for 2000 episodes and the reward curve is plotted
    under the label ``"HAM_with_pull_up"``.
    """

    def get_on_model(self):
        return self.get_arm_x(), self.is_cube_graped()

    def get_arm_x(self):
        return self._size_x - self._arm_x

    def is_cube_graped(self):
        dx, dy = self.MOVE_ACTIONS[self.ACTIONS.DOWN]
        below_x = self._arm_x + dx
        below_y = self._arm_y + dy
        return (self._magnet_toggle and self.ok(below_x, below_y)
                and self._grid[below_x][below_y] == 1)

    for helper in (get_arm_x, is_cube_graped, get_on_model):
        setattr(ArmEnvToggleTopOnly, helper.__name__, helper)

    env = ArmEnvToggleTopOnly(size_x=5,
                              size_y=5,
                              cubes_cnt=4,
                              episode_max_length=600,
                              finish_reward=100,
                              action_minus_reward=-0.001,
                              tower_target_size=4)
    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineNoLoop(env),
        num_episodes=2000,
        env=env,
        params=params,
    )
    plot_multi([
        PlotParams(curve_to_draw=params.logs["ep_rewards"],
                   label="HAM_with_pull_up")
    ])
def run(global_env):
    """Train one fixed-seed random machine on the special maze environment.

    Nothing is done for ``ArmEnvArticle`` and ``MazeEnvArticle``.  For
    ``MazeEnvArticleSpecial`` a random machine is generated with seed
    ``573846788`` (at most 6 vertices and 6 edges), drawn to
    ``"maze_env_special"``, run for ``global_env.episodes_count`` episodes
    and, when the logged rewards are not ``None``, written one per line to
    ``"<name>_<EnvClass> reward.txt"``.  ``name`` is read from the
    enclosing/module scope.

    Raises:
        KeyError: ``global_env`` is none of the three known types.
    """
    if isinstance(global_env, ArmEnvArticle) or isinstance(
            global_env, MazeEnvArticle):
        return
    if not isinstance(global_env, MazeEnvArticleSpecial):
        raise KeyError

    env = global_env.env
    seed = 573846788
    random_machine = create_random_machine(maximal_number_of_vertex=6,
                                           maximal_number_of_edges=6,
                                           random_seed=seed,
                                           env=env)
    root = RootMachine(machine_to_invoke=LoopInvokerMachine(
        machine_to_invoke=random_machine))
    draw_graph(
        file_name="maze_env_special",
        graph=random_machine.get_graph_to_draw(
            action_to_name_mapping=env.get_actions_as_dict()))
    params = HAMParamsCommon(env)
    ham_runner(ham=root, num_episodes=global_env.episodes_count, env=env,
               params=params)

    rewards = params.logs["ep_rewards"]
    if rewards is None:
        return
    prefix = name + "_" + global_env.__class__.__name__
    with open(prefix + " reward.txt", "w") as sink:
        sink.writelines(str(value) + '\n' for value in rewards)
def main(global_env, begin_seed=0):
    """Search random machines for ones that earn reward on ``global_env``.

    For every seed in ``[begin_seed, begin_seed + 5000)`` a random machine
    (at most 6 vertices and 6 edges) is generated.  Runnable machines are run
    for ``global_env.episodes_count`` episodes.  When the reward sum over the
    last 100 episodes is positive, the total reward is printed, the curve is
    appended to ``to_plot`` under the label ``"Random<seed + 1>"`` and the
    graph is drawn to ``"pics/<seed>"``.

    ``KeyError`` and ``AssertionError`` raised while running a machine are
    reported on stdout and that seed is skipped.  ``to_plot`` is read from
    the enclosing/module scope and plotted once the search is over.
    """
    env = global_env.env
    num_episodes = global_env.episodes_count
    for seed in range(begin_seed, begin_seed + 5000):
        new_machine = create_random_machine(maximal_number_of_vertex=6,
                                            maximal_number_of_edges=6,
                                            random_seed=seed,
                                            env=env)
        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            ham_runner(ham=RootMachine(
                machine_to_invoke=LoopInvokerMachine(new_machine)),
                       num_episodes=num_episodes,
                       env=env,
                       params=params)
            rewards = params.logs["ep_rewards"]
            # NOTE(review): appending and drawing are assumed to apply only
            # to successful machines -- confirm.
            if sum(rewards[-100:]) > 0:
                # The message used to be "{test}done_it".format(**locals()).
                # ``test`` is not a local, so formatting raised KeyError and
                # the handler below silently skipped the plotting steps.
                print("done_it", sum(rewards))
                to_plot.append(
                    PlotParams(curve_to_draw=rewards,
                               label="Random" + str(seed + 1)))
                draw_graph(
                    "pics/" + str(seed),
                    new_machine.get_graph_to_draw(
                        action_to_name_mapping=env.get_actions_as_dict()))
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")
    plot_multi(to_plot)
def part_four(env):
    """Run the best machine of every cluster together and plot the rewards.

    Loads ``machines_part_three.json``, whose string keys look like tuples
    and whose values carry a ``"graph_dict"`` entry.  One stored machine is
    rebuilt per key.  ``AutoMachineSimple`` is run for 300 episodes with the
    rebuilt machines as ``on_model_mapping``, every stored machine is drawn
    to a file named after its cluster, and the reward curve is plotted to
    ``"a"``.
    """
    with open("machines_part_three.json") as json_file:
        stored_by_key = json.load(json_file)

    stored_for_cluster = {}
    for key in stored_by_key:
        # Keys look like "(3, True)": drop the parentheses and evaluate each
        # comma-separated element.
        # NOTE(review): eval() executes arbitrary expressions; this is only
        # acceptable while the JSON file is produced by this pipeline.
        bare = key.replace("(", "").replace(")", "")
        cluster = tuple(map(eval, bare.split(",")))
        stored_for_cluster[cluster] = MachineStored.from_dict(
            stored_by_key[key]["graph_dict"], env=env)

    machine_for_cluster = {
        cluster: stored.get_machine()
        for cluster, stored in stored_for_cluster.items()
    }

    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=300,
        env=env,
        params=params,
        on_model_mapping=machine_for_cluster,
        no_output=True,
    )

    for cluster, stored in stored_for_cluster.items():
        stored.draw(filename=str(cluster))

    plot_multi([
        PlotParams(curve_to_draw=params.logs["ep_rewards"],
                   label="clustering, same env")
    ],
               filename="a")
def runner(ham, num_episodes, env, params, no_output=None):
    """Run episodes on ``env``, alternating ``ham`` with a no-loop machine.

    A private ``AutoMachineNoLoop`` with its own params is created once.  In
    episodes where ``i_episode % 10 >= 5`` the supplied ``ham`` is stepped
    with ``params``; in all other episodes the private machine is stepped
    instead.  The on-model state is printed and the environment rendered on
    every step.  Every tenth episode a progress line is printed unless
    ``no_output`` is set to something other than ``None``.

    Raises:
        AssertionError: the environment is not done once the episode loop
            has ended.
    """
    fallback_ham = AutoMachineNoLoop(env)
    fallback_params = HAMParamsCommon(env)

    for i_episode in range(1, num_episodes + 1):
        env.reset()
        print("****" * 10)
        use_given_ham = i_episode % 10 >= 5
        while not env.is_done():
            print(env.get_on_model())
            # NOTE(review): the fallback machine is assumed to run only in
            # the episodes where ``ham`` does not -- confirm.
            if use_given_ham:
                ham.run(params)
            else:
                fallback_ham.run(fallback_params)
            env.render()
        assert env.is_done(
        ), "The machine is STOPPED before STOP(done) of the environment"

        if i_episode % 10 == 0 and no_output is None:
            print("\r{ham} episode {i_episode}/{num_episodes}.".format(
                ham=ham, i_episode=i_episode, num_episodes=num_episodes),
                  end="")
            sys.stdout.flush()
def part_six(env):
    """Greedily select the cluster machines that improve the total reward.

    Loads ``machines_part_three.json`` (tuple-like string keys; values carry
    ``"graph_dict"`` and ``"total_reward"``).  Candidates are visited in
    descending ``total_reward`` order.  For each candidate two totals are
    accumulated over 10 runs of 800 episodes:

    * ``a`` -- the already accepted clusters plus the candidate,
    * ``b`` -- an empty ``on_model_mapping``.

    The candidate is accepted when ``a > b``.  The accepted machines are
    written to ``machines_part_six.json`` keyed by ``str(cluster)``.
    """
    # Read the input and release the handle before the long training runs.
    with open("machines_part_three.json") as json_file:
        stored_by_key = json.load(json_file)

    candidate_for = {}
    ranked = []
    for key in stored_by_key:
        # Keys look like "(3, True)": drop the parentheses and evaluate each
        # comma-separated element.
        # NOTE(review): eval() executes arbitrary expressions; this is only
        # acceptable while the JSON file is produced by this pipeline.
        bare = key.replace("(", "").replace(")", "")
        cluster = tuple(map(eval, bare.split(",")))
        candidate_for[cluster] = MachineStored.from_dict(
            stored_by_key[key]["graph_dict"], env=env)
        ranked.append([stored_by_key[key]["total_reward"], cluster])

    best_clusters = {}
    for ranked_entry in sorted(ranked, reverse=True):
        cluster = ranked_entry[1]
        print(cluster, type(cluster), cluster[0])

        total_reward_a = 0
        for _attempt in range(10):
            params = HAMParamsCommon(env)
            trial = {**best_clusters, cluster: candidate_for[cluster]}
            to_run = {
                part: stored.get_machine()
                for part, stored in trial.items()
            }
            runner(
                ham=AutoMachineSimple(env),
                num_episodes=800,
                env=env,
                params=params,
                on_model_mapping=to_run,
            )
            total_reward_a += sum(params.logs["ep_rewards"])

        total_reward_b = 0
        for _attempt in range(10):
            # NOTE(review): the baseline runs with an empty mapping and so
            # ignores ``best_clusters``.  Confirm whether it was meant to
            # run with the already accepted clusters instead.
            params = HAMParamsCommon(env)
            runner(
                ham=AutoMachineSimple(env),
                num_episodes=800,
                env=env,
                params=params,
                on_model_mapping={},
            )
            total_reward_b += sum(params.logs["ep_rewards"])

        if total_reward_a > total_reward_b:
            best_clusters[cluster] = candidate_for[cluster]
        print()
        print(total_reward_a, " ::: ", total_reward_b)

    clusters_to_save = {
        str(part): stored.to_dict()
        for part, stored in best_clusters.items()
    }
    with open("machines_part_six.json", "w") as out_f:
        json.dump(obj=clusters_to_save, fp=out_f, sort_keys=True, indent=4)
# Stand-alone script: run AutoBasicMachine on a predefined maze and plot the
# per-episode rewards.  Everything below executes at import time.
from HAM.HAM_core import AutoBasicMachine
from HAM.HAM_experiments.HAM_utils import HAMParamsCommon, maze_world_input_01, plot_multi, ham_runner, PlotParams
from environments.grid_maze_env.maze_world_env import MazeWorld

# Maze environment built from the ``maze_world_input_01`` layout.
env = MazeWorld(maze_world_input_01())
# Parameter/log container for this run; rewards end up in ``params.logs``.
params = HAMParamsCommon(env)
# Run the machine for 300 episodes.
ham_runner(ham=AutoBasicMachine(env), num_episodes=300, env=env, params=params)
# Plot the single reward curve; plot_multi is given a one-element tuple.
plot_multi((PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_basic"), ))
def part_three(env):
    """Pick, for every model part, the stored machine with the best reward.

    Loads the machines from ``machines_part_two.json``.  For every model part
    (in reversed ``env.get_all_on_model()`` order) and every machine, the
    machine is mapped to that part and ``AutoMachineSimple`` is run 5 times
    for 30 episodes; the reward sums are added up.  The machine with the
    strictly highest total per part wins (the first one wins ties).

    The winners are written to ``machines_part_three.json`` as
    ``{str(part): {"total_reward": ..., "graph_dict": ...}}``.  Progress,
    the vertex types of each machine and its total are printed to stdout.
    """
    # Read the input and release the handle before the long training runs.
    with open("machines_part_two.json") as json_file:
        machines = [
            MachineStored.from_dict(graph_dict=ms_dict, env=env)
            for ms_dict in json.load(json_file)
        ]

    ms_len = len(machines)
    best_reward_for = {}
    clusters_to_save = {}
    for on_model_part in reversed(env.get_all_on_model()):
        on_model_part_str = str(on_model_part)
        for index, ms in enumerate(machines):
            machine = ms.get_machine()
            total_reward = 0
            for _attempt in range(5):
                params = HAMParamsCommon(env)
                runner(
                    ham=AutoMachineSimple(env),
                    num_episodes=30,
                    env=env,
                    params=params,
                    on_model_mapping={on_model_part: machine},
                    no_output=True,
                )
                total_reward += sum(params.logs["ep_rewards"])

            # First result for this part, or a strictly better one.
            if (on_model_part_str not in best_reward_for
                    or best_reward_for[on_model_part_str] < total_reward):
                best_reward_for[on_model_part_str] = total_reward
                clusters_to_save[on_model_part_str] = {
                    "total_reward": total_reward,
                    "graph_dict": ms.to_dict()
                }

            print("****")
            print("machine {index} of {ms_len}".format(index=index,
                                                       ms_len=ms_len))
            print()
            for vertex_type in ms.vertex_types:
                print(vertex_type)
            print(on_model_part_str, total_reward)

    with open("machines_part_three.json", "w") as out_f:
        json.dump(obj=clusters_to_save, fp=out_f, sort_keys=True, indent=4)
def q_learning(env, num_episodes, eps=0.1, alpha=0.1, gamma=0.9):
    """Run tabular Q-learning, cluster the visited states and run HAMs.

    Phases, in order:

    1. ``num_episodes`` episodes of epsilon-greedy Q-learning.  ``eps`` is
       multiplied by 0.9 at the start of every episode.  Per visited state
       the function records how many episodes visited it (``bns_count``) and
       a feature tuple ``(*env.decode(s), q_value)`` (``V``).
    2. The visited states are split into 4 clusters with
       ``AgglomerativeClustering`` on those features.
    3. Inside every cluster the 10 most frequently visited states are
       treated as bottlenecks ("bns"); they are marked in colour on
       ``env.mark`` and the environment is rendered once.
    4. One ``BnsMachine`` per cluster is created and the nested ``runner``
       is executed for 2000 episodes; the logged rewards are plotted.

    Args:
        env: environment providing ``reset``/``step``/``decode``/``render``,
            ``action_space.n``, ``is_done`` and the state attribute ``s``.
        num_episodes: number of Q-learning episodes in phase 1.
        eps: initial exploration probability.
        alpha: learning rate of the Q update.
        gamma: discount factor of the Q update.

    Returns:
        ``(to_plot, q_table)``.  ``to_plot`` is the one-element list holding
        the ``PlotParams`` of phase 4 (the per-episode rewards of phase 1
        are discarded when ``to_plot`` is rebound), and ``q_table`` is the
        ``defaultdict`` keyed by ``(state, action)``.
    """
    to_plot = []
    q_table = defaultdict(lambda: 0)
    bns_count = defaultdict(lambda: 0)
    V = defaultdict(lambda: None)

    for _ in tqdm(range(num_episodes)):
        ep_reward = 0
        eps *= 0.9
        s = env.reset()
        # States already counted in bns_count during this episode.
        bn_added = {}

        while True:
            # Epsilon-greedy action selection.
            if np.random.rand(1) < eps:
                action = np.random.choice(env.action_space.n, size=1)[0]
            else:
                action = arg_max_action(q_dict=q_table, state=s, action_space=env.action_space.n)

            next_s, reward, done, _ = env.step(action)
            # NOTE(review): ``a`` is the greedy action of the *current* state
            # ``s`` but is used below to index q_table[next_s, a]; textbook
            # Q-learning takes the arg-max over ``next_s`` -- confirm.
            a = arg_max_action(q_dict=q_table, state=s, action_space=env.action_space.n)
            # noinspection PyTypeChecker
            V[s] = (*env.decode(s), q_table[s, a])

            # making +1 to bn_counts once for each episode
            if not bn_added.get(s, False):
                bns_count[s] += 1
                bn_added[s] = True
            q_table[s, action] = (1 - alpha) * q_table[s, action] + alpha * (reward + gamma * q_table[next_s, a])

            ep_reward += reward
            if done:
                break

            s = next_s
        to_plot.append(ep_reward)
    sleep(0.1)

    def get_clusters(V, n_clusters, affinity):
        """Return ``{state: cluster_label}`` for every state in ``V``."""
        states = sorted(V.keys())
        ss = {"state": states}
        # One column per component of the feature tuples, named "0", "1", ...
        # noinspection PyTypeChecker
        for i in range(len(V[states[0]])):
            ss[str(i)] = [V[_][i] for _ in states]
        df = pd.DataFrame(ss).set_index("state")
        sc = MinMaxScaler()
        # assumes env.decode() yields exactly two values, so that the
        # columns are "0", "1" and "2" -- TODO confirm
        df = df.rename(index=str, columns={"0": "x", "1": "y", "2": 'V'})
        X = df[["x", "y", "V"]]
        X[["V"]] *= 0.5
        # The scaler is fitted on the x and y values stacked into one column.
        sc.fit(np.vstack((df[["x"]], df[["y"]])))
        # NOTE(review): X was selected before this rescaling of df, so the
        # clustering below may see unscaled coordinates -- confirm.
        df[["x", "y"]] = sc.transform(df[["x", "y"]])
        ag = AgglomerativeClustering(n_clusters=n_clusters, affinity=affinity)
        clustered = list(ag.fit_predict(X))
        cluster_state_mapping = {}
        for i in range(len(states)):
            cluster_state_mapping[states[i]] = clustered[i]
        return cluster_state_mapping

    # all_states = V.keys()
    n_clusters = 4
    map_state_to_cluster = get_clusters(V=V, n_clusters=n_clusters, affinity="euclidean")

    def get_bns_in_increasing_order(bns_count):
        """Return the states ordered by visit count, highest count first.

        Despite the name, the sort uses ``reverse=True``.
        """
        state_count_pairs = sorted([(bns_count[_], _) for _ in bns_count], reverse=True)
        return list(map(lambda x: x[1], state_count_pairs, ))

    def get_mapping_for_cluster_to_sorted_bns(sorted_bns, map_state_to_cluster):
        """Group ``sorted_bns`` by cluster, keeping their relative order."""
        res = defaultdict(lambda: list())
        for state in sorted_bns:
            res[map_state_to_cluster[state]].append(state)
        return res

    # bns = bottlenecks
    sorted_bns = get_bns_in_increasing_order(bns_count=bns_count)
    map_cluster_to_sorted_bns = get_mapping_for_cluster_to_sorted_bns(sorted_bns=sorted_bns,
                                                                      map_state_to_cluster=map_state_to_cluster)

    # Label every clustered state with its cluster index.
    env.mark = {}
    for current_state in map_state_to_cluster:
        env.mark[current_state] = str(map_state_to_cluster[current_state])

    class colors:
        # ANSI terminal escape sequences used to colour the marks.
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'
        COLOR_LIST = [HEADER, OKBLUE, OKGREEN, WARNING, FAIL]

    # draw best bns for clusters
    BNS_FOR_CLUSTER = 10
    for q in map_cluster_to_sorted_bns:
        for j in map_cluster_to_sorted_bns[q][:BNS_FOR_CLUSTER]:
            # Colours repeat once there are more clusters than list entries.
            env.mark[j] = colors.COLOR_LIST[q % len(colors.COLOR_LIST)] + str(q) + colors.ENDC
    env.render()
    env.mark = {}

    def runner(hams, num_episodes, env):
        """Run ``num_episodes`` episodes, stepping the cluster's machine.

        ``params`` is not a parameter: it is taken from the enclosing
        function and is assigned there before this function is called.
        """
        for i_episode in range(1, num_episodes + 1):
            env.reset()
            while not env.is_done():
                for ham in hams:
                    if env.s in ham.states_in_my_cluster:
                        # Step until a bottleneck state is reached ...
                        while not env.is_done() and env.s not in ham.bns:
                            ham.machine.run(params)
                        # ... then keep stepping while still in the cluster.
                        while not env.is_done() and env.s in ham.states_in_my_cluster:
                            ham.machine.run(params)

            if i_episode % 10 == 0:
                # ``ham`` in the message is the last machine iterated above.
                print("\r{ham} episode {i_episode}/{num_episodes}.".format(**locals()), end="")
                sys.stdout.flush()

    class BnsMachine:
        """An ``AutoMachineSimple`` bundled with one cluster's state sets."""

        def __init__(self, params, cluster_index, list_of_bns, states_in_my_cluster):
            # Built from the ``env`` of the enclosing q_learning() call.
            self.machine = AutoMachineSimple(env)
            self.cluster_index = cluster_index
            self.bns = set(list_of_bns)
            self.states_in_my_cluster = states_in_my_cluster
            self.params = params

    # A single params object is shared by all cluster machines.
    params = HAMParamsCommon(env)
    # NOTE(review): ``states_in_my_cluster`` is built from the states that
    # have a visit count, grouped by cluster -- confirm this is the intended
    # membership set.
    hams = [BnsMachine(params=params, cluster_index=_, list_of_bns=map_cluster_to_sorted_bns[_][:BNS_FOR_CLUSTER],
                       states_in_my_cluster=set(map_cluster_to_sorted_bns[_])) for _ in map_cluster_to_sorted_bns]

    runner(hams=hams, num_episodes=2000, env=env, )

    # Rebinding drops the per-episode rewards collected during Q-learning.
    to_plot = list()
    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_with_pull_up"))
    plot_multi(to_plot)
    # print(params.logs["ep_rewards"])
    return to_plot, q_table