def part_seven(env):
    with open("machines_part_six.json") as json_file:
        cluster_best_machine_mapper_str_key = json.load(json_file)

    cluster_best_machine_mapper = {}
    for key in cluster_best_machine_mapper_str_key:
        # JSON keys are stringified tuples, e.g. "(2, 0)"; convert them back to real tuples
        tuple_key = key.replace("(", "").replace(")", "")
        tuple_key = tuple(map(eval, tuple_key.split(",")))
        cluster_best_machine_mapper[tuple_key] = MachineStored.from_dict(
            cluster_best_machine_mapper_str_key[key], env=env).get_machine()
        MachineStored.from_dict(cluster_best_machine_mapper_str_key[key], env=env).draw("ololo" + str(key))

    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=2000,
        env=env,
        params=params,
        on_model_mapping=cluster_best_machine_mapper,
        # no_output=True,
    )
    to_plot = list()
    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="clustering"))
    save_to_gif("olololo", params.logs["gif"][-1])

    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=2000,
        env=env,
        params=params,
        on_model_mapping={},
        # no_output=True,
    )
    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="q-learning"))
    plot_multi(to_plot, filename="ololo_result")
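# A minimal helper sketch (an assumption, not part of the original script): the loop
# above rebuilds tuple keys from their JSON string form with eval; ast.literal_eval
# performs the same conversion without executing arbitrary code. The name
# parse_tuple_key is hypothetical.
import ast


def parse_tuple_key(key):
    # "(2, 0)" -> (2, 0)
    return ast.literal_eval(key)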
def main():
    # Monkey-patch the environment with an "on model" abstraction:
    # the abstract state is (arm x position, whether a cube is currently grasped).
    def get_on_model(self):
        return self.get_arm_x(), self.is_cube_graped()

    def get_arm_x(self):
        return self._size_x - self._arm_x

    def is_cube_graped(self):
        cube_dx, cube_dy = self.MOVE_ACTIONS[self.ACTIONS.DOWN]
        cube_x, cube_y = self._arm_x + cube_dx, self._arm_y + cube_dy
        return self._magnet_toggle and self.ok(cube_x, cube_y) and self._grid[cube_x][cube_y] == 1

    ArmEnvToggleTopOnly.get_arm_x = get_arm_x
    ArmEnvToggleTopOnly.is_cube_graped = is_cube_graped
    ArmEnvToggleTopOnly.get_on_model = get_on_model

    env = ArmEnvToggleTopOnly(size_x=5, size_y=5, cubes_cnt=4, episode_max_length=600,
                              finish_reward=100, action_minus_reward=-0.001, tower_target_size=4)

    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineNoLoop(env),
        num_episodes=2000,
        env=env,
        params=params,
        # no_output=True
    )
    to_plot = []
    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_with_pull_up"))
    plot_multi(to_plot)
def main(begin_seed=0):
    to_plot = []
    for seed in range(begin_seed, begin_seed + 5000):
        # maze = maze_world_input_special()
        # maze = generate_maze_please(size_x=2, size_y=2)
        # env = MazeWorldEpisodeLength(maze=maze)
        # global_env, save_folder = MazeEnvArticleSpecial(), "laby_spec/"
        global_env, save_folder = MazeEnvArticle(), "laby/"
        # global_env, save_folder = ArmEnvArticle(), "arm/"
        env, num_episodes = global_env.env, global_env.episodes_count

        new_machine = create_random_machine(maximal_number_of_vertex=6, maximal_number_of_edges=6,
                                            random_seed=seed, env=env)
        if is_it_machine_runnable(new_machine):
            params = HAMParamsCommon(env)
            try:
                ham_runner(ham=RootMachine(LoopInvokerMachine(machine_to_invoke=super_runner(new_machine, env))),
                           num_episodes=num_episodes, env=env, params=params, no_output=True)
                ham_runner(ham=RootMachine(machine_to_invoke=LoopInvokerMachine(new_machine)),
                           num_episodes=num_episodes, env=env, params=params, no_output=True)
                # to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="Random" + str(seed + 1)))
                reward = sum(params.logs["ep_rewards"])
                draw_graph(save_folder + str(reward) + ":::" + str(seed),
                           new_machine.get_graph_to_draw(action_to_name_mapping=env.get_actions_as_dict()))
                # draw_graph("pics/" + str(reward).rjust(10, "0"),
                #            new_machine.get_graph_to_draw(action_to_name_mapping=env.get_actions_as_dict()))
            except KeyError:
                print("keyError", end="")
            except AssertionError:
                print("assertion", end="")
    # to_plot stays empty unless the append above is uncommented
    plot_multi(to_plot)
def main(global_env, begin_seed=0):
    to_plot = []
    for seed in range(begin_seed, begin_seed + 5000):
        env = global_env.env
        num_episodes = global_env.episodes_count
        new_machine = create_random_machine(maximal_number_of_vertex=6, maximal_number_of_edges=6,
                                            random_seed=seed, env=env)
        if is_it_machine_runnable(new_machine):
            params = HAMParamsCommon(env)
            try:
                ham_runner(ham=RootMachine(machine_to_invoke=LoopInvokerMachine(new_machine)),
                           num_episodes=num_episodes, env=env, params=params)
                # keep only machines that still earn reward at the end of training
                if sum(params.logs["ep_rewards"][-100:]) > 0:
                    print("{seed} done_it".format(**locals()), sum(params.logs["ep_rewards"]))
                    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="Random" + str(seed + 1)))
                    draw_graph("pics/" + str(seed),
                               new_machine.get_graph_to_draw(action_to_name_mapping=env.get_actions_as_dict()))
            except KeyError:
                print("keyError", end="")
            except AssertionError:
                print("assertion", end="")
    plot_multi(to_plot)
def part_four(env):
    with open("machines_part_three.json") as json_file:
        cluster_best_machine_mapper_str_key = json.load(json_file)

    cluster_best_machine_mapper = {}
    for key in cluster_best_machine_mapper_str_key:
        # JSON keys are stringified tuples, e.g. "(2, 0)"; convert them back to real tuples
        tuple_key = key.replace("(", "").replace(")", "")
        tuple_key = tuple(map(eval, tuple_key.split(",")))
        cluster_best_machine_mapper[tuple_key] = MachineStored.from_dict(
            cluster_best_machine_mapper_str_key[key]["graph_dict"], env=env)

    cluster_best_machine_mapper_machine = {}
    for i in cluster_best_machine_mapper:
        cluster_best_machine_mapper_machine[i] = cluster_best_machine_mapper[i].get_machine()

    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=300,
        env=env,
        params=params,
        on_model_mapping=cluster_best_machine_mapper_machine,
        no_output=True,
    )

    for cluster in cluster_best_machine_mapper:
        ms = cluster_best_machine_mapper[cluster]
        ms.draw(filename=str(cluster))

    to_plot = list()
    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="clustering, same env"))
    plot_multi(to_plot, filename="a")
from HAM.HAM_core import AutoBasicMachine
from HAM.HAM_experiments.HAM_utils import HAMParamsCommon, maze_world_input_01, plot_multi, ham_runner, PlotParams
from environments.grid_maze_env.maze_world_env import MazeWorld

env = MazeWorld(maze_world_input_01())
params = HAMParamsCommon(env)

ham_runner(ham=AutoBasicMachine(env), num_episodes=300, env=env, params=params)

plot_multi((PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_basic"),))
stop = Stop()
transitions = (
    MachineRelation(left=start, right=choice_one),
    MachineRelation(left=choice_one, right=left),
    MachineRelation(left=choice_one, right=right),
    MachineRelation(left=choice_one, right=off),
    MachineRelation(left=choice_one, right=call),
    MachineRelation(left=call, right=stop),

    MachineRelation(left=left, right=stop, label=0),
    MachineRelation(left=right, right=stop, label=0),
    MachineRelation(left=off, right=stop, label=0),

    MachineRelation(left=left, right=stop, label=1),
    MachineRelation(left=right, right=stop, label=1),
    MachineRelation(left=off, right=stop, label=1),
)
pull_up_machine = RootMachine(machine_to_invoke=LoopInvokerMachine(AbstractMachine(MachineGraph(transitions=transitions))))

params = HAMParamsCommon(env)
ham_runner(ham=pull_up_machine, num_episodes=num_episodes, env=env, params=params)

to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_with_pull_up"))
plot_multi(to_plot)
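# The fragment above assumes the machine vertices (start, choice_one, left, right,
# off, call) and the running env, num_episodes and to_plot were constructed earlier
# in the script. A hedged sketch of those vertex definitions in the HAM_core style is
# given below; the exact action names and the machine passed to Call are assumptions:
#
#     start = Start()
#     choice_one = Choice()
#     left = Action(action=env.ACTIONS.LEFT)
#     right = Action(action=env.ACTIONS.RIGHT)
#     off = Action(action=env.ACTIONS.TOGGLE)
#     call = Call(machine_to_call=some_lower_level_machine)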
# Third-party imports used by this fragment (project-level helpers such as
# AutoMachineSimple, HAMParamsCommon, PlotParams and plot_multi are assumed to be
# imported elsewhere in the module, as in the original fragment).
import sys
from collections import defaultdict
from time import sleep

import numpy as np
import pandas as pd
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm


def q_learning(env, num_episodes, eps=0.1, alpha=0.1, gamma=0.9):
    to_plot = []
    q_table = defaultdict(lambda: 0)
    bns_count = defaultdict(lambda: 0)
    V = defaultdict(lambda: None)

    for _ in tqdm(range(num_episodes)):
        ep_reward = 0
        eps *= 0.9
        s = env.reset()
        bn_added = {}
        while True:
            if np.random.rand(1) < eps:
                action = np.random.choice(env.action_space.n, size=1)[0]
            else:
                action = arg_max_action(q_dict=q_table, state=s, action_space=env.action_space.n)
            next_s, reward, done, _ = env.step(action)
            # greedy action w.r.t. the current state; it is reused both for V[s]
            # and for the bootstrap term of the update below
            a = arg_max_action(q_dict=q_table, state=s, action_space=env.action_space.n)
            # noinspection PyTypeChecker
            V[s] = (*env.decode(s), q_table[s, a])

            # increment the bottleneck counter at most once per episode for each state
            if not bn_added.get(s, False):
                bns_count[s] += 1
                bn_added[s] = True

            q_table[s, action] = (1 - alpha) * q_table[s, action] + alpha * (reward + gamma * q_table[next_s, a])
            ep_reward += reward
            if done:
                break
            s = next_s
        to_plot.append(ep_reward)
    sleep(0.1)

    def get_clusters(V, n_clusters, affinity):
        states = sorted(V.keys())
        ss = {"state": states}
        # noinspection PyTypeChecker
        for i in range(len(V[states[0]])):
            ss[str(i)] = [V[_][i] for _ in states]
        df = pd.DataFrame(ss).set_index("state")
        sc = MinMaxScaler()
        df = df.rename(index=str, columns={"0": "x", "1": "y", "2": "V"})
        X = df[["x", "y", "V"]]
        X[["V"]] *= 0.5
        sc.fit(np.vstack((df[["x"]], df[["y"]])))
        df[["x", "y"]] = sc.transform(df[["x", "y"]])
        ag = AgglomerativeClustering(n_clusters=n_clusters, affinity=affinity)
        clustered = list(ag.fit_predict(X))
        cluster_state_mapping = {}
        for i in range(len(states)):
            cluster_state_mapping[states[i]] = clustered[i]
        return cluster_state_mapping

    # all_states = V.keys()
    n_clusters = 4
    map_state_to_cluster = get_clusters(V=V, n_clusters=n_clusters, affinity="euclidean")

    def get_bns_in_increasing_order(bns_count):
        state_count_pairs = sorted([(bns_count[_], _) for _ in bns_count], reverse=True)
        return list(map(lambda x: x[1], state_count_pairs))

    def get_mapping_for_cluster_to_sorted_bns(sorted_bns, map_state_to_cluster):
        res = defaultdict(lambda: list())
        for state in sorted_bns:
            res[map_state_to_cluster[state]].append(state)
        return res

    # bns = bottlenecks
    sorted_bns = get_bns_in_increasing_order(bns_count=bns_count)
    map_cluster_to_sorted_bns = get_mapping_for_cluster_to_sorted_bns(sorted_bns=sorted_bns,
                                                                      map_state_to_cluster=map_state_to_cluster)

    env.mark = {}
    for current_state in map_state_to_cluster:
        env.mark[current_state] = str(map_state_to_cluster[current_state])

    class colors:
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

        COLOR_LIST = [HEADER, OKBLUE, OKGREEN, WARNING, FAIL]

    # draw best bns for clusters
    BNS_FOR_CLUSTER = 10
    for q in map_cluster_to_sorted_bns:
        for j in map_cluster_to_sorted_bns[q][:BNS_FOR_CLUSTER]:
            env.mark[j] = colors.COLOR_LIST[q % len(colors.COLOR_LIST)] + str(q) + colors.ENDC
    env.render()
    env.mark = {}

    def runner(hams, num_episodes, env):
        for i_episode in range(1, num_episodes + 1):
            env.reset()
            while not env.is_done():
                for ham in hams:
                    if env.s in ham.states_in_my_cluster:
                        # first drive the agent to one of the cluster's bottlenecks,
                        # then keep acting while it stays inside the cluster
                        while not env.is_done() and env.s not in ham.bns:
                            ham.machine.run(params)
                        while not env.is_done() and env.s in ham.states_in_my_cluster:
                            ham.machine.run(params)
            if i_episode % 10 == 0:
                print("\r{ham} episode {i_episode}/{num_episodes}.".format(**locals()), end="")
                sys.stdout.flush()

    class BnsMachine:
        def __init__(self, params, cluster_index, list_of_bns, states_in_my_cluster):
            self.machine = AutoMachineSimple(env)
            self.cluster_index = cluster_index
            self.bns = set(list_of_bns)
            self.states_in_my_cluster = states_in_my_cluster
            self.params = params

    params = HAMParamsCommon(env)
    hams = [BnsMachine(params=params,
                       cluster_index=_,
                       list_of_bns=map_cluster_to_sorted_bns[_][:BNS_FOR_CLUSTER],
                       states_in_my_cluster=set(map_cluster_to_sorted_bns[_]))
            for _ in map_cluster_to_sorted_bns]

    runner(hams=hams, num_episodes=2000, env=env)

    to_plot = list()
    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_with_pull_up"))
    plot_multi(to_plot)
    # print(params.logs["ep_rewards"])
    return to_plot, q_table
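# The q_learning function above relies on an arg_max_action helper that is not
# defined in this fragment. A minimal sketch, assuming q_dict maps (state, action)
# pairs to values (as the defaultdict q_table above does):
def arg_max_action(q_dict, state, action_space):
    # Return the greedy action for `state` under the tabular Q-function `q_dict`.
    return max(range(action_space), key=lambda action: q_dict[state, action])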