Example #1
0
def part_seven(env):
    """Compare cluster-guided machines with plain Q-learning on ``env``.

    Loads the per-cluster machines stored in ``machines_part_six.json``,
    draws every loaded machine, then runs ``AutoMachineSimple`` twice for
    2000 episodes: once with the loaded cluster -> machine mapping and once
    with an empty mapping.  The last gif frame set logged by the first run is
    saved and both reward curves are plotted to ``ololo_result``.

    :param env: environment handed to ``MachineStored.from_dict``,
        ``HAMParamsCommon``, ``AutoMachineSimple`` and ``runner``.
    """
    # Function-scope import so the block stays self-contained.
    import ast

    # Read the file up front so the handle is not held open during training.
    with open("machines_part_six.json") as json_file:
        cluster_best_machine_mapper_str_key = json.load(json_file)

    cluster_best_machine_mapper = {}
    for key, machine_dict in cluster_best_machine_mapper_str_key.items():
        # Keys are strings with parentheses and comma-separated items
        # (presumably stringified tuples such as "(1, 2)").  Each item is
        # parsed with literal_eval instead of eval so that file content can
        # never execute arbitrary code.
        tuple_key = tuple(
            ast.literal_eval(part.strip())
            for part in key.replace("(", "").replace(")", "").split(","))
        # Build the stored machine once and reuse it for running and drawing.
        stored = MachineStored.from_dict(machine_dict, env=env)
        cluster_best_machine_mapper[tuple_key] = stored.get_machine()
        stored.draw("ololo" + str(key))

    # Run 1: machines selected through the cluster mapping.
    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=2000,
        env=env,
        params=params,
        on_model_mapping=cluster_best_machine_mapper,
    )
    to_plot = [
        PlotParams(curve_to_draw=params.logs["ep_rewards"],
                   label="clustering"),
    ]

    save_to_gif("olololo", params.logs["gif"][-1])

    # Run 2: same machine with an empty mapping, labelled "q-learning".
    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=2000,
        env=env,
        params=params,
        on_model_mapping={},
    )
    to_plot.append(
        PlotParams(curve_to_draw=params.logs["ep_rewards"],
                   label="q-learning"))

    plot_multi(to_plot, filename="ololo_result")
Example #2
0
def main():
    """Train ``AutoMachineNoLoop`` on a 5x5 arm environment and plot rewards.

    Three accessor methods are attached to ``ArmEnvToggleTopOnly`` before the
    environment is created; ``get_on_model`` combines the other two into the
    pair ``(arm x measured from the far side, cube-grabbed flag)``.
    """

    def get_arm_x(self):
        # Arm x-coordinate counted from the opposite edge of the grid.
        return self._size_x - self._arm_x

    def is_cube_graped(self):
        # Inspect the cell reached by one DOWN move from the arm position.
        dx, dy = self.MOVE_ACTIONS[self.ACTIONS.DOWN]
        below_x = self._arm_x + dx
        below_y = self._arm_y + dy
        # Truthy only when the magnet is on, the cell is valid according to
        # self.ok, and the grid holds 1 there (presumably a cube).
        return (self._magnet_toggle
                and self.ok(below_x, below_y)
                and self._grid[below_x][below_y] == 1)

    def get_on_model(self):
        return self.get_arm_x(), self.is_cube_graped()

    # Attach the helpers to the environment class under their own names.
    for patched in (get_arm_x, is_cube_graped, get_on_model):
        setattr(ArmEnvToggleTopOnly, patched.__name__, patched)

    env = ArmEnvToggleTopOnly(
        size_x=5,
        size_y=5,
        cubes_cnt=4,
        episode_max_length=600,
        finish_reward=100,
        action_minus_reward=-0.001,
        tower_target_size=4,
    )

    params = HAMParamsCommon(env)
    runner(ham=AutoMachineNoLoop(env),
           num_episodes=2000,
           env=env,
           params=params)

    reward_curve = PlotParams(curve_to_draw=params.logs["ep_rewards"],
                              label="HAM_with_pull_up")
    to_plot = [reward_curve]
    plot_multi(to_plot)
Example #3
0
def main(begin_seed=0):
    """Evaluate 5000 randomly generated machines and draw each one that ran.

    For every seed in ``[begin_seed, begin_seed + 5000)`` a random machine is
    created.  Runnable machines are executed twice with the same ``params``
    object (first wrapped through ``super_runner``, then directly), and the
    machine graph is drawn to ``<save_folder><reward>:::<seed>`` where
    ``reward`` is the sum of ``params.logs["ep_rewards"]``.

    :param begin_seed: first random seed of the scanned range.
    """
    for seed in range(begin_seed, begin_seed + 5000):
        # Other setups tried here before: MazeEnvArticleSpecial() with
        # "laby_spec/" and ArmEnvArticle() with "arm/".
        global_env = MazeEnvArticle()
        save_folder = "laby/"

        env = global_env.env
        num_episodes = global_env.episodes_count

        new_machine = create_random_machine(
            maximal_number_of_vertex=6,
            maximal_number_of_edges=6,
            random_seed=seed,
            env=env,
        )

        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            # First pass: the machine produced by super_runner, looped.
            wrapped = LoopInvokerMachine(
                machine_to_invoke=super_runner(new_machine, env))
            ham_runner(ham=RootMachine(wrapped),
                       num_episodes=num_episodes,
                       env=env,
                       params=params,
                       no_output=True)

            # Second pass: the raw random machine, looped.
            looped = LoopInvokerMachine(new_machine)
            ham_runner(ham=RootMachine(machine_to_invoke=looped),
                       num_episodes=num_episodes,
                       env=env,
                       params=params,
                       no_output=True)

            reward = sum(params.logs["ep_rewards"])
            picture_name = save_folder + str(reward) + ":::" + str(seed)
            graph = new_machine.get_graph_to_draw(
                action_to_name_mapping=env.get_actions_as_dict())
            draw_graph(picture_name, graph)
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")

    # ``to_plot`` is not defined in this function; it must come from the
    # enclosing module.
    plot_multi(to_plot)
Example #4
0
def main(global_env, begin_seed=0):
    """Search 5000 random machines and keep those that end with positive reward.

    For every seed in ``[begin_seed, begin_seed + 5000)`` a random machine is
    created and, if runnable, executed on ``global_env.env`` for
    ``global_env.episodes_count`` episodes.  When the sum of the last 100
    episode rewards is positive, the machine is reported, its reward curve is
    appended to ``to_plot`` and its graph is drawn to ``pics/<seed>``.

    :param global_env: object exposing ``env`` and ``episodes_count``.
    :param begin_seed: first random seed of the scanned range.
    """
    for seed in range(begin_seed, begin_seed + 5000):
        env = global_env.env
        num_episodes = global_env.episodes_count

        new_machine = create_random_machine(maximal_number_of_vertex=6,
                                            maximal_number_of_edges=6,
                                            random_seed=seed,
                                            env=env)

        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            ham_runner(ham=RootMachine(
                machine_to_invoke=LoopInvokerMachine(new_machine)),
                       num_episodes=num_episodes,
                       env=env,
                       params=params)

            ep_rewards = params.logs["ep_rewards"]
            if sum(ep_rewards[-100:]) > 0:
                # The original message formatted "{test}" from locals(), but
                # no local named ``test`` exists.  That raised KeyError, which
                # the handler below swallowed, so a successful machine was
                # never plotted or drawn.  The seed is reported instead.
                print("{seed}done_it".format(seed=seed), sum(ep_rewards))

                # ``to_plot`` is not defined in this function; it must come
                # from the enclosing module.
                to_plot.append(
                    PlotParams(curve_to_draw=ep_rewards,
                               label="Random" + str(seed + 1)))
                draw_graph(
                    "pics/" + str(seed),
                    new_machine.get_graph_to_draw(
                        action_to_name_mapping=env.get_actions_as_dict()))
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")
    plot_multi(to_plot)
Example #5
0
def part_four(env):
    """Run the cluster-selected machines from part three on ``env``.

    Loads the stored machines from ``machines_part_three.json``, runs
    ``AutoMachineSimple`` for 300 episodes with the cluster -> machine
    mapping, draws every stored machine to a file named after its cluster
    key, and plots the reward curve to ``a``.

    :param env: environment handed to ``MachineStored.from_dict``,
        ``HAMParamsCommon``, ``AutoMachineSimple`` and ``runner``.
    """
    # Function-scope import so the block stays self-contained.
    import ast

    # Read the file up front so the handle is not held open during training.
    with open("machines_part_three.json") as json_file:
        cluster_best_machine_mapper_str_key = json.load(json_file)

    cluster_best_machine_mapper = {}
    for key, stored_entry in cluster_best_machine_mapper_str_key.items():
        # Keys are strings with parentheses and comma-separated items
        # (presumably stringified tuples such as "(1, 2)").  Each item is
        # parsed with literal_eval instead of eval so that file content can
        # never execute arbitrary code.
        tuple_key = tuple(
            ast.literal_eval(part.strip())
            for part in key.replace("(", "").replace(")", "").split(","))
        cluster_best_machine_mapper[tuple_key] = MachineStored.from_dict(
            stored_entry["graph_dict"], env=env)

    # Runnable machines, keyed exactly like the stored ones.
    cluster_best_machine_mapper_machine = {
        cluster: stored.get_machine()
        for cluster, stored in cluster_best_machine_mapper.items()
    }

    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineSimple(env),
        num_episodes=300,
        env=env,
        params=params,
        on_model_mapping=cluster_best_machine_mapper_machine,
        no_output=True,
    )

    for cluster, stored in cluster_best_machine_mapper.items():
        stored.draw(filename=str(cluster))

    to_plot = [
        PlotParams(curve_to_draw=params.logs["ep_rewards"],
                   label="clustering, same env"),
    ]
    plot_multi(to_plot, filename="a")
Example #6
0
from HAM.HAM_core import AutoBasicMachine
from HAM.HAM_experiments.HAM_utils import HAMParamsCommon, maze_world_input_01, plot_multi, ham_runner, PlotParams
from environments.grid_maze_env.maze_world_env import MazeWorld

# Train the basic automatic HAM for 300 episodes on the maze built from
# maze_world_input_01() and plot its per-episode rewards.
env = MazeWorld(maze_world_input_01())
params = HAMParamsCommon(env)
ham_runner(
    ham=AutoBasicMachine(env),
    num_episodes=300,
    env=env,
    params=params,
)

basic_curve = PlotParams(curve_to_draw=params.logs["ep_rewards"],
                         label="HAM_basic")
plot_multi((basic_curve, ))
Example #7
0
stop = Stop()

# Machine graph: start -> choice_one; choice_one branches to left, right, off
# and call; call -> stop; each of left/right/off -> stop under labels 0 and 1.
# The relations are created in the same order as a hand-written listing.
action_vertices = (left, right, off)
transitions = (
    MachineRelation(left=start, right=choice_one),
    *(MachineRelation(left=choice_one, right=target)
      for target in (*action_vertices, call)),
    MachineRelation(left=call, right=stop),
    *(MachineRelation(left=source, right=stop, label=edge_label)
      for edge_label in (0, 1)
      for source in action_vertices),
)

# Wrap the graph so it is re-invoked in a loop under a root machine.
pull_up_graph = MachineGraph(transitions=transitions)
pull_up_machine = RootMachine(
    machine_to_invoke=LoopInvokerMachine(AbstractMachine(pull_up_graph)))

params = HAMParamsCommon(env)
ham_runner(
    ham=pull_up_machine,
    num_episodes=num_episodes,
    env=env,
    params=params,
)
pull_up_curve = PlotParams(curve_to_draw=params.logs["ep_rewards"],
                           label="HAM_with_pull_up")
to_plot.append(pull_up_curve)

plot_multi(to_plot)
Example #8
0
def q_learning(env, num_episodes, eps=0.1, alpha=0.1, gamma=0.9):
    """Run tabular Q-learning, cluster visited states, then train one HAM per cluster.

    Phases, in order:

    1. Epsilon-greedy Q-learning for ``num_episodes`` episodes.  Alongside the
       Q-table it records, per visited state, ``(*env.decode(s), Q(s, a*))``
       in ``V`` and the number of episodes in which the state was visited in
       ``bns_count``.
    2. Agglomerative clustering of the visited states into 4 clusters using
       the rows stored in ``V``.
    3. For every cluster, the 10 most frequently visited states are taken as
       its bottlenecks ("bns") and highlighted via ``env.mark`` /
       ``env.render()``.
    4. One ``AutoMachineSimple`` per cluster is run for 2000 episodes through
       the local ``runner``; the resulting reward curve is plotted.

    :param env: environment providing ``reset``, ``step``, ``action_space.n``,
        ``decode``, ``render``, ``is_done``, ``s`` and a writable ``mark``.
    :param num_episodes: number of Q-learning episodes of phase 1.
    :param eps: initial exploration probability; multiplied by 0.9 at the
        start of every episode.
    :param alpha: learning rate of the Q-update.
    :param gamma: discount factor of the Q-update.
    :return: ``(to_plot, q_table)`` where ``to_plot`` is a one-element list
        holding the ``PlotParams`` of the HAM run and ``q_table`` maps
        ``(state, action)`` to the learned value.
    """
    # NOTE: the per-episode rewards collected here are never used; the name
    # ``to_plot`` is rebound before plotting at the end of the function.
    to_plot = []

    q_table = defaultdict(lambda: 0)
    bns_count = defaultdict(lambda: 0)
    V = defaultdict(lambda: None)

    for _ in tqdm(range(num_episodes)):
        ep_reward = 0
        eps *= 0.9
        s = env.reset()

        bn_added = {}
        while True:
            if np.random.rand(1) < eps:
                action = np.random.choice(env.action_space.n, size=1)[0]
            else:
                action = arg_max_action(q_dict=q_table, state=s, action_space=env.action_space.n)

            next_s, reward, done, _ = env.step(action)
            # Greedy action of the CURRENT state, used for the state value V[s].
            a = arg_max_action(q_dict=q_table, state=s, action_space=env.action_space.n)
            # noinspection PyTypeChecker
            V[s] = (*env.decode(s), q_table[s, a])
            # making +1 to bn_counts once for each episode
            if not bn_added.get(s, False):
                bns_count[s] += 1
                bn_added[s] = True
            # The Q-learning target bootstraps from the best action of the
            # NEXT state.  The original indexed q_table[next_s, a] with the
            # greedy action of the current state, which is not max_a' Q(s', a').
            next_a = arg_max_action(q_dict=q_table, state=next_s, action_space=env.action_space.n)
            q_table[s, action] = (1 - alpha) * q_table[s, action] + alpha * (reward + gamma * q_table[next_s, next_a])

            ep_reward += reward
            if done:
                break

            s = next_s
        to_plot.append(ep_reward)
    # Short pause after the loop (presumably lets the tqdm bar finish drawing).
    sleep(0.1)

    def get_clusters(V, n_clusters, affinity):
        """Return a dict mapping every state in ``V`` to its cluster index."""
        states = sorted(V.keys())
        ss = {"state": states}
        # One column per component of the tuples stored in V.
        # noinspection PyTypeChecker
        for i in range(len(V[states[0]])):
            ss[str(i)] = [V[_][i] for _ in states]
        df = pd.DataFrame(ss).set_index("state")
        sc = MinMaxScaler()
        # Assumes the rows have exactly three components (x, y, value) — TODO confirm.
        df = df.rename(index=str, columns={"0": "x", "1": "y", "2": 'V'})
        # NOTE(review): X is selected before df's x/y columns are rescaled
        # below; if this selection is a copy, the clustering runs on unscaled
        # coordinates — confirm the intent.
        X = df[["x", "y", "V"]]
        X[["V"]] *= 0.5
        # NOTE(review): the scaler is fitted on a single stacked column but
        # applied to two columns; verify this against the installed
        # scikit-learn version.
        sc.fit(np.vstack((df[["x"]], df[["y"]])))
        df[["x", "y"]] = sc.transform(df[["x", "y"]])
        ag = AgglomerativeClustering(n_clusters=n_clusters, affinity=affinity)
        clustered = list(ag.fit_predict(X))
        cluster_state_mapping = {}
        for i in range(len(states)):
            cluster_state_mapping[states[i]] = clustered[i]
        return cluster_state_mapping

    n_clusters = 4
    map_state_to_cluster = get_clusters(V=V, n_clusters=n_clusters, affinity="euclidean")

    def get_bns_in_increasing_order(bns_count):
        """Return the states sorted by visit count, most visited first.

        Despite the name, the sort uses ``reverse=True``.
        """
        state_count_pairs = sorted([(bns_count[_], _) for _ in bns_count], reverse=True)
        return list(map(lambda x: x[1], state_count_pairs, ))

    def get_mapping_for_cluster_to_sorted_bns(sorted_bns, map_state_to_cluster):
        """Group ``sorted_bns`` by cluster, keeping their order inside each group."""
        res = defaultdict(lambda: list())
        for state in sorted_bns:
            res[map_state_to_cluster[state]].append(state)
        return res

    # bns = bottlenecks
    sorted_bns = get_bns_in_increasing_order(bns_count=bns_count)
    map_cluster_to_sorted_bns = get_mapping_for_cluster_to_sorted_bns(sorted_bns=sorted_bns,
                                                                      map_state_to_cluster=map_state_to_cluster)

    # Label every clustered state with its cluster index for rendering.
    env.mark = {}

    for current_state in map_state_to_cluster:
        env.mark[current_state] = str(map_state_to_cluster[current_state])

    class colors:
        # ANSI escape sequences used to colour the rendered cluster labels.
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

        COLOR_LIST = [HEADER, OKBLUE, OKGREEN, WARNING, FAIL]

    # draw best bns for clusters
    BNS_FOR_CLUSTER = 10
    for q in map_cluster_to_sorted_bns:
        for j in map_cluster_to_sorted_bns[q][:BNS_FOR_CLUSTER]:
            env.mark[j] = colors.COLOR_LIST[q % len(colors.COLOR_LIST)] + str(q) + colors.ENDC
    env.render()
    env.mark = {}

    def runner(hams, num_episodes, env):
        """Run the per-cluster machines for ``num_episodes`` episodes.

        Reads ``params`` from the enclosing scope; it is assigned before this
        function is called.
        """
        for i_episode in range(1, num_episodes + 1):
            env.reset()
            # NOTE(review): if env.s belongs to no ham's cluster, nothing in
            # this loop advances the environment — confirm that cannot happen.
            while not env.is_done():
                for ham in hams:
                    if env.s in ham.states_in_my_cluster:
                        # First run until a bottleneck state is reached, then
                        # keep running until the cluster is left.
                        while not env.is_done() and env.s not in ham.bns:
                            ham.machine.run(params)
                        while not env.is_done() and env.s in ham.states_in_my_cluster:
                            ham.machine.run(params)

            if i_episode % 10 == 0:
                print("\r{ham} episode {i_episode}/{num_episodes}.".format(**locals()), end="")
                sys.stdout.flush()

    class BnsMachine:
        """Bundle of one ``AutoMachineSimple`` with its cluster's states and bottlenecks."""

        def __init__(self, params, cluster_index, list_of_bns, states_in_my_cluster):
            self.machine = AutoMachineSimple(env)
            self.cluster_index = cluster_index
            self.bns = set(list_of_bns)
            self.states_in_my_cluster = states_in_my_cluster
            self.params = params

    params = HAMParamsCommon(env)
    hams = [BnsMachine(params=params, cluster_index=_, list_of_bns=map_cluster_to_sorted_bns[_][:BNS_FOR_CLUSTER], states_in_my_cluster=set(map_cluster_to_sorted_bns[_])) for _ in
            map_cluster_to_sorted_bns]

    runner(hams=hams,
           num_episodes=2000,
           env=env,
           )
    to_plot = list()
    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_with_pull_up"))
    plot_multi(to_plot)
    return to_plot, q_table