コード例 #1
0
ファイル: main.py プロジェクト: cog-isa/HRL-grid
def go(transitions, brute_force, index_):
    """Build a machine from ``transitions``, probe it, then evaluate it on every environment.

    The machine is wrapped as ``RootMachine(LoopInvokerMachine(machine))``.
    If it is runnable, it is first run for 2 episodes on ``environments[0]``.
    Only when that probe collects a positive total reward is the machine run
    for 600 episodes on each entry of the module-level ``environments``; the
    summed reward is then used in the name of the drawn graph.

    Args:
        transitions: Passed to ``MachineGraph(transitions=...)``.
        brute_force: Only used as part of the output graph name.
        index_: Only used as part of the output graph name.

    Returns:
        None.
    """
    machine = AbstractMachine(MachineGraph(transitions=transitions))
    am = RootMachine(LoopInvokerMachine(machine))

    if not is_it_machine_runnable(machine):
        return

    # Short probe run. A ChildProcessError during the probe is treated as
    # "no reward": sum_rew keeps its initial value of 0.
    sum_rew = 0
    try:
        probe_params = HAMParamsCommon(environments[0])
        ham_runner(ham=am, num_episodes=2, env=environments[0], params=probe_params)
        sum_rew = sum(probe_params.logs["ep_rewards"])
    except ChildProcessError:
        pass

    if not sum_rew > 0:
        return

    # TODO (carried over from the original): optionally record brute_force
    # in "out.txt" here instead of running the full evaluation.
    print("\n\n\n")
    total = None  # stays None when `environments` is empty, so nothing is drawn
    for candidate_env in environments:
        run_params = HAMParamsCommon(candidate_env)
        ham_runner(ham=am, num_episodes=600, env=candidate_env, params=run_params)
        gained = sum(run_params.logs["ep_rewards"])
        total = (0 if total is None else total) + gained
        print("to_add:", gained)

    if total is None:
        return

    # NOTE(review): `env` is not defined inside this function; presumably it
    # is a module-level variable - confirm against the full file.
    draw_graph(
        "{rew}__{brute_force}_{index_}".format(
            rew=total, brute_force=brute_force, index_=index_),
        am.get_graph_to_draw(
            action_to_name_mapping=env.get_actions_as_dict()))
コード例 #2
0
ファイル: test.py プロジェクト: cog-isa/HRL-grid
    def run(global_env):
        """Run ``am`` and write one reward per line to ``"<name> reward.txt"``.

        ``name``, ``am``, ``env``, ``environments`` and ``global_episodes``
        are taken from the enclosing scope. ``global_env`` is accepted but
        not used by this function.
        """
        file_stem = name
        # NOTE(review): params are built from environments[0] while the run
        # itself uses `env`; presumably these are the same object - confirm.
        params = HAMParamsCommon(environments[0])
        ham_runner(ham=am, num_episodes=global_episodes, env=env, params=params)
        episode_rewards = params.logs["ep_rewards"]

        with open(file_stem + " reward.txt", "w") as report:
            report.writelines(str(value) + '\n' for value in episode_rewards)
コード例 #3
0
    def _step(self, action):
        """Append ``action`` to the state, extend the machine and score it.

        The action is appended to ``self.state`` and ``self.ham`` is rebuilt
        around ``self.machine``. If the action is one of ``ACTION_01`` ..
        ``ACTION_06`` a new ``Action`` vertex is added via ``self.add``.
        When the resulting machine is runnable its reward is either taken
        from the ``self.dp`` cache (keyed by state) or computed with
        ``ham_runner`` and cached; the machine graph is drawn in both cases.

        Args:
            action: The action to apply; must not be ``None``.

        Returns:
            A tuple ``(observation, reward, done, None)`` where
            ``observation`` is the updated ``self.state`` and ``reward`` is
            ``None`` when the machine is not runnable.

        Raises:
            KeyError: If ``action`` is ``None``.
        """
        # Validate before mutating anything: previously the state tuple and
        # self.ham were already updated when the KeyError was raised, leaving
        # a stray None in self.state after a rejected call.
        if action is None:
            raise KeyError("action must not be None")

        self.state = self.state + (action,)

        self.ham = RootMachine(
            LoopInvokerMachine(
                machine_to_invoke=super_runner(self.machine, self.env)))
        reward = None

        # All six known actions are handled identically; any other value is
        # recorded in the state but adds no vertex (unchanged behaviour).
        known_actions = (
            self.ACTIONS.ACTION_01,
            self.ACTIONS.ACTION_02,
            self.ACTIONS.ACTION_03,
            self.ACTIONS.ACTION_04,
            self.ACTIONS.ACTION_05,
            self.ACTIONS.ACTION_06,
        )
        if any(action == known for known in known_actions):
            self.add(Action(action=action))

        if is_it_machine_runnable(self.machine):
            if self.state in self.dp:
                reward = self.dp[self.state]
            else:
                params = HAMParamsCommon(self.env)
                ham_runner(ham=self.ham,
                           num_episodes=self.num_of_episodes,
                           env=self.env,
                           params=params,
                           no_output=True)
                reward = sum(params.logs["ep_rewards"])
                self.dp[self.state] = reward
            # Drawn on cache hits as well as on fresh evaluations.
            draw_graph(
                "pics/" + str(reward).rjust(10, "0") + str(self.state) + " ",
                self.machine.get_graph_to_draw(
                    action_to_name_mapping=self.env.get_actions_as_dict()))

        observation = self.state
        if len(self.state) >= self.max_size:
            self._done = True

        return observation, reward, self._done, None
コード例 #4
0
ファイル: handcrafted_hie.py プロジェクト: cog-isa/HRL-grid
def run(global_env):
    """Run the hand-crafted machine matching ``global_env`` and save its rewards.

    ``ArmEnvArticle`` uses ``M1``, ``MazeEnvArticle`` uses ``M2`` and
    ``MazeEnvArticleSpecial`` uses ``M3``. The machine is wrapped with
    ``super_runner`` / ``LoopInvokerMachine`` / ``RootMachine``, its graph is
    drawn, and it is run for ``global_env.episodes_count`` episodes. The
    cumulative and per-episode rewards are written to two text files whose
    names start with ``"_" + <class name of global_env>``.

    Args:
        global_env: Experiment wrapper exposing ``env`` and ``episodes_count``.

    Raises:
        KeyError: If ``global_env`` is none of the three supported types.
    """
    # The isinstance checks keep their original order.
    # NOTE(review): if MazeEnvArticleSpecial subclasses MazeEnvArticle, the
    # M3 branch can never be selected - confirm the class hierarchy.
    if isinstance(global_env, ArmEnvArticle):
        machine_type = M1
    elif isinstance(global_env, MazeEnvArticle):
        machine_type = M2
    elif isinstance(global_env, MazeEnvArticleSpecial):
        machine_type = M3
    else:
        raise KeyError

    env = global_env.env
    internal_machine = machine_type(env=env)
    wrapped = super_runner(call_me_maybe=internal_machine, env=env)
    machine = RootMachine(LoopInvokerMachine(wrapped))

    # Every environment type draws to the same file name.
    action_names = env.get_actions_as_dict()
    draw_graph(file_name="maze_env_special",
               graph=internal_machine.get_graph_to_draw(
                   action_to_name_mapping=action_names))

    params = HAMParamsCommon(env)
    ham_runner(ham=machine,
               num_episodes=global_env.episodes_count,
               env=env,
               params=params)
    rewards = params.logs["ep_rewards"]
    if rewards is None:
        return

    full_name = "_" + global_env.__class__.__name__
    with open(full_name + " cumulative_reward.txt", "w") as cumulative_file:
        cumulative_file.writelines(
            str(value) + '\n'
            for value in get_cumulative_rewards(rewards=rewards))

    with open(full_name + " reward.txt", "w") as reward_file:
        reward_file.writelines(str(value) + '\n' for value in rewards)
コード例 #5
0
ファイル: experiment_04.py プロジェクト: cog-isa/HRL-grid
def main(begin_seed=0):
    """Generate 5000 seeded random machines and draw every runnable one.

    For each seed in ``[begin_seed, begin_seed + 5000)`` a fresh
    ``MazeEnvArticle`` is created and a random machine is built. Runnable
    machines are executed twice with the same ``params`` (once wrapped in
    ``super_runner``, once directly), and the graph is saved under
    ``"laby/"`` with the summed reward and the seed in its name.
    ``KeyError`` and ``AssertionError`` raised while running or drawing are
    reported with a short marker and the search continues.

    Args:
        begin_seed: First random seed to try.
    """
    for seed in range(begin_seed, begin_seed + 5000):
        # Alternatives tried by the original author: MazeEnvArticleSpecial()
        # with "laby_spec/" and ArmEnvArticle() with "arm/".
        global_env, save_folder = MazeEnvArticle(), "laby/"
        env = global_env.env
        num_episodes = global_env.episodes_count

        new_machine = create_random_machine(
            maximal_number_of_vertex=6,
            maximal_number_of_edges=6,
            random_seed=seed,
            env=env,
        )
        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            wrapped_ham = RootMachine(
                LoopInvokerMachine(
                    machine_to_invoke=super_runner(new_machine, env)))
            ham_runner(ham=wrapped_ham, num_episodes=num_episodes,
                       env=env, params=params, no_output=True)

            plain_ham = RootMachine(
                machine_to_invoke=LoopInvokerMachine(new_machine))
            ham_runner(ham=plain_ham, num_episodes=num_episodes,
                       env=env, params=params, no_output=True)

            reward = sum(params.logs["ep_rewards"])
            graph_name = save_folder + str(reward) + ":::" + str(seed)
            draw_graph(
                graph_name,
                new_machine.get_graph_to_draw(
                    action_to_name_mapping=env.get_actions_as_dict()))
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")

    # NOTE(review): nothing in this function appends to `to_plot`;
    # presumably it is a module-level list - confirm.
    plot_multi(to_plot)
コード例 #6
0
def run(global_env):
    """Run one fixed-seed random machine on ``MazeEnvArticleSpecial`` and save rewards.

    For ``ArmEnvArticle`` and ``MazeEnvArticle`` nothing is run or written.
    For ``MazeEnvArticleSpecial`` a random machine is created with seed
    573846788, drawn, run for ``global_env.episodes_count`` episodes, and the
    per-episode rewards are written to
    ``"<name>_<class name of global_env> reward.txt"`` (``name`` comes from
    the enclosing module scope).

    Args:
        global_env: Experiment wrapper exposing ``env`` and ``episodes_count``.

    Raises:
        KeyError: If ``global_env`` is none of the three known types.
    """
    if isinstance(global_env, ArmEnvArticle) or isinstance(global_env, MazeEnvArticle):
        # No experiment is defined for these environment types.
        return
    if not isinstance(global_env, MazeEnvArticleSpecial):
        raise KeyError

    env = global_env.env
    internal_machine = create_random_machine(
        maximal_number_of_vertex=6,
        maximal_number_of_edges=6,
        random_seed=573846788,
        env=env,
    )
    looped = LoopInvokerMachine(machine_to_invoke=internal_machine)
    machine = RootMachine(machine_to_invoke=looped)

    action_names = env.get_actions_as_dict()
    draw_graph(file_name="maze_env_special",
               graph=internal_machine.get_graph_to_draw(
                   action_to_name_mapping=action_names))

    params = HAMParamsCommon(env)
    ham_runner(ham=machine,
               num_episodes=global_env.episodes_count,
               env=env,
               params=params)
    rewards = params.logs["ep_rewards"]
    if rewards is None:
        return

    full_name = name + "_" + global_env.__class__.__name__
    with open(full_name + " reward.txt", "w") as reward_file:
        reward_file.writelines(str(value) + '\n' for value in rewards)
コード例 #7
0
ファイル: random_serch.py プロジェクト: cog-isa/HRL-grid
def main(global_env, begin_seed=0):
    """Random search: try 5000 seeded random machines and keep the rewarding ones.

    For each seed in ``[begin_seed, begin_seed + 5000)`` a random machine is
    built for ``global_env.env``. Runnable machines are executed for
    ``global_env.episodes_count`` episodes. When the reward summed over the
    last 100 episodes is positive, the total reward is printed, the reward
    curve is appended to ``to_plot`` and the machine graph is drawn to
    ``"pics/<seed>"``. Finally all collected curves are plotted.

    ``KeyError`` and ``AssertionError`` raised while running or recording a
    machine are reported with a short marker and the search continues.

    Args:
        global_env: Experiment wrapper exposing ``env`` and ``episodes_count``.
        begin_seed: First random seed to try.
    """
    for seed in range(begin_seed, begin_seed + 5000):
        env = global_env.env
        num_episodes = global_env.episodes_count

        new_machine = create_random_machine(maximal_number_of_vertex=6,
                                            maximal_number_of_edges=6,
                                            random_seed=seed,
                                            env=env)

        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            ham_runner(ham=RootMachine(
                machine_to_invoke=LoopInvokerMachine(new_machine)),
                       num_episodes=num_episodes,
                       env=env,
                       params=params)

            rewards = params.logs["ep_rewards"]
            if sum(rewards[-100:]) > 0:
                # The original message was "{test}done_it".format(**locals()).
                # There is no local named `test`, so formatting always raised
                # KeyError, which the handler below swallowed - successful
                # machines were therefore never plotted or drawn.
                print("done_it", sum(rewards))

                to_plot.append(
                    PlotParams(curve_to_draw=rewards,
                               label="Random" + str(seed + 1)))
                draw_graph(
                    "pics/" + str(seed),
                    new_machine.get_graph_to_draw(
                        action_to_name_mapping=env.get_actions_as_dict()))
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")
    plot_multi(to_plot)
コード例 #8
0
def run(global_env):
    """Run the predefined machine for ``global_env`` and save per-episode rewards.

    ``ArmEnvArticle`` uses ``PullUpMachine`` (graph file ``"arm_env"``),
    ``MazeEnvArticle`` uses ``InterestingMachine`` (``"maze_env"``) and
    ``MazeEnvArticleSpecial`` uses ``InterestingMachineLeftUpInteresting``
    (``"maze_env_special"``). The machine is drawn, run for
    ``global_env.episodes_count`` episodes, and the rewards are written to
    ``"<name>_<class name of global_env> reward.txt"`` (``name`` comes from
    the enclosing module scope).

    Args:
        global_env: Experiment wrapper exposing ``env`` and ``episodes_count``.

    Raises:
        KeyError: If ``global_env`` is none of the three supported types.
    """
    if isinstance(global_env, ArmEnvArticle):
        machine_type, picture_name = PullUpMachine, "arm_env"
        params_before_drawing = True
    elif isinstance(global_env, MazeEnvArticle):
        machine_type, picture_name = InterestingMachine, "maze_env"
        params_before_drawing = False
    elif isinstance(global_env, MazeEnvArticleSpecial):
        machine_type = InterestingMachineLeftUpInteresting
        picture_name = "maze_env_special"
        params_before_drawing = False
    else:
        raise KeyError

    env = global_env.env
    internal_machine = machine_type(env=env)
    looped = LoopInvokerMachine(machine_to_invoke=internal_machine)
    machine = RootMachine(machine_to_invoke=looped)

    # The arm variant builds its params before drawing, the maze variants
    # after; that relative order is kept as it was.
    params = HAMParamsCommon(env) if params_before_drawing else None
    draw_graph(file_name=picture_name,
               graph=internal_machine.get_graph_to_draw(
                   action_to_name_mapping=env.get_actions_as_dict()))
    if not params_before_drawing:
        params = HAMParamsCommon(env)

    ham_runner(ham=machine,
               num_episodes=global_env.episodes_count,
               env=env,
               params=params)
    rewards = params.logs["ep_rewards"]

    full_name = name + "_" + global_env.__class__.__name__
    with open(full_name + " reward.txt", "w") as reward_file:
        reward_file.writelines(str(value) + '\n' for value in rewards)
コード例 #9
0
from HAM.HAM_core import AutoBasicMachine
from HAM.HAM_experiments.HAM_utils import HAMParamsCommon, maze_world_input_01, plot_multi, ham_runner, PlotParams
from environments.grid_maze_env.maze_world_env import MazeWorld

# Run AutoBasicMachine for 300 episodes on the maze built from
# maze_world_input_01() and plot the per-episode rewards as "HAM_basic".
maze_input = maze_world_input_01()
env = MazeWorld(maze_input)
params = HAMParamsCommon(env)
basic_machine = AutoBasicMachine(env)
ham_runner(ham=basic_machine, num_episodes=300, env=env, params=params)

basic_curve = PlotParams(curve_to_draw=params.logs["ep_rewards"],
                         label="HAM_basic")
plot_multi((basic_curve,))
コード例 #10
0
ファイル: experiment_03.py プロジェクト: cog-isa/HRL-grid
from HAM.HAM_experiments.HAM_utils import HAMParamsCommon, maze_world_input_01, plot_multi, ham_runner, PlotParams
from environments.arm_env.arm_env import ArmEnv

# Curves collected for plotting; each entry is a PlotParams.
to_plot = []
env = ArmEnv(episode_max_length=300,
             size_x=5,
             size_y=3,
             cubes_cnt=4,
             action_minus_reward=-1,
             finish_reward=100,
             tower_target_size=4)
num_episodes = 300

# Baseline: run AutoBasicMachine for num_episodes episodes and record its
# per-episode rewards under the label "HAM_basic".
# NOTE(review): AutoBasicMachine (and Start / Action used below) are not
# imported in the visible part of this file - confirm the import block.
params = HAMParamsCommon(env)
ham_runner(ham=AutoBasicMachine(env),
           num_episodes=num_episodes,
           env=env,
           params=params)
to_plot.append(
    PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_basic"))

# --------------------------------------------------------

# Vertices named pull_up_*: one Start vertex plus separate Action vertices
# for the "ON", "DOWN" and "UP" entries of env.get_actions_as_dict().
# Each vertex is its own Action instance, even where the action repeats.
# NOTE(review): the listing is cut off after this point; how the vertices
# are wired together is not visible here.
pull_up_start = Start()
pull_up_on = Action(action=env.get_actions_as_dict()["ON"])
pull_up_down_01 = Action(action=env.get_actions_as_dict()["DOWN"])
pull_up_down_02 = Action(action=env.get_actions_as_dict()["DOWN"])
pull_up_down_03 = Action(action=env.get_actions_as_dict()["DOWN"])
pull_up_down_04 = Action(action=env.get_actions_as_dict()["DOWN"])
pull_up_up_01 = Action(action=env.get_actions_as_dict()["UP"])
pull_up_up_02 = Action(action=env.get_actions_as_dict()["UP"])
pull_up_up_03 = Action(action=env.get_actions_as_dict()["UP"])