def go(transitions, brute_force, index_):
    """Build a machine from ``transitions``, probe it, then evaluate it everywhere.

    The machine is wrapped as ``RootMachine(LoopInvokerMachine(machine))``.
    If it is runnable, it is first run for 2 episodes on ``environments[0]``;
    only when that probe yields a positive total reward is it run for 600
    episodes on every entry of ``environments``. The summed reward, together
    with ``brute_force`` and ``index_``, forms the name passed to
    ``draw_graph``.

    :param transitions: transitions handed to ``MachineGraph``.
    :param brute_force: value embedded in the output graph name.
    :param index_: value embedded in the output graph name.
    :return: None
    """
    machine = AbstractMachine(MachineGraph(transitions=transitions))
    am = RootMachine(LoopInvokerMachine(machine))

    if not is_it_machine_runnable(machine):
        return

    # Short probe run; a ChildProcessError leaves the probe reward at 0.
    probe_reward = 0
    try:
        probe_params = HAMParamsCommon(environments[0])
        ham_runner(ham=am, num_episodes=2, env=environments[0], params=probe_params)
        probe_reward = sum(probe_params.logs["ep_rewards"])
    except ChildProcessError:
        pass

    if not probe_reward > 0:
        return

    # TODO (kept from the original): optionally log `brute_force` to "out.txt".
    total_reward = None
    print("\n\n\n")
    for environment in environments:
        run_params = HAMParamsCommon(environment)
        ham_runner(ham=am, num_episodes=600, env=environment, params=run_params)
        gained = sum(run_params.logs["ep_rewards"])
        total_reward = (0 if total_reward is None else total_reward) + gained
        print("to_add:", gained)

    # total_reward stays None when `environments` is empty: nothing to draw.
    if total_reward is None:
        return

    graph_name = "{rew}__{brute_force}_{index_}".format(
        rew=total_reward, brute_force=brute_force, index_=index_)
    # `env` is not defined in this function; presumably a module-level name.
    draw_graph(
        graph_name,
        am.get_graph_to_draw(action_to_name_mapping=env.get_actions_as_dict()))
def run(global_env):
    """Run ``am`` on ``env`` and dump the per-episode rewards to a text file.

    Everything except the (unused) argument comes from names defined outside
    this function: ``name``, ``environments``, ``am``, ``global_episodes``
    and ``env``. The rewards are written one per line to
    ``name + " reward.txt"``.

    :param global_env: not used by this function.
    :return: None
    """
    base_name = name
    params = HAMParamsCommon(environments[0])
    ham_runner(ham=am, num_episodes=global_episodes, env=env, params=params)
    episode_rewards = params.logs["ep_rewards"]

    with open(base_name + " reward.txt", "w") as reward_file:
        reward_file.writelines(str(value) + '\n' for value in episode_rewards)
def _step(self, action):
    """Append ``action`` to the state, extend the machine and score it.

    The action is appended to ``self.state`` and ``self.ham`` is rebuilt
    before the action is validated. If the action equals one of
    ``self.ACTIONS.ACTION_01`` .. ``ACTION_06`` an ``Action`` vertex is added
    through ``self.add``. When the machine is runnable, its reward is taken
    from the ``self.dp`` cache (keyed by state) or computed with
    ``ham_runner`` and cached, and the machine graph is drawn.

    :param action: action to apply; ``None`` is rejected.
    :return: tuple ``(observation, reward, done, None)`` where ``observation``
        is ``self.state`` and ``reward`` is ``None`` if the machine is not
        runnable.
    :raises KeyError: if ``action`` is ``None``.
    """
    self.state = self.state + (action,)
    self.ham = RootMachine(
        LoopInvokerMachine(
            machine_to_invoke=super_runner(self.machine, self.env)))

    if action is None:
        raise KeyError

    # Every recognised action is handled identically; compare lazily and in
    # the original order.
    recognised = ("ACTION_01", "ACTION_02", "ACTION_03",
                  "ACTION_04", "ACTION_05", "ACTION_06")
    if any(action == getattr(self.ACTIONS, attr) for attr in recognised):
        self.add(Action(action=action))

    reward = None
    if is_it_machine_runnable(self.machine):
        if self.state not in self.dp:
            params = HAMParamsCommon(self.env)
            ham_runner(ham=self.ham, num_episodes=self.num_of_episodes,
                       env=self.env, params=params, no_output=True)
            reward = sum(params.logs["ep_rewards"])
            self.dp[self.state] = reward
        else:
            reward = self.dp[self.state]

        # NOTE(review): drawn for cached results as well; confirm this is the
        # intended nesting level.
        picture_name = "pics/" + str(reward).rjust(10, "0") + str(self.state) + " "
        action_names = self.env.get_actions_as_dict()
        draw_graph(
            picture_name,
            self.machine.get_graph_to_draw(action_to_name_mapping=action_names))

    if len(self.state) >= self.max_size:
        self._done = True

    return self.state, reward, self._done, None
def run(global_env):
    """Run the environment-specific machine and save its reward curves.

    The machine class is chosen from the type of ``global_env``:
    ``ArmEnvArticle`` -> ``M1``, ``MazeEnvArticle`` -> ``M2``,
    ``MazeEnvArticleSpecial`` -> ``M3``. The machine is wrapped with
    ``super_runner``, ``LoopInvokerMachine`` and ``RootMachine``, drawn to
    "maze_env_special" and run for ``global_env.episodes_count`` episodes.
    Cumulative and raw episode rewards are written to two text files whose
    names start with "_" followed by the class name of ``global_env``.

    :param global_env: one of the supported environment wrappers.
    :return: None
    :raises KeyError: if ``global_env`` is of an unsupported type.
    """
    # The isinstance checks keep their original order.
    if isinstance(global_env, ArmEnvArticle):
        machine_factory = M1
    elif isinstance(global_env, MazeEnvArticle):
        machine_factory = M2
    elif isinstance(global_env, MazeEnvArticleSpecial):
        machine_factory = M3
    else:
        raise KeyError

    env = global_env.env
    internal_machine = machine_factory(env=env)
    machine = RootMachine(
        LoopInvokerMachine(
            super_runner(call_me_maybe=internal_machine, env=env)))
    # All three environment types share this picture name.
    draw_graph(
        file_name="maze_env_special",
        graph=internal_machine.get_graph_to_draw(
            action_to_name_mapping=env.get_actions_as_dict()))

    params = HAMParamsCommon(env)
    ham_runner(ham=machine, num_episodes=global_env.episodes_count,
               env=env, params=params)
    rewards = params.logs["ep_rewards"]
    if rewards is None:
        return

    prefix = "_" + global_env.__class__.__name__
    with open(prefix + " cumulative_reward.txt", "w") as cumulative_file:
        cumulative_file.writelines(
            str(value) + '\n'
            for value in get_cumulative_rewards(rewards=rewards))
    with open(prefix + " reward.txt", "w") as reward_file:
        reward_file.writelines(str(value) + '\n' for value in rewards)
def main(begin_seed=0):
    """Generate random machines for 5000 seeds and draw each runnable one.

    For every seed a fresh ``MazeEnvArticle`` is created and a random machine
    (at most 6 vertices and 6 edges) is built. A runnable machine is run
    twice against the same ``params`` - once wrapped in ``super_runner`` and
    once directly - and its graph is saved under
    ``"laby/" + <summed reward> + ":::" + <seed>``. ``KeyError`` and
    ``AssertionError`` raised during a run are reported with a short marker
    and the loop continues. Finally ``plot_multi(to_plot)`` is called.

    Alternative setups left by the author: ``MazeEnvArticleSpecial()`` with
    "laby_spec/" and ``ArmEnvArticle()`` with "arm/".

    :param begin_seed: first random seed; seeds ``begin_seed`` ..
        ``begin_seed + 4999`` are tried.
    :return: None
    """
    for seed in range(begin_seed, begin_seed + 5000):
        global_env, save_folder = MazeEnvArticle(), "laby/"
        env, num_episodes = global_env.env, global_env.episodes_count

        new_machine = create_random_machine(
            maximal_number_of_vertex=6,
            maximal_number_of_edges=6,
            random_seed=seed,
            env=env)
        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            wrapped_ham = RootMachine(
                LoopInvokerMachine(
                    machine_to_invoke=super_runner(new_machine, env)))
            ham_runner(ham=wrapped_ham, num_episodes=num_episodes, env=env,
                       params=params, no_output=True)

            plain_ham = RootMachine(
                machine_to_invoke=LoopInvokerMachine(new_machine))
            ham_runner(ham=plain_ham, num_episodes=num_episodes, env=env,
                       params=params, no_output=True)

            # Both runs log into the same params object.
            reward = sum(params.logs["ep_rewards"])
            picture_name = save_folder + str(reward) + ":::" + str(seed)
            draw_graph(
                picture_name,
                new_machine.get_graph_to_draw(
                    action_to_name_mapping=env.get_actions_as_dict()))
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")

    plot_multi(to_plot)
def run(global_env):
    """Run a fixed-seed random machine on ``MazeEnvArticleSpecial`` only.

    ``ArmEnvArticle`` and ``MazeEnvArticle`` are accepted but nothing is run
    or written for them. For ``MazeEnvArticleSpecial`` a random machine is
    created with seed 573846788, drawn to "maze_env_special", run for
    ``global_env.episodes_count`` episodes, and its episode rewards are
    written one per line to
    ``name + "_" + <class name of global_env> + " reward.txt"``.

    :param global_env: environment wrapper.
    :return: None
    :raises KeyError: if ``global_env`` is of an unsupported type.
    """
    # Recognised but intentionally empty cases.
    if isinstance(global_env, ArmEnvArticle) or isinstance(global_env, MazeEnvArticle):
        return
    if not isinstance(global_env, MazeEnvArticleSpecial):
        raise KeyError

    env = global_env.env
    seed = 573846788
    internal_machine = create_random_machine(
        maximal_number_of_vertex=6,
        maximal_number_of_edges=6,
        random_seed=seed,
        env=env)
    machine = RootMachine(
        machine_to_invoke=LoopInvokerMachine(
            machine_to_invoke=internal_machine))
    draw_graph(
        file_name="maze_env_special",
        graph=internal_machine.get_graph_to_draw(
            action_to_name_mapping=env.get_actions_as_dict()))

    params = HAMParamsCommon(env)
    ham_runner(ham=machine, num_episodes=global_env.episodes_count,
               env=env, params=params)
    rewards = params.logs["ep_rewards"]
    if rewards is None:
        return

    report_prefix = name + "_" + global_env.__class__.__name__
    with open(report_prefix + " reward.txt", "w") as reward_file:
        reward_file.writelines(str(value) + '\n' for value in rewards)
def main(global_env, begin_seed=0):
    """Search 5000 random machines and plot those that end with positive reward.

    For each seed a random machine (at most 6 vertices and 6 edges) is
    created for ``global_env.env``. A runnable machine is run for
    ``global_env.episodes_count`` episodes; if the sum of its last 100
    episode rewards is positive, the result is reported, its reward curve is
    appended to ``to_plot`` and its graph is drawn to ``"pics/" + <seed>``.
    After the loop ``plot_multi(to_plot)`` is called.

    :param global_env: environment wrapper providing ``env`` and
        ``episodes_count``.
    :param begin_seed: first random seed; seeds ``begin_seed`` ..
        ``begin_seed + 4999`` are tried.
    :return: None
    """
    for seed in range(begin_seed, begin_seed + 5000):
        env = global_env.env
        num_episodes = global_env.episodes_count
        new_machine = create_random_machine(
            maximal_number_of_vertex=6,
            maximal_number_of_edges=6,
            random_seed=seed,
            env=env)
        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            ham_runner(
                ham=RootMachine(
                    machine_to_invoke=LoopInvokerMachine(new_machine)),
                num_episodes=num_episodes, env=env, params=params)
            ep_rewards = params.logs["ep_rewards"]
            if sum(ep_rewards[-100:]) > 0:
                # The message used to be "{test}done_it".format(**locals()).
                # `test` is not a local, so formatting raised KeyError, which
                # the handler below swallowed: successful machines were
                # reported as "keyError" and never plotted or drawn.
                print("done_it", sum(ep_rewards))
                to_plot.append(
                    PlotParams(curve_to_draw=ep_rewards,
                               label="Random" + str(seed + 1)))
                draw_graph(
                    "pics/" + str(seed),
                    new_machine.get_graph_to_draw(
                        action_to_name_mapping=env.get_actions_as_dict()))
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")

    plot_multi(to_plot)
def run(global_env):
    """Run the hand-built machine for ``global_env`` and save its rewards.

    The machine class and the picture name depend on the type of
    ``global_env``: ``ArmEnvArticle`` -> ``PullUpMachine`` / "arm_env",
    ``MazeEnvArticle`` -> ``InterestingMachine`` / "maze_env",
    ``MazeEnvArticleSpecial`` -> ``InterestingMachineLeftUpInteresting`` /
    "maze_env_special". The machine is drawn, run for
    ``global_env.episodes_count`` episodes, and the episode rewards are
    written one per line to
    ``name + "_" + <class name of global_env> + " reward.txt"``.

    :param global_env: one of the supported environment wrappers.
    :return: None
    :raises KeyError: if ``global_env`` is of an unsupported type.
    """
    # The isinstance checks keep their original order.
    if isinstance(global_env, ArmEnvArticle):
        machine_factory, picture_name = PullUpMachine, "arm_env"
    elif isinstance(global_env, MazeEnvArticle):
        machine_factory, picture_name = InterestingMachine, "maze_env"
    elif isinstance(global_env, MazeEnvArticleSpecial):
        machine_factory, picture_name = (
            InterestingMachineLeftUpInteresting, "maze_env_special")
    else:
        raise KeyError

    env = global_env.env
    internal_machine = machine_factory(env=env)
    machine = RootMachine(
        machine_to_invoke=LoopInvokerMachine(
            machine_to_invoke=internal_machine))
    draw_graph(
        file_name=picture_name,
        graph=internal_machine.get_graph_to_draw(
            action_to_name_mapping=env.get_actions_as_dict()))

    params = HAMParamsCommon(env)
    ham_runner(ham=machine, num_episodes=global_env.episodes_count,
               env=env, params=params)
    rewards = params.logs["ep_rewards"]

    report_prefix = name + "_" + global_env.__class__.__name__
    with open(report_prefix + " reward.txt", "w") as reward_file:
        reward_file.writelines(str(value) + '\n' for value in rewards)
# Script: run AutoBasicMachine on a MazeWorld environment for 300 episodes
# and plot the logged per-episode rewards as a single curve.
from HAM.HAM_core import AutoBasicMachine
from HAM.HAM_experiments.HAM_utils import HAMParamsCommon, maze_world_input_01, plot_multi, ham_runner, PlotParams
from environments.grid_maze_env.maze_world_env import MazeWorld

# The maze layout comes from the maze_world_input_01() helper.
env = MazeWorld(maze_world_input_01())
params = HAMParamsCommon(env)
# params.logs["ep_rewards"] is read right after this call; presumably
# ham_runner fills it during the run.
ham_runner(ham=AutoBasicMachine(env), num_episodes=300, env=env, params=params)
# plot_multi is given a one-element tuple of curves.
plot_multi((PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_basic"), ))
# Script: run AutoBasicMachine on an ArmEnv for 300 episodes, collect its
# reward curve in `to_plot`, then start defining the vertices of a
# "pull up" machine.
# NOTE(review): AutoBasicMachine, Start and Action are used below but are not
# imported in the lines visible here - confirm they are imported elsewhere.
# maze_world_input_01 and plot_multi are imported but not used in the
# visible part of the script.
from HAM.HAM_experiments.HAM_utils import HAMParamsCommon, maze_world_input_01, plot_multi, ham_runner, PlotParams
from environments.arm_env.arm_env import ArmEnv

# Curves collected for plotting.
to_plot = []
env = ArmEnv(episode_max_length=300,
             size_x=5,
             size_y=3,
             cubes_cnt=4,
             action_minus_reward=-1,
             finish_reward=100,
             tower_target_size=4)
num_episodes = 300

params = HAMParamsCommon(env)
ham_runner(ham=AutoBasicMachine(env), num_episodes=num_episodes, env=env, params=params)
to_plot.append(
    PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_basic"))

# --------------------------------------------------------

# Vertices of the pull-up machine: one Start vertex, one "ON" action,
# four "DOWN" actions and three "UP" actions. Each vertex is a separate
# Action instance, even where the underlying action is the same.
pull_up_start = Start()
pull_up_on = Action(action=env.get_actions_as_dict()["ON"])
pull_up_down_01 = Action(action=env.get_actions_as_dict()["DOWN"])
pull_up_down_02 = Action(action=env.get_actions_as_dict()["DOWN"])
pull_up_down_03 = Action(action=env.get_actions_as_dict()["DOWN"])
pull_up_down_04 = Action(action=env.get_actions_as_dict()["DOWN"])
pull_up_up_01 = Action(action=env.get_actions_as_dict()["UP"])
pull_up_up_02 = Action(action=env.get_actions_as_dict()["UP"])
pull_up_up_03 = Action(action=env.get_actions_as_dict()["UP"])