def main():
    """Work through the MiningCrafting goals in order and report each outcome.

    One primitive skill is created per action index. When
    ``given_conditions`` is true each skill gets a ``FixedCondition`` built
    from ``env.conditions``; otherwise the condition is ``None``.

    Each goal is attempted for at most 20 runs. A goal is reported as a
    success as soon as 8 runs have returned a truthy value from
    ``runner.run``, and as a failure if that count is never reached. The
    skill base held by the runner is printed at the end.
    """
    env = MiningCraftingEnv(stochastic_reset=False)
    d_s = env.d_s
    n_a = env.high_a[0]

    # Learning conditions from data or fixed conditions?
    given_conditions = False

    # One primitive skill per action index, with or without a fixed condition.
    primitive_skills = []
    for action in range(n_a):
        effect = Effect([action], [0], [1], n_a)
        if given_conditions:
            condition = FixedCondition(
                env.conditions[action],
                [1.] * len(env.conditions[action]))
        else:
            condition = None
        primitive_skills.append(PrimitiveSkill(effect, action, condition))

    runner = Runner(env, primitive_skills, verbose=1)

    goals = [
        MiningCraftingEnv.goal_stick,
        MiningCraftingEnv.goal_stone_pick,
        MiningCraftingEnv.goal_coal,
        MiningCraftingEnv.goal_furnace,
        MiningCraftingEnv.goal_smelt_iron,
        MiningCraftingEnv.goal_iron_pick,
        MiningCraftingEnv.goal_gold_ore,
        MiningCraftingEnv.goal_goldware,
        MiningCraftingEnv.goal_necklace,
        MiningCraftingEnv.goal_earrings,
    ]

    n_success = 8
    for skill_idx, goal_spec in enumerate(goals):
        n_solved = 0
        for episode in range(20):
            # A fresh goal object is built for every run.
            goal = SparseState(goal_spec[0], goal_spec[1], d_s)
            # NOTE(review): the second argument (20) is presumably the
            # episode length -- confirm against Runner.run.
            if not runner.run(goal, 20):
                continue
            n_solved += 1
            if n_solved >= n_success:
                print("####### Success at skill {} after {} "
                      "episodes\n\n".format(skill_idx, episode + 1))
                break
        else:
            # Reached only when the loop was never broken, i.e. the success
            # threshold was not met within the allotted runs.
            print("####### Failed at skill {}\n\n".format(skill_idx))

    print("\n\n\n\n%%%%%%% END")
    print_skills(env, runner.il.skill_base)
def main():
    """Run the MCTS agent on the last curriculum goal and print a summary.

    The environment is picked by the hard-coded ``env_name``. The goal is
    the final entry of ``env.curriculum`` wrapped in a ``SparseState``. The
    two values returned by ``run_n_episodes`` are printed together with the
    run settings.

    Raises:
        RuntimeError: if ``env_name`` is not one of the known environments.
    """
    n_runs = 1
    n_mcts_iter = 100
    env_name = "mining"

    # Factories are lazy so only the selected environment is constructed.
    env_factories = {
        "mining": lambda: MiningCraftingEnv(stochastic_reset=False),
        "baking": lambda: BakingEnv(stochastic_reset=False),
        "random": lambda: RandomGraphEnv(stochastic_reset=False,
                                         noise_prob=0.2),
    }
    make_env = env_factories.get(env_name)
    if make_env is None:
        raise RuntimeError("Unknown environment name")
    env = make_env()

    # The target is the final entry of the environment's curriculum.
    goal = env.curriculum[-1]
    s_goal = SparseState(goal[0], goal[1], env.d_s)

    agent = Mcts(iteration_limit=n_mcts_iter)
    trajectory_length, time_ = run_n_episodes(env, agent, s_goal, n_runs)

    print("#### FINAL ####")
    print("number of runs:", n_runs)
    print("number mcts iter:", n_mcts_iter)
    print("trajectory length:", trajectory_length)
    print("time:", time_)
print(seq_a) # Compute stats time_buffer = time_buffer[1:len(time_buffer)] + [time.time() - t] success_buffer = success_buffer[1:len(success_buffer)] + [success] length_buffer = length_buffer[1:len(length_buffer)] + [len(seq_a)] stats = compute_stats(success_buffer, length_buffer, time_buffer) return stats[0], stats[1] if __name__ == "__main__": n_runs = 1 env_name = "mining" if env_name == "mining": env = MiningCraftingEnv(stochastic_reset=False) elif env_name == "baking": env = BakingEnv(stochastic_reset=False) elif env_name == "random": env = RandomGraphEnv(stochastic_reset=False, noise_prob=0.2) else: raise RuntimeError("Unknown environment name") goal = env.curriculum[-1] s_goal = SparseState(goal[0], goal[1], env.d_s) n_a = env.high_a[0] primitive_skills = [ PrimitiveSkill( Effect([i], [0], [1], n_a), i, FixedCondition(env.conditions[i], [1.] * len(env.conditions[i]))) for i in range(n_a)
n_ep_buffer = n_ep_buffer[1:len(n_ep_buffer)] + [n_train_ep] # Print learned skills if n_ep == n_eps - 1: print_skills(env, agent.skill_base) stats = compute_stats(success_buffer, length_buffer, time_buffer, n_ep_buffer) return stats[0], stats[1], stats[2] if __name__ == "__main__": n_runs = 1 env_name = "mining" if env_name == "mining": env, ep_length = MiningCraftingEnv(stochastic_reset=False), 20 elif env_name == "baking": env, ep_length = BakingEnv(stochastic_reset=False), 40 elif env_name == "drawer": env, ep_length = DrawerEnv(stochastic_reset=False), 10 else: raise RuntimeError("Unknown environment name") goal = env.curriculum[-1] ds = env.d_s n_a = env.high_a[0] n_max_success_per_goal = 5 s_goal = SparseState(goal[0], goal[1], ds) primitive_skills = [PrimitiveSkill(Effect([i], [0], [1], n_a), i, None) for i in range(n_a)] trajectory_length, time_, n_train_ep = run_n_episodes(