def learn_demonstration(self, trajectory, goal):
    """
    Learn from demonstration.
    :param trajectory: (Trajectory)
    :param goal: demonstration goal as (SparseState)
    :return: None
    """
    # Identify if existing skills match the trajectory and goal
    s_start = SparseState.from_dense_state(trajectory.initial_state)
    effect = Effect.from_sparse_start_goal(s_start, goal)
    skills = self.skill_base.skills_from_effect(effect)
    candidate_skills = [s for s in skills if not s.fails_in(s_start)]

    # Skill sequence actually executed in the demonstration and its overall
    # effect (composition of the sub-skill effects)
    # trajectory.refine()
    # skill_seq, seq_effect = trajectory.to_skill_seq(effect)
    skill_seq = [n.skill for n in trajectory.nodes]
    seq_effect = reduce(add, [s.effect for s in skill_seq])

    if len(candidate_skills) == 0:
        # No existing skill matches: learn a new skill from the demonstration
        new_skill = Skill(seq_effect, skill_seq, trajectory.initial_state)
        self.skill_base.add_skill(new_skill)
    else:
        # Otherwise, record the sequence as another successful way of
        # achieving each matching (non-primitive) skill
        for skill in candidate_skills:
            if not isinstance(skill, PrimitiveSkill):
                skill.add_successful_skill_seq(seq_effect, skill_seq,
                                               trajectory.initial_state)
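# Hedged usage sketch (assumption, not from the source): feeding an already
# collected, successful trajectory back into the learner. `trajectory` is
# assumed to be the Trajectory object documented above; how it is collected
# (e.g. by the Runner during an episode) is not shown here. The goal
# construction mirrors the training scripts further down.
def _learn_from_demo_sketch(learner, trajectory, env):
    goal_dims, goal_values = env.curriculum[-1]
    goal = SparseState(goal_dims, goal_values, env.d_s)
    learner.learn_demonstration(trajectory, goal)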
def fails_in(self, x):
    """
    Returns whether the skill has previously failed in this state.
    :param x: dense state
    :return: bool
    """
    dims, values = self.sample()
    ss = SparseState(dims, values, x.shape[0])
    return not ss.matches(x)
def get_condition(self):
    """
    Returns the necessary conditions for skill to succeed.
    :return: conditions as (SparseState)
    """
    # Logistic regression weights reflect which dimensions are important
    ds = len(self.effect.end_state)
    dims, values = self.conditions.sample()
    return SparseState(dims, values, ds)
def main():
    env = MiningCraftingEnv(stochastic_reset=False)
    d_s = env.d_s
    n_a = env.high_a[0]

    # Learning conditions from data or fixed conditions?
    given_conditions = False
    if given_conditions:
        primitive_skills = [
            PrimitiveSkill(
                Effect([i], [0], [1], n_a), i,
                FixedCondition(env.conditions[i],
                               [1.] * len(env.conditions[i])))
            for i in range(n_a)
        ]
    else:
        primitive_skills = [
            PrimitiveSkill(Effect([i], [0], [1], n_a), i, None)
            for i in range(n_a)
        ]

    runner = Runner(env, primitive_skills, verbose=1)

    goals = [
        MiningCraftingEnv.goal_stick,
        MiningCraftingEnv.goal_stone_pick,
        MiningCraftingEnv.goal_coal,
        MiningCraftingEnv.goal_furnace,
        MiningCraftingEnv.goal_smelt_iron,
        MiningCraftingEnv.goal_iron_pick,
        MiningCraftingEnv.goal_gold_ore,
        MiningCraftingEnv.goal_goldware,
        MiningCraftingEnv.goal_necklace,
        MiningCraftingEnv.goal_earrings,
    ]

    n_success = 8
    for i in range(len(goals)):
        success = 0
        for ite in range(20):
            goal = SparseState(goals[i][0], goals[i][1], d_s)
            if runner.run(goal, 20):
                success += 1
            if success >= n_success:
                print("####### Success at skill {} after {} "
                      "episodes\n\n".format(i, ite + 1))
                break
        if success < n_success:
            print("####### Failed at skill {}\n\n".format(i))

    print("\n\n\n\n%%%%%%% END")
    print_skills(env, runner.il.skill_base)
def plan(self, state, goal):
    """
    Generates a plan from state to goal.
    :param state: Starting state (dense)
    :param goal: Goal as (SparseState)
    :return: plan as (list) of (Skill)
    """
    s_start = SparseState.from_dense_state(state)
    if self.current_plan is None:
        try:
            self.current_plan = self.planner.plan(s_start, goal)
        except RuntimeError:
            # Planner failed: fall back to a single random skill
            self.current_plan = [self.skill_base.random_skill()]
            if self.verbose > 1:
                print("Planner failed (low rec), random action.")
            return self.current_plan

    # If next skill's effect is already satisfied, remove it
    plan = self.current_plan
    while len(plan) > 0:
        skill, new_plan = IntentPlanner.behead(plan, randomness=False)
        if not skill.effect.end_state.matches(s_start):
            break
        plan = new_plan
    self.current_plan = plan

    # # If the next skill can't be executed, execute random action
    # if len(self.current_plan) > 0:
    #     first_skill = IntentPlanner.behead(plan)[0]
    #     if first_skill.fails_in(s_start):
    #         # self.current_plan = []
    #         # Random action
    #         self.current_plan = [self.skill_base.random_skill()]
    #         if self.verbose > 1:
    #             print("Random action2")
    #         return self.current_plan

    # If no plan left, try to plan again
    if len(self.current_plan) == 0:
        self.current_plan = None
        return self.plan(state, goal)

    return self.current_plan
def train_with_curriculum(env, agent, curriculum, ep_length,
                          n_max_success_per_goal=5, n_max_ep_per_goal=50):
    """
    Train `agent` on each goal of `curriculum` in order, moving on once a goal
    has been reached `n_max_success_per_goal` times or the per-goal episode
    budget is spent.
    :return: total number of training episodes used
    """
    runner = Runner(env, [], verbose=0)
    runner.il = agent
    n_train_ep = 0
    for i in range(len(curriculum)):
        success = 0
        for ite in range(n_max_ep_per_goal):
            goal = SparseState(curriculum[i][0], curriculum[i][1], env.d_s)
            n_train_ep += 1
            if runner.run(goal, ep_length):
                success += 1
            if success >= n_max_success_per_goal:
                print("Success at skill {} after {} episodes".format(
                    i, ite + 1))
                break
        if success < n_max_success_per_goal:
            print("Failed at skill {}\n\n".format(i))
    return n_train_ep
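# Hedged usage sketch (assumption, not from the source): running the full
# mining curriculum through train_with_curriculum. The primitive-skill and
# IntentLearner constructions mirror the __main__ blocks below; the 20-step
# episode length is the one used for the mining environment elsewhere here.
def _curriculum_training_sketch():
    env = MiningCraftingEnv(stochastic_reset=False)
    n_a = env.high_a[0]
    primitive_skills = [PrimitiveSkill(Effect([i], [0], [1], n_a), i, None)
                        for i in range(n_a)]
    agent = IntentLearner(primitive_skills, planner_max_rec=100, verbose=0)
    n_train_ep = train_with_curriculum(env, agent, env.curriculum,
                                       ep_length=20)
    print("training episodes used:", n_train_ep)
    return agent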
def main():
    n_runs = 1
    env_name = "mining"
    if env_name == "mining":
        env = MiningCraftingEnv(stochastic_reset=False)
    elif env_name == "baking":
        env = BakingEnv(stochastic_reset=False)
    elif env_name == "random":
        env = RandomGraphEnv(stochastic_reset=False, noise_prob=0.2)
    else:
        raise RuntimeError("Unknown environment name")

    n_mcts_iter = 100
    goal = env.curriculum[-1]
    s_goal = SparseState(goal[0], goal[1], env.d_s)

    agent = Mcts(iteration_limit=n_mcts_iter)

    trajectory_length, time_ = run_n_episodes(env, agent, s_goal, n_runs)

    print("#### FINAL ####")
    print("number of runs:", n_runs)
    print("number mcts iter:", n_mcts_iter)
    print("trajectory length:", trajectory_length)
    print("time:", time_)
def _extract_condition(self, start_state, skill_seq):
    """
    Extract skill condition using successful trajectory
    :param start_state: starting state as dense state
    :param skill_seq: sequence of skills as (list) of (Skill)
    """
    # For non-primitive skill, conditions of sub-skills in the successful
    # trajectory can be used to infer necessary conditions.
    conditions = SparseState([], [], len(self.effect.end_state))

    # Combine all sub-skill conditions
    for sub_skill in skill_seq:
        conditions = conditions.union(sub_skill.get_condition())

    # Remove all sub-skill effects (except last)
    for sub_skill in skill_seq[:-1]:
        conditions.remove(sub_skill.effect.end_state)

    conditions.values = np.clip(conditions.values, 0., 1.)  # Get rid of -1

    # Dense start state with the inferred condition values written in
    valid_state = start_state.copy()
    valid_state[conditions.dims] = conditions.values
if __name__ == "__main__":
    n_runs = 1
    env_name = "mining"
    if env_name == "mining":
        env = MiningCraftingEnv(stochastic_reset=False)
    elif env_name == "baking":
        env = BakingEnv(stochastic_reset=False)
    elif env_name == "random":
        env = RandomGraphEnv(stochastic_reset=False, noise_prob=0.2)
    else:
        raise RuntimeError("Unknown environment name")

    goal = env.curriculum[-1]
    s_goal = SparseState(goal[0], goal[1], env.d_s)

    n_a = env.high_a[0]
    primitive_skills = [
        PrimitiveSkill(
            Effect([i], [0], [1], n_a), i,
            FixedCondition(env.conditions[i],
                           [1.] * len(env.conditions[i])))
        for i in range(n_a)
    ]
    agent = IntentLearner(primitive_skills, planner_max_rec=100, verbose=0)

    trajectory_length, time_ = run_n_episodes(env, agent, s_goal, n_runs)

    print("#### FINAL ####")
    print("number of runs:", n_runs)
    print("trajectory length:", trajectory_length)
    print("time:", time_)
if __name__ == "__main__":
    n_runs = 1
    env_name = "mining"
    if env_name == "mining":
        env, ep_length = MiningCraftingEnv(stochastic_reset=False), 20
    elif env_name == "baking":
        env, ep_length = BakingEnv(stochastic_reset=False), 40
    elif env_name == "drawer":
        env, ep_length = DrawerEnv(stochastic_reset=False), 10
    else:
        raise RuntimeError("Unknown environment name")

    goal = env.curriculum[-1]
    ds = env.d_s
    n_a = env.high_a[0]
    n_max_success_per_goal = 5

    s_goal = SparseState(goal[0], goal[1], ds)
    primitive_skills = [PrimitiveSkill(Effect([i], [0], [1], n_a), i, None)
                        for i in range(n_a)]

    trajectory_length, time_, n_train_ep = run_n_episodes(
        env, primitive_skills, s_goal, n_runs, ep_length,
        n_max_success_per_goal)

    print("#### FINAL ####")
    print("number of training episodes:", n_train_ep)
    print("number of runs:", n_runs)
    print("trajectory length:", trajectory_length)
    print("time:", time_)
envs = {
    "mining": (MiningCraftingEnv, {"stochastic_reset": False}),
    "baking": (BakingEnv, {"stochastic_reset": False}),
    "random": (RandomGraphEnv, {"stochastic_reset": False,
                                "noise_prob": 0.2}),
}

# Create env
env_name = "mining"
curriculum = envs[env_name][0].curriculum
d_s = len(envs[env_name][0].conditions)
s_goal = SparseState(curriculum[-1][0], curriculum[-1][1], d_s)
env = envs[env_name][0](**envs[env_name][1], goal=s_goal)

n_eps = [0] * n_runs
trajectory_length = [0] * n_runs
time_ = [0] * n_runs
for i in range(n_runs):
    n_eps[i], trajectory_length[i], time_[i] = run(env, s_goal)

print("#### FINAL ####")
print("training episodes:", sum(n_eps) / float(n_runs))
print("trajectory length:", sum(trajectory_length) / float(n_runs))
print("time:", sum(time_) / float(n_runs))
def __init__(self, stochastic_reset=False):
    goal = MiningCraftingEnv.curriculum[-1]
    d_s = len(MiningCraftingEnv.conditions)
    s_goal = SparseState(goal[0], goal[1], d_s)
    super().__init__(stochastic_reset, s_goal)
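# Hedged sketch (assumption, not from the source): since the constructor above
# always derives its goal from the last curriculum entry, instantiating the
# environment with default arguments targets the final crafting goal.
# Reconstructing that goal explicitly gives the SparseState consumed by the
# planners and runners above.
def _default_goal_sketch():
    env = MiningCraftingEnv(stochastic_reset=False)
    goal = MiningCraftingEnv.curriculum[-1]
    s_goal = SparseState(goal[0], goal[1], len(MiningCraftingEnv.conditions))
    return env, s_goal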