def learn_demonstration(self, trajectory, goal):
        """
        Learn from demonstration.
        :param trajectory: (Trajectory)
        :param goal: demonstration goal as (SparseState)
        :return: None
        """
        # Identify whether existing skills match the trajectory and goal
        s_start = SparseState.from_dense_state(trajectory.initial_state)
        effect = Effect.from_sparse_start_goal(s_start, goal)
        skills = self.skill_base.skills_from_effect(effect)
        # Keep only skills whose conditions are expected to hold at the start
        candidate_skills = [s for s in skills if not s.fails_in(s_start)]

        # trajectory.refine()
        # skill_seq, seq_effect = trajectory.to_skill_seq(effect)
        skill_seq = [n.skill for n in trajectory.nodes]
        # Compose the net effect of the whole sequence; relies on Effect
        # supporting "+" and on functools.reduce / operator.add being
        # imported at module level (a toy illustration follows this example).
        seq_effect = reduce(add, [s.effect for s in skill_seq])

        # If none found, create a new skill
        if len(candidate_skills) == 0:
            # Learn new skill
            new_skill = Skill(seq_effect, skill_seq, trajectory.initial_state)
            self.skill_base.add_skill(new_skill)
        else:
            for skill in candidate_skills:
                if not isinstance(skill, PrimitiveSkill):
                    skill.add_successful_skill_seq(seq_effect, skill_seq,
                                                   trajectory.initial_state)
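A note on the reduce(add, ...) line above: it assumes Effect overloads "+" so
that chaining two effects yields the net effect of executing them back to
back. A toy sketch of that contract (ToyEffect below is illustrative, not the
repository's Effect class):

from functools import reduce
from operator import add


class ToyEffect:
    """Toy stand-in: maps state dimensions to the values they end up at."""

    def __init__(self, end_values):
        self.end_values = dict(end_values)  # {dim: value}

    def __add__(self, other):
        # Later effects overwrite earlier ones on the dimensions they touch.
        combined = dict(self.end_values)
        combined.update(other.end_values)
        return ToyEffect(combined)


seq = [ToyEffect({0: 1}), ToyEffect({1: 1}), ToyEffect({0: 0})]
net = reduce(add, seq)
print(net.end_values)  # {0: 0, 1: 1}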
Example 2
def fails_in(self, x):
    """
    Returns whether the skill is expected to fail in this state, i.e.
    whether its sampled condition does not match the state.
    :param x: dense state
    :return: bool
    """
    dims, values = self.sample()
    ss = SparseState(dims, values, x.shape[0])
    return not ss.matches(x)
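fails_in above relies on SparseState.matches, which checks whether a dense
state agrees with the sparse constraint on every constrained dimension. A
minimal sketch of that contract (ToySparseState is illustrative, not the
repository's SparseState):

import numpy as np


class ToySparseState:
    """Toy stand-in: a constraint on a few dimensions of a dense state."""

    def __init__(self, dims, values, d_s):
        self.dims = list(dims)      # constrained dimensions
        self.values = list(values)  # required values on those dimensions
        self.d_s = d_s              # total state dimensionality

    def matches(self, dense_state):
        # True iff the dense state takes the required value on every
        # constrained dimension.
        return all(dense_state[d] == v
                   for d, v in zip(self.dims, self.values))


state = np.array([1., 0., 1.])
print(ToySparseState([0, 2], [1., 1.], 3).matches(state))  # True
print(ToySparseState([1], [1.], 3).matches(state))         # False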
Example 3
def get_condition(self):
    """
    Returns the necessary conditions for the skill to succeed.
    :return: conditions as (SparseState)
    """
    # Logistic regression weights reflect which dimensions are important
    ds = len(self.effect.end_state)
    dims, values = self.conditions.sample()
    return SparseState(dims, values, ds)
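The comment in get_condition suggests that the learned condition keeps only
the dimensions with large logistic-regression weights. One plausible reading
of what conditions.sample() might do, sketched with scikit-learn (the
threshold and variable names are assumptions, not the repository's code):

import numpy as np
from sklearn.linear_model import LogisticRegression

# Toy data: success depends only on dimension 1 being set.
X = np.array([[0, 1, 0], [1, 1, 1], [0, 0, 1], [1, 0, 0]], dtype=float)
y = np.array([1, 1, 0, 0])

clf = LogisticRegression().fit(X, y)
weights = clf.coef_[0]

# Keep dimensions whose weight magnitude exceeds a threshold and read the
# required value off the weight's sign (assumed heuristic).
threshold = 0.5
dims = [i for i, w in enumerate(weights) if abs(w) > threshold]
values = [1.0 if weights[i] > 0 else 0.0 for i in dims]
print(dims, values)  # expected: [1] [1.0]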
Example 4
def main():
    env = MiningCraftingEnv(stochastic_reset=False)
    d_s = env.d_s
    n_a = env.high_a[0]
    given_conditions = False
    # Learn conditions from data or use fixed conditions?
    if given_conditions:
        primitive_skills = [
            PrimitiveSkill(
                Effect([i], [0], [1], n_a), i,
                FixedCondition(env.conditions[i],
                               [1.] * len(env.conditions[i])))
            for i in range(n_a)
        ]
    else:
        primitive_skills = [
            PrimitiveSkill(Effect([i], [0], [1], n_a), i, None)
            for i in range(n_a)
        ]

    runner = Runner(env, primitive_skills, verbose=1)

    goals = [
        MiningCraftingEnv.goal_stick, MiningCraftingEnv.goal_stone_pick,
        MiningCraftingEnv.goal_coal, MiningCraftingEnv.goal_furnace,
        MiningCraftingEnv.goal_smelt_iron, MiningCraftingEnv.goal_iron_pick,
        MiningCraftingEnv.goal_gold_ore, MiningCraftingEnv.goal_goldware,
        MiningCraftingEnv.goal_necklace, MiningCraftingEnv.goal_earrings
    ]
    n_success = 8
    for i in range(len(goals)):
        success = 0
        for ite in range(20):
            goal = SparseState(goals[i][0], goals[i][1], d_s)
            if runner.run(goal, 20):
                success += 1
            if success >= n_success:
                print("####### Success at skill {} after {} "
                      "episodes\n\n".format(i, ite + 1))
                break
        if success < n_success:
            print("####### Failed at skill {}\n\n".format(i))

    print("\n\n\n\n%%%%%%% END")
    print_skills(env, runner.il.skill_base)
Example 5
    def plan(self, state, goal):
        """
        Generates a plan from state to goal.
        :param state: Starting state (dense)
        :param goal: Goal as (SparseState)
        :return: current plan as (list) of (Skill)
        """
        s_start = SparseState.from_dense_state(state)
        if self.current_plan is None:
            try:
                self.current_plan = self.planner.plan(s_start, goal)
            except RuntimeError:
                self.current_plan = [self.skill_base.random_skill()]
                if self.verbose > 1:
                    print("Planner failed (low rec), random action.")
                return self.current_plan

        # If next skill's effect is already satisfied, remove it
        plan = self.current_plan
        while len(plan) > 0:
            skill, new_plan = IntentPlanner.behead(plan, randomness=False)
            if not skill.effect.end_state.matches(s_start):
                break
            plan = new_plan
        self.current_plan = plan

        # # If the next skill can't be executed, execute random action
        # if len(self.current_plan) > 0:
        #     first_skill = IntentPlanner.behead(plan)[0]
        #     if first_skill.fails_in(s_start):
        #         # self.current_plan = []
        #         # Random action
        #         self.current_plan = [self.skill_base.random_skill()]
        #         if self.verbose > 1:
        #             print("Random action2")
        #         return self.current_plan

        # If no plan left, try to plan again
        if len(self.current_plan) == 0:
            self.current_plan = None
            return self.plan(state, goal)

        return self.current_plan
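IntentPlanner.behead is not shown in these examples; from its use in plan it
splits a plan into its first skill and the remaining plan, optionally picking
a random element when randomness=True. A minimal sketch under that assumption
(not the repository's implementation):

import random


def behead(plan, randomness=False):
    # Split a plan into (next skill, remaining plan) without mutating it.
    # With randomness=True a random skill is popped instead of the head
    # (assumed behaviour).
    idx = random.randrange(len(plan)) if randomness else 0
    return plan[idx], plan[:idx] + plan[idx + 1:]


skill, rest = behead(["chop_wood", "craft_stick", "craft_pick"])
print(skill, rest)  # chop_wood ['craft_stick', 'craft_pick']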
Example 6
def train_with_curriculum(env, agent, curriculum, ep_length,
                          n_max_success_per_goal=5, n_max_ep_per_goal=50):
    runner = Runner(env, [], verbose=0)
    runner.il = agent
    n_train_ep = 0
    for i in range(len(curriculum)):
        success = 0
        for ite in range(n_max_ep_per_goal):
            goal = SparseState(curriculum[i][0], curriculum[i][1], env.d_s)
            n_train_ep += 1
            if runner.run(goal, ep_length):
                success += 1
            if success >= n_max_success_per_goal:
                print("Success at skill {} after {}  episodes".format(
                    i, ite + 1))
                break
        if success < n_max_success_per_goal:
            print("Failed at skill {}\n\n".format(i))

    return n_train_ep
Example 7
def main():
    n_runs = 1
    env_name = "mining"
    if env_name == "mining":
        env = MiningCraftingEnv(stochastic_reset=False)
    elif env_name == "baking":
        env = BakingEnv(stochastic_reset=False)
    elif env_name == "random":
        env = RandomGraphEnv(stochastic_reset=False, noise_prob=0.2)
    else:
        raise RuntimeError("Unknown environment name")
    n_mcts_iter = 100
    goal = env.curriculum[-1]
    s_goal = SparseState(goal[0], goal[1], env.d_s)
    agent = Mcts(iteration_limit=n_mcts_iter)
    trajectory_length, time_ = run_n_episodes(env, agent, s_goal, n_runs)

    print("#### FINAL ####")
    print("number of runs:", n_runs)
    print("number mcts iter:", n_mcts_iter)
    print("trajectory length:", trajectory_length)
    print("time:", time_)
Example 8
def _extract_condition(self, start_state, skill_seq):
    """
    Extract the skill condition from a successful trajectory.
    :param start_state: starting state as dense state
    :param skill_seq: sequence of skills as (list) of (Skill)
    """
    # For a non-primitive skill, the conditions of its sub-skills in the
    # successful trajectory can be used to infer necessary conditions.
    conditions = SparseState([], [], len(self.effect.end_state))
    # Combine all sub-skill conditions
    for sub_skill in skill_seq:
        conditions = conditions.union(sub_skill.get_condition())
    # Remove the effects of every sub-skill except the last, since the
    # sequence itself produces them
    for sub_skill in skill_seq[:-1]:
        conditions.remove(sub_skill.effect.end_state)
    conditions.values = np.clip(conditions.values, 0., 1.)  # Get rid of -1
    valid_state = start_state.copy()
    valid_state[conditions.dims] = conditions.values
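Stated plainly, _extract_condition takes the union of every sub-skill's
condition and then strips out whatever an earlier sub-skill in the sequence
already establishes, since the sequence produces those facts itself. A toy
dict-based illustration of that bookkeeping (not the SparseState
implementation):

# Sequence: "chop wood" (no condition, yields wood=1), then
# "craft stick" (needs wood=1, yields stick=1).
sub_conditions = [{}, {"wood": 1}]
sub_effects = [{"wood": 1}, {"stick": 1}]

# Union of all sub-skill conditions.
conditions = {}
for c in sub_conditions:
    conditions.update(c)

# Drop what every sub-skill but the last already provides.
for e in sub_effects[:-1]:
    for dim in e:
        conditions.pop(dim, None)

print(conditions)  # {} -- the sequence needs nothing beyond what it creates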
Example 9

if __name__ == "__main__":
    n_runs = 1
    env_name = "mining"
    if env_name == "mining":
        env = MiningCraftingEnv(stochastic_reset=False)
    elif env_name == "baking":
        env = BakingEnv(stochastic_reset=False)
    elif env_name == "random":
        env = RandomGraphEnv(stochastic_reset=False, noise_prob=0.2)
    else:
        raise RuntimeError("Unknown environment name")

    goal = env.curriculum[-1]
    s_goal = SparseState(goal[0], goal[1], env.d_s)
    n_a = env.high_a[0]
    primitive_skills = [
        PrimitiveSkill(
            Effect([i], [0], [1], n_a), i,
            FixedCondition(env.conditions[i], [1.] * len(env.conditions[i])))
        for i in range(n_a)
    ]
    agent = IntentLearner(primitive_skills, planner_max_rec=100, verbose=0)
    trajectory_length, time_ = run_n_episodes(env, agent, s_goal, n_runs)

    print("#### FINAL ####")
    print("number of runs:", n_runs)
    print("trajectory length:", trajectory_length)
    print("time:", time_)
Example 10

if __name__ == "__main__":
    n_runs = 1
    env_name = "mining"
    if env_name == "mining":
        env, ep_length = MiningCraftingEnv(stochastic_reset=False), 20
    elif env_name == "baking":
        env, ep_length = BakingEnv(stochastic_reset=False), 40
    elif env_name == "drawer":
        env, ep_length = DrawerEnv(stochastic_reset=False), 10
    else:
        raise RuntimeError("Unknown environment name")

    goal = env.curriculum[-1]
    ds = env.d_s
    n_a = env.high_a[0]
    n_max_success_per_goal = 5
    s_goal = SparseState(goal[0], goal[1], ds)
    primitive_skills = [PrimitiveSkill(Effect([i], [0], [1], n_a), i, None) for
                        i in range(n_a)]
    trajectory_length, time_, n_train_ep = run_n_episodes(
        env, primitive_skills, s_goal, n_runs, ep_length,
        n_max_success_per_goal)

    print("#### FINAL ####")
    print("number of training episodes:", n_train_ep)
    print("number of runs:", n_runs)
    print("trajectory length:", trajectory_length)
    print("time:", time_)
Example 11
    envs = {
        "mining": (MiningCraftingEnv, {
            "stochastic_reset": False
        }),
        "baking": (BakingEnv, {
            "stochastic_reset": False
        }),
        "random": (RandomGraphEnv, {
            "stochastic_reset": False,
            "noise_prob": 0.2
        })
    }

    # Create env
    env_name = "mining"
    curriculum = envs[env_name][0].curriculum
    d_s = len(envs[env_name][0].conditions)
    s_goal = SparseState(curriculum[-1][0], curriculum[-1][1], d_s)
    env = envs[env_name][0](**(envs[env_name][1]), goal=s_goal)

    n_eps = [0] * n_runs
    trajectory_length = [0] * n_runs
    time_ = [0] * n_runs
    for i in range(n_runs):
        n_eps[i], trajectory_length[i], time_[i] = run(env, s_goal)

    print("#### FINAL ####")
    print("training episodes:", sum(n_eps) / float(n_runs))
    print("trajectory length:", sum(trajectory_length) / float(n_runs))
    print("time:", sum(time_) / float(n_runs))
Example 12
def __init__(self, stochastic_reset=False):
    goal = MiningCraftingEnv.curriculum[-1]
    d_s = len(MiningCraftingEnv.conditions)
    s_goal = SparseState(goal[0], goal[1], d_s)
    super().__init__(stochastic_reset, s_goal)