예제 #1
0
def main():
    """Run the skill learner on each mining/crafting goal and report results.

    Builds a ``MiningCraftingEnv`` with ``stochastic_reset=False``, creates
    one ``PrimitiveSkill`` per action index in ``range(env.high_a[0])`` and
    hands them to a ``Runner``.  For each of the ten goals listed below,
    ``runner.run`` is called up to 20 times; as soon as 8 of those calls
    have returned a truthy value a success line is printed and the next
    goal is started, otherwise a failure line is printed.  Finally the
    skills stored in ``runner.il.skill_base`` are printed.
    """
    environment = MiningCraftingEnv(stochastic_reset=False)
    state_dim = environment.d_s
    action_count = environment.high_a[0]

    # Switch between conditions taken from the environment (True) and no
    # predefined condition, i.e. ``None`` is passed to the skill (False).
    use_fixed_conditions = False

    primitive_skills = []
    for action in range(action_count):
        effect = Effect([action], [0], [1], action_count)
        if use_fixed_conditions:
            condition = FixedCondition(
                environment.conditions[action],
                [1.] * len(environment.conditions[action]))
        else:
            condition = None
        primitive_skills.append(PrimitiveSkill(effect, action, condition))

    runner = Runner(environment, primitive_skills, verbose=1)

    goals = [
        MiningCraftingEnv.goal_stick,
        MiningCraftingEnv.goal_stone_pick,
        MiningCraftingEnv.goal_coal,
        MiningCraftingEnv.goal_furnace,
        MiningCraftingEnv.goal_smelt_iron,
        MiningCraftingEnv.goal_iron_pick,
        MiningCraftingEnv.goal_gold_ore,
        MiningCraftingEnv.goal_goldware,
        MiningCraftingEnv.goal_necklace,
        MiningCraftingEnv.goal_earrings,
    ]

    required_successes = 8
    max_attempts = 20
    # Second argument of runner.run -- presumably the episode length;
    # TODO confirm against Runner.run.
    run_budget = 20

    for skill_index, goal_spec in enumerate(goals):
        wins = 0
        for attempt in range(1, max_attempts + 1):
            target = SparseState(goal_spec[0], goal_spec[1], state_dim)
            if not runner.run(target, run_budget):
                continue
            wins += 1
            if wins >= required_successes:
                print("####### Success at skill {} after {} "
                      "episodes\n\n".format(skill_index, attempt))
                break
        else:
            # Reached only when the attempts ran out without a break,
            # i.e. the success threshold was never met.
            print("####### Failed at skill {}\n\n".format(skill_index))

    print("\n\n\n\n%%%%%%% END")
    print_skills(environment, runner.il.skill_base)
def main():
    """Run an MCTS agent on the last curriculum goal and print a summary.

    The environment is chosen by the hard-coded ``env_name``; the goal is
    the final entry of ``env.curriculum`` wrapped in a ``SparseState``.
    ``run_n_episodes`` is called with the environment, an ``Mcts`` agent,
    the goal and the number of runs, and the two values it returns are
    printed as trajectory length and time.

    Raises:
        RuntimeError: if ``env_name`` is not one of the known names.
    """
    n_runs = 1
    env_name = "mining"

    # Constructors are wrapped in lambdas so that only the selected
    # environment class is looked up and instantiated.
    env_builders = {
        "mining": lambda: MiningCraftingEnv(stochastic_reset=False),
        "baking": lambda: BakingEnv(stochastic_reset=False),
        "random": lambda: RandomGraphEnv(stochastic_reset=False,
                                         noise_prob=0.2),
    }
    build_env = env_builders.get(env_name)
    if build_env is None:
        raise RuntimeError("Unknown environment name")
    env = build_env()

    n_mcts_iter = 100
    final_goal = env.curriculum[-1]
    target_state = SparseState(final_goal[0], final_goal[1], env.d_s)
    planner = Mcts(iteration_limit=n_mcts_iter)
    avg_length, elapsed = run_n_episodes(env, planner, target_state, n_runs)

    print("#### FINAL ####")
    print("number of runs:", n_runs)
    print("number mcts iter:", n_mcts_iter)
    print("trajectory length:", avg_length)
    print("time:", elapsed)
예제 #3
0
        print(seq_a)

        # Compute stats
        time_buffer = time_buffer[1:len(time_buffer)] + [time.time() - t]
        success_buffer = success_buffer[1:len(success_buffer)] + [success]
        length_buffer = length_buffer[1:len(length_buffer)] + [len(seq_a)]

    stats = compute_stats(success_buffer, length_buffer, time_buffer)
    return stats[0], stats[1]


if __name__ == "__main__":
    n_runs = 1
    env_name = "mining"
    if env_name == "mining":
        env = MiningCraftingEnv(stochastic_reset=False)
    elif env_name == "baking":
        env = BakingEnv(stochastic_reset=False)
    elif env_name == "random":
        env = RandomGraphEnv(stochastic_reset=False, noise_prob=0.2)
    else:
        raise RuntimeError("Unknown environment name")

    goal = env.curriculum[-1]
    s_goal = SparseState(goal[0], goal[1], env.d_s)
    n_a = env.high_a[0]
    primitive_skills = [
        PrimitiveSkill(
            Effect([i], [0], [1], n_a), i,
            FixedCondition(env.conditions[i], [1.] * len(env.conditions[i])))
        for i in range(n_a)
예제 #4
0
        n_ep_buffer = n_ep_buffer[1:len(n_ep_buffer)] + [n_train_ep]

        # Print learned skills
        if n_ep == n_eps - 1:
            print_skills(env, agent.skill_base)

    stats = compute_stats(success_buffer, length_buffer, time_buffer,
                          n_ep_buffer)
    return stats[0], stats[1], stats[2]


if __name__ == "__main__":
    n_runs = 1
    env_name = "mining"
    if env_name == "mining":
        env, ep_length = MiningCraftingEnv(stochastic_reset=False), 20
    elif env_name == "baking":
        env, ep_length = BakingEnv(stochastic_reset=False), 40
    elif env_name == "drawer":
        env, ep_length = DrawerEnv(stochastic_reset=False), 10
    else:
        raise RuntimeError("Unknown environment name")

    goal = env.curriculum[-1]
    ds = env.d_s
    n_a = env.high_a[0]
    n_max_success_per_goal = 5
    s_goal = SparseState(goal[0], goal[1], ds)
    primitive_skills = [PrimitiveSkill(Effect([i], [0], [1], n_a), i, None) for
                        i in range(n_a)]
    trajectory_length, time_, n_train_ep = run_n_episodes(