예제 #1
0
def main(args):
    """Run one partition iteration with an MLP model and print the overlap accuracy.

    Args:
        args: Options object. This function reads ``num_experience``,
            ``state_action_slit_threshold`` and ``state_split_threshold``.
    """

    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    def visualize_b(state_action_partition):
        """Plot the background, the decision boundary of ``g`` and the given partition."""

        vis_utils.plot_background(env, show=False)
        vis_utils.plot_decision_boundary(g,
                                         env.STATE_ACTION_MAP.shape[1],
                                         env.STATE_ACTION_MAP.shape[0],
                                         show=False)
        vis_utils.plot_state_action_partition(state_action_partition,
                                              show=True)

    def visualize_sb(state_partition):
        """Plot the background and the given state partition."""

        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)
    # NOTE(review): the attribute is spelled "slit" here; presumably it matches
    # the argument parser defined elsewhere -- confirm before renaming.
    homo = online_homomorphism_g.OnlineHomomorphismG(
        experience,
        g,
        sample_actions,
        args.state_action_slit_threshold,
        args.state_split_threshold,
        20,
        visualize_b=visualize_b,
        visualize_sb=visualize_sb)
    homo.partition_iteration()

    hits, total = evaluation.overlap(env, list(homo.partition))
    print("{:.2f}% accuracy ({:d}/{:d})".format((hits / total) * 100, hits,
                                                total))
예제 #2
0
def main(args):
    """Run one partition iteration using a decision-tree-backed ``GModel``.

    Args:
        args: Options object. This function reads ``num_experience``.
    """

    env = ContinuousEnv3()

    def visualize_b(state_action_partition):
        """Plot the background and the given state-action partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_action_partition(state_action_partition,
                                              show=True)

    def visualize_sb(state_partition):
        """Plot the background and the given state partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    g = model_utils.GModel(DecisionTreeClassifier())

    experience = gather_experience(env, args.num_experience)
    homo = online_homomorphism_g.OnlineHomomorphismG(experience,
                                                     g,
                                                     sample_actions,
                                                     1,
                                                     1,
                                                     20,
                                                     visualize_b=visualize_b,
                                                     visualize_sb=visualize_sb)
    homo.partition_iteration()
예제 #3
0
def main(args):
    """Run one partition iteration with an MLP model, plotting its predictions.

    Args:
        args: Options object. This function reads ``num_experience``.
    """

    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16, 32], 0.0001, 128, 0.0001, verbose=True)

    def visualize_b(state_action_partition):
        """Plot the background, a filled contour of ``g``'s predictions and the partition."""
        vis_utils.plot_background(env, show=False)

        # Evaluate g on a grid with step 0.01 spanning the STATE_ACTION_MAP extent.
        xx, yy = np.meshgrid(np.arange(0, env.STATE_ACTION_MAP.shape[1], 0.01),
                             np.arange(0, env.STATE_ACTION_MAP.shape[0], 0.01))
        data = np.c_[xx.ravel(), yy.ravel()]
        Z = g.batch_predict(data[:, 0], data[:, 1])
        # Reshape the flat predictions back onto the grid for contourf.
        Z = np.array(Z).reshape(xx.shape)
        plt.contourf(xx, yy, Z, alpha=0.4)

        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_sb(state_partition):
        """Plot the background and the given state partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)
    homo = online_homomorphism_g.OnlineHomomorphismG(experience, g, sample_actions, 1, 1, 20, visualize_b=visualize_b,
                                                     visualize_sb=visualize_sb)
    homo.partition_iteration()
예제 #4
0
def run(resolution):
    """Run one partition iteration with ``OnlineHomomorphismGDict`` and score it.

    Args:
        resolution: Forwarded as the fifth positional argument of
            ``OnlineHomomorphismGDict``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """

    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, NUM_EXPERIENCE)
    homo = OnlineHomomorphismGDict(experience, g, sample_actions,
                                   SPLIT_THRESHOLD, resolution, 20)
    homo.partition_iteration()

    hits, total = evaluation.overlap(env, list(homo.partition))
    accuracy = hits / total

    return accuracy
def run(num_experience, split_threshold, min_confidence):
    """Run one partition iteration with ``OnlineHomomorphismG`` and score it.

    Args:
        num_experience: Forwarded to ``gather_experience``.
        split_threshold: Forwarded as the fourth positional argument of
            ``OnlineHomomorphismG``.
        min_confidence: Forwarded as the seventh positional argument of
            ``OnlineHomomorphismG``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """

    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, num_experience)
    homo = OnlineHomomorphismG(experience, g, sample_actions, split_threshold,
                               0, 20, min_confidence)
    homo.partition_iteration()

    hits, total = evaluation.overlap(env, list(homo.partition))
    accuracy = hits / total

    return accuracy
def run(num_experience, threshold_multiplier):
    """Run one partition iteration with ``OnlineHomomorphismGDict`` and score it.

    Args:
        num_experience: Forwarded to ``gather_experience``.
        threshold_multiplier: Forwarded as the fourth positional argument of
            ``OnlineHomomorphismGDict``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """

    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, num_experience)
    homo = OnlineHomomorphismGDict(experience, g, sample_actions, threshold_multiplier, MINIMUM_THRESHOLD,
                                   OnlineHomomorphismGDict.RESOLVE_IGNORE, 20)
    homo.partition_iteration()

    hits, total = evaluation.overlap(env, list(homo.partition))
    accuracy = hits / total

    return accuracy
예제 #7
0
            if done:
                break

    return transitions


def visualize_state_action_partition(state_action_partition):
    """Plot the module-level ``env`` background, then the state-action partition.

    Args:
        state_action_partition: Passed unchanged to
            ``vis_utils.plot_state_action_partition``.
    """
    # The background is drawn with show=False and the partition with show=True;
    # presumably the figure is only displayed by the second call.
    vis_utils.plot_background(env, show=False)
    vis_utils.plot_state_action_partition(
        state_action_partition,
        show=True,
    )


def visualize_state_partition(state_partition):
    """Plot the module-level ``env`` background, then the state partition.

    Args:
        state_partition: Passed unchanged to
            ``vis_utils.plot_state_partition``.
    """
    # The background is drawn with show=False and the partition with show=True;
    # presumably the figure is only displayed by the second call.
    vis_utils.plot_background(env, show=False)
    vis_utils.plot_state_partition(
        state_partition,
        show=True,
    )


# Script driver: build the environment and run the kNN-based full partition
# iteration with both visualisation callbacks attached.
env = ContinuousEnv3()

# Positional inputs for full_partition_iteration.
# presumably d is the distance function and k the neighbour count -- confirm
# against online_homomorphism_knn.
d = env.state_distance
k = 10


def _collect_experience():
    """Return the result of ``gather_experience(env, 400)``."""
    return gather_experience(env, 400)


state_action_partition, state_partition = (
    online_homomorphism_knn.full_partition_iteration(
        _collect_experience,
        d,
        k,
        1,
        visualize_state_action_partition=visualize_state_action_partition,
        visualize_state_partition=visualize_state_partition,
        max_iteration_steps=2,
    )
)