Example #1
0
def main(args):
    """Partition gathered experience from a selectable continuous environment.

    Builds the environment chosen by ``args.environment``, gathers
    ``args.num_experience`` samples from it and runs one
    ``partition_iteration`` of ``OnlineHomomorphismGDict``, plotting the
    model's predictions through the ``visualize_b`` callback.

    Args:
        args: Parsed options. This function reads ``environment``,
            ``num_experience``, ``threshold_multiplier`` and
            ``threshold_minimum``.
    """
    # Import lazily so only the selected environment module is loaded.
    # Any value other than 1 or 2 falls back to the third environment.
    if args.environment == 1:
        from envs.continuous_1 import ContinuousEnv1 as Env
    elif args.environment == 2:
        from envs.continuous_2 import ContinuousEnv2 as Env
    else:
        from envs.continuous_3 import ContinuousEnv3 as Env

    env = Env()
    g = model_utils.BalancedMLP([1], [8, 16, 32], 0.0001, 128, 0.0001, verbose=True)

    def visualize_b(state_action_partition):
        """Draw the predictions of ``g`` over the background, then the partition."""
        vis_utils.plot_background(env, show=False)

        # Evaluate the model on a grid with step 0.01 spanning the extent of
        # STATE_ACTION_MAP (columns along x, rows along y).
        xx, yy = np.meshgrid(np.arange(0, env.STATE_ACTION_MAP.shape[1], 0.01),
                             np.arange(0, env.STATE_ACTION_MAP.shape[0], 0.01))
        data = np.c_[xx.ravel(), yy.ravel()]
        Z = g.batch_predict(data[:, 0], data[:, 1])
        Z = np.array(Z).reshape(xx.shape)
        plt.contourf(xx, yy, Z, alpha=0.4)

        # Only the last plotting call shows the figure.
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    experience = gather_experience(env, args.num_experience)
    # The original local name was garbled into an invalid assignment target
    # ("h**o"); a plain identifier is used instead.
    homomorphism = OnlineHomomorphismGDict(
        experience, g, sample_actions, args.threshold_multiplier, args.threshold_minimum,
        OnlineHomomorphismGDict.RESOLVE_IGNORE, 20, visualize_b=visualize_b)
    homomorphism.partition_iteration()
def main(args):
    """Partition experience from ``ContinuousEnv3`` and print overlap accuracy.

    Runs one ``partition_iteration`` of ``OnlineHomomorphismG`` with plotting
    callbacks, then prints the hit percentage reported by
    ``evaluation.overlap`` for the resulting partition.

    Args:
        args: Parsed options. This function reads ``num_experience``,
            ``state_action_slit_threshold`` and ``state_split_threshold``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    def visualize_b(state_action_partition):
        """Plot the background, the decision boundary of ``g`` and the partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_decision_boundary(g,
                                         env.STATE_ACTION_MAP.shape[1],
                                         env.STATE_ACTION_MAP.shape[0],
                                         show=False)
        # Only the last plotting call shows the figure.
        vis_utils.plot_state_action_partition(state_action_partition,
                                              show=True)

    def visualize_sb(state_partition):
        """Plot the background followed by the state partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)
    # The original local name was garbled into an invalid assignment target
    # ("h**o"); a plain identifier is used instead.
    # NOTE(review): "state_action_slit_threshold" looks like a misspelling of
    # "split"; it is kept because it presumably matches the argument parser
    # defined elsewhere -- confirm before renaming.
    homomorphism = online_homomorphism_g.OnlineHomomorphismG(
        experience,
        g,
        sample_actions,
        args.state_action_slit_threshold,
        args.state_split_threshold,
        20,
        visualize_b=visualize_b,
        visualize_sb=visualize_sb)
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    print("{:.2f}% accuracy ({:d}/{:d})".format((hits / total) * 100, hits,
                                                total))
Example #3
0
def main(args):
    """Partition experience from ``ContinuousEnv3`` with plotting callbacks.

    Gathers ``args.num_experience`` samples and runs one
    ``partition_iteration`` of ``OnlineHomomorphismG``.

    Args:
        args: Parsed options. Only ``num_experience`` is read here.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16, 32], 0.0001, 128, 0.0001, verbose=True)

    def visualize_b(state_action_partition):
        """Draw the predictions of ``g`` over the background, then the partition."""
        vis_utils.plot_background(env, show=False)

        # Evaluate the model on a grid with step 0.01 spanning the extent of
        # STATE_ACTION_MAP (columns along x, rows along y).
        xx, yy = np.meshgrid(np.arange(0, env.STATE_ACTION_MAP.shape[1], 0.01),
                             np.arange(0, env.STATE_ACTION_MAP.shape[0], 0.01))
        data = np.c_[xx.ravel(), yy.ravel()]
        Z = g.batch_predict(data[:, 0], data[:, 1])
        Z = np.array(Z).reshape(xx.shape)
        plt.contourf(xx, yy, Z, alpha=0.4)

        # Only the last plotting call shows the figure.
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_sb(state_partition):
        """Plot the background followed by the state partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)
    # The original local name was garbled into an invalid assignment target
    # ("h**o"); a plain identifier is used instead.
    homomorphism = online_homomorphism_g.OnlineHomomorphismG(
        experience, g, sample_actions, 1, 1, 20,
        visualize_b=visualize_b, visualize_sb=visualize_sb)
    homomorphism.partition_iteration()
Example #4
0
def run(resolution):
    """Run one partition iteration on ``ContinuousEnv3`` and score it.

    Args:
        resolution: Forwarded as the fifth positional argument of
            ``OnlineHomomorphismGDict``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap`` for the
        resulting partition.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, NUM_EXPERIENCE)
    # The original local name was garbled into an invalid assignment target
    # ("h**o"); a plain identifier is used instead.
    homomorphism = OnlineHomomorphismGDict(experience, g, sample_actions,
                                           SPLIT_THRESHOLD, resolution, 20)
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total

    return accuracy
def run(num_experience, split_threshold, min_confidence):
    """Run one partition iteration on ``ContinuousEnv3`` and score it.

    Args:
        num_experience: Amount of experience requested from
            ``gather_experience``.
        split_threshold: Forwarded as the fourth positional argument of
            ``OnlineHomomorphismG``.
        min_confidence: Forwarded as the seventh positional argument of
            ``OnlineHomomorphismG``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap`` for the
        resulting partition.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, num_experience)
    # The original local name was garbled into an invalid assignment target
    # ("h**o"); a plain identifier is used instead.
    homomorphism = OnlineHomomorphismG(experience, g, sample_actions, split_threshold,
                                       0, 20, min_confidence)
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total

    return accuracy
def run(num_experience, threshold_multiplier):
    """Run one partition iteration on ``ContinuousEnv3`` and score it.

    Args:
        num_experience: Amount of experience requested from
            ``gather_experience``.
        threshold_multiplier: Forwarded as the fourth positional argument of
            ``OnlineHomomorphismGDict``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap`` for the
        resulting partition.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, num_experience)
    # The original local name was garbled into an invalid assignment target
    # ("h**o"); a plain identifier is used instead.
    homomorphism = OnlineHomomorphismGDict(
        experience, g, sample_actions, threshold_multiplier, MINIMUM_THRESHOLD,
        OnlineHomomorphismGDict.RESOLVE_IGNORE, 20)
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total

    return accuracy
Example #7
0
def main(args):
    """Partition gathered experience from a selectable continuous environment.

    Builds the environment chosen by ``args.environment``, gathers
    ``args.num_experience`` samples from it and runs one
    ``partition_iteration`` of ``OnlineHomomorphismGDict`` with plotting
    callbacks for the partition, the confidences and the ignored states.

    Args:
        args: Parsed options. This function reads ``environment``,
            ``num_experience``, ``b_threshold``, ``conf_threshold`` and
            ``percentile``.
    """
    # Import lazily so only the selected environment module is loaded.
    # Any value other than 1 or 2 falls back to the third environment.
    if args.environment == 1:
        from envs.continuous_1 import ContinuousEnv1 as Env
    elif args.environment == 2:
        from envs.continuous_2 import ContinuousEnv2 as Env
    else:
        from envs.continuous_3 import ContinuousEnv3 as Env

    env = Env()
    g = model_utils.BalancedMLP([1], [8, 16, 32], 0.0001, 128, 0.0001, verbose=True)

    def visualize_b(state_action_partition):
        """Draw the predictions of ``g`` over the background, then the partition."""
        vis_utils.plot_background(env, show=False)

        # Evaluate the model on a grid with step 0.01 spanning the extent of
        # STATE_ACTION_MAP (columns along x, rows along y).
        xx, yy = np.meshgrid(np.arange(0, env.STATE_ACTION_MAP.shape[1], 0.01),
                             np.arange(0, env.STATE_ACTION_MAP.shape[0], 0.01))
        data = np.c_[xx.ravel(), yy.ravel()]
        Z = g.batch_predict(data[:, 0], data[:, 1])
        Z = np.array(Z).reshape(xx.shape)
        plt.contourf(xx, yy, Z, alpha=0.4)

        # Only the last plotting call shows the figure.
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_ignored(states):
        """Show a normalized 10-bin histogram of ``states`` over the background."""
        vis_utils.plot_background(env, show=False)

        # ``density`` replaces the ``normed`` keyword, which newer Matplotlib
        # releases no longer accept.
        plt.hist(states, bins=10, range=[0, len(env.STATE_MAP)], density=True)
        plt.show()

    experience = gather_experience(env, args.num_experience)
    # The original local name was garbled into an invalid assignment target
    # ("h**o"); a plain identifier is used instead.
    homomorphism = OnlineHomomorphismGDict(
        experience, g, sample_actions, args.b_threshold, args.conf_threshold,
        OnlineHomomorphismGDict.RESOLVE_ADD_CLOSEST, 20, percentile=args.percentile,
        visualize_b=visualize_b, visualize_conf=vis_utils.show_confidences,
        visualize_ignored=visualize_ignored)
    homomorphism.partition_iteration()