def main(args):
    """Partition gathered experience with an MLP-backed model and print accuracy.

    Builds a ``ContinuousEnv3`` environment and a ``BalancedMLP`` model,
    gathers ``args.num_experience`` experience with ``gather_experience``,
    runs ``OnlineHomomorphismG.partition_iteration`` and prints the overlap
    reported by ``evaluation.overlap`` as a percentage.

    Args:
        args: Object exposing ``num_experience``,
            ``state_action_slit_threshold`` and ``state_split_threshold``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    def visualize_b(state_action_partition):
        """Plot the environment, the decision boundary of ``g`` and the partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_decision_boundary(
            g,
            env.STATE_ACTION_MAP.shape[1],
            env.STATE_ACTION_MAP.shape[0],
            show=False,
        )
        # Only the final call shows the figure, so all layers share one plot.
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_sb(state_partition):
        """Plot the environment background with the state partition on top."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)

    # NOTE(review): ``state_action_slit_threshold`` looks like a misspelling of
    # "split"; it is kept as-is because the attribute is defined by the caller.
    # Confirm against the argument parser before renaming.
    homomorphism = online_homomorphism_g.OnlineHomomorphismG(
        experience,
        g,
        sample_actions,
        args.state_action_slit_threshold,
        args.state_split_threshold,
        20,  # meaning of this positional argument is not visible here -- TODO confirm
        visualize_b=visualize_b,
        visualize_sb=visualize_sb,
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    print("{:.2f}% accuracy ({:d}/{:d})".format((hits / total) * 100, hits, total))
def main(args):
    """Run the partition iteration with a decision-tree-backed model.

    Builds a ``ContinuousEnv3`` environment, wraps a
    ``DecisionTreeClassifier`` in ``model_utils.GModel``, gathers
    ``args.num_experience`` experience and runs
    ``OnlineHomomorphismG.partition_iteration`` with plotting callbacks.

    Args:
        args: Object exposing ``num_experience``.
    """
    env = ContinuousEnv3()

    def visualize_b(state_action_partition):
        """Plot the environment background with the state-action partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_sb(state_partition):
        """Plot the environment background with the state partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    g = model_utils.GModel(DecisionTreeClassifier())
    experience = gather_experience(env, args.num_experience)

    # The positional values 1, 1, 20 are passed through unchanged; their
    # meaning is defined by OnlineHomomorphismG -- TODO confirm.
    homomorphism = online_homomorphism_g.OnlineHomomorphismG(
        experience,
        g,
        sample_actions,
        1,
        1,
        20,
        visualize_b=visualize_b,
        visualize_sb=visualize_sb,
    )
    homomorphism.partition_iteration()
def main(args):
    """Run the partition iteration with a three-layer MLP and contour plots.

    Builds a ``ContinuousEnv3`` environment and a ``BalancedMLP`` model,
    gathers ``args.num_experience`` experience and runs
    ``OnlineHomomorphismG.partition_iteration``. The state-action callback
    draws the model's predictions over a dense grid as a filled contour.

    Args:
        args: Object exposing ``num_experience``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16, 32], 0.0001, 128, 0.0001, verbose=True)

    def visualize_b(state_action_partition):
        """Plot background, predictions of ``g`` on a grid, and the partition."""
        vis_utils.plot_background(env, show=False)

        # Grid with 0.01 spacing spanning the extent of STATE_ACTION_MAP.
        xx, yy = np.meshgrid(
            np.arange(0, env.STATE_ACTION_MAP.shape[1], 0.01),
            np.arange(0, env.STATE_ACTION_MAP.shape[0], 0.01),
        )
        data = np.c_[xx.ravel(), yy.ravel()]

        # Predict every grid point in one batch, then restore the grid shape
        # so the result can be drawn as a contour.
        Z = g.batch_predict(data[:, 0], data[:, 1])
        Z = np.array(Z).reshape(xx.shape)
        plt.contourf(xx, yy, Z, alpha=0.4)

        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_sb(state_partition):
        """Plot the environment background with the state partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)

    # The positional values 1, 1, 20 are passed through unchanged; their
    # meaning is defined by OnlineHomomorphismG -- TODO confirm.
    homomorphism = online_homomorphism_g.OnlineHomomorphismG(
        experience,
        g,
        sample_actions,
        1,
        1,
        20,
        visualize_b=visualize_b,
        visualize_sb=visualize_sb,
    )
    homomorphism.partition_iteration()
def run(resolution):
    """Run one partitioning experiment and return its overlap accuracy.

    Gathers ``NUM_EXPERIENCE`` experience from a fresh ``ContinuousEnv3``,
    runs ``OnlineHomomorphismGDict.partition_iteration`` with a
    ``BalancedMLP`` model and scores the resulting partition with
    ``evaluation.overlap``.

    Args:
        resolution: Value forwarded as the fifth positional argument of
            ``OnlineHomomorphismGDict``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)
    experience = gather_experience(env, NUM_EXPERIENCE)

    homomorphism = OnlineHomomorphismGDict(
        experience, g, sample_actions, SPLIT_THRESHOLD, resolution, 20
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total
    return accuracy
def run(num_experience, split_threshold, min_confidence):
    """Run one partitioning experiment and return its overlap accuracy.

    Gathers experience from a fresh ``ContinuousEnv3``, runs
    ``OnlineHomomorphismG.partition_iteration`` with a ``BalancedMLP`` model
    and scores the resulting partition with ``evaluation.overlap``.

    Args:
        num_experience: Amount of experience requested from
            ``gather_experience``.
        split_threshold: Forwarded as the fourth positional argument of
            ``OnlineHomomorphismG``.
        min_confidence: Forwarded as the seventh positional argument of
            ``OnlineHomomorphismG``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)
    experience = gather_experience(env, num_experience)

    # The literals 0 and 20 are passed through unchanged; their meaning is
    # defined by OnlineHomomorphismG -- TODO confirm.
    homomorphism = OnlineHomomorphismG(
        experience, g, sample_actions, split_threshold, 0, 20, min_confidence
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total
    return accuracy
def run(num_experience, threshold_multiplier):
    """Run one partitioning experiment and return its overlap accuracy.

    Gathers experience from a fresh ``ContinuousEnv3``, runs
    ``OnlineHomomorphismGDict.partition_iteration`` with a ``BalancedMLP``
    model and scores the resulting partition with ``evaluation.overlap``.

    Args:
        num_experience: Amount of experience requested from
            ``gather_experience``.
        threshold_multiplier: Forwarded as the fourth positional argument of
            ``OnlineHomomorphismGDict``.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)
    experience = gather_experience(env, num_experience)

    homomorphism = OnlineHomomorphismGDict(
        experience,
        g,
        sample_actions,
        threshold_multiplier,
        MINIMUM_THRESHOLD,
        OnlineHomomorphismGDict.RESOLVE_IGNORE,
        20,  # meaning of this positional argument is not visible here -- TODO confirm
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total
    return accuracy
if done: break return transitions def visualize_state_action_partition(state_action_partition): vis_utils.plot_background(env, show=False) vis_utils.plot_state_action_partition(state_action_partition, show=True) def visualize_state_partition(state_partition): vis_utils.plot_background(env, show=False) vis_utils.plot_state_partition(state_partition, show=True) env = ContinuousEnv3() d = env.state_distance k = 10 state_action_partition, state_partition = online_homomorphism_knn.full_partition_iteration( lambda: gather_experience(env, 400), d, k, 1, visualize_state_action_partition=visualize_state_action_partition, visualize_state_partition=visualize_state_partition, max_iteration_steps=2)