def main(args):
    """Run one partition iteration of ``OnlineHomomorphismGDict`` on a chosen environment.

    Args:
        args: Parsed options. Reads ``environment`` (1 selects
            ``ContinuousEnv1``, 2 selects ``ContinuousEnv2``, any other value
            selects ``ContinuousEnv3``), ``num_experience``,
            ``threshold_multiplier`` and ``threshold_minimum``.
    """
    # Only the module of the selected environment is imported.
    if args.environment == 1:
        from envs.continuous_1 import ContinuousEnv1 as Env
    elif args.environment == 2:
        from envs.continuous_2 import ContinuousEnv2 as Env
    else:
        from envs.continuous_3 import ContinuousEnv3 as Env

    env = Env()
    g = model_utils.BalancedMLP([1], [8, 16, 32], 0.0001, 128, 0.0001, verbose=True)

    def visualize_b(state_action_partition):
        """Draw the predictions of ``g`` over the background, then show the partition."""
        vis_utils.plot_background(env, show=False)

        # Evaluate g on a grid with 0.01 spacing spanning the extents of
        # env.STATE_ACTION_MAP (columns along x, rows along y).
        xx, yy = np.meshgrid(
            np.arange(0, env.STATE_ACTION_MAP.shape[1], 0.01),
            np.arange(0, env.STATE_ACTION_MAP.shape[0], 0.01),
        )
        grid_points = np.c_[xx.ravel(), yy.ravel()]
        predictions = g.batch_predict(grid_points[:, 0], grid_points[:, 1])
        predictions = np.array(predictions).reshape(xx.shape)

        plt.contourf(xx, yy, predictions, alpha=0.4)
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    experience = gather_experience(env, args.num_experience)

    homomorphism = OnlineHomomorphismGDict(
        experience,
        g,
        sample_actions,
        args.threshold_multiplier,
        args.threshold_minimum,
        OnlineHomomorphismGDict.RESOLVE_IGNORE,
        20,
        visualize_b=visualize_b,
    )
    homomorphism.partition_iteration()
def main(args):
    """Partition ``ContinuousEnv3`` with ``OnlineHomomorphismG`` and print the overlap accuracy.

    Args:
        args: Parsed options. Reads ``num_experience``,
            ``state_action_slit_threshold`` and ``state_split_threshold``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    def visualize_b(state_action_partition):
        """Show the decision boundary of ``g`` together with the state-action partition."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_decision_boundary(
            g,
            env.STATE_ACTION_MAP.shape[1],
            env.STATE_ACTION_MAP.shape[0],
            show=False,
        )
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_sb(state_partition):
        """Show the state partition over the environment background."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)

    # NOTE(review): "slit" looks like a typo for "split"; the attribute name has
    # to match the argument parser, so it is left unchanged here -- confirm.
    homomorphism = online_homomorphism_g.OnlineHomomorphismG(
        experience,
        g,
        sample_actions,
        args.state_action_slit_threshold,
        args.state_split_threshold,
        20,
        visualize_b=visualize_b,
        visualize_sb=visualize_sb,
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    print("{:.2f}% accuracy ({:d}/{:d})".format((hits / total) * 100, hits, total))
def main(args):
    """Run one partition iteration of ``OnlineHomomorphismG`` on ``ContinuousEnv3``.

    Both threshold arguments passed to ``OnlineHomomorphismG`` are fixed to 1.

    Args:
        args: Parsed options. Reads ``num_experience``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16, 32], 0.0001, 128, 0.0001, verbose=True)

    def visualize_b(state_action_partition):
        """Draw the predictions of ``g`` over the background, then show the partition."""
        vis_utils.plot_background(env, show=False)

        # Evaluate g on a grid with 0.01 spacing spanning the extents of
        # env.STATE_ACTION_MAP (columns along x, rows along y).
        xx, yy = np.meshgrid(
            np.arange(0, env.STATE_ACTION_MAP.shape[1], 0.01),
            np.arange(0, env.STATE_ACTION_MAP.shape[0], 0.01),
        )
        grid_points = np.c_[xx.ravel(), yy.ravel()]
        predictions = g.batch_predict(grid_points[:, 0], grid_points[:, 1])
        predictions = np.array(predictions).reshape(xx.shape)

        plt.contourf(xx, yy, predictions, alpha=0.4)
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_sb(state_partition):
        """Show the state partition over the environment background."""
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)

    homomorphism = online_homomorphism_g.OnlineHomomorphismG(
        experience,
        g,
        sample_actions,
        1,
        1,
        20,
        visualize_b=visualize_b,
        visualize_sb=visualize_sb,
    )
    homomorphism.partition_iteration()
def run(resolution):
    """Partition ``ContinuousEnv3`` with ``OnlineHomomorphismGDict`` and return the accuracy.

    The amount of experience and the split threshold come from the module-level
    ``NUM_EXPERIENCE`` and ``SPLIT_THRESHOLD``.

    Args:
        resolution: Forwarded to ``OnlineHomomorphismGDict`` as its fifth
            positional argument.

    Returns:
        ``hits / total`` for the pair returned by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, NUM_EXPERIENCE)

    homomorphism = OnlineHomomorphismGDict(
        experience, g, sample_actions, SPLIT_THRESHOLD, resolution, 20
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total
    return accuracy
def run(num_experience, split_threshold, min_confidence):
    """Partition ``ContinuousEnv3`` with ``OnlineHomomorphismG`` and return the accuracy.

    Args:
        num_experience: Forwarded to ``gather_experience``.
        split_threshold: Forwarded to ``OnlineHomomorphismG`` as its fourth
            positional argument (the fifth is fixed to 0).
        min_confidence: Forwarded to ``OnlineHomomorphismG`` as its last
            positional argument.

    Returns:
        ``hits / total`` for the pair returned by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, num_experience)

    homomorphism = OnlineHomomorphismG(
        experience, g, sample_actions, split_threshold, 0, 20, min_confidence
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total
    return accuracy
def run(num_experience, threshold_multiplier):
    """Partition ``ContinuousEnv3`` with ``OnlineHomomorphismGDict`` and return the accuracy.

    The minimum threshold comes from the module-level ``MINIMUM_THRESHOLD`` and
    the resolve mode is ``OnlineHomomorphismGDict.RESOLVE_IGNORE``.

    Args:
        num_experience: Forwarded to ``gather_experience``.
        threshold_multiplier: Forwarded to ``OnlineHomomorphismGDict`` as its
            fourth positional argument.

    Returns:
        ``hits / total`` for the pair returned by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, num_experience)

    homomorphism = OnlineHomomorphismGDict(
        experience,
        g,
        sample_actions,
        threshold_multiplier,
        MINIMUM_THRESHOLD,
        OnlineHomomorphismGDict.RESOLVE_IGNORE,
        20,
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    accuracy = hits / total
    return accuracy
def main(args):
    """Run one partition iteration of ``OnlineHomomorphismGDict`` with visual diagnostics.

    Uses ``OnlineHomomorphismGDict.RESOLVE_ADD_CLOSEST`` and passes three
    visualisation callbacks (``visualize_b``, ``visualize_conf`` and
    ``visualize_ignored``).

    Args:
        args: Parsed options. Reads ``environment`` (1 selects
            ``ContinuousEnv1``, 2 selects ``ContinuousEnv2``, any other value
            selects ``ContinuousEnv3``), ``num_experience``, ``b_threshold``,
            ``conf_threshold`` and ``percentile``.
    """
    # Only the module of the selected environment is imported.
    if args.environment == 1:
        from envs.continuous_1 import ContinuousEnv1 as Env
    elif args.environment == 2:
        from envs.continuous_2 import ContinuousEnv2 as Env
    else:
        from envs.continuous_3 import ContinuousEnv3 as Env

    env = Env()
    g = model_utils.BalancedMLP([1], [8, 16, 32], 0.0001, 128, 0.0001, verbose=True)

    def visualize_b(state_action_partition):
        """Draw the predictions of ``g`` over the background, then show the partition."""
        vis_utils.plot_background(env, show=False)

        # Evaluate g on a grid with 0.01 spacing spanning the extents of
        # env.STATE_ACTION_MAP (columns along x, rows along y).
        xx, yy = np.meshgrid(
            np.arange(0, env.STATE_ACTION_MAP.shape[1], 0.01),
            np.arange(0, env.STATE_ACTION_MAP.shape[0], 0.01),
        )
        grid_points = np.c_[xx.ravel(), yy.ravel()]
        predictions = g.batch_predict(grid_points[:, 0], grid_points[:, 1])
        predictions = np.array(predictions).reshape(xx.shape)

        plt.contourf(xx, yy, predictions, alpha=0.4)
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_ignored(states):
        """Show a normalised 10-bin histogram of ``states`` over the background."""
        vis_utils.plot_background(env, show=False)
        # `density` replaces the `normed` keyword, which newer Matplotlib
        # releases no longer accept.
        plt.hist(states, bins=10, range=[0, len(env.STATE_MAP)], density=True)
        plt.show()

    experience = gather_experience(env, args.num_experience)

    homomorphism = OnlineHomomorphismGDict(
        experience,
        g,
        sample_actions,
        args.b_threshold,
        args.conf_threshold,
        OnlineHomomorphismGDict.RESOLVE_ADD_CLOSEST,
        20,
        percentile=args.percentile,
        visualize_b=visualize_b,
        visualize_conf=vis_utils.show_confidences,
        visualize_ignored=visualize_ignored,
    )
    homomorphism.partition_iteration()