def main(args):
    """Run one partitioning experiment on ContinuousEnv3 and print its accuracy.

    Builds the environment and a BalancedMLP model, gathers
    ``args.num_experience`` samples, runs ``partition_iteration`` on an
    ``OnlineHomomorphismG`` instance and prints the hit ratio reported by
    ``evaluation.overlap``.

    Args:
        args: namespace providing ``num_experience``,
            ``state_action_slit_threshold`` and ``state_split_threshold``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    def visualize_b(state_action_partition):
        # Only the final call shows the figure, so all three layers are drawn
        # before anything is displayed.
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_decision_boundary(
            g,
            env.STATE_ACTION_MAP.shape[1],
            env.STATE_ACTION_MAP.shape[0],
            show=False,
        )
        vis_utils.plot_state_action_partition(state_action_partition, show=True)

    def visualize_sb(state_partition):
        vis_utils.plot_background(env, show=False)
        vis_utils.plot_state_partition(state_partition, show=True)

    experience = gather_experience(env, args.num_experience)

    # NOTE(review): the attribute is spelled "slit" (not "split"); it is kept
    # as-is because the argument parser is not visible here -- confirm the name.
    homomorphism = online_homomorphism_g.OnlineHomomorphismG(
        experience,
        g,
        sample_actions,
        args.state_action_slit_threshold,
        args.state_split_threshold,
        20,
        visualize_b=visualize_b,
        visualize_sb=visualize_sb,
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    print("{:.2f}% accuracy ({:d}/{:d})".format((hits / total) * 100, hits, total))
def test_overlap_single_block(self):
    """A partition made of one four-point block is expected to score 2 hits."""
    points = [[0.1, 0.1], [0.4, 1.2], [1.2, 0.1], [1.9, 1.9]]

    # NOTE(review): other call sites in this file unpack the result of
    # evaluation.overlap as (hits, total); here the whole return value is
    # compared to an int -- confirm which return shape is current.
    result = evaluation.overlap(self.MockEnv, [points])

    self.assertEqual(result, 2)
def test_overlap_two_blocks(self):
    """A partition made of two two-point blocks is expected to score 3 hits."""
    first = [[0.1, 0.1], [0.4, 0.2]]
    second = [[1.5, 0.5], [1.3, 1.7]]

    # NOTE(review): other call sites in this file unpack the result of
    # evaluation.overlap as (hits, total); here the whole return value is
    # compared to an int -- confirm which return shape is current.
    result = evaluation.overlap(self.MockEnv, [first, second])

    self.assertEqual(result, 3)
def test_overlap_total_match(self):
    """Three two-point blocks are expected to score 6 hits, one per point."""
    blocks = [
        [[0.1, 0.1], [0.4, 0.2]],
        [[1.5, 0.5], [1.3, 0.7]],
        [[1.7, 1.1], [1.9, 1.2]],
    ]

    # NOTE(review): other call sites in this file unpack the result of
    # evaluation.overlap as (hits, total); here the whole return value is
    # compared to an int -- confirm which return shape is current.
    result = evaluation.overlap(self.MockEnv, blocks)

    self.assertEqual(result, 6)
def run(resolution):
    """Run one OnlineHomomorphismGDict experiment and return its accuracy.

    The amount of experience and the split threshold come from the
    module-level ``NUM_EXPERIENCE`` and ``SPLIT_THRESHOLD`` constants.

    Args:
        resolution: forwarded to ``OnlineHomomorphismGDict`` as its fifth
            positional argument.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, NUM_EXPERIENCE)

    homomorphism = OnlineHomomorphismGDict(
        experience, g, sample_actions, SPLIT_THRESHOLD, resolution, 20
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    return hits / total
def run(num_experience, split_threshold, min_confidence):
    """Run one OnlineHomomorphismG experiment and return its accuracy.

    Args:
        num_experience: passed to ``gather_experience`` together with the
            environment.
        split_threshold: forwarded to ``OnlineHomomorphismG`` as its fourth
            positional argument.
        min_confidence: forwarded to ``OnlineHomomorphismG`` as its last
            positional argument.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, num_experience)

    homomorphism = OnlineHomomorphismG(
        experience, g, sample_actions, split_threshold, 0, 20, min_confidence
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    return hits / total
def run(num_experience, threshold_multiplier):
    """Run one OnlineHomomorphismGDict experiment and return its accuracy.

    The instance is built with the module-level ``MINIMUM_THRESHOLD`` and
    ``OnlineHomomorphismGDict.RESOLVE_IGNORE``.

    Args:
        num_experience: passed to ``gather_experience`` together with the
            environment.
        threshold_multiplier: forwarded to ``OnlineHomomorphismGDict`` as its
            fourth positional argument.

    Returns:
        ``hits / total`` as reported by ``evaluation.overlap``.
    """
    env = ContinuousEnv3()
    g = model_utils.BalancedMLP([1], [8, 16], 0.001, 32, 0.0, verbose=True)

    experience = gather_experience(env, num_experience)

    homomorphism = OnlineHomomorphismGDict(
        experience,
        g,
        sample_actions,
        threshold_multiplier,
        MINIMUM_THRESHOLD,
        OnlineHomomorphismGDict.RESOLVE_IGNORE,
        20,
    )
    homomorphism.partition_iteration()

    hits, total = evaluation.overlap(env, list(homomorphism.partition))
    return hits / total