Example #1
from importlib import import_module

def main(argv):
    args = get_args(argv)

    print(args)
    policy_mod = import_module(args.policy_source)
    policy_class = getattr(policy_mod, 'DaggerPolicy')
    policy = policy_class(**vars(args))

    dagger = Dagger(None,
                    policy,
                    **vars(args))

    # dagger.learn_all_samples(save_file_name="dagger_block_world", load_file_name="dagger_block_world")
    dagger.learn_all_samples()
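
Examples #1 through #3 rely on two project-local helpers that this page does not show: get_args, which parses the command line into an argparse namespace, and a DaggerPolicy class looked up by name in the module named by --policy-source. Below is a minimal sketch of what get_args might look like; every flag here is an assumption inferred from the calls above, not the project's actual code.

import argparse

def get_args(argv):
    # Hypothetical reconstruction covering only the fields these examples touch.
    parser = argparse.ArgumentParser(description='dagger')
    parser.add_argument('--policy-source', dest='policy_source',
                        default='dagger_policy')
    parser.add_argument('--save-file-name', dest='save_file_name',
                        default='dagger_block_world')
    return parser.parse_args(argv)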
Example #2
from importlib import import_module

def main(argv):
    args = get_args(argv)

    x = env_args(args)
    print(x)
    env = make(**vars(x))

    policy_mod = import_module(args.policy_source)
    policy_class = getattr(policy_mod, 'DaggerPolicy')
    policy = policy_class(**vars(args))

    dagger = Dagger(env, policy, **vars(args))

    dagger.test(args.save_file_name)

    env.close()
Example #3
from importlib import import_module

def main(argv):
    args = get_args(argv)

    policy_mod = import_module(args.policy_source)
    policy_class = getattr(policy_mod, 'DaggerPolicy')
    policy = policy_class(**vars(args))

    x = env_args(args)
    env = make(**vars(x))

    args.iterations = 0
    args.num_rollouts = 250
    print(args.num_rollouts)

    dagger = Dagger(env, policy, **vars(args))

    dagger.num_probes = 0
    dagger.explore_only()

    env.close()
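
Examples #2 and #3 also call an env_args helper that narrows the full argument namespace to the keyword arguments make() accepts. A plausible sketch is below; the field names are borrowed from the make() call in Example #4 and are assumptions, not the project's actual code.

from argparse import Namespace

def env_args(args):
    # Keep only the environment-related fields so they can be
    # splatted straight into make(**vars(...)).
    return Namespace(span=args.span, dims=args.dims)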
Example #4
import argparse

def main(argv):
    parser = argparse.ArgumentParser(description='block_world')
    parser.add_argument('--greedy', dest='greedy', action='store_true')
    parser.add_argument('--no-greedy', dest='greedy', action='store_false')
    parser.add_argument('--dims', type=int, default=3)
    parser.add_argument('--span', type=int, default=10)
    parser.add_argument('--max-timesteps', type=int, default=2000000)
    parser.add_argument('--exploration_fraction', type=float, default=0.1)
    parser.add_argument('--exploration_final_eps', type=float, default=0.01)
    parser.add_argument('--l2-penalty', type=float, default=None)
    parser.add_argument('--continous-actions',
                        dest='continous_actions',
                        action='store_true')
    parser.add_argument('--no-continous-actions',
                        dest='continous_actions',
                        action='store_false')
    parser.add_argument('--reach-minimum', type=float, default=0.1)
    parser.set_defaults(greedy=False)
    parser.set_defaults(continous_actions=False)
    cmd_args = parser.parse_args(argv)

    print(cmd_args)

    env = make(span=cmd_args.span,
               dims=cmd_args.dims,
               greedy=cmd_args.greedy,
               l2_penalty=cmd_args.l2_penalty,
               continous_actions=cmd_args.continous_actions,
               reach_minimum=cmd_args.reach_minimum)

    dagger = Dagger(env,
                    DaggerPolicy,
                    num_rollouts=25,
                    train_batch_size=25,
                    train_epochs=20,
                    iterations=20,
                    dir_name='tmp_storage')

    dagger.learn(save_file_name="dagger_dist_world")
    env.close()
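
Assuming the module above ends with a standard entry point such as main(sys.argv[1:]) guarded by __name__ == '__main__', and using a hypothetical file name, a training run could be launched like this:

python train_block_world.py --span 10 --dims 3 --no-greedy --continous-actions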
Example #5
import pygame

def check_keydown_events(event, ai_settings, screen, player, daggers,
                         game_state, screen_state, shop_menu):
    # Respond to key presses
    if game_state.get_state() == GS.VICTORY:
        # TODO: add victory closing animation here
        screen_state.set_state(ScS.FADE_OUT)
    elif game_state.get_state() == GS.INVASION:
        if event.key == pygame.K_UP or event.key == pygame.K_w:
            player.moving_up = True
            # player.sprite_loop = player.moving_up_sprite
            # debug
            print('moving up')
        if event.key == pygame.K_DOWN or event.key == pygame.K_s:
            player.moving_down = True
            # player.sprite_loop = player.moving_down_sprite
            # debug
            print('moving down')
        if event.key == pygame.K_LEFT or event.key == pygame.K_a:
            player.moving_left = True
            # player.sprite_loop = player.moving_left_sprite
            # debug
            print('moving left')
        if event.key == pygame.K_RIGHT or event.key == pygame.K_d:
            player.moving_right = True
            # player.sprite_loop = player.moving_right_sprite
            # debug
            print('moving right')
        if event.key == pygame.K_SPACE:
            # Create a new dagger and add it to the daggers group
            if len(daggers) < ai_settings.daggers_allowed:
                new_dagger = Dagger(ai_settings, screen, player)
                daggers.add(new_dagger)
        if event.key == pygame.K_BACKQUOTE:
            ai_settings.dagger_height = 300
    elif game_state.get_state() == GS.SHOP:
        if event.key == pygame.K_RETURN:
            shop_menu.get_selections()[
                shop_menu.get_current_selection()].select()
        else:
            if event.key == pygame.K_UP or event.key == pygame.K_w:
                shop_menu.update_selection_rev()
            if event.key == pygame.K_DOWN or event.key == pygame.K_s:
                shop_menu.update_selection()
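
Example #5 only handles key presses. A minimal pygame event loop that would dispatch to it might look like the sketch below; the game objects passed in are assumed to be constructed elsewhere in the project.

for event in pygame.event.get():
    if event.type == pygame.KEYDOWN:
        check_keydown_events(event, ai_settings, screen, player, daggers,
                             game_state, screen_state, shop_menu)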
Example #6
def learn(env, policy):
    dagger = Dagger(env)
    dagger.learn(policy)
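
Example #6 is the smallest entry point on this page: the Dagger object owns the whole loop. For context, the DAgger algorithm it presumably wraps alternates between rolling out the current learner and having an expert label every visited state. The sketch below uses hypothetical learner and expert interfaces; none of these names come from the library in the examples.

import numpy as np

def dagger_loop(env, learner, expert, iterations=20, rollouts_per_iter=25):
    states, labels = [], []  # aggregated dataset, grows every iteration
    for _ in range(iterations):
        for _ in range(rollouts_per_iter):
            obs, done = env.reset(), False
            while not done:
                states.append(obs)
                labels.append(expert(obs))  # the expert labels the visited state...
                # ...while the learner's own actions drive the trajectory
                obs, _reward, done, _info = env.step(learner.act(obs))
        learner.fit(np.array(states), np.array(labels))  # retrain on all data so far
    return learner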
Example #7
    if t == 0:
        il_analysis.count_states(sup.get_states())
        il_analysis.save_states("comparisons/comparisons/net_classic_il.png")
        il_analysis.show_states()
        plotter.plot_state_actions(mdp.pi, rewards=grid.reward_states,
                                   sinks=grid.sink_states,
                                   filename='comparisons/comparisons/net_classic_il_state_action.png')
    classic_il_data[t, :] = np.zeros(ITER) + r
    
    
# DAGGER
dagger_data = np.zeros((TRIALS, ITER))
dagger_train, dagger_test = np.zeros((TRIALS, ITER)), np.zeros((TRIALS, ITER))
dagger_analysis = Analysis(H, W, ITER, rewards=grid.reward_states,
                           sinks=grid.sink_states,
                           desc="Dagger's policy progression")
for t in range(TRIALS):
    mdp.load_policy()
    dagger = Dagger(grid, mdp)
    dagger.rollout()
    r_D = np.zeros(ITER)
    dagger_test_acc = np.zeros(ITER)
    dagger_train_acc = np.zeros(ITER)
    for _ in range(ITER):
        print "Dagger iteration:", _
        dagger.retrain()
        dagger_train_acc[_], dagger_test_acc[_] = dagger.net.return_stats()
        
        iteration_states = []
        for i in range(SAMP):
            dagger.rollout()
            iteration_states += dagger.get_recent_rollout_states().tolist()
            r_D[_] += dagger.get_reward() / SAMP
        if _ == ITER - 1 and t == 0:
Example #8
import argparse

import numpy as np
import tensorflow as tf

def main(argv):
    parser = argparse.ArgumentParser(description='block_world')

    parser.add_argument('--dims', type=int, default=3)
    parser.add_argument('--span', type=int, default=10)
    parser.add_argument('--episodes', type=int, default=100)
    parser.add_argument('--run', dest='run', action='store_true')
    parser.add_argument('--no-run', dest='run', action='store_false')
    parser.add_argument('--vis', dest='vis', action='store_true')
    parser.add_argument('--no-vis', dest='vis', action='store_false')
    parser.set_defaults(vis=False, run=True)

    cmd_args = parser.parse_args(argv)

    print(cmd_args)

    np.random.seed()
    env = make(span=cmd_args.span,
               dims=cmd_args.dims)

    dagger = Dagger(env)

    with tf.Session() as sess:
        dagger.test(DaggerPolicy, "dagger_dist_world")

        if cmd_args.vis:
            block_env = make_block_env(run=cmd_args.run)
            y_origin = 0.2 + cmd_args.span * 0.1
            block_env.set_params(tray_length=2. * float(cmd_args.span) * 0.1,
                                 tray_width=2. * float(cmd_args.span) * 0.1,
                                 tray_height=0.1,
                                 rim_height=0.05,
                                 rim_width=0.05)

        total_reward = 0

        def shift_y(pos):
            # Only called on the --vis path, so y_origin is defined by then.
            shifted = np.copy(pos)
            shifted[1] += y_origin
            return shifted

        for _ in range(cmd_args.episodes):
            obs, done = env.reset(), False
            if cmd_args.vis:
                block_env.clear_tray()
                block_env.set_finger(shift_y(env.finger_pos.astype(float) * 0.1))
                block_env.set_target(shift_y(env.target_pos.astype(float) * 0.1))

            episode_rew = 0
            while not done:
                env.render()
                action = dagger.eval_policy(obs)[0]
                obs, rew, done, _ = env.step(action)
                episode_rew += rew

                if cmd_args.vis:
                    action = env.map_discrete_action(action)
                    block_env.move_finger(action.astype(float) * 0.1)

            print("Episode reward", episode_rew)
            total_reward += episode_rew

    print("average reward " + str(float(total_reward)/float(cmd_args.episodes)))
    env.close()
    if cmd_args.vis:
        block_env.close()
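
As with Example #4, this script would be launched from the command line; the file name is hypothetical:

python test_block_world.py --episodes 100 --vis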