Esempio n. 1
0
def test2():
    """Run the blackjack example through ``run_environment``.

    Prints a 'blackjack' banner, creates a ``Blackjack`` instance and passes
    it to ``run_environment`` together with a zero-argument factory that
    builds a new ``TransitionBOW`` on every call, and a ``BlackjackLoss``
    instance.
    """
    print()
    print('blackjack')
    print()
    env = Blackjack()

    def make_transition_bow():
        # The callable given to AttendAt ignores its argument and returns 0.
        features = [BlackjackFeatures()]
        attention = [AttendAt(lambda _: 0, 'blackjack')]
        return TransitionBOW(features, attention, env.n_actions)

    run_environment(env, make_transition_bow, BlackjackLoss())
Esempio n. 2
0
def test0():
    """Run the micro POCMAN example through ``run_environment``.

    Prints a 'micro pocman' banner, creates a ``MicroPOCMAN`` instance and
    passes it to ``run_environment`` together with a zero-argument factory
    that builds a new ``TransitionBOW`` on every call, and a ``POCLoss``
    instance.
    """
    print()
    print('micro pocman')
    print()
    env = MicroPOCMAN()

    def make_transition_bow():
        # The callable given to AttendAt ignores its argument and returns 0.
        features = [LocalPOCFeatures(history_length=4)]
        attention = [AttendAt(lambda _: 0, 'poc')]
        # Third argument is a fixed literal here (not read from the env).
        n_actions = 4
        return TransitionBOW(features, attention, n_actions)

    run_environment(env, make_transition_bow, POCLoss())
Esempio n. 3
0
def test():
    """Run the cart-pole example through ``run_environment`` with REINFORCE.

    Prints a 'Cart Pole' banner, creates a ``CartPoleEnv`` instance and
    passes it to ``run_environment`` together with a zero-argument factory
    that builds a new ``TransitionBOW`` on every call, a ``CartPoleLoss``
    instance, and the keyword settings ``rl_alg=reinforce``,
    ``n_epochs=100`` and ``lr=0.1``.
    """
    print('')
    print('Cart Pole')
    print('')
    env = CartPoleEnv()

    def make_transition_bow():
        # The callable given to AttendAt ignores its argument and returns 0.
        features = [CartPoleFeatures()]
        attention = [AttendAt(lambda _: 0, 'cartpole')]
        return TransitionBOW(features, attention, env.n_actions)

    run_environment(
        env,
        make_transition_bow,
        CartPoleLoss(),
        rl_alg=reinforce,
        n_epochs=100,
        lr=0.1,
    )
Esempio n. 4
0
def test3():
    """Run the Hex example on a 3x3 board through ``run_environment``.

    Prints a 'hex' banner, creates ``Hex(Hex.BLACK, 3)`` and passes it to
    ``run_environment`` together with a zero-argument factory that builds a
    new ``TransitionBOW`` on every call, and a ``HexLoss`` instance.
    """
    print()
    print('hex')
    print()
    board_size = 3
    env = Hex(Hex.BLACK, board_size)

    def make_transition_bow():
        # The callable given to AttendAt ignores its argument and returns 0.
        features = [HexFeatures(board_size)]
        attention = [AttendAt(lambda _: 0, 'hex')]
        return TransitionBOW(features, attention, env.n_actions)

    run_environment(env, make_transition_bow, HexLoss())
Esempio n. 5
0
def test():
    """Run PPO on the task selected by the parsed command-line arguments.

    Reads ``args.task`` from ``parse_arguments()``.  For ``'mountaincar'``
    and ``'cartpole'`` it prints the task name, creates the matching
    environment and calls ``run_ppo`` with the environment, a one-argument
    factory that builds a new ``TransitionBOW`` on every call, the matching
    loss instance, ``args.eps`` and ``args.learner``.

    Raises:
        SystemExit: with status ``-1`` after printing 'Unsupported Task!'
            when ``args.task`` is neither of the supported values.
    """
    # Local import keeps this block self-contained; ``sys.exit`` is used
    # instead of the ``exit`` helper, which is injected by the ``site``
    # module for interactive use and is not guaranteed to exist.
    import sys

    print('')
    print('Proximal Policy Optimization')
    print('')
    args = parse_arguments()

    if args.task == 'mountaincar':
        print('Mountain Car')
        ex = MountainCar()
        feature_cls = MountainCarFeatures
        attend_name = 'mountain_car'
        loss = MountainCarLoss()
    elif args.task == 'cartpole':
        print('Cart Pole')
        ex = CartPoleEnv()
        feature_cls = CartPoleFeatures
        attend_name = 'cartpole'
        loss = CartPoleLoss()
    else:
        print('Unsupported Task!')
        sys.exit(-1)

    run_ppo(
        ex,
        # The factory's ``dy_model`` argument is accepted but not used; the
        # callable given to AttendAt ignores its argument and returns 0.
        lambda dy_model: TransitionBOW(
            [feature_cls()],
            [AttendAt(lambda _: 0, attend_name)],
            ex.n_actions),
        loss,
        args.eps,
        args.learner,
    )
Esempio n. 6
0
def test4():
    """Run the big grid world example with global grid features.

    Prints a banner, builds the world with ``make_big_gridworld()`` and
    passes it to ``run_gridworld`` together with a zero-argument factory
    that builds a new ``TransitionBOW`` on every call.
    """
    # Single-argument call form: same output as the Python 2 print statement,
    # and valid syntax on Python 3.
    print('\n===\n=== test4: big grid world, global features\n===')
    ex = make_big_gridworld()

    def make_transition_bow():
        # The callable given to AttendAt ignores its argument and returns 0.
        features = [GlobalGridFeatures(ex.width, ex.height)]
        attention = [AttendAt(lambda _: 0, 'grid')]
        return TransitionBOW(features, attention, 4)

    run_gridworld(ex, make_transition_bow)
Esempio n. 7
0
def test3():
    """Run the default grid world with local grid features only.

    Prints a banner, builds the world with
    ``make_default_gridworld(p_step_success=0.8, start_random=True)`` and
    passes it to ``run_gridworld`` together with a zero-argument factory
    that builds a new ``TransitionBOW`` on every call.
    """
    # Single-argument call form: same output as the Python 2 print statement,
    # and valid syntax on Python 3.
    print('\n===\n=== test3: p_step_success=0.8, but local features only\n===')
    ex = make_default_gridworld(p_step_success=0.8, start_random=True)

    def make_transition_bow():
        # The callable given to AttendAt ignores its argument and returns 0.
        features = [LocalGridFeatures(ex.width, ex.height)]
        attention = [AttendAt(lambda _: 0, 'grid')]
        return TransitionBOW(features, attention, 4)

    run_gridworld(ex, make_transition_bow)
Esempio n. 8
0
def test2():
    """Run the default grid world with a per-step cost and global features.

    Prints a banner, builds the world with
    ``make_default_gridworld(per_step_cost=0.1, p_step_success=0.8)`` and
    passes it to ``run_gridworld`` together with a zero-argument factory
    that builds a new ``TransitionBOW`` on every call.
    """
    # Single-argument call form: same output as the Python 2 print statement,
    # and valid syntax on Python 3.
    print('\n===\n=== test2: p_step_success=0.8 and per_step_cost=0.1\n===')
    ex = make_default_gridworld(per_step_cost=0.1, p_step_success=0.8)

    def make_transition_bow():
        # The callable given to AttendAt ignores its argument and returns 0.
        features = [GlobalGridFeatures(ex.width, ex.height)]
        attention = [AttendAt(lambda _: 0, 'grid')]
        return TransitionBOW(features, attention, 4)

    run_gridworld(ex, make_transition_bow)
Esempio n. 9
0
def test0():
    """Run the default grid world with ``p_step_success=1.0``.

    Prints a banner, builds the world with
    ``make_default_gridworld(p_step_success=1.0)`` and passes it to
    ``run_gridworld`` together with a zero-argument factory that builds a
    new ``TransitionBOW`` on every call.
    """
    # Single-argument call form: same output as the Python 2 print statement,
    # and valid syntax on Python 3.
    print('\n===\n=== test0: p_step_success=1.0\n===')
    ex = make_default_gridworld(p_step_success=1.0)

    def make_transition_bow():
        # The callable given to AttendAt ignores its argument and returns 0.
        features = [GlobalGridFeatures(ex.width, ex.height)]
        attention = [AttendAt(lambda _: 0, 'grid')]
        return TransitionBOW(features, attention, 4)

    run_gridworld(ex, make_transition_bow)