def test2():
    """Run the Blackjack environment through ``run_environment``.

    Prints a 'blackjack' banner, instantiates ``Blackjack``, and passes
    ``run_environment`` the environment, a zero-argument policy factory and
    a ``BlackjackLoss`` instance.
    """
    print()
    print('blackjack')
    print()

    env = Blackjack()

    def make_policy():
        # Built anew on every call; the action count is read from the
        # environment at call time.
        # NOTE(review): the attention callback always returns 0 -- confirm
        # against AttendAt what that index refers to.
        return TransitionBOW(
            [BlackjackFeatures()],
            [AttendAt(lambda _: 0, 'blackjack')],
            env.n_actions,
        )

    run_environment(env, make_policy, BlackjackLoss())
def test0():
    """Run the MicroPOCMAN environment through ``run_environment``.

    Prints a 'micro pocman' banner, instantiates ``MicroPOCMAN``, and passes
    ``run_environment`` the environment, a zero-argument policy factory and
    a ``POCLoss`` instance.
    """
    print()
    print('micro pocman')
    print()

    env = MicroPOCMAN()

    def make_policy():
        # The action count is hard-coded to 4 here rather than read from
        # the environment.
        return TransitionBOW(
            [LocalPOCFeatures(history_length=4)],
            [AttendAt(lambda _: 0, 'poc')],
            4,
        )

    run_environment(env, make_policy, POCLoss())
def test():
    """Run the cart-pole environment through ``run_environment``.

    Prints a 'Cart Pole' banner, instantiates ``CartPoleEnv``, and calls
    ``run_environment`` with a zero-argument policy factory, a
    ``CartPoleLoss`` instance, and the keyword settings
    ``rl_alg=reinforce``, ``n_epochs=100`` and ``lr=0.1``.
    """
    for banner_line in ('', 'Cart Pole', ''):
        print(banner_line)

    env = CartPoleEnv()

    def make_policy():
        # The action count is taken from the environment at call time.
        return TransitionBOW(
            [CartPoleFeatures()],
            [AttendAt(lambda _: 0, 'cartpole')],
            env.n_actions,
        )

    run_environment(
        env,
        make_policy,
        CartPoleLoss(),
        rl_alg=reinforce,
        n_epochs=100,
        lr=0.1,
    )
def test3():
    """Run a 3x3 Hex environment through ``run_environment``.

    Prints a 'hex' banner, instantiates ``Hex`` with ``Hex.BLACK`` and a
    board size of 3, and passes ``run_environment`` the environment, a
    zero-argument policy factory and a ``HexLoss`` instance.
    """
    print()
    print('hex')
    print()

    side = 3
    env = Hex(Hex.BLACK, side)

    def make_policy():
        # The feature extractor gets the same board size as the
        # environment; the action count is read from the environment.
        return TransitionBOW(
            [HexFeatures(side)],
            [AttendAt(lambda _: 0, 'hex')],
            env.n_actions,
        )

    run_environment(env, make_policy, HexLoss())
def test():
    """Run PPO on the task selected by the parsed command-line arguments.

    Prints a banner, calls ``parse_arguments()``, and dispatches on
    ``args.task``:

    * ``'mountaincar'`` -- ``MountainCar`` with ``MountainCarFeatures`` and
      ``MountainCarLoss``.
    * ``'cartpole'`` -- ``CartPoleEnv`` with ``CartPoleFeatures`` and
      ``CartPoleLoss``.

    In both cases ``run_ppo`` receives the environment, a one-argument
    policy factory, the loss instance, ``args.eps`` and ``args.learner``.

    Raises:
        SystemExit: with status -1 when ``args.task`` is neither of the
            supported values.
    """
    # Function-scope import: ``sys.exit`` replaces the ``site``-injected
    # ``exit`` helper, which is intended for the interactive shell and is
    # not guaranteed to exist (e.g. under ``python -S``).
    import sys

    print('')
    print('Proximal Policy Optimization')
    print('')
    args = parse_arguments()

    if args.task == 'mountaincar':
        print('Mountain Car')
        ex = MountainCar()
        run_ppo(
            ex,
            # The factory accepts ``dy_model`` but does not use it.
            lambda dy_model: TransitionBOW(
                [MountainCarFeatures()],
                [AttendAt(lambda _: 0, 'mountain_car')],
                ex.n_actions),
            MountainCarLoss(),
            args.eps,
            args.learner,
        )
    elif args.task == 'cartpole':
        print('Cart Pole')
        ex = CartPoleEnv()
        run_ppo(
            ex,
            # The factory accepts ``dy_model`` but does not use it.
            lambda dy_model: TransitionBOW(
                [CartPoleFeatures()],
                [AttendAt(lambda _: 0, 'cartpole')],
                ex.n_actions),
            CartPoleLoss(),
            args.eps,
            args.learner,
        )
    else:
        print('Unsupported Task!')
        sys.exit(-1)
def test4():
    """Run the grid-world experiment on the big grid world, global features.

    Prints the test banner, builds the world with ``make_big_gridworld()``,
    and passes it to ``run_gridworld`` together with a zero-argument policy
    factory that uses ``GlobalGridFeatures`` sized from the world's
    ``width`` and ``height`` and 4 actions.
    """
    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3 (the bare print statement is not).
    print('\n===\n=== test4: big grid world, global features\n===')
    ex = make_big_gridworld()
    run_gridworld(
        ex,
        lambda: TransitionBOW(
            [GlobalGridFeatures(ex.width, ex.height)],
            [AttendAt(lambda _: 0, 'grid')],
            4,
        ),
    )
def test3():
    """Run the grid-world experiment with local features only.

    Prints the test banner, builds the world with
    ``make_default_gridworld(p_step_success=0.8, start_random=True)``, and
    passes it to ``run_gridworld`` together with a zero-argument policy
    factory that uses ``LocalGridFeatures`` sized from the world's
    ``width`` and ``height`` and 4 actions.
    """
    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3 (the bare print statement is not).
    print('\n===\n=== test3: p_step_success=0.8, but local features only\n===')
    ex = make_default_gridworld(p_step_success=0.8, start_random=True)
    run_gridworld(
        ex,
        lambda: TransitionBOW(
            [LocalGridFeatures(ex.width, ex.height)],
            [AttendAt(lambda _: 0, 'grid')],
            4,
        ),
    )
def test2():
    """Run the grid-world experiment with a per-step cost and noisy steps.

    Prints the test banner, builds the world with
    ``make_default_gridworld(per_step_cost=0.1, p_step_success=0.8)``, and
    passes it to ``run_gridworld`` together with a zero-argument policy
    factory that uses ``GlobalGridFeatures`` sized from the world's
    ``width`` and ``height`` and 4 actions.
    """
    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3 (the bare print statement is not).
    print('\n===\n=== test2: p_step_success=0.8 and per_step_cost=0.1\n===')
    ex = make_default_gridworld(per_step_cost=0.1, p_step_success=0.8)
    run_gridworld(
        ex,
        lambda: TransitionBOW(
            [GlobalGridFeatures(ex.width, ex.height)],
            [AttendAt(lambda _: 0, 'grid')],
            4,
        ),
    )
def test0():
    """Run the grid-world experiment with ``p_step_success=1.0``.

    Prints the test banner, builds the world with
    ``make_default_gridworld(p_step_success=1.0)``, and passes it to
    ``run_gridworld`` together with a zero-argument policy factory that
    uses ``GlobalGridFeatures`` sized from the world's ``width`` and
    ``height`` and 4 actions.
    """
    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3 (the bare print statement is not).
    print('\n===\n=== test0: p_step_success=1.0\n===')
    ex = make_default_gridworld(p_step_success=1.0)
    run_gridworld(
        ex,
        lambda: TransitionBOW(
            [GlobalGridFeatures(ex.width, ex.height)],
            [AttendAt(lambda _: 0, 'grid')],
            4,
        ),
    )