def value_iteration_demo():
    """Run ValueIteration on a fresh TableAgent and print the evaluation.

    Seeds NumPy's global RNG with 0, builds ``SnakeEnv(10, [3, 6])`` and a
    ``TableAgent`` for it, runs ``ValueIteration.value_iteration`` under the
    ``'Timer ValueIter'`` timer, then prints the value returned by
    ``eval_game`` for the resulting agent.
    """
    np.random.seed(0)

    snake_env = SnakeEnv(10, [3, 6])
    table_agent = TableAgent(snake_env)
    solver = ValueIteration()

    # NOTE(review): only the solver call is placed inside the timed region;
    # the original layout was collapsed onto one line -- confirm the scope.
    with timer('Timer ValueIter'):
        solver.value_iteration(table_agent)

    score = eval_game(snake_env, table_agent)
    print('return_pi={}'.format(score))
def generalized_iteration_demo():
    """Run GeneralizedPolicyIteration on a fresh TableAgent and print the evaluation.

    Seeds NumPy's global RNG with 0, builds ``SnakeEnv(10, [3, 6])`` and a
    ``TableAgent`` for it, runs
    ``GeneralizedPolicyIteration.generalized_policy_iteration`` under the
    ``'Timer GeneralizedIter'`` timer, then prints the value returned by
    ``eval_game`` for the resulting agent.
    """
    np.random.seed(0)

    snake_env = SnakeEnv(10, [3, 6])
    table_agent = TableAgent(snake_env)
    solver = GeneralizedPolicyIteration()

    # NOTE(review): only the solver call is placed inside the timed region;
    # the original layout was collapsed onto one line -- confirm the scope.
    with timer('Timer GeneralizedIter'):
        solver.generalized_policy_iteration(table_agent)

    score = eval_game(snake_env, table_agent)
    print('return_pi={}'.format(score))
def policy_iteration_demo():
    """Run PolicyIterationWithTimer on a fresh TableAgent and print the evaluation.

    Seeds NumPy's global RNG with 0, builds ``SnakeEnv(10, [3, 6])`` and a
    ``TableAgent`` for it, runs ``PolicyIterationWithTimer.policy_iteration``
    under the ``'Timer PolicyIter'`` timer, then prints the value returned by
    ``eval_game`` for the resulting agent.
    """
    np.random.seed(0)
    env = SnakeEnv(10, [3, 6])
    agent = TableAgent(env)
    pi_algo = PolicyIterationWithTimer()

    # NOTE(review): only the solver call is placed inside the timed region;
    # the original layout was collapsed onto one line -- confirm the scope.
    with timer('Timer PolicyIter'):
        pi_algo.policy_iteration(agent)

    # Call form of print, matching the sibling demo functions. With a single
    # argument it emits the same text on Python 2 and is valid on Python 3,
    # where the statement form is a SyntaxError.
    print('return_pi={}'.format(eval_game(env, agent)))