def main(num_episodes=20000):
    """Evaluate a Blackjack policy from sampled episodes and plot the result.

    Builds a ``Blackjack`` environment, an initial policy, and two state maps
    (one for values, one for visit counts). It then generates
    ``num_episodes`` episodes under that policy, passing each one to
    ``on_policy_state_evaluation``, and finally asks the environment to
    visualize the value map.

    Args:
        num_episodes: Number of episodes to generate and evaluate.
            Defaults to 20000, the count originally hard-coded here.
    """
    env = Blackjack()
    policy = init_policy(env)
    v = init_state_map(env)
    visits_map = init_state_map(env)

    # `range` rather than `xrange`: the latter does not exist on Python 3,
    # while `range` works on both major versions.
    for _ in range(num_episodes):
        episode = generate_episode(env, policy)
        # The return value is discarded, so `v` and `visits_map` are
        # presumably updated in place -- confirm against the helper.
        on_policy_state_evaluation(episode, v, visits_map)

    env.visualize_state_value(v)