fig = plt.figure() 
    ax = fig.gca(projection='3d') 
    ax.plot_surface(dealer, player, values_ua,  cmap=cm.coolwarm,
                       linewidth=0)
    plt.title("State value function (usable ace)")
    plt.xlabel("Dealer showing")
    plt.ylabel("Player sum")


#%%
if __name__ == "__main__":
    # Run settings shared by both Monte Carlo calls below: the episode
    # count and gamma (presumably the discount factor -- confirm against
    # the routines that consume it).
    num_episodes, gamma = 500000, 1

    # Create the environment, query its state space, and build the
    # starting policy from those states.
    env = BlackJack()
    states = env.state_space()
    init_policy_player = policy(states)

    # Control step: derive the player's policy from the initial one.
    policy_player = Monte_carlo_exploring_starts(
        env, states, init_policy_player, num_episodes, gamma
    )
    #%%
    # Prediction step: evaluate the policy obtained above.
    VF = first_visit_MC_prediction(
        env, states, policy_player, num_episodes, gamma
    )

    #%%
    # Plot the policy first, then its value function.
    plot_policy(policy_player)
    plot_value_function(VF)