# NOTE(review): the plotting statements below look like the tail of a plotting
# routine whose header precedes this chunk (dealer, player and values_ua are
# not defined here) -- re-indent them under that routine if so.
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments (deprecated in Matplotlib
# 3.4, removed in 3.6); add_subplot creates the same single 3-D axes on this
# fresh figure and works on both old and new Matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(dealer, player, values_ua, cmap=cm.coolwarm, linewidth=0)
plt.title("State value function (usable ace)")
plt.xlabel("Dealer showing")
plt.ylabel("Player sum")

#%%
if __name__ == "__main__":
    # Settings shared by both Monte Carlo calls below.
    num_episodes = 500000
    gamma = 1  # passed as `gamma` to both routines (presumably the discount factor)

    env = BlackJack()
    states = env.state_space()

    # Build the starting policy from the state space, then hand it to
    # Monte_carlo_exploring_starts to obtain the player's policy.
    init_policy_player = policy(states)
    policy_player = Monte_carlo_exploring_starts(
        env, states, init_policy_player, num_episodes, gamma
    )
#%%
    # Evaluate the resulting policy with first-visit MC prediction.
    VF = first_visit_MC_prediction(
        env, states, policy_player, num_episodes, gamma
    )
#%%
    # Visualise the policy and the value function computed for it.
    plot_policy(policy_player)
    plot_value_function(VF)