def policy_visualize(Q, env, decks):
    """Plot the learned optimal policy as two heatmaps (with/without usable ace)."""
    Q = rl.convert_to_sum_states(Q, env)
    Q_ = q_with_optimalaction(Q)
    optQ = rl.fill_missing_sum_states(rl.filter_states(Q_), default_value=0.5)

    # Column 0 holds the (player_hand, show_card, use_ace) state tuples and
    # column 1 the optimal action; unpack the tuples into named columns.
    data = pd.DataFrame(list(optQ.items()))
    data["player_hand"] = [state[0] for state in data[0]]
    data["show_card"] = [state[1] for state in data[0]]
    data["use_ace"] = [state[2] for state in data[0]]
    data.drop(0, axis=1, inplace=True)

    # Split on usable ace and pivot each subset into a
    # player-hand x dealer-show-card grid of optimal actions.
    use_ace_set = data[data["use_ace"] == True]
    nouse_ace_set = data[data["use_ace"] == False]
    use_ace_set = use_ace_set.pivot(
        index="player_hand", columns="show_card",
        values=1).sort_index(ascending=False)
    nouse_ace_set = nouse_ace_set.pivot(
        index="player_hand", columns="show_card",
        values=1).sort_index(ascending=False)

    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    fig.suptitle("Optimal policy in {} decks".format(decks), fontsize=16)
    ax[0].set_title("with usable ace")
    ax[1].set_title("without usable ace")
    # Three-color map for the action values 0, 0.5 (fill value for
    # missing states), and 1.
    color = ["k", "w", "g"]
    cmap = sns.color_palette(color, n_colors=3)
    sns.heatmap(use_ace_set, ax=ax[0], cmap=cmap, linewidths=.5,
                linecolor="lightgray", cbar_kws={"ticks": [0., 0.5, 1.]})
    sns.heatmap(nouse_ace_set, ax=ax[1], cmap=cmap, linewidths=.5,
                linecolor="lightgray", cbar_kws={"ticks": [0., 0.5, 1.]})
    fig.savefig("figures/Optimal Policy in {}deck.jpg".format(decks))
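# Illustrative usage sketch (not part of the original script): assumes a
# Q-table `Q` already trained on a Blackjack environment `env`; the deck
# count below is an arbitrary example value.
#
#     policy_visualize(Q, env, decks=6)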
    episode_file=path_fun("sum_state"), warmup=warmup)
time_to_completion_sum = time.time() - start_time_sum

print("Number of explored states (sum states): " + str(len(sumQ)))
print("Cumulative avg. reward = " + str(sum_avg_reward))
print("Training time: \n "
      "Expanded state space MC: {} \n Expanded state space: {} \n "
      "Sum state space: {}".format(time_to_completion_MC,
                                   time_to_completion_expanded,
                                   time_to_completion_sum))

# Convert Q (extended state) to the sum state representation and make 3D
# value-function plots.

# Extended state MC-learning
Q_conv_MC = rl.convert_to_sum_states(Q_MC, env)
V_conv_MC = rl.convert_to_value_function(Q_conv_MC)
V_conv_filt_MC = rl.fill_missing_sum_states(rl.filter_states(V_conv_MC))
pl.plot_value_function(
    V_conv_filt_MC,
    title="Expanded state MC, " + str(decks) + " decks",
    directory=plot_dir,
    file_name="3D_exp_MC_" + str(decks) + "_decks.png")

# Extended state Q-learning
Q_conv = rl.convert_to_sum_states(Q, env)
V_conv = rl.convert_to_value_function(Q_conv)
V_conv_filt = rl.fill_missing_sum_states(rl.filter_states(V_conv))
pl.plot_value_function(
    V_conv_filt,
    title="Expanded state, " + str(decks) + " decks",
    directory=plot_dir,
    # file name assumed by analogy with the MC plot above
    file_name="3D_exp_" + str(decks) + "_decks.png")