Example no. 1

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import rl  # project module with the Q-table helpers (import path assumed)


def policy_visualize(Q, env, decks):
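    # Collapse the expanded states to sum states, keep the greedy action for
    # each state, and fill never-visited states with a neutral 0.5 value.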
    Q = rl.convert_to_sum_states(Q, env)
    Q_ = q_with_optimalaction(Q)
    optQ = rl.fill_missing_sum_states(rl.filter_states(Q_), default_value=0.5)

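    # Turn the state -> greedy-action mapping into a DataFrame (one row per state).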
    data = pd.DataFrame(list(optQ.items()))
    # Unpack each state tuple into player_hand / show_card / use_ace columns.
    x, y, z = zip(*data[0])
    data["player_hand"] = np.array(x)
    data["show_card"] = np.array(y)
    data["use_ace"] = np.array(z)
    data.drop(0, axis=1, inplace=True)

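    # Split by usable ace and pivot each subset into a
    # player-hand x dealer-show-card grid of greedy actions.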
    use_ace_set = data[data["use_ace"] == True]
    nouse_ace_set = data[data["use_ace"] == False]

    use_ace_set = use_ace_set.pivot(index="player_hand",
                                    columns="show_card",
                                    values=1).sort_index(ascending=False)
    nouse_ace_set = nouse_ace_set.pivot(index="player_hand",
                                        columns="show_card",
                                        values=1).sort_index(ascending=False)
    """ax1, ax2 = plt.axes()
    ax1.set_title("Optimal Policy with use ace")
    ax2.set_title("Optimal Policy without use ace")

    fig1 = sns.heatmap(use_ace_set, ax = ax1).get_figure()
    fig2 = sns.heatmap(nouse_ace_set, ax = ax2).get_figure()

    fig1.savefig("figures/Optimal Policy with use ace in {}deck.jpg".format(decks))
    fig2.savefig("figures/Optimal Policy without use ace in {}decks.jpg".format(decks))"""

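    # Side-by-side heatmaps; the three-color palette separates the two actions
    # and the 0.5 placeholder used for states that were never visited.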
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    fig.suptitle("optimal policy in {}decks".format(decks), fontsize=16)
    ax[0].set_title("with use ace")
    ax[1].set_title("without use ace")
    color = ["k", "w", "g"]
    cmap = sns.color_palette(color, n_colors=3)

    sns.heatmap(use_ace_set,
                ax=ax[0],
                cmap=cmap,
                linewidths=.5,
                linecolor="lightgray",
                cbar_kws={"ticks": [0., 0.5, 1.]})
    sns.heatmap(nouse_ace_set,
                ax=ax[1],
                cmap=cmap,
                linewidths=.5,
                linecolor="lightgray",
                cbar_kws={"ticks": [0., 0.5, 1.]})

    fig.savefig("figures/Optimal Policy in {}decks.jpg".format(decks))
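
# Usage (a minimal sketch; Q, env and decks are assumed to already exist in the
# calling/training code shown further below):
#
#     policy_visualize(Q, env, decks)   # writes the two-panel policy heatmap
#                                       # to the figures/ directory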
            episode_file=path_fun("sum_state"),
            warmup=warmup)
        time_to_completion_sum = time.time() - start_time_sum
        print("Number of explored states (sum states): " + str(len(sumQ)))
        print("Cumulative avg. reward = " + str(sum_avg_reward))

        print(
            "Training time: \n " +
            "Expanded state space MC: {} \n Expanded state space: {} \n Sum state space: {}"
            .format(time_to_completion_MC, time_to_completion_expanded,
                    time_to_completion_sum))

        # Convert Q (extended state) to sum state representation and make 3D plots
        # Extended state MC-learning

        Q_conv_MC = rl.convert_to_sum_states(Q_MC, env)
        V_conv_MC = rl.convert_to_value_function(Q_conv_MC)
        V_conv_filt_MC = rl.fill_missing_sum_states(
            rl.filter_states(V_conv_MC))
        pl.plot_value_function(
            V_conv_filt_MC,
            title="Expanded state MC, " + str(decks) + " decks",
            directory=plot_dir,
            file_name="3D_exp_MC_" + str(decks) + "_decks.png")

        # Extended state Q-learning
        Q_conv = rl.convert_to_sum_states(Q, env)
        V_conv = rl.convert_to_value_function(Q_conv)
        V_conv_filt = rl.fill_missing_sum_states(rl.filter_states(V_conv))
        pl.plot_value_function(V_conv_filt,
                               title="Expanded state, " + str(decks) +