def plot_rewards(self, line_informations, avg_window_size=None):
        """
        Draw one policy subplot per entry plus a shared reward subplot, then
        display the figure with ``plt.show()``.

        :param line_informations: a list of 3-tuples
            (label, rewards, softmax_values)
        :param avg_window_size: when not None, the rewards are passed through
            ``movingaverage`` with this window before being drawn
        :return: None
        :raises NotImplementedError: if ``rewards.shape[0]`` is not 1
        """
        total_rows = len(line_informations) + 1

        # The bottom row of the grid is shared by the reward curve of every entry.
        reward_ax = plt.subplot(total_rows, 1, total_rows)
        reward_ax.set_title("reward in fct. of iteration")

        for row, entry in enumerate(line_informations):
            label, rewards, softmax_values = entry

            policy_ax = plt.subplot(total_rows, 1, row + 1)
            policy_ax.set_title("%s - policy in fct. of iteration" % label)

            # Only rewards whose first dimension has size 1 are supported.
            if rewards.shape[0] != 1:
                raise NotImplementedError()

            rewards = rewards.flatten()
            xpoints = np.array(range(rewards.size))

            # One curve per row of the transposed softmax values, labelled
            # with the matching entry of self.option_label.
            for option_idx, option_values in enumerate(softmax_values.T):
                policy_ax.plot(xpoints,
                               option_values.flatten(),
                               label=self.option_label[option_idx])

            # Only the first policy subplot carries a legend.
            if row == 0:
                policy_ax.legend(loc='upper right', shadow=True)

            if avg_window_size is not None:
                rewards = movingaverage(rewards, avg_window_size)
                # Drop the first (window - 1) x positions; presumably
                # movingaverage returns that many fewer samples -- confirm.
                xpoints = xpoints[avg_window_size - 1:]

            reward_ax.plot(xpoints, rewards, label=label)
            reward_ax.legend(loc='lower right', shadow=True)

        plt.show()
def plot_comparative(results,
                     labels,
                     moving_avg_window_size=None,
                     plot_iter_average=False,
                     all_lines=False):
    """Plot, per label, the episode reward averaged over every run in ``results``.

    ``results`` is indexed as ``results[run][label_index][episode]``. For each
    episode record ``x``, ``len(x[2])`` is used as the number of iterations of
    the episode and ``np.sum(x[2])`` as its reward.

    :param results: sequence of runs, each holding one sequence of episode
        records per label
    :param labels: legend labels; ``labels[i]`` is drawn from
        ``results[run][i]``
    :param moving_avg_window_size: when not None, rewards are passed through
        ``movingaverage`` with this window before being drawn
    :param plot_iter_average: when True, a second subplot shows the reward
        against the cumulative number of iterations, smoothed with
        ``weighted_moving_average``
    :param all_lines: when True, every individual run is also drawn as a
        faint line behind the average
    :return: None (the figure is displayed with ``plt.show()``)
    """
    if plot_iter_average:
        ax1 = plt.subplot(2, 1, 1)
    else:
        ax1 = plt.subplot(1, 1, 1)
    ax1.set_ylabel("Avg. Reward")
    ax1.set_xlabel("# of episodes completed")
    ax1.set_title("Average reward in fct. of # of episodes completed ")

    ax2 = None
    if plot_iter_average:
        ax2 = plt.subplot(2, 1, 2)
        ax2.set_title("Average reward in fct of # iteration completed")
        ax2.set_ylabel("Reward")
        ax2.set_xlabel("# of iterations completed")

    if len(results[0]) == 3:
        colors = ['b-', 'r-', 'g-']
    else:
        colors = ['b:', 'r:', 'g:', 'b-', 'r-', 'g-']

    smoothing = moving_avg_window_size is not None

    for i, lbl in enumerate(labels):

        # Cycle through the styles so that having more labels than styles
        # reuses a style instead of raising IndexError.
        c = colors[i % len(colors)]

        num_episodes = len(results[0][i])
        avg_r_episode = np.zeros((num_episodes, ))
        iter_reward_list = []

        # The episode axis is the same for every run of this label.
        episode_idx = np.arange(num_episodes)
        if smoothing:
            smoothed_idx = episode_idx[moving_avg_window_size - 1:]
        else:
            smoothed_idx = episode_idx

        for res in map(itemgetter(i), results):
            # l: iterations per episode, r: total reward per episode.
            l, r = zip(*[(len(x[2]), np.sum(x[2])) for x in res])
            assert len(l) == num_episodes
            avg_r_episode += r

            if smoothing:
                r = movingaverage(r, moving_avg_window_size)

            if all_lines:
                ax1.plot(smoothed_idx, r, c, alpha=0.12)

            if plot_iter_average:
                # Cumulative number of iterations completed after each episode.
                xpoints2 = np.cumsum(l).tolist()
                step_size = float(xpoints2[-1] - xpoints2[0]) / num_episodes
                width = 50
                # NOTE(review): when moving_avg_window_size is set, r has
                # already been smoothed while xpoints2 still has one entry
                # per episode; whether the lengths agree depends on
                # movingaverage's output length -- confirm before combining
                # both options.
                xpoints2, avg_r_iter_taken = weighted_moving_average(
                    xpoints2, r, step_size, width=width)
                if all_lines:
                    ax2.plot(xpoints2, avg_r_iter_taken, c, alpha=0.12)
                iter_reward_list += zip(xpoints2, avg_r_iter_taken)

        # Mean over runs of the (unsmoothed) per-episode reward.
        avg_r_episode = 1.0 / len(results) * avg_r_episode
        if smoothing:
            avg_r_episode = movingaverage(avg_r_episode,
                                          moving_avg_window_size)

        ax1.plot(smoothed_idx, avg_r_episode, c, label=lbl)

        if plot_iter_average:
            # Pool the per-run points, order them by iteration count and
            # smooth them once more to obtain the average curve.
            iter_reward_list = sorted(iter_reward_list, key=itemgetter(0))
            step_size = float(iter_reward_list[-1][0] -
                              iter_reward_list[0][0]) / num_episodes
            x, y = zip(*iter_reward_list)
            width = 205
            xpoints2, avg_r_iter_taken = weighted_moving_average(x,
                                                                 y,
                                                                 step_size,
                                                                 width=width)
            ax2.plot(xpoints2, avg_r_iter_taken, c, label=lbl)

    ax1.legend(loc='lower right', shadow=True)

    if plot_iter_average:
        ax2.legend(loc='lower right', shadow=True)

    plt.show()
Beispiel #3
0
 def test_MA_edge(self):
     """Constant signal, window 3, ``edgeCorrection=True``: the output must equal the input exactly."""
     signal = [1] * 5
     expected = np.ones(5)
     smoothed = utils.movingaverage(signal, 3, edgeCorrection=True)
     np.testing.assert_array_equal(smoothed, expected)
Beispiel #4
0
 def test_MA_2(self):
     """Constant signal of length 8, window 4, default options: the values taper at both ends."""
     signal = [1] * 8
     expected = np.array([0.5, 0.75, 1., 1., 1., 1., 1., 0.75])
     smoothed = utils.movingaverage(signal, 4)
     np.testing.assert_array_equal(smoothed, expected)
Beispiel #5
0
 def test_MA_1(self):
     """Constant signal of length 5, window 3, default options: both end values are about 2/3."""
     signal = [1] * 5
     expected = np.array([0.66666667, 1, 1, 1, 0.66666667])
     smoothed = utils.movingaverage(signal, 3)
     np.testing.assert_array_almost_equal(smoothed, expected)
Beispiel #6
0
 def test_MA_edge2(self):
     """Ramp 1..5, window 3, ``edgeCorrection=True``: interior values are unchanged, the ends become 1.5 and 4.5."""
     signal = list(range(1, 6))
     expected = np.array([1.5, 2., 3., 4., 4.5])
     smoothed = utils.movingaverage(signal, 3, edgeCorrection=True)
     np.testing.assert_array_almost_equal(smoothed, expected)