Esempio n. 1
0
def main():
    """Plot the TD and ETD learning curves at their best step sizes.

    For each method, the step size whose entry has the lowest mean
    performance measure is selected, the per-run measures stored for that
    step size are loaded, and the mean curve across runs is drawn with
    standard-error bars.

    Reads the module-level names ``COMMON_PATH``, ``lam``, ``curve``,
    ``performance_measure`` and ``curve_verbose``. Displays the figure and
    returns nothing.
    """
    TD_path = COMMON_PATH + 'TD/Data/' + str(lam) + '/'
    ETD_path = COMMON_PATH + 'ETD/Data/' + str(lam) + '/'

    # Each result is a list of lists where each component list is
    # [step_size, mean, SEM]
    result_TD = get_mean_and_SEM_all_stepSizes(TD_path, curve,
                                               performance_measure)
    result_ETD = get_mean_and_SEM_all_stepSizes(ETD_path, curve,
                                                performance_measure)

    # The best step size is the one with the lowest mean performance measure.
    # min() returns the first minimal entry, which is the same entry a stable
    # ascending sort would place at index 0, without reordering the lists.
    best_TD_step_size = min(result_TD, key=lambda entry: entry[1])[0]
    best_ETD_step_size = min(result_ETD, key=lambda entry: entry[1])[0]

    # Data directories are named '2<exponent>' (presumably step size =
    # 2**exponent -- confirm against the data layout). math.log2 is more
    # accurate than math.log(x, 2), and round() avoids the off-by-one that
    # int() truncation produces when the logarithm comes out a hair short
    # of the true integer exponent.
    best_TD_step_size_2RaisedTo = round(math.log2(best_TD_step_size))
    best_ETD_step_size_2RaisedTo = round(math.log2(best_ETD_step_size))

    # Now that we have the best step size, locate the directory holding the
    # learning-curve data of that step size.
    best_TD_directory = TD_path + '2' + str(best_TD_step_size_2RaisedTo) + '/'
    best_ETD_directory = (ETD_path + '2' + str(best_ETD_step_size_2RaisedTo) +
                          '/')

    # measures_* is a list of lists, where each element (each list)
    # holds the measures of one trial.
    measures_TD = get_all_files(best_TD_directory, performance_measure)
    measures_ETD = get_all_files(best_ETD_directory, performance_measure)

    # Mean curve across trials, with the standard error of the mean
    # approximated as std(X) / sqrt(n), where n = len(X).
    mean_TD = np.mean(measures_TD, axis=0)
    yerror_TD = np.std(measures_TD, axis=0) / math.sqrt(len(measures_TD))
    mean_ETD = np.mean(measures_ETD, axis=0)
    yerror_ETD = np.std(measures_ETD, axis=0) / math.sqrt(len(measures_ETD))

    # Plot the graph
    # plt.yscale('log')
    plt.errorbar(np.arange(mean_TD.size),
                 mean_TD,
                 yerr=yerror_TD,
                 label=r'TD, $\alpha$=' + str(best_TD_step_size))
    plt.errorbar(np.arange(mean_ETD.size),
                 mean_ETD,
                 yerr=yerror_ETD,
                 label=r'ETD, $\alpha$=' + str(best_ETD_step_size))
    plt.xlabel('Episodes')
    # The run count shown on the axis is taken from the TD data only.
    plt.ylabel('negated ' + str(performance_measure) +
               ' per episode\nAveraged over ' + str(len(measures_TD)) +
               ' runs',
               rotation=0)
    plt.title('Best learning curve for TD and ETD PuddleWorld control \n' +
              r'$\lambda$ = ' + str(lam) + '\n' + str(curve_verbose))
    plt.legend(loc=0)
    plt.show()
Esempio n. 2
0
def main():
    """Plot the learning curve averaged over all loaded runs.

    Loads the per-trial measures via ``get_all_files(directory)``, averages
    them across trials, and shows the resulting curve. Reads the
    module-level names ``directory``, ``lam``, ``alpha`` and ``method``.
    """
    # One entry per trial; each entry is the list of that trial's measures.
    runs = get_all_files(directory)

    # Average across trials (axis 0) to get a single curve.
    avg_curve = np.mean(runs, axis=0)
    episodes = np.arange(avg_curve.size)
    curve_label = ''.join(
        [r'$\lambda$', '=', str(lam), ', alpha=', str(alpha)])
    plt.plot(episodes, avg_curve, label=curve_label)

    # Axis labels, title and legend.
    plt.xlabel('Episodes')
    y_text = 'Steps per Episode\nAveraged over ' + str(len(runs)) + ' runs'
    plt.ylabel(y_text, rotation=0)
    plt.title('Learning Curve for ' + method + ' Control on MountainCar')
    plt.legend(loc=0)
    plt.show()
Esempio n. 3
0
def main(output_path='/Users/rlai/Desktop/test.pdf'):
    """Plot and save the TD and ETD learning curves at their best step sizes.

    For each method, the step size whose entry has the lowest mean
    performance measure is selected, the per-run measures stored for that
    step size are loaded, and the mean curve across runs is drawn with
    standard-error bars. The figure is written to ``output_path`` as a PDF,
    shown, and then closed.

    Reads the module-level names ``COMMON_PATH``, ``lam`` and ``curve``.

    Args:
        output_path: Destination of the saved PDF. Defaults to the path the
            script originally hard-coded, so existing ``main()`` calls
            behave as before.
    """
    TD_path = COMMON_PATH + 'TD/Data/' + str(lam) + '/'
    ETD_path = COMMON_PATH + 'ETD/Data/' + str(lam) + '/'

    # Each result is a list of lists where each component list is
    # [step_size, mean, SEM]
    result_TD = get_mean_and_SEM_all_stepSizes(TD_path, curve)
    result_ETD = get_mean_and_SEM_all_stepSizes(ETD_path, curve)

    # The best step size is the one with the lowest mean performance measure.
    # min() returns the first minimal entry, which is the same entry a stable
    # ascending sort would place at index 0, without reordering the lists.
    best_TD_step_size = min(result_TD, key=lambda entry: entry[1])[0]
    best_ETD_step_size = min(result_ETD, key=lambda entry: entry[1])[0]

    # Data directories are named '2<exponent>' (presumably step size =
    # 2**exponent -- confirm against the data layout). math.log2 is more
    # accurate than math.log(x, 2), and round() avoids the off-by-one that
    # int() truncation produces when the logarithm comes out a hair short
    # of the true integer exponent.
    best_TD_step_size_2RaisedTo = round(math.log2(best_TD_step_size))
    best_ETD_step_size_2RaisedTo = round(math.log2(best_ETD_step_size))

    # Now that we have the best step size, locate the directory holding the
    # learning-curve data of that step size.
    best_TD_directory = TD_path + '2' + str(best_TD_step_size_2RaisedTo) + '/'
    best_ETD_directory = (ETD_path + '2' + str(best_ETD_step_size_2RaisedTo) +
                          '/')

    # measures_* is a list of lists, where each element (each list)
    # holds the measures of one trial.
    measures_TD = get_all_files(best_TD_directory)
    measures_ETD = get_all_files(best_ETD_directory)

    # Mean curve across trials, with the standard error of the mean
    # approximated as std(X) / sqrt(n), where n = len(X).
    mean_TD = np.mean(measures_TD, axis=0)
    yerror_TD = np.std(measures_TD, axis=0) / math.sqrt(len(measures_TD))
    mean_ETD = np.mean(measures_ETD, axis=0)
    yerror_ETD = np.std(measures_ETD, axis=0) / math.sqrt(len(measures_ETD))

    # Plot the graph
    fig, ax = plt.subplots(nrows=1, ncols=1)
    # Abbreviated tick labels for the fixed 0..30000 episode range.
    plt.xticks([0, 10000, 20000, 30000], [0, '10K', '20K', '30K'])
    ax.errorbar(np.arange(mean_TD.size),
                mean_TD,
                yerr=yerror_TD,
                label=r'TD, $\alpha$=' + str(best_TD_step_size),
                color='tab:blue')
    ax.errorbar(np.arange(mean_ETD.size),
                mean_ETD,
                yerr=yerror_ETD,
                label=r'ETD, $\alpha$=' + str(best_ETD_step_size),
                color='tab:red')
    ax.set_xlim(left=0, right=30000)
    ax.set_ylim(bottom=20, top=30)
    ax.set_xlabel('Episodes', fontsize=35)
    ax.set_ylabel(r'$\sqrt{\widehat{\overline{VE}}}$',
                  rotation=0,
                  fontsize=35,
                  labelpad=45)
    ax.tick_params(labelsize=30, which='major', axis='both')
    # Hide the top and right borders of the plot area.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    fig.savefig(output_path,
                format='pdf',
                dpi=300,
                bbox_inches='tight')
    # Optional title / legend, currently disabled:
    # plt.title('Best learning curve for TD and ETD Prediction on MountainCar \n' + r'$\lambda$ = ' + str(lam) + '\n' + str(curve_verbose) )
    # plt.title(r'$\lambda$ = ' + str(lam), fontsize=25)
    # plt.legend(loc=0, fontsize=15)
    plt.show()
    plt.close(fig)