コード例 #1
0
def _test_sarsa(averaging_runs=1, plot_learning_curve=False, multiproc=True):
    """ Test the SARSA algorithm
    :param averaging_runs: Number of runs to use in averaging SARSA results
    :param plot_learning_curve: Whether to plot the learning curve for lambda being 0 or 1
    """
    print '\nTesting SARSA...'

    try:
        mc_value_function = pickle.load(open("Data/MC_value_function.pickle", "rb"))
    except:
        mc_value_function = rl_algorithms.monte_carlo(iterations=1000000)
    average_mse = []
    lambda_space = np.linspace(0, 1, 11)

    # get MSE errors over all averaging runs
    for i in xrange(averaging_runs):
        mse = []
        for _lambda in lambda_space:
            sarsa_value_function = rl_algorithms.sarsa_lambda(l=_lambda, \
                                        plot_learning_curve=plot_learning_curve, multiproc=multiproc)
            mse.append(mean_sq.calculate_mse(mc_value_function, sarsa_value_function))
        average_mse.append(mse)

    # average scores from n runs of SARSA and plot
    average_mse = [float(sum(col))/len(col) for col in zip(*average_mse)]
    average_lambda_mse = zip(lambda_space, average_mse)

    for entry in average_lambda_mse:
        print('--------------------')
        print('Lambda: %.1f' % entry[0])
        print('Mean Squared Error: %.4f' % entry[1])
        print('\n')

    plotting.plot_sarsa_mse(average_lambda_mse)
コード例 #2
0
def _test_monte_carlo(iterations=1000000, pickle_file=False):
    """ Test the monte carlo control algorithm
    :param iterations: Number of monte carlo iterations
    :param pickle_file: whether to save the results or not for use in other algorithms
    """
    print '\nTesting Monte-Carlo control...'

    mc_value_function = rl_algorithms.monte_carlo(iterations)
    if pickle_file:
        print 'Pickling...'
        pickle.dump(mc_value_function, open("Data/MC_value_function.pickle", "wb"))
        print 'Done pickling...'
コード例 #3
0
def _test_monte_carlo(iterations=100000, pickle_file=False):
    """ Test the monte carlo control algorithm

    :param iterations: Number of monte carlo iterations
    :param pickle_file: whether to save the results or not for use in other algorithms
    """
    print "\nTesting Monte-Carlo control..."

    mc_value_function = rl_algorithms.monte_carlo(iterations)
    if pickle_file:
        print "Pickling..."
        pickle.dump(mc_value_function, open("Data/MC_value_function.pickle", "wb"))
        print "Done pickling..."
コード例 #4
0
def _test_sarsa(averaging_runs=1, plot_learning_curve=False, multiproc=True):
    """ Test the SARSA algorithm

    :param averaging_runs: Number of runs to use in averaging SARSA results
    :param plot_learning_curve: Whether to plot the learning curve for lambda being 0 or 1
    """
    print "\nTesting SARSA..."

    try:
        mc_value_function = pickle.load(open("Data/MC_value_function.pickle", "rb"))
    except:
        mc_value_function = rl_algorithms.monte_carlo(iterations=1000000)
    average_mse = []
    lambda_space = np.linspace(0, 1, 11)

    # get MSE errors over all averaging runs
    for i in xrange(averaging_runs):
        mse = []
        for _lambda in lambda_space:
            sarsa_value_function = rl_algorithms.sarsa_lambda(
                l=_lambda, plot_learning_curve=plot_learning_curve, multiproc=multiproc
            )
            mse.append(utilities.calculate_mse(mc_value_function, sarsa_value_function))
        average_mse.append(mse)
        if (i % 5) == 0:
            print i

    # average scores from n runs of SARSA and plot
    average_mse = [float(sum(col)) / len(col) for col in zip(*average_mse)]
    average_lambda_mse = zip(lambda_space, average_mse)

    for entry in average_lambda_mse:
        print ("--------------------")
        print ("Lambda: %.1f" % entry[0])
        print ("Mean Squared Error: %.4f" % entry[1])
        print ("\n")

    plotting.plot_sarsa_mse(average_lambda_mse)