Esempio n. 1
0
    trail_results = np.zeros((trail, epoch))
    for x in range(trail):
        trail_results[x] = sarsa_cartpole(lr,
                                          baseparams,
                                          epoch=epoch,
                                          eps=eps,
                                          base=base)  # (epoch, )
    std_error = np.std(trail_results, axis=0)
    mean_rewards = np.mean(trail_results, axis=0)
    return mean_rewards, std_error


def draw_plot(data, error, epoch=100, filename='tests.png'):
    """Plot rewards per episode with error bars, save the image, then show it.

    Args:
        data: y values, one per episode; expected to hold ``epoch`` entries so
            that it lines up with the x axis ``0 .. epoch - 1``.
        error: error-bar sizes passed to ``errorbar`` as ``yerr``.
        epoch: number of points on the x axis.
        filename: path the figure is written to (at 200 dpi).
    """
    episodes = np.arange(epoch)

    _, axes = plt.subplots()
    axes.set_xlabel('episode')
    axes.set_ylabel('reward')
    # fmt='o' draws markers only, without a connecting line.
    axes.errorbar(episodes, data, yerr=error, fmt='o')

    # Save before show(): the freshly created figure is still the current one.
    plt.savefig(filename, dpi=200)
    plt.show()


# Module-level experiment driver: these statements run as soon as the file is
# executed. Each experiment computes two arrays, saves them as CSV through
# ah.save_cp_csvdata, then plots them with draw_plot.

# Experiment 1: sarsa_grid_trail with first argument 0.5 and a constant
# function (always 5) as the second; the meaning of both is defined by
# sarsa_grid_trail, which is not visible here — TODO confirm.
rewards, error = sarsa_grid_trail(0.5, lambda x: 5, trail=100)
ah.save_cp_csvdata(rewards, error, 'sarsa_grid_f_1.csv')
# draw_plot uses its default epoch=100 for the x axis.
draw_plot(rewards, error, filename='testsarsagridf.png')

# Experiment 2: sarsa_cp_trail with {'order': 3} and eps=0.05.
# easy to go inf; should be careful when tuning; difficult to converge; chaos at the first several loops
rewards, err = sarsa_cp_trail(5e-4, {'order': 3}, trail=100, eps=0.05)
ah.save_cp_csvdata(rewards, err, 'sarsa_cartpole_f_1.csv')
draw_plot(rewards, err, filename='testsarsacpf.png')
Esempio n. 2
0
        trail_results[x] = qlearning_cartpole(lr,
                                              baseparams,
                                              decaylambda,
                                              epoch=epoch,
                                              base=base)  # (epoch, )
    std_error = np.std(trail_results, axis=0)
    mean_rewards = np.mean(trail_results, axis=0)
    return mean_rewards, std_error


def draw_plot(data, error, epoch=100, filename='tests.png'):
    """Draw an error-bar chart of reward against episode, save it and show it.

    Args:
        data: y values, one per episode; expected to contain ``epoch`` entries
            to match the x axis ``0 .. epoch - 1``.
        error: error-bar sizes, forwarded to ``errorbar`` as ``yerr``.
        epoch: number of x positions to plot.
        filename: output image path; the figure is saved at 200 dpi.
    """
    x_positions = np.arange(epoch)

    _, axis = plt.subplots()
    axis.set_xlabel('episode')
    axis.set_ylabel('reward')
    # Markers only ('o'); no line joins the points.
    axis.errorbar(x_positions, data, yerr=error, fmt='o')

    # The new figure is pyplot's current figure, so plt.savefig targets it.
    plt.savefig(filename, dpi=200)
    plt.show()


# Module-level experiment driver: runs on execution of the file. Each
# experiment produces two arrays, saves them with ah.save_cp_csvdata and plots
# them with draw_plot (default epoch=100 on the x axis).

# Experiment 1: qlearning_grid_trail with first argument 0.5 and a constant
# function (always 5); their roles are defined by qlearning_grid_trail, which
# is not visible here — TODO confirm.
rewards, error = qlearning_grid_trail(0.5, lambda x: 5, trail=100)
ah.save_cp_csvdata(rewards, error, 'qlearning_grid_f_1.csv')
draw_plot(rewards, error, filename='testqgridf.png')

# Experiment 2: qlearning_cp_trail with {'order': 3}. The lambda decays
# geometrically in x (factor 0.8, starting at 0.1 for x == 1) towards a floor
# of 0.01; presumably a per-episode schedule — TODO confirm what it controls.
# easy to go inf; should be careful when tuning; difficult to converge; chaos at the first several loops
rewards, err = qlearning_cp_trail(2e-3, {'order': 3},
                                  lambda x: 0.1 * (0.8**(x - 1)) + 0.01,
                                  trail=100)
ah.save_cp_csvdata(rewards, err, 'q_cartpole_f.csv')
draw_plot(rewards, err, filename='testqcpf.png')
        trail_results[x] = qlearning_mountaincar(lr,
                                                 baseparams,
                                                 eps,
                                                 epoch=epoch,
                                                 base=base)  # (epoch, )
    std_error = np.std(trail_results, axis=0)
    mean_rewards = np.mean(trail_results, axis=0)
    return mean_rewards, std_error


def draw_plot(data, err, epoch=100, filename='testq.png'):
    """Plot reward per episode with error bars, write the image and display it.

    Args:
        data: y values, one per episode; expected to contain ``epoch`` entries
            so that it matches the x axis ``0 .. epoch - 1``.
        err: error-bar sizes, forwarded to ``errorbar`` as ``yerr``.
        epoch: number of x positions.
        filename: output image path; saved at 200 dpi.
    """
    episode_axis = np.arange(epoch)

    _, axes = plt.subplots()
    axes.set_xlabel('episode')
    axes.set_ylabel('reward')
    # 'o' format: one marker per episode, no connecting line.
    axes.errorbar(episode_axis, data, yerr=err, fmt='o')

    # The figure created above is the current one, so this saves it.
    plt.savefig(filename, dpi=200)
    plt.show()


# Module-level experiment driver: runs on execution of the file. Both
# experiments pass {'order': 5} and a constant function as `eps`, save the two
# returned arrays with ah.save_cp_csvdata and plot them with draw_plot
# (default epoch=100 on the x axis).

# Experiment 1: sarsa_mc_trail with eps fixed at 0.01 for every x.
rewards, err = sarsa_mc_trail(2e-2, {'order': 5},
                              trail=100,
                              eps=lambda x: 0.01)
ah.save_cp_csvdata(rewards, err, 'sarsa_mountaincar.csv')
draw_plot(rewards, err, filename='testsarsamc.png')

# Experiment 2: qlearning_mc_trail with eps fixed at 0.2 for every x.
rewards, err = qlearning_mc_trail(1e-2, {'order': 5},
                                  trail=100,
                                  eps=lambda x: 0.2)
ah.save_cp_csvdata(rewards, err, 'qlearning_mountaincar.csv')
draw_plot(rewards, err, filename='testqlearningmc.png')
            dtheta[:, idx * phi.shape[1]:(idx + 1) *
                   phi.shape[1]] = (1 - pi[idx]) * phi
        else:
            dtheta[:, idx * phi.shape[1]:(idx + 1) *
                   phi.shape[1]] = -pi[idx] * phi
    return dtheta


def draw_plot(data, error, epoch=100, filename='tests.png'):
    """Plot the reward curve with a shaded error band, save it, then show it.

    Args:
        data: y values, one per episode; expected to contain ``epoch`` entries
            to match the x axis ``0 .. epoch - 1``. Any array-like is accepted.
        error: half-width of the shaded band around ``data`` (array-like or
            scalar).
        epoch: number of x positions to plot.
        filename: output image path; the figure is saved at 200 dpi.
    """
    # Convert up front so `data - error` also works for plain Python lists.
    data = np.asarray(data)
    error = np.asarray(error)
    episodes = np.arange(epoch)

    # Start from a fresh figure. Drawing on pyplot's implicit current figure
    # can pile this curve on top of a previous call's output whenever
    # plt.show() returns without closing that figure.
    _, ax = plt.subplots()
    ax.set_xlabel('episode')
    ax.set_ylabel('reward')
    ax.plot(episodes, data)
    ax.fill_between(episodes, data - error, data + error, alpha=0.3)

    # The new figure is the current one, so plt.savefig writes exactly it.
    plt.savefig(filename, dpi=200)
    plt.show()


# Module-level experiment driver: runs on execution of the file. Each
# experiment saves its two returned arrays with ah.save_cp_csvdata and plots
# them with draw_plot.

# Experiment 1: reinforce_grid_trail over epoch=200. The trailing 0.097866 is
# presumably an alternative value for the first argument — TODO confirm.
rewards, error = reinforce_grid_trail(0.047266, lambda x: 2,
                                      epoch=200)  # 0.097866
ah.save_cp_csvdata(rewards, error, 'rf_grid_alt.csv')
# epoch=200 is passed explicitly so the x axis matches the run above.
draw_plot(rewards, error, filename='rf_grid.png', epoch=200)

# Experiment 2: reinforce_mc_trail with {'order': 7} and a constant function
# (always 0.5); the roles of the three leading numbers are defined by
# reinforce_mc_trail, which is not visible here — TODO confirm.
rewards, err = reinforce_mc_trail(1.675643e-3,
                                  2.124e-3,
                                  0.8, {'order': 7},
                                  lambda x: 0.5,
                                  trail=100)
ah.save_cp_csvdata(rewards, err, 'rf_mc.csv')
# Uses draw_plot's default epoch=100.
draw_plot(rewards, err, filename='rf_mc.png')
Esempio n. 5
0

def draw_plot(data, error, epoch=100, filename='tests.png'):
    """Render rewards per episode as an error-bar chart, save it and show it.

    Args:
        data: y values, one per episode; expected to contain ``epoch`` entries
            so that it lines up with the x axis ``0 .. epoch - 1``.
        error: error-bar sizes, forwarded to ``errorbar`` as ``yerr``.
        epoch: number of x positions.
        filename: output image path; saved at 200 dpi.
    """
    xs = np.arange(epoch)

    _, chart = plt.subplots()
    chart.set_xlabel('episode')
    chart.set_ylabel('reward')
    # Marker-only format: the points are not joined by a line.
    chart.errorbar(xs, data, yerr=error, fmt='o')

    # The figure just created is pyplot's current figure.
    plt.savefig(filename, dpi=200)
    plt.show()


# rewards, error = sarsa_grid_trail(1e-2, lambda x: 0.3 if x < 80 else 0.01, trail=100)
# ah.save_cp_csvdata(rewards, error, 'sarsa_grid_f_1.csv')
# draw_plot(rewards, error, filename='testsarsagridf.png')
#
# rewards, err = sarsa_cp_trail(8e-3, {'order': 3}, trail=100)
# ah.save_cp_csvdata(rewards, err, 'sarsa_cartpole_f_1.csv')
# draw_plot(rewards, err, filename='testsarsacpf.png')

# rewards, err = (sarsa_cp_trail(1e-2, {'num_tilings': 10, 'tiles_per_tiling': 11}, eps=0.1, base='tile', trail=100))
# ah.save_cp_csvdata(rewards, err, 'sarsa_cartpole_tile.csv')
# draw_plot(rewards, err, filename='testscptile1.png')

# Module-level experiment driver (the only active one in this script; the
# variants above are commented out). Runs sarsa_cp_trail with base='rbf',
# {'order': 6}, eps=0.01 and only trail=10 repetitions, saves the two returned
# arrays as CSV and plots them with draw_plot (default epoch=100 on the x axis).
rewards, err = sarsa_cp_trail(1e-3, {'order': 6},
                              base='rbf',
                              eps=0.01,
                              trail=10)
ah.save_cp_csvdata(rewards, err, 'sarsa_cartpole_rbf.csv')
draw_plot(rewards, err, filename='testsarsacprbf.png')
Esempio n. 6
0
def sarsa_lambda_mc_trail(lr, l, baseparams, eps, base='fourier', epoch=100, trail=100):
    """Call ``sarsa_lambda_mc`` ``trail`` times and aggregate the results.

    Args:
        lr, l, baseparams, eps: forwarded positionally to ``sarsa_lambda_mc``.
        base: forwarded to ``sarsa_lambda_mc`` as ``base``.
        epoch: forwarded as ``epoch``; also the length of each stored row.
        trail: number of repeated calls.

    Returns:
        ``(mean, std)``: the column-wise mean and the column-wise ``np.std`` of
        the ``(trail, epoch)`` result table. The second value is a standard
        deviation, not a standard error of the mean.
    """
    per_run = np.zeros((trail, epoch))
    for run in range(trail):
        # Each call's result fills one row, so it must fit a length-`epoch` row.
        per_run[run, :] = sarsa_lambda_mc(lr, l, baseparams, eps, base=base, epoch=epoch)
    return per_run.mean(axis=0), per_run.std(axis=0)


def draw_plot(data, error, epoch=100, filename='tests.png'):
    """Plot the reward curve in black with a shaded error band, save and show it.

    Args:
        data: y values, one per episode; expected to contain ``epoch`` entries
            to match the x axis ``0 .. epoch - 1``. Any array-like is accepted.
        error: half-width of the shaded band around ``data`` (array-like or
            scalar).
        epoch: number of x positions to plot.
        filename: output image path; the figure is saved at 200 dpi.
    """
    # Convert up front so `data - error` also works for plain Python lists.
    data = np.asarray(data)
    error = np.asarray(error)
    episodes = np.arange(epoch)

    # Start from a fresh figure. Drawing on pyplot's implicit current figure
    # can pile this curve on top of a previous call's output whenever
    # plt.show() returns without closing that figure.
    _, ax = plt.subplots()
    ax.set_xlabel('episode')
    ax.set_ylabel('reward')
    ax.plot(episodes, data, 'k')
    ax.fill_between(episodes, data - error, data + error, alpha=0.3)

    # The new figure is the current one, so plt.savefig writes exactly it.
    plt.savefig(filename, dpi=200)
    plt.show()


# Module-level experiment driver: runs on execution of the file. Each
# experiment saves its two returned arrays with ah.save_cp_csvdata and plots
# them with draw_plot (default epoch=100 on the x axis).

# Experiment 1: sarsa_lambda_grid_trail. The lambda is a step function of x:
# 0.3 while x < 20, then 0.01.
rewards, error = sarsa_lambda_grid_trail(5e-2, 0.95, lambda x: 0.3 if x < 20 else 0.01, trail=100)
ah.save_cp_csvdata(rewards, error, 'sarsa_grid.csv')
draw_plot(rewards, error, filename='sarsa_grid.png')

# Disabled earlier configuration (order 3, l=0.95, eps step at x = 80).
# rewards, err = sarsa_lambda_mc_trail(1e-2, 0.95, {'order': 3}, trail=100, eps=lambda x: 0.2 if x < 80 else 0.01)
# ah.save_cp_csvdata(rewards, err, 'sarsa_mountaincar_25.csv')
# draw_plot(rewards, err, filename='sarsa_mc_25.png')

# Experiment 2: sarsa_lambda_mc_trail with lr=1e-2, l=0.8, {'order': 7} and
# the same eps step function (0.3 while x < 20, then 0.01).
rewards, err = sarsa_lambda_mc_trail(1e-2, 0.8, {'order': 7}, trail=100, eps=lambda x: 0.3 if x < 20 else 0.01)
ah.save_cp_csvdata(rewards, err, 'sarsa_mc.csv')
draw_plot(rewards, err, filename='sarsa_mc.png')
Esempio n. 7
0
        print('episode: ', x, ', reward: ', estimated_rewards[x])
    return estimated_rewards


def qlearning_lambda_mc_trail(lr, l, baseparams, eps, base='fourier', epoch=100, trail=100):
    """Call ``qlearning_lambda_mc`` ``trail`` times and aggregate the results.

    Args:
        lr, l, baseparams, eps: forwarded positionally to
            ``qlearning_lambda_mc``.
        base: forwarded to ``qlearning_lambda_mc`` as ``base``.
        epoch: forwarded as ``epoch``; also the length of each stored row.
        trail: number of repeated calls.

    Returns:
        ``(mean, std)``: the column-wise mean and the column-wise ``np.std`` of
        the ``(trail, epoch)`` result table. The second value is a standard
        deviation, not a standard error of the mean.
    """
    per_run = np.zeros((trail, epoch))
    for run in range(trail):
        # Each call's result fills one row, so it must fit a length-`epoch` row.
        per_run[run, :] = qlearning_lambda_mc(lr, l, baseparams, eps, base=base, epoch=epoch)
    return per_run.mean(axis=0), per_run.std(axis=0)


def draw_plot(data, error, epoch=100, filename='tests.png'):
    """Plot the reward curve in black with a shaded error band, save and show it.

    Args:
        data: y values, one per episode; expected to contain ``epoch`` entries
            to match the x axis ``0 .. epoch - 1``. Any array-like is accepted.
        error: half-width of the shaded band around ``data`` (array-like or
            scalar).
        epoch: number of x positions to plot.
        filename: output image path; the figure is saved at 200 dpi.
    """
    # Convert up front so `data - error` also works for plain Python lists.
    data = np.asarray(data)
    error = np.asarray(error)
    episodes = np.arange(epoch)

    # Start from a fresh figure. Drawing on pyplot's implicit current figure
    # can pile this curve on top of a previous call's output whenever
    # plt.show() returns without closing that figure.
    _, ax = plt.subplots()
    ax.set_xlabel('episode')
    ax.set_ylabel('reward')
    ax.plot(episodes, data, 'k')
    ax.fill_between(episodes, data - error, data + error, alpha=0.3)

    # The new figure is the current one, so plt.savefig writes exactly it.
    plt.savefig(filename, dpi=200)
    plt.show()


# Module-level experiment driver: runs on execution of the file. Each
# experiment saves its two returned arrays with ah.save_cp_csvdata and plots
# them with draw_plot (default epoch=100 on the x axis).

# Experiment 1: qlearning_lambda_grid_trail. The lambda is a step function of
# x: 0.3 while x < 20, then 0.01.
rewards, error = qlearning_lambda_grid_trail(4e-2, 0.8, lambda x: 0.3 if x < 20 else 0.01, trail=100)
ah.save_cp_csvdata(rewards, error, 'q_grid_2.csv')
draw_plot(rewards, error, filename='q_grid.png')

# Experiment 2: qlearning_lambda_mc_trail with lr=8e-3, l=0.8, {'order': 5};
# eps is 0.1 while x < 20, then 0.01. The commented line is the same call with
# trail=1.
# rewards, err = qlearning_lambda_mc_trail(8e-3, 0.8, {'order': 5}, trail=1, eps=lambda x: 0.1 if x < 20 else 0.01)
rewards, err = qlearning_lambda_mc_trail(8e-3, 0.8, {'order': 5}, trail=100, eps=lambda x: 0.1 if x < 20 else 0.01)
ah.save_cp_csvdata(rewards, err, 'q_mc.csv')
draw_plot(rewards, err, filename='q_mc.png')
Esempio n. 8
0
    fig, ax = plt.subplots()

    plt.xlabel('episode')
    plt.ylabel('reward')
    ax.errorbar(np.array(range(converge_count)),
                reward_avg,
                yerr=reward_std,
                fmt='o')
    plt.savefig('grid_ce.png', dpi=200)

    plt.show()

    toc = time.time()
    print('running time: ', (toc - tic) / 60, ' mins')
    return reward_avg, reward_std


# Module-level driver: runs execute_grid(20, 100) on execution of the file and
# saves the two returned arrays as CSV through ah.save_cp_csvdata.
# NOTE(review): the two positional arguments are presumably the trial count
# and the iteration count — confirm against execute_grid's signature.
rewards, err = execute_grid(20, 100)
ah.save_cp_csvdata(rewards, err, 'ce_grid.csv')

# print('optimized theta: ', grid.pi_params)

# theta, cm = cartpole_trail()
# print('optimized reward: ', cartpole_evaluate(theta.reshape(4, 2), 50))
# print('optimized theta: ', theta.reshape(4, 2))

# pool = ThreadPoolExecutor(5)
# futures = []
# for x in range(5):
#     futures.append(pool.submit(trail, x))
    # trail_num = 3
    # converge_count = 250
    reward_plt_data = np.zeros((trail_num, converge_count))
    for x in range(trail_num):
        reward_plt_data[x] = np.array(cartpole_trail(converge_count)[2])
    reward_std = reward_plt_data.std(0)
    reward_avg = reward_plt_data.mean(0)

    fig, ax = plt.subplots()
    plt.xlabel('episode')
    plt.ylabel('reward')
    ax.errorbar(np.array(range(converge_count)),
                reward_avg,
                yerr=reward_std,
                fmt='o')
    plt.savefig('cartpole_ce.png', dpi=200)

    plt.show()

    toc = time.time()
    print('running time: ', (toc - tic) / 60, ' mins')
    return reward_avg, reward_std


# Module-level driver: runs execute_cartpole(20, 100) on execution of the file
# and saves the two returned arrays as CSV through ah.save_cp_csvdata.
# NOTE(review): the two positional arguments are presumably the trial count
# and the iteration count — confirm against execute_cartpole's signature.
rewards, err = execute_cartpole(20, 100)
# Disabled padding step: it would have copied the first 10 results into
# length-100 arrays pre-filled with 1010 (rewards) and 0 (errors).
# rewards_t = np.zeros(100) + 1010
# err_t = np.zeros(100)
# rewards_t[: 10] = rewards
# err_t[: 10] = err
ah.save_cp_csvdata(rewards, err, 'ce_cartpole.csv')
        trail_results[x] = actor_critic_mc(lr,
                                           l,
                                           baseparams,
                                           eps,
                                           epoch=epoch,
                                           base=base)  # (epoch, )
    std_error = np.std(trail_results, axis=0)
    mean_rewards = np.mean(trail_results, axis=0)
    return mean_rewards, std_error


def draw_plot(data, error, epoch=100, filename='tests.png'):
    """Plot the reward curve in black with a shaded error band, save and show it.

    Args:
        data: y values, one per episode; expected to contain ``epoch`` entries
            to match the x axis ``0 .. epoch - 1``. Any array-like is accepted.
        error: half-width of the shaded band around ``data`` (array-like or
            scalar).
        epoch: number of x positions to plot.
        filename: output image path; the figure is saved at 200 dpi.
    """
    # Convert up front so `data - error` also works for plain Python lists.
    data = np.asarray(data)
    error = np.asarray(error)
    episodes = np.arange(epoch)

    # Start from a fresh figure. Drawing on pyplot's implicit current figure
    # can pile this curve on top of a previous call's output whenever
    # plt.show() returns without closing that figure.
    _, ax = plt.subplots()
    ax.set_xlabel('episode')
    ax.set_ylabel('reward')
    ax.plot(episodes, data, 'k')
    ax.fill_between(episodes, data - error, data + error, alpha=0.3)

    # The new figure is the current one, so plt.savefig writes exactly it.
    plt.savefig(filename, dpi=200)
    plt.show()


# Module-level experiment driver: runs on execution of the file. Each
# experiment saves its two returned arrays with ah.save_cp_csvdata and plots
# them with draw_plot (default epoch=100 on the x axis).

# Experiment 1: actor_critic_grid_trail with first argument 0.1 and a constant
# function (always 2); their roles are defined by actor_critic_grid_trail,
# which is not visible here — TODO confirm.
rewards, error = actor_critic_grid_trail(0.1, lambda x: 2, trail=100)
ah.save_cp_csvdata(rewards, error, 'ac_grid.csv')
draw_plot(rewards, error, filename='ac_grid.png')

# Experiment 2: actor_critic_mc_trail with {'order': 5} and a constant
# function (always 0.25) as the fourth positional argument.
rewards, err = actor_critic_mc_trail(1e-2,
                                     0.8, {'order': 5},
                                     lambda x: 0.25,
                                     trail=100)
ah.save_cp_csvdata(rewards, err, 'ac_mc.csv')
draw_plot(rewards, err, filename='ac_mc.png')