예제 #1
0
    #%% Control setup
    import matplotlib.pyplot as plt

    # Candidate control values handed to the solver: a 1x2 array holding 0 and 1.
    All_U = np.array([[0, 1]])
    # Two-element array [0, 1] passed to the solver as the control bounds
    # (presumably [lower, upper] -- confirm against algorithmsv2.algos).
    u_bounds = np.array([0, 1])

    def cost(xs, us):
        """Return the cost for states ``xs`` and actions ``us``.

        The cost is the negation of
        ``cartpole_reward.defaultCartpoleRewardMatrix(xs, us)``.
        """
        reward = cartpole_reward.defaultCartpoleRewardMatrix(xs, us)
        return -reward

    #%% Control
    # Construct the solver: the seven positional arguments keep their original
    # order, and the tuning options are passed by keyword.
    algos = algorithmsv2.algos(
        X, All_U, u_bounds, phi, psi, K, cost,
        epsilon=8000.0,
        bellmanErrorType=0,
        learning_rate=1,
    )
    # The result of algorithm2 is unpacked into two sequences: the Bellman
    # errors and the gradient norms.
    bellmanErrors, gradientNorms = algos.algorithm2(batch_size=256)
    # algos = tf_algorithmsv2.Algorithms(X, All_U, phi, psi, K, cost)
    # bellmanErrors = algos.algorithm2()

    #%% Plots
    # One figure per recorded curve (Bellman errors first, then gradient
    # norms); each is plotted against its index and shown before the next.
    for curve in (bellmanErrors, gradientNorms):
        plt.plot(np.arange(len(curve)), curve)
        plt.show()

    #%%
예제 #2
0
# For each column offset i, compare phi of the Y_opt column with
# K_u(u) @ phi(x) built from the matching U_opt / X_opt column, and record
# the l2_norm of the pair.
for i in range(Y_opt.shape[1]):
    column = starting_point + i  # same column is read from Y_opt, U_opt and X_opt
    actual_phi_x_prime = phi(Y_opt[:, column])
    predicted_phi_x_prime = K_u(U_opt[0, column]) @ phi(X_opt[:, column])
    norms.append(l2_norm(actual_phi_x_prime, predicted_phi_x_prime))

# Convert the accumulated list to an array so the mean can be taken.
norms = np.array(norms)
print("Mean single-step prediction norm:", norms.mean())

#%%
# Control bounds are set to (-inf, +inf); the two entries are passed to the
# solver as separate positional arguments.
u_bounds = [-np.inf, np.inf]
# U = np.array([[i] for i in range(-5,6)])
algos = algorithmsv2.algos(
    X_opt, U_opt,
    u_bounds[0], u_bounds[1],
    phi, psi, K, cost,
    epsilon=0.0001,
)
print("Algorithm 2 output:", algos.algorithm2())

# 8.396373847797
# 5.69035812501408
# 62.9375147908342
# 262.992038646761
# 1975984.33548684
# 9607.35529825491
# 1437.39744728116
# 8434.3524823486

#%% Discretize all controls
step_size = 0.1
# Evenly spaced grid starting at u_bounds[0, 0]; the stop value is padded by
# one step, then the 1-D grid is turned into a single-row 2-D array.
# NOTE(review): with a non-integer step, numpy.arange's final element count
# depends on floating-point rounding -- confirm the endpoint is as intended.
All_U = np.arange(
    u_bounds[0, 0],
    u_bounds[0, 1] + step_size,
    step_size,
)[np.newaxis, :]
#All_U = U.reshape(1,-1) # continuous case is just original domain

#%% Control
# Only the first entry of u_bounds is passed to the solver here.
algos = algorithmsv2.algos(
    X, All_U, u_bounds[0], phi, psi, K, cost,
    epsilon=0.01,
    bellmanErrorType=0,
    weightRegularizationBool=0,
    u_batch_size=30,
)
# bellmanErrors, gradientNorms = algos.algorithm2(batch_size=64)
# algos.w = np.ones([K.shape[0],1])
# The weights are read from 'bellman-weights.npy' instead of being produced
# by the (commented-out) algorithm2 call above.
algos.w = np.load('bellman-weights.npy')
print("Weights:", algos.w)


#%% Retrieve policy
def policy(x):
    pis = algos.pis(x)
    # pis = pis + ((1 - np.sum(pis)) / pis.shape[0])
예제 #4
0
#%% Discretize all controls
def discretize(start, end, num_points):
    """Return ``num_points`` evenly spaced values beginning at ``start``.

    Consecutive values are ``(end - start) / num_points`` apart, so the grid
    covers the half-open interval ``[start, end)``: ``end`` itself is not
    part of the result.

    Args:
        start: First value of the grid (returned unchanged as element 0).
        end: Bound of the interval; not included in the result.
        num_points: Number of values to generate.

    Returns:
        A list of ``num_points`` values (just ``[start]`` when
        ``num_points`` is below 2).
    """
    # The step is the signed interval width divided by the point count.  The
    # previous |start| + |end| only equalled the width when the bounds
    # straddled zero.  np.subtract keeps the step a NumPy scalar, as before.
    step_size = np.subtract(end, start) / num_points
    points = [start]
    # Each value is built from its predecessor, as in the original code.
    for _ in range(1, num_points):
        points.append(points[-1] + step_size)
    return points


# Control grid: 41 single-element rows -2 + i * 0.1 for i = 0..40,
# stored as a (41, 1) array.
U = np.array([[-2 + (i * 0.1)] for i in range(41)])

#%% Control
# Construct the solver over the discretized control grid U; options are
# passed by keyword.
algos = algorithmsv2.algos(
    X, U, u_bounds, phi, psi, K, cost,
    epsilon=1,
    bellmanErrorType=1,
    u_batchSize=2,
)
# The value returned by algorithm2 is kept as ``pi``.
pi = algos.algorithm2(batch_size=50)
# pi = algos.algorithm3()

#%% Bellman Errors
# 1184.3180405984
# 3508912268.71883
예제 #5
0
#%% Discretize all controls
def discretize(start, end, num_points):
    """Split ``[start, end)`` into ``num_points`` evenly spaced values.

    The spacing is ``(end - start) / num_points``; the first value is
    ``start`` and ``end`` is never included.

    Args:
        start: First value of the grid (returned unchanged as element 0).
        end: Bound of the interval; not included in the result.
        num_points: Number of values to generate.

    Returns:
        A list of ``num_points`` values (just ``[start]`` when
        ``num_points`` is below 2).
    """
    # Use the signed width of the interval.  Summing the absolute values of
    # the bounds gave the wrong step whenever both bounds had the same sign.
    # np.subtract keeps the step a NumPy scalar, matching the earlier types.
    step_size = np.subtract(end, start) / num_points
    grid = [start]
    # Accumulate from the previous value, exactly as the original loop did.
    for _ in range(1, num_points):
        grid.append(grid[-1] + step_size)
    return grid


# Control grid as a (41, 1) column: -2 + i * 0.1 for i = 0..40.
U = (-2 + (np.arange(41) * 0.1)).reshape(-1, 1)

#%% Control
# The two entries of u_bounds are passed to the solver as separate
# positional arguments.
algos = algorithmsv2.algos(
    X, U,
    u_bounds[0], u_bounds[1],
    phi, psi, K, cost,
    epsilon=1,
)
# The value returned by algorithm2 is kept as ``pi``.
pi = algos.algorithm2()
# pi = algos.algorithm3()

#%% Bellman Errors
# 1184.3180405984
# 3508912268.71883