#%% Control setup
import numpy as np
import matplotlib.pyplot as plt

All_U = np.array([[0, 1]])  # discrete cartpole actions: push left / push right
u_bounds = np.array([0, 1])

def cost(xs, us):
    # xs: states, us: actions; cost is the negated cartpole reward
    return -cartpole_reward.defaultCartpoleRewardMatrix(xs, us)

#%% Control
algos = algorithmsv2.algos(
    X, All_U, u_bounds, phi, psi, K, cost,
    epsilon=8000.0, bellmanErrorType=0, learning_rate=1
)
bellmanErrors, gradientNorms = algos.algorithm2(batch_size=256)
# algos = tf_algorithmsv2.Algorithms(X, All_U, phi, psi, K, cost)
# bellmanErrors = algos.algorithm2()

#%% Plots
plt.plot(np.arange(len(bellmanErrors)), bellmanErrors)
plt.show()

plt.plot(np.arange(len(gradientNorms)), gradientNorms)
plt.show()
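#%% (Sketch) Log-scale view of the training curves
# A minimal sketch, assuming bellmanErrors and gradientNorms are the 1-D
# arrays returned by algorithm2 above. The Bellman errors recorded in this
# file span several orders of magnitude, so a log-scale plot is often easier
# to read than the linear plots above.
import matplotlib.pyplot as plt

plt.semilogy(bellmanErrors, label='Bellman error')
plt.semilogy(gradientNorms, label='gradient norm')
plt.xlabel('iteration')
plt.ylabel('value (log scale)')
plt.legend()
plt.show()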
#%%
norms = []  # collect per-step prediction errors in the lifted space
for i in range(Y_opt.shape[1]):
    actual_phi_x_prime = phi(Y_opt[:, starting_point + i])
    predicted_phi_x_prime = K_u(U_opt[0, starting_point + i]) @ phi(X_opt[:, starting_point + i])
    norms.append(l2_norm(actual_phi_x_prime, predicted_phi_x_prime))
norms = np.array(norms)
print("Mean single-step prediction norm:", norms.mean())

#%%
u_bounds = [-np.inf, np.inf]
# U = np.array([[i] for i in range(-5,6)])
algos = algorithmsv2.algos(X_opt, U_opt, u_bounds[0], u_bounds[1], phi, psi, K, cost, epsilon=0.0001)
print("Algorithm 2 output:", algos.algorithm2())
# Recorded outputs from previous runs:
# 8.396373847797
# 5.69035812501408
# 62.9375147908342
# 262.992038646761
# 1975984.33548684
# 9607.35529825491
# 1437.39744728116
# 8434.3524823486
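#%% (Sketch) l2_norm helper
# l2_norm is not defined in this excerpt; a minimal sketch of a plausible
# implementation, assuming it measures the distance between the true and
# predicted lifted states. The repo's actual helper may differ (e.g., it may
# return the squared norm).
import numpy as np

def l2_norm(true, predicted):
    # Euclidean distance between two lifted-state vectors
    return np.linalg.norm(true - predicted)

print(l2_norm(np.array([1.0, 2.0]), np.array([1.0, 0.0])))  # -> 2.0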
#%% Discretize all controls
step_size = 0.1
All_U = np.arange(
    start=u_bounds[0, 0],
    stop=u_bounds[0, 1] + step_size,
    step=step_size
).reshape(1, -1)
# All_U = U.reshape(1,-1) # continuous case is just original domain

#%% Control
algos = algorithmsv2.algos(
    X, All_U, u_bounds[0], phi, psi, K, cost,
    epsilon=0.01, bellmanErrorType=0, weightRegularizationBool=0, u_batch_size=30
)
# bellmanErrors, gradientNorms = algos.algorithm2(batch_size=64)
# algos.w = np.ones([K.shape[0],1])
algos.w = np.load('bellman-weights.npy')  # reuse previously trained weights
print("Weights:", algos.w)

#%% Retrieve policy
def policy(x):
    pis = algos.pis(x)
    # pis = pis + ((1 - np.sum(pis)) / pis.shape[0])
    # Assumption: pis is a probability vector over the columns of All_U;
    # sample an action according to it.
    pis = np.asarray(pis).flatten()
    return All_U[0, np.random.choice(All_U.shape[1], p=pis)]
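#%% (Sketch) Renormalizing the policy distribution
# The commented-out line in policy() shifts pis uniformly so it sums to 1.
# A common alternative (an assumption here, not necessarily what
# algorithmsv2 expects) is to clip negatives and renormalize, which keeps
# every entry a valid probability:
import numpy as np

def normalize_probs(pis):
    pis = np.clip(np.asarray(pis, dtype=float).flatten(), 0.0, None)
    return pis / pis.sum()

print(normalize_probs([0.5, -0.01, 0.6]))  # -> [0.4545... 0. 0.5454...]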
#%% Discretize all controls
def discretize(start, end, num_points):
    # Uniform grid of num_points samples stepping from start toward end
    step_size = (end - start) / num_points
    ret = [start]
    for i in range(1, num_points):
        ret.append(ret[i - 1] + step_size)
    return ret

U = []
for i in range(41):
    U.append([-2 + (i * 0.1)])  # 41 actions spanning [-2, 2] in steps of 0.1
U = np.array(U)

#%% Control
algos = algorithmsv2.algos(
    X, U, u_bounds, phi, psi, K, cost,
    epsilon=1, bellmanErrorType=1, u_batchSize=2
)
pi = algos.algorithm2(batch_size=50)
# pi = algos.algorithm3()

#%% Bellman Errors
# 1184.3180405984
# 3508912268.71883
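#%% (Sketch) Equivalent grid via np.linspace
# The loop above builds a 41-point grid over [-2, 2]; np.linspace produces
# the same grid in one call. Shown for comparison only, not as a drop-in for
# the repo's API.
import numpy as np

U_alt = np.linspace(-2, 2, 41).reshape(-1, 1)
assert U_alt.shape == (41, 1)
print(U_alt[0, 0], U_alt[1, 0], "...", U_alt[-1, 0])  # -2.0 -1.9 ... 2.0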
#%% Discretize all controls
def discretize(start, end, num_points):
    # Uniform grid of num_points samples stepping from start toward end
    step_size = (end - start) / num_points
    ret = [start]
    for i in range(1, num_points):
        ret.append(ret[i - 1] + step_size)
    return ret

U = []
for i in range(41):
    U.append([-2 + (i * 0.1)])  # 41 actions spanning [-2, 2] in steps of 0.1
U = np.array(U)

#%% Control
algos = algorithmsv2.algos(X, U, u_bounds[0], u_bounds[1], phi, psi, K, cost, epsilon=1)
pi = algos.algorithm2()
# pi = algos.algorithm3()

#%% Bellman Errors
# 1184.3180405984
# 3508912268.71883
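#%% (Sketch) Persisting recorded Bellman errors
# The Bellman errors above are logged by hand as comments; a small sketch
# that saves them to disk instead, mirroring the bellman-weights.npy pattern
# used earlier. The filename is an assumption.
import numpy as np

bellman_errors = np.array([1184.3180405984, 3508912268.71883])  # values logged above
np.save('bellman-errors.npy', bellman_errors)
print(np.load('bellman-errors.npy'))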