def batch_predict(SA):
    """Predict per-dimension outputs for every row of ``SA`` with local GPs.

    The ``K`` training samples nearest to the column-wise mean of ``SA`` are
    looked up through ``kdt``, optionally passed through ``reduction`` (when
    ``useDiffusionMaps`` is truthy), and used to fit one ``GaussianProcess``
    per output dimension. Only the first four input columns are given to the
    models. The ``theta`` read from the first fitted model
    (``gp_est.cov.theta``) is handed to every later model.

    ``kdt``, ``K``, ``Xtrain``, ``Ytrain``, ``state_action_dim``,
    ``state_dim``, ``useDiffusionMaps`` and ``reduction`` are resolved from
    the enclosing scope.

    Args:
        SA: 2-D array with one query per row.

    Returns:
        Tuple ``(m, s)`` of arrays shaped ``(SA.shape[0], state_dim)``.
        ``m`` holds the first output of ``gp_est.batch_predict`` and ``s``
        the diagonal of its second output (presumably predictive mean and
        variance -- confirm against ``GaussianProcess``).
    """
    # A single neighbourhood, centred on the batch mean, serves all queries.
    center = np.mean(SA, 0)
    nn_idx = kdt.query(center.reshape(1, -1), k=K, return_distance=False)
    train_in = Xtrain[nn_idx, :].reshape(K, state_action_dim)
    train_out = Ytrain[nn_idx, :].reshape(K, state_dim)

    if useDiffusionMaps:
        train_in, train_out = reduction(center, train_in, train_out)

    n_queries = SA.shape[0]
    m = np.zeros((n_queries, state_dim))
    s = np.zeros((n_queries, state_dim))

    # The first model is built with theta=None; whatever theta it ends up
    # with is then reused unchanged for the remaining output dimensions.
    shared_theta = None
    for dim in range(state_dim):
        gp_est = GaussianProcess(
            train_in[:, :4],
            train_out[:, dim],
            optimize=False,
            theta=shared_theta,
        )
        if dim == 0:
            shared_theta = gp_est.cov.theta

        mm, ss = gp_est.batch_predict(SA[:, :4])
        m[:, dim] = mm
        s[:, dim] = np.diag(ss)

    return m, s
def batch_predict(self, SA):
    """Predict the next state for every row of ``SA`` from one local GP fit.

    The nearest training sample to the column-wise mean of ``SA`` is looked
    up through ``self.kdt`` (the neighbourhood size is fixed at one),
    optionally passed through ``self.reduction`` when the module-level
    ``useDiffusionMaps`` flag is truthy, and used to fit one
    ``GaussianProcess`` per state dimension with ``optimize=True``.

    Args:
        SA: 2-D array with one query per row; the first ``self.state_dim``
            columns are added to the predicted change to form the result.

    Returns:
        Array shaped ``(SA.shape[0], self.state_dim)`` equal to
        ``SA[:, :self.state_dim]`` plus the first output of
        ``gp_est.batch_predict``. No noise is sampled.
    """
    centroid = np.mean(SA, 0)

    # The neighbourhood size is hard-coded; the per-query hyper-parameter
    # lookup through self.get_theta is not used in this method.
    n_neighbors = 1
    nn_idx = self.kdt.query(
        centroid.reshape(1, -1), k=n_neighbors, return_distance=False
    )
    train_in = self.Xtrain[nn_idx, :].reshape(n_neighbors, self.state_action_dim)
    train_out = self.Ytrain[nn_idx, :].reshape(n_neighbors, self.state_dim)

    if useDiffusionMaps:
        train_in, train_out = self.reduction(centroid, train_in, train_out)

    n_queries = SA.shape[0]
    delta = np.zeros((n_queries, self.state_dim))
    # Filled in below but not part of the return value: sampling around the
    # predicted change is disabled in this method.
    spread = np.zeros((n_queries, self.state_dim))

    queries = SA[:, :self.state_action_dim]
    for dim in range(self.state_dim):
        gp_est = GaussianProcess(
            train_in[:, :self.state_action_dim],
            train_out[:, dim],
            optimize=True,
            theta=None,
            algorithm='Matlab',
        )
        mm, vv = gp_est.batch_predict(queries)
        delta[:, dim] = mm
        spread[:, dim] = np.sqrt(np.diag(vv))

    return SA[:, :self.state_dim] + delta
def batch_predict_iterative(self, SA):
    """Predict sampled next states for ``SA`` one local cluster at a time.

    While rows remain, a random row is picked as a seed, its
    hyper-parameters and neighbourhood size come from ``self.get_theta``,
    and its training neighbours are fetched from ``self.kdt``. Every
    remaining row within 1.1 times the largest neighbour distance of the
    seed is grouped with it, predicted with one ``GaussianProcess`` per
    state dimension, and removed from the pool.

    Args:
        SA: 2-D array with one query per row. The caller's array is not
            modified; rows are removed from a rebound copy.

    Returns:
        ``np.array`` of the sampled next states, one row per input row.
        Rows appear in the order the clusters were processed, which in
        general is NOT the input row order. An empty input yields
        ``np.array([])``.
    """
    remaining = SA
    sampled_rows = []

    while remaining.shape[0] > 0:
        # Pick a random seed among the rows that are still unprocessed.
        seed = np.copy(remaining[np.random.randint(remaining.shape[0]), :])
        # Hyper-parameters and neighbourhood size for this query point.
        thetas, n_neighbors = self.get_theta(seed)

        dists, nn_idx = self.kdt.query(
            seed.reshape(1, -1), k=n_neighbors, return_distance=True
        )
        radius = np.max(dists) * 1.1
        train_in = self.Xtrain[nn_idx, :].reshape(n_neighbors, self.state_action_dim)
        train_out = self.Ytrain[nn_idx, :].reshape(n_neighbors, self.state_dim)

        # Gather the unprocessed rows that fall inside the seed's radius.
        # Presumably this always includes the seed itself (distance zero),
        # so the pool shrinks on every pass -- confirm against
        # NearestNeighbors.radius_neighbors.
        ball = NearestNeighbors(radius=radius)
        ball.fit(remaining)
        members = ball.radius_neighbors(
            seed.reshape(1, -1), return_distance=False
        )[0]
        cluster = np.copy(remaining[members, :])
        remaining = np.delete(remaining, members, axis=0)

        if useDiffusionMaps:
            train_in, train_out = self.reduction(seed, train_in, train_out)

        delta = np.zeros((cluster.shape[0], self.state_dim))
        spread = np.zeros((cluster.shape[0], self.state_dim))
        cluster_queries = cluster[:, :self.state_action_dim]
        for dim in range(self.state_dim):
            gp_est = GaussianProcess(
                train_in[:, :self.state_action_dim],
                train_out[:, dim],
                optimize=False,
                theta=thetas[dim],
                algorithm='Matlab',
            )
            mm, vv = gp_est.batch_predict(cluster_queries)
            delta[:, dim] = mm
            spread[:, dim] = np.sqrt(np.diag(vv))

        # Draw the state change from a normal around the prediction and
        # add it to the current state columns.
        next_states = cluster[:, :self.state_dim] + np.random.normal(delta, spread)
        sampled_rows.extend(next_states)

    return np.array(sampled_rows)
m, s = gpup_est.predict(x_n, var_n) # We sample N particles in the initial distribution to validate the GPUP computation N = int(1e4) X_belief = np.array([np.random.normal(x_n, np.sqrt(var_n)) for _ in range(N)]).reshape(N, 1) # ax4 = plt.subplot2grid((3, 5), (2, 2), colspan=3, rowspan=1) plt.plot(X_belief, np.tile(0., N), '.k', label='input particles') x = np.linspace(0, 6, 1000).reshape(-1, 1) plt.plot(x, scipy.stats.norm.pdf(x, x_n, np.sqrt(var_n)), label='input dist.') plt.xlabel('x') plt.legend() # Propagate all particles through the GP to get an approximated distribution ax2 = plt.subplot2grid((3, 5), (0, 0), colspan=1, rowspan=2) means_b, variances_b = gp_est.batch_predict( X_belief) # Use the GP batch prediction variances_b = np.diag(variances_b) Y_belief = np.array([ np.random.normal(means_b[i], np.sqrt(variances_b[i])) for i in range(N) ]).reshape(N, 1) # plt.plot(np.tile(0., N), Y_belief, '.k', label='output particles') plt.ylabel('p(y)') ylim = ax1.get_ylim() mu_Y = np.mean(Y_belief) sigma2_Y = np.std(Y_belief) y = np.linspace(ylim[0], ylim[1], 100000).reshape(-1, 1) plt.plot(scipy.stats.norm.pdf(y, mu_Y, sigma2_Y), y, '-b', label='particles output dist.')