#######
# GMR #
#######

# Fit a 4-component Gaussian mixture to the sampled surface.
# NOTE(review): `data`, `Zr`, `x_scale` and `y_scale` come from earlier in the
# file (outside this chunk); `data` appears to hold rows (x, y, z) given the
# conditioning on dimensions [0, 1] below -- TODO confirm.
gmm = GMM(n_components=4)
gmm.from_samples(data)

# Probe point for the conditional mean z(x0, y0) and its derivative.
x0 = 1.5   # alternative probe used during exploration: 0
y0 = -1.5  # alternative probe used during exploration: 1.5

# Indices of the conditioning (input) dimensions; hoisted once instead of
# rebuilding the same array at every grid point below.
in_dims = np.array([0, 1])

# Conditional mean and gradient d z / d (x, y) at the probe point.
z0 = np.squeeze(gmm.predict(in_dims, np.array([x0, y0])[np.newaxis, :]))
dxdy = np.squeeze(gmm.condition_derivative(in_dims, np.array([x0, y0])))
print('z( %g, %g ):' % (x0, y0), z0)
print('dydx( %g, %g ):' % (x0, y0), dxdy)

# Evaluate the learned regression surface on the same grid as the reference
# surface Zr, one point at a time (gmm.predict expects a 2-D input array).
Zp = np.zeros_like(Zr)
for i, x in enumerate(x_scale):
    for j, y in enumerate(y_scale):
        Zp[j, i] = np.squeeze(
            gmm.predict(in_dims, np.array([x, y])[np.newaxis, :]))

#########
# PLOTS #
#########
#azim = -120 ; elev = 40
# Drop the successor-state (' s2') columns of the removed features as well.
df = df.drop([name + ' s2' for name in features_to_drop], axis=1)

# Column indices used to condition the GMM:
# - (s, s') when querying the derivative of the conditional w.r.t. the action,
# - (s, a) when predicting the expected successor state.
s_sprime_indices = np.array(
    list(range(dim_s)) + list(range(dim_s + dim_a, dim_t)))
s_a_indices = np.arange(dim_s + dim_a)

delta_a = []       # per-sample action corrections
grad_a = []        # per-sample gradients ds'/da
delta_s_norm = []  # per-sample prediction-error norms

for _, samp in tqdm(df.iterrows(), total=len(df), leave=False):
    s_a = samp[state_1 + actions].to_numpy()[np.newaxis, :]
    s_sprime = samp[state_1 + state_2].to_numpy()
    sprime = samp[state_2].to_numpy()

    # Expected successor state given (s, a), and the gradient of the
    # conditional expectation restricted to the action dimensions.
    expected_sprime = gmm.predict(s_a_indices, s_a)
    grad_a_sprime = gmm.condition_derivative(
        s_sprime_indices, s_sprime)[:, dim_s:]

    # Action correction: prediction error pushed through ds'/da (chain rule).
    delta_a.append(grad_a_sprime.dot((expected_sprime - sprime).T))
    grad_a.append(grad_a_sprime)
    delta_s_norm.append(np.linalg.norm(expected_sprime - sprime))

fig, ax = plt.subplots(2, 1, sharex=True)
# FigureCanvasBase.set_window_title was removed in Matplotlib 3.6; the window
# title now lives on the canvas manager. Fall back for older versions.
try:
    fig.canvas.manager.set_window_title('Policy corrections')
except AttributeError:
    fig.canvas.set_window_title('Policy corrections')
ax[0].set_title('Trial result')
ax[0].plot(df[actions])
ax[0].legend(['Steering rate', 'Boggie torque'])
# 1-sigma band around the conditional mean of the previous model.
# NOTE(review): `covariance`, `X_test` and `y` are produced earlier in the
# file (outside this chunk) -- `covariance` is assumed to hold per-point
# conditional variances; TODO confirm.
s = covariance.ravel()
plt.fill_between(X_test, y - s, y + s, alpha=0.2)
plt.plot(X_test, y, lw=2)

# Noisy samples of y = sin(x) on [0, 2*pi].
n_samples = 100
# np.empty instead of the low-level np.ndarray constructor (same uninitialized
# allocation, and both columns are fully overwritten below).
X = np.empty((n_samples, 2))
X[:, 0] = np.linspace(0, 2 * np.pi, n_samples)
X[:, 1] = np.sin(X[:, 0]) + random_state.randn(n_samples) * 0.1

gmm = GMM(n_components=3, random_state=0)
gmm.from_samples(X)
Y = gmm.predict(np.array([0]), X_test[:, np.newaxis])

# Probe point for the conditional mean and its derivative.
x = 1.65  # alternative probes used during exploration: 1.5, 3

y = np.squeeze(gmm.predict(np.array([0]), np.array([x])[np.newaxis, :]))
dxdy = np.squeeze(gmm.condition_derivative(np.array([0]), np.array([x])))
print('y( %g ):' % x, y)
print('dydx( %g ):' % x, dxdy)

plt.subplot(1, 2, 2)
# Raw strings so the LaTeX backslashes are not parsed as (invalid) string
# escape sequences (a SyntaxWarning from Python 3.12 on); the rendered text
# is unchanged.
plt.title(r"Mixture of Experts: $p(Y | X) = \Sigma_k \pi_{k, Y|X} "
          r"\mathcal{N}_{k, Y|X}$")
plt.scatter(X[:, 0], X[:, 1])
plot_error_ellipses(plt.gca(), gmm, colors=["r", "g", "b"])
plt.plot(X_test, Y.ravel(), c="k", lw=2)

# Tangent line at x drawn from the predicted derivative.
d = 0.5
plt.plot([x - d, x + d], [y - dxdy * d, y + dxdy * d])
plt.show()