def shift_Xy_to_matrices(shift_X, shift_y=None, weights=False):
    flatten = lambda ls: [l_i for l in ls for l_i in l]
    source_X_l = []
    target_X_l = []
    source_y_l = []
    try:
        for (source_X_elt, target_X_elt, source_y_elt) in zip(*list(zip(*shift_X))[0:3]):
            if len(target_X_elt) > 0:
                target_X_l.append(target_X_elt)
                source_X_l.append(source_X_elt)
                source_y_l.append(source_y_elt)
    except Exception:
        # drop into the debugger on malformed input (requires import pdb)
        pdb.set_trace()
    if shift_y is None:
        if not weights:
            return np.array(source_X_l), np.vstack(tuple(flatten(target_X_l))), np.array(source_y_l)
        else:
            assert len(next(iter(shift_X))) == 4
            return np.array(source_X_l), np.vstack(tuple(flatten(target_X_l))), np.array(source_y_l), np.array([shift_X_elt[-1] for shift_X_elt in shift_X])
    else:
        target_y_l = []
        for (source_X_elt, target_X_elt, source_y_elt), target_y_elt in \
                zip(zip(*list(zip(*shift_X))[0:3]), shift_y):
            if len(target_X_elt) > 0:
                target_y_l.append(target_y_elt)
        if not weights:
            return np.array(source_X_l), np.vstack(tuple(flatten(target_X_l))), np.array(source_y_l), np.hstack(tuple(flatten(target_y_l)))
        else:
            return np.array(source_X_l), np.vstack(tuple(flatten(target_X_l))), np.array(source_y_l), np.hstack(tuple(flatten(target_y_l))), np.array([shift_X_elt[-1] for shift_X_elt in shift_X])
def PhotometricError(iref, inew, R, T, points, D): # points is a tuple ([y], [x]); convert to homogeneous siz = iref.shape npoints = len(points[0]) f = siz[1] # focal length, FIXME Xref = np.vstack(((points[1] - siz[1]*0.5) / f, # x (siz[0]*0.5 - points[0]) / f, # y (left->right hand) np.ones(npoints))) # z = 1 # this is confusingly written -- i am broadcasting the translation T to # every column, but numpy broadcasting only works if it's rows, hence all # the transposes # print D * Xref Xnew = (np.dot(so3.exp(R), (D * Xref)).T + T).T # print Xnew # right -> left hand projection proj = Xnew[0:2] / Xnew[2] p = (-proj[1]*f + siz[0]*0.5, proj[0]*f + siz[1]*0.5) margin = 10 # int(siz[0] / 5) inwindow_mask = ((p[0] >= margin) & (p[0] < siz[0]-margin-1) & (p[1] >= margin) & (p[1] < siz[1]-margin-1)) npts_inw = sum(inwindow_mask) if npts_inw < 10: return 1e6, np.zeros(6 + npoints) # todo: filter points which are now out of the window oldpointidxs = (points[0][inwindow_mask], points[1][inwindow_mask]) newpointidxs = (p[0][inwindow_mask], p[1][inwindow_mask]) origpointidxs = np.nonzero(inwindow_mask)[0] E = InterpolatedValues(inew, newpointidxs) - iref[oldpointidxs] # dE/dk -> # d/dk r_p^2 = d/dk (Inew(w(r, T, D, p)) - Iref(p))^2 # = -2r_p dInew/dp dp/dw dw/dX dX/dk # = -2r_p * g(w(r, T, D, p)) * dw(r, T, D, p) # intensity gradients for each point Ig = InterpolatedGradients(inew, newpointidxs) # TODO: use tensors for this # gradients for R, T, and D gradient = np.zeros(6 + npoints) for i in range(npts_inw): # print 'newidx (y,x) = ', newpointidxs[0][i], newpointidxs[1][i] # Jacobian of w oi = origpointidxs[i] Jw = dw(Xref[0][oi], Xref[1][oi], D[oi], R, T) # scale back up into pixel space, right->left hand coords to get # Jacobian of p Jp = f * np.vstack((-Jw[1], Jw[0])) # print origpointidxs[i], 'Xref', Xref[:, i], 'Ig', Ig[:, i], \ # 'dwdRz', Jw[:, 2], 'dpdRz', Jp[:, 2] # full Jacobian = 2*E + Ig * Jp J = np.sign(E[i]) * np.dot(Ig[:, i], Jp) # print '2 E[i]', 2*E[i], 'Ig*Jp', np.dot(Ig[:, i], Jp) gradient[:6] += J[:6] # print J[:6] gradient[6+origpointidxs[i]] += J[6] print R, T, np.sum(np.abs(E)), npts_inw # return ((0.2*(npoints - npts_inw) + np.dot(E, E)), gradient) return np.sum(np.abs(E)) / (npts_inw), gradient / (npts_inw)
def get_dL_dp_thru_xopt(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, dL_dxopt, A, b, xopt, p, L_args=None, f_args=None): # assumes L(x_opt), x_opt = argmin_x f(x,p) subject to Ax<=b # L_args is for arguments to L besides x_opt # first, get dL/dws to calculate the gradient at ws1 if not L_args is None: pass #print 'L_args len:', len(L_args) else: print 'NONE' if L_args is None: dL_dxopt_anal_val1 = dL_dxopt(xopt) # else: # pdb.set_trace() dL_dxopt_anal_val1 = dL_dxopt(xopt, L_args) # get tight constraints A_tight, b_tight = get_tight_constraints(A, b, xopt) num_tight = A_tight.shape[0] # make C matrix # pdb.set_trace() if f_args is None: C_corner = d_dx_df_dx(xopt, p) else: C_corner = d_dx_df_dx(xopt, p, f_args) C = np.vstack((np.hstack((C_corner,-A_tight.T)), np.hstack((A_tight,np.zeros((num_tight,num_tight)))))) # print 'C', C # print 'C rank', np.linalg.matrix_rank(C), C.shape # print 'C corner rank', np.linalg.matrix_rank(C_corner), C_corner.shape # make d vector d = np.hstack((dL_dxopt_anal_val1, np.zeros(num_tight))) # solve Cv=d for x v = lin_solver(C, d) # print 'v', v #print C #print d print 'solver error:', np.linalg.norm(np.dot(C,v) - d) # make D if f_args is None: d_dp_df_dx_anal_val1 = d_dp_df_dx(p, xopt) else: d_dp_df_dx_anal_val1 = d_dp_df_dx(p, xopt, f_args) D = np.vstack((-d_dp_df_dx_anal_val1, np.zeros((num_tight,)+p.shape))) # print 'D', D[0:10] return np.sum(D.T * v[tuple([np.newaxis for i in xrange(len(p.shape))])+(slice(None),)], axis=-1).T
def get_dxopt_delta_p(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, A, b, xopt, p, delta_p_direction):
    # f(x, p) should be convex
    x_len = A.shape[1]

    # get tight constraints
    A_tight, b_tight = get_tight_constraints(A, b, xopt)
    num_tight = A_tight.shape[0]

    # get d
    p_dim = len(delta_p_direction.shape)
    delta_p_direction_broadcasted = np.tile(delta_p_direction, tuple([x_len] + [1 for i in range(p_dim)]))
    d_top = -np.sum(d_dp_df_dx(p, xopt) * delta_p_direction_broadcasted, axis=tuple(range(1, 1 + p_dim)))
    d_bottom = np.zeros(num_tight)
    d = np.hstack((d_top, d_bottom))

    # get C
    C = np.vstack((np.hstack((d_dx_df_dx(xopt, p), -A_tight.T)),
                   np.hstack((A_tight, np.zeros((num_tight, num_tight))))))

    # get deriv
    deriv = lin_solver(C, d)
    # print('solver error:', np.linalg.norm(np.dot(C, deriv) - d))
    return deriv
def evaluate_trajectory_cost(self, x_array, u_array):
    # Note x_array contains X_T, so a dummy u is required to make the arrays
    # be of consistent length
    u_array_sup = np.vstack([u_array, np.zeros(len(u_array[0]))])
    J_array = [self.cost(x, u, t, self.aux) for t, (x, u) in enumerate(zip(x_array, u_array_sup))]
    return np.sum(J_array)
def plot_ellipse(ax, alpha, mean, cov, line=None):
    t = np.linspace(0, 2*np.pi, 100) % (2*np.pi)
    circle = np.vstack((np.sin(t), np.cos(t)))
    ellipse = 2.*np.dot(np.linalg.cholesky(cov), circle) + mean[:, None]
    if line:
        line.set_data(ellipse)
        line.set_alpha(alpha)
    else:
        ax.plot(ellipse[0], ellipse[1], alpha=alpha, linestyle='-', linewidth=2)
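# A minimal usage sketch (an added example, not from the original source):
# draws one ellipse for a hypothetical 2-D covariance with the plot_ellipse
# above, assuming matplotlib is available.
import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
cov = np.array([[2.0, 0.6], [0.6, 1.0]])   # any symmetric positive-definite matrix
plot_ellipse(ax, alpha=0.8, mean=np.zeros(2), cov=cov)
plt.show()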
def get_KMM_ineq_constraints(num_train, B_max, eps):
    G_gt_0 = -np.eye(num_train)
    h_gt_0 = np.zeros(num_train)
    G_lt_B_max = np.eye(num_train)
    h_lt_B_max = np.ones(num_train) * B_max
    G_B_sum_lt = np.ones(num_train, dtype=float)
    h_B_sum_lt = (1+eps) * float(num_train) * np.ones(1)
    G_B_sum_gt = -np.ones(num_train, dtype=float)
    h_B_sum_gt = -(1-eps) * float(num_train) * np.ones(1)
    G = np.vstack((G_gt_0, G_lt_B_max, G_B_sum_lt, G_B_sum_gt))
    h = np.hstack((h_gt_0, h_lt_B_max, h_B_sum_lt, h_B_sum_gt))
    return G, h
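# A minimal sketch (hypothetical values, assuming the get_KMM_ineq_constraints
# above): the returned (G, h) encode G @ beta <= h for the KMM weights beta,
# so uniform weights should satisfy every constraint.
import numpy as np

G, h = get_KMM_ineq_constraints(num_train=4, B_max=10.0, eps=0.1)
beta = np.ones(4)                      # uniform importance weights
print(G.shape, h.shape)                # (10, 4) and (10,)
print(np.all(G @ beta <= h + 1e-12))   # True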
def test_jacobian_against_stacked_grads():
    scalar_funs = [
        lambda x: np.sum(x ** 3),
        lambda x: np.prod(np.sin(x) + np.sin(x)),
        lambda x: grad(lambda y: np.exp(y) * np.tanh(x[0]))(x[1]),
    ]
    vector_fun = lambda x: np.array([f(x) for f in scalar_funs])

    x = npr.randn(5)
    jac = jacobian(vector_fun)(x)
    grads = [grad(f)(x) for f in scalar_funs]

    assert np.allclose(jac, np.vstack(grads))
def plot_trace(ps, ttl):
    x = np.linspace(-5, 5, 100)
    y = np.linspace(-5, 5, 100)
    X, Y = np.meshgrid(x, y)
    Z = rosen(np.vstack([X.ravel(), Y.ravel()])).reshape((100, 100))
    ps = np.array(ps)
    plt.figure(figsize=(12, 4))
    plt.subplot(121)
    plt.contour(X, Y, Z, np.arange(10)**5)
    plt.plot(ps[:, 0], ps[:, 1], '-o')
    plt.plot(1, 1, 'r*', markersize=12)  # global minimum
    plt.subplot(122)
    plt.semilogy(range(len(ps)), rosen(ps.T))
    plt.title(ttl)
def kstep_mse(self, obs, act, horizon=1, stoch=True, infer='viterbi'): from sklearn.metrics import mean_squared_error, explained_variance_score mse, norm_mse = [], [] for _obs, _act in zip(obs, act): _hist_obs, _hist_act, _nxt_act = [], [], [] _target, _prediction = [], [] _nb_steps = _obs.shape[0] - horizon for t in range(_nb_steps): _hist_obs.append(_obs[:t + 1, :]) _hist_act.append(_act[:t + 1, :]) _nxt_act.append(_act[t:t + horizon, :]) _hr = [horizon for _ in range(_nb_steps)] _, _obs_hat = self.forcast(hist_obs=_hist_obs, hist_act=_hist_act, nxt_act=_nxt_act, horizon=_hr, stoch=stoch, infer=infer) for t in range(_nb_steps): _target.append(_obs[t + horizon, :]) _prediction.append(_obs_hat[t][-1, :]) _target = np.vstack(_target) _prediction = np.vstack(_prediction) _mse = mean_squared_error(_target, _prediction) _norm_mse = explained_variance_score( _target, _prediction, multioutput='variance_weighted') mse.append(_mse) norm_mse.append(_norm_mse) return np.mean(mse), np.mean(norm_mse)
def m_step(self, discrete_expectations, continuous_expectations, datas, inputs, masks, tags, optimizer="bfgs", maxiter=100, **kwargs): if self.single_subspace: # Return exact m-step updates for C, F, d, and inv_etas # stack across all datas x = np.vstack(continuous_expectations) u = np.vstack(inputs) y = np.vstack(datas) T, D = np.shape(x) xb = np.hstack((np.ones((T, 1)), x, u)) # design matrix params = np.linalg.lstsq(xb.T @ xb, xb.T @ y, rcond=None)[0].T self.ds = params[:, 0].reshape((1, self.N)) self.Cs = params[:, 1:D + 1].reshape((1, self.N, self.D)) if self.M > 0: self.Fs = params[:, D + 1:].reshape((1, self.N, self.M)) mu = np.dot(xb, params.T) Sigma = (y - mu).T @ (y - mu) / T self.inv_etas = np.log(np.diag(Sigma)).reshape((1, self.N)) else: Emissions.m_step(self, discrete_expectations, continuous_expectations, datas, inputs, masks, tags, optimizer=optimizer, maxiter=maxiter, **kwargs)
def cp_mds_reg(X, D, lam=1.0, v=1, maxiter=1000): """Version of MDS in which "signs" are also an optimization parameter. Rather than performing a full optimization and then resetting the sign matrix, here we treat the signs as a parameter `A = [a_ij]` and minimize the cost function F(X,A) = ||W*(X^H(A*X) - cos(D))||^2 + lambda*||A - X^HX/|X^HX| ||^2 Lambda is a regularization parameter we can experiment with. The collection of data, `X`, is treated as a point on the `Oblique` manifold, consisting of `k*n` matrices with unit-norm columns. Since we are working on a sphere in complex space we require `k` to be even. The first `k/2` entries of each column are the real components and the last `k/2` entries are the imaginary parts. Parameters ---------- X : ndarray (k, n) Initial guess for data. D : ndarray (k, k) Goal distance matrix. lam : float, optional Weight to give regularization term. v : int, optional Verbosity Returns ------- X_opt : ndarray (k, n) Collection of points optimizing cost. """ dim = X.shape[0] num_points = X.shape[1] W = distance_to_weights(D) Sreal, Simag = norm_rotations(X) A = np.vstack( (np.reshape(Sreal, (1, num_points**2)), np.reshape(Simag, num_points**2))) cp_manifold = Oblique(dim, num_points) a_manifold = Oblique(2, num_points**2) manifold = Product((cp_manifold, a_manifold)) solver = ConjugateGradient(maxiter=maxiter, maxtime=float('inf')) cost = setup_reg_autograd_cost(D, int(dim / 2), num_points, lam=lam) problem = pymanopt.Problem(cost=cost, manifold=manifold) Xopt, Aopt = solver.solve(problem, x=(X, A)) Areal = np.reshape(Aopt[0, :], (num_points, num_points)) Aimag = np.reshape(Aopt[1, :], (num_points, num_points)) return Xopt, Areal, Aimag
def p_log_prob(self, idx, z):
    x = self.data[idx]
    mu, tau, pi = z['mu'], softplus(z['tau']), stick_breaking(z['pi'])
    matrix = []
    log_prior = 0.
    log_prior += np.sum(gamma_logpdf(tau, 1e-5, 1e-5) + np.log(jacobian_softplus(z['tau'])))
    log_prior += np.sum(norm.logpdf(mu, 0, 1.))
    log_prior += dirichlet.logpdf(pi, 1e3 * np.ones(self.clusters)) + np.log(jacobian_stick_breaking(z['pi']))
    for k in range(self.clusters):
        matrix.append(np.log(pi[k]) +
                      np.sum(norm.logpdf(x, mu[(k * self.D):((k + 1) * self.D)],
                                         np.full([self.D], 1. / np.sqrt(tau[k]))), 1))
    matrix = np.vstack(matrix)
    vector = logsumexp(matrix, axis=0)
    log_lik = np.sum(vector)
    return self.scale * log_lik + log_prior
def get_x0():
    min_dist = -np.inf
    count = 0
    while min_dist < param.get('min_dist'):
        for i in range(param.get('ni')):
            p_i = param.get('plim') * np.random.rand(param.get('nd'), 1) \
                - param.get('plim') / 2.
            v_i = param.get('vlim') * np.random.rand(param.get('nd'), 1) \
                - param.get('vlim') / 2.
            try:
                x0 = np.vstack((x0, p_i, v_i))
            except NameError:
                # first agent: x0 does not exist yet
                x0 = np.vstack((p_i, v_i))
        count += 1
        min_dist = get_min_dist(x0)
        if count > 100:
            print('Error: Incompatible Initial Conditions')
            return
    param['x0'] = x0
def _make_A(self, eps_vec):
    C = - 1 / MU_0 * self.Dxf.dot(self.Dxb) \
        - 1 / MU_0 * self.Dyf.dot(self.Dyb)
    # print('shape of C', C.shape)
    print('CC', C)
    entries_c, indices_c = get_entries_indices(C)

    # indices into the diagonal of a sparse matrix
    entries_diag = - EPSILON_0 * self.omega**2 * eps_vec
    # print('shape of eps_vec', eps_vec.shape)
    indices_diag = npa.vstack((npa.arange(self.N), npa.arange(self.N)))
    print('EE', entries_diag)

    entries_a = npa.hstack((entries_diag, entries_c))
    indices_a = npa.hstack((indices_diag, indices_c))
    print('AA', entries_a)
    return entries_a, indices_a
def propagate(m, s, plant, dynmodel, policy): angi = plant.angi poli = plant.poli dyni = plant.dyni difi = plant.difi D0 = len(m) D1 = D0 + 2 * len(angi) D2 = D1 + len(policy.max_u) M = np.array(m) S = s i, j = np.arange(D0), np.arange(D0, D1) m, s, c = gaussian_trig(M[i], S[np.ix_(i, i)], angi) q = np.matmul(S[np.ix_(i, i)], c) M = np.hstack([M, m]) S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])]) i, j = poli, np.arange(D1) m, s, c = policy.fcn(M[i], S[np.ix_(i, i)]) q = np.matmul(S[np.ix_(j, i)], c) M = np.hstack([M, m]) S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])]) i, j = np.hstack([dyni, np.arange(D1, D2)]), np.arange(D2) m, s, c = dynmodel.fcn(M[i], S[np.ix_(i, i)]) q = np.matmul(S[np.ix_(j, i)], c) M = np.hstack([M, m]) S = np.vstack([np.hstack([S, q]), np.hstack([q.T, s])]) P = np.hstack([np.zeros((D0, D2)), np.eye(D0)]) P = fill_mat(np.eye(len(difi)), P, difi, difi) M_next = np.matmul(P, M[:, newaxis]).flatten() S_next = P @ S @ P.T S_next = (S_next + S_next.T) / 2 return M_next, S_next
def pendulum_dynamics(xu):
    dt = 0.05
    m = 1.0
    l = 1.0
    d = 1e-2  # damping
    g = 9.80665
    u_mx = 2.0

    x, u = xu[:, :2], xu[:, 2:]
    u = np.clip(u, -u_mx, u_mx)

    th_dot_dot = -3.0 * g / (2 * l) * np.sin(x[:, 0] + np.pi) - d * x[:, 1]
    th_dot_dot += 3.0 / (m * l ** 2) * u.squeeze()

    x_dot = x[:, 1] + th_dot_dot * dt
    x_pos = x[:, 0] + x_dot * dt

    x2 = np.vstack((x_pos, x_dot)).T
    return x2
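# A minimal rollout sketch (an added example, assuming the pendulum_dynamics
# above and plain NumPy): the batch convention is rows of [theta, theta_dot, u],
# here a single state rolled forward under zero torque.
import numpy as np

x = np.array([[np.pi / 4.0, 0.0]])         # (1, 2) state: angle, angular velocity
for _ in range(10):
    xu = np.hstack((x, np.zeros((1, 1))))  # append the (clipped) control column
    x = pendulum_dynamics(xu)
print(x)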
def run_hmc(self, check_point, position_init, momentum_init):
    ### Initialize position and momentum
    position_current = position_init
    momentum_current = momentum_init

    ### Perform multiple HMC steps
    for i in range(self.params['total_samples']):
        self.iterations += 1

        ### output accept rate at check point iterations
        if i % check_point == 0 and i > 0:
            accept_rate = self.accepts * 100. / i
            print('HMC {}: accept rate of {}'.format(i, accept_rate))

        position_current, momentum_current = self.hmc(
            position_current, momentum_current)

        # add sample to trace
        if i % self.params['thinning_factor'] == 0:
            self.trace = np.vstack((self.trace, position_current))
            self.potential_energy_trace = np.vstack(
                (self.potential_energy_trace,
                 self.potential_energy(position_current)))

    self.trace = self.trace[1:]
def _make_A(self, eps_vec, delta_matrix, phi_matrix): """ Builds the multi-frequency electromagnetic operator A in Ax = b """ M = 2*self.Nsb + 1 N = self.Nx * self.Ny W = self.omega + npa.arange(-self.Nsb,self.Nsb+1)*self.omega_mod C = sp.kron(sp.eye(M), - 1 / MU_0 * self.Dxf.dot(self.Dxb) - 1 / MU_0 * self.Dyf.dot(self.Dyb)) entries_c, indices_c = get_entries_indices(C) # diagonal entries representing static refractive index # this part is just a block diagonal version of the single frequency fdfd_ez entries_diag = - EPSILON_0 * npa.kron(W**2, eps_vec) indices_diag = npa.vstack((npa.arange(M*N), npa.arange(M*N))) entries_a = npa.hstack((entries_diag, entries_c)) indices_a = npa.hstack((indices_diag, indices_c)) # off-diagonal entries representing dynamic modulation # this part couples different frequencies due to modulation # for a derivation of these entries, see Y. Shi, W. Shin, and S. Fan. Optica 3(11), 2016. Nfreq = npa.shape(delta_matrix)[0] for k in npa.arange(Nfreq): # super-diagonal entries (note the +1j phase) mod_p = - 0.5 * EPSILON_0 * delta_matrix[k,:] * npa.exp(1j*phi_matrix[k,:]) entries_p = npa.kron(W[:-k-1]**2, mod_p) indices_p = npa.vstack((npa.arange((M-k-1)*N), npa.arange((k+1)*N, M*N))) entries_a = npa.hstack((entries_p, entries_a)) indices_a = npa.hstack((indices_p,indices_a)) # sub-diagonal entries (note the -1j phase) mod_m = - 0.5 * EPSILON_0 * delta_matrix[k,:] * npa.exp(-1j*phi_matrix[k,:]) entries_m = npa.kron(W[k+1:]**2, mod_m) indices_m = npa.vstack((npa.arange((k+1)*N, M*N), npa.arange((M-k-1)*N))) entries_a = npa.hstack((entries_m, entries_a)) indices_a = npa.hstack((indices_m,indices_a)) return entries_a, indices_a
def predict(self, X=None, y=None):
    '''
    Function to make predictions
    '''
    X = np.vstack([np.ones((1, X.shape[1])), X])
    AL = self.softmax(np.dot(self.trained_theta, X))
    y_hat = AL.argmax(axis=0)
    y = np.argmax(y, axis=0)
    acc = (y_hat == y).mean()
    print("Accuracy:", acc)
    # print(y_hat.shape, y.shape)
    # print(y_hat)
    return y_hat, y
def train_test_split(T1, X1, T2, X2, train_rate=0.8): """ :param train_rate: fraction of data used for training :param parameters: specification for the data generation of two scenarios :return:training and testing data for C2ST, note each is a combination of data from two samples """ # %% Data Preprocessing # interpolate T1, X1 = interpolate(T1, X1) T2, X2 = interpolate(T2, X2) dataX1 = np.zeros((X1.shape[0], X1.shape[1], 2)) dataX2 = np.zeros((X2.shape[0], X2.shape[1], 2)) # Dataset build for i in range(len(X1)): dataX1[i, :, :] = np.hstack((X1[i, np.newaxis].T, T1[i, np.newaxis].T)) dataX2[i, :, :] = np.hstack((X2[i, np.newaxis].T, T2[i, np.newaxis].T)) dataY1 = np.random.choice([0], size=(len(dataX1), )) dataY2 = np.random.choice([1], size=(len(dataX2), )) dataY1 = dataY1[:, np.newaxis] dataY2 = dataY2[:, np.newaxis] dataX = Permute(np.vstack((dataX1, dataX2))) dataY = Permute(np.vstack((dataY1, dataY2))) # %% Train / Test Division train_size = int(len(dataX) * train_rate) trainX, testX = np.array(dataX[0:train_size]), np.array( dataX[train_size:len(dataX)]) trainY, testY = np.array(dataY[0:train_size]), np.array( dataY[train_size:len(dataX)]) return trainX, trainY, testX, testY
def log_prior(self, x, w): ''' Returns log(p(Y|x,w)) ''' n = x.shape[0] if self.prior_model == "logistic_regression": negative_energy = np.dot(x, w) return np.vstack( (-np.log1p(np.exp(negative_energy)) * np.ones(x.shape[0]), negative_energy - np.log1p(np.exp(negative_energy)))).T elif self.prior_model == "mlp": cur_idx = self.n_features * self.hidden_layer_sizes[0] wi = w[:cur_idx].reshape( (self.n_features, self.hidden_layer_sizes[0])) bi = w[cur_idx:cur_idx + self.hidden_layer_sizes[0]] ho = np.dot(x, wi) + bi hi = np.tanh(ho) cur_idx += self.hidden_layer_sizes[0] for i in range(len(self.hidden_layer_sizes) - 1): wi = w[cur_idx:cur_idx + self.hidden_layer_sizes[i] * self.hidden_layer_sizes[i + 1]].reshape( (self.hidden_layer_sizes[i], self.hidden_layer_sizes[i + 1])) cur_idx += self.hidden_layer_sizes[ i] * self.hidden_layer_sizes[i + 1] bi = w[cur_idx:cur_idx + self.hidden_layer_sizes[i + 1]] cur_idx += self.hidden_layer_sizes[i + 1] ho = np.dot(hi, wi) + bi hi = np.tanh(ho) # cur_idx = cur_idx+self.n_hidden_units**2 negative_energy = np.dot(hi, w[cur_idx:-1]) + w[-1] negative_energy = np.vstack((np.zeros(n), negative_energy)).T return negative_energy - logsumexp(negative_energy, axis=1).reshape((n, 1)) else: raise ValueError("Invalid prior model: %s" % self.prior_model)
def obj_noise_space(sqrt_gwidth, z):
    zp = z[:J]
    zq = z[J:]
    torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1))
    torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1))
    # need preprocessing probably
    global model_input_size
    s = model_input_size
    upsample = nn.Upsample(size=(s, s), mode='bilinear')
    fp = model(upsample(gen_p(torch_zp))).cpu().data.numpy()
    fp = fp.reshape((J, -1))
    fq = model(upsample(gen_q(torch_zq))).cpu().data.numpy()
    fq = fq.reshape((J, -1))
    F = np.vstack([fp, fq])
    return obj_feat_space(sqrt_gwidth, F)
def initialize(self, x, u, **kwargs): localize = kwargs.get('localize', False) Ts = [_x.shape[0] for _x in x] if localize: from sklearn.cluster import KMeans km = KMeans(self.nb_states, random_state=1) km.fit((np.vstack(x))) zs = np.split(km.labels_, np.cumsum(Ts)[:-1]) zs = [z[:-1] for z in zs] else: zs = [npr.choice(self.nb_states, size=T - 1) for T in Ts] _cov = np.zeros((self.nb_states, self.dm_obs, self.dm_obs)) for k in range(self.nb_states): ## Select the transformation si = int(self.rot_lds[k, 0]) sj = int(self.rot_lds[k, 1]) T = self.T[sj, ...] ts = [np.where(z == k)[0] for z in zs] xs = [] ys = [] for i in range(len(ts)): _x = x[i][ts[i], :] _x = np.dot(T, _x.T).T _y = x[i][ts[i] + 1, :] _y = np.dot(T, _y.T).T xs.append(_x) ys.append(_y) ## THIS SHOULD NOT BE LIKE THIS , DUE TO IF SEVERAL TRANSFORMATIONS NOT WORK coef_, intercept_, sigma = linear_regression(xs, ys) self.A[si, ...] = coef_[:, :self.dm_obs] #self.B[k, ...] = coef_[:, self.dm_obs:] self.c[si, :] = intercept_ _cov[si, ...] = sigma self.cov = _cov self.covt = np.zeros([self.nb_states, self.dm_obs, self.dm_obs]) for k in range(self.nb_states): i = int(self.rot_lds[k, 0]) j = int(self.rot_lds[k, 1]) T_inv = self.T_inv[j, ...] self.covt[k, ...] = np.dot(T_inv, self.cov[i, ...])
def _ntied_transmat_prior(self, transmat_val):  # TODO: document choices
    transmat = np.empty((0, self.n_components))
    for r in range(self.n_unique):
        row = np.empty((self.n_chain, 0))
        for c in range(self.n_unique):
            if r == c:
                subm = np.array(sp.diags([transmat_val[r, c], 1.0], [0, 1],
                                shape=(self.n_chain, self.n_chain)).todense())
            else:
                lower_left = np.zeros((self.n_chain, self.n_chain))
                lower_left[self.n_tied, 0] = 1.0
                subm = np.kron(transmat_val[r, c], lower_left)
            row = np.hstack((row, subm))
        transmat = np.vstack((transmat, row))
    return transmat
def forward_pass(self, wb, inputs, nodes_rows, nodes_cols, graph_idxs):
    """
    Parameters:
    ===========
    - inputs: (np.array) the output from the previous layer, of shape
      (n_all_nodes, n_features)
    - graphs: (list of nx.Graphs)
    """
    fingerprints = []
    for g, idxs in sorted(graph_idxs.items()):
        fp = np.sum(inputs[idxs], axis=0)
        fingerprints.append(fp)
    assert len(fingerprints) == len(graph_idxs)

    return np.vstack(fingerprints)
def inference(self, x, return_std=False):
    self.likelihood(self.params)
    c_c = self.rho * self.RBF(self.theta_c, self.Xc, x)
    c_e = self.rho**2 * self.RBF(self.theta_c, self.Xe, x) + self.RBF(self.theta_e, self.Xe, x)
    c = np.vstack((c_c, c_e))
    mean = np.matmul(c.T, self.alpha)
    v = np.linalg.solve(self.L.T, np.linalg.solve(self.L, c))
    var = self.rho**2 * self.RBF(self.theta_c, x) + self.RBF(self.theta_e, x) - np.matmul(c.T, v)
    std = np.sqrt(np.diag(var))
    if return_std is False:
        return mean, var
    else:
        return mean, std
def chebyshev_centre(A, b, gamma):
    rows, cols = A.shape
    c = np.zeros(cols + 1)
    c[-1] = -1

    A_ = np.hstack([A, np.sqrt(np.sum(np.power(A, 2), axis=1)).reshape(-1, 1)])
    A_ = np.vstack([A_, -c.reshape(1, -1)])
    b_ = np.append(b, 100).reshape(-1, 1)

    # l2 norm minimisation of w
    P = gamma * np.eye(cols + 1)
    P[:, -1] = P[-1, :] = 0

    res = solve_qp(P=P, q=c, G=A_, h=b_)
    x_c = np.array(res[:-1])
    R = float(res[-1])  # np.float is removed in recent NumPy; use the builtin

    return x_c, R
def get_untilted_draws(self, num_draws):
    v = self.mix_par.values
    z = sp.stats.multinomial.rvs(n=num_draws, p=v['w'][0, :], size=1)[0]
    covs = [np.linalg.inv(v['info'][k, :, :])
            for k in range(self.num_components)]
    means = [v['loc'][k, :] for k in range(self.num_components)]
    draws = [np.array(sp.stats.multivariate_normal.rvs(
                 means[k], cov=covs[k], size=z[k]))
             for k in range(self.num_components)]
    # Oh, numpy. :(
    if self.dim == 1:
        draws = [np.expand_dims(d, 1) for d in draws]
    return np.vstack(draws)
def mixture_log_density(var_mixture_params, x):
    """Returns a weighted average over component densities."""
    log_weights, var_params = unpack_mixture_params(var_mixture_params)
    component_log_densities = np.vstack(
        [component_log_density(params_k, x) for params_k in var_params]).T
    return logsumexp(component_log_densities + log_weights, axis=1, keepdims=False)  # over clusters
def draw_legacy(self, show=True, fig_to_plot_on=None, ax_to_plot_on=None ): # Draws the airplane using matplotlib. # This method is deprecated (superseded by draw() ) and will be removed in a future release. # Setup if fig_to_plot_on == None or ax_to_plot_on == None: fig, ax = fig3d() fig.set_size_inches(12, 9) else: fig = fig_to_plot_on ax = ax_to_plot_on # TODO plot bodies # Plot wings for wing in self.wings: for i in range(len(wing.sections) - 1): le_start = wing.sections[i].xyz_le + wing.xyz_le le_end = wing.sections[i + 1].xyz_le + wing.xyz_le te_start = wing.sections[i].xyz_te() + wing.xyz_le te_end = wing.sections[i + 1].xyz_te() + wing.xyz_le points = np.vstack((le_start, le_end, te_end, te_start, le_start)) x = points[:, 0] y = points[:, 1] z = points[:, 2] ax.plot(x, y, z, color='#cc0039') if wing.symmetric: ax.plot(x, -1 * y, z, color='#cc0039') # Plot reference point x = self.xyz_ref[0] y = self.xyz_ref[1] z = self.xyz_ref[2] ax.scatter(x, y, z) set_axes_equal(ax) plt.tight_layout() if show: plt.show()
def step(self, policy_new, sample_dict={}): """ compute one step of the update Args: policy_new: policy instance to make the step for sample_dict: dict with samples (currently not used) Returns: """ self.sample(None, None, None) reward = np.vstack(self._reward_q) res = optimize.minimize( self.dual_function, 1.0, method='SLSQP', # method='L-BFGS-B', jac=grad(self.dual_function), args=(reward, ), bounds=((1e-8, 1e8), )) eta = res.x kl_samples = self.kl_divergence(eta, reward) r = np.asarray(self._reward_q) x = np.stack(self._sample_q) self.update_policy(policy_new, eta=eta, x=x, r=r) # maintain old policy to sample from for later use self.policy.set_params(policy_new.params()) return { 'epsilon': self.epsilon, # 'beta': self.beta, 'eta': eta.item(), 'kl': kl_samples.item(), # 'entropy_diff': entropy_diff, 'entropy': self.policy.entropy(), 'reward': (self.objective(policy_new.mean) - self.objective.f_opt).item(), }
def visualizeLatentState(X, rs, gen_params, rec_params):
    q_means, q_log_stds = nn_predict_gaussian(rec_params, X)
    latents = sample_diag_gaussian(q_means, q_log_stds, rs)
    gen = sigmoid(neural_net_predict(gen_params, latents))
    gen = gen[:, :gen.shape[1] // 2]  # integer division for indexing
    print(gen.shape)
    print(X.shape)
    # yTrain = y[:genTrain.shape[0], :]
    # yTest = y[genTrain.shape[0]:, :]
    # pdb.set_trace()
    y = tsne(np.vstack((X, gen * 10)))
    plt.figure()
    plt.clf()
    plt.scatter(y[:gen.shape[0], 0], y[:gen.shape[0], 1], color='red')
    plt.scatter(y[gen.shape[0]:, 0], y[gen.shape[0]:, 1], color='blue')
    plt.legend(['X', 'Xdecoded'])
    plt.savefig('hidden.jpg')
def generate_random_features_ind(data1, data2, num_feat):
    '''
    This function generates random features for a set of paired inputs,
    such as times and observations

    Inputs:
      - data1, data2: (n x d) arrays
      - num_feat: number of random features to be generated
    Output:
      - features in fixed dimensional space, (n x num_feat) arrays
    '''
    # find length-scale for random features using median heuristic
    sig = meddistance(np.vstack((data1, data2)), subsample=1000)
    random_parameters = rp_ind(num_feat, sig, 1)
    rff1 = np.array([f1(row[:, np.newaxis], random_parameters) for row in data1])
    rff2 = np.array([f1(row[:, np.newaxis], random_parameters) for row in data2])
    return rff1, rff2
def mixture_elbo(var_mixture_params, t):
    # We need to only sample the continuous component parameters,
    # and integrate over the discrete component choice

    def mixture_lower_bound(params):
        """Provides a stochastic estimate of the variational lower bound."""
        samples = component_sample(params, num_samples, rs)
        log_qs = mixture_log_density(var_mixture_params, samples)
        log_ps = logprob(samples, t)
        log_ps = np.reshape(log_ps, (num_samples, -1))
        log_qs = np.reshape(log_qs, (num_samples, -1))
        return np.mean(log_ps - log_qs)

    log_weights, var_params = unpack_mixture_params(var_mixture_params)
    component_elbos = np.vstack(
        [mixture_lower_bound(params_k) for params_k in var_params])
    return np.sum(component_elbos + log_weights)
def Gardner_Krauth_Mezard(N, patterns, weights, biases, sc, lr, k, maxiter): ''' Gardner rule rule proposed in (1987) Krauth Learning algorithms with optimal stability in neural networks + Krauth Mezard update strategy ''' Z = np.array(patterns).T M = 0 p = Z.shape[-1] Z_ = np.vstack([Z, np.ones(p)]) w_and_b = deepcopy(np.hstack([weights, biases.reshape(N, 1)])) y_global = ((w_and_b @ Z_).T / (np.sqrt(np.sum(w_and_b**2, axis=1)))) * Z.T # while (np.any(y_global < k) and M < maxiter): for i in range(N): # for each neuron independently # compute normalised stability measure (h_i, sigma_i)/|w_i|^2_2 sum_of_squares = np.sum(weights[i, :]**2 + biases[i]**2) ys = ((weights[i, :] @ Z + biases[i]) / (np.sqrt(sum_of_squares))) * Z[i, :] # #pick the pattern with the weakest y ind_min = np.argmin(ys) weakest_pattern = np.array( deepcopy(patterns[ind_min].reshape(1, N))) h_i = (weights[i, :].reshape(1, N) @ weakest_pattern.T + biases[i]).squeeze() # if the new weakest pattern is not yet stable with the margin k y = (h_i * weakest_pattern[0, i]) / (np.sqrt(sum_of_squares)) # while (y < k): weights[i, :] = deepcopy( weights[i, :] + lr * (weakest_pattern[0, i] * weakest_pattern).squeeze()) #set diagonal elements to zero if sc == True: weights[i, i] = 0 biases[i] = biases[i] + lr * weakest_pattern[0, i] sum_of_squares = np.sum(weights[i, :]**2 + biases[i]**2) h_i = (weights[i, :].reshape(1, N) @ weakest_pattern.T + biases[i]).squeeze() y = (h_i * weakest_pattern[0, i]) / (np.sqrt(sum_of_squares) ) # w_and_b = deepcopy(np.hstack([weights, biases.reshape(N, 1)])) y_global = ((w_and_b @ Z_).T / (np.sqrt(np.sum(w_and_b**2, axis=1)))) * Z.T # M += 1 if M >= maxiter: print('Maximum number of iterations has been exceeded') return weights, biases
def mixture_elbo(var_mixture_params, t):
    # We need to only sample the continuous component parameters,
    # and integrate over the discrete component choice

    def mixture_lower_bound(params):
        """Provides a stochastic estimate of the variational lower bound."""
        samples = component_sample(params, num_samples, rs)
        log_qs = mixture_log_density(var_mixture_params, samples)
        log_ps = logprob(samples, t)
        log_ps = np.reshape(log_ps, (num_samples, -1))
        log_qs = np.reshape(log_qs, (num_samples, -1))
        log_w = log_ps - log_qs
        elbo = logmeanexp(log_w)
        # alternative weightings that were tried:
        # w_log_w = np.exp(log_w) * log_w
        # w_log_w = softmax(log_w) * log_w
        # w_log_w = np.square(np.exp(log_w))
        # elbo = np.mean(w_log_w)
        # elbo = np.sum(w_log_w)
        return elbo

    log_weights, var_params = unpack_mixture_params(var_mixture_params)
    component_elbos = np.vstack(
        [mixture_lower_bound(params_k) for params_k in var_params])
    return np.sum(component_elbos + log_weights)
def sig_all():
    """
    This method returns a numpy array of shape=(2, 2, 3) which contains
    the 3 Pauli matrices in it.

    sigx = sig_all[:, :, 0],
    sigy = sig_all[:, :, 1],
    sigz = sig_all[:, :, 2]

    Returns
    -------
    np.ndarray
        shape = (2, 2, 3)

    """
    sigx = np.array([[0, 1], [1, 0]])
    sigy = np.array([[0, -1j], [1j, 0]])
    sigz = np.array([[1, 0], [0, -1]])
    all_paulis = np.vstack([sigx, sigy, sigz])
    all_paulis = np.reshape(all_paulis, (3, 2, 2)).transpose(1, 2, 0)
    return all_paulis
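# A quick consistency check (an added sketch, not from the original source):
# the stacked Pauli matrices returned above should satisfy the commutation
# relation [sig_x, sig_y] = 2i * sig_z.
import numpy as np

paulis = sig_all()
sigx, sigy, sigz = paulis[:, :, 0], paulis[:, :, 1], paulis[:, :, 2]
comm = sigx @ sigy - sigy @ sigx
print(np.allclose(comm, 2j * sigz))   # True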
def _loss(self, params):
    WtW = self.WtW
    B = self.base * params[:, None]
    p, n = B.shape

    scale = 1.0 + np.sum(B, axis=0)
    B = B / np.max(scale)
    diag = 1.0 - np.sum(B, axis=0)
    # A = [diag; B] - always has sensitivity 1
    # TODO(ryan): use woodbury identity to make more efficient
    A = np.vstack([np.diag(diag), B])
    AtA1 = np.linalg.inv(np.dot(A.T, A))
    # D2 = 1.0 / diag**2
    # C = np.eye(p) + np.dot(B, B.T)
    # C1 = np.linalg.inv(C)
    # X = np.dot(B.T, np.dot(C1, B))
    # inverse calculated using woodbury identity
    # AtA1 = np.diag(D2) - X*D2*D2[:,None]
    return np.trace(np.dot(WtW, AtA1))
def back_propagation(self, x_array, u_array): """ Back propagation along the given state and control trajectories to solve the Riccati equations for the error system (delta_x, delta_u, t) Need to approximate the dynamics/costs/constraints along the given trajectory dynamics needs a time-varying first-order approximation costs and constraints need time-varying second-order approximation """ #Note x_array contains X_T, so a dummy u is required to make the arrays #be of consistent length u_array_sup = np.vstack([u_array, np.zeros(len(u_array[0]))]) lqr_sys = self.build_lqr_system(x_array, u_array_sup) #k and K fdfwd = [None] * self.T fdbck_gain = [None] * self.T #initialize with the terminal cost parameters to prepare the backpropagation Vxx = lqr_sys['dldxx'][-1] Vx = lqr_sys['dldx'][-1] for t in reversed(range(self.T)): #note the double check if we need the transpose or not Qx = lqr_sys['dldx'][t] + lqr_sys['dfdx'][t].T.dot(Vx) Qu = lqr_sys['dldu'][t] + lqr_sys['dfdu'][t].T.dot(Vx) Qxx = lqr_sys['dldxx'][t] + lqr_sys['dfdx'][t].T.dot(Vxx).dot(lqr_sys['dfdx'][t]) Qux = lqr_sys['dldux'][t] + lqr_sys['dfdu'][t].T.dot(Vxx).dot(lqr_sys['dfdx'][t]) Quu = lqr_sys['dlduu'][t] + lqr_sys['dfdu'][t].T.dot(Vxx).dot(lqr_sys['dfdu'][t]) #use regularized inverse for numerical stability inv_Quu = self.regularized_persudo_inverse_(Quu, reg=self.reg) #get k and K fdfwd[t] = -inv_Quu.dot(Qu) fdbck_gain[t] = -inv_Quu.dot(Qux) #update value function for the previous time step Vxx = Qxx - fdbck_gain[t].T.dot(Quu).dot(fdbck_gain[t]) Vx = Qx - fdbck_gain[t].T.dot(Quu).dot(fdfwd[t]) return fdfwd, fdbck_gain
def _ntied_transmat(self, transmat_val):  # TODO: document choices
    #                              +-----------------+
    #                              |a|1|0|0|0|0|0|0|0|
    #                              +-----------------+
    #                              |0|a|1|0|0|0|0|0|0|
    #                              +-----------------+
    #   +---+---+---+              |0|0|a|b|0|0|c|0|0|
    #   | a | b | c |              +-----------------+
    #   +-----------+              |0|0|0|e|1|0|0|0|0|
    #   | d | e | f |  +---->      +-----------------+
    #   +-----------+              |0|0|0|0|e|1|0|0|0|
    #   | g | h | i |              +-----------------+
    #   +---+---+---+              |d|0|0|0|0|e|f|0|0|
    #                              +-----------------+
    #                              |0|0|0|0|0|0|i|1|0|
    #                              +-----------------+
    #                              |0|0|0|0|0|0|0|i|1|
    #                              +-----------------+
    #                              |g|0|0|h|0|0|0|0|i|
    #                              +-----------------+
    # for a model with n_unique = 3 and n_tied = 2
    transmat = np.empty((0, self.n_components))
    for r in range(self.n_unique):
        row = np.empty((self.n_chain, 0))
        for c in range(self.n_unique):
            if r == c:
                subm = np.array(sp.diags([transmat_val[r, c],
                                1 - transmat_val[r, c]], [0, 1],
                                shape=(self.n_chain, self.n_chain)).todense())
            else:
                lower_left = np.zeros((self.n_chain, self.n_chain))
                lower_left[self.n_tied, 0] = 1.0
                subm = np.kron(transmat_val[r, c], lower_left)
            row = np.hstack((row, subm))
        transmat = np.vstack((transmat, row))
    return transmat
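# A self-contained sketch (hypothetical numbers, not the class above) of the
# same tied-state block construction for n_unique = 2 and n_tied = 1, so the
# structure in the diagram can be inspected directly; rows still sum to 1.
import numpy as np
import scipy.sparse as sp

n_unique, n_tied = 2, 1
n_chain = n_tied + 1
n_components = n_unique * n_chain
transmat_val = np.array([[0.9, 0.1],
                         [0.2, 0.8]])

transmat = np.empty((0, n_components))
for r in range(n_unique):
    row = np.empty((n_chain, 0))
    for c in range(n_unique):
        if r == c:
            # chain of tied states: stay with prob a, advance with prob 1 - a
            subm = np.array(sp.diags([transmat_val[r, c], 1 - transmat_val[r, c]],
                                     [0, 1], shape=(n_chain, n_chain)).todense())
        else:
            # only the last state of a chain can jump to another unique state
            lower_left = np.zeros((n_chain, n_chain))
            lower_left[n_tied, 0] = 1.0
            subm = np.kron(transmat_val[r, c], lower_left)
        row = np.hstack((row, subm))
    transmat = np.vstack((transmat, row))
print(transmat)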
def job_lin_mmd(sample_source, tr, te, r):
    """Linear mmd with grid search to choose the best Gaussian width."""
    # should be completely deterministic

    # If n is too large, pairwise median computation can cause a memory error.
    with util.ContextTimer() as t:
        X, Y = tr.xy()
        Xr = X[:min(X.shape[0], 1000), :]
        Yr = Y[:min(Y.shape[0], 1000), :]

        med = util.meddistance(np.vstack((Xr, Yr)))
        widths = [(med * f) for f in 2.0**np.linspace(-1, 4, 40)]
        list_kernels = [kernel.KGauss(w**2) for w in widths]
        # grid search to choose the best Gaussian width
        besti, powers = tst.LinearMMDTest.grid_search_kernel(tr, list_kernels, alpha)
        # perform test
        best_ker = list_kernels[besti]
        lin_mmd_test = tst.LinearMMDTest(best_ker, alpha)
        test_result = lin_mmd_test.perform_test(te)

    result = {'test_method': lin_mmd_test, 'test_result': test_result, 'time_secs': t.secs}
    return result
def interpolate_basis(self, basis, dt, dt_max, norm=True):
    # Interpolate basis at the resolution of the data
    L, B = basis.shape
    t_int = np.arange(0.0, dt_max, step=dt)
    t_bas = np.linspace(0.0, dt_max, L)

    ibasis = np.zeros((len(t_int), B))
    for b in np.arange(B):
        ibasis[:, b] = np.interp(t_int, t_bas, basis[:, b])

    # Normalize so that the interpolated basis has volume 1
    if norm:
        # ibasis /= np.trapz(ibasis, t_int, axis=0)
        ibasis /= (dt * np.sum(ibasis, axis=0))

    if not self.allow_instantaneous:
        # Typically, the impulse responses are applied to times
        # (t+1:t+R). That means we need to prepend a row of zeros to make
        # sure the basis remains causal
        ibasis = np.vstack((np.zeros((1, B)), ibasis))

    return ibasis
def InterpolatedGradients(im, pts):
    """ return bilinearly interpolated image gradients in im for all points pts
    pts is assumed to be ([1xn], [1xn]) tuple of y and x indices """
    ptsi = tuple(p.astype(np.int32) for p in pts)   # truncated integer part
    ptsf = (pts[0] - ptsi[0], pts[1] - ptsi[1])     # fractional part

    # bilinear weights
    up = 1 - ptsf[0]
    down = ptsf[0]
    left = 1 - ptsf[1]
    right = ptsf[1]

    # image components
    ul = im[ptsi]
    ur = im[(ptsi[0], 1 + ptsi[1])]
    dl = im[(1 + ptsi[0], ptsi[1])]
    dr = im[(1 + ptsi[0], 1 + ptsi[1])]

    return np.vstack((
        left*(dl - ul) + right*(dr - ur),   # y gradient
        up*(ur - ul) + down*(dr - dl)))     # x gradient
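# A sanity-check sketch (an added example, plain NumPy): on a linear ramp
# image 2*y + 3*x the bilinear gradients above are constant, so every column
# returned by InterpolatedGradients should be close to (2, 3).
import numpy as np

yy, xx = np.mgrid[0:16, 0:16]
im = 2.0 * yy + 3.0 * xx
pts = (np.array([2.5, 7.25]), np.array([3.5, 9.75]))   # (y, x) sample points
print(InterpolatedGradients(im, pts))   # first row ~2 (y), second row ~3 (x)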
def im2col(img, block_size=(5, 5), skip=1):
    """ stretches block_size size'd patches centered skip distance away
        in both row/column space, stacks into columns (and stacks)
        bands into rows

        Use-case is for storing images for quick matrix multiplies
          - blows up memory usage by quite a bit (factor of 10!)

        motivated by implementation discussion here:
          http://cs231n.github.io/convolutional-networks/
        edited from snippet here:
          http://stackoverflow.com/questions/30109068/implement-matlabs-im2col-sliding-in-python
    """
    # stack depth bands (colors)
    if len(img.shape) == 3:
        return np.vstack([im2col(img[:, :, k], block_size, skip)
                          for k in range(img.shape[2])])

    # input array and block size
    A = img
    B = block_size

    # Parameters
    M, N = A.shape
    col_extent = N - B[1] + 1
    row_extent = M - B[0] + 1

    # Get Starting block indices
    start_idx = np.arange(B[0])[:, None]*N + np.arange(B[1])

    # Get offsetted indices across the height and width of input array
    offset_idx = np.arange(0, row_extent, skip)[:, None]*N + np.arange(0, col_extent, skip)

    # Get all actual indices & index into input array for final output
    out = np.take(A, start_idx.ravel()[:, None] + offset_idx.ravel())
    return out
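# A small usage sketch (assuming the im2col above): each column of the result
# is one flattened block, and columns advance in row-major patch order.
import numpy as np

img = np.arange(16, dtype=float).reshape(4, 4)
cols = im2col(img, block_size=(2, 2), skip=1)
print(cols.shape)   # (4, 9): 2*2 pixels per patch, 3*3 patch positions
print(cols[:, 0])   # first patch: [0. 1. 4. 5.]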
def forward_pass(self, graphs): """ Returns the nodes' features stacked together, along with a dictionary of nodes and their neighbors. An example structure is: - {1: [1, 2 , 4], 2: [2, 1], ... } """ # First off, we label each node with the index of each node's data. features = [] i = 0 for g in graphs: for n, d in g.nodes(data=True): features.append(d['features']) g.node[n]['idx'] = i i += 1 # We then do a second pass over the graphs, and record each node and # their neighbors' indices in the stacked features array. # # We also record the indices corresponding to each graph. nodes_nbrs = defaultdict(list) graph_idxs = defaultdict(list) for idx, g in enumerate(graphs): g.graph['idx'] = idx # set the graph's index attribute. for n, d in g.nodes(data=True): nodes_nbrs[d['idx']].append(d['idx']) graph_idxs[idx].append(d['idx']) # append node index to list # of graph's nodes indices. for nbr in g.neighbors(n): nodes_nbrs[d['idx']].append(g.node[nbr]['idx']) return np.vstack(features), nodes_nbrs, graph_idxs
def mixture_log_density(var_mixture_params, x):
    """Returns a weighted average over component densities."""
    log_weights, var_params = unpack_mixture_params(var_mixture_params)
    component_log_densities = np.vstack([component_log_density(params_k, x)
                                         for params_k in var_params]).T
    return logsumexp(component_log_densities + log_weights, axis=1, keepdims=False)
Nsamps = th_samples.shape[0]

# discard first half and randomly permute
th_samples = th_samples[Nsamps // 2:, :]
ll_samps = ll_samps[Nsamps // 2:]
chain_perm = np.random.permutation(th_samples.shape[0])[0:2500]
chain_perm = np.arange(2500)

# assemble a few thousand samples
B0 = parser.get(th_samples[0], 'betas')
B_samps = np.zeros((len(chain_perm), B0.shape[0], B0.shape[1]))
for i, idx in enumerate(chain_perm):
    betas = K_chol.dot(parser.get(th_samples[idx, :], 'betas').T).T
    B_samp = np.exp(betas)
    B_samp /= np.sum(B_samp * lam0_delta, axis=1, keepdims=True)
    B_samps[i, :, :] = B_samp
B_chains.append(B_samps)

B_samps = np.vstack(B_chains)
B_samps = B_samps[npr.permutation(B_samps.shape[0]), :, :]
B_mle = load_basis(num_bases=NUM_BASES,
                   split_type=SPLIT_TYPE,
                   lam_subsample=LAM_SUBSAMPLE)
lam0, lam0_delta = ru.get_lam0(lam_subsample=LAM_SUBSAMPLE)


def get_basis_sample(idx, mle=False):
    """ Method to return a basis sample to condition on (or the MLE if specified) """
    if mle:
        return B_mle
    else:
        return B_samps[idx]

##########################################################################
Nr = TrackNormal(rx) # psie is sort of backwards: higher angles go to the left return np.angle(Nx) - np.angle(Nr) if __name__ == '__main__': TRACK_SPACING = 19.8 # cm x = SVGPathToTrackPoints("oakwarehouse.path", TRACK_SPACING)[:-1] xm = np.array(x)[:, 0] / 50 # 50 pixels / meter track_k = TrackCurvature(xm) Nx = TrackNormal(xm) u = 1j * Nx np.savetxt("track_x.txt", np.vstack([np.real(xm), np.imag(xm)]).T.reshape(-1), newline=",\n") np.savetxt("track_u.txt", np.vstack([np.real(u), np.imag(u)]).T.reshape(-1), newline=",\n") np.savetxt("track_k.txt", track_k, newline=",\n") ye, val, stuff = OptimizeTrack(xm, 1.4, 0.1) psie = RelativePsie(ye, xm) rx = u*ye + xm raceline_k = TrackCurvature(rx) np.savetxt("raceline_k.txt", raceline_k, newline=",\n") np.savetxt("raceline_ye.txt", ye, newline=",\n") np.savetxt("raceline_psie.txt", psie, newline=",\n")
def plot_ellipse(ax, mean, cov_sqrt, alpha, num_points=100):
    angles = np.linspace(0, 2*np.pi, num_points)
    circle_pts = np.vstack([np.cos(angles), np.sin(angles)]).T * 2.0
    cur_pts = mean + np.dot(circle_pts, cov_sqrt)
    ax.plot(cur_pts[:, 0], cur_pts[:, 1], '-', alpha=alpha)
def gmm_log_likelihood(params, data):
    cluster_lls = []
    for log_proportion, mean, cov_sqrt in zip(*unpack_gmm_params(params)):
        cov = np.dot(cov_sqrt.T, cov_sqrt)
        cluster_lls.append(log_proportion + mvn.logpdf(data, mean, cov))
    return np.sum(logsumexp(np.vstack(cluster_lls), axis=0))
def genConstraints(prng, label, alpha, beta, num_ML, num_CL, start_expert = 0, \ flag_same=False): """ This function generates pairwise constraints (ML/CL) using groud-truth cluster label and noise parameters Parameters ---------- label: shape(n_sample, ) cluster label of all the samples alpha: shape(n_expert, ) sensitivity parameters of experts beta: shape(n_expert, ) specificity parameters of experts num_ML: int num_CL: int flag_same: True if different experts provide constraints for the same set of sample pairs, False if different experts provide constraints for different set of sample pairs Returns ------- S: shape(n_con, 4) The first column -> expert id The second and third column -> (row, column) indices of two samples The fourth column -> constraint values (1 for ML and 0 for CL) """ n_sample = len(label) tp = np.tile(label, (n_sample,1)) label_mat = (tp == tp.T).astype(int) ML_set = [] CL_set = [] # get indices of upper-triangle matrix [row, col] = np.triu_indices(n_sample, k=1) # n_sample * (n_sample-1)/2 for idx in range(len(row)): if label_mat[row[idx],col[idx]] == 1: ML_set.append([row[idx], col[idx]]) elif label_mat[row[idx],col[idx]] == 0: CL_set.append([row[idx], col[idx]]) else: print "Invalid matrix entry values" ML_set = np.array(ML_set) CL_set = np.array(CL_set) assert num_ML < ML_set.shape[0] assert num_CL < CL_set.shape[0] # generate noisy constraints for each expert assert len(alpha) == len(beta) n_expert = len(alpha) # initialize the constraint matrix S = np.zeros((0, 4)) # different experts provide constraint for the same set of sample pairs if flag_same == True: idx_ML = prng.choice(ML_set.shape[0], num_ML, replace=False) idx_CL = prng.choice(CL_set.shape[0], num_CL, replace=False) ML = ML_set[idx_ML, :] CL = CL_set[idx_CL, :] for m in range(n_expert): val_ML = prng.binomial(1, alpha[m], num_ML) val_CL = prng.binomial(1, 1-beta[m], num_CL) Sm_ML = np.hstack((np.ones((num_ML,1))*(m+start_expert), ML, \ val_ML.reshape(val_ML.size,1) )) Sm_CL = np.hstack((np.ones((num_CL,1))*(m+start_expert), CL, \ val_CL.reshape(val_CL.size,1) )) S = np.vstack((S, Sm_ML, Sm_CL)).astype(int) # different experts provide constraints for different sets of sample pairs else: for m in range(n_expert): prng = np.random.RandomState(1000 + m) idx_ML = prng.choice(ML_set.shape[0], num_ML, replace=False) idx_CL = prng.choice(CL_set.shape[0], num_CL, replace=False) ML = ML_set[idx_ML, :] CL = CL_set[idx_CL, :] val_ML = prng.binomial(1, alpha[m], num_ML) val_CL = prng.binomial(1, 1-beta[m], num_CL) Sm_ML = np.hstack((np.ones((num_ML,1))*(m+start_expert), ML, \ val_ML.reshape(val_ML.size,1) )) Sm_CL = np.hstack((np.ones((num_CL,1))*(m+start_expert), CL, \ val_CL.reshape(val_CL.size,1) )) S = np.vstack((S, Sm_ML, Sm_CL)).astype(int) return S
def polyinterp(points, doPlot=None, xminBound=None, xmaxBound=None): """ polynomial interpolation Parameters ---------- points: shape(pointNum, 3), three columns represents x, f, g doPolot: set to 1 to plot, default 0 xmin: min value that brackets minimum (default: min of points) xmax: max value that brackets maximum (default: max of points) set f or g to sqrt(-1)=1j if they are not known the order of the polynomial is the number of known f and g values minus 1 Returns ------- minPos: fmin: """ if doPlot == None: doPlot = 0 nPoints = points.shape[0] order = np.sum(np.imag(points[:, 1:3]) == 0) -1 # code for most common case: cubic interpolation of 2 points if nPoints == 2 and order == 3 and doPlot == 0: [minVal, minPos] = [np.min(points[:,0]), np.argmin(points[:,0])] notMinPos = 1 - minPos d1 = points[minPos,2] + points[notMinPos,2] - 3*(points[minPos,1]-\ points[notMinPos,1])/(points[minPos,0]-points[notMinPos,0]) t_d2 = d1**2 - points[minPos,2]*points[notMinPos,2] if t_d2 > 0: d2 = np.sqrt(t_d2) else: d2 = np.sqrt(-t_d2) * np.complex(0,1) if np.isreal(d2): t = points[notMinPos,0] - (points[notMinPos,0]-points[minPos,0])*\ ((points[notMinPos,2]+d2-d1)/(points[notMinPos,2]-\ points[minPos,2]+2*d2)) minPos = np.min([np.max([t,points[minPos,0]]), points[notMinPos,0]]) else: minPos = np.mean(points[:,0]) fmin = minVal return (minPos, fmin) xmin = np.min(points[:,0]) xmax = np.max(points[:,0]) # compute bounds of interpolation area if xminBound == None: xminBound = xmin if xmaxBound == None: xmaxBound = xmax # constraints based on available function values A = np.zeros((0, order+1)) b = np.zeros((0, 1)) for i in range(nPoints): if np.imag(points[i,1]) == 0: constraint = np.zeros(order+1) for j in np.arange(order,-1,-1): constraint[order-j] = points[i,0]**j A = np.vstack((A, constraint)) b = np.append(b, points[i,1]) # constraints based on availabe derivatives for i in range(nPoints): if np.isreal(points[i,2]): constraint = np.zeros(order+1) for j in range(1,order+1): constraint[j-1] = (order-j+1)* points[i,0]**(order-j) A = np.vstack((A, constraint)) b = np.append(b,points[i,2]) # find interpolating polynomial params = np.linalg.solve(A, b) # compute critical points dParams = np.zeros(order) for i in range(params.size-1): dParams[i] = params[i] * (order-i) if np.any(np.isinf(dParams)): cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:,0])) else: cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:,0], \ np.roots(dParams))) # test critical points fmin = np.infty; minPos = (xminBound + xmaxBound)/2. for xCP in cp: if np.imag(xCP) == 0 and xCP >= xminBound and xCP <= xmaxBound: fCP = np.polyval(params, xCP) if np.imag(fCP) == 0 and fCP < fmin: minPos = np.double(np.real(xCP)) fmin = np.double(np.real(fCP)) # plot situation (omit this part for now since we are not going to use it # anyway) return (minPos, fmin)
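# A minimal sketch (assuming the polyinterp above) for its common fast path:
# two points with known values and derivatives of f(x) = x**2, whose cubic
# interpolant has its minimizer at x = 0.
import numpy as np

points = np.array([[-1.0, 1.0, -2.0],    # columns: x, f(x), f'(x)
                   [ 1.0, 1.0,  2.0]])
min_pos, _ = polyinterp(points)
print(min_pos)   # approximately 0.0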
            feat_list.append(d['features'])
            idx += 1

        # Now loop over each node again and figure out its neighbors.
        for n, d in p.nodes(data=True):
            graph_idxs[project['title']].append(d['idx'])
            nodes_nbrs[d['idx']].append(d['idx'])
            graph_nodes[project['title']][d['idx']] = n
            for nbr in p.neighbors(n):
                nodes_nbrs[d['idx']].append(p.node[nbr]['idx'])
            # print(nodes_nbrs[d['idx']])
    except Exception:
        print('Did not make graph for {0}'.format(project['code']))

# Save the data to disk:
# The array...
feat_array = np.vstack(feat_list)
np.save('../data/feat_array.npy', feat_array)

# The node idxs and their neighbor idxs...
with open('../data/nodes_nbrs.pkl', 'wb') as f:
    pkl.dump(nodes_nbrs, f)

# The graphs' seqids and their node idxs...
with open('../data/graph_idxs.pkl', 'wb') as f:
    pkl.dump(graph_idxs, f)

# The graphs': {'SeqID1':{1:'A51SER',...},...}
with open('../data/graph_nodes.pkl', 'wb') as f:
    pkl.dump(graph_nodes, f)
# examine output
####################################################################
print("Final Loss: ", loss_fun(best_w, train_images, samps))
params = pred_fun(best_w, train_images)
means = params[:, :8]
variances = params[:, -8:]
i = 10


def compare_moments(i):
    print("samp comparison, idx = %d " % i)
    print(" {0:5} | {1:6} | {2:6} | {3:6} | {4:6} ".format(
        "dim", "mod_m", "sam_m", "mod_v", "sam_v"))
    smean = samps[i].mean(axis=0)
    svar = samps[i].var(axis=0)
    for i, (mm, mv, m, v) in enumerate(zip(means[i, :], variances[i, :],
                                           smean, svar)):
        print(" {0:5} | {1:6} | {2:6} | {3:6} | {4:6} ".format(
            i, "%2.2f" % mm, "%2.2f" % m, "%2.2f" % mv, "%2.2f" % v))


compare_moments(0)
compare_moments(10)
compare_moments(80)

######### exploratory stuff - look at the scaling of each distribution
svals = []
for i in range(len(samps)):
    u, s, v = np.linalg.svd(samps[i])
    svals.append(s)
svals = np.vstack(svals)
def minConf_PQN(funObj, x, funProj, options=None): """ The problems are of the form min funObj(x) s.t. x in C The projected quasi-Newton sub-problems are solved using the spectral projected gradient algorithm Parameters ---------- funObj: function to minimize, return objective value as the first argument and gradient as the second argument funProj: function that returns projection of x onto C options: 1) verbose: level of verbosity (0: no output, 1: final, 2: iter (default), 3: debug) 2) optTol: tolerance used to check for optimality (default: 1e-5) 3) progTol: tolerance used to check for progress (default: 1e-9) 4) maxIter: maximum number of calls to funObj (default: 500) 5) maxProject: maximum number of calls to funProj (default: 100000) 6) numDiff: compute derivatives numerically (0: use user-supplied derivatives (default), 1: use finite differences, 2: use complex differentials) 7) suffDec: sufficient decrease parameter in Armijo condition (default: 1e-4) 8) corrections: number of lbfgs corrections to store (default: 10) 9) adjustStep: use quadratic initialization of line search (default: 0) 10) bbInit: initialize sub-problem with Barzilai-Borwein step (default: 0) 11) SPGoptTol: optimality tolerance for SPG direction finding (default: 1e-6) 12) SPGiters: maximum number of iterations for SPG direction finding (default: 10) Returns ------- x: optimal parameter values f: optimal objective value funEvals: number of function evaluations """ # number of variables/parameters nVars = len(x) # set default optimization settings options_default = {'verbose':2, 'numDiff':0, 'optTol':1e-5, 'progTol':1e-9, \ 'maxIter':500, 'maxProject':100000, 'suffDec':1e-4, \ 'corrections':10, 'adjustStep':0, 'bbInit':0, 'SPGoptTol':1e-6,\ 'SPGprogTol':1e-10, 'SPGiters':10, 'SPGtestOpt':0} options = setDefaultOptions(options, options_default) if options['verbose'] == 3: print 'Running PQN...' print 'Number of L-BFGS Corrections to store: ' + \ str(options['corrections']) print 'Spectral initialization of SPG: ' + str(options['bbInit']) print 'Maximum number of SPG iterations: ' + str(options['SPGiters']) print 'SPG optimality tolerance: ' + str(options['SPGoptTol']) print 'SPG progress tolerance: ' + str(options['SPGprogTol']) print 'PQN optimality tolerance: ' + str(options['optTol']) print 'PQN progress tolerance: ' + str(options['progTol']) print 'Quadratic initialization of line search: ' + \ str(options['adjustStep']) print 'Maximum number of function evaluations: ' + \ str(options['maxIter']) print 'Maximum number of projections: ' + str(options['maxProject']) if options['verbose'] >= 2: print '{:10s}'.format('Iteration') + \ '{:10s}'.format('FunEvals') + \ '{:10s}'.format('Projections') + \ '{:15s}'.format('StepLength') + \ '{:15s}'.format('FunctionVal') + \ '{:15s}'.format('OptCond') funEvalMultiplier = 1 # project initial parameter vector # translate this function (Done!) x = funProj(x) projects = 1 # evaluate initial parameters # translate this function (Done!) [f, g] = funObj(x) funEvals = 1 # check optimality of initial point projects = projects + 1 if np.max(np.abs(funProj(x-g)-x)) < options['optTol']: if options['verbose'] >= 1: print "First-Order Optimality Conditions Below optTol at Initial Point" return (x, f, funEvals) i = 1 while funEvals <= options['maxIter']: # compute step direction # this is for initialization if i == 1: p = funProj(x-g) projects = projects + 1 S = np.zeros((nVars, 0)) Y = np.zeros((nVars, 0)) Hdiag = 1 else: y = g - g_old s = x - x_old # translate this function (Done!) 
[S, Y, Hdiag] = lbfgsUpdate(y, s, options['corrections'], \ options['verbose']==3, S, Y, Hdiag) # make compact representation k = Y.shape[1] L = np.zeros((k,k)) for j in range(k): L[j+1:,j] = np.dot(np.transpose(S[:,j+1:]), Y[:,j]) N = np.hstack((S/Hdiag, Y.reshape(Y.shape[0], Y.size/Y.shape[0]))) M1 = np.hstack((np.dot(S.T,S)/Hdiag, L)) M2 = np.hstack((L.T, -np.diag(np.diag(np.dot(S.T,Y))))) M = np.vstack((M1, M2)) # translate this function (Done!) HvFunc = lambda v: v/Hdiag - np.dot(N,np.linalg.solve(M,np.dot(N.T,v))) if options['bbInit'] == True: # use Barzilai-Borwein step to initialize sub-problem alpha = np.dot(s,s)/np.dot(s,y) if alpha <= 1e-10 or alpha > 1e10: alpha = min(1., 1./np.sum(np.abs(g))) # solve sub-problem xSubInit = x - alpha*g feasibleInit = 0 else: xSubInit = x feasibleInit = 1 # solve Sub-problem # translate this function (Done!) [p, subProjects] = solveSubProblem(x, g, HvFunc, funProj, \ options['SPGoptTol'], options['SPGprogTol'], \ options['SPGiters'], options['SPGtestOpt'], feasibleInit,\ xSubInit) projects = projects + subProjects d = p - x g_old = g x_old = x # check the progress can be made along the direction gtd = np.dot(g,d) if gtd > -options['progTol']: if options['verbose'] >= 1: print "Directional Derivative below progTol" break # select initial guess to step length if i == 1 or options['adjustStep'] == 0: t = 1. else: t = min(1., 2.*(f-f_old)/gtd) # bound step length on first iteration if i == 1: t = min(1., 1./np.sum(np.abs(g))) # evluate the objective and gradient at the initial step if t == 1: x_new = p else: x_new = x + t*d [f_new, g_new] = funObj(x_new) funEvals = funEvals + 1 # backtracking line search f_old = f # translate isLegal (Done!) while f_new > f + options['suffDec']*np.dot(g,x_new-x) or \ not isLegal(f_new): temp = t # backtrack to next trial value if not isLegal(f_new) or not isLegal(g_new): if options['verbose'] == 3: print "Halving step size" t = t/2. else: if options['verbose'] == 3: print "Cubic backtracking" # translate polyinterp (Done!) 
t = polyinterp(np.array([[0.,f,gtd],\ [t,f_new,np.dot(g_new,d)]]))[0] # adjust if change is too small/large if t < temp*1e-3: if options['verbose'] == 3: print "Interpolated value too small, Adjusting" t = temp*1e-3 elif t > temp*0.6: if options['verbose'] == 3: print "Interpolated value too large, Adjusting" t = temp*0.6 # check whether step has become too small if np.sum(np.abs(t*d)) < options['progTol'] or t == 0: if options['verbose'] == 3: print "Line search failed" t = 0 f_new = f g_new = g break # evaluate new point f_prev = f_new t_prev = temp x_new = x + t*d [f_new, g_new] = funObj(x_new) funEvals = funEvals + 1 # take step x = x_new f = f_new g = g_new optCond = np.max(np.abs(funProj(x-g)-x)) projects = projects + 1 # output log if options['verbose'] >= 2: print '{:10d}'.format(i) + \ '{:10d}'.format(funEvals*funEvalMultiplier) + \ '{:10d}'.format(projects) + \ '{:15.5e}'.format(t) + \ '{:15.5e}'.format(f) + \ '{:15.5e}'.format(optCond) # check optimality if optCond < options['optTol']: print "First-order optimality conditions below optTol" break if np.max(np.abs(t*d)) < options['progTol']: if options['verbose'] >= 1: print "Step size below progTol" break if np.abs(f-f_old) < options['progTol']: if options['verbose'] >= 1: print "Function value changing by less than progTol" break if funEvals > options['maxIter']: if options['verbose'] >= 1: print "Function evaluation exceeds maxIter" break if projects > options['maxProject']: if options['verbose'] >= 1: print "Number of projections exceeds maxProject" break i = i + 1 return (x, f, funEvals)