def kernelpdf(scale, sigma, dataset, datasetGen):
    # dataset is binned as eta1, eta2, mass, pt2, pt1
    maxR = np.full((100), 3.3)
    minR = np.full((100), 2.9)

    valsReco = np.linspace(minR[0], maxR[0], 100)
    valsGen = valsReco

    h = np.tensordot(scale, valsGen, axes=0)
    # get a 5D vector with np.newaxis with all possible combos of kinematics and gen mass values
    h_ext = np.swapaxes(np.swapaxes(h, 2, 4), 3, 4)[:, :, np.newaxis, :, :, :]

    sigma_ext = sigma[:, :, np.newaxis, np.newaxis, :, :]
    xscale = np.sqrt(2.) * sigma_ext

    maxR_ext = maxR[np.newaxis, np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]
    minR_ext = minR[np.newaxis, np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]

    maxZ = (maxR_ext - h_ext.astype('float64')) / xscale
    minZ = (minR_ext - h_ext.astype('float64')) / xscale

    arg = np.sqrt(np.pi / 2.) * sigma_ext * (erf(maxZ) - erf(minZ))

    # take tensor product between mass and genMass dimensions and sum over gen masses
    # divide each bin by the sum of gen events in that bin
    den = np.where(np.sum(datasetGen, axis=2) > 1000.,
                   np.sum(datasetGen, axis=2), -1)[:, :, np.newaxis, :, :]
    I = np.sum(arg * datasetGen[:, :, np.newaxis, :, :, :], axis=3) / den

    # give vals the right shape -> add dimension for gen mass (axis = 3)
    vals_ext = valsReco[np.newaxis, np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]

    gaus = np.exp(-np.power(vals_ext - h_ext.astype('float64'), 2.) / (2 * np.power(sigma_ext, 2.)))

    # take tensor product between mass and genMass dimensions and sum over gen masses
    # divide each bin by the sum of gen events in that bin
    den2 = np.where(np.sum(datasetGen, axis=2) > 1000.,
                    np.sum(datasetGen, axis=2), 1)[:, :, np.newaxis, :, :]
    pdf = np.sum(gaus * datasetGen[:, :, np.newaxis, :, :, :], axis=3) / den2 / np.where(I > 0., I, -1)

    pdf = np.where(pdf > 0., pdf, 0.)

    massbinwidth = (maxR[0] - minR[0]) / 100
    pdf = pdf * massbinwidth

    return pdf
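# Hedged usage sketch (not from the original source): a single kinematic bin, assuming np is numpy
# and erf is scipy.special.erf as used above. scale and sigma are binned in (eta1, eta2, pt2, pt1),
# datasetGen in (eta1, eta2, genMass, pt2, pt1); the gen-mass axis must have 100 entries so it
# lines up with valsGen, and the unused dataset argument can be anything.
def _demo_kernelpdf():
    scale = np.ones((1, 1, 1, 1))
    sigma = 0.01 * np.ones((1, 1, 1, 1))
    datasetGen = 20. * np.ones((1, 1, 100, 1, 1))  # 2000 gen events per bin, above the 1000 cut
    pdf = kernelpdf(scale, sigma, None, datasetGen)
    assert pdf.shape == (1, 1, 100, 1, 1)
    assert np.all(pdf >= 0.)
    return pdf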
def forward_pass(self, inputs, param_vector):
    if inputs.shape[2] == 32492:
        pool_map = genfromtxt('../mesh/neighs_L1.csv', delimiter=',')
        coords_old = coords_0
        faces_old = faces_0
        coords = coords_1
        faces = faces_1
    elif inputs.shape[2] == 5356:
        pool_map = genfromtxt('../mesh/neighs_L2.csv', delimiter=',')
        coords_old = coords_1
        faces_old = faces_1
        coords = coords_2
        faces = faces_2

    adj_mtx_old, _, _ = mesh_traversal.create_adj_mtx(coords_old, faces_old)
    adj_mtx, _, _ = mesh_traversal.create_adj_mtx(coords, faces)

    pool_map = list(map(int, pool_map))
    patches = []
    for i in range(coords.shape[0]):
        org_vert = int(pool_map[i])
        neighs = mesh_traversal.get_neighs(adj_mtx_old, coords_old, org_vert, 1)
        patch = inputs[:, :, neighs]
        patch = np.mean(patch, axis=2)
        patches.append(patch)

    out = np.array(patches)
    out = np.swapaxes(out, 0, 1)
    out = np.swapaxes(out, 1, 2)
    return out
def emissionLikelihood(self, x, ys):
    # Compute P( y | x, ϴ )
    if x.ndim == 2:
        # Multiple time steps
        if ys.ndim == 2:
            assert x.shape[0] == ys.shape[0]
        else:
            # There are multiple measurements per latent state
            assert ys.ndim == 3
            assert x.shape[0] == ys.shape[1]
            # Put the time index in front
            ys = np.swapaxes(ys, 0, 1)

        assert x.shape[0] == ys.shape[0]

        ans = 0.0
        for t, (_x, _ys) in enumerate(zip(x, ys)):
            ans += Normal.log_likelihood(_ys, nat_params=(-0.5 * self.J1Emiss, self._hy.dot(_x)))
        return ans
    else:
        # Only 1 example. I don't think this code will ever be called
        assert x.ndim == 1
        if ys.ndim == 1:
            pass
        else:
            assert ys.ndim == 2
        # Use x and ys here; _x and _ys only exist inside the loop above
        return Normal.log_likelihood(ys, nat_params=(-0.5 * self.J1Emiss, self._hy.dot(x)))
def E_step(self, verbose=False):
    self.gaussian_states = self.laplace_approximation(verbose=verbose)

    # Compute normalizer and covariances with E step
    T, D = self.T, self.D_latent
    H_diag, H_upper_diag = self.sparse_hessian_log_joint(self.gaussian_states)
    J_init = J_11 = J_22 = np.zeros((D, D))
    h_init = h_1 = h_2 = np.zeros((D,))

    # Negate the Hessian since precision is -H
    J_21 = np.swapaxes(-H_upper_diag, -1, -2)
    J_node = -H_diag
    h_node = np.zeros((T, D))

    logZ, _, self.smoothed_sigmas, E_xtp1_xtT = \
        info_E_step(J_init, h_init, 0,
                    J_11, J_21, J_22, h_1, h_2, np.zeros((T - 1)),
                    J_node, h_node, np.zeros(T))

    # Laplace approximation -- normalizer is the joint times
    # the normalizer from the Gaussian approx.
    self._normalizer = self.log_joint(self.gaussian_states) + logZ

    self._set_expected_stats(self.gaussian_states, self.smoothed_sigmas, E_xtp1_xtT)
def _ll(self, m, p, a, xn, xln, **kwargs):
    """Computation of log likelihood

    Dimensions
    ----------
    m :  n_unique x n_features
    p :  n_unique x n_features x n_features
    a :  n_unique x n_lags (shared_alpha=F)
         OR     1 x n_lags (shared_alpha=T)
    xn:  N x n_features
    xln: N x n_features x n_lags
    """
    samples = xn.shape[0]
    xn = xn.reshape(samples, 1, self.n_features)
    m = m.reshape(1, self.n_unique, self.n_features)
    det = np.linalg.det(np.linalg.inv(p))
    det = det.reshape(1, self.n_unique)

    lagged = np.dot(xln, a.T)  # NFU
    lagged = np.swapaxes(lagged, 1, 2)  # NUF
    xm = xn - (lagged + m)
    tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

    # TODO division in gamma function
    res = np.log(gamma((self.degree_freedom + self.n_features) / 2)) - \
        np.log(gamma(self.degree_freedom / 2)) - \
        (self.n_features / 2.0) * np.log(self.degree_freedom) - \
        (self.n_features / 2.0) * np.log(np.pi) - 0.5 * np.log(det) - \
        ((self.degree_freedom + self.n_features) / 2.0) * \
        np.log(1 + (1 / self.degree_freedom) * tem)

    return res
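# Hedged sketch (not from the original source, addressing the TODO above): the difference of
# log(gamma(...)) terms overflows for large degrees of freedom, while scipy.special.gammaln
# evaluates log-gamma directly and stays finite. Assumes gamma is scipy.special.gamma as used above.
def _demo_stable_log_gamma_ratio(nu=400.0, d=3.0):
    from scipy.special import gammaln
    naive = np.log(gamma((nu + d) / 2)) - np.log(gamma(nu / 2))  # inf - inf -> nan
    stable = gammaln((nu + d) / 2) - gammaln(nu / 2)             # finite
    return naive, stable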
def test_blocks_to_banded(T=5, D=3):
    """
    Test blocks_to_banded correctness
    """
    Ad = np.zeros((T, D, D))
    Aod = np.zeros((T-1, D, D))
    M = np.arange(1, D+1)[:, None] * 10 + np.arange(1, D+1)
    for t in range(T):
        Ad[t, :, :] = 100 * ((t+1)*10 + (t+1)) + M
    for t in range(T-1):
        Aod[t, :, :] = 100 * ((t+2)*10 + (t+1)) + M

    # print("Lower")
    # L = blocks_to_bands(Ad, Aod, lower=True)
    # print(L)
    # print("Upper")
    # U = blocks_to_bands(Ad, Aod, lower=False)
    # print(U)

    # Check inverse with random symmetric matrices
    Ad = npr.randn(T, D, D)
    Ad = (Ad + np.swapaxes(Ad, -1, -2)) / 2
    Aod = npr.randn(T-1, D, D)

    Ad2, Aod2 = bands_to_blocks(blocks_to_bands(Ad, Aod, lower=True), lower=True)
    assert np.allclose(np.tril(Ad), np.tril(Ad2))
    assert np.allclose(Aod, Aod2)

    Ad3, Aod3 = bands_to_blocks(blocks_to_bands(Ad, Aod, lower=False), lower=False)
    assert np.allclose(np.triu(Ad), np.triu(Ad3))
    assert np.allclose(Aod, Aod3)
def _ll(self, m, p, a, xn, xln, **kwargs):
    """Computation of log likelihood

    Dimensions
    ----------
    m :  n_unique x n_features
    p :  n_unique x n_features x n_features
    a :  n_unique x n_lags (shared_alpha=F)
         OR     1 x n_lags (shared_alpha=T)
    xn:  N x n_features
    xln: N x n_features x n_lags
    """
    samples = xn.shape[0]
    xn = xn.reshape(samples, 1, self.n_features)
    m = m.reshape(1, self.n_unique, self.n_features)
    det = np.linalg.det(np.linalg.inv(p))
    det = det.reshape(1, self.n_unique)

    lagged = np.dot(xln, a.T)  # NFU
    lagged = np.swapaxes(lagged, 1, 2)  # NUF
    xm = xn - (lagged + m)
    tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

    # TODO division in gamma function
    res = np.log(gamma((self.degree_freedom + self.n_features) / 2)) - \
        np.log(gamma(self.degree_freedom / 2)) - \
        (self.n_features / 2.0) * np.log(self.degree_freedom) - \
        (self.n_features / 2.0) * np.log(np.pi) - 0.5 * np.log(det) - \
        ((self.degree_freedom + self.n_features) / 2.0) * \
        np.log(1 + (1 / self.degree_freedom) * tem)

    return res
def _continuous_entropy(self):
    negentropy = 0
    continuous_expectations = self.continuous_expectations
    for prms, (log_Z, Ex, smoothed_sigmas, ExxnT) in \
            zip(self.continuous_state_params, continuous_expectations):
        # Kalman smoother outputs the smoothed covariance matrices. Add
        # back the mean to get E[x_t x_{t+1}^T]
        mumuT = np.swapaxes(Ex[:, None], 2, 1) @ Ex[:, None]
        ExxT = smoothed_sigmas + mumuT

        # Pairwise terms
        negentropy += np.sum(-0.5 * trace_product(prms["J_ini"], ExxT[0]))
        negentropy += np.sum(-0.5 * trace_product(prms["J_dyn_11"], ExxT[:-1]))
        negentropy += np.sum(-0.5 * trace_product(prms["J_dyn_22"], ExxT[1:]))
        negentropy += np.sum(-0.5 * trace_product(prms["J_obs"], ExxT))
        negentropy += np.sum(-1.0 * trace_product(prms["J_dyn_21"], ExxnT))

        # Unary terms
        negentropy += np.sum(prms["h_ini"] * Ex[0])
        negentropy += np.sum(prms["h_dyn_1"] * Ex[:-1])
        negentropy += np.sum(prms["h_dyn_2"] * Ex[1:])
        negentropy += np.sum(prms["h_obs"] * Ex)

        # Log normalizer
        negentropy -= log_Z

    return -negentropy
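# Hedged sketch (not from the original source): the mumuT step above adds the outer product of the
# means back onto the smoothed covariances, since E[x x^T] = Cov(x) + E[x] E[x]^T at each time step.
# Assumes np is (autograd.)numpy and npr is numpy.random.
def _demo_second_moment(T=4, D=2):
    Ex = npr.randn(T, D)
    L = npr.randn(T, D, D)
    sigmas = np.matmul(L, np.swapaxes(L, -1, -2))          # per-timestep covariances
    mumuT = np.swapaxes(Ex[:, None], 2, 1) @ Ex[:, None]   # (T, D, 1) @ (T, 1, D) -> (T, D, D)
    ExxT = sigmas + mumuT
    loop = np.array([sigmas[t] + np.outer(Ex[t], Ex[t]) for t in range(T)])
    assert np.allclose(ExxT, loop)
    return ExxT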
def _ll(self, m, p, a, xn, xln, **kwargs):
    """Computation of log likelihood

    Dimensions
    ----------
    m :  n_unique x n_features
    p :  n_unique x n_features x n_features
    a :  n_unique x n_lags (shared_alpha=F)
         OR     1 x n_lags (shared_alpha=T)
    xn:  N x n_features
    xln: N x n_features x n_lags
    """
    samples = xn.shape[0]
    xn = xn.reshape(samples, 1, self.n_features)
    m = m.reshape(1, self.n_unique, self.n_features)
    det = np.linalg.det(np.linalg.inv(p))
    det = det.reshape(1, self.n_unique)

    lagged = np.dot(xln, a.T)  # NFU
    lagged = np.swapaxes(lagged, 1, 2)  # NUF
    xm = xn - (lagged + m)
    tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

    res = (-self.n_features / 2.0) * np.log(2 * np.pi) - 0.5 * tem - 0.5 * np.log(det)
    return res
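# Hedged sketch (not from the original source): the einsum 'NUF,UFX,NUX->NU' above evaluates a
# per-sample, per-state Mahalanobis quadratic form x^T P x; an explicit loop gives the same result.
# Assumes np is numpy and npr is numpy.random.
def _demo_mahalanobis_einsum(N=5, U=3, F=2):
    xm = npr.randn(N, U, F)
    L = npr.randn(U, F, F)
    p = np.matmul(L, np.swapaxes(L, -1, -2))  # per-state precision matrices
    tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)
    loop = np.array([[xm[n, u] @ p[u] @ xm[n, u] for u in range(U)] for n in range(N)])
    assert np.allclose(tem, loop)
    return tem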
def test_multivariate_normal_logpdf_batches_and_states_masked(D=10):
    # Test broadcasting over B batches, N datapoints, and K parameters with masks
    B = 3
    N = 100
    K = 5
    x = npr.randn(B, N, D)
    mask = npr.rand(B, N, D) < .5
    mu = npr.randn(K, D)
    L = npr.randn(K, D, D)
    Sigma = np.matmul(L, np.swapaxes(L, -1, -2))

    ll1 = multivariate_normal_logpdf(x[:, :, None, :], mu, Sigma, mask=mask[:, :, None, :])
    assert ll1.shape == (B, N, K)

    ll2 = np.empty((B, N, K))
    for b in range(B):
        for n in range(N):
            m = mask[b, n]
            if m.sum() == 0:
                ll2[b, n] = 0
            else:
                for k in range(K):
                    ll2[b, n, k] = mvn.logpdf(x[b, n][m], mu[k][m], Sigma[k][np.ix_(m, m)])
    assert np.allclose(ll1, ll2)
def tensorize_and_convolve_mesh(a, adj_mtx, vals_list, coords, r, stride):
    """
    Strides the mesh and applies convolution operation. Prepares tensors within the function,
    so not as efficient as mesh_convolve_tensorized(). If operating on already strided data,
    use mesh_convolve_tensorized() or mesh_convolve_tensorized_dyn().
    :param a: tensor of filters
    :param adj_mtx: adjacency matrix
    :param coords: coordinates of each vertex
    :return: result of the convolution operation
    """
    strided_mesh = mesh_strider_batch(adj_mtx, vals_list, coords, r, stride, None)
    try:
        out = npo.einsum(a, [0, 1, 2], strided_mesh, [3, 4, 2])
    except:
        # a or strided_mesh may be an autograd box; unwrap to the underlying value
        try:
            a = a._value
            out = npo.einsum(a, [0, 1, 2], strided_mesh, [3, 4, 2])
        except:
            strided_mesh = strided_mesh._value
            out = npo.einsum(a, [0, 1, 2], strided_mesh, [3, 4, 2])
    out = out[0]
    out = np.swapaxes(out, 0, 1)
    return out
def _ll(self, m, p, a, xn, xln, **kwargs):
    """Computation of log likelihood

    Dimensions
    ----------
    m :  n_unique x n_features
    p :  n_unique x n_features x n_features
    a :  n_unique x n_lags (shared_alpha=F)
         OR     1 x n_lags (shared_alpha=T)
    xn:  N x n_features
    xln: N x n_features x n_lags
    """
    samples = xn.shape[0]
    xn = xn.reshape(samples, 1, self.n_features)
    m = m.reshape(1, self.n_unique, self.n_features)
    det = np.linalg.det(np.linalg.inv(p))
    det = det.reshape(1, self.n_unique)

    lagged = np.dot(xln, a.T)  # NFU
    lagged = np.swapaxes(lagged, 1, 2)  # NUF
    xm = xn - (lagged + m)
    tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

    res = (-self.n_features / 2.0) * np.log(2 * np.pi) - 0.5 * tem - 0.5 * np.log(det)
    return res
def test_solveh_banded_grad(T=10, D=4):
    """
    Test solveh_banded gradient
    """
    J_diag, J_lower_diag, J_full = make_block_tridiag(T, D)
    J_diag = np.tile(J_diag[None, :, :], (T, 1, 1))
    J_lower_diag = np.tile(J_lower_diag[None, :, :], (T - 1, 1, 1))
    b = npr.randn(T * D)

    J_banded = blocks_to_bands(J_diag, J_lower_diag, lower=True)
    check_grads(solveh_banded, argnum=0, modes=['rev'], order=1)(J_banded, b, lower=True)
    check_grads(solveh_banded, argnum=1, modes=['rev'], order=1)(J_banded, b, lower=True)

    J_banded = blocks_to_bands(J_diag, np.swapaxes(J_lower_diag, -1, -2), lower=False)
    check_grads(solveh_banded, argnum=0, modes=['rev'], order=1)(J_banded, b, lower=False)
    check_grads(solveh_banded, argnum=1, modes=['rev'], order=1)(J_banded, b, lower=False)
def logdet_symm_block_tridiag(H_diag, H_upper_diag):
    """
    Compute the log determinant of a positive definite, symmetric block
    tridiagonal matrix.  Use the Kalman info filter to do so.  Specifically,
    the KF computes the normalizer:

        log Z = 1/2 h^T J^{-1} h - 1/2 log|J| + n/2 log(2 pi)

    We set h = 0 to get -1/2 log|J| + n/2 log(2 pi), and from this we solve
    for log|J|.
    """
    T, D, _ = H_diag.shape
    assert H_diag.ndim == 3 and H_diag.shape[2] == D
    assert H_upper_diag.shape == (T - 1, D, D)

    J_init = J_11 = J_22 = np.zeros((D, D))
    h_init = h_1 = h_2 = np.zeros((D,))
    log_Z_init = 0

    J_21 = np.swapaxes(H_upper_diag, -1, -2)
    log_Z_pair = 0

    J_node = H_diag
    h_node = np.zeros((T, D))
    log_Z_node = 0

    logZ, _, _ = kalman_info_filter(J_init, h_init, log_Z_init,
                                    J_11, J_21, J_22, h_1, h_2, log_Z_pair,
                                    J_node, h_node, log_Z_node)

    # logZ = -1/2 log|J| + n/2 log(2 pi)
    logdetJ = -2 * (logZ - (T * D) / 2 * np.log(2 * np.pi))
    return logdetJ
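# Hedged sanity-check sketch (not from the original source): build a random SPD block-tridiagonal
# matrix from a block-bidiagonal factor and compare against the dense log-determinant. Assumes
# npr is numpy.random and that logdet_symm_block_tridiag (with its kalman_info_filter dependency)
# is importable.
def _check_logdet_symm_block_tridiag(T=4, D=2):
    B = np.zeros((T * D, T * D))
    for t in range(T):
        B[t * D:(t + 1) * D, t * D:(t + 1) * D] = npr.randn(D, D)
    for t in range(T - 1):
        B[(t + 1) * D:(t + 2) * D, t * D:(t + 1) * D] = npr.randn(D, D)
    J = B.T @ B + 1e-3 * np.eye(T * D)  # SPD and exactly block tridiagonal
    H_diag = np.array([J[t * D:(t + 1) * D, t * D:(t + 1) * D] for t in range(T)])
    H_upper_diag = np.array([J[t * D:(t + 1) * D, (t + 1) * D:(t + 2) * D] for t in range(T - 1)])
    assert np.allclose(logdet_symm_block_tridiag(H_diag, H_upper_diag), np.linalg.slogdet(J)[1])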
def exppdf(slope):
    maxR = 3.3
    minR = 2.9

    valsReco = np.linspace(minR, maxR, 100)

    I = (np.exp(-slope * minR) - np.exp(-slope * maxR)) / slope
    massbinwidth = (maxR - minR) / 100

    h = np.tensordot(slope, valsReco, axes=0)
    h_ext = np.swapaxes(np.swapaxes(h, 2, 4), 3, 4)

    pdf = np.exp(-h_ext) / I
    return pdf * massbinwidth
def convert_lds_to_block_tridiag(As, bs, Qi_sqrts, ms, Ri_sqrts):
    """
    Parameterize the LDS in terms of pairwise linear Gaussian dynamics
    and per-timestep Gaussian observations.

        p(x_{1:T}; theta)
            = [prod_{t=1}^{T-1} N(x_{t+1} | A_t x_t + b_t, Q_t)]
            * [prod_{t=1}^T N(x_t | m_t, R_t)]

    We can rewrite this as a Gaussian with a block tridiagonal precision
    matrix J.  The blocks of this matrix are:

        J_{t,t}   = A_t.T Q_t^{-1} A_t + Q_{t-1}^{-1} + R_t^{-1}
        J_{t,t+1} = -Q_t^{-1} A_t

    The linear term is h_t:

        h_t = -A_t.T Q_t^{-1} b_t + Q_{t-1}^{-1} b_{t-1} + R_t^{-1} m_t

    We parameterize the model in terms of

        theta = {A_t, b_t, Q_t^{-1/2}}_{t=1}^{T-1}, {m_t, R_t^{-1/2}}_{t=1}^T
    """
    T, D = ms.shape
    assert As.shape == (T-1, D, D)
    assert bs.shape == (T-1, D)
    assert Qi_sqrts.shape == (T-1, D, D)
    assert Ri_sqrts.shape == (T, D, D)

    # Construct the inverse covariance matrices
    Qis = np.matmul(Qi_sqrts, np.swapaxes(Qi_sqrts, -1, -2))
    Ris = np.matmul(Ri_sqrts, np.swapaxes(Ri_sqrts, -1, -2))

    # Construct the joint, block-tridiagonal precision matrix
    J_lower_diag = -np.matmul(Qis, As)
    J_diag = np.concatenate([-np.matmul(np.swapaxes(As, -1, -2), J_lower_diag), np.zeros((1, D, D))]) \
        + np.concatenate([np.zeros((1, D, D)), Qis]) \
        + Ris

    # Construct the linear term
    h = np.concatenate([np.matmul(J_lower_diag, bs[:, :, None])[:, :, 0], np.zeros((1, D))]) \
        + np.concatenate([np.zeros((1, D)), np.matmul(Qis, bs[:, :, None])[:, :, 0]]) \
        + np.matmul(Ris, ms[:, :, None])[:, :, 0]

    return J_diag, J_lower_diag, h
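# Hedged usage sketch (not from the original source): draw random LDS parameters with the shapes
# documented above and check the shapes of the returned natural parameters. Assumes np is
# (autograd.)numpy and npr is numpy.random.
def _demo_convert_lds_to_block_tridiag(T=6, D=3):
    As = npr.randn(T - 1, D, D)
    bs = npr.randn(T - 1, D)
    Qi_sqrts = npr.randn(T - 1, D, D)
    ms = npr.randn(T, D)
    Ri_sqrts = npr.randn(T, D, D)
    J_diag, J_lower_diag, h = convert_lds_to_block_tridiag(As, bs, Qi_sqrts, ms, Ri_sqrts)
    assert J_diag.shape == (T, D, D)
    assert J_lower_diag.shape == (T - 1, D, D)
    assert h.shape == (T, D)
    return J_diag, J_lower_diag, h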
def build_batch(idxs, cache=None):
    try:
        with h5py.File('train_0.h5', 'r') as hf:
            zero_train = hf['train'][:, idxs]
        with h5py.File('train_1.h5', 'r') as hf:
            one_train = hf['train'][:, idxs]
        with h5py.File('train_2.h5', 'r') as hf:
            two_train = hf['train'][:, idxs]
        with h5py.File('train_3.h5', 'r') as hf:
            three_train = hf['train'][:, idxs]
    except:
        ct_train = train_images.shape[0]
        for i in range(4):
            print(datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            data = train_images[(i * int(ct_train / 4)):((i + 1) * int(ct_train / 4))]
            train_batch = mesh_traversal.mesh_strider_batch(adj_mtx, data, coords, r, stride, cache)
            if i == 0:
                with h5py.File('train_0.h5', 'w') as hf:
                    hf.create_dataset("train", data=train_batch)
            elif i == 1:
                with h5py.File('train_1.h5', 'w') as hf:
                    hf.create_dataset("train", data=train_batch)
            elif i == 2:
                with h5py.File('train_2.h5', 'w') as hf:
                    hf.create_dataset("train", data=train_batch)
            elif i == 3:
                with h5py.File('train_3.h5', 'w') as hf:
                    hf.create_dataset("train", data=train_batch)

    tr_batch = np.concatenate((zero_train, one_train, two_train, three_train), axis=1)
    tr_batch = np.swapaxes(tr_batch, 1, 0)
    tr_batch = np.swapaxes(tr_batch, 1, 5)
    tr_batch = np.swapaxes(tr_batch, 4, 5)
    tr_batch = np.squeeze(tr_batch, axis=(2, 3))
    return tr_batch
def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1, 2))
    else:
        raise ArithmeticError
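# Hedged example (not from the original source): for a symmetric input, lower_half folds the two
# triangles into the lower triangle and halves the diagonal, so L + L.T reconstructs the matrix.
# Assumes np is numpy.
def _demo_lower_half():
    S = np.array([[2., 1.], [1., 3.]])
    L = lower_half(S)
    assert np.allclose(L, np.array([[1., 0.], [1., 1.5]]))
    assert np.allclose(L + L.T, S)
    return L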
def convolve_tensor(a, b):
    """
    Convolves two tensorized arrays, most efficient convolution method.
    :param a: first array to be convoluted
    :param b: second array to be convoluted
    :return: convolved array
    """
    b = as_strided_seq(b, 5, 1)
    b = np.moveaxis(b, [0, 1, 2, 3, 4, 5], [0, 3, 4, 5, 1, 2])
    b = np.moveaxis(b, 5, 1)
    try:
        out = npo.einsum(a, [12, 1, 10, 11], b, [4, 12, 10, 11, 8, 9])
        out = np.swapaxes(out, 0, 1)
    except:
        # a may be an autograd box; unwrap to the underlying value
        a = a._value
        out = npo.einsum(a, [12, 1, 10, 11], b, [4, 12, 10, 11, 8, 9])
        out = np.swapaxes(out, 0, 1)
    return out
def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1, 2))
    else:
        raise ArithmeticError
def _laplace_neg_hessian_params_to_hs(self, x, J_ini, J_dyn_11, J_dyn_21, J_dyn_22, J_obs):
    h_ini = J_ini @ x[0]

    h_dyn_1 = (J_dyn_11 @ x[:-1][:, :, None])[:, :, 0]
    h_dyn_1 += (np.swapaxes(J_dyn_21, -1, -2) @ x[1:][:, :, None])[:, :, 0]

    h_dyn_2 = (J_dyn_22 @ x[1:][:, :, None])[:, :, 0]
    h_dyn_2 += (J_dyn_21 @ x[:-1][:, :, None])[:, :, 0]

    h_obs = (J_obs @ x[:, :, None])[:, :, 0]
    return h_ini, h_dyn_1, h_dyn_2, h_obs
def Cs(self):
    D = self.D
    T = lambda X: np.swapaxes(X, -1, -2)

    Bs = 0.5 * (self._Ms - T(self._Ms))  # Bs is skew symmetric
    Fs = np.matmul(T(self._As), self._As) - Bs
    trm1 = np.concatenate((np.eye(D) - Fs, 2 * self._As), axis=1)
    trm2 = np.eye(D) + Fs
    Cs = T(np.linalg.solve(T(trm2), T(trm1)))
    assert np.allclose(
        np.matmul(T(Cs), Cs),
        np.tile(np.eye(D)[None, :, :], (Cs.shape[0], 1, 1)))
    return Cs
def _hessian(self, sigma, Y, K_X):
    by_dim = False
    n_y, d = Y.shape
    K = self._weighted_kernel(sigma, Y, None, K_X)
    hessian = np.zeros((d * d, n_y, n_y))
    Y = np.array(Y, order='F')
    for i in range(d):
        for j in range(d):
            c_start, c_end = j * n_y, j * n_y + n_y
            r_start, r_end = i * n_y, i * n_y + n_y
            tmp = self._hessian_bloc_dim(sigma, Y[:, i], Y[:, j], K, i, j)
            tmp = np.reshape(tmp, [1, tmp.shape[0], tmp.shape[0]])
            if i == 0 and j == 0:
                hessian = 1. * tmp
            else:
                hessian = np.concatenate([hessian, tmp], axis=0)
    hessian = np.reshape(hessian, [d, d, n_y, n_y])
    hessian = np.swapaxes(hessian, 0, 2)
    hessian = np.swapaxes(hessian, 2, 3)
    hessian = np.reshape(hessian, [d * n_y, d * n_y])
    return hessian
def sample(cls, params=None, nat_params=None, size=1):
    # Sample from P( x | Ѳ; α )
    assert (params is None) ^ (nat_params is None)
    (alpha, ) = params if params is not None else cls.natToStandard(*nat_params)
    ans = np.swapaxes(
        np.array([Dirichlet.sample(params=(a, ), size=size) for a in alpha]), 0, 1)
    cls.checkShape(ans)
    return ans
def train_blstm(X_train, Y_train, X_test, Y_test, cap_train, cap_test,
                train_mask, test_mask, num_hiddens=100, batch_size=15,
                L1_REG=1e-5, step_size=0.001, num_iters=5000,
                init_params=None, one_hot=None):
    '''
    X_train and X_test are N x W x D arrays where N is the number of sentences,
    W is the max number of words in a sentence, and D is the vector representation
    for a word.

    For Y_train and Y_test the third dimension D is the one hot representation of
    the part of speech, which is described by the dict one_hot.

    For cap_train and cap_test the third dimension is the cap_vector.

    train_mask and test_mask are N dimensional vectors where each item i represents
    the number of words in sentence i
    '''
    if one_hot is None:
        with open('storage/one_hot_list', 'rb') as f:
            one_hot = pickle.load(f)

    X_train = np.swapaxes(X_train, 0, 1)
    Y_train = np.swapaxes(Y_train, 0, 1)
    X_test = np.swapaxes(X_test, 0, 1)
    Y_test = np.swapaxes(Y_test, 0, 1)
    cap_train = np.swapaxes(cap_train, 0, 1)
    cap_test = np.swapaxes(cap_test, 0, 1)

    index_generator = batch_index_generator(X_train.shape[1], batch_size=batch_size)

    if init_params is None:
        init_params = init_blstm_params(input_size=X_train.shape[2],
                                        output_size=Y_train.shape[2],
                                        state_size=num_hiddens,
                                        param_scale=0.05)

    def training_loss(params, iter):
        sample_indices = next(index_generator)
        log_lik = -log_likelihood(params,
                                  X_train[:, sample_indices, :],
                                  Y_train[:, sample_indices, :],
                                  train_mask[sample_indices],
                                  cap_train[:, sample_indices, :])
        return log_lik + L1_REG * l1_norm(params)

    # Differentiate the loss with autograd before handing it to the optimizer
    training_loss_grad = grad(training_loss)

    trained_params = adam(training_loss_grad, init_params,
                          step_size=step_size, num_iters=num_iters)

    return trained_params
def test_multivariate_normal_logpdf_unique_params(D=10):
    # Test broadcasting over datapoints and corresponding parameters
    leading_ndim = npr.randint(1, 4)
    shp = npr.randint(1, 10, size=leading_ndim)
    x = npr.randn(*shp, D)
    mu = npr.randn(*shp, D)
    L = npr.randn(*shp, D, D)
    Sigma = np.matmul(L, np.swapaxes(L, -1, -2))

    ll1 = multivariate_normal_logpdf(x, mu, Sigma)

    ll2 = np.empty(shp)
    for inds in product(*[np.arange(s) for s in shp]):
        ll2[inds] = mvn.logpdf(x[inds], mu[inds], Sigma[inds])
    assert np.allclose(ll1, ll2)
def _m_step_ar(self, expectations, datas, inputs, masks, tags, num_em_iters):
    K, D, M, lags = self.K, self.D, self.M, self.lags

    # Collect data for this dimension
    xs, ys, Ezs = [], [], []
    for (Ez, _, _), data, input, mask, tag in zip(expectations, datas, inputs, masks, tags):
        # Only use data if it is complete
        if not np.all(mask):
            raise Exception("Encountered missing data in AutoRegressiveObservations!")

        xs.append(
            np.hstack([data[self.lags-l-1:-l-1] for l in range(self.lags)]
                      + [input[self.lags:, :self.M], np.ones((data.shape[0]-self.lags, 1))]))
        ys.append(data[self.lags:])
        Ezs.append(Ez[self.lags:])

    for itr in range(num_em_iters):
        # Compute expected precision for each data point given current parameters
        taus = []
        for x, y in zip(xs, ys):
            # mus = self._compute_mus(data, input, mask, tag)
            # sigmas = self._compute_sigmas(data, input, mask, tag)
            Afull = np.concatenate((self.As, self.Vs, self.bs[:, :, None]), axis=2)
            mus = np.matmul(Afull[None, :, :, :], x[:, None, :, None])[:, :, :, 0]
            sigmas = np.exp(self.inv_sigmas)

            # nu: (K,)  mus: (T, K, D)  sigmas: (K, D)  y: (T, D)  ->  tau: (T, K, D)
            alpha = np.exp(self.inv_nus[:, None]) / 2 + 1 / 2
            beta = np.exp(self.inv_nus[:, None]) / 2 + 1 / 2 * (y[:, None, :] - mus)**2 / sigmas
            taus.append(alpha / beta)

        # Fit the weighted linear regressions for each K and D
        J = np.tile(np.eye(D * lags + M + 1)[None, None, :, :], (K, D, 1, 1))
        h = np.zeros((K, D, D * lags + M + 1,))
        for x, y, Ez, tau in zip(xs, ys, Ezs, taus):
            robust_ar_statistics(Ez, tau, x, y, J, h)

        mus = np.linalg.solve(J, h)
        self.As = mus[:, :, :D*lags]
        self.Vs = mus[:, :, D*lags:D*lags+M]
        self.bs = mus[:, :, -1]

        # Fit the variance
        sqerr = 0
        weight = 0
        for x, y, Ez, tau in zip(xs, ys, Ezs, taus):
            yhat = np.matmul(x[None, :, :], np.swapaxes(mus, -1, -2))
            sqerr += np.einsum('tk, tkd, ktd -> kd', Ez, tau, (y - yhat)**2)
            weight += np.sum(Ez, axis=0)
        self.inv_sigmas = np.log(sqerr / weight[:, None] + 1e-16)
def vjp(g):
    ge, gu = g
    ge = _matrix_diag(ge)
    f = 1 / (e[..., anp.newaxis, :] - e[..., :, anp.newaxis] + 1.e-20)
    f -= _diag(f)
    ut = anp.swapaxes(u, -1, -2)
    r1 = f * _dot(ut, gu)
    r2 = -f * (_dot(_dot(ut, anp.conj(u)), anp.real(_dot(ut, gu)) * anp.eye(n)))
    r = _dot(_dot(anp.linalg.inv(ut), ge + r1 + r2), ut)
    if not anp.iscomplexobj(x):
        r = anp.real(r)
        # the derivative is still complex for real input (imaginary delta is allowed),
        # real output, but the derivative should be real in the real-input case when
        # an imaginary delta is forbidden
    return r
def Cs(self):
    # See https://pubs.acs.org/doi/pdf/10.1021/acs.jpca.5b02015
    # for a derivation of the rational Cayley transform.
    D = self.D
    T = lambda X: np.swapaxes(X, -1, -2)

    Bs = 0.5 * (self._Ms - T(self._Ms))  # Bs is skew symmetric
    Fs = np.matmul(T(self._As), self._As) - Bs
    trm1 = np.concatenate((np.eye(D) - Fs, 2 * self._As), axis=1)
    trm2 = np.eye(D) + Fs
    Cs = T(np.linalg.solve(T(trm2), T(trm1)))
    assert np.allclose(
        np.matmul(T(Cs), Cs),
        np.tile(np.eye(D)[None, :, :], (Cs.shape[0], 1, 1)))
    return Cs
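# Hedged sketch (not from the original source): the classic (non-rational) Cayley transform maps a
# skew-symmetric B to an orthogonal matrix C = (I - B)^{-1}(I + B), the same C^T C = I property
# asserted above. Assumes np is numpy and npr is numpy.random.
def _demo_cayley_transform(D=3):
    M = npr.randn(D, D)
    B = 0.5 * (M - M.T)  # skew-symmetric
    C = np.linalg.solve(np.eye(D) - B, np.eye(D) + B)
    assert np.allclose(C.T @ C, np.eye(D))
    return C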
def tensorize_and_convolve_fmri(a, adj_mtx, vals_list, coords, r, stride):
    """
    Strides the mesh and applies convolution operation. Prepares tensors within the function,
    so not as efficient as mesh_convolve_tensorized(). If operating on already strided data,
    use mesh_convolve_tensorized() or mesh_convolve_tensorized_dyn().
    :param a: tensor of filters
    :param adj_mtx: adjacency matrix
    :param coords: coordinates of each vertex
    :return: result of the convolution operation
    """
    vals_list = np.expand_dims(vals_list, axis=1)
    vals_list = np.swapaxes(vals_list, 2, 3)
    try:
        vals_list = vals_list._value
    except:
        pass
    strided_mesh = mesh_strider_batch(adj_mtx, vals_list, coords, r, stride, None)
    strided_vers = np.squeeze(np.array(strided_mesh))
    a = np.array([a])
    try:
        out = npo.einsum(a, [5, 3, 4, 2], strided_vers, [0, 1, 2, 3])
    except:
        # a or strided_vers may be an autograd box; unwrap to the underlying value
        try:
            a = a._value
            out = npo.einsum(a, [5, 3, 4, 2], strided_vers, [0, 1, 2, 3])
        except:
            strided_vers = strided_vers._value
            out = npo.einsum(a, [5, 3, 4, 2], strided_vers, [0, 1, 2, 3])
    # out = out[0]
    out = np.swapaxes(out, 0, 1)
    out = np.swapaxes(out, 1, 2)
    return out
def conjugate_transpose(matrix):
    """
    Compute the conjugate transpose of a matrix.
    Args:
    matrix :: numpy.ndarray - the matrix to compute the conjugate transpose of

    Returns:
    conjugate_transpose_ :: numpy.ndarray - the conjugate transpose of matrix
    """
    conjugate_transpose_ = anp.conjugate(anp.swapaxes(matrix, -1, -2))
    return conjugate_transpose_
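# Hedged example (not from the original source): conjugate_transpose conjugates and swaps the last
# two axes, so for a 2-D array it matches conj(A.T) and it also works on stacks of matrices.
# Assumes anp is autograd.numpy.
def _demo_conjugate_transpose():
    A = anp.array([[1. + 2.j, 3.], [0., 4. - 1.j]])
    Ah = conjugate_transpose(A)
    assert anp.allclose(Ah, anp.conjugate(A.T))
    batch = anp.stack([A, 2. * A])
    assert conjugate_transpose(batch).shape == (2, 2, 2)
    return Ah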
def trace_product(A, B):
    """
    Compute trace of the matrix product A*B efficiently.

    A, B can be 2D or 3D arrays, in which case the trace is computed along the
    last two axes.  In this case, the function will return an array.

    Computed using the fact that tr(AB) = sum_{ij} A_{ij} B_{ji}.
    """
    ndimsA = A.ndim
    ndimsB = B.ndim
    assert ndimsA == ndimsB, "Both A and B must have same number of dimensions."
    assert ndimsA <= 3, "A and B must have 3 or fewer dimensions"

    # We'll take the trace along the last two dimensions.
    BT = np.swapaxes(B, -1, -2)
    return np.sum(A * BT, axis=(-1, -2))
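# Hedged example (not from the original source): trace_product should agree with np.trace(A @ B)
# for square matrices, and broadcast over a leading batch axis. Assumes np is numpy and npr is
# numpy.random.
def _demo_trace_product():
    A = npr.randn(4, 4)
    B = npr.randn(4, 4)
    assert np.allclose(trace_product(A, B), np.trace(A @ B))
    Ab = npr.randn(3, 4, 4)
    Bb = npr.randn(3, 4, 4)
    expected = np.array([np.trace(a @ b) for a, b in zip(Ab, Bb)])
    assert np.allclose(trace_product(Ab, Bb), expected)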
def expm_eigh(h):
    """
    Compute the unitary operator of a hermitian matrix.
    U = expm(-1j * h)

    Arguments:
    h :: ndarray (N x N) - The matrix to exponentiate, which must be hermitian.

    Returns:
    expm_h :: ndarray (N x N) - The unitary operator of h.
    """
    eigvals, p = anp.linalg.eigh(h)
    p_dagger = anp.conjugate(anp.swapaxes(p, -1, -2))
    d = anp.exp(-1j * eigvals)
    return anp.matmul(p * d, p_dagger)
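# Hedged example (not from the original source): for a Hermitian h, expm_eigh(h) = expm(-1j * h)
# should be unitary, i.e. U U^dagger = I. Assumes anp is autograd.numpy.
def _demo_expm_eigh():
    h = anp.array([[0., 1.], [1., 0.]])  # real symmetric, hence Hermitian
    U = expm_eigh(h)
    assert anp.allclose(U @ anp.conjugate(U.T), anp.eye(2))
    return U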
def T(X):
    return np.swapaxes(X, -1, -2) if np.ndim(X) > 1 else X


def symmetrize(X):
    return 0.5 * (X + T(X))
def generalized_outer_product(x):
    if np.ndim(x) == 1:
        return np.outer(x, x)
    return np.matmul(x, np.swapaxes(x, -1, -2))
import autograd.numpy.random as npr
import autograd.scipy.linalg as spla
from autograd.util import *
from autograd import grad
from builtins import range

npr.seed(1)


def check_symmetric_matrix_grads(fun, *args):
    def symmetrize(A):
        L = np.tril(A)
        return (L + T(L)) / 2.
    new_fun = lambda *args: fun(symmetrize(args[0]), *args[1:])
    return check_grads(new_fun, *args)


T = lambda A: np.swapaxes(A, -1, -2)


def rand_psd(D):
    mat = npr.randn(D, D)
    return np.dot(mat, mat.T)


def test_inv():
    def fun(x):
        return to_scalar(np.linalg.inv(x))
    d_fun = lambda x: to_scalar(grad(fun)(x))
    D = 8
    mat = npr.randn(D, D)
    mat = np.dot(mat, mat) + 1.0 * np.eye(D)
    check_grads(fun, mat)
    check_grads(d_fun, mat)


def test_inv_3d():