def R_cb(x):
    """Confidence bound(s) on the survival function evaluated at ``x``.

    This is a closure body: ``self``, ``pvars``, ``bound`` and ``alpha_ci``
    are read from the enclosing scope, and ``jacobian`` / ``z`` must be
    available at module level.

    Returns the transposed array of bounds; for ``bound == 'two-sided'``
    two bounds are produced per point (one for each sign of the offset).
    """
    def sf_func(params):
        return self.dist.sf(x - self.gamma, *params)

    jac = np.atleast_2d(jacobian(sf_func)(np.array(self.params)))

    # Variance of R at each point: products of jacobian entries weighted by
    # the upper-triangular entries of the inverse Hessian (``pvars``).
    # NOTE(review): off-diagonal terms are counted once here, whereas the
    # quadratic form J @ cov @ J.T would count them twice -- confirm intent.
    var_R = []
    for grad in jac:
        outer = np.atleast_2d(grad).T * grad
        upper = outer[np.triu_indices(outer.shape[0])]
        var_R.append(np.sum(upper * pvars))
    # Diagonal-only alternative that was left disabled in the original:
    # var_R = (jac**2 * np.diag(hess_inv)).sum(axis=1).T

    R_hat = self.sf(x)

    if bound == 'two-sided':
        signs = np.array([1., -1.]).reshape(2, 1)
        diff = z(alpha_ci / 2) * np.sqrt(np.array(var_R)) * signs
    elif bound == 'upper':
        diff = z(alpha_ci) * np.sqrt(np.array(var_R))
    else:
        diff = -z(alpha_ci) * np.sqrt(np.array(var_R))

    # The offset is applied on the logit scale of R_hat.
    exponent = diff / (R_hat * (1 - R_hat))
    bounds = R_hat / (R_hat + (1 - R_hat) * np.exp(exponent))
    return bounds.T
def grad_like(self, r, eps):
    """
    Gradient of likelihood w.r.t variational parameters

    Also stores the (NaN-zeroed) likelihood gradient on ``self.dr``.

    Args:
        r (): Transformed random sample
        eps (): Random sample

    Returns:
        list with one (n_d, n_d) gradient matrix per factor in ``self.Rs``
        (only upper-triangular entries are filled), and the gradient
        w.r.t. the mean
    """
    r_obs = r if self.obs_idx is None else r[self.obs_idx]
    dr = self.likelihood_grad(r_obs, self.y)
    dr[np.isnan(dr)] = 0.
    self.dr = dr

    grads_R = []
    for d, factor in enumerate(self.Rs):
        # Work on a copy so factor ``d`` can be swapped for basis matrices.
        Rs_copy = deepcopy(self.Rs)
        n = factor.shape[0]
        grad_R = np.zeros((n, n))
        rows, cols = np.triu_indices(n)
        for i, j in zip(rows, cols):
            # Single-entry matrix selecting element (i, j) of factor ``d``.
            basis = np.zeros((n, n))
            basis[i, j] = 1.
            Rs_copy[d] = basis
            dR_eps = kron_mvp(Rs_copy, eps)
            if self.obs_idx is not None:
                dR_eps = dR_eps[self.obs_idx]
            grad_R[i, j] = np.sum(np.multiply(dr, dR_eps))
        grads_R.append(grad_R)

    grad_mu = np.zeros(self.n)
    grad_mu[self.obs_idx] = dr
    return grads_R, grad_mu
def params2chol(params, D):
    """Build a ``D x D`` upper-triangular matrix from a flat vector.

    ``params`` fills the upper triangle (diagonal included) in row-major
    order; the diagonal entries are then replaced by their exponential.
    """
    chol = np.zeros((D, D))
    chol[np.triu_indices(D)] = params.copy()
    diag = np.diag_indices(D)
    chol[diag] = np.exp(chol[diag])
    return chol
def U_to_vec(U):
    """Read the strictly upper-triangular entries of ``U`` into a vector.

    The diagonal is **not** included and entries are read out by rows.
    The matrix is taken to be the **first two** dimensions of ``U``; any
    further dimensions are kept as trailing axes of the result.
    """
    size = U.shape[0]
    row_idx, col_idx = np.triu_indices(size, k=1)
    return U[row_idx, col_idx, ...]
def pdists_vec_to_sym(v, n):
    r"""Arrange the :math:`\frac{n (n - 1)}{2}` distances in a symmetric
    matrix with zeros on the diagonal.

    ``v`` fills the strict upper triangle in row-major order and is then
    mirrored into the lower triangle.
    """
    upper = np.zeros(shape=(n, n))
    upper[np.triu_indices(n, 1)] = v
    return upper + upper.T
def calc_potential_energy(scalings, X):
    """Mean inverse squared pairwise distance of the points derived from ``X``.

    The points come from ``get_points(X, scalings)`` and only distinct
    pairs (strict upper triangle) are considered.

    NOTE(review): when any squared distance is below 1e-12 this returns a
    2-tuple ``(nan, nan)``, while the normal path returns a single scalar.
    """
    points = get_points(X, scalings)
    rows, cols = anp.triu_indices(len(points), 1)
    D = squared_dist(points, points)[rows, cols]

    if not np.any(D < 1e-12):
        return (1 / D).mean()
    # (Near-)coincident points: the energy is reported as undefined.
    return np.nan, np.nan
def derivative_names(ndim):
    """Iterate over derivative specifications and their names.

    Yields ``(axes, name)`` pairs: the function value, then the first
    derivatives, then the pure second derivatives, then the mixed second
    derivatives (axis pairs ``i < j`` in row-major order).
    """
    # Note: len(list(derivative_names(ndim))) == triangular(ndim + 1).
    axes = range(ndim)

    # Function value.
    yield (), 'f'

    # First derivative along an axis.
    for axis in axes:
        yield (axis, ), 'df/d%i' % axis

    # Second derivative along an axis.
    for axis in axes:
        yield (axis, axis), 'd^2f/d%i^2' % axis

    # Second derivative along mixed axes.
    rows, cols = np.triu_indices(ndim, k=1)
    for first, second in zip(rows.tolist(), cols.tolist()):
        yield (first, second), 'd^2f/(d%i d%i)' % (first, second)
def symmetric_matrix(arr, n):
    """Expand a flat upper-triangle vector into a symmetric ``n x n`` matrix.

    ``arr`` holds the upper triangle (diagonal included) in row-major
    order and must have length ``n * (n + 1) / 2``; otherwise an
    ``Exception`` is raised.
    """
    expected_len = n * (n + 1) / 2
    if len(arr) != expected_len:
        raise Exception("Array must have dimensions n*(n+1)/2")

    rows, cols = np.triu_indices(n)
    ret = np.zeros((n, n))
    ret[rows, cols] = arr
    # Mirror the same values into the lower triangle.
    ret[cols, rows] = arr
    assert np.all(ret == ret.T)
    return ret
def vec_to_U(v):
    """Place a vector into the strict upper triangle of a square matrix.

    The vector **does not** include diagonal entries and the matrix is
    filled in by rows; everything else is zero.
    """
    # Matrix size d is recovered from the vector length N = d * (d - 1) / 2.
    n_entries = v.size
    size = int((1 + np.sqrt(1 + 8 * n_entries)) / 2)

    upper = np.zeros((size, size))
    upper[np.triu_indices(size, 1)] = v
    return upper
def R_cb(self, t, cb=0.05):
    """Two-sided confidence bounds on the survival function at ``t``.

    ``cb`` is the significance level; ``z(cb / 2)`` scales the standard
    error. The variance at each point is built from the jacobian of the
    survival function (evaluated at ``self.res.x``) and the upper-triangular
    entries of ``self.hess_inv``. Returns the transposed array of bounds.
    """
    def ssf(params):
        # Rows of the reshaped parameters: first column is the weight,
        # the remaining columns are passed on to ``self.dist.ff``.
        table = np.reshape(params, (self.m, self.dist.k + 1))
        F = np.zeros_like(t)
        for idx in range(self.m):
            F = F + table[idx, 0] * self.dist.ff(t, *table[idx, 1:])
        return 1 - F

    n_params = self.hess_inv.shape[0]
    pvars = self.hess_inv[np.triu_indices(n_params)]

    with np.errstate(all='ignore'):
        jac = jacobian(ssf)(self.res.x)

    # NOTE(review): off-diagonal terms are counted once here, whereas the
    # quadratic form J @ cov @ J.T would count them twice -- confirm intent.
    var_u = []
    for grad in jac:
        outer = np.atleast_2d(grad).T * grad
        upper = outer[np.triu_indices(outer.shape[0])]
        var_u.append(np.sum(upper * pvars))

    signs = np.array([1., -1.]).reshape(2, 1)
    diff = z(cb / 2) * np.sqrt(np.array(var_u)) * signs

    R_hat = self.sf(t)
    # The offset is applied on the logit scale of R_hat.
    exponent = diff / (R_hat * (1 - R_hat))
    bounds = R_hat / (R_hat + (1 - R_hat) * np.exp(exponent))
    return bounds.T
def grad_KL_R(self):
    """
    Gradient of KL divergence w.r.t variational covariance

    Returns: list with one gradient matrix per factor in ``self.Rs``;
        only the upper triangle (diagonal included) is filled and any
        NaN/inf entries are replaced via ``np.nan_to_num``.
    """
    grad_Rs = []
    for d, R_d in enumerate(self.Rs):
        n = R_d.shape[0]
        R_inv = np.linalg.inv(R_d)
        K_inv_R = self.K_invs[d].dot(R_d)
        # Product of the traces of every factor except the d-th one.
        scale = np.prod(self.traces) / self.traces[d]

        rows, cols = np.triu_indices(n)
        grad_R = np.zeros((n, n))
        grad_R[rows, cols] = - R_inv[rows, cols] + scale * K_inv_R[rows, cols]
        grad_Rs.append(np.nan_to_num(grad_R))
    return grad_Rs
def taylor_approx(target, stencil, values):
    """Use taylor series to approximate up to second order derivatives.

    Args:
      target: An array of shape (..., n), a batch of n-dimensional points
        where one wants to approximate function value and derivatives.
      stencil: An array of shape broadcastable to (..., k, n), for each
        target point a set of k = triangular(n + 1) points to use on its
        approximation.
      values: An array of shape broadcastable to (..., k), the function
        value at each of the stencil points.

    Returns:
      An array of shape (..., k), for each target point the approximated
      function value, gradient and hessian evaluated at that point
      (flattened and in the same order as returned by derivative_names).
    """
    # Bring stencil and values to their full batched shapes.
    ndim = target.shape[-1]
    batch_shape = target.shape[:-1]
    n_points = triangular(ndim + 1)
    stencil = np.broadcast_to(stencil, batch_shape + (n_points, ndim))
    values = np.broadcast_to(values, stencil.shape[:-1])

    # Offsets of every stencil point from its target, and their pairwise
    # products (outer product of each offset with itself).
    delta_x = stencil - np.expand_dims(target, axis=-2)
    delta_xy = delta_x[..., :, np.newaxis] * delta_x[..., np.newaxis, :]

    diag = np.arange(ndim)
    upper_rows, upper_cols = np.triu_indices(ndim, k=1)

    # Coefficients of the Taylor series equations, namely:
    #   f(stencil) = coeffs @ [f(target), df/d0(target), ...]
    constant_term = np.ones(delta_x.shape[:-1] + (1, ))     # f(target)
    linear_terms = delta_x                                  # df/di(target)
    pure_second_terms = delta_xy[..., diag, diag] / 2       # d^2f/di^2(target)
    mixed_second_terms = delta_xy[..., upper_rows, upper_cols]  # d^2f/{dj dk}
    coeffs = np.concatenate(
        [constant_term, linear_terms, pure_second_terms, mixed_second_terms],
        axis=-1)

    # Then: [f(target), df/d0(target), ...] = coeffs^{-1} @ f(stencil)
    solution = np.matmul(np.linalg.inv(coeffs), values[..., np.newaxis])
    return np.squeeze(solution, axis=-1)
def myqr_vjp(g, ans, x):
    """Vector-Jacobian product for a two-output factorisation ``(q, r)``.

    ``g`` holds the incoming gradients for the two outputs and ``ans`` the
    outputs themselves (presumably a QR decomposition of ``x`` -- the
    decomposition itself is not visible here). ``x`` is not used.
    """
    from autograd.numpy import matmul as m
    from autograd.numpy.linalg import inv

    q, r = ans[0], ans[1]
    gq, gr = g[0], g[1]

    qt = q.T
    rt = r.T
    rtinv = inv(rt)

    mid = m(r, gr.T) - m(gr, rt) + m(qt, gq) - m(gq.T, q)

    # Mask that keeps only the strictly lower-triangular part of ``mid``.
    n = mid.shape[0]
    lower_mask = np.ones([n, n])
    lower_mask[np.triu_indices(n, k=0)] = 0

    first = m(q, gr + m(mid * lower_mask, rtinv))
    second = m((gq - m(q, m(qt, gq))), rtinv)
    return first + second
def main():
    r"""Main entry point in the graph embedding procedure.

    Loads the graph pairwise distances, optimises one point per node on a
    ``PositiveDefinite`` manifold so that squared manifold distances match
    squared graph distances, then prints the average distortion and the
    mean average precision of the result.
    """
    args = config_parser().parse_args()
    g_pdists = load_pdists(args)
    n = g_pdists.shape[0]
    d = args.manifold_dim

    # we are actually using only the upper diagonal part
    g_pdists = g_pdists[np.triu_indices(n, 1)]
    g_sq_pdists = g_pdists**2

    # read the graph
    # NOTE(review): no statement follows this comment, yet ``g`` is used in
    # the final ``mean_average_precision`` call below. Unless ``g`` is a
    # module-level name, that call raises NameError -- confirm.

    # the distortion cost
    def distortion_cost(X):
        # Sum over all pairs of |d_manifold^2 / d_graph^2 - 1|.
        man_sq_pdists = manifold_pdists(X, squared=True)
        return np.sum(np.abs(man_sq_pdists / g_sq_pdists - 1))

    # the manifold, problem, and solver
    manifold = PositiveDefinite(d, k=n)
    problem = Problem(manifold=manifold, cost=distortion_cost, verbosity=2)
    linesearch = ReduceLROnPlateau(start_lr=2e-2, patience=10, threshold=1e-4,
                                   factor=0.1, verbose=1)
    solver = ConjugateGradient(linesearch=linesearch, maxiter=1000)

    # solve it
    # NOTE(review): ``t`` is bound but never read afterwards.
    with Timer('training') as t:
        X_opt = solver.solve(problem, x=sample_init_points(n, d))

    # the distortion achieved
    man_pdists = manifold_pdists(X_opt)
    print('Average distortion: ', average_distortion(g_pdists, man_pdists))
    # NOTE(review): called with three positional arguments here; make sure the
    # ``pdists_vec_to_sym`` in scope accepts a third one (presumably a
    # diagonal fill value, given ``diag_adjusted=True`` below -- confirm).
    man_pdists_sym = pdists_vec_to_sym(man_pdists, n, 1e12)
    print('MAP: ', mean_average_precision(g, man_pdists_sym, diag_adjusted=True))
def manifold_pdists(X, squared=False):
    r"""Computes the pairwise distances between N points on the SPD manifold.

    This is meant as a faster alternative to calling
    :py:`pymanopt.manifolds.PositiveDefinite.dist` once per pair: the
    Cholesky factor of every matrix and its inverse are computed only once
    and then reused for all pairs that involve that matrix.

    The distance function is

    .. math::
        d(A, B) = \lVert \log(A^{-1/2} B A^{-1/2}) \rVert_F

    Parameters
    ----------
    X : numpy.ndarray
        The SPD matrices to compute distances between, given as an
        (n,d,d)-shaped array.
    squared : bool
        Whether the squared distances should be returned. Squaring here,
        rather than in the caller, is better for automatic differentiation
        than taking the square root and squaring it again.

    Returns
    -------
    numpy.ndarray
        One value per pair ``i < j``, ordered like ``np.triu_indices(n, 1)``.
    """
    first, second = np.triu_indices(X.shape[0], 1)

    # -> first, whiten X_j by the inverse Cholesky factor of X_i, 1<=i<j<=n
    chol = np.linalg.cholesky(X)
    chol_inv = np.linalg.inv(chol)
    # Gathering the operands once per pair avoids duplicating the product
    # below and does not mess up autograd.
    left = chol_inv[first, :, :]
    middle = X[second, :, :]
    A = np.einsum('mij,mjk,mlk->mil', left, middle, left)

    # -> then, compute the matrix logarithm and its squared Frobenius norm
    A_log = multilog(A)
    sq_dists = (A_log**2).sum(axis=(1, 2))
    if squared:
        return sq_dists
    return np.sqrt(sq_dists)
def mat_from_diag_triu_tril(diag, tri_upp, tri_low):
    """Build matrix from given components.

    Assembles a matrix from its diagonal, strictly upper triangular and
    strictly lower triangular parts. ``tri_low`` is written at the
    transposed positions of ``tri_upp``, i.e. entry ``m`` of ``tri_low``
    lands at ``(k, j)`` where entry ``m`` of ``tri_upp`` lands at ``(j, k)``.

    Parameters
    ----------
    diag : array_like, shape=[..., n]
    tri_upp : array_like, shape=[..., (n * (n - 1)) / 2]
    tri_low : array_like, shape=[..., (n * (n - 1)) / 2]

    Returns
    -------
    mat : array_like, shape=[..., n, n]
    """
    size = diag.shape[-1]
    diag_idx = np.diag_indices(size, ndim=1)[0]
    upp_rows, upp_cols = np.triu_indices(size, k=1)

    mat = np.zeros(diag.shape + (size, ))
    mat[..., diag_idx, diag_idx] = diag
    mat[..., upp_rows, upp_cols] = tri_upp
    mat[..., upp_cols, upp_rows] = tri_low
    return mat
def genConstraints(prng, label, alpha, beta, num_ML, num_CL, start_expert=0,
                   flag_same=False):
    """
    This function generates pairwise constraints (ML/CL) using ground-truth
    cluster label and noise parameters

    Parameters
    ----------
    prng: random generator exposing ``choice`` and ``binomial``
          (e.g. ``np.random.RandomState``). NOTE: it is only used when
          ``flag_same`` is true; otherwise expert ``m`` draws from a fresh
          ``np.random.RandomState(1000 + m)``.
    label: shape(n_sample, )
           cluster label of all the samples
    alpha: shape(n_expert, )
           sensitivity parameters of experts
    beta: shape(n_expert, )
           specificity parameters of experts
    num_ML: int
            number of must-link pairs drawn (must be strictly smaller than
            the number of same-label pairs)
    num_CL: int
            number of cannot-link pairs drawn (must be strictly smaller than
            the number of different-label pairs)
    start_expert: int
            offset added to the expert index written in the first column
    flag_same: True if different experts provide constraints for the same set
               of sample pairs, False if different experts provide constraints
               for different set of sample pairs

    Returns
    -------
    S: shape(n_con, 4)
       The first column -> expert id
       The second and third column -> (row, column) indices of two samples
       The fourth column -> constraint values (1 for ML and 0 for CL)
       For every expert the ML rows come first, followed by the CL rows.
    """
    labels = np.asarray(label)
    n_sample = len(labels)

    # All distinct sample pairs (row < col), n_sample * (n_sample - 1) / 2 of
    # them, split by whether both samples carry the same label.
    row, col = np.triu_indices(n_sample, k=1)
    pairs = np.array([row, col]).T
    same_cluster = labels[row] == labels[col]
    ML_set = pairs[same_cluster]
    CL_set = pairs[~same_cluster]

    assert num_ML < ML_set.shape[0]
    assert num_CL < CL_set.shape[0]
    assert len(alpha) == len(beta)
    n_expert = len(alpha)

    def draw_pairs(rng):
        # Sample the ML pairs first, then the CL pairs (draw order matters
        # for reproducibility).
        idx_ML = rng.choice(ML_set.shape[0], num_ML, replace=False)
        idx_CL = rng.choice(CL_set.shape[0], num_CL, replace=False)
        return ML_set[idx_ML, :], CL_set[idx_CL, :]

    def expert_rows(rng, m, ML, CL):
        # Noisy answers of expert m: an ML pair is reported as 1 with
        # probability alpha[m], a CL pair with probability 1 - beta[m].
        val_ML = rng.binomial(1, alpha[m], num_ML)
        val_CL = rng.binomial(1, 1 - beta[m], num_CL)
        expert_id = m + start_expert
        Sm_ML = np.hstack((np.ones((num_ML, 1)) * expert_id, ML,
                           val_ML.reshape(val_ML.size, 1)))
        Sm_CL = np.hstack((np.ones((num_CL, 1)) * expert_id, CL,
                           val_CL.reshape(val_CL.size, 1)))
        return np.vstack((Sm_ML, Sm_CL))

    blocks = []
    if flag_same:
        # different experts provide constraint for the same set of sample pairs
        ML, CL = draw_pairs(prng)
        for m in range(n_expert):
            blocks.append(expert_rows(prng, m, ML, CL))
    else:
        # different experts provide constraints for different sets of sample
        # pairs; each expert uses its own fixed-seed generator
        for m in range(n_expert):
            expert_rng = np.random.RandomState(1000 + m)
            ML, CL = draw_pairs(expert_rng)
            blocks.append(expert_rows(expert_rng, m, ML, CL))

    if not blocks:
        # No experts: keep the empty (0, 4) float matrix the loop started from.
        return np.zeros((0, 4))
    return np.vstack(blocks).astype(int)
def calc_potential_energy(A, d):
    """Log of the mean of ``1 / dist**d`` over all distinct pairs of rows of ``A``.

    Pairwise squared distances come from ``squared_dist(A, A)``; only the
    strict upper triangle (each pair once) is used.
    """
    rows, cols = anp.triu_indices(len(A), 1)
    sq_dists = squared_dist(A, A)[rows, cols]
    D = anp.sqrt(sq_dists)
    inverse_powers = 1 / D**d
    return anp.log(inverse_powers.mean())
def triu_to_vec(x, k=0):
    """Read the upper triangle of the last two axes of ``x`` into a vector.

    The side length is taken from the last axis of ``x``. ``k`` is the
    diagonal offset handed to ``triu_indices`` (0 includes the main
    diagonal). Leading axes are kept in front of the flattened triangle.
    """
    size = x.shape[-1]
    row_idx, col_idx = triu_indices(size, k=k)
    upper = x[..., row_idx, col_idx]
    return upper
def unpack_params(self, params):
    """Turn a flat parameter vector into an ``n_tasks x n_tasks`` matrix.

    ``params`` fills the strict upper triangle of a zero matrix ``T``
    (row-major order); the result is ``T @ T.T``.
    """
    size = self.n_tasks
    upper = np.zeros((size, size))
    upper[np.triu_indices(size, 1)] = params
    return upper.dot(upper.T)
def genConstraints(prng, label, alpha, beta, num_ML, num_CL, start_expert=0,
                   flag_same=False):
    """
    This function generates pairwise constraints (ML/CL) using ground-truth
    cluster label and noise parameters

    Parameters
    ----------
    prng: random generator exposing ``choice`` and ``binomial``
          (e.g. ``np.random.RandomState``). NOTE: it is only used when
          ``flag_same`` is true; otherwise expert ``m`` draws from a fresh
          ``np.random.RandomState(1000 + m)``.
    label: shape(n_sample, )
           cluster label of all the samples
    alpha: shape(n_expert, )
           sensitivity parameters of experts
    beta: shape(n_expert, )
           specificity parameters of experts
    num_ML: int
            number of must-link pairs drawn (must be strictly smaller than
            the number of same-label pairs)
    num_CL: int
            number of cannot-link pairs drawn (must be strictly smaller than
            the number of different-label pairs)
    start_expert: int
            offset added to the expert index written in the first column
    flag_same: True if different experts provide constraints for the same set
               of sample pairs, False if different experts provide constraints
               for different set of sample pairs

    Returns
    -------
    S: shape(n_con, 4)
       The first column -> expert id
       The second and third column -> (row, column) indices of two samples
       The fourth column -> constraint values (1 for ML and 0 for CL)
       For every expert the ML rows come first, followed by the CL rows.
    """
    labels = np.asarray(label)
    n_sample = len(labels)

    # All distinct sample pairs (row < col), n_sample * (n_sample - 1) / 2 of
    # them, split by whether both samples carry the same label.
    row, col = np.triu_indices(n_sample, k=1)
    pairs = np.array([row, col]).T
    same_cluster = labels[row] == labels[col]
    ML_set = pairs[same_cluster]
    CL_set = pairs[~same_cluster]

    assert num_ML < ML_set.shape[0]
    assert num_CL < CL_set.shape[0]
    assert len(alpha) == len(beta)
    n_expert = len(alpha)

    def draw_pairs(rng):
        # Sample the ML pairs first, then the CL pairs (draw order matters
        # for reproducibility).
        idx_ML = rng.choice(ML_set.shape[0], num_ML, replace=False)
        idx_CL = rng.choice(CL_set.shape[0], num_CL, replace=False)
        return ML_set[idx_ML, :], CL_set[idx_CL, :]

    def expert_rows(rng, m, ML, CL):
        # Noisy answers of expert m: an ML pair is reported as 1 with
        # probability alpha[m], a CL pair with probability 1 - beta[m].
        val_ML = rng.binomial(1, alpha[m], num_ML)
        val_CL = rng.binomial(1, 1 - beta[m], num_CL)
        expert_id = m + start_expert
        Sm_ML = np.hstack((np.ones((num_ML, 1)) * expert_id, ML,
                           val_ML.reshape(val_ML.size, 1)))
        Sm_CL = np.hstack((np.ones((num_CL, 1)) * expert_id, CL,
                           val_CL.reshape(val_CL.size, 1)))
        return np.vstack((Sm_ML, Sm_CL))

    blocks = []
    if flag_same:
        # different experts provide constraint for the same set of sample pairs
        ML, CL = draw_pairs(prng)
        for m in range(n_expert):
            blocks.append(expert_rows(prng, m, ML, CL))
    else:
        # different experts provide constraints for different sets of sample
        # pairs; each expert uses its own fixed-seed generator
        for m in range(n_expert):
            expert_rng = np.random.RandomState(1000 + m)
            ML, CL = draw_pairs(expert_rng)
            blocks.append(expert_rows(expert_rng, m, ML, CL))

    if not blocks:
        # No experts: keep the empty (0, 4) float matrix the loop started from.
        return np.zeros((0, 4))
    return np.vstack(blocks).astype(int)
def _construct_H_from_triu(self, h): H = np.zeros((self.N_parameters, self.N_parameters)) H[np.triu_indices(self.N_parameters)] = h H = H + H.T - np.diag(H) return H
@primitive
def symmetric_matrix(arr, n):
    """Expand a flat upper-triangle vector into a symmetric ``n x n`` matrix.

    ``arr`` holds the upper triangle (diagonal included) in row-major
    order and must have length ``n * (n + 1) / 2``; otherwise an
    ``Exception`` is raised. Declared as a primitive because it fills the
    result by item assignment, so its gradient is registered by hand below.
    """
    if len(arr) != n * (n + 1) / 2:
        raise Exception("Array must have dimensions n*(n+1)/2")
    ret = np.zeros((n, n))
    idx = np.triu_indices(n)
    ret[idx] = arr
    ret[tuple(reversed(idx))] = arr
    assert np.all(ret == ret.T)
    return ret


def _symmetric_matrix_vjp(ans, arr, n):
    """Build the vector-Jacobian product of ``symmetric_matrix`` w.r.t. ``arr``."""
    rows, cols = np.triu_indices(n)
    off_diag = rows != cols

    def vjp(g):
        upper = g[rows, cols]
        # Every off-diagonal input is written to both (i, j) and (j, i), so
        # it collects the incoming gradient from both cells; a diagonal
        # input feeds a single cell. Returning only g[i, j] would drop the
        # (j, i) contribution.
        return np.where(off_diag, upper + g[cols, rows], upper)

    return vjp


defvjp(symmetric_matrix, _symmetric_matrix_vjp)


def slogdet_pos(X):
    """Return log(det(X)); raise ``Exception`` unless the determinant is positive."""
    sgn, slogdet = np.linalg.slogdet(X)
    if sgn <= 0:
        raise Exception("X determinant is nonpositive")
    return slogdet


def log_wishart_pdf(X, V, n, p):
    """Log-density of ``X`` in the form
    ``(n - p - 1)/2 * log|X| - n/2 * log|V| - tr(V^-1 X)/2``.

    Raises ``Exception`` (via ``slogdet_pos``) when ``X`` or ``V`` has a
    nonpositive determinant.
    """
    # correct up to constant of proportionality
    # Float divisors keep the coefficients fractional even where ``/`` on
    # two ints would truncate.
    return ((n - p - 1) / 2.0 * slogdet_pos(X)
            - n / 2.0 * slogdet_pos(V)
            - 0.5 * np.trace(np.dot(np.linalg.inv(V), X)))
def potential_energy(x):
    """Mean inverse squared distance between all distinct pairs of rows of ``x``.

    Each row is first divided by its own sum; squared Euclidean distances
    are then taken between the rescaled rows, each pair counted once.
    """
    row_sums = x.sum(axis=1)[:, None]
    scaled = x / row_sums
    diffs = scaled[:, None] - scaled[None, :]
    sq_dists = (diffs**2).sum(axis=2)
    pair_idx = anp.triu_indices(len(scaled), 1)
    return (1 / sq_dists[pair_idx]).mean()
def chol2params(chol, dchol, D):
    """Flatten ``dchol`` into a parameter vector.

    The diagonal of ``dchol`` is first multiplied by the diagonal of
    ``chol`` -- this modifies ``dchol`` **in place** -- and the upper
    triangle (diagonal included, row-major order) is then returned as a
    new array.
    """
    diag = np.diag_indices(D)
    dchol[diag] = chol[diag] * dchol[diag]
    upper = np.triu_indices(D)
    return dchol[upper].copy()
def cb(self, t, on='R', alpha_ci=0.05, bound='two-sided'):
    r"""
    Confidence bounds of the ``on`` function at the ``alpha_ci`` level of
    significance. Can be the upper, lower, or two-sided confidence by
    changing value of ``bound``.

    Parameters
    ----------

    t : array like or scalar
        The values of the random variables at which the confidence bounds
        will be calculated. Must be iterable for ``on='hf'`` and
        ``on='df'``, which are evaluated point by point.
    on : ('R', 'ff', 'F', 'Hf', 'hf', 'df'), optional
        The function on which the confidence bound will be calculated.
        ``'ff'``/``'F'`` give ``1 - R``, ``'Hf'`` gives ``-log(R)``,
        ``'hf'`` the derivative of ``-log(R)`` and ``'df'`` that derivative
        times ``self.sf``. Any other value (including the default ``'R'``)
        returns the bounds on the survival function itself.
    alpha_ci : scalar, optional
        The level of significance at which the bound will be computed.
    bound : ('two-sided', 'upper', 'lower'), str, optional
        Compute either the two-sided, upper or lower confidence bound(s).
        Defaults to two-sided.

    Returns
    -------

    cb : scalar or numpy array
        The value(s) of the upper, lower, or both confidence bound(s) of
        the selected function at t

    Raises
    ------

    Exception
        If the model was not fitted with ``method == 'MLE'``.
    """
    if self.method != 'MLE':
        raise Exception('Only MLE has confidence bounds')

    hess_inv = np.copy(self.hess_inv)
    pvars = hess_inv[np.triu_indices(hess_inv.shape[0])]

    if hasattr(self.dist, 'R_cb'):
        # The distribution supplies its own bounds on R.
        def R_cb(x):
            return self.dist.R_cb(x - self.gamma,
                                  *self.params,
                                  hess_inv,
                                  alpha_ci=alpha_ci,
                                  bound=bound)
    else:
        def R_cb(x):
            def sf_func(params):
                return self.dist.sf(x - self.gamma, *params)

            jac = np.atleast_2d(jacobian(sf_func)(np.array(self.params)))

            # Variance of R at each point from the jacobian and the
            # upper-triangular entries of the inverse Hessian.
            # NOTE(review): off-diagonal terms are counted once here, whereas
            # the quadratic form J @ cov @ J.T would count them twice --
            # confirm intent.
            var_R = []
            for grad in jac:
                outer = np.atleast_2d(grad).T * grad
                upper = outer[np.triu_indices(outer.shape[0])]
                var_R.append(np.sum(upper * pvars))

            R_hat = self.sf(x)
            if bound == 'two-sided':
                diff = (z(alpha_ci / 2) * np.sqrt(np.array(var_R))
                        * np.array([1., -1.]).reshape(2, 1))
            elif bound == 'upper':
                diff = z(alpha_ci) * np.sqrt(np.array(var_R))
            else:
                diff = -z(alpha_ci) * np.sqrt(np.array(var_R))

            # The offset is applied on the logit scale of R_hat.
            exponent = diff / (R_hat * (1 - R_hat))
            bounds = R_hat / (R_hat + (1 - R_hat) * np.exp(exponent))
            return bounds.T

    def hazard_cb(v):
        # Derivative of -log(R_cb) at the single point ``v``.
        return jacobian(lambda x: -np.log(R_cb(x)))(v)

    # errstate restores the previous floating-point error handling even if
    # one of the calls below raises.
    with np.errstate(all='ignore'):
        if on == 'hf':
            return np.concatenate([hazard_cb(v) for v in t])
        if on == 'df':
            # density = hazard * survival
            return np.concatenate([hazard_cb(v) * self.sf(v) for v in t])

        # Default cb is R
        bounds = R_cb(t)
        if on in ('ff', 'F'):
            bounds = 1. - bounds
        elif on == 'Hf':
            bounds = -np.log(bounds)
        return bounds