def reduce_data_and_connectivity(labels, n_labels, connectivity, data_matrix, thr):
    """Average the data and the connectivity over the clusters given by `labels`.

    Builds an (n_labels, n_features) incidence matrix, normalizes its rows,
    and uses it to reduce both the data matrix and the connectivity graph.
    The reduced connectivity edges are re-weighted by the squared distance
    between cluster averages, floored at `thr`.
    """
    n_features = len(labels)
    incidence = coo_matrix(
        (np.ones(n_features), (labels, np.arange(n_features))),
        shape=(n_labels, n_features), dtype=np.float32).tocsc()
    inv_sum_col = dia_matrix(
        (np.array(1. / incidence.sum(axis=1)).squeeze(), 0),
        shape=(n_labels, n_labels))
    incidence = inv_sum_col * incidence
    # reduced data
    reduced_data_matrix = (incidence * data_matrix.T).T
    reduced_connectivity = (incidence * connectivity) * incidence.T
    reduced_connectivity = reduced_connectivity - dia_matrix(
        (reduced_connectivity.diagonal(), 0),
        shape=(reduced_connectivity.shape))
    i_idx, j_idx = reduced_connectivity.nonzero()
    data_matrix_ = np.maximum(thr, np.sum(
        (reduced_data_matrix[:, i_idx] - reduced_data_matrix[:, j_idx]) ** 2, 0))
    reduced_connectivity.data = data_matrix_
    return reduced_connectivity, reduced_data_matrix
def divide(self, factor):
    """
    Scalar and matrix division.
    Scalar division is equivalent to multiplying the matrix by 1/factor.
    Note: Division by zero will return zero, rather than NaN.

    :param factor: Denominator to use for division.
    :return: The resulting divided matrix.
    """
    if isinstance(factor, Number):
        mat = self.matrix.copy()
        mat.data = mat.data / factor
        return self._new_instance(mat)
    elif isinstance(factor, IndexMatrix):
        if factor.shape == (1, 1):
            return self.divide(factor[0, 0])
        elif factor.shape[0] == 1:
            inverted = 1 / factor
            length = factor.shape[1]
            diag = self._new_instance(
                sp.dia_matrix((inverted.to_ndarray(), [0]), shape=(length, length)),
                row2word=self.col2word)
            return self.dot(diag)
        elif factor.shape[1] == 1:
            inverted = 1 / factor.transpose()
            length = factor.shape[0]
            diag = self._new_instance(
                sp.dia_matrix((inverted.to_ndarray(), [0]), shape=(length, length)),
                col2word=self.row2word)
            return diag.dot(self)
        else:
            return self.multiply(1 / factor)
    else:
        raise TypeError("factor has to be either a scalar or an IndexMatrix")
def __init__(self, dS, dt, ds, S, **kwargs): super(CrankNicolsonScheme, self).__init__(S) a, b, c, d = dS.fde(dt, ds, S, "explicit", **kwargs) self.Le = sparse.dia_matrix(([a, 2 + b, c], [-1, 0, 1]), shape=S.shape*2) a, b, c, d = dS.fde(dt, ds, S, "implicit", **kwargs) self.Li = sparse.dia_matrix(([-a, 2 - b, -c], [-1, 0, 1]), shape=S.shape*2).tocsr() self.d = 2 * d
def _laplace(self):
    '''
    Construct the Laplace operator as a matrix based on the user-input filter.
    This matrix can be thought of as a convolution operator: f(x,z)*U(x,z)
    '''
    f = [-1., 2., -1.]
    nx = nz = self.n
    nf = len(f)
    nonzero = np.ones((nf, nx))
    for i in range(nf):
        nonzero[i] *= f[i]
    # integer division so the offsets stay ints under Python 3
    offsets = array(range(nf // 2, -nf // 2, -1))
    m1 = dia_matrix((nonzero, offsets), shape=(nx, nx))
    m2 = identity(nz)
    k1 = kron(m1, m2)
    nonzero = np.ones((nf, nz))
    for i in range(nf):
        nonzero[i, :] *= f[i]
    m1 = dia_matrix((nonzero, offsets), shape=(nz, nz))
    m2 = identity(nx)
    k2 = kron(m2, m1)
    return k1 + k2
def optimize_obs_beta(self, ins_seq, obs_seq, j, n_iter, threshold=1e-6):
    Y = obs_seq
    X = ins_seq
    obs_beta = np.matrix(self.obs_weight_mat[j]).T
    g = np.squeeze(self.state_posts[:, j])
    diag_g = dia_matrix(([g], 0), shape=(len(g), len(g)))
    difference = []
    log_g = np.matrix(self.log_state_posts[:, j]).T
    g_y = np.multiply(np.matrix(self.state_posts[:, j]).T, Y)
    for n in range(n_iter):
        # print(n, "Before the inner obs_obj_cost", self.obj_obs_subnet(obs_beta, j))
        nu = X * obs_beta
        mu = np.exp(nu)
        w_data = np.squeeze(np.array(mu))
        W = dia_matrix(([w_data], 0), shape=(len(w_data), len(w_data)))
        grad = X.T * diag_g * (Y - mu) - 2 * self.gamma * obs_beta
        hess = -1 * X.T * diag_g * W * X - 2 * self.gamma * np.identity(self.input_dim)
        beta_old = obs_beta
        try:
            obs_beta = obs_beta - np.linalg.pinv(hess) * grad
        except Exception:
            print('grad', n, grad, log_g[1:4], nu[1:4])
            sys.exit()
        # print(n, "After the inner obs_obj_cost", self.obj_obs_subnet(obs_beta, j))
        difference.append(np.max(beta_old - obs_beta))
        if difference[-1] <= threshold:
            break
    self.obs_weight_mat[j, :] = np.squeeze(np.array(obs_beta))
def second_difference_matrix(N=3,bc='BothClamped'): """ generates the second order difference operator in matrix form """ checkInputArgs(N,bc) diag = 2*[None] if bc == 'BothFree': M = N + 1 elif 'Free' in bc: M = N else: M = N - 1 if 'Free' not in bc: diag[0] = -2 + np.zeros(M) diag[1] = np.ones(M) mat = dia_matrix(([diag[0],diag[1],diag[1]],[0,1,-1]),shape=(M,M)) else: if bc == 'BothFree': diag[0] = np.concatenate((np.zeros(1),-2 + np.zeros(N - 1),np.zeros(1))) diag[1] = np.concatenate((np.ones(N - 1),np.zeros(2))) mat = dia_matrix(([diag[0],np.roll(diag[1],2),diag[1]],[0,1,-1]),shape=(M,M)) elif bc in ('LeftClampedRightFree','LeftSimplySupportedRightFree'): diag[0] = np.concatenate((-2 + np.zeros(N - 1),np.zeros(1))) diag[1] = np.concatenate((np.ones(N - 1),np.zeros(1))) mat = dia_matrix(([diag[0],np.roll(diag[1],1),np.roll(diag[1],-1)],[0,1,-1]),shape=(M,M)) elif bc in ('LeftFreeRightClamped','LeftFreeRightSimplySupported'): diag[0] = np.concatenate((np.zeros(1),-2 + np.zeros(N - 1))) diag[1] = np.concatenate((np.zeros(1),np.ones(N - 1))) mat = dia_matrix(([diag[0],np.roll(diag[1],1),np.roll(diag[1],-1)],[0,1,-1]),shape=(M,M)) return mat
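A minimal, self-contained sketch (assuming only numpy and scipy.sparse) of the dia_matrix construction the clamped branch above relies on: a tridiagonal second-difference operator built from offsets [0, 1, -1].

import numpy as np
from scipy.sparse import dia_matrix

# tridiagonal second-difference operator, as in the 'BothClamped' branch above
n = 5
main_diag = -2.0 * np.ones(n)
off_diag = np.ones(n)
D2 = dia_matrix(([main_diag, off_diag, off_diag], [0, 1, -1]), shape=(n, n))
print(D2.toarray())  # -2 on the diagonal, 1 on the sub/super-diagonals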
def optimize_trans_beta(self, ins_seq, obs_seq, j, n_iter, threshold=1e-3):
    trans_theta = np.matrix(self.trans_weight_mat[j])
    trans_post = np.matrix(self.trans_posts[:, j, :])
    X = ins_seq
    Y = obs_seq
    difference = []
    for n in range(n_iter):
        jac_array = np.zeros((self.n_components, self.input_dim))
        I = np.matrix(np.identity(self.n_components))
        nu = X * trans_theta.T
        prob_mu = extmath.safe_softmax(nu)
        for s in range(self.n_components):
            I_s = I[:, s]
            prob_mu_s = np.squeeze(np.array(prob_mu[:, s]))
            jac_s = np.squeeze(np.array(X.T * trans_post * I_s)) - \
                np.squeeze(np.array(
                    X.T * dia_matrix((prob_mu_s, 0),
                                     shape=(len(prob_mu_s), len(prob_mu_s)))
                    * np.sum(trans_post, axis=1))) - \
                2 * self.gamma * np.squeeze(trans_theta[s, :])
            jac_array[s, :] = jac_s
            # check for NaN in the records
            if np.isnan(np.min(jac_s)):
                print('Encountered NaN', jac_s, n, s, jac_array)
                print('Debug: ')
                print("trans_post: ", trans_post)
                print("prob_mu_s", prob_mu_s)
                print("I_s", I_s)
                sys.exit()
        jac_vec = np.matrix(jac_array.reshape(self.input_dim * self.n_components, 1))
        hess_array = np.zeros((self.input_dim * self.n_components,
                               self.input_dim * self.n_components))
        for s in range(self.n_components):
            sum_trans_post = np.squeeze(np.array(np.sum(trans_post, axis=1)))
            for p in range(self.n_components):
                I_sp = I[s, p]
                prob_s = prob_mu[:, s]
                prob_p = prob_mu[:, p]
                prob_item = np.squeeze(np.array(np.multiply(prob_s, prob_p) - I_sp * prob_s))
                hess_item = X.T * dia_matrix((prob_item, 0),
                                             shape=(len(prob_item), len(prob_item))) \
                    * dia_matrix((sum_trans_post, 0),
                                 shape=(len(sum_trans_post), len(sum_trans_post))) * X \
                    - 2 * self.gamma * np.matrix(np.identity(self.input_dim))
                hess_array[(s * self.input_dim):((s + 1) * self.input_dim),
                           (p * self.input_dim):((p + 1) * self.input_dim)] = np.array(hess_item)
        hess_array = np.matrix(hess_array)
        trans_theta_old = trans_theta
        try:
            trans_theta = trans_theta - np.reshape(
                np.linalg.pinv(hess_array) * jac_vec,
                (self.n_components, self.input_dim))
        except Exception:
            print('Failed to Converge!')
            print('jac_vec', jac_vec)
            print(hess_array)
            sys.exit()
        # print("trans_theta_old ", trans_theta_old)
        # print("trans_theta_new", trans_theta)
        difference.append(np.max(trans_theta_old - trans_theta))
        if difference[-1] <= threshold:
            break
    self.trans_weight_mat[j, :, :] = np.array(trans_theta)
def BuildLaPoisson():
    """
    For the projection step: Laplacian matrix for phi with Neumann BCs on phi.

    NOTE: the Neumann condition on phi makes the solution non-unique; the
    pressure must be pinned at one point to lift the degeneracy (here [0][1]),
    hence the need to build a correction matrix.
    """
    ### do not include the ghost points (-2)
    NXi = nx
    NYi = ny

    ###### Definition of the 1D Laplace operator

    ###### X AXIS
    ### Diagonal terms
    dataNXi = [numpy.ones(NXi), -2 * numpy.ones(NXi), numpy.ones(NXi)]

    ### Boundary conditions: Neumann on the left, nothing on the right
    dataNXi[2][1] = 2.  # SF left
    # dataNXi[0][NXi-2] = 2.  # SF right

    ###### Y AXIS
    ### Diagonal terms
    dataNYi = [numpy.ones(NYi), -2 * numpy.ones(NYi), numpy.ones(NYi)]

    ### Boundary conditions: Neumann
    dataNYi[2][1] = 2.  # SF low
    dataNYi[0][NYi - 2] = 2.  # SF top

    ###### Their positions
    offsets = numpy.array([-1, 0, 1])
    DXX = sp.dia_matrix((dataNXi, offsets), shape=(NXi, NXi)) * dx_2
    DYY = sp.dia_matrix((dataNYi, offsets), shape=(NYi, NYi)) * dy_2

    ####### 2D Laplace operator
    LAP = sp.kron(sp.eye(NYi, NYi), DXX) + sp.kron(DYY, sp.eye(NXi, NXi))

    ####### BUILD CORRECTION MATRIX
    ### Upper diagonal terms
    dataNYNXi = [numpy.zeros(NYi * NXi)]
    offset = numpy.array([1])

    ### Fix coefficient: 2 + (-1) = 1 ==> Dirichlet at one point (recovers the Laplacian)
    ### NOTE the multiplicative coefficient: dx_2, with M(j, i), j along NY, i along NX
    dataNYNXi[0][1] = -1 * dx_2

    LAP0 = sp.dia_matrix((dataNYNXi, offset), shape=(NYi * NXi, NYi * NXi))

    return LAP + LAP0
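A self-contained sketch of the same Kronecker-product assembly of a 2D Laplacian from 1D dia_matrix operators; the grid sizes and spacing below are illustrative placeholders, not the nx/ny/dx_2/dy_2 globals the function above expects.

import numpy as np
import scipy.sparse as sp

def lap1d(n, h):
    """1D second-difference operator scaled by 1/h**2, stored as a dia_matrix."""
    data = [np.ones(n), -2.0 * np.ones(n), np.ones(n)]
    return sp.dia_matrix((data, [-1, 0, 1]), shape=(n, n)) / h**2

nx_demo, ny_demo, h_demo = 4, 3, 0.1
LAP2D = sp.kron(sp.eye(ny_demo), lap1d(nx_demo, h_demo)) \
      + sp.kron(lap1d(ny_demo, h_demo), sp.eye(nx_demo))
print(LAP2D.shape)  # (12, 12): one row per grid point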
def multiply(self, other): """Point-wise multiplication by another matrix, vector, or scalar. """ # Scalar multiplication. if isscalarlike(other): return self._mul_scalar(other) # Sparse matrix or vector. if isspmatrix(other): if self.shape == other.shape: if not isinstance(other, fast_csr_matrix): other = csr_matrix(other) return self._binopt(other, '_elmul_') # Single element. elif other.shape == (1,1): return self._mul_scalar(other.toarray()[0, 0]) elif self.shape == (1,1): return other._mul_scalar(self.toarray()[0, 0]) # A row times a column. elif self.shape[1] == other.shape[0] and self.shape[1] == 1: return self._mul_sparse_matrix(other.tocsc()) elif self.shape[0] == other.shape[1] and self.shape[0] == 1: return other._mul_sparse_matrix(self.tocsc()) # Row vector times matrix. other is a row. elif other.shape[0] == 1 and self.shape[1] == other.shape[1]: other = dia_matrix((other.toarray().ravel(), [0]), shape=(other.shape[1], other.shape[1])) return self._mul_sparse_matrix(other) # self is a row. elif self.shape[0] == 1 and self.shape[1] == other.shape[1]: copy = dia_matrix((self.toarray().ravel(), [0]), shape=(self.shape[1], self.shape[1])) return other._mul_sparse_matrix(copy) # Column vector times matrix. other is a column. elif other.shape[1] == 1 and self.shape[0] == other.shape[0]: other = dia_matrix((other.toarray().ravel(), [0]), shape=(other.shape[0], other.shape[0])) return other._mul_sparse_matrix(self) # self is a column. elif self.shape[1] == 1 and self.shape[0] == other.shape[0]: copy = dia_matrix((self.toarray().ravel(), [0]), shape=(self.shape[0], self.shape[0])) return copy._mul_sparse_matrix(other) else: raise ValueError("inconsistent shapes") # Dense matrix. if isdense(other): if self.shape == other.shape: ret = self.tocoo() ret.data = np.multiply(ret.data, other[ret.row, ret.col] ).view(np.ndarray).ravel() return ret # Single element. elif other.size == 1: return self._mul_scalar(other.flat[0]) # Anything else. return np.multiply(self.todense(), other)
def whfilter(a, weights=None, lamb=1600, p=3, ): """ Generalized Whittaker-Handerson Graduation Method Parameters ---------- a : array-like The input array, shape (n,) weights : array-like or None Weights lamb : float The relative importance between goodness of fit and smoothness (smoothness increases with lamb). p : integer, default 3 The degree of smoothness. We minimize the p-th finite-differences of the graduated data. Examples: p=2 Hodrick-Prescott filter; p=3 Whittaker-Henderson method; Note: moments 0..p-1 will be conserved by graduation Returns ------- out : array The smoothed data References ---------- implementation of scikits.statsmodels.tsa.filters.hp_filter.py Alicja S. Nocon & William F. Scott (2012): "An extension of the Whittaker-Henderson method of graduation", Scandinavian Actuarial Journal, 2012:1, 70-79 Whittaker, E. T. (1922). "On a new method of graduation", Proceedings of the Edinburgh Mathematical Society 41,63-75. """ # input data a = np.squeeze(a); if a.ndim>1: raise ValueError("input array a must be 1d"); n = a.size; # weights W = np.squeeze(weights) if weights is not None else np.ones(n); if np.any(W==0) or not np.all(np.isfinite(W)): raise ValueError("weights must be non-zero and finite."); W = sparse.dia_matrix((W, 0), shape=(n,n)); # set up difference Matrix K, shape (n-p, n) # K_ij = k(j-i), l=j-i # k(l) = (-1)^l Binomial(p,l) if 0<=l<=p else 0 l = np.arange(p+1); k = (-1)**l * comb(p,l); # same as K_0j diags =np.tile(k,(n,1)).T; # side-diagonal K_i,i+l; n-times k(l) offsets=np.arange(p+1); # index of side-diagonals K = sparse.dia_matrix((diags,offsets),shape=(n-p,n)); # K_ij # solve quadratic optimization problem return spsolve(W+lamb*K.T.dot(K), W.dot(a));
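A minimal usage sketch for the graduation routine above, assuming `whfilter` and its imports (numpy, scipy.sparse, scipy.sparse.linalg.spsolve, and a binomial `comb`) are in scope; the series is synthetic.

import numpy as np

rng = np.random.default_rng(0)
t = np.linspace(0.0, 1.0, 200)
y = np.sin(2 * np.pi * t) + 0.1 * rng.standard_normal(t.size)  # noisy series

smooth = whfilter(y, lamb=1600, p=3)  # Whittaker-Henderson graduation
residual = y - smooth                 # the irregular component removed by the fit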
def hittimes(self): lplus = self.lplus D = dia_matrix((self.degrees,0),shape = (self.nnodes,self.nnodes)) diag = dia_matrix((lplus.diagonal(),0),shape = lplus.shape) sums = dia_matrix( ((lplus*D).sum(1),0),shape = lplus.shape) one = matrix(ones(lplus.shape)) oneS = sums*one vol = self.volume return array(oneS - oneS.T - vol*lplus + vol*one*diag)
def test_endianness(): d = np.ones((3,4)) offsets = [-1,0,1] a = dia_matrix((d.astype('<f8'), offsets), (4, 4)) b = dia_matrix((d.astype('>f8'), offsets), (4, 4)) v = np.arange(4) assert_allclose(a.dot(v), [1, 3, 6, 5]) assert_allclose(b.dot(v), [1, 3, 6, 5])
def testExpLog(self): N = 10 a = random(N) c = exp(a) discrepancy = c.diff(a) - sp.dia_matrix((np.exp(a._base), 0), (N,N)) if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max()) c = log(a) discrepancy = c.diff(a) - sp.dia_matrix((1 / a._base, 0), (N,N)) if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
def testSinCos(self): N = 10 a = random(N) b = sin(a) c = cos(a) discrepancy = b.diff(a) - sp.dia_matrix((np.cos(a._base), 0), (N,N)) if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max()) discrepancy = c.diff(a) + sp.dia_matrix((np.sin(a._base), 0), (N,N)) if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
def testDiv(self): N = 10 a = random(N) b = random(N) c = a / b / 2 discrepancy = c.diff(a) - sp.dia_matrix((1. / b._base / 2., 0), (N,N)) if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max()) discrepancy = c.diff(b) + sp.dia_matrix(((a / b**2)._base/2, 0), (N,N)) if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
def testPoisson1DResidual(self): N = 10000 u = random(N) dx = 1. / (N * COMM_WORLD.Get_size() + 1) u_right = zeros(1) u_left = zeros(1) my_rank = COMM_WORLD.Get_rank() if my_rank > 0: COMM_WORLD.Send(u[:1], my_rank - 1) if my_rank < COMM_WORLD.Get_size() - 1: COMM_WORLD.Recv(u_right, my_rank + 1) if my_rank < COMM_WORLD.Get_size() - 1: COMM_WORLD.Send(u[-1:], my_rank + 1) if my_rank > 0: COMM_WORLD.Recv(u_left, my_rank - 1) u_ext = hstack([u_left, u, u_right]) f = (u_ext[2:] + u_ext[:-2] - 2 * u_ext[1:-1]) / dx**2 f_diff_u = diff_mpi(f, u, 'tangent') # check diagonal blocks lapl = -2 * sp.eye(N,N) \ + sp.dia_matrix((np.ones(N), 1), (N,N)) \ + sp.dia_matrix((np.ones(N), -1), (N,N)) my_rank = COMM_WORLD.Get_rank() discrepancy = f_diff_u[my_rank] - lapl / dx**2 if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max()) # lower diagonal blocks lapl_l = sp.csr_matrix(([1.], ([0], [N-1])), shape=(N,N)) if my_rank > 0: discrepancy = f_diff_u[my_rank-1] - lapl_l / dx**2 if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max()) # upper diagonal blocks lapl_u = lapl_l.T if my_rank < COMM_WORLD.Get_size() - 1: discrepancy = f_diff_u[my_rank+1] - lapl_u / dx**2 if discrepancy.nnz > 0: self.assertAlmostEqual(0, np.abs(discrepancy.data).max()) # other blocks are 0 for rank in range(COMM_WORLD.Get_size()): if abs(rank - my_rank) > 1 and rank in f_diff_u: self.assertEqual(f_diff_u[rank], 0)
def scale_normalize(X): row_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=1))).squeeze() col_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=0))).squeeze() row_diag = np.where(np.isnan(row_diag), 0, row_diag) col_diag = np.where(np.isnan(col_diag), 0, col_diag) if issparse(X): n_rows, n_cols = X.shape r = dia_matrix((row_diag, [0]), shape=(n_rows, n_rows)) c = dia_matrix((col_diag, [0]), shape=(n_cols, n_cols)) an = r * X * c else: an = row_diag[:, np.newaxis] * X * col_diag return an, row_diag, col_diag
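A small usage sketch, assuming `scale_normalize` and its imports (numpy, scipy.sparse.issparse, dia_matrix) are available: rows and columns are rescaled by the inverse square roots of their sums, the kind of normalization used in spectral co-clustering.

import numpy as np
from scipy.sparse import csr_matrix

X = csr_matrix(np.array([[1., 2., 0.],
                         [0., 3., 1.]]))
An, row_diag, col_diag = scale_normalize(X)
# An == diag(1/sqrt(row sums)) * X * diag(1/sqrt(col sums))
print(An.toarray())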
def testMul(self): N = 1000 a = random(N) b = random(N) c = a * b * 5 self.assertEqual(0, (c.diff(a, 'tangent') - \ 5 * sp.dia_matrix((b._base, 0), (N,N))).nnz) self.assertEqual(0, (c.diff(a, 'adjoint') - \ 5 * sp.dia_matrix((b._base, 0), (N,N))).nnz) self.assertEqual(0, (c.diff(b, 'tangent') - \ 5 * sp.dia_matrix((a._base, 0), (N,N))).nnz) self.assertEqual(0, (c.diff(b, 'adjoint') - \ 5 * sp.dia_matrix((a._base, 0), (N,N))).nnz)
def _bh(self): n = self.nx -1 dp = ones(n)*2. dl = ones(n)*-1. ds = dl dh2 = 1./(self.dx*self.dx) D = [dp,dl,ds] bh = dia_matrix( (D,array([0,-1,1])),shape=(n,n)) I = dia_matrix( (-1.*dl,array([0])),shape=(n,n)) A = I + self.a*self.dt*dh2*bh return A
def geometry(Nr, Nz, parms):
    r = np.linspace(-parms.Lr, parms.Lr, Nr + 1)
    hr = r[1] - r[0]
    r = r[::-1]
    e = np.ones(Nr)
    Dr = (np.diag(e, -1) - np.diag(e, 1)) / (2 * hr)
    Dr[0, 0:2] = np.array([1, -1]) / hr
    Dr[Nr, Nr - 1:Nr + 1] = np.array([1, -1]) / hr
    Dr2 = (np.diag(e, -1) - 2 * np.diag(np.ones(Nr + 1), 0) + np.diag(e, 1)) / hr**2
    Dr2[0, 0:3] = np.array([1, -2, 1]) / hr**2
    Dr2[Nr, Nr - 2:Nr + 1] = np.array([1, -2, 1]) / hr**2

    z = np.linspace(-parms.Lz, 0, Nz)
    hz = z[1] - z[0]
    z = z[::-1]
    e = np.ones(Nz - 1)
    Dz = (np.diag(e, -1) - np.diag(e, 1)) / (2 * hz)
    Dz[0, 0:3] = np.array([-3, 4, -1]) / (2 * hz)
    Dz[Nz - 1, Nz - 3:Nz] = np.array([1, -4, 3]) / (2 * hz)
    Dz2 = (np.diag(e, -1) - 2 * np.diag(np.ones(Nz), 0) + np.diag(e, 1)) / hz**2
    Dz2[0, 0:3] = np.array([1, -2, 1]) / hz**2
    Dz2[Nz - 1, Nz - 3:Nz] = np.array([1, -2, 1]) / hz**2

    # note: the original called sp.dia_matrix(Dr), sp.dia_matrix(Dr2),
    # sp.dia_matrix(Dz), sp.dia_matrix(Dz2) without keeping the results,
    # so the operators returned below remain dense
    return [Dr, Dr2, r, Dz, Dz2, z]
def build_laplacian_related_matrices_sparse(W): """W: the sign matrix (sparse) """ assert issparse(W) entries = list(indexed_entries(W)) W_p = _make_matrix(list(filter(lambda e: e[2] > 0, entries)), W.shape) W_n = _make_matrix(list(filter(lambda e: e[2] < 0, entries)), W.shape) W_n = -W_n D_p = dia_matrix((np.transpose(W_p.sum(axis=1)), [0]), W.shape) D_n = dia_matrix((np.transpose(W_n.sum(axis=1)), [0]), W.shape) D_hat = D_p + D_n return W_p, W_n, D_p, D_n, D_hat
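For reference, a self-contained sketch of the degree-on-the-diagonal pattern used above: row sums of an adjacency matrix placed into a dia_matrix, giving D and the combinatorial Laplacian L = D - W.

import numpy as np
from scipy.sparse import csr_matrix, dia_matrix

W = csr_matrix(np.array([[0., 1., 1.],
                         [1., 0., 0.],
                         [1., 0., 0.]]))
degrees = np.asarray(W.sum(axis=1)).T          # shape (1, n): row sums
D = dia_matrix((degrees, [0]), shape=W.shape)  # degree matrix
L = D - W                                      # combinatorial graph Laplacian
print(L.toarray())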
def normalize(self, c=0): """ Normalize the graph according to the index c Normalization means that the sum of the edges values that go into or out each vertex must sum to 1 Parameters ---------- c=0 in {0, 1, 2}, optional: index that designates the way according to which D is normalized c == 0 => for each vertex a, sum{edge[e, 0]=a} D[e]=1 c == 1 => for each vertex b, sum{edge[e, 1]=b} D[e]=1 c == 2 => symmetric ('l2') normalization Notes ----- Note that when sum_{edge[e, .] == a } D[e] = 0, nothing is performed """ from scipy.sparse import dia_matrix c = int(c) if not c in [0, 1, 2]: raise ValueError('c must be equal to 0, 1 or 2') if self.E == 0: if c < 2: return np.zeros(self.V) else: return np.zeros(self.V), np.zeros(self.V) adj = self.to_coo_matrix().tocsr() s1 = adj.sum(0) s2 = adj.sum(1) if c == 1: s = dia_matrix((1. / s1, 0), shape=(self.V, self.V)) adj = adj * s self.weights = wgraph_from_adjacency(adj).get_weights() return np.asarray(s1) if c == 0: s = dia_matrix((1. / s2.T, 0), shape=(self.V, self.V)) adj = s * adj self.weights = wgraph_from_adjacency(adj).get_weights() return np.asarray(s2) if c == 2: s1 = dia_matrix((1. / np.sqrt(s1), 0), shape=(self.V, self.V)) s2 = dia_matrix((1. / np.sqrt(adj.sum(1)), 0), shape=(self.V, self.V)) adj = (s1 * adj) * s2 self.weights = wgraph_from_adjacency(adj).get_weights() return np.asarray(s1), np.asarray(s2)
def get_subset_cpd(self, sub_idx): """ Get the cpd over a subset of the variables. :param np.ndarray[int]|np.ndarray[bool] sub_idx: indices of variables to keep :return: a new Gaussian CPD :rtype: GaussianCPD """ if len(sub_idx) == 0 or (sub_idx.dtype == bool and not np.sum(sub_idx)): raise ValueError("sub_idx must not be empty") sub_mean = self.mean[sub_idx] sub_dim = len(sub_mean) if isinstance(self.precision, sp.dia_matrix): sub_precision = sp.dia_matrix((self.precision.diagonal()[sub_idx], np.zeros(1)), shape=(sub_dim, sub_dim)) elif np.isscalar(self.precision): sub_precision = self.precision elif isinstance(self.precision, np.ndarray): if np.prod(self.precision.shape) == self.dim: sub_precision = self.precision[sub_idx] else: # We do the indexing this way for performance reasons. sub_precision = self.precision[sub_idx, :][:, sub_idx] else: # We do the indexing this way for performance reasons. sub_precision = self.precision.tocsr()[sub_idx, :][:, sub_idx] return GaussianCPD(dim=sub_dim, mean=sub_mean, precision=sub_precision, mean_lin_op=get_subset_lin_op(self.mean_lin_op, sub_idx))
def _solve_cholesky(X, y, alpha, sample_weight=None): # w = inv(X^t X + alpha*Id) * X.T y n_samples, n_features = X.shape n_targets = y.shape[1] has_sw = sample_weight is not None if has_sw: sample_weight = sample_weight * np.ones(n_samples) sample_weight_matrix = sparse.dia_matrix((sample_weight, 0), shape=(n_samples, n_samples)) weighted_X = safe_sparse_dot(sample_weight_matrix, X) A = safe_sparse_dot(weighted_X.T, X, dense_output=True) Xy = safe_sparse_dot(weighted_X.T, y, dense_output=True) else: A = safe_sparse_dot(X.T, X, dense_output=True) Xy = safe_sparse_dot(X.T, y, dense_output=True) one_alpha = np.array_equal(alpha, len(alpha) * [alpha[0]]) if one_alpha: A.flat[::n_features + 1] += alpha[0] return linalg.solve(A, Xy, sym_pos=True, overwrite_a=True).T else: coefs = np.empty([n_targets, n_features]) for coef, target, current_alpha in zip(coefs, Xy.T, alpha): A.flat[::n_features + 1] += current_alpha coef[:] = linalg.solve(A, target, sym_pos=True, overwrite_a=False).ravel() A.flat[::n_features + 1] -= current_alpha return coefs
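A standalone sketch of the sample-weighting step above: a dia_matrix holds the weights on its diagonal so X and y can be reweighted before forming the ridge normal equations. Names and sizes here are illustrative.

import numpy as np
from scipy import sparse

rng = np.random.default_rng(0)
X = rng.standard_normal((6, 3))
y = rng.standard_normal((6, 1))
sample_weight = np.array([1., 2., 1., 0.5, 1., 3.])
alpha = 0.1

W = sparse.dia_matrix((sample_weight, 0), shape=(6, 6))  # diagonal weight matrix
weighted_X = W @ X
A = weighted_X.T @ X + alpha * np.eye(3)  # X^T W X + alpha * I
Xy = weighted_X.T @ y                     # X^T W y
coef = np.linalg.solve(A, Xy)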
def smooth(self, scalars, factor=1.0): """Smooth vertex-wise function given by `scalars` across the surface using mean curvature flow method (see http://brickisland.net/cs177fa12/?p=302). Amount of smoothing is controlled by `factor`. Parameters ---------- scalars : 1D ndarray, shape (total_verts,) A scalar-valued function across the cortex, such as the curvature supplied by mean_curvature. factor : float, optional Amount of smoothing to perform, larger values smooth more. Returns ------- smscalars : 1D ndarray, shape (total_verts,) Smoothed scalar values. """ if factor == 0.0: return scalars B,D,W,V = self.laplace_operator npt = len(D) lfac = sparse.dia_matrix((D,[0]), (npt,npt)) - factor * (W-V) goodrows = np.nonzero(~np.array(lfac.sum(0) == 0).ravel())[0] lfac_solver = sparse.linalg.dsolve.factorized(lfac[goodrows][:,goodrows]) goodsmscalars = lfac_solver((D * scalars)[goodrows]) smscalars = np.zeros(scalars.shape) smscalars[goodrows] = goodsmscalars return smscalars
def dilation(self, nbiter=1, fast=True):
    """ Morphological dilation of the field data. self.field is changed.

    Parameters
    ----------
    nbiter: int, optional, the number of iterations required

    fixme
    -----
    cython
    """
    nbiter = int(nbiter)
    if fast:
        from ._graph import dilation
        if self.E > 0:
            if (self.field.size == self.V):
                self.field = self.field.reshape((self.V, 1))
            idx, neighb, _ = self.compact_neighb()
            for i in range(nbiter):
                dilation(self.field, idx, neighb)
    else:
        from scipy.sparse import dia_matrix
        adj = self.to_coo_matrix() + dia_matrix(
            (np.ones(self.V), 0), (self.V, self.V))
        rows = adj.tolil().rows
        for i in range(nbiter):
            self.field = np.array([self.field[row].max(0) for row in rows])
def smoothing_matrix(vertices, adj_mat, smoothing_steps=20, verbose=None): """Create a smoothing matrix which can be used to interpolate data defined for a subset of vertices onto mesh with an adjancency matrix given by adj_mat. If smoothing_steps is None, as many smoothing steps are applied until the whole mesh is filled with with non-zeros. Only use this option if the vertices correspond to a subsampled version of the mesh. Parameters ---------- vertices : 1d array vertex indices adj_mat : sparse matrix N x N adjacency matrix of the full mesh smoothing_steps : int or None number of smoothing steps (Default: 20) verbose : bool, str, int, or None If not None, override default verbose level (see surfer.verbose). Returns ------- smooth_mat : sparse matrix smoothing matrix with size N x len(vertices) """ from scipy import sparse logger.info("Updating smoothing matrix, be patient..") e = adj_mat.copy() e.data[e.data == 2] = 1 n_vertices = e.shape[0] e = e + sparse.eye(n_vertices, n_vertices) idx_use = vertices smooth_mat = 1.0 n_iter = smoothing_steps if smoothing_steps is not None else 1000 for k in range(n_iter): e_use = e[:, idx_use] data1 = e_use * np.ones(len(idx_use)) idx_use = np.where(data1)[0] scale_mat = sparse.dia_matrix((1 / data1[idx_use], 0), shape=(len(idx_use), len(idx_use))) smooth_mat = scale_mat * e_use[idx_use, :] * smooth_mat logger.info("Smoothing matrix creation, step %d" % (k + 1)) if smoothing_steps is None and len(idx_use) >= n_vertices: break # Make sure the smoothing matrix has the right number of rows # and is in COO format smooth_mat = smooth_mat.tocoo() smooth_mat = sparse.coo_matrix((smooth_mat.data, (idx_use[smooth_mat.row], smooth_mat.col)), shape=(n_vertices, len(vertices))) return smooth_mat
def _randomized_logistic(X, y, weights, mask, C=1., verbose=False, fit_intercept=True, tol=1e-3): X = X[safe_mask(X, mask)] y = y[mask] if issparse(X): size = len(weights) weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size)) X = X * weight_dia else: X *= (1 - weights) C = np.atleast_1d(np.asarray(C, dtype=np.float64)) if C.ndim > 1: raise ValueError("C should be 1-dimensional array-like, " "but got a {}-dimensional array-like instead: {}." .format(C.ndim, C)) scores = np.zeros((X.shape[1], len(C)), dtype=np.bool) for this_C, this_scores in zip(C, scores.T): # XXX : would be great to do it with a warm_start ... clf = LogisticRegression(C=this_C, tol=tol, penalty='l1', dual=False, fit_intercept=fit_intercept, solver='liblinear', multi_class='ovr') clf.fit(X, y) this_scores[:] = np.any( np.abs(clf.coef_) > 10 * np.finfo(np.float).eps, axis=0) return scores
def to_sparse(D, format="csc"): """ Transform dense matrix to sparse matrix of return_type bsr_matrix(arg1[, shape, dtype, copy, blocksize]) Block Sparse Row matrix coo_matrix(arg1[, shape, dtype, copy]) A sparse matrix in COOrdinate format. csc_matrix(arg1[, shape, dtype, copy]) Compressed Sparse Column matrix csr_matrix(arg1[, shape, dtype, copy]) Compressed Sparse Row matrix dia_matrix(arg1[, shape, dtype, copy]) Sparse matrix with DIAgonal storage dok_matrix(arg1[, shape, dtype, copy]) Dictionary Of Keys based sparse matrix. lil_matrix(arg1[, shape, dtype, copy]) Row-based linked list sparse matrix :param D: Dense matrix :param format: how to save the sparse matrix :return: sparse version """ if format == "bsr": return sprs.bsr_matrix(D) elif format == "coo": return sprs.coo_matrix(D) elif format == "csc": return sprs.csc_matrix(D) elif format == "csr": return sprs.csr_matrix(D) elif format == "dia": return sprs.dia_matrix(D) elif format == "dok": return sprs.dok_matrix(D) elif format == "lil": return sprs.lil_matrix(D) else: return to_dense(D)
def compute_lrbt_transfos(zfc=None, zfo=None, mmat=None, trunck=dict(threshh=1e-6)): """ the transformation matrices for the BT MOR :param zfc: Factor of the controllability Gramian :math:`W_c = Z_cZ_c^H` :param zfo: Factor of the observability Gramian :math:`W_o = Z_oZ_o^H` :param mmat: mass matrix :param trunck: truncation parameters :return: the left and right transformation matrices `tl` and `tr` \ for the balanced truncation """ if mmat is None: mmat = sps.eye(zfo.shape[0]) lsv_mat, sv, rsv_matt = np.linalg.svd(np.dot(zfc.T, mmat*zfo)) k = np.where(sv > trunck['threshh'])[0].size lsvk, rsvk, svk = lsv_mat[:, :k], rsv_matt.T[:, :k], sv[:k] svsqri = 1./np.sqrt(svk) svsqri_mat = sps.dia_matrix((svsqri, np.array([0])), shape=(k, k)) tl = np.dot(zfc, lsvk*svsqri_mat) tr = np.dot(zfo, rsvk*svsqri_mat) return tl, tr, sv
def identity(self):
    """ Returns the identity matrix with the same dimension as the adjacency matrix. """
    return dia_matrix((ones(self.nnodes), 0), shape=(self.nnodes, self.nnodes))
def laplacian(self): if self._laplacian is None: self._laplacian = dia_matrix((self.degrees, 0), shape=(self.nnodes, self.nnodes)) self._laplacian -= self.adj return self._laplacian
def mask_test_edges(adj):
    # Function to build test set with 10% positive links
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
    # TODO: Clean up.

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = sp.triu(adj)  # keep the upper triangle of the adjacency matrix
    adj_tuple = sparse_to_tuple(adj_triu)  # convert to coo_matrix tuple form
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] / 10.))  # 10% of edges for the test set
    num_val = int(np.floor(edges.shape[0] / 20.))  # 5% of edges for the validation set

    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0)

    def ismember(a, b, tol=5):
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return np.any(rows_close)

    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], edges_all):
            continue
        if test_edges_false:
            if ismember([idx_j, idx_i], np.array(test_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(test_edges_false)):
                continue
        test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], train_edges):
            continue
        if ismember([idx_j, idx_i], train_edges):
            continue
        if ismember([idx_i, idx_j], val_edges):
            continue
        if ismember([idx_j, idx_i], val_edges):
            continue
        if val_edges_false:
            if ismember([idx_j, idx_i], np.array(val_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(val_edges_false)):
                continue
        val_edges_false.append([idx_i, idx_j])

    print(~ismember(val_edges_false, edges_all))
    # assert ~ismember(test_edges_false, edges_all)
    # assert ~ismember(val_edges_false, edges_all)
    # assert ~ismember(val_edges, train_edges)
    # assert ~ismember(test_edges, train_edges)
    # assert ~ismember(val_edges, test_edges)

    data = np.ones(train_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
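A self-contained sketch of the diagonal-removal idiom that opens the function above (and recurs in the GAE snippets below): subtracting a dia_matrix built from the matrix's own diagonal zeroes out self-loops.

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[1., 1., 0.],
                              [1., 0., 1.],
                              [0., 1., 1.]]))  # toy adjacency with self-loops
adj_no_self = adj - sp.dia_matrix(
    (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
adj_no_self.eliminate_zeros()
assert adj_no_self.diagonal().sum() == 0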
def hpfilter(X, lamb=1600): """ Hodrick-Prescott filter Parameters ---------- X : array-like The 1d ndarray timeseries to filter of length (nobs,) or (nobs,1) lamb : float The Hodrick-Prescott smoothing parameter. A value of 1600 is suggested for quarterly data. Ravn and Uhlig suggest using a value of 6.25 (1600/4**4) for annual data and 129600 (1600*3**4) for monthly data. Returns ------- cycle : array The estimated cycle in the data given lamb. trend : array The estimated trend in the data given lamb. Examples -------- >>> import statsmodels.api as sm >>> import pandas as pd >>> dta = sm.datasets.macrodata.load_pandas().data >>> index = pd.DatetimeIndex(start='1959Q1', end='2009Q4', freq='Q') >>> dta.set_index(index, inplace=True) >>> cycle, trend = sm.tsa.filters.hpfilter(dta.realgdp, 1600) >>> gdp_decomp = dta[['realgdp']] >>> gdp_decomp["cycle"] = cycle >>> gdp_decomp["trend"] = trend >>> import matplotlib.pyplot as plt >>> fig, ax = plt.subplots() >>> gdp_decomp[["realgdp", "trend"]]["2000-03-31":].plot(ax=ax, ... fontsize=16) >>> plt.show() .. plot:: plots/hpf_plot.py Notes ----- The HP filter removes a smooth trend, `T`, from the data `X`. by solving min sum((X[t] - T[t])**2 + lamb*((T[t+1] - T[t]) - (T[t] - T[t-1]))**2) T t Here we implemented the HP filter as a ridge-regression rule using scipy.sparse. In this sense, the solution can be written as T = inv(I - lamb*K'K)X where I is a nobs x nobs identity matrix, and K is a (nobs-2) x nobs matrix such that K[i,j] = 1 if i == j or i == j + 2 K[i,j] = -2 if i == j + 1 K[i,j] = 0 otherwise See Also -------- statsmodels.tsa.filters.bk_filter.bkfilter statsmodels.tsa.filters.cf_filter.cffilter statsmodels.tsa.seasonal.seasonal_decompose References ---------- Hodrick, R.J, and E. C. Prescott. 1980. "Postwar U.S. Business Cycles: An Empricial Investigation." `Carnegie Mellon University discussion paper no. 451`. Ravn, M.O and H. Uhlig. 2002. "Notes On Adjusted the Hodrick-Prescott Filter for the Frequency of Observations." `The Review of Economics and Statistics`, 84(2), 371-80. """ _pandas_wrapper = _maybe_get_pandas_wrapper(X) X = np.asarray(X, float) if X.ndim > 1: X = X.squeeze() nobs = len(X) I = sparse.eye(nobs, nobs) # noqa:E741 offsets = np.array([0, 1, 2]) data = np.repeat([[1.], [-2.], [1.]], nobs, axis=1) K = sparse.dia_matrix((data, offsets), shape=(nobs - 2, nobs)) use_umfpack = True trend = spsolve(I + lamb * K.T.dot(K), X, use_umfpack=use_umfpack) cycle = X - trend if _pandas_wrapper is not None: return _pandas_wrapper(cycle), _pandas_wrapper(trend) return cycle, trend
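To make the K matrix described in the docstring concrete, a small numpy/scipy-only sketch that builds it exactly as the function does, for nobs = 5.

import numpy as np
from scipy import sparse

nobs = 5
offsets = np.array([0, 1, 2])
data = np.repeat([[1.], [-2.], [1.]], nobs, axis=1)
K = sparse.dia_matrix((data, offsets), shape=(nobs - 2, nobs))
print(K.toarray())
# [[ 1. -2.  1.  0.  0.]
#  [ 0.  1. -2.  1.  0.]
#  [ 0.  0.  1. -2.  1.]]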
def main(): timer = Timer() timer.start() os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' tf.set_random_seed(0) MAX_SENT_LENGTH = 20 MAX_SENTS = 100 EMBEDDING_DIM = 50 POST_DIM = 10 TEXT_DIM = 50 VALIDATION_SPLIT = 0.2 MIXTURES = 5 Graph_DIM = 10 TRAINING_EPOCHS = 50 flags = tf.app.flags FLAGS = flags.FLAGS flags.DEFINE_float('learning_rate', 0.0001, 'Initial learning rate.') flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.') flags.DEFINE_integer('hidden2', Graph_DIM, 'Number of units in hidden layer 2.') flags.DEFINE_integer('batch_size', 32, 'Size of a mini-batch') flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).') flags.DEFINE_float('lambda1', 1e-4, 'Parameter of energy.') flags.DEFINE_float('lambda2', 1e-9, 'lossSigma.') flags.DEFINE_float('lambda3', 0.01, 'GAE.') flags.DEFINE_string('model', 'gcn_ae', 'Model string.') model_str = FLAGS.model # variable to store evaluation results precision_list = [] recall_list = [] f1_list = [] auc_list = [] for t in range(10): with open('./data/instagram.pickle', 'rb') as handle: store_data = pickle.load(handle) labels = store_data['labels'] df = store_data['df'] data = store_data['data'] postInfo = store_data['postInfo'] timeInfo = store_data['timeInfo'] embedding_matrix = store_data['embedding_matrix'] word_index = store_data['word_index'] num_session = data.shape[0] nb_validation_samples = int(VALIDATION_SPLIT * num_session) '''For Evaluation''' single_label = np.asarray(labels) labels = to_categorical(np.asarray(labels)) print('Shape of data tensor:', data.shape) print('Shape of label tensor:', labels.shape) zeros = np.zeros(num_session) zeros = zeros.reshape((num_session, 1, 1)) # FLAGS.learning_rate = lr '''Hierarchical Attention Network for text and other info''' placeholders = { 'zero_input': tf.placeholder(tf.float32, shape=[None, 1, 1]), 'review_input': tf.placeholder(tf.float32, shape=[None, MAX_SENTS, MAX_SENT_LENGTH + 1]), 'post_input': tf.placeholder(tf.float32, shape=[ None, 4, ]), 'time_label': tf.placeholder(tf.float32, shape=[None, MAX_SENTS]) } g = nx.Graph() edgelist = pd.read_csv('./data/source_target.csv') for i, elrow in edgelist.iterrows(): g.add_edge(elrow[0].strip('\n'), elrow[1].strip('\n')) adj = nx.adjacency_matrix(g) user_attributes = pd.read_csv('./data/user_friend_follower.csv') user_attributes = user_attributes.set_index('user').T.to_dict('list') nodelist = list(g.nodes()) features = [] User_post = np.zeros( (len(nodelist), num_session)) # 2218 number of posts for id, node in enumerate(nodelist): posts_ID = df.loc[df['owner_id'] == node].index.values.tolist() for p_id in posts_ID: User_post[id][p_id] = 1 features.append(user_attributes[node]) # only keep the posts that are in the training data User_post_train = User_post[:, :-nb_validation_samples] User_post_test = User_post[:, -nb_validation_samples:] features = sparse.csr_matrix(features) features = normalize(features, norm='max', axis=0) adj_orig = adj adj_orig = adj_orig - sparse.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_norm = preprocess_graph(adj) adj_label = adj + sparse.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) # Define placeholders placeholders.setdefault('features', tf.sparse_placeholder(tf.float32)) placeholders.setdefault('adj', tf.sparse_placeholder(tf.float32)) placeholders.setdefault('adj_orig', tf.sparse_placeholder(tf.float32)) placeholders.setdefault('dropout', tf.placeholder_with_default(0., shape=())) placeholders.setdefault( 
'user_post', tf.placeholder(tf.int32, [len(nodelist), None])) d = {placeholders['dropout']: FLAGS.dropout} placeholders.update(d) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] '''Graph AutoEncoder''' if model_str == 'gcn_ae': Graph_model = GCNModelAE(placeholders, num_features, features_nonzero) elif model_str == 'gcn_vae': Graph_model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero) embedding_layer = Embedding(len(word_index) + 1, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SENT_LENGTH, trainable=True, mask_zero=True) all_input = Input(shape=(MAX_SENT_LENGTH + 1, )) sentence_input = crop(1, 0, MAX_SENT_LENGTH)(all_input) # slice time_input = crop(1, MAX_SENT_LENGTH, MAX_SENT_LENGTH + 1)(all_input) # slice embedded_sequences = embedding_layer(sentence_input) # embedded_sequences=BatchNormalization()(embedded_sequences) l_lstm = Bidirectional(GRU(TEXT_DIM, return_sequences=True))(embedded_sequences) l_att = AttLayer(TEXT_DIM)(l_lstm) # (?,200) # time_embedding=Dense(TIME_DIM,activation='sigmoid')(time_input) merged_output = Concatenate()([l_att, time_input]) # text+time information sentEncoder = Model(all_input, merged_output) review_input = placeholders['review_input'] review_encoder = TimeDistributed(sentEncoder)(review_input) l_lstm_sent = Bidirectional(GRU(TEXT_DIM, return_sequences=True))(review_encoder) fully_sent = Dense(1, use_bias=False)(l_lstm_sent) pred_time = Activation(activation='linear')(fully_sent) zero_input = placeholders['zero_input'] shift_predtime = Concatenate(axis=1)([zero_input, pred_time]) shift_predtime = crop(1, 0, MAX_SENTS)(shift_predtime) l_att_sent = AttLayer(TEXT_DIM)(l_lstm_sent) # embed the #likes, shares post_input = placeholders['post_input'] fully_post = Dense(POST_DIM, use_bias=False)(post_input) # norm_fullypost=BatchNormalization()(fully_post) post_embedding = Activation(activation='relu')(fully_post) fully_review = concatenate( [l_att_sent, post_embedding] ) # merge the document level vectro with the additional embedded features such as #likes pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) with tf.name_scope('graph_cost'): preds_sub = Graph_model.reconstructions labels_sub = tf.reshape( tf.sparse_tensor_to_dense(placeholders['adj_orig'], validate_indices=False), [-1]) if model_str == 'gcn_ae': opt = CostAE(preds=preds_sub, labels=labels_sub, pos_weight=pos_weight, norm=norm) elif model_str == 'gcn_vae': opt = CostVAE(preds=preds_sub, labels=labels_sub, model=Graph_model, num_nodes=num_nodes, pos_weight=pos_weight, norm=norm) User_latent = Graph_model.z_mean # (n_user, G_embeddim) Post_latent = fully_review # (batch size, text_embed_dim+post_dim) max_indices = tf.argmax(placeholders['user_post'], axis=0) add_latent = tf.gather(User_latent, max_indices) session_latent = tf.concat( [Post_latent, add_latent], axis=1) # the representation of text + graph '''DAGMM''' h1_size = 2 * TEXT_DIM + Graph_DIM + POST_DIM gmm = GMM(MIXTURES) est_net = EstimationNet([h1_size, MIXTURES], tf.nn.tanh) gamma = est_net.inference(session_latent, FLAGS.dropout) gmm.fit(session_latent, gamma) individual_energy = gmm.energy(session_latent) Time_label = placeholders['time_label'] Time_label = tf.reshape(Time_label, [tf.shape(Time_label)[0], MAX_SENTS, 1]) with tf.name_scope('loss'): GAE_error = opt.cost energy = 
tf.reduce_mean(individual_energy) lossSigma = gmm.cov_diag_loss() prediction_error = tf.losses.mean_squared_error( shift_predtime, Time_label) loss = prediction_error + FLAGS.lambda1 * energy + FLAGS.lambda2 * lossSigma + FLAGS.lambda3 * GAE_error x_train = data[:-nb_validation_samples] time_train = timeInfo[:-nb_validation_samples] zeros_train = zeros[:-nb_validation_samples] y_train = labels[:-nb_validation_samples] post_train = postInfo[:-nb_validation_samples] x_val = data[-nb_validation_samples:] zeros_test = zeros[-nb_validation_samples:] time_test = timeInfo[-nb_validation_samples:] y_val = labels[-nb_validation_samples:] post_test = postInfo[-nb_validation_samples:] y_single = single_label[-nb_validation_samples:] print( 'Number of positive and negative posts in training and validation set' ) print(y_train.sum(axis=0)) print(y_val.sum(axis=0)) print("model fitting - Unsupervised cyberbullying detection") optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) train_step = optimizer.minimize(loss) GAEcorrect_prediction = tf.equal( tf.cast(tf.greater_equal(tf.sigmoid(preds_sub), 0.5), tf.int32), tf.cast(labels_sub, tf.int32)) feed_dict_train = construct_feed_dict(zeros_train, x_train, post_train, time_train, FLAGS.dropout, adj_norm, adj_label, features, User_post_train, placeholders) feed_dict_train.update({placeholders['dropout']: FLAGS.dropout}) sess = tf.Session() sess.run(tf.global_variables_initializer()) total_batch = int(num_session / FLAGS.batch_size) zero_batches = np.array_split(zeros_train, total_batch) x_batches = np.array_split(x_train, total_batch) p_batches = np.array_split(post_train, total_batch) t_batches = np.array_split(time_train, total_batch) UP_batches = np.array_split(User_post_train, total_batch, axis=1) for epoch in range(TRAINING_EPOCHS): ave_cost = 0 ave_energy = 0 ave_recon = 0 ave_sigma = 0 ave_GAE = 0 for i in range(total_batch): batch_x = x_batches[i] batch_p = p_batches[i] batch_t = t_batches[i] batch_z = zero_batches[i] user_post = UP_batches[i] feed_dict = construct_feed_dict(batch_z, batch_x, batch_p, batch_t, FLAGS.dropout, adj_norm, adj_label, features, user_post, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) _, total_loss, loss_sigma, GAE_loss, Energy_error, recon_error = sess.run( [ train_step, loss, lossSigma, GAE_error, energy, prediction_error ], feed_dict) ave_cost += total_loss / total_batch ave_energy += Energy_error / total_batch ave_GAE += GAE_loss / total_batch ave_sigma += loss_sigma / total_batch ave_recon += recon_error / total_batch # if epoch % 10 == 0 or epoch == TRAINING_EPOCHS - 1: # print("This is epoch %d, the total loss is %f, energy error is %f, GAE error is %f, sigma error is %f,prediction error is %f") \ # % (epoch + 1, ave_cost, ave_energy, ave_GAE, ave_sigma, ave_recon) fix = gmm.fix_op() sess.run(fix, feed_dict=feed_dict_train) feed_dict_test = construct_feed_dict(zeros_test, x_val, post_test, time_test, FLAGS.dropout, adj_norm, adj_label, features, User_post_test, placeholders) pred_energy, representations = sess.run( [individual_energy, session_latent], feed_dict=feed_dict_test) bully_energy_threshold = np.percentile(pred_energy, 65) print('the bully energy threshold is : %f' % bully_energy_threshold) label_pred = np.where(pred_energy >= bully_energy_threshold, 1, 0) print(precision_recall_fscore_support(y_single, label_pred)) print(accuracy_score(y_single, label_pred)) print(roc_auc_score(y_single, label_pred)) tf.reset_default_graph() K.clear_session() precision_list.append( 
            precision_recall_fscore_support(y_single, label_pred)[0][1])
        recall_list.append(
            precision_recall_fscore_support(y_single, label_pred)[1][1])
        f1_list.append(
            precision_recall_fscore_support(y_single, label_pred)[2][1])
        auc_list.append(roc_auc_score(y_single, label_pred))

    print('>>> Evaluation metrics')
    print('>>> precision mean: {0:.4f}; precision std: {1:.4f}'.format(
        np.mean(precision_list), np.std(precision_list)))
    print('>>> recall mean: {0:.4f}; recall std: {1:.4f}'.format(
        np.mean(recall_list), np.std(recall_list)))
    print('>>> f1 mean: {0:.4f}; f1 std: {1:.4f}'.format(
        np.mean(f1_list), np.std(f1_list)))
    print('>>> auc mean: {0:.4f}; auc std: {1:.4f}'.format(
        np.mean(auc_list), np.std(auc_list)))

    timer.stop()
def lat2SW(nrows=3, ncols=5, criterion="rook", row_st=False):
    """
    Create a sparse W matrix for a regular lattice.

    Parameters
    ----------
    nrows     : int
                number of rows
    ncols     : int
                number of columns
    criterion : {"rook", "queen", "bishop"}
                type of contiguity. Default is rook.
    row_st    : boolean
                If True, the created sparse W object is row-standardized so
                every row sums up to one. Defaults to False.

    Returns
    -------
    w : scipy.sparse.dia_matrix
        instance of a scipy sparse matrix

    Notes
    -----
    Observations are row ordered: first k observations are in row 0, next k
    in row 1, and so on. This method directly creates the W matrix using the
    structure of the contiguity type.

    Examples
    --------
    >>> from pysal import weights
    >>> w9 = weights.lat2SW(3,3)
    >>> w9[0,1]
    1
    >>> w9[3,6]
    1
    >>> w9r = weights.lat2SW(3,3, row_st=True)
    >>> w9r[3,6]
    0.33333333333333331
    """
    n = nrows * ncols
    diagonals = []
    offsets = []
    if criterion == "rook" or criterion == "queen":
        d = np.ones((1, n))
        for i in range(ncols - 1, n, ncols):
            d[0, i] = 0
        diagonals.append(d)
        offsets.append(-1)

        d = np.ones((1, n))
        diagonals.append(d)
        offsets.append(-ncols)

    if criterion == "queen" or criterion == "bishop":
        d = np.ones((1, n))
        for i in range(0, n, ncols):
            d[0, i] = 0
        diagonals.append(d)
        offsets.append(-(ncols - 1))

        d = np.ones((1, n))
        for i in range(ncols - 1, n, ncols):
            d[0, i] = 0
        diagonals.append(d)
        offsets.append(-(ncols + 1))

    data = np.concatenate(diagonals)
    offsets = np.array(offsets)
    m = sparse.dia_matrix((data, offsets), shape=(n, n), dtype=np.int8)
    m = m + m.T
    if row_st:
        m = sparse.spdiags(1. / m.sum(1).T, 0, *m.shape) * m
    return m
def maskTestEdges(adj, testPercent=10., valPercent=5.): """ Randomly removes some edges from original graph to create test and validation sets for link prediction task :param adj: complete sparse adjacency matrix of the graph :param testPercent: percentage of edges in test set :param valPercent: percentage of edges in validation set :return: train incomplete adjacency matrix, validation and test sets """ # Remove diagonal elements adj = adj - sp.dia_matrix((adj.diagonal()[None, :], [0]), shape=adj.shape) adj.eliminate_zeros() # Check that diag is zero: assert adj.diagonal().sum() == 0 edgesPositive, _, _ = sparseToTuple(adj) # Filtering out edges from lower triangle of adjacency matrix edgesPositive = edgesPositive[edgesPositive[:,1] > edgesPositive[:,0],:] # val_edges, val_edges_false, test_edges, test_edges_false = None, None, None, None # number of positive (and negative) edges in test and val sets: numTest = int(np.floor(edgesPositive.shape[0] / (100. / testPercent))) numVal = int(np.floor(edgesPositive.shape[0] / (100. / valPercent))) # sample positive edges for test and val sets: edgesPositiveIdx = np.arange(edgesPositive.shape[0]) np.random.shuffle(edgesPositiveIdx) valEdgeIdx = edgesPositiveIdx[:numVal] testEdgeIdx = edgesPositiveIdx[numVal:(numVal + numTest)] testEdges = edgesPositive[testEdgeIdx] # positive test edges valEdges = edgesPositive[valEdgeIdx] # positive val edges trainEdges = np.delete(edgesPositive, np.hstack([testEdgeIdx, valEdgeIdx]), axis = 0) # positive train edges # the above strategy for sampling without replacement will not work for # sampling negative edges on large graphs, because the pool of negative # edges is much much larger due to sparsity, therefore we'll use # the following strategy: # 1. sample random linear indices from adjacency matrix WITH REPLACEMENT # (without replacement is super slow). sample more than we need so we'll # probably have enough after all the filtering steps. # 2. remove any edges that have already been added to the other edge lists # 3. convert to (i,j) coordinates # 4. swap i and j where i > j, to ensure they're upper triangle elements # 5. remove any duplicate elements if there are any # 6. remove any diagonal elements # 7. 
if we don't have enough edges, repeat this process until we get enough positiveIdx, _, _ = sparseToTuple(adj) # [i,j] coord pairs for all true edges positiveIdx = positiveIdx[:,0]*adj.shape[0] + positiveIdx[:,1] # linear indices testEdgesFalse = np.empty((0,2),dtype='int64') idxTestEdgesFalse = np.empty((0,),dtype='int64') while len(testEdgesFalse) < len(testEdges): # step 1: idx = np.random.choice(adj.shape[0]**2, 2*(numTest - len(testEdgesFalse)), replace = True) # step 2: idx = idx[~np.in1d(idx, positiveIdx, assume_unique = True)] idx = idx[~np.in1d(idx, idxTestEdgesFalse, assume_unique = True)] # step 3: rowidx = idx // adj.shape[0] colidx = idx % adj.shape[0] coords = np.vstack((rowidx,colidx)).transpose() # step 4: lowertrimask = coords[:,0] > coords[:,1] coords[lowertrimask] = coords[lowertrimask][:,::-1] # step 5: coords = np.unique(coords, axis = 0) # note: coords are now sorted lexicographically np.random.shuffle(coords) # not anymore # step 6: coords = coords[coords[:,0] != coords[:,1]] # step 7: coords = coords[:min(numTest, len(idx))] testEdgesFalse = np.append(testEdgesFalse, coords, axis = 0) idx = idx[:min(numTest, len(idx))] idxTestEdgesFalse = np.append(idxTestEdgesFalse, idx) valEdgesFalse = np.empty((0,2), dtype = 'int64') idxValEdgesFalse = np.empty((0,), dtype = 'int64') while len(valEdgesFalse) < len(valEdges): # step 1: idx = np.random.choice(adj.shape[0]**2, 2*(numVal - len(valEdgesFalse)), replace = True) # step 2: idx = idx[~np.in1d(idx, positiveIdx, assume_unique = True)] idx = idx[~np.in1d(idx, idxTestEdgesFalse, assume_unique = True)] idx = idx[~np.in1d(idx, idxValEdgesFalse, assume_unique = True)] # step 3: rowidx = idx // adj.shape[0] colidx = idx % adj.shape[0] coords = np.vstack((rowidx,colidx)).transpose() # step 4: lowertrimask = coords[:,0] > coords[:,1] coords[lowertrimask] = coords[lowertrimask][:,::-1] # step 5: coords = np.unique(coords, axis = 0) # note: coords are now sorted lexicographically np.random.shuffle(coords) # not any more # step 6: coords = coords[coords[:,0] != coords[:,1]] # step 7: coords = coords[:min(numVal, len(idx))] valEdgesFalse = np.append(valEdgesFalse, coords, axis = 0) idx = idx[:min(numVal, len(idx))] idxValEdgesFalse = np.append(idxValEdgesFalse, idx) # sanity checks: trainEdgesLinear = trainEdges[:,0]*adj.shape[0] + trainEdges[:,1] testEdgesLinear = testEdges[:,0]*adj.shape[0] + testEdges[:,1] assert not np.any(np.in1d(idxTestEdgesFalse, positiveIdx)) assert not np.any(np.in1d(idxValEdgesFalse, positiveIdx)) assert not np.any(np.in1d(valEdges[:,0]*adj.shape[0]+valEdges[:,1], trainEdgesLinear)) assert not np.any(np.in1d(testEdgesLinear, trainEdgesLinear)) assert not np.any(np.in1d(valEdges[:,0]*adj.shape[0]+valEdges[:,1], testEdgesLinear)) # Re-build adj matrix data = np.ones(trainEdges.shape[0]) adjTrain = sp.csr_matrix((data, (trainEdges[:, 0], trainEdges[:, 1])), shape=adj.shape) adjTrain = adjTrain + adjTrain.T return adjTrain, valEdges, valEdgesFalse, testEdges, testEdgesFalse
# Load data
# adj, features = load_data(dataset_str)
matfile = sio.loadmat('../../../data/' + dataset_str + '.mat')
adj0 = matfile['net']
try:
    features0 = matfile['group']
except KeyError:
    features0 = sp.identity(adj0.shape[0])

Roc = []
Ap = []
for expnum in range(10):
    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj0
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj0)
    adj = adj_train

    features = features0
    if FLAGS.features == 0:
        features = sp.identity(features0.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
def fourth_difference_matrix(N=3, bc='BothClamped', cfs=None): """ generates the fourth order difference operator in matrix form """ checkInputArgs(N, bc) diag = 3 * [None] # determine matrix dimensions based on N and type of boundary conditions if bc == 'BothFree': M = N + 1 elif 'Free' in bc: M = N else: M = N - 1 diag[0] = 6 + np.zeros(M) diag[1] = -4 + np.zeros(M) diag[2] = np.ones(M) if 'Free' not in bc: if bc == 'BothSimplySupported': diag[0][0] = 5 diag[0][-1] = 5 elif bc == 'LeftSimplySupportedRightClamped': diag[0][0] = 5 elif bc == 'LeftClampedRightSimplySupported': diag[0][-1] = 5 return dia_matrix( ([diag[0], diag[1], diag[1], diag[2], diag[2]], [0, 1, -1, 2, -2]), shape=(M, M)) else: if cfs == None: raise TypeError('argument cfs must be supplied') elif not isinstance(cfs, dict): raise TypeError('argument cfs must be of type dict') if bc == 'BothFree': diag[0][0] = cfs['a0'] diag[0][-1] = cfs['a0'] diag[0][1] = 5 diag[0][-2] = 5 diag[1][0] = -2 diag[1][-2] = cfs['a1'] diag2 = np.concatenate((cfs['a2'] + np.zeros(1), np.zeros(M - 2), cfs['a2'] + np.zeros(1))) Dxxxx = dia_matrix( ([diag[0], np.fliplr([diag[1]])[0], diag[1], diag[2], diag[2] ], [0, 1, -1, 2, -2]), shape=(M, M)) elif bc in ('LeftClampedRightFree', 'LeftSimplySupportedRightFree'): diag[0][-1] = cfs['a0'] diag[0][-2] = 5 diag[1][-2] = cfs['a1'] diag2 = np.concatenate((np.zeros(M - 1), cfs['a2'] + np.zeros(1))) if bc == 'LeftSimplySupportedRightFree': diag[0][0] = 5 tmp = np.roll(np.fliplr([diag[1]])[0], -1) tmp[-1] = -2 Dxxxx = dia_matrix( ([diag[0], tmp, diag[1], diag[2], diag[2]], [0, 1, -1, 2, -2]), shape=(M, M)) elif bc in ('LeftFreeRightClamped', 'LeftFreeRightSimplySupported'): diag[0][0] = cfs['a0'] diag[0][1] = 5 diag[1][0] = -2 diag2 = np.concatenate((cfs['a2'] + np.zeros(1), np.zeros(M - 1))) if bc == 'LeftFreeRightSimplySupported': diag[0][-1] = 5 tmp = np.roll(np.fliplr([diag[1]])[0], 1) tmp[1] = cfs['a1'] Dxxxx = dia_matrix( ([diag[0], tmp, diag[1], diag[2], diag[2]], [0, 1, -1, 2, -2]), shape=(M, M)) return (Dxxxx, dia_matrix(([ diag2, -1.0 * np.roll(diag2, 1), -1.0 * np.roll(diag2, -1) ], [0, 1, -1]), shape=(M, M)))
def train(self, adj): # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train = adj features = sp.identity(adj.shape[0]) # featureless # Some preprocessing adj_norm = preprocess_graph(adj) # Define placeholders self.placeholders = { 'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()) } num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] # Create model if self.model_selection == 'gcn_ae': self.model = GCNModelAE(self.placeholders, num_features, features_nonzero, self.hidden1, self.hidden2) elif self.model_selection == 'gcn_vae': self.model = GCNModelVAE(self.placeholders, num_features, num_nodes, features_nonzero, self.hidden1, self.hidden2) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) # Optimizer with tf.name_scope('optimizer'): if self.model_selection == 'gcn_ae': opt = OptimizerAE(preds=self.model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( self.placeholders['adj_orig'], validate_indices=False), [-1]), pos_weight=pos_weight, norm=norm, learning_rate=self.learning_rate) elif self.model_selection == 'gcn_vae': opt = OptimizerVAE(preds=self.model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( self.placeholders['adj_orig'], validate_indices=False), [-1]), model=self.model, num_nodes=num_nodes, pos_weight=pos_weight, norm=norm, learning_rate=self.learning_rate) # Initialize session self.sess = tf.Session() self.sess.run(tf.global_variables_initializer()) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) # Train model for epoch in range(self.epochs): t = time.time() # Construct feed dictionary self.feed_dict = construct_feed_dict(adj_norm, adj_label, features, self.placeholders) self.feed_dict.update({self.placeholders['dropout']: self.dropout}) # Run single weight update outs = self.sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=self.feed_dict) # Compute average loss avg_cost = outs[1] avg_accuracy = outs[2] print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(avg_cost), "train_acc=", "{:.5f}".format(avg_accuracy), "time=", "{:.5f}".format(time.time() - t)) print("Optimization Finished!")
def test_dia(self): x = sparse.dia_matrix( (cupy.array([], 'f'), cupy.array([0], 'i')), shape=(0, 0), dtype='f') self.assertTrue(sparse.isspmatrix_dia(x))
def d2matrix(nelem): elements = ones((3, nelem)) elements[1, :] *= -2 return dia_matrix((elements, [-1, 0, 1]), shape=(nelem, nelem)).tocsc()
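# Assumed usage sketch: d2matrix builds the standard 1-D second-difference
# operator (tridiagonal [1, -2, 1]) in CSC form.
D2 = d2matrix(5)
print(D2.toarray())
# [[-2.  1.  0.  0.  0.]
#  [ 1. -2.  1.  0.  0.]
#  [ 0.  1. -2.  1.  0.]
#  [ 0.  0.  1. -2.  1.]
#  [ 0.  0.  0.  1. -2.]]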
def gae_scores(adj_sparse, train_test_split, features_matrix=None, LEARNING_RATE=0.01, EPOCHS=200, HIDDEN1_DIM=32, HIDDEN2_DIM=16, DROPOUT=0, edge_score_mode="dot-product", verbose=1): adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \ test_edges, test_edges_false = train_test_split # Unpack train-test split start_time = time.time() # Train on CPU (hide GPU) due to memory constraints os.environ['CUDA_VISIBLE_DEVICES'] = "" # Convert features from normal matrix --> sparse matrix --> tuple # features_tuple contains: (list of matrix coordinates, list of values, matrix dimensions) if features_matrix is None: x = sp.lil_matrix(np.identity(adj_sparse.shape[0])) else: x = sp.lil_matrix(features_matrix) features_tuple = sparse_to_tuple(x) features_shape = features_tuple[2] # Get graph attributes (to feed into model) num_nodes = adj_sparse.shape[0] # number of nodes in adjacency matrix num_features = features_shape[ 1] # number of features (columsn of features matrix) features_nonzero = features_tuple[1].shape[ 0] # number of non-zero entries in features matrix (or length of values list) # Store original adjacency matrix (without diagonal entries) for later adj_orig = deepcopy(adj_sparse) adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() # Normalize adjacency matrix adj_norm = preprocess_graph(adj_train) # Add in diagonals adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) # Define placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()) } # How much to weigh positive examples (true edges) in cost print_function # Want to weigh less-frequent classes higher, so as to prevent model output bias # pos_weight = (num. negative samples / (num. positive samples) pos_weight = float(adj_sparse.shape[0] * adj_sparse.shape[0] - adj_sparse.sum()) / adj_sparse.sum() # normalize (scale) average weighted cost norm = adj_sparse.shape[0] * adj_sparse.shape[0] / float( (adj_sparse.shape[0] * adj_sparse.shape[0] - adj_sparse.sum()) * 2) # Create VAE model model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero, HIDDEN1_DIM, HIDDEN2_DIM) opt = OptimizerVAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense(placeholders['adj_orig'], validate_indices=False), [-1]), model=model, num_nodes=num_nodes, pos_weight=pos_weight, norm=norm, learning_rate=LEARNING_RATE) cost_val = [] acc_val = [] val_roc_score = [] prev_embs = [] # Initialize session sess = tf.Session() sess.run(tf.global_variables_initializer()) # Train model for epoch in range(EPOCHS): t = time.time() # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features_tuple, placeholders) feed_dict.update({placeholders['dropout']: DROPOUT}) # Run single weight update outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict) # Compute average loss avg_cost = outs[1] avg_accuracy = outs[2] # Evaluate predictions feed_dict.update({placeholders['dropout']: 0}) gae_emb = sess.run(model.z_mean, feed_dict=feed_dict) prev_embs.append(gae_emb) gae_score_matrix = np.dot(gae_emb, gae_emb.T) # # TODO: remove this (debugging) # if not np.isfinite(gae_score_matrix).all(): # print 'Found non-finite value in GAE score matrix! 
Epoch: {}'.format(epoch) # with open('numpy-nan-debugging.pkl', 'wb') as f: # dump_info = {} # dump_info['gae_emb'] = gae_emb # dump_info['epoch'] = epoch # dump_info['gae_score_matrix'] = gae_score_matrix # dump_info['adj_norm'] = adj_norm # dump_info['adj_label'] = adj_label # dump_info['features_tuple'] = features_tuple # # dump_info['feed_dict'] = feed_dict # dump_info['prev_embs'] = prev_embs # pickle.dump(dump_info, f, protocol=2) # # END TODO roc_curr, roc_curve_curr, ap_curr = get_roc_score(val_edges, val_edges_false, gae_score_matrix, apply_sigmoid=True) val_roc_score.append(roc_curr) # Print results for this epoch if verbose == 2: print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(avg_cost), "train_acc=", "{:.5f}".format(avg_accuracy), "val_roc=", "{:.5f}".format( val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr), "time=", "{:.5f}".format(time.time() - t)) if verbose == 2: print("Optimization Finished!") # Print final results feed_dict.update({placeholders['dropout']: 0}) gae_emb = sess.run(model.z_mean, feed_dict=feed_dict) # Dot product edge scores (default) if edge_score_mode == "dot-product": gae_score_matrix = np.dot(gae_emb, gae_emb.T) runtime = time.time() - start_time # Calculate final scores gae_val_roc, gae_val_roc_curve, gae_val_ap = get_roc_score( val_edges, val_edges_false, gae_score_matrix) gae_test_roc, gae_test_roc_curve, gae_test_ap = get_roc_score( test_edges, test_edges_false, gae_score_matrix) # Take bootstrapped edge embeddings (via hadamard product) elif edge_score_mode == "edge-emb": def get_edge_embeddings(edge_list): embs = [] for edge in edge_list: node1 = edge[0] node2 = edge[1] emb1 = gae_emb[node1] emb2 = gae_emb[node2] edge_emb = np.multiply(emb1, emb2) embs.append(edge_emb) embs = np.array(embs) return embs # Train-set edge embeddings pos_train_edge_embs = get_edge_embeddings(train_edges) neg_train_edge_embs = get_edge_embeddings(train_edges_false) train_edge_embs = np.concatenate( [pos_train_edge_embs, neg_train_edge_embs]) # Create train-set edge labels: 1 = real edge, 0 = false edge train_edge_labels = np.concatenate( [np.ones(len(train_edges)), np.zeros(len(train_edges_false))]) # Val-set edge embeddings, labels if len(val_edges) > 0 and len(val_edges_false) > 0: pos_val_edge_embs = get_edge_embeddings(val_edges) neg_val_edge_embs = get_edge_embeddings(val_edges_false) val_edge_embs = np.concatenate( [pos_val_edge_embs, neg_val_edge_embs]) val_edge_labels = np.concatenate( [np.ones(len(val_edges)), np.zeros(len(val_edges_false))]) # Test-set edge embeddings, labels pos_test_edge_embs = get_edge_embeddings(test_edges) neg_test_edge_embs = get_edge_embeddings(test_edges_false) test_edge_embs = np.concatenate( [pos_test_edge_embs, neg_test_edge_embs]) # Create val-set edge labels: 1 = real edge, 0 = false edge test_edge_labels = np.concatenate( [np.ones(len(test_edges)), np.zeros(len(test_edges_false))]) # Train logistic regression classifier on train-set edge embeddings edge_classifier = LogisticRegression(random_state=0) edge_classifier.fit(train_edge_embs, train_edge_labels) # Predicted edge scores: probability of being of class "1" (real edge) if len(val_edges) > 0 and len(val_edges_false) > 0: val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1] test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1] runtime = time.time() - start_time # Calculate scores if len(val_edges) > 0 and len(val_edges_false) > 0: gae_val_roc = roc_auc_score(val_edge_labels, val_preds) gae_val_roc_curve = 
roc_curve(val_edge_labels, val_preds) gae_val_ap = average_precision_score(val_edge_labels, val_preds) else: gae_val_roc = None gae_val_roc_curve = None gae_val_ap = None gae_test_roc = roc_auc_score(test_edge_labels, test_preds) gae_test_roc_curve = roc_curve(test_edge_labels, test_preds) gae_test_ap = average_precision_score(test_edge_labels, test_preds) # Record scores gae_scores = {} gae_scores['test_roc'] = gae_test_roc gae_scores['test_roc_curve'] = gae_test_roc_curve gae_scores['test_ap'] = gae_test_ap gae_scores['val_roc'] = gae_val_roc gae_scores['val_roc_curve'] = gae_val_roc_curve gae_scores['val_ap'] = gae_val_ap gae_scores['val_roc_per_epoch'] = val_roc_score gae_scores['runtime'] = runtime return gae_scores
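# Minimal illustration (hypothetical shapes) of the "edge-emb" scoring mode
# above: an edge embedding is the element-wise (Hadamard) product of its two
# endpoint embeddings, which the logistic-regression classifier then scores.
import numpy as np

gae_emb = np.random.RandomState(0).randn(10, 16)   # hypothetical node embeddings
node1, node2 = 3, 7
edge_emb = np.multiply(gae_emb[node1], gae_emb[node2])   # shape (16,)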
W[3, 6] = W[6, 3] = 1
W[5, 9] = W[9, 5] = 1
W[6, 7] = W[7, 6] = 1
W[6, 8] = W[8, 6] = 1
W[7, 9] = W[9, 7] = 1

# class labels (0 where no label is given)
y = dok_matrix([[1, 0, 1, 0, 0, 1, 0, 1, 0, 0]]).T

A = W.T * W

# identity matrix
I = identity(10)

D = dia_matrix((A.sum(0), [0]), (10, 10)).tocsr()
D = scipy.sparse.diags(numpy.reciprocal(numpy.sqrt(D).data))
L = I - D * A * D
print(L)

lamb = 0.0001

# In[22]:
# [ 0.45966011  0.23023256  0.46046512  0.1519678   0.5372093  -0.57951699
#  -0.38980322 -0.51627907 -0.19490161 -0.15903399]
f = spsolve((I + (1 - lamb) * L), y)

# In[23]:
# top 1, 5, 10,
def TVRegDiff(data, itern, alph, u0=None, scale='small', ep=1e-6, dx=None, plotflag=_has_matplotlib, diagflag=True): """ Estimate derivatives from noisy data based using the Total Variation Regularized Numerical Differentiation (TVDiff) algorithm. Parameters ---------- data : ndarray One-dimensional array containing series data to be differentiated. itern : int Number of iterations to run the main loop. A stopping condition based on the norm of the gradient vector g below would be an easy modification. No default value. alph : float Regularization parameter. This is the main parameter to fiddle with. Start by varying by orders of magnitude until reasonable results are obtained. A value to the nearest power of 10 is usally adequate. No default value. Higher values increase regularization strenght and improve conditioning. u0 : ndarray, optional Initialization of the iteration. Default value is the naive derivative (without scaling), of appropriate length (this being different for the two methods). Although the solution is theoretically independent of the initialization, a poor choice can exacerbate conditioning issues when the linear system is solved. scale : {large' or 'small' (case insensitive)}, str, optional Default is 'small'. 'small' has somewhat better boundary behavior, but becomes unwieldly for data larger than 1000 entries or so. 'large' has simpler numerics but is more efficient for large-scale problems. 'large' is more readily modified for higher-order derivatives, since the implicit differentiation matrix is square. ep : float, optional Parameter for avoiding division by zero. Default value is 1e-6. Results should not be very sensitive to the value. Larger values improve conditioning and therefore speed, while smaller values give more accurate results with sharper jumps. dx : float, optional Grid spacing, used in the definition of the derivative operators. Default is the reciprocal of the data size. plotflag : bool, optional Flag whether to display plot at each iteration. Default is True. Useful, but adds significant running time. diagflag : bool, optional Flag whether to display diagnostics at each iteration. Default is True. Useful for diagnosing preconditioning problems. When tolerance is not met, an early iterate being best is more worrying than a large relative residual. Returns ------- u : ndarray Estimate of the regularized derivative of data. Due to different grid assumptions, length(u) = length(data) + 1 if scale = 'small', otherwise length(u) = length(data). """ # Make sure we have a column vector data = np.array(data) assert len(data.shape) == 1, "data is not one-dimensional" # Get the data size. n = len(data) # Default checking. (u0 is done separately within each method.) if dx is None: dx = 1.0 / n # Different methods for small- and large-scale problems. if (scale.lower() == 'small'): # Construct differentiation matrix. c = np.ones(n + 1) / dx D = sparse.spdiags([-c, c], [0, 1], n, n + 1) DT = D.transpose() # Construct antidifferentiation operator and its adjoint. def A(x): return (np.cumsum(x) - 0.5 * (x + x[0]))[1:] * dx def AT(w): return (sum(w) * np.ones(n + 1) - np.transpose( np.concatenate(([sum(w) / 2.0], np.cumsum(w) - w / 2.0)))) * dx # Default initialization is naive derivative if u0 is None: u0 = np.concatenate(([0], np.diff(data), [0])) u = u0 # Since Au( 0 ) = 0, we need to adjust. ofst = data[0] # Precompute. ATb = AT(ofst - data) # input: size n # Main loop. for ii in range(1, itern + 1): # Diagonal matrix of weights, for linearizing E-L equation. 
Q = sparse.spdiags(1. / (np.sqrt((D * u)**2 + ep)), 0, n, n) # Linearized diffusion matrix, also approximation of Hessian. L = dx * DT * Q * D # Gradient of functional. g = AT(A(u)) + ATb + alph * L * u # Prepare to solve linear equation. tol = 1e-6 maxit = 400 # Simple preconditioner. P = alph * sparse.spdiags(L.diagonal() + 1, 0, n + 1, n + 1) def linop(v): return (alph * L * v + AT(A(v))) linop = splin.LinearOperator((n + 1, n + 1), linop) P = None if diagflag: [s, info_i] = sparse.linalg.cg(linop, g, x0=None, tol=tol, maxiter=maxit, callback=None, M=P, atol='legacy') log_iteration(ii, s[0], u, g) if (info_i > 0): print(ii) logging.warning( "WARNING - convergence to tolerance not achieved!") elif (info_i < 0): logging.warning("WARNING - illegal input or breakdown") else: [s, info_i] = sparse.linalg.cg(linop, g, x0=None, tol=tol, maxiter=maxit, callback=None, M=P, atol='legacy') # Update solution. u = u - s # Display plot. if plotflag: plt.plot(u) plt.show() elif (scale.lower() == 'large'): # Construct anti-differentiation operator and its adjoint. def A(v): return np.cumsum(v) def AT(w): return (sum(w) * np.ones(len(w)) - np.transpose(np.concatenate(([0.0], np.cumsum(w[:-1]))))) # Construct differentiation matrix. c = np.ones(n) D = sparse.spdiags([-c, c], [0, 1], n, n) / dx mask = np.ones((n, n)) mask[-1, -1] = 0.0 D = sparse.dia_matrix(D.multiply(mask)) DT = D.transpose() # Since Au( 0 ) = 0, we need to adjust. data = data - data[0] # Default initialization is naive derivative. if u0 is None: u0 = np.concatenate(([0], np.diff(data))) u = u0 # Precompute. ATd = AT(data) # Main loop. for ii in range(1, itern + 1): # Diagonal matrix of weights, for linearizing E-L equation. Q = sparse.spdiags(1. / np.sqrt((D * u)**2.0 + ep), 0, n, n) # Linearized diffusion matrix, also approximation of Hessian. L = DT * Q * D # Gradient of functional. g = AT(A(u)) - ATd g = g + alph * L * u # Build preconditioner. c = np.cumsum(range(n, 0, -1)) B = alph * L + sparse.spdiags(c[::-1], 0, n, n) # droptol = 1.0e-2 R = sparse.dia_matrix(np.linalg.cholesky(B.todense())) # Prepare to solve linear equation. tol = 1.0e-6 maxit = 200 def linop(v): return (alph * L * v + AT(A(v))) linop = splin.LinearOperator((n, n), linop) if diagflag: [s, info_i] = sparse.linalg.cg(linop, -g, x0=None, tol=tol, maxiter=maxit, callback=None, M=np.dot(R.transpose(), R), atol='legacy') log_iteration(ii, s[0], u, g) if (info_i > 0): print(ii) logging.warning( "WARNING - convergence to tolerance not achieved!") elif (info_i < 0): logging.warning("WARNING - illegal input or breakdown") else: [s, info_i] = sparse.linalg.cg(linop, -g, x0=None, tol=tol, maxiter=maxit, callback=None, M=np.dot(R.transpose(), R), atol='legacy') # Update current solution u = u + s # Display plot if plotflag: plt.close('all') plt.plot(u / dx) plt.show() u = u / dx return u
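# Hedged usage sketch (parameter values are illustrative, not tuned, and the
# module's own imports such as scipy.sparse are assumed to be in place):
# differentiate noisy samples of f(x) = |x - 0.5|; the regularized estimate
# should approach the step function sign(x - 0.5).
import numpy as np

x = np.linspace(0., 1., 100)
noisy = np.abs(x - 0.5) + 0.01 * np.random.randn(x.size)
u = TVRegDiff(noisy, itern=50, alph=0.05, dx=x[1] - x[0],
              plotflag=False, diagflag=False)
# len(u) == len(noisy) + 1 for the default scale='small'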
def biharmonic_matrix_2d(Nx=3, Ny=3, bc='CCCC'): """ generates the discrete biharmonic operator in matrix form """ checkInputArgs(Nx, Ny, bc) #np.set_printoptions(threshold=np.nan,linewidth=230,precision=2,suppress=True) diag = 6 * [None] if bc == 'CCCC': diag[0] = 20 + np.zeros((Nx - 1) * (Ny - 1)) elif bc == 'SSSS': diag[0] = np.array([18] + (Ny - 3) * [19] + [18] + sum([[19] + (Ny - 3) * [20] + [19]] * (Nx - 3), []) + [18] + (Ny - 3) * [19] + [18]) elif bc == 'CCCS': diag[0] = np.array(sum([[19] + [20] * (Ny - 2)] * (Nx - 1), [])) elif bc == 'CCSC': diag[0] = np.array(sum([[20] * (Ny - 2) + [19]] * (Nx - 1), [])) elif bc == 'CCSS': diag[0] = np.array(sum([[19] + [20] * (Ny - 3) + [19]] * (Nx - 1), [])) elif bc == 'CSCC': diag[0] = [20] * (Ny - 1) * Nx + [19] * (Ny - 1) elif bc == 'CSCS': diag[0] = sum([[19] + [20] * (Ny - 2)] * (Nx - 2), []) + [18] + [19] * (Ny - 2) elif bc == 'CSSC': diag[0] = sum([[20] * (Ny - 2) + [19]] * (Nx - 2), []) + [19] * (Ny - 2) + [18] elif bc == 'CSSS': diag[0] = sum([[19] + [20] * (Ny - 3) + [19]] * (Nx - 2), []) + [18] + [19] * (Ny - 3) + [18] elif bc == 'SCCC': diag[0] = [19] * (Ny - 1) + [20] * (Ny - 1) * (Nx - 2) elif bc == 'SCCS': diag[0] = [18] + [19] * (Ny - 2) + sum([[19] + [20] * (Ny - 2)] * (Nx - 2), []) elif bc == 'SCSC': diag[0] = [19] * (Ny - 2) + [18] + sum([[20] * (Ny - 2) + [19]] * (Nx - 2), []) elif bc == 'SCSS': diag[0] = [18] + [19] * (Ny - 3) + [18] + sum([[19] + [20] * (Ny - 3) + [19]] * (Nx - 2), []) elif bc == 'SSCC': diag[0] = [19] * (Ny - 1) + [20] * (Ny - 1) * (Nx - 3) + [19 ] * (Ny - 1) elif bc == 'SSCS': diag[0] = [18] + [19] * (Ny - 2) + sum( [[19] + [20] * (Ny - 2)] * (Nx - 3), []) + [18] + [19] * (Ny - 2) elif bc == 'SSSC': diag[0] = [19] * (Ny - 2) + [18] + sum( [[20] * (Ny - 2) + [19]] * (Nx - 3), []) + [19] * (Ny - 2) + [18] else: raise NotImplementedError( 'free boundary conditions are not implemented yet') diag[1] = -8 + np.zeros((Nx - 1) * (Ny - 1)) for i in range(1, Nx): diag[1][i * (Ny - 1) - 1] = 0 diag[2] = np.ones((Nx - 1) * (Ny - 1)) for i in range(1, Nx): diag[2][i * (Ny - 1) - 1] = 0 diag[2][i * (Ny - 1) - 2] = 0 diag[3] = -8 + np.zeros((Nx - 1) * (Ny - 1)) diag[4] = 2 + np.zeros((Nx - 1) * (Ny - 1)) for i in range(1, Nx): diag[4][i * (Ny - 1) - 1] = 0 diag[5] = np.ones((Nx - 1) * (Ny - 1)) mat = dia_matrix(([diag[0],np.roll(diag[1],1),diag[1],np.roll(diag[2],2),diag[2],diag[3],diag[3],\ np.roll(diag[4],1),np.roll(diag[4],Ny - 1),np.roll(diag[4],-Ny + 2),diag[4],diag[5],diag[5]],\ [0,1,-1,2,-2,Ny - 1,-Ny + 1,Ny,Ny - 2,-Ny + 2,-Ny,2*(Ny - 1),2*(-Ny + 1)]),\ shape=((Nx - 1)*(Ny - 1),(Nx - 1)*(Ny - 1))) return mat
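# Assumed usage sketch: the 13-point biharmonic stencil for a fully clamped
# plate (bc='CCCC') on an Nx x Ny grid acts on the (Nx - 1) * (Ny - 1)
# interior points.
B = biharmonic_matrix_2d(Nx=5, Ny=5, bc='CCCC')
print(B.shape)        # (16, 16)
print(B.diagonal())   # all entries 20 in the fully clamped case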
def _load_nonlink_level(handler, level, pathtable, pathname): """ Loads level and builds appropriate type, without handling softlinks """ if isinstance(level, tables.Group): if _sns and (level._v_title.startswith('SimpleNamespace:') or DEEPDISH_IO_ROOT_IS_SNS in level._v_attrs): val = SimpleNamespace() dct = val.__dict__ elif level._v_title.startswith('list:'): dct = {} val = [] else: dct = {} val = dct # in case of recursion, object needs to be put in pathtable # before trying to fully load it pathtable[pathname] = val # Load sub-groups for grp in level: lev = _load_level(handler, grp, pathtable) n = grp._v_name # Check if it's a complicated pair or a string-value pair if n.startswith('__pair'): dct[lev['key']] = lev['value'] else: dct[n] = lev # Load attributes for name in level._v_attrs._f_list(): if name.startswith(DEEPDISH_IO_PREFIX): continue v = level._v_attrs[name] dct[name] = v if level._v_title.startswith('list:'): N = int(level._v_title[len('list:'):]) for i in range(N): val.append(dct['i{}'.format(i)]) return val elif level._v_title.startswith('tuple:'): N = int(level._v_title[len('tuple:'):]) lst = [] for i in range(N): lst.append(dct['i{}'.format(i)]) return tuple(lst) elif level._v_title.startswith('nonetype:'): return None elif is_pandas_dataframe(level): assert _pandas, "pandas is required to read this file" store = _HDFStoreWithHandle(handler) return store.get(level._v_pathname) elif level._v_title.startswith('sparse:'): frm = level._v_attrs.format if frm in ('csr', 'csc', 'bsr'): shape = tuple(level.shape[:]) cls = { 'csr': sparse.csr_matrix, 'csc': sparse.csc_matrix, 'bsr': sparse.bsr_matrix } matrix = cls[frm](shape) matrix.data = level.data[:] matrix.indices = level.indices[:] matrix.indptr = level.indptr[:] matrix.maxprint = level._v_attrs.maxprint return matrix elif frm == 'dia': shape = tuple(level.shape[:]) matrix = sparse.dia_matrix(shape) matrix.data = level.data[:] matrix.offsets = level.offsets[:] matrix.maxprint = level._v_attrs.maxprint return matrix elif frm == 'coo': shape = tuple(level.shape[:]) matrix = sparse.coo_matrix(shape) matrix.data = level.data[:] matrix.col = level.col[:] matrix.row = level.row[:] matrix.maxprint = level._v_attrs.maxprint return matrix else: raise ValueError('Unknown sparse matrix type: {}'.format(frm)) else: return val elif isinstance(level, tables.VLArray): if level.shape == (1, ): return _load_pickled(level) else: return level[:] elif isinstance(level, tables.Array): if 'zeroarray_dtype' in level._v_attrs: # Unpack zero-size arrays (shape is stored in an HDF5 array and # type is stored in the attibute 'zeroarray_dtype') dtype = level._v_attrs.zeroarray_dtype sh = level[:] return np.zeros(tuple(sh), dtype=dtype) if 'strtype' in level._v_attrs: strtype = level._v_attrs.strtype itemsize = level._v_attrs.itemsize if strtype == b'unicode': return level[:].view(dtype=(np.unicode_, itemsize)) elif strtype == b'ascii': return level[:].view(dtype=(np.string_, itemsize)) # This serves two purposes: # (1) unpack big integers: the only time we save arrays like this # (2) unpack non-flammkuchen "scalars" if level.shape == (): return level[()] return level[:]
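# The 'dia' branch above rebuilds a matrix by assigning .data and .offsets on
# an empty dia_matrix of the stored shape; the same reconstruction in
# isolation (with made-up arrays standing in for the HDF5 datasets):
import numpy as np
from scipy import sparse

data = np.array([[1., 2., 3.],
                 [4., 5., 6.]])
offsets = np.array([0, 1])
matrix = sparse.dia_matrix((3, 3))   # empty matrix of the target shape
matrix.data = data
matrix.offsets = offsets
# matrix.toarray() -> [[1., 5., 0.], [0., 2., 6.], [0., 0., 3.]]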
def create_biharmonic_solver(self, boundary_verts, clip_D=0.1): r"""Set up biharmonic equation with Dirichlet boundary conditions on the cortical mesh and precompute Cholesky factorization for solving it. The vertices listed in `boundary_verts` are considered part of the boundary, and will not be included in the factorization. To facilitate Cholesky decomposition (which requires a symmetric matrix), the squared Laplace-Beltrami operator is separated into left-hand-side (L2) and right-hand-side (Dinv) parts. If we write the L-B operator as the product of the stiffness matrix (V-W) and the inverse mass matrix (Dinv), the biharmonic problem is as follows (with `u` denoting non-boundary vertices) .. math:: :nowrap: \begin{eqnarray} L^2_{u} \phi &=& -\rho_{u} \\ \left[ D^{-1} (V-W) D^{-1} (V-W) \right]_{u} \phi &=& -\rho_{u} \\ \left[ (V-W) D^{-1} (V-W) \right]_{u} \phi &=& -\left[D \rho\right]_{u} \end{eqnarray} Parameters ---------- boundary_verts : list or ndarray of length V Indices of vertices that will be part of the Dirichlet boundary. Returns ------- lhs : sparse matrix Left side of biharmonic problem, (V-W) D^{-1} (V-W) rhs : sparse matrix, dia Right side of biharmonic problem, D Dinv : sparse matrix, dia Inverse mass matrix, D^{-1} lhsfac : cholesky Factor object Factorized left side, solves biharmonic problem notboundary : ndarray, int Indices of non-boundary vertices """ try: from scikits.sparse.cholmod import cholesky factorize = lambda x: cholesky(x).solve_A except ImportError: factorize = sparse.linalg.dsolve.factorized B, D, W, V = self.laplace_operator npt = len(D) g = np.nonzero(D > 0)[0] # Find vertices with non-zero mass #g = np.nonzero((L.sum(0) != 0).A.ravel())[0] # Find vertices with non-zero mass notboundary = np.setdiff1d(np.arange(npt)[g], boundary_verts) # find non-boundary verts D = np.clip(D, clip_D, D.max()) Dinv = sparse.dia_matrix((D**-1,[0]), (npt,npt)).tocsr() # construct Dinv L = Dinv.dot((V-W)) # construct Laplace-Beltrami operator lhs = (V-W).dot(L) # construct left side, almost squared L-B operator #lhsfac = cholesky(lhs[notboundary][:,notboundary]) # factorize lhsfac = factorize(lhs[notboundary][:,notboundary]) # factorize return lhs, D, Dinv, lhsfac, notboundary
def _assemble(self, mu=None): grid = self.grid # compute the local coordinates of the codim-1 subentity centers in the reference element reference_element = grid.reference_element(0) subentity_embedding = reference_element.subentity_embedding(1) subentity_centers = ( np.einsum('eij,j->ei', subentity_embedding[0], reference_element.sub_reference_element(1).center()) + subentity_embedding[1]) # compute shift for periodic boundaries embeddings = grid.embeddings(0) superentities = grid.superentities(1, 0) superentity_indices = grid.superentity_indices(1, 0) boundary_mask = grid.boundary_mask(1) inner_mask = ~boundary_mask SE_I0 = superentities[:, 0] SE_I1 = superentities[:, 1] SE_I0_I = SE_I0[inner_mask] SE_I1_I = SE_I1[inner_mask] SHIFTS = (np.einsum( 'eij,ej->ei', embeddings[0][SE_I0_I, :, :], subentity_centers[superentity_indices[:, 0][inner_mask]]) + embeddings[1][SE_I0_I, :]) SHIFTS -= (np.einsum( 'eij,ej->ei', embeddings[0][SE_I1_I, :, :], subentity_centers[superentity_indices[:, 1][inner_mask]]) + embeddings[1][SE_I1_I, :]) # comute distances for gradient approximations centers = grid.centers(1) orthogonal_centers = grid.orthogonal_centers() VOLS = grid.volumes(1) INNER_DISTS = np.linalg.norm(orthogonal_centers[SE_I0_I, :] - orthogonal_centers[SE_I1_I, :] - SHIFTS, axis=1) del SHIFTS # assemble matrix FLUXES = VOLS[inner_mask] / INNER_DISTS if self.diffusion_function is not None: FLUXES *= self.diffusion_function(centers[inner_mask], mu=mu) if self.diffusion_constant is not None: FLUXES *= self.diffusion_constant del INNER_DISTS FLUXES = np.concatenate((-FLUXES, -FLUXES, FLUXES, FLUXES)) FLUXES_I0 = np.concatenate((SE_I0_I, SE_I1_I, SE_I0_I, SE_I1_I)) FLUXES_I1 = np.concatenate((SE_I1_I, SE_I0_I, SE_I0_I, SE_I1_I)) if self.boundary_info.has_dirichlet: dirichlet_mask = self.boundary_info.dirichlet_mask(1) SE_I0_D = SE_I0[dirichlet_mask] boundary_normals = grid.unit_outer_normals()[ SE_I0_D, superentity_indices[:, 0][dirichlet_mask]] BOUNDARY_DISTS = np.sum( (centers[dirichlet_mask, :] - orthogonal_centers[SE_I0_D, :]) * boundary_normals, axis=-1) DIRICHLET_FLUXES = VOLS[dirichlet_mask] / BOUNDARY_DISTS if self.diffusion_function is not None: DIRICHLET_FLUXES *= self.diffusion_function( centers[dirichlet_mask], mu=mu) if self.diffusion_constant is not None: DIRICHLET_FLUXES *= self.diffusion_constant FLUXES = np.concatenate((FLUXES, DIRICHLET_FLUXES)) FLUXES_I0 = np.concatenate((FLUXES_I0, SE_I0_D)) FLUXES_I1 = np.concatenate((FLUXES_I1, SE_I0_D)) A = coo_matrix((FLUXES, (FLUXES_I0, FLUXES_I1)), shape=(self.source.dim, self.source.dim)) A = (dia_matrix( ([1. / grid.volumes(0)], [0]), shape=(grid.size(0), ) * 2) * A).tocsc() return A
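# The last step above left-multiplies by a diagonal dia_matrix to divide each
# row of A by the corresponding codim-0 cell volume; the same pattern in
# isolation with made-up numbers:
import numpy as np
from scipy.sparse import coo_matrix, dia_matrix

A = coo_matrix(np.array([[2., 0.],
                         [4., 8.]]))
volumes = np.array([2., 4.])
scaled = (dia_matrix(([1. / volumes], [0]), shape=(2, 2)) * A).tocsc()
# scaled.toarray() -> [[1., 0.], [1., 2.]]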
def jacobian(self, U, mu=None): assert U in self.source and len(U) == 1 mu = self.parse_parameter(mu) if not hasattr(self, '_grid_data'): self._fetch_grid_data() U = U.data.ravel() g = self.grid bi = self.boundary_info gd = self._grid_data SUPE = gd['SUPE'] VOLS0 = gd['VOLS0'] VOLS1 = gd['VOLS1'] BOUNDARIES = gd['BOUNDARIES'] CENTERS = gd['CENTERS'] DIRICHLET_BOUNDARIES = gd['DIRICHLET_BOUNDARIES'] NEUMANN_BOUNDARIES = gd['NEUMANN_BOUNDARIES'] UNIT_OUTER_NORMALS = gd['UNIT_OUTER_NORMALS'] INNER = np.setdiff1d(np.arange(g.size(1)), BOUNDARIES) solver_options = self.solver_options delta = solver_options.get( 'jacobian_delta') if solver_options else None if delta is None: delta = jacobian_options()['delta'] if bi.has_dirichlet: if hasattr(self, '_dirichlet_values'): dirichlet_values = self._dirichlet_values elif self.dirichlet_data is not None: dirichlet_values = self.dirichlet_data( CENTERS[DIRICHLET_BOUNDARIES], mu=mu) else: dirichlet_values = np.zeros_like(DIRICHLET_BOUNDARIES) F_dirichlet = self.numerical_flux.evaluate_stage1( dirichlet_values, mu) UP = U + delta UM = U - delta F = self.numerical_flux.evaluate_stage1(U, mu) FP = self.numerical_flux.evaluate_stage1(UP, mu) FM = self.numerical_flux.evaluate_stage1(UM, mu) del UP, UM F_edge = [f[SUPE] for f in F] FP_edge = [f[SUPE] for f in FP] FM_edge = [f[SUPE] for f in FM] del F, FP, FM F0P_edge = [f.copy() for f in F_edge] for f, ff in zip(F0P_edge, FP_edge): f[:, 0] = ff[:, 0] f[BOUNDARIES, 1] = f[BOUNDARIES, 0] if bi.has_dirichlet: for f, f_d in zip(F0P_edge, F_dirichlet): f[DIRICHLET_BOUNDARIES, 1] = f_d NUM_FLUX_0P = self.numerical_flux.evaluate_stage2( F0P_edge, UNIT_OUTER_NORMALS, VOLS1, mu) del F0P_edge F0M_edge = [f.copy() for f in F_edge] for f, ff in zip(F0M_edge, FM_edge): f[:, 0] = ff[:, 0] f[BOUNDARIES, 1] = f[BOUNDARIES, 0] if bi.has_dirichlet: for f, f_d in zip(F0M_edge, F_dirichlet): f[DIRICHLET_BOUNDARIES, 1] = f_d NUM_FLUX_0M = self.numerical_flux.evaluate_stage2( F0M_edge, UNIT_OUTER_NORMALS, VOLS1, mu) del F0M_edge D_NUM_FLUX_0 = (NUM_FLUX_0P - NUM_FLUX_0M) D_NUM_FLUX_0 /= (2 * delta) if bi.has_neumann: D_NUM_FLUX_0[NEUMANN_BOUNDARIES] = 0 del NUM_FLUX_0P, NUM_FLUX_0M F1P_edge = [f.copy() for f in F_edge] for f, ff in zip(F1P_edge, FP_edge): f[:, 1] = ff[:, 1] f[BOUNDARIES, 1] = f[BOUNDARIES, 0] if bi.has_dirichlet: for f, f_d in zip(F1P_edge, F_dirichlet): f[DIRICHLET_BOUNDARIES, 1] = f_d NUM_FLUX_1P = self.numerical_flux.evaluate_stage2( F1P_edge, UNIT_OUTER_NORMALS, VOLS1, mu) del F1P_edge, FP_edge F1M_edge = F_edge for f, ff in zip(F1M_edge, FM_edge): f[:, 1] = ff[:, 1] f[BOUNDARIES, 1] = f[BOUNDARIES, 0] if bi.has_dirichlet: for f, f_d in zip(F1M_edge, F_dirichlet): f[DIRICHLET_BOUNDARIES, 1] = f_d NUM_FLUX_1M = self.numerical_flux.evaluate_stage2( F1M_edge, UNIT_OUTER_NORMALS, VOLS1, mu) del F1M_edge, FM_edge D_NUM_FLUX_1 = (NUM_FLUX_1P - NUM_FLUX_1M) D_NUM_FLUX_1 /= (2 * delta) if bi.has_neumann: D_NUM_FLUX_1[NEUMANN_BOUNDARIES] = 0 del NUM_FLUX_1P, NUM_FLUX_1M I1 = np.hstack([ SUPE[INNER, 0], SUPE[INNER, 0], SUPE[INNER, 1], SUPE[INNER, 1], SUPE[BOUNDARIES, 0] ]) I0 = np.hstack([ SUPE[INNER, 0], SUPE[INNER, 1], SUPE[INNER, 0], SUPE[INNER, 1], SUPE[BOUNDARIES, 0] ]) V = np.hstack([ D_NUM_FLUX_0[INNER], -D_NUM_FLUX_0[INNER], D_NUM_FLUX_1[INNER], -D_NUM_FLUX_1[INNER], D_NUM_FLUX_0[BOUNDARIES] ]) A = coo_matrix((V, (I0, I1)), shape=(g.size(0), g.size(0))) A = csc_matrix(A).copy( ) # See pymor.operators.cg.DiffusionOperatorP1 for why copy() is necessary A = dia_matrix(([1. 
/ VOLS0], [0]), shape=(g.size(0), ) * 2) * A return NumpyMatrixOperator(A, source_id=self.source.id, range_id=self.range.id)
def _assemble(self, mu=None): A = dia_matrix((self.grid.volumes(0), [0]), shape=(self.grid.size(0), ) * 2) return A
def smoothing_matrix(vertices, adj_mat, smoothing_steps=20, verbose=None):
    """Create a smoothing matrix.

    This smoothing matrix can be used to interpolate data defined for a subset
    of vertices onto a mesh with an adjacency matrix given by adj_mat.

    If smoothing_steps is None, smoothing steps are applied until the whole
    mesh is filled with non-zeros. Only use this option if the vertices
    correspond to a subsampled version of the mesh.

    Parameters
    ----------
    vertices : 1d array
        vertex indices
    adj_mat : sparse matrix
        N x N adjacency matrix of the full mesh
    smoothing_steps : int or None
        number of smoothing steps (Default: 20)
    %(verbose)s

    Returns
    -------
    smooth_mat : sparse matrix
        smoothing matrix with size N x len(vertices)
    """
    from scipy import sparse

    logger.info("Updating smoothing matrix, be patient..")

    if smoothing_steps == 0:
        return _nearest(vertices, adj_mat)

    e = adj_mat.copy()
    e.data[e.data == 2] = 1
    n_vertices = e.shape[0]
    e = e + sparse.eye(n_vertices, n_vertices)
    idx_use = vertices
    smooth_mat = 1.0
    n_iter = smoothing_steps if smoothing_steps is not None else 1000
    for k in range(n_iter):
        e_use = e[:, idx_use]

        data1 = e_use * np.ones(len(idx_use))
        idx_use = np.where(data1)[0]
        scale_mat = sparse.dia_matrix((1 / data1[idx_use], 0),
                                      shape=(len(idx_use), len(idx_use)))

        smooth_mat = scale_mat * e_use[idx_use, :] * smooth_mat

        logger.info("Smoothing matrix creation, step %d" % (k + 1))
        if smoothing_steps is None and len(idx_use) >= n_vertices:
            break

    # Make sure the smoothing matrix has the right number of rows
    # and is in COO format
    smooth_mat = smooth_mat.tocoo()
    smooth_mat = sparse.coo_matrix((smooth_mat.data,
                                    (idx_use[smooth_mat.row],
                                     smooth_mat.col)),
                                   shape=(n_vertices, len(vertices)))

    return smooth_mat
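# Hedged usage sketch (assumes the surrounding module context, e.g. its
# logger, is available): spread values known at two vertices of a small ring
# graph onto all of its vertices.
import numpy as np
from scipy import sparse

n = 50
rows = np.arange(n)
adj = sparse.coo_matrix(
    (np.ones(2 * n), (np.r_[rows, rows], np.r_[(rows + 1) % n, (rows - 1) % n])),
    shape=(n, n)).tocsr()
smooth = smoothing_matrix(np.array([0, 25]), adj, smoothing_steps=5)
values = smooth @ np.array([1.0, -1.0])   # shape (n,), one value per vertex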
def getLaplacianEigs(A, NEigs): DEG = sparse.dia_matrix((A.sum(1).flatten(), 0), A.shape) L = DEG - A w, v = slinalg.eigsh(L, k=NEigs, sigma=0, which='LM') return (w, v, L)
# Note: coefU and coefUprv (the main- and upper-diagonal coefficients of the
# finite-difference system) are referenced below but not shown in this
# snippet; they are assumed to be defined earlier in the original script.
# Imports are added here for completeness (module aliases are assumed).
import numpy as np
import matplotlib.pyplot as plt
import prettytable as pt
from scipy.sparse import dia_matrix


def coefUfll(h):
    return 1


def coefb(h, y):
    return (8 * h ** 2) - y


x = np.linspace(0, 1, 11)
h = 0.1
y = [3, 0, 0, 0, 0, 0, 0, 0, 4]
diag = [coefU(h) for i in range(9)]
diagFll = [coefUfll(h) for i in range(9)]
diagPrv = [coefUprv(h) for i in range(9)]
arr = dia_matrix(([diag, diagFll, diagPrv], [0, -1, 1]), shape=(9, 9)).toarray()
b = [coefb(h, i) for i in y]
yy = np.linalg.solve(arr, b).tolist()
yy.insert(0, 3)
yy.append(4)
table = pt.PrettyTable()
table.add_column("x", np.round(x, 7))
table.add_column("y", np.round(yy, 7))
print(table)
plt.plot(x, yy, color="black", label="Runge4")
plt.grid(True)
plt.show()
def gae_for_na(name): """ train and evaluate disambiguation results for a specific name :param name: author name :return: evaluation results """ adj, features, labels = load_local_data(name=name) # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train = gen_train_edges(adj) adj = adj_train # Some preprocessing adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] input_feature_dim = features.shape[1] if FLAGS.is_sparse: # TODO to test # features = sparse_to_tuple(features.tocoo()) # features_nonzero = features[1].shape[0] features = features.todense() # TODO else: features = normalize_vectors(features) # Define placeholders placeholders = { # 'features': tf.sparse_placeholder(tf.float32), 'features': tf.placeholder(tf.float32, shape=(None, input_feature_dim)), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()) } # Create model model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, input_feature_dim) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, input_feature_dim, num_nodes) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() # negative edges/pos edges print('positive edge weight', pos_weight) norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.nnz) * 2) # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), pos_weight=pos_weight, norm=norm) elif model_str == 'gcn_vae': opt = OptimizerVAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), model=model, num_nodes=num_nodes, pos_weight=pos_weight, norm=norm) # Initialize session sess = tf.Session() sess.run(tf.global_variables_initializer()) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) def get_embs(): feed_dict.update({placeholders['dropout']: 0}) emb = sess.run(model.z_mean, feed_dict=feed_dict) # z_mean is better return emb # Train model for epoch in range(FLAGS.epochs): t = time.time() # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Run single weight update outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict) # Compute average loss avg_cost = outs[1] avg_accuracy = outs[2] print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(avg_cost), "train_acc=", "{:.5f}".format(avg_accuracy), "time=", "{:.5f}".format(time.time() - t)) emb = get_embs() n_clusters = len(set(labels)) emb_norm = normalize_vectors(emb) clusters_pred = clustering(emb_norm, num_clusters=n_clusters) prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels) print('pairwise precision', '{:.5f}'.format(prec), 'recall', '{:.5f}'.format(rec), 'f1', '{:.5f}'.format(f1)) return [prec, rec, f1], num_nodes, n_clusters
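# Quick numeric illustration (toy adjacency, assumed) of the class-balance
# terms computed above: pos_weight up-weights the rare positive (edge) entries
# by (#zero entries / #nonzero entries), and norm rescales the mean loss.
import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0., 1., 1., 0.],
                              [1., 0., 0., 0.],
                              [1., 0., 0., 1.],
                              [0., 0., 1., 0.]]))
pos_weight = float(adj.shape[0] ** 2 - adj.sum()) / adj.sum()          # (16 - 6) / 6
norm = adj.shape[0] ** 2 / float((adj.shape[0] ** 2 - adj.sum()) * 2)  # 16 / 20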
def geodesic_distance(self, verts, m=1.0, fem=False): """Minimum mesh geodesic distance (in mm) from each vertex in surface to any vertex in the collection `verts`. Geodesic distance is estimated using heat-based method (see 'Geodesics in Heat', Crane et al, 2012). Diffusion of heat along the mesh is simulated and then used to infer geodesic distance. The duration of the simulation is controlled by the parameter `m`. Larger values of `m` will smooth & regularize the distance computation. Smaller values of `m` will roughen and will usually increase error in the distance computation. The default value of 1.0 is probably pretty good. This function caches some data (sparse LU factorizations of the laplace-beltrami operator and the weighted adjacency matrix), so it will be much faster on subsequent runs. The time taken by this function is independent of the number of vertices in verts. Parameters ---------- verts : 1D array-like of ints Set of vertices to compute distance from. This function returns the shortest distance to any of these vertices from every vertex in the surface. m : float, optional Reverse Euler step length. The optimal value is likely between 0.5 and 1.5. Default is 1.0, which should be fine for most cases. fem : bool, optional Whether to use Finite Element Method lumped mass matrix. Wasn't used in Crane 2012 paper. Doesn't seem to help any. Returns ------- 1D ndarray, shape (total_verts,) Geodesic distance (in mm) from each vertex in the surface to the closest vertex in `verts`. """ npt = len(self.pts) if m not in self._rlfac_solvers or m not in self._nLC_solvers: B, D, W, V = self.laplace_operator nLC = W - V # negative laplace matrix if not fem: spD = sparse.dia_matrix((D,[0]), (npt,npt)).tocsr() # lumped mass matrix else: spD = B t = m * self.avg_edge_length ** 2 # time of heat evolution lfac = spD - t * nLC # backward Euler matrix # Exclude rows with zero weight (these break the sparse LU, that finicky f**k) goodrows = np.nonzero(~np.array(lfac.sum(0) == 0).ravel())[0] self._goodrows = goodrows self._rlfac_solvers[m] = sparse.linalg.dsolve.factorized(lfac[goodrows][:,goodrows]) self._nLC_solvers[m] = sparse.linalg.dsolve.factorized(nLC[goodrows][:,goodrows]) # Solve system to get u, the heat values u0 = np.zeros((npt,)) # initial heat values u0[verts] = 1.0 goodu = self._rlfac_solvers[m](u0[self._goodrows]) u = np.zeros((npt,)) u[self._goodrows] = goodu # Compute grad u at each face gradu = self.surface_gradient(u, at_verts=False) # Compute X (normalized grad u) #X = np.nan_to_num((-gradu.T / np.sqrt((gradu**2).sum(1))).T) graduT = gradu.T gusum = ne.evaluate("sum(gradu ** 2, 1)") X = np.nan_to_num(ne.evaluate("-graduT / sqrt(gusum)").T) # Compute integrated divergence of X at each vertex #x1 = x2 = x3 = np.zeros((X.shape[0],)) c32, c13, c21 = self._cot_edge x1 = 0.5 * (c32 * X).sum(1) x2 = 0.5 * (c13 * X).sum(1) x3 = 0.5 * (c21 * X).sum(1) conn1, conn2, conn3 = self._polyconn divx = conn1.dot(x1) + conn2.dot(x2) + conn3.dot(x3) # Compute phi (distance) goodphi = self._nLC_solvers[m](divx[self._goodrows]) phi = np.zeros((npt,)) phi[self._goodrows] = goodphi - goodphi.min() # Ensure that distance is zero for selected verts phi[verts] = 0.0 return phi
def mask_test_edges(adj): '''Randomly sample possibly unconnected training graph, and validation and test edges.''' # Remove diagonal elements adj = adj - sp.dia_matrix( (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape) adj.eliminate_zeros() # Check that diag is zero: assert np.diag(adj.todense()).sum() == 0 adj_triu = sp.triu(adj) adj_tuple = sparse_to_tuple(adj_triu) edges = adj_tuple[0] edges_all = sparse_to_tuple(adj)[0] num_test = int(np.floor(edges.shape[0] / 10.)) num_val = int(np.floor(edges.shape[0] / 20.)) all_edge_idx = range(edges.shape[0]) np.random.shuffle(all_edge_idx) val_edge_idx = all_edge_idx[:num_val] test_edge_idx = all_edge_idx[num_val:(num_val + num_test)] test_edges = edges[test_edge_idx] val_edges = edges[val_edge_idx] train_edges = np.delete(edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0) def ismember(a, b, tol=5): rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1) return (np.all(np.any(rows_close, axis=-1), axis=-1) and np.all(np.any(rows_close, axis=0), axis=0)) test_edges_false = [] while len(test_edges_false) < len(test_edges): idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue if ismember([idx_i, idx_j], edges_all): continue if test_edges_false: if ismember([idx_j, idx_i], np.array(test_edges_false)): continue if ismember([idx_i, idx_j], np.array(test_edges_false)): continue test_edges_false.append([idx_i, idx_j]) val_edges_false = [] while len(val_edges_false) < len(val_edges): idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue if ismember([idx_i, idx_j], train_edges): continue if ismember([idx_j, idx_i], train_edges): continue if ismember([idx_i, idx_j], val_edges): continue if ismember([idx_j, idx_i], val_edges): continue if val_edges_false: if ismember([idx_j, idx_i], np.array(val_edges_false)): continue if ismember([idx_i, idx_j], np.array(val_edges_false)): continue val_edges_false.append([idx_i, idx_j]) assert not ismember(test_edges_false, edges_all) assert not ismember(val_edges_false, edges_all) assert not ismember(val_edges, train_edges) assert not ismember(test_edges, train_edges) assert not ismember(val_edges, test_edges) data = np.ones(train_edges.shape[0]) # Re-build adj matrix adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape) adj_train = adj_train + adj_train.T # NOTE: these edge lists only contain single direction of edge! return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
def matrix(self, g, data, bc_weight=False): """ Return the matrix for a discretization of a second order elliptic equation using dual virtual element method. See self.matrix_rhs for a detaild description. Additional parameter: -------------------- bc_weight: to compute the infinity norm of the matrix and use it as a weight to impose the boundary conditions. Default True. Additional return: weight: if bc_weight is True return the weight computed. """ # Allow short variable names in backend function # pylint: disable=invalid-name # If a 0-d grid is given then we return an identity matrix if g.dim == 0: M = sps.dia_matrix(([1, 0], 0), (self.ndof(g), self.ndof(g))) if bc_weight: return M, 1 return M # Retrieve the permeability, boundary conditions, and aperture # The aperture is needed in the hybrid-dimensional case, otherwise is # assumed unitary param = data['param'] k = param.get_tensor(self) bc = param.get_bc(self) a = param.get_aperture() faces, cells, sign = sps.find(g.cell_faces) index = np.argsort(cells) faces, sign = faces[index], sign[index] # Map the domain to a reference geometry (i.e. equivalent to compute # surface coordinates in 1d and 2d) c_centers, f_normals, f_centers, R, dim, _ = pp.cg.map_grid(g) if not data.get('is_tangential', False): # Rotate the permeability tensor and delete last dimension if g.dim < 3: k = k.copy() k.rotate(R) remove_dim = np.where(np.logical_not(dim))[0] k.perm = np.delete(k.perm, (remove_dim), axis=0) k.perm = np.delete(k.perm, (remove_dim), axis=1) # In the virtual cell approach the cell diameters should involve the # apertures, however to keep consistency with the hybrid-dimensional # approach and with the related hypotheses we avoid. diams = g.cell_diameters() # Weight for the stabilization term weight = np.power(diams, 2 - g.dim) # Allocate the data to store matrix entries, that's the most efficient # way to create a sparse matrix. size = np.sum(np.square(g.cell_faces.indptr[1:]-\ g.cell_faces.indptr[:-1])) I = np.empty(size, dtype=np.int) J = np.empty(size, dtype=np.int) dataIJ = np.empty(size) idx = 0 for c in np.arange(g.num_cells): # For the current cell retrieve its faces loc = slice(g.cell_faces.indptr[c], g.cell_faces.indptr[c + 1]) faces_loc = faces[loc] # Compute the H_div-mass local matrix A = self.massHdiv(a[c] * k.perm[0:g.dim, 0:g.dim, c], c_centers[:, c], g.cell_volumes[c], f_centers[:, faces_loc], f_normals[:, faces_loc], sign[loc], diams[c], weight[c])[0] # Save values for Hdiv-mass local matrix in the global structure cols = np.tile(faces_loc, (faces_loc.size, 1)) loc_idx = slice(idx, idx + cols.size) I[loc_idx] = cols.T.ravel() J[loc_idx] = cols.ravel() dataIJ[loc_idx] = A.ravel() idx += cols.size # Construct the global matrices mass = sps.coo_matrix((dataIJ, (I, J))) div = -g.cell_faces.T M = sps.bmat([[mass, div.T], [div, None]], format='csr') norm = sps.linalg.norm(mass, np.inf) if bc_weight else 1 # assign the Neumann boundary conditions # For dual discretizations, internal boundaries # are handled by assigning Dirichlet conditions. THus, we remove them # from the is_neu (where they belong by default) and add them in # is_dir. is_neu = np.logical_and(bc.is_neu, np.logical_not(bc.is_internal)) if bc and np.any(is_neu): is_neu = np.hstack((is_neu, np.zeros(g.num_cells, dtype=np.bool))) M[is_neu, :] *= 0 M[is_neu, is_neu] = norm if bc_weight: return M, norm return M
def interpolate(self, fnc=None, m0=None, mn=None, set_coeffs=False): """ Determines the spline's coefficients such that it interpolates a given function. Parameters ---------- fnc : callable or tuple of arrays (tt, xx) m0 : float mn : float set_coeffs: bool determine whether the calculated coefficients should be set to self or not """ if not callable(fnc): fnc = self._interpolate_array(fnc) assert callable(fnc) points = self.nodes # IPS() if 0 and not self._use_std_approach: # TODO: This code seems to be obsolete since 2015-12 assert self._steady_flag # how many independent coefficients does the spline have coeffs_size = self._indep_coeffs.size # generate points to evaluate the function at # (function and spline interpolant should be equal in these) nodes = np.linspace(self.a, self.b, coeffs_size, endpoint=True) # evaluate the function fnc_t = np.array([fnc(t) for t in nodes]) dep_vecs = [self.get_dependence_vectors(t) for t in nodes] S_dep_mat = np.array([vec[0] for vec in dep_vecs]) S_dep_mat_abs = np.array([vec[1] for vec in dep_vecs]) # solve the equation system #free_coeffs = np.linalg.solve(S_dep_mat, fnc_t - S_dep_mat_abs) free_coeffs = np.linalg.lstsq(S_dep_mat, fnc_t - S_dep_mat_abs)[0] else: # compute values at the nodes vv = np.array([fnc(t) for t in self.nodes]) # create vector of step sizes #h = np.array([self.nodes[k+1] - self.nodes[k] for k in xrange(self.nodes.size-1)]) h = np.diff(self.nodes) # create diagonals for the coefficient matrix of the equation system l = np.array([ h[k + 1] / (h[k] + h[k + 1]) for k in xrange(self.nodes.size - 2) ]) d = 2.0 * np.ones(self.nodes.size - 2) u = np.array([ h[k] / (h[k] + h[k + 1]) for k in xrange(self.nodes.size - 2) ]) # right hand side of the equation system r = np.array([(3.0/h[k])*l[k]*(vv[k+1] - vv[k]) + (3.0/h[k+1])*u[k]*(vv[k+2]-vv[k+1])\ for k in xrange(self.nodes.size-2)]) # add conditions for unique solution # boundary derivatives l = np.hstack([l, 0.0, 0.0]) d = np.hstack([1.0, d, 1.0]) u = np.hstack([0.0, 0.0, u]) if m0 is None: m0 = (vv[1] - vv[0]) / (self.nodes[1] - self.nodes[0]) if mn is None: mn = (vv[-1] - vv[-2]) / (self.nodes[-1] - self.nodes[-2]) r = np.hstack([m0, r, mn]) data = [l, d, u] offsets = [-1, 0, 1] # create tridiagonal coefficient matrix D = sparse.dia_matrix((data, offsets), shape=(self.n + 1, self.n + 1)) # solve the equation system sol = sparse.linalg.spsolve(D.tocsr(), r) # calculate the coefficients coeffs = np.zeros((self.n, 4)) # compute the coefficients of the interpolant if self._use_std_approach: for i in xrange(self.n): coeffs[i, :] = [ vv[i], sol[i], 3.0 / h[i]**2 * (vv[i + 1] - vv[i]) - 1.0 / h[i] * (2 * sol[i] + sol[i + 1]), -2.0 / h[i]**3 * (vv[i + 1] - vv[i]) + 1.0 / h[i]**2 * (sol[i] + sol[i + 1]), ] else: for i in xrange(self.n): coeffs[i, :] = [ vv[i + 1], sol[i + 1], 3.0 / h[i]**2 * (vv[i] - vv[i + 1]) + 1.0 / h[i] * (sol[i] + 2 * sol[i + 1]), 2.0 / h[i]**3 * (vv[i] - vv[i + 1]) + 1.0 / h[i]**2 * (sol[i] + sol[i + 1]) ] # get the indices of the free coefficients coeff_name_split_str = [ c.name.split('_')[-2:] for c in self._indep_coeffs_sym ] free_coeff_indices = [(int(s[0]), int(s[1])) for s in coeff_name_split_str] free_coeffs = np.array([coeffs[i] for i in free_coeff_indices]) # set solution for the free coefficients if set_coeffs: self.set_coefficients(free_coeffs=free_coeffs) #!!! dbg test # self.set_coefficients(coeffs=coeffs) return free_coeffs
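# Worth noting for the tridiagonal build above (a standalone illustration):
# dia_matrix aligns each data row by column index, so for offset k the value
# stored at data[row, j] lands at position (j - k, j). The example from the
# scipy documentation makes the alignment explicit:
import numpy as np
from scipy import sparse

data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
offsets = np.array([0, -1, 2])
print(sparse.dia_matrix((data, offsets), shape=(4, 4)).toarray())
# [[1 0 3 0]
#  [1 2 0 4]
#  [0 2 3 0]
#  [0 0 3 4]]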
def tv_derivative(data, xs, u0=None, alpha=10., maxit=1000, linalg_solver_maxit=100., tol=1e-4, verbose=False, solver='lgmres'): data = _np.asarray(data, dtype=_np.float64).squeeze() xs = _np.asarray(xs, dtype=_np.float64).squeeze() n = data.shape[0] assert xs.shape[0] == n, "the grid must have the same dimension as data" epsilon = 1e-6 # grid of mid points between xs, extrapolating first and last node: # # x--|--x--|---x---|-x-|-x # midpoints = _np.concatenate( ([xs[0] - .5 * (xs[1] - xs[0])], .5 * (xs[1:] + xs[:-1]), [xs[-1] + .5 * (xs[-1] - xs[-2])])).squeeze() assert midpoints.shape[0] == n + 1 diff = _fd.get_fd_matrix_midpoints(midpoints, k=1, window_width=5) assert diff.shape[0] == n assert diff.shape[1] == n + 1 diff_t = diff.transpose(copy=True).tocsc() assert diff.shape[0] == n assert diff.shape[1] == n + 1 A = _cumtrapz_operator(midpoints) AT = A.transpose(copy=True) ATA = AT.dot(A) if u0 is None: u = _np.concatenate(([0], _np.diff(data), [0])) else: u = u0 # Aadj_A = lambda v: A_adjoint(A(v)) Aadj_offset = AT * (data[0] - data) E_n = _sparse.dia_matrix((n, n), dtype=xs.dtype) midpoints_diff = _np.diff(midpoints) for ii in range(1, maxit + 1): E_n.setdiag(midpoints_diff * (1. / _np.sqrt(_np.diff(u)**2.0 + epsilon))) L = diff_t * E_n * diff g = ATA.dot(u) + Aadj_offset + alpha * L * u # solve linear equation. info_i = 0 if solver == 'lgmres' or solver == 'lgmres_scipy': if solver == 'lgmres_scipy': s, info_i = _splin.lgmres(A=alpha * L + ATA, b=-g, x0=u, tol=tol, maxiter=linalg_solver_maxit, outer_k=7) else: from pynumtools.lgmres import lgmres as _lgmres s = _lgmres(A=alpha * L + ATA, b=-g, x0=u, tol=tol, maxiter=linalg_solver_maxit) elif solver == 'bicgstab': [s, info_i] = _splin.bicgstab(A=alpha * L + ATA, b=-g, x0=u, tol=tol, maxiter=linalg_solver_maxit) elif solver == 'spsolve': s = _splin.spsolve((alpha * L + ATA), -g, use_umfpack=True) elif solver == 'np': s = _np.linalg.solve( (alpha * L + ATA).todense().astype(_np.float64), (-g).astype(_np.float64)) relative_change = _np.linalg.norm(s[0]) / _np.linalg.norm(u) if verbose: print( 'iteration {0:4d}: relative change = {1:.3e}, gradient norm = {2:.3e}' .format(ii, relative_change, _np.linalg.norm(g))) if info_i > 0: print("WARNING - convergence to tolerance not achieved!") elif info_i < 0: print("WARNING - illegal input or breakdown") # Update current solution u = u + s return u
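# Hedged usage sketch (assumes the module's helpers, e.g. the finite-difference
# and cumulative-trapezoid operators it relies on, are importable): recover the
# derivative of noisy sin(x) samples.
import numpy as np

xs = np.linspace(0., 2. * np.pi, 200)
noisy = np.sin(xs) + 0.01 * np.random.randn(xs.size)
du = tv_derivative(noisy, xs, alpha=0.1, maxit=100, solver='spsolve')
# du has len(xs) + 1 entries, one per midpoint of the extended grid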