def get_sample_data(n_sess, full_brain=False, subj=1): """ Download the data for the current session and subject Parameters ---------- n_sess: int number of session, one of {0, 1, 2, 3, 4} subj: int number of subject, one of {1, 2} """ DIR = tempfile.mkdtemp() ds = np.DataSource(DIR) BASEDIR = 'http://fa.bianp.net/projects/hrf_estimation/data' BASEDIR_COMMON = BASEDIR + '/data_common/' if full_brain: BASEDIR += '/full_brain' BASEDIR_SUBJ = BASEDIR + '/data_subj%s/' % subj event_matrix = io.mmread(ds.open( BASEDIR_COMMON + 'event_matrix.mtx')).toarray() print('Downloading BOLD signal') voxels = np.load(ds.open( BASEDIR_SUBJ + 'voxels_%s.npy' % n_sess)) # print('Downloading Scatting Stim') # scatt_stim = np.load(ds.open( # BASEDIR_SUBJ + 'scatt_stim_%s.npy' % n_sess)) em = sparse.coo_matrix(event_matrix) fir_matrix = utils.convolve_events(event_matrix, np.eye(HRF_LENGTH)) events_train = sparse.block_diag([event_matrix] * 5).toarray() conditions_train = sparse.coo_matrix(events_train).col onsets_train = sparse.coo_matrix(events_train).row return voxels, conditions_train, onsets_train
def test_cross_val_score_fit_params(): clf = MockClassifier() n_samples = X.shape[0] n_classes = len(np.unique(y)) W_sparse = coo_matrix((np.array([1]), (np.array([1]), np.array([0]))), shape=(10, 1)) P_sparse = coo_matrix(np.eye(5)) DUMMY_INT = 42 DUMMY_STR = '42' DUMMY_OBJ = object() def assert_fit_params(clf): # Function to test that the values are passed correctly to the # classifier arguments for non-array type assert_equal(clf.dummy_int, DUMMY_INT) assert_equal(clf.dummy_str, DUMMY_STR) assert_equal(clf.dummy_obj, DUMMY_OBJ) fit_params = {'sample_weight': np.ones(n_samples), 'class_prior': np.ones(n_classes) / n_classes, 'sparse_sample_weight': W_sparse, 'sparse_param': P_sparse, 'dummy_int': DUMMY_INT, 'dummy_str': DUMMY_STR, 'dummy_obj': DUMMY_OBJ, 'callback': assert_fit_params} cross_val_score(clf, X, y, fit_params=fit_params)
def mesh_edges(tris): """Returns sparse matrix with edges as an adjacency matrix Parameters ---------- tris : array of shape [n_triangles x 3] The triangles Returns ------- edges : sparse matrix The adjacency matrix """ npoints = np.max(tris) + 1 ntris = len(tris) a, b, c = tris.T edges = sparse.coo_matrix((np.ones(ntris), (a, b)), shape=(npoints, npoints)) edges = edges + sparse.coo_matrix((np.ones(ntris), (b, c)), shape=(npoints, npoints)) edges = edges + sparse.coo_matrix((np.ones(ntris), (c, a)), shape=(npoints, npoints)) edges = edges.tocsr() edges = edges + edges.T return edges
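# Hedged usage sketch (added for illustration, not part of the original snippet):
# two triangles sharing the edge (1, 2). The returned matrix is symmetric and is
# nonzero wherever two vertices share a triangle edge; values can exceed 1 because
# duplicate COO entries are summed when the matrix is materialized.
import numpy as np

tris = np.array([[0, 1, 2],
                 [1, 3, 2]])
adj = mesh_edges(tris)
print(adj.toarray())
# nonzero pattern couples vertices 0-1, 0-2, 1-2, 1-3 and 2-3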
def sparseMatrix2coo(A, rowOffset=0, colOffset=0): """Convert SparseMatrix to scipy.coo_matrix. Parameters ---------- A: pg.SparseMapMatrix | pg.SparseMatrix Matrix to convert from. Returns ------- mat: scipy.coo_matrix Matrix to convert into. """ from scipy.sparse import coo_matrix vals = pg.RVector() rows = pg.IndexArray([0]) cols = pg.IndexArray([0]) if isinstance(A, pg.SparseMatrix): C = pg.RSparseMapMatrix(A) C.fillArrays(vals=vals, rows=rows, cols=cols) rows += rowOffset cols += colOffset return coo_matrix((vals, (rows, cols)), shape=(A.rows(), A.cols())) elif isinstance(A, pg.SparseMapMatrix): A.fillArrays(vals, rows, cols) rows += rowOffset cols += colOffset return coo_matrix((vals, (rows, cols)), shape=(A.rows(), A.cols())) return coo_matrix(A)
def _assemble(self, mu=None): g = self.grid bi = self.boundary_info if g.dim > 2: raise NotImplementedError if bi is None or not bi.has_robin or self.robin_data is None: return coo_matrix((g.size(g.dim), g.size(g.dim))).tocsc() RI = bi.robin_boundaries(1) if g.dim == 1: robin_c = self.robin_data[0](g.centers(1)[RI], mu=mu) I = coo_matrix((robin_c, (RI, RI)), shape=(g.size(g.dim), g.size(g.dim))) return csc_matrix(I).copy() else: xref = g.quadrature_points(1, order=self.order)[RI] # xref(robin-index, quadraturepoint-index) if self.robin_data[0].shape_range == (): robin_c = self.robin_data[0](xref, mu=mu) else: robin_elements = g.superentities(1, 0)[RI, 0] robin_indices = g.superentity_indices(1, 0)[RI, 0] normals = g.unit_outer_normals()[robin_elements, robin_indices] robin_values = self.robin_data[0](xref, mu=mu) robin_c = np.einsum('ei,eqi->eq', normals, robin_values) # robin_c(robin-index, quadraturepoint-index) q, w = line.quadrature(order=self.order) SF = np.squeeze(np.array([1 - q, q])) SF_INTS = np.einsum('ep,pi,pj,e,p->eij', robin_c, SF, SF, g.integration_elements(1)[RI], w).ravel() SF_I0 = np.repeat(g.subentities(1, g.dim)[RI], 2).ravel() SF_I1 = np.tile(g.subentities(1, g.dim)[RI], [1, 2]).ravel() I = coo_matrix((SF_INTS, (SF_I0, SF_I1)), shape=(g.size(g.dim), g.size(g.dim))) return csc_matrix(I).copy()
def test_bmat(self): A = coo_matrix([[1,2],[3,4]]) B = coo_matrix([[5],[6]]) C = coo_matrix([[7]]) D = coo_matrix((0,0)) expected = matrix([[1, 2, 5], [3, 4, 6], [0, 0, 7]]) assert_equal(construct.bmat([[A,B],[None,C]]).todense(), expected) expected = matrix([[1, 2, 0], [3, 4, 0], [0, 0, 7]]) assert_equal(construct.bmat([[A,None],[None,C]]).todense(), expected) expected = matrix([[0, 5], [0, 6], [7, 0]]) assert_equal(construct.bmat([[None,B],[C,None]]).todense(), expected) expected = matrix(np.empty((0,0))) assert_equal(construct.bmat([[None,None]]).todense(), expected) assert_equal(construct.bmat([[None,D],[D,None]]).todense(), expected) # test bug reported in gh-5976 expected = matrix([[7]]) assert_equal(construct.bmat([[None,D],[C,None]]).todense(), expected) # test failure cases assert_raises(ValueError, construct.bmat, [[A],[B]]) assert_raises(ValueError, construct.bmat, [[A,C]])
def lowerBidiagonalMatrix(m, n): # This is a simple example for testing LSMR. # It uses the leading m*n submatrix from # A = [ 1 # 1 2 # 2 3 # 3 4 # ... # n ] # suitably padded by zeros. # # 04 Jun 2010: First version for distribution with lsmr.py if m <= n: row = hstack((arange(m, dtype=int), \ arange(1, m, dtype=int))) col = hstack((arange(m, dtype=int), \ arange(m-1, dtype=int))) data = hstack((arange(1, m+1, dtype=float), \ arange(1,m, dtype=float))) return coo_matrix((data, (row, col)), shape=(m,n)) else: row = hstack((arange(n, dtype=int), \ arange(1, n+1, dtype=int))) col = hstack((arange(n, dtype=int), \ arange(n, dtype=int))) data = hstack((arange(1, n+1, dtype=float), \ arange(1,n+1, dtype=float))) return coo_matrix((data,(row, col)), shape=(m,n))
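# Hedged usage sketch (assumed, not from the original test file): build the 4x3
# lower-bidiagonal test matrix and feed it to scipy.sparse.linalg.lsmr, the solver
# this helper was written to exercise.
import numpy as np
from scipy.sparse.linalg import lsmr

G = lowerBidiagonalMatrix(4, 3)
print(G.toarray())
# [[1. 0. 0.]
#  [1. 2. 0.]
#  [0. 2. 3.]
#  [0. 0. 3.]]
b = G @ np.ones(3)
x = lsmr(G, b)[0]   # recovers approximately [1., 1., 1.] for this consistent system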
def get_problem_for_Q(self,p,r): """ Constructs second-stage quadratic problem in the form minimize(x) (1/2)x^THx + g^Tx subject to Ax = b l <= x <= u, where x = (q,w,s,z). Parameters ---------- p : generator powers r : renewable powers Returns ------- problem : QuadProblem """ # Constatns num_p = self.num_p num_w = self.num_w num_r = self.num_r num_bus = self.num_bus num_br = self.num_br Ow = coo_matrix((num_w,num_w)) Os = coo_matrix((num_r,num_r)) Oz = coo_matrix((num_br,num_br)) Iz = eye(num_br,format='coo') ow = np.zeros(num_w) os = np.zeros(num_r) oz = np.zeros(num_br) cost_factor = self.parameters['cost_factor'] H1 = self.H1/cost_factor g1 = self.g1/cost_factor # Form QP problem H = bmat([[H1,None,None,None], # q: gen power adjustments [None,Ow,None,None], # w: bus voltage angles [None,None,Os,None], # s: curtailed renewable powers [None,None,None,Oz]], # z: slack variables for thermal limits format='coo') g = np.hstack((g1,ow,os,oz)) A = bmat([[self.G,-self.A,self.R,None], [None,self.J,None,-Iz]],format='coo') b = np.hstack((self.b-self.G*p,oz)) l = np.hstack((self.p_min-p, self.w_min, os, self.z_min)) u = np.hstack((self.p_max-p, self.w_max, r, self.z_max)) # Return return QuadProblem(H,g,A,b,l,u)
def main(): print "Solve small matrix..." R = array([0, 0, 1, 1, 1, 2, 2]) C = array([0, 1, 0, 1, 2, 1, 2]) V = array([4.0, -1.0, -1.0, 4.0, -1.0, -1.0, 4.0]) b = array([3.0, 2.0, 3.0]) A = coo_matrix((V, (R, C)), shape=(3, 3)) # convert to csr format for efficiency x = spsolve(A.tocsr(), b) print "x = ", x print "Solve psd matrix..." # skip the first row (n, nnz) A = numpy.genfromtxt('../data/psd.txt', skiprows=1) b = numpy.genfromtxt('../data/b.txt') coo = coo_matrix((A[:, 2], (A[:, 0], A[:, 1]))) x = spsolve(coo.tocsr(), b) print 'x = ', x print "Solve big matrix..." A = numpy.genfromtxt('../data/mat_helmholtz.txt', skiprows=1) coo = coo_matrix((A[:, 2], (A[:, 0], A[:, 1]))) n = coo.shape[0] b = numpy.ones(n) x = spsolve(coo.tocsr(), b) print 'x = ', x
def real_case():
    data = real_data(filename)
    D, R = read_tensor(filename)
    T = np.zeros((len(D), 3, 3))
    for i in xrange(len(D)):
        T[i, :, :] = np.dot(np.dot(R[i, :, :], D[i, :, :]), R[i, :, :].T)
    plot_2d(T[:, :2, :2])
    del T, D, R
    print np.max(data[:, 0, 0]), np.max(data[:, 1, 1]), np.max(data[:, 2, 2])
    # replace outliers and NaNs by the per-component mean
    for i in xrange(n * n):
        if data[i, 0, 0] < -20 or math.isnan(data[i, 0, 0]):
            data[i, 0, 0] = np.mean(data[:, 0, 0])
        if data[i, 1, 1] < -20 or math.isnan(data[i, 1, 1]):
            data[i, 1, 1] = np.mean(data[:, 1, 1])
    y = sparse.coo_matrix(data[:, 0, 0]).transpose()
    f = main(y)
    d = np.zeros((n * n, 2, 2))
    d[:, 0, 0] = f[:, 0]
    y = sparse.coo_matrix(data[:, 0, 1]).transpose()
    f = main(y)
    d[:, 0, 1] = f[:, 0]
    d[:, 1, 0] = f[:, 0]
    y = sparse.coo_matrix(data[:, 1, 1]).transpose()
    f = main(y)
    d[:, 1, 1] = f[:, 0]
    res = from_log_euclidian(d)
    plot_2d(res)
    return res
def co_labelling(z, kmax=None, kmin=None):
    """Return a sparse co-labelling matrix given the label vector z

    Parameters
    ----------
    z: array of shape (n_samples), the input labels
    kmax: int, optional, considers only the labels in the range [0, kmax[
    kmin: int, optional, considers only the labels strictly greater than kmin

    Returns
    -------
    colabel: a sparse coo_matrix,
        yields the co-labelling of the data,
        i.e. c[i, j] = 1 if z[i] == z[j], 0 otherwise
    """
    from scipy.sparse import coo_matrix
    n = z.size
    colabel = coo_matrix((n, n))
    if kmax is None:
        kmax = z.max() + 1
    if kmin is None:
        kmin = z.min() - 1
    for k in np.unique(z):
        if (k < kmax) & (k > kmin):
            i = np.array(np.nonzero(z == k))
            row = np.repeat(i, i.size)
            col = np.ravel(np.tile(i, i.size))
            data = np.ones(i.size ** 2)
            colabel = colabel + coo_matrix((data, (row, col)), shape=(n, n))
    return colabel
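# Hedged usage sketch (added for illustration): for z = [0, 0, 1] the two samples
# sharing label 0 are coupled, and the third sample is coupled only to itself.
import numpy as np

z = np.array([0, 0, 1])
print(co_labelling(z).toarray())
# [[1. 1. 0.]
#  [1. 1. 0.]
#  [0. 0. 1.]]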
def function(lamb,y): b = PETSc.Vec().createSeq(m*m) b.setValues(range(m*m), b_matrix().transpose().dot(y).toarray()) D = np.eye(m*m) D[:1,:] = 0 D = sparse.coo_matrix(D).dot(lamb) B = b_matrix().transpose().dot(b_matrix()) B = D + B A = PETSc.Mat() A.create(comm) A.setSizes([m*m, m*m]) A.setType('mpidense') A.setUp() A.setValues(range(m*m),range(m*m),B.toarray()) A.assemblyBegin() A.assemblyEnd() x = PETSc.Vec().createSeq(m*m) ksp = PETSc.KSP().create() ksp.setOperators(A) ksp.setFromOptions() ksp.setType('cg') print 'Solving with:', ksp.getType() ksp.solve(b, x) #SS.setValues(range(m*m), range(m*m),B.toarray()) #S = sparse.kron(S,SX) print 'Converged in', ksp.getIterationNumber(), 'iterations.' x = sparse.coo_matrix(x.getArray()) fun = b_matrix().dot(x.transpose()) return fun
def test_case(): data = test_data() x = np.linspace(0,1,n) Z = np.reshape(data[:,0,0],(n,n)) #te = np.reshape(test,(num,num)) # fig = plt.figure() # ax = fig.add_subplot(111,projection ='3d') # X,Y = np.meshgrid(x,x) # ax.plot_surface(X,Y,Z, rstride =4, cstride =4, color ='b') # for i in xrange(n*n): # if data[i,0,0]<-20 or math.isnan(data[i,0,0]) == True: # data[i,0,0] = -13.5 #print len(data[:,0,0]) # y = sparse.coo_matrix(test_function(n)).transpose() y = sparse.coo_matrix(data[:,0,0]).transpose() f = main(y) d = np.zeros((n*n,2,2)) d[:,0,0] = f[:,0] y = sparse.coo_matrix(data[:,0,1]).transpose() f = main(y) d[:,0,1] = f[:,0] d[:,1,0] = f[:,0] y = sparse.coo_matrix(data[:,1,1]).transpose() f = main(y) d[:,1,1] = f[:,0] res = from_log_euclidian(d) plot_2d(res) return res
def test_input_dtypes(): dtypes = (np.int32, np.int64, np.float32, np.float64) no_users, no_items = (10, 100) no_features = 20 for dtype in dtypes: train = sp.coo_matrix((no_users, no_items), dtype=dtype) user_features = sp.coo_matrix((no_users, no_features), dtype=dtype) item_features = sp.coo_matrix((no_items, no_features), dtype=dtype) model = LightFM() model.fit_partial(train, user_features=user_features, item_features=item_features) model.predict(np.random.randint(0, no_users, 10).astype(np.int32), np.random.randint(0, no_items, 10).astype(np.int32), user_features=user_features, item_features=item_features)
def setup_spgemm_scipy(size, sparsity = None, context = None, dtype = np.float32): WITH_SCIPY = True try: import scipy.sparse as sp except: WITH_SCIPY = False if not WITH_SCIPY: raise UnsupportedPlatformException("scipy.sparse") import math nnz = int(math.ceil((size*size)*sparsity)) mod = nnz values = np.array([], dtype=dtype) max_size = 10**6 while mod > 0: if mod < max_size: values = np.append(values, np.ones((mod,)).astype(dtype) * 0.6) mod = 0 else: values = np.append(values, np.ones((max_size,)).astype(dtype) * 0.6) mod -= max_size rows = np.random.randint(0, size-1, size=nnz) cols = np.random.randint(0, size-1, size=nnz) A = sp.coo_matrix((values, (rows, cols)), shape=(size, size), dtype=dtype) B = sp.coo_matrix((values, (rows, cols)), shape=(size, size), dtype=dtype) return A, B
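# Hedged usage sketch (assumed; the benchmark driver itself is not shown here):
# the two COO operands returned above would normally be converted to CSR before
# timing the sparse-sparse product (SpGEMM).
A, B = setup_spgemm_scipy(1000, sparsity=0.01)
C = A.tocsr() @ B.tocsr()
print(C.nnz)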
def mesh_edges(faces): """Get sparse matrix with edges as an adjacency matrix. This function is a copy from the PySurfer package. See : https://github.com/nipy/PySurfer/blob/master/surfer/utils.py Parameters ---------- faces : array_like The mesh faces of shape (n_faces, 3). Returns ------- edges : sparse matrix The adjacency matrix. """ from scipy import sparse npoints = np.max(faces) + 1 nfaces = len(faces) a, b, c = faces.T edges = sparse.coo_matrix((np.ones(nfaces), (a, b)), shape=(npoints, npoints)) edges = edges + sparse.coo_matrix((np.ones(nfaces), (b, c)), shape=(npoints, npoints)) edges = edges + sparse.coo_matrix((np.ones(nfaces), (c, a)), shape=(npoints, npoints)) edges = edges + edges.T edges = edges.tocoo() return edges
def allocate(self): Annz = self.A_nnz Gnnz = self.G_nnz Jnnz = self.J_nnz Arows = self.A_row Grows = self.G_row Jrows = self.J_rows Hnnz = self.H_nnz num_vars = self.network.num_vars self.set_b(np.zeros(Arows)) self.set_A(coo_matrix((np.zeros(Annz),(Annz*[0],Annz*[0])), shape=(Arows,num_vars))) self.set_l(np.zeros(Grows)) self.set_u(np.zeros(Grows)) self.set_G(coo_matrix((np.zeros(Gnnz),(Gnnz*[0],Gnnz*[0])), shape=(Grows,num_vars))) self.set_f(np.zeros(Jrows)) self.set_J(coo_matrix((np.zeros(Jnnz),(Jnnz*[0],Jnnz*[0])), shape=(Jrows,num_vars))) self.allocate_H_array(Jrows) for i in range(Jrows): self.set_H_single(i, coo_matrix((np.zeros(Hnnz[i]),(Hnnz[i]*[0],Hnnz[i]*[0])), shape=(num_vars,num_vars)))
def test_iqp_random(self): solver = opt.opt_solver.OptSolverIQP() solver.set_parameters({'tol': 1e-8, 'quiet': True}) self.assertRaises(Exception,solver.solve,4) for i in range(10): n = 50 m = 10 p = 20 A = coo_matrix(np.random.randn(m,n)) b = np.random.randn(m) g = np.random.randn(n) B = np.matrix(np.random.randn(p,n)) H = coo_matrix(B.T*B) l = np.random.randn(n) u = l + 10*np.random.rand() prob = opt.opt_solver.QuadProblem(H,g,A,b,l,u) solver.solve(prob) x = solver.get_primal_variables() lam,nu,mu,pi = solver.get_dual_variables() eps = 1e-10 self.assertLess(norm(g + H*x - A.T*lam + mu - pi),eps) self.assertLess(norm(A*x-b),eps) self.assertTrue(np.all(x <= u)) self.assertTrue(np.all(x >= l)) self.assertTrue(norm(mu*(u-x),np.inf),eps) self.assertTrue(norm(pi*(x-l),np.inf),eps)
def test_reshape(old_shape, new_shape, stride_only=False): blob_in0 = 'col' blob_out0 = 'col_out' blob_in1 = 'row' blob_out1 = 'row_out' old_shape_for_op = (-1, old_shape[1]) if stride_only else old_shape op = core.CreateOperator('SparseMatrixReshape', [blob_in0, blob_in1], [blob_out0, blob_out1], old_shape=old_shape_for_op, new_shape=new_shape) A = np.random.random_sample(old_shape) A[np.random.random_sample(old_shape) > .5] = 0 A_coo = coo_matrix(A) old_row, old_col = A_coo.row, A_coo.col workspace.FeedBlob(blob_in0, old_col.astype(np.int64)) workspace.FeedBlob(blob_in1, old_row.astype(np.int32)) workspace.RunOperatorOnce(op) A_new_coo = coo_matrix(A.reshape(new_shape)) new_row, new_col = A_new_coo.row, A_new_coo.col col_out = workspace.FetchBlob(blob_out0) row_out = workspace.FetchBlob(blob_out1) np.testing.assert_array_equal(col_out, new_col) np.testing.assert_array_equal(row_out, new_row)
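# Hedged reference sketch in plain numpy of the reshape relation the test above
# checks against (coo_matrix(A.reshape(new_shape))): map (row, col) under the old
# shape to a C-order linear index and split it back under the new shape. This is
# only an illustration of the index arithmetic, not the operator implementation.
import numpy as np

def sparse_reshape_indices(old_row, old_col, old_shape, new_shape):
    # C-order linear index under the old shape
    linear = old_row.astype(np.int64) * old_shape[1] + old_col
    # split the linear index back into (row, col) under the new shape
    new_row, new_col = np.divmod(linear, new_shape[1])
    return new_row, new_col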
def histogram_from_ijv(parent_ijv, child_ijv): """Find per pixel overlap of parent labels and child labels, stored in ijv format. parent_ijv - the parents which contain the children child_ijv - the children to be mapped to a parent Returns a 2d array of overlap between each parent and child. Note that the first row and column are empty, as these correspond to parent and child labels of 0. """ parent_count = 0 if (parent_ijv.shape[0] == 0) else np.max(parent_ijv[:, 2]) child_count = 0 if (child_ijv.shape[0] == 0) else np.max(child_ijv[:, 2]) if parent_count == 0 or child_count == 0: return np.zeros((parent_count + 1, child_count + 1), int) dim_i = max(np.max(parent_ijv[:, 0]), np.max(child_ijv[:, 0])) + 1 dim_j = max(np.max(parent_ijv[:, 1]), np.max(child_ijv[:, 1])) + 1 parent_linear_ij = parent_ijv[:, 0] + dim_i * parent_ijv[:, 1] child_linear_ij = child_ijv[:, 0] + dim_i * child_ijv[:, 1] parent_matrix = coo_matrix((np.ones((parent_ijv.shape[0],)), (parent_ijv[:, 2], parent_linear_ij)), shape=(parent_count + 1, dim_i * dim_j)) child_matrix = coo_matrix((np.ones((child_ijv.shape[0],)), (child_linear_ij, child_ijv[:, 2])), shape=(dim_i * dim_j, child_count + 1)) # I surely do not understand the sparse code. Converting both # arrays to csc gives the best peformance... Why not p.csr and # c.csc? return (parent_matrix.tocsc() * child_matrix.tocsc()).toarray()
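# Hedged usage sketch (added for illustration): one parent object (label 1)
# covering pixels (0, 0) and (0, 1), and one child (label 1) covering only (0, 0).
# The overlap count lands in entry [1, 1]; row and column 0 stay empty, as the
# docstring describes for the background label 0.
import numpy as np

parent_ijv = np.array([[0, 0, 1], [0, 1, 1]])
child_ijv = np.array([[0, 0, 1]])
print(histogram_from_ijv(parent_ijv, child_ijv))
# [[0. 0.]
#  [0. 1.]]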
def loadMVLENS(): print "Parsing 1M movielens data . . ." data_array = loadtxt("/home/meawoppl/datasets/movielens-1m/ratings.dat", delimiter="::") print "\tDone. . . " print "Converting to sparse matrix format . . ." users, movies, ratings, trash = data_array.T ratings -= ratings.mean() del trash print users.min(), movies.min() user_movie = c_[users, movies].T m = sparse.coo_matrix((ratings, user_movie)) print "\tDone" tm = array(m.todense()) tmm = 1*(tm != 0) # remove nonparticaptory users . . . tm = tm[tmm.sum(axis=1)!=0, :] tmm = tmm[tmm.sum(axis=1)!=0, :] tm = tm[:, tmm.sum(axis=0)!=0] tmm = tmm[:, tmm.sum(axis=0)!=0] row_mean = tm.sum(axis=1) / tmm.sum(axis=1) col_mean = tm.sum(axis=0) / tmm.sum(axis=0) m=sparse.coo_matrix(tm) return m, row_mean, col_mean
def set_src_feat_vector(): global X, sizeTrData, devAndTestSrcDict, TRAIN_FILE sizeTrData = get_numlines(TRAIN_FILE) X1 = numpy.zeros((math.ceil(sizeTrData/2), len(devAndTestSrcDict)), dtype=int) for numLine, line in enumerate(open(TRAIN_FILE, 'r')): if numLine == math.ceil(sizeTrData/2): break src, tgt = line.strip().split('|||') for word in src.split(): if word in devAndTestSrcDict: X1[numLine][devAndTestSrcDict[word]] += 1 X1 = coo_matrix(X1) X2 = numpy.zeros((sizeTrData-math.ceil(sizeTrData/2), len(devAndTestSrcDict)), dtype=int) for numLine, line in enumerate(open(TRAIN_FILE, 'r')): if numLine >= math.ceil(sizeTrData/2): src, tgt = line.strip().split('|||') for word in src.split(): if word in devAndTestSrcDict: X2[numLine-math.ceil(sizeTrData/2)][devAndTestSrcDict[word]] += 1 X2 = coo_matrix(X2) X = vstack([X1, X2])
def test_synthesis_mat(self): """ m1 = [1, 0, 2, -1] [0, 0, 3, 0] [4, 5, 6, -2] m2 = [1,-2, -5] [0, 3, -6] """ row = np.array([0, 0, +0, 1, 2, 2, 2, 2]) col = np.array([0, 2, +3, 2, 0, 1, 2, 3]) dat = np.array([1, 2, -1, 3, 4, 5, 6, -2]) m1 = coo_matrix((dat, (row, col))) row = np.array([+0, +0, +0, +1, +1]) col = np.array([+0, +1, +2, +1, +2]) dat = np.array([+1, -2, -5, +3, -6]) m2 = coo_matrix((dat, (row, col))) self.assertEqual((3, 4), m1.shape) self.assertEqual((2, 3), m2.shape) m3 = synthesis_mat(m1, m2).toarray() self.assertEqual((6, 12), m3.shape) self.assertAlmostEqual(2.0, m3[0, 2]) self.assertAlmostEqual(3.0, m3[1, 2]) self.assertAlmostEqual(-6.0, m3[1, 4 + 2]) self.assertAlmostEqual(+9.0, m3[3 + 1, 4 + 2])
def __init__(self, X_l, L_l, X_u, random_generator, **kw):
    """
    Initializes the S3VM optimizer.
    """
    self.__random_generator = random_generator
    # This is a nuisance, but we may need to pad extra dimensions to either X_l or X_u
    # in case the highest feature indices appear only in one of the two data matrices
    if X_l.shape[1] > X_u.shape[1]:
        X_u = sparse.hstack([X_u, sparse.coo_matrix((X_u.shape[0], X_l.shape[1] - X_u.shape[1]))])
    elif X_l.shape[1] < X_u.shape[1]:
        X_l = sparse.hstack([X_l, sparse.coo_matrix((X_l.shape[0], X_u.shape[1] - X_l.shape[1]))])
    # We vertically stack the data matrices into one big matrix
    X = sparse.vstack([X_l, X_u])
    self.__size_l, self.__size_u, self.__size_n = X_l.shape[0], X_u.shape[0], X_l.shape[0] + X_u.shape[0]
    x = arr.array('i')
    for l in L_l:
        x.append(int(l))
    self.__YL = mat(x, dtype=np.float64)
    self.__YL = self.__YL.transpose()
    self.__setParameters(**kw)
    self.__kw = kw
    self.X_l = X_l.tocsr()
    self.X_u = X_u.tocsr()
    self.X = X.tocsr()
    # compute mean of unlabeled patterns
    self.__mean_u = self.X_u.mean(axis=0)
    self.X_u_T = X_u.tocsc().T
    self.X_l_T = X_l.tocsc().T
    self.X_T = X.tocsc().T
def HiptmairMatrixSetup(mesh, N, M): path = os.path.abspath(os.path.join(inspect.getfile(inspect.currentframe()), "..")) gradient_code = open(os.path.join(path, 'DiscreteGradient.cpp'), 'r').read() compiled_gradient_module = compile_extension_module(code=gradient_code) column = numpy.zeros(2*mesh.num_edges(), order="C") #, dtype="intc") row = numpy.zeros(2*mesh.num_edges(), order="C") #, dtype="intc") data = numpy.zeros(2*mesh.num_edges(), order="C") #, dtype="intc") dataX = numpy.zeros(2*mesh.num_edges(), order="C") dataY = numpy.zeros(2*mesh.num_edges(), order="C") dataZ = numpy.zeros(2*mesh.num_edges(), order="C") tic() c = compiled_gradient_module.ProlongationGradsecond(mesh, dataX,dataY,dataZ, data, row, column) end = toc() print ("{:40}").format("Data for C and P created, time: "), " ==> ",("{:4f}").format(end) # print row # print column # print data C = coo_matrix((data,(row,column)), shape=(N, M)).tocsr() Px = coo_matrix((dataX,(row,column)), shape=(N, M)).tocsr() Py = coo_matrix((dataY,(row,column)), shape=(N, M)).tocsr() Pz = coo_matrix((dataZ,(row,column)), shape=(N, M)).tocsr() return C, [Px,Py,Pz]
def form_prediction_matrix(y_train_pred_proba, y_test_pred, user_label_matrix, annotated_user_ids, non_annotated_user_ids): index = user_label_matrix[annotated_user_ids, :] > 0.0 index = index.toarray() y_train_pred_proba_new = np.zeros_like(y_train_pred_proba) y_train_pred_proba_new[index] = y_train_pred_proba[index] y_train_pred_proba = y_train_pred_proba_new y_train_pred_proba = spsp.coo_matrix(y_train_pred_proba, shape=y_train_pred_proba.shape) y_test_pred = spsp.coo_matrix(y_test_pred, shape=y_test_pred.shape) prediction_matrix_row = np.append(annotated_user_ids[y_train_pred_proba.row], [non_annotated_user_ids[y_test_pred.row, ]]) prediction_matrix_col = np.append(y_train_pred_proba.col, [y_test_pred.col, ]) prediction_matrix_data = np.append(y_train_pred_proba.data, [y_test_pred.data, ]) prediction_matrix = spsp.coo_matrix((prediction_matrix_data, (prediction_matrix_row, prediction_matrix_col)), shape=(annotated_user_ids.size + non_annotated_user_ids.size, user_label_matrix.shape[1])) prediction_matrix = spsp.csr_matrix(prediction_matrix) prediction_matrix.eliminate_zeros() return prediction_matrix
def test_multilabel_representation_invariance(): # Generate some data n_classes = 4 n_samples = 50 _, y1 = make_multilabel_classification( n_features=1, n_classes=n_classes, random_state=0, n_samples=n_samples, allow_unlabeled=True ) _, y2 = make_multilabel_classification( n_features=1, n_classes=n_classes, random_state=1, n_samples=n_samples, allow_unlabeled=True ) # To make sure at least one empty label is present y1 += [0] * n_classes y2 += [0] * n_classes y1_sparse_indicator = sp.coo_matrix(y1) y2_sparse_indicator = sp.coo_matrix(y2) for name in MULTILABELS_METRICS: metric = ALL_METRICS[name] # XXX cruel hack to work with partial functions if isinstance(metric, partial): metric.__module__ = "tmp" metric.__name__ = name measure = metric(y1, y2) # Check representation invariance assert_almost_equal( metric(y1_sparse_indicator, y2_sparse_indicator), measure, err_msg="%s failed representation invariance " "between dense and sparse indicator " "formats." % name, )
def to_sparse(_membership, out): "Return a sparse matrix object." n_elements, _unique, _weighted, collections, collections_id = out sh = n_elements, len(collections_id) aux_map = dict(zip(collections_id, range(sh[1]))) if issparse(_membership): return _membership, (range(n_elements), collections_id) if _unique: _membership = np.array(_membership).ravel() matrix = np.array([aux_map[e] for e in _membership]) matrix = matrix.astype(int) matrix = coo_matrix((np.ones(sh[0]), (range(sh[0]), matrix)), shape=sh) elif not _weighted: indices = [] for i in xrange(sh[0]): for j in range(len(_membership[i])): indices.append((i, aux_map[_membership[i][j]])) indices = np.array(indices)[:, 0], np.array(indices)[:, 1] matrix = coo_matrix((np.ones(len(indices[0])), indices), shape=sh) elif _weighted: indices, data = [], [] for i in xrange(sh[0]): for j in _membership[i]: indices.append((i, aux_map[j])) data.append(_membership[i][j]) indices = np.array(indices)[:, 0], np.array(indices)[:, 1] matrix = coo_matrix((np.array(data), indices), shape=sh) return matrix, (range(n_elements), collections_id)
def add_indices(self, indices, axis): """ Add new indices to the matrix given an axis, filled with zeroes. If an index value is already present it is ignored. :param indices: List of words :param axis: 0 (rows) or 1 (cols) :return: Matrix with new index. """ if axis == 0: new_indices = list(set(indices) - set(self.row2word)) if len(new_indices) == 0: return self shape = (len(new_indices), self.shape[1]) mat = self._new_instance(sp.coo_matrix(shape), row2word=new_indices) elif axis == 1: new_indices = list(set(indices) - set(self.col2word)) if len(new_indices) == 0: return self shape = (self.shape[0], len(new_indices)) mat = self._new_instance(sp.coo_matrix(shape), col2word=new_indices) else: raise ValueError("Axis can only be 0 or 1") return self.append(mat, axis)
def main():
    lily_objs = list()
    with open('lily.txt') as f:
        for line in f:
            line = line.strip()
            lily_obj = dict()
            label, content = line.split('\t')
            lily_obj['label'] = label
            lily_obj['words'] = content.split(' ')
            lily_objs.append(lily_obj)
    random.seed(0)
    random.shuffle(lily_objs)  # shuffle the samples in place
    corpus = [' '.join(lily_obj['words']) for lily_obj in lily_objs]
    tf, tfidf = cal_tf_and_tfidf(corpus)
    labels = [lily_obj['label'] for lily_obj in lily_objs]
    labels = np.array(labels)
    lily = dict()
    tf = coo_matrix(tf)
    tfidf = coo_matrix(tfidf)
    lily['tf'] = tf
    lily['tfidf'] = tfidf
    lily['labels'] = labels
    with open('lily.pickle', 'wb') as f:
        pickle.dump(lily, f)
    tf = np.array(tf)
    print(type(tf))
def helm_coefficients_josep(Yseries, V0, S0, Ysh0, pq, pv, sl, pqpv, tolerance=1e-6, max_coeff=30, verbose=False): """ Holomorphic Embedding LoadFlow Method as formulated by Josep Fanals Batllori in 2020 THis function just returns the coefficients for further usage in other routines :param Yseries: Admittance matrix of the series elements :param V0: vector of specified voltages :param S0: vector of specified power :param Ysh0: vector of shunt admittances (including the shunts of the branches) :param pq: list of pq nodes :param pv: list of pv nodes :param sl: list of slack nodes :param pqpv: sorted list of pq and pv nodes :param tolerance: target error (or tolerance) :param max_coeff: maximum number of coefficients :param verbose: print intermediate information :return: U, X, Q, iterations """ npqpv = len(pqpv) npv = len(pv) nsl = len(sl) n = Yseries.shape[0] # --------------------------- PREPARING IMPLEMENTATION ------------------------------------------------------------- U = np.zeros((max_coeff, npqpv), dtype=complex) # voltages X = np.zeros((max_coeff, npqpv), dtype=complex) # compute X=1/conj(U) Q = np.zeros((max_coeff, npqpv), dtype=complex) # unknown reactive powers if n < 2: return U, X, Q, 0 if verbose: print('Yseries') print(Yseries.toarray()) df = pd.DataFrame(data=np.c_[Ysh0.imag, S0.real, S0.imag, np.abs(V0)], columns=['Ysh', 'P0', 'Q0', 'V0']) print(df) # build the reduced system Yred = Yseries[np.ix_(pqpv, pqpv)] # admittance matrix without slack buses Yslack = -Yseries[np.ix_(pqpv, sl)] # yes, it is the negative of this G = np.real(Yred) # real parts of Yij B = np.imag(Yred) # imaginary parts of Yij vec_P = S0.real[pqpv] vec_Q = S0.imag[pqpv] Vslack = V0[sl] Ysh = Ysh0[pqpv] Vm0 = np.abs(V0[pqpv]) vec_W = Vm0 * Vm0 # indices 0 based in the internal scheme nsl_counted = np.zeros(n, dtype=int) compt = 0 for i in range(n): if i in sl: compt += 1 nsl_counted[i] = compt pq_ = pq - nsl_counted[pq] pv_ = pv - nsl_counted[pv] pqpv_ = np.sort(np.r_[pq_, pv_]) # .......................CALCULATION OF TERMS [0] ------------------------------------------------------------------ if nsl > 1: U[0, :] = spsolve(Yred, Yslack.sum(axis=1)) else: U[0, :] = spsolve(Yred, Yslack) X[0, :] = 1 / np.conj(U[0, :]) # .......................CALCULATION OF TERMS [1] ------------------------------------------------------------------ valor = np.zeros(npqpv, dtype=complex) # get the current injections that appear due to the slack buses reduction I_inj_slack = Yslack[pqpv_, :] * Vslack valor[pq_] = I_inj_slack[pq_] - Yslack[pq_].sum(axis=1).A1 + (vec_P[pq_] - vec_Q[pq_] * 1j) * X[0, pq_] - U[0, pq_] * Ysh[pq_] valor[pv_] = I_inj_slack[pv_] - Yslack[pv_].sum(axis=1).A1 + (vec_P[pv_]) * X[0, pv_] - U[0, pv_] * Ysh[pv_] # compose the right-hand side vector RHS = np.r_[valor.real, valor.imag, vec_W[pv_] - (U[0, pv_] * U[0, pv_]).real # vec_W[pv_] - 1.0 ] # Form the system matrix (MAT) Upv = U[0, pv_] Xpv = X[0, pv_] VRE = coo_matrix((2 * Upv.real, (np.arange(npv), pv_)), shape=(npv, npqpv)).tocsc() VIM = coo_matrix((2 * Upv.imag, (np.arange(npv), pv_)), shape=(npv, npqpv)).tocsc() XIM = coo_matrix((-Xpv.imag, (pv_, np.arange(npv))), shape=(npqpv, npv)).tocsc() XRE = coo_matrix((Xpv.real, (pv_, np.arange(npv))), shape=(npqpv, npv)).tocsc() EMPTY = csc_matrix((npv, npv)) MAT = vs((hs((G, -B, XIM)), hs((B, G, XRE)), hs((VRE, VIM, EMPTY))), format='csc') if verbose: print('MAT') print(MAT.toarray()) # factorize (only once) MAT_LU = factorized(MAT.tocsc()) # solve LHS = MAT_LU(RHS) # update coefficients U[1, :] = 
LHS[:npqpv] + 1j * LHS[npqpv:2 * npqpv] Q[0, pv_] = LHS[2 * npqpv:] X[1, :] = -X[0, :] * np.conj(U[1, :]) / np.conj(U[0, :]) # .......................CALCULATION OF TERMS [>=2] ---------------------------------------------------------------- iter_ = 1 for c in range(2, max_coeff): # c defines the current depth valor[pq_] = (vec_P[pq_] - vec_Q[pq_] * 1j) * X[c - 1, pq_] - U[c - 1, pq_] * Ysh[pq_] valor[pv_] = -1j * conv2(X, Q, c, pv_) - U[c - 1, pv_] * Ysh[pv_] + X[c - 1, pv_] * vec_P[pv_] RHS = np.r_[valor.real, valor.imag, -conv3(U, U, c, pv_).real] LHS = MAT_LU(RHS) # update voltage coefficients U[c, :] = LHS[:npqpv] + 1j * LHS[npqpv:2 * npqpv] # update reactive power Q[c - 1, pv_] = LHS[2 * npqpv:] # update voltage inverse coefficients X[c, :] = -conv1(U, X, c) / np.conj(U[0, :]) iter_ += 1 return U, X, Q, iter_
def compute_kron_mat_cuda(self,g1,g2,kernel_name='create_kron_mat',gpu_block=None): # pragma: no cover """kronecker matrix with the edges pssm Args: g1 (iScore.Graph): first graph g2 (iScore.Graph): second graph kernel_name (str): name of the kernel to use gpu_block (None, optional): Size of the GPU block """ n1 = g1.num_edges n2 = g2.num_edges n_edges_prod = 2*n1*n2 # get the gpu block size if specified if gpu_block is not None: block = gpu_block else: block = self.gpu_block dim = (n1,n2,1) grid = tuple([int(np.ceil(n/t)) for n,t in zip(dim,block)]) # start timer t0 = time() driver.Context.synchronize() create_kron_mat_gpu = self.mod.get_function(kernel_name) # put the raw pssm on the GPU pssm1 = gpuarray.to_gpu(np.array(g1.edges_pssm).astype(np.float32)) pssm2 = gpuarray.to_gpu(np.array(g2.edges_pssm).astype(np.float32)) # we have to put the index on the gpu as well ind1 = gpuarray.to_gpu(np.array(g1.edges_index).astype(np.int32)) ind2 = gpuarray.to_gpu(np.array(g2.edges_index).astype(np.int32)) # create the gpu arrays only if we have to # i.e. in case we run the calculation once (test or tune) # in other cases the weigh and index are booked in self.run() if not hasattr(self,'weight_product'): self.weight_product = gpuarray.zeros(n_edges_prod, np.float32) self.index_product = gpuarray.zeros((n_edges_prod,2), np.int32) driver.Context.synchronize() if self.debug: print('GPU - Mem : %f \t (block size:%dx%d)' %(time()-t0,block[0],block[1])) # use the combvec kernel t0 = time() create_kron_mat_gpu (ind1,ind2, pssm1,pssm2, self.index_product,self.weight_product, n1,n2,g2.num_nodes, block=block,grid=grid) # extract the data # restrict to the ones calculated here ind = self.index_product.get() w = self.weight_product.get()[:n_edges_prod] # final size n_nodes_prod = g1.num_nodes*g2.num_nodes # create the matrix tt = time() # replaced the transpose with # doubling of weight and index (with switch) w = np.concatenate((w,w)) ind = np.vstack((ind,np.flip(ind,axis=1))) index = ( ind[:,0],ind[:,1]) self.Wx = sp_sparse.coo_matrix( (w,index),shape=( n_nodes_prod,n_nodes_prod ) ) #driver.Context.synchronize() if self.debug: print('GPU - Kron : %f \t (block size:%dx%d)' %(time()-t0,block[0],block[1]))
Cx1 = np.vstack([np.mean(X[Cx == 0], 0), np.mean(X[Cx == 1], 0)]) ind = np.argmin(np.mean(Cx1[:, -num_psd_elms_high_freq:], axis=1)) active_pixels = (L[:, ind] > thresh_probability) active_pixels = L[:, ind] pl.imshow(np.reshape((active_pixels), (d1, d2), order='F')) #%% ff = np.zeros(np.shape(A_or)[-1]) cl_thr = 0.2 #ff = false(1,size(Am,2)); for i in range(np.shape(A_or)[-1]): a1 = A_or[:, i] a2 = A_or[:, i] * active_pixels if np.sum(a2**2) >= cl_thr**2 * np.sum(a1**2): ff[i] = 1 id_set = 1 cse.utilities.view_patches_bar(Yr, coo_matrix( A_or[:, ff == id_set]), C_or[ff == id_set, :], b2, f2, d1, d2, YrA=YrA[srt[ff == id_set], :]) # km=KMeans(n_clusters=2) # Cx=km.fit_transform(X) # Cx=km.fit_transform(cp) # Cx=km.cluster_centers_ # L=km.labels_ # ind=np.argmin(np.mean(Cx[:,-49:],axis=1)) #active_pixels = (L==ind) #centroids = Cx;
def to_scipy_sparse_matrix(G, nodelist=None, dtype=None, weight='weight', format='csr'): """Returns the graph adjacency matrix as a SciPy sparse matrix. Parameters ---------- G : graph The NetworkX graph used to construct the NumPy matrix. nodelist : list, optional The rows and columns are ordered according to the nodes in `nodelist`. If `nodelist` is None, then the ordering is produced by G.nodes(). dtype : NumPy data-type, optional A valid NumPy dtype used to initialize the array. If None, then the NumPy default is used. weight : string or None optional (default='weight') The edge attribute that holds the numerical value used for the edge weight. If None then all edge weights are 1. format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'} The type of the matrix to be returned (default 'csr'). For some algorithms different implementations of sparse matrices can perform better. See [1]_ for details. Returns ------- M : SciPy sparse matrix Graph adjacency matrix. Notes ----- For directed graphs, matrix entry i,j corresponds to an edge from i to j. The matrix entries are populated using the edge attribute held in parameter weight. When an edge does not have that attribute, the value of the entry is 1. For multiple edges the matrix values are the sums of the edge weights. When `nodelist` does not contain every node in `G`, the matrix is built from the subgraph of `G` that is induced by the nodes in `nodelist`. Uses coo_matrix format. To convert to other formats specify the format= keyword. The convention used for self-loop edges in graphs is to assign the diagonal matrix entry value to the weight attribute of the edge (or the number 1 if the edge has no weight attribute). If the alternate convention of doubling the edge weight is desired the resulting Scipy sparse matrix can be modified as follows: >>> import scipy as sp >>> G = nx.Graph([(1, 1)]) >>> A = nx.to_scipy_sparse_matrix(G) >>> print(A.todense()) [[1]] >>> A.setdiag(A.diagonal() * 2) >>> print(A.todense()) [[2]] Examples -------- >>> G = nx.MultiDiGraph() >>> G.add_edge(0, 1, weight=2) 0 >>> G.add_edge(1, 0) 0 >>> G.add_edge(2, 2, weight=3) 0 >>> G.add_edge(2, 2) 1 >>> S = nx.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2]) >>> print(S.todense()) [[0 2 0] [1 0 0] [0 0 4]] References ---------- .. [1] Scipy Dev. References, "Sparse Matrices", https://docs.scipy.org/doc/scipy/reference/sparse.html """ from scipy import sparse if nodelist is None: nodelist = list(G) nlen = len(nodelist) if nlen == 0: raise nx.NetworkXError("Graph has no nodes or edges") if len(nodelist) != len(set(nodelist)): msg = "Ambiguous ordering: `nodelist` contained duplicates." 
raise nx.NetworkXError(msg) index = dict(zip(nodelist, range(nlen))) coefficients = zip(*((index[u], index[v], d.get(weight, 1)) for u, v, d in G.edges(nodelist, data=True) if u in index and v in index)) try: row, col, data = coefficients except ValueError: # there is no edge in the subgraph row, col, data = [], [], [] if G.is_directed(): M = sparse.coo_matrix((data, (row, col)), shape=(nlen, nlen), dtype=dtype) else: # symmetrize matrix d = data + data r = row + col c = col + row # selfloop entries get double counted when symmetrizing # so we subtract the data on the diagonal selfloops = list(nx.selfloop_edges(G, data=True)) if selfloops: diag_index, diag_data = zip(*((index[u], -d.get(weight, 1)) for u, v, d in selfloops if u in index and v in index)) d += diag_data r += diag_index c += diag_index M = sparse.coo_matrix((d, (r, c)), shape=(nlen, nlen), dtype=dtype) try: return M.asformat(format) # From Scipy 1.1.0, asformat will throw a ValueError instead of an # AttributeError if the format if not recognized. except (AttributeError, ValueError): raise nx.NetworkXError("Unknown sparse matrix format: %s" % format)
def _fast_kde_2d(x, y, gridsize=(128, 128), circular=False): """ 2D fft-based Gaussian kernel density estimate (KDE). The code was adapted from https://github.com/mfouesneau/faststats Parameters ---------- x : Numpy array or list y : Numpy array or list gridsize : tuple Number of points used to discretize data. Use powers of 2 for fft optimization circular: bool If True, use circular boundaries. Defaults to False Returns ------- grid: A gridded 2D KDE of the input points (x, y) xmin: minimum value of x xmax: maximum value of x ymin: minimum value of y ymax: maximum value of y """ x = np.asarray(x, dtype=float) x = x[np.isfinite(x)] y = np.asarray(y, dtype=float) y = y[np.isfinite(y)] xmin, xmax = x.min(), x.max() ymin, ymax = y.min(), y.max() len_x = len(x) weights = np.ones(len_x) n_x, n_y = gridsize d_x = (xmax - xmin) / (n_x - 1) d_y = (ymax - ymin) / (n_y - 1) xyi = _stack(x, y).T xyi -= [xmin, ymin] xyi /= [d_x, d_y] xyi = np.floor(xyi, xyi).T scotts_factor = len_x**(-1 / 6) cov = _cov(xyi) std_devs = np.diag(cov)**0.5 kern_nx, kern_ny = np.round(scotts_factor * 2 * np.pi * std_devs) inv_cov = np.linalg.inv(cov * scotts_factor**2) x_x = np.arange(kern_nx) - kern_nx / 2 y_y = np.arange(kern_ny) - kern_ny / 2 x_x, y_y = np.meshgrid(x_x, y_y) kernel = _stack(x_x.flatten(), y_y.flatten()) kernel = _dot(inv_cov, kernel) * kernel kernel = np.exp(-kernel.sum(axis=0) / 2) kernel = kernel.reshape((int(kern_ny), int(kern_nx))) boundary = "wrap" if circular else "symm" grid = coo_matrix((weights, xyi), shape=(n_x, n_y)).toarray() grid = convolve2d(grid, kernel, mode="same", boundary=boundary) norm_factor = np.linalg.det(2 * np.pi * cov * scotts_factor**2) norm_factor = len_x * d_x * d_y * norm_factor**0.5 grid /= norm_factor return grid, xmin, xmax, ymin, ymax
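# Hedged illustration (added for clarity) of the coo_matrix trick used above for
# gridding the points: duplicate (i, j) index pairs are summed when the matrix is
# converted to a dense array, which turns gridded point coordinates into a
# weighted 2D histogram without an explicit loop.
import numpy as np
from scipy.sparse import coo_matrix

ix = np.array([0, 0, 2, 2, 2])
iy = np.array([1, 1, 0, 3, 3])
weights = np.ones_like(ix, dtype=float)
grid = coo_matrix((weights, (ix, iy)), shape=(3, 4)).toarray()
print(grid)
# [[0. 2. 0. 0.]
#  [0. 0. 0. 0.]
#  [1. 0. 0. 2.]]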
train_agg = train.drop_duplicates(subset=['pid_code', 'song_code']).copy() test_agg = test.drop_duplicates(subset=['pid_code', 'song_code']).copy() train_agg['val'] = 1 test_agg['val'] = 1 train_agg['val_stoch'] = train_agg.groupby('pid_code').val.transform( lambda x: x / np.linalg.norm(x)) test_agg['val_stoch'] = test_agg.groupby('pid_code').val.transform( lambda x: x / np.linalg.norm(x)) test_agg_pop = test_agg.join(train.song_code.value_counts().rename('pop'), on='song_code') test_agg_pop['pop'].fillna(1, inplace=True) sp_A = spl.coo_matrix( (train_agg['val_stoch'].values.T, train_agg[['pid_code', 'song_code']].values.T)) sp_A._shape = (int(playlist_meta.pid_code.max() + 1), int(song_meta.song_code.max() + 1)) sp_A = sp_A.tocsr() sp_A_t = sp_A.T sp_A_const = spl.coo_matrix( (train_agg['val'].values.T, train_agg[['pid_code', 'song_code']].values.T)) sp_A_const._shape = (int(playlist_meta.pid_code.max() + 1), int(song_meta.song_code.max() + 1)) sp_A_const = sp_A_const.tocsr() sp_A_const_t = sp_A_const.T plusadd = 0
def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", n_perm=None): """Compute persistence diagrams for X data array. If X is not a distance matrix, it will be converted to a distance matrix using the chosen metric. Parameters ---------- X: ndarray (n_samples, n_features) A numpy array of either data or distance matrix. Can also be a sparse distance matrix of type scipy.sparse maxdim: int, optional, default 1 Maximum homology dimension computed. Will compute all dimensions lower than and equal to this value. For 1, H_0 and H_1 will be computed. thresh: float, default infinity Maximum distances considered when constructing filtration. If infinity, compute the entire filtration. coeff: int prime, default 2 Compute homology with coefficients in the prime field Z/pZ for p=coeff. metric: string or callable The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options specified in pairwise_distances, including "euclidean", "manhattan", or "cosine". Alternatively, if metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays from X as input and return a value indicating the distance between them. n_perm: int The number of points to subsample in a "greedy permutation," or a furthest point sampling of the points. These points will be used in lieu of the full point cloud for a faster computation, at the expense of some accuracy, which can be bounded as a maximum bottleneck distance to all diagrams on the original point set Returns ------- A dictionary holding all of the results of the computation { 'dgms': list (size maxdim) of ndarray (n_pairs, 2) A list of persistence diagrams, one for each dimension less than maxdim. Each diagram is an ndarray of size (n_pairs, 2) with the first column representing the birth time and the second column representing the death time of each pair. 'num_edges': int The number of edges added during the computation 'dperm2all': ndarray(n_samples, n_samples) or ndarray (n_perm, \ n_samples) if n_perm The distance matrix used in the computation if n_perm is none. Otherwise, the distance from all points in the permutation to all points in the dataset 'idx_perm': ndarray(n_perm) if n_perm > 0 Index into the original point cloud of the points used as a subsample in the greedy permutation 'r_cover': float Covering radius of the subsampled points. 
If n_perm <= 0, then the full point cloud was used and this is 0 } """ if n_perm and sparse.issparse(X): raise Exception( "Greedy permutation is not supported for sparse distance matrices") if n_perm and n_perm > X.shape[0]: raise Exception("Number of points in greedy permutation is greater" " than number of points in the point cloud") if n_perm and n_perm < 0: raise Exception( "Should be a strictly positive number of points in the greedy " "permutation") idx_perm = np.arange(X.shape[0]) r_cover = 0.0 if n_perm: idx_perm, lambdas, dperm2all = get_greedy_perm(X, n_perm=n_perm, metric=metric) r_cover = lambdas[-1] dm = dperm2all[:, idx_perm] else: if metric == 'precomputed': dm = X else: dm = pairwise_distances(X, metric=metric) dperm2all = dm n_points = dm.shape[0] if not sparse.issparse(dm) and np.sum(np.abs(dm.diagonal()) > 0) > 0: # If any of the diagonal elements are nonzero, # convert to sparse format, because currently # that's the only format that handles nonzero # births dm = sparse.coo_matrix(dm) if sparse.issparse(dm): coo = dm.tocoo() res = DRFDMSparse( coo.row.astype(dtype=np.int32, order="C"), coo.col.astype(dtype=np.int32, order="C"), np.array(coo.data, dtype=np.float32, order="C"), n_points, maxdim, thresh, coeff, ) else: I, J = np.meshgrid(np.arange(n_points), np.arange(n_points)) DParam = np.array(dm[I > J], dtype=np.float32) res = DRFDM(DParam, maxdim, thresh, coeff) # Unwrap persistence diagrams dgms = res["births_and_deaths_by_dim"] for dim in range(len(dgms)): N = int(len(dgms[dim]) / 2) dgms[dim] = np.reshape(np.array(dgms[dim]), [N, 2]) ret = { "dgms": dgms, "num_edges": res["num_edges"], "dperm2all": dperm2all, "idx_perm": idx_perm, "r_cover": r_cover, } return ret
def _sparse_fruchterman_reingold(A, dim=2, k=None, pos=None, fixed=None, iterations=50): # Position nodes in adjacency matrix A using Fruchterman-Reingold # Entry point for NetworkX graph is fruchterman_reingold_layout() # Sparse version try: import numpy as np except ImportError: raise ImportError("_sparse_fruchterman_reingold() requires numpy: http://scipy.org/ ") try: nnodes,_=A.shape except AttributeError: raise nx.NetworkXError( "fruchterman_reingold() takes an adjacency matrix as input") try: from scipy.sparse import spdiags,coo_matrix except ImportError: raise ImportError("_sparse_fruchterman_reingold() scipy numpy: http://scipy.org/ ") # make sure we have a LIst of Lists representation try: A=A.tolil() except: A=(coo_matrix(A)).tolil() if pos==None: # random initial positions pos=np.asarray(np.random.random((nnodes,dim)),dtype=A.dtype) else: # make sure positions are of same type as matrix pos=pos.astype(A.dtype) # no fixed nodes if fixed==None: fixed=[] # optimal distance between nodes if k is None: k=np.sqrt(1.0/nnodes) # the initial "temperature" is about .1 of domain area (=1x1) # this is the largest step allowed in the dynamics. t=0.1 # simple cooling scheme. # linearly step down by dt on each iteration so last iteration is size dt. dt=t/float(iterations+1) displacement=np.zeros((dim,nnodes)) for iteration in range(iterations): displacement*=0 # loop over rows for i in range(A.shape[0]): if i in fixed: continue # difference between this row's node position and all others delta=(pos[i]-pos).T # distance between points distance=np.sqrt((delta**2).sum(axis=0)) # enforce minimum distance of 0.01 distance=np.where(distance<0.01,0.01,distance) # the adjacency matrix row Ai=np.asarray(A.getrowview(i).toarray()) # displacement "force" displacement[:,i]+=\ (delta*(k*k/distance**2-Ai*distance/k)).sum(axis=1) # update positions length=np.sqrt((displacement**2).sum(axis=0)) length=np.where(length<0.01,0.1,length) pos+=(displacement*t/length).T # cool temperature t-=dt pos=_rescale_layout(pos) return pos
def ensemble_fit( X, estimated_n_topics=10, model="plsa", init="random", min_samples=3, min_cluster_size=4, n_starts=16, n_jobs=1, parallelism="dask", topic_combination="hellinger_umap", bootstrap=True, n_iter=100, n_iter_per_test=10, tolerance=0.001, e_step_thresh=1e-16, lift_factor=1, beta_loss=1, alpha=0.0, solver="mu", random_state=None, ): """Generate a set of stable topics by using an ensemble of topic models and then clustering the results and generating representative topics for each cluster. The generate a set of document vectors based on the selected stable topics. Parameters ---------- X: array or sparse matrix of shape (n_docs, n_words) The bag-of-words matrix for the corpus to train on. estimated_n_topics: int (optional, default=10) The estimated number of topics. Note that the final number of topics produced can differ from this value, and may be more or less than the provided value. Instead this value provides the algorithm with a suggestion of the approximate number of topics to use. model: string (optional, default="plsa") The topic modeling method to use (either "plsa" or "nmf") init: string or tuple (optional, default="random") The intialization method to use. This should be one of: * ``"random"`` * ``"nndsvd"`` * ``"nmf"`` or a tuple of two ndarrays of shape (n_docs, n_topics) and (n_topics, n_words). int (optional, default=3) The min_samples parameter to use for HDBSCAN clustering. min_cluster_size: int (optional, default=4) The min_cluster_size parameter to use for HDBSCAN clustering n_starts: int (optional, default=16) The number of bootstrap sampled topic models to run -- the size of the ensemble. n_jobs: int (optional, default=8) The number of parallel jobs to run at a time. parallelism: string (optional, default="dask") The parallelism model to use. Should be one of "dask" or "joblib" or "none". topic_combination: string (optional, default="hellinger_umap") The method of comnining ensemble topics into a set of stable topics. Should be one of: * ``"hellinger_umap"`` * ``"hellinger"`` * ``"kl_divergence"`` n_iter: int The maximum number iterations of EM to perform n_iter_per_test: int The number of iterations between tests for relative improvement in log-likelihood. tolerance: float The threshold of relative improvement in log-likelihood required to continue iterations. e_step_thresh: float (optional, default=1e-32) Option to promote sparsity. If the value of P(w|z)P(z|d) in the E step falls below threshold then write a zero for P(z|w,d). lift_factor: int (optional, default=1) Importance factor to apply to lift -- if high lift value are important to you then larger lift factors will be beneficial. beta_loss: float or string, (optional, default 'kullback-leibler') The beta loss to use if using NMF for topic modeling. alpha: float (optional, default=0.0) The alpha parameter defining regularization if using NMF for topic modeling. solver: string, (optional, default="mu") The choice of solver if using NMF for topic modeling. Should be either "cd" or "mu". random_state int, RandomState instance or None, (optional, default: None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. Used in in initialization. 
Returns ------- doc_vectors, stable_topics: arrays of shape (n_docs, M) and (M, n_words) The vectors giving the probability of topics for each document, and the stable topics produced by the ensemble. """ X = check_array(X, accept_sparse="csr", dtype=np.float32) if issparse(X): X_coo = X.tocoo() else: X_coo = coo_matrix(X, dtype=np.float32) all_topics = ensemble_of_topics( X_coo, estimated_n_topics, model, n_jobs, n_starts, parallelism, init=init, n_iter=n_iter, n_iter_per_test=n_iter_per_test, tolerance=tolerance, e_step_thresh=e_step_thresh, bootstrap=bootstrap, lift_factor=1, beta_loss=beta_loss, alpha=alpha, solver=solver, random_state=random_state, ) if topic_combination in _topic_combiner: cluster_topics = _topic_combiner[topic_combination] else: raise ValueError("topic_combination must be one of {}".format( tuple(_topic_combiner.keys()))) stable_topics = cluster_topics(all_topics, min_samples, min_cluster_size) if lift_factor != 1: stable_topics **= lift_factor normalize(stable_topics, axis=1) if model == "plsa": sample_weight = _check_sample_weight(None, X, dtype=np.float32) doc_vectors = plsa_refit( X, stable_topics, sample_weight, e_step_thresh=e_step_thresh, random_state=random_state, ) elif model == "nmf": doc_vectors, _, _ = non_negative_factorization( X, H=stable_topics, n_components=stable_topics.shape[0], update_H=False, beta_loss=beta_loss, alpha=alpha, solver=solver, ) else: raise ValueError('Model must be one of "plsa" or "nmf"') return doc_vectors, stable_topics
def load_data(path="../data/transfer/", dataset="chn", preserve_order=1): """Load citation network dataset (cora only for now)""" print('Loading {} dataset...'.format(dataset)) idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), dtype=np.dtype(str)) features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32) labels = encode_onehot( idx_features_labels[:, -1]) # labels are at the end of each line #f = open("{}{}.multilabel".format(path, dataset)) #multilabels =np.genfromtxt("{}{}.multilabel".format(path, dataset), # dtype=np.dtype(str)) # build graph idx = np.array(idx_features_labels[:, 0], dtype=np.int32) idx_map = {j: i for i, j in enumerate(idx)} edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset), dtype=np.int32) edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), dtype=np.int32).reshape(edges_unordered.shape) adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), shape=(labels.shape[0], labels.shape[0]), dtype=np.float32) # build symmetric adjacency matrix adj = sp.coo_matrix(adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)) for item in adj.__dict__.items(): print(item) print(adj.col) edge_ret = [] edge_weight = [] node_weight = [0.0 for i in range(0, len(idx))] if preserve_order == 1: adj_pres = adj else: adj_pres = sp.coo_matrix(adj**2) # sampling weight for i in range(0, len(adj.data)): edge_ret.append((adj_pres.row[i], adj_pres.col[i])) edge_weight.append(float(adj_pres.data[i])) node_weight[adj.row[i]] += adj.data[i] features = normalize(features) adj = adj + sp.eye(adj.shape[0]) D = sp.coo_matrix([[ 1.0 / math.sqrt(node_weight[j]) if j == i else 0 for j in range(len(idx)) ] for i in range(len(idx))]) adj = D * adj * D idx_train = range(140) idx_val = range(200, 500) idx_test = range(500, 1500) features = torch.FloatTensor(np.array(features.todense())) labels = torch.LongTensor(np.where(labels)[1]) adj = sparse_mx_to_torch_sparse_tensor(adj) idx_train = torch.LongTensor(idx_train) idx_val = torch.LongTensor(idx_val) idx_test = torch.LongTensor(idx_test) for i in range(0, len(node_weight)): node_weight[i] = math.pow(node_weight[i], 0.75) return adj, features, labels, idx_train, idx_val, idx_test, edge_ret, torch.tensor( edge_weight), torch.tensor(node_weight) #, multilabels
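# Hedged note (added, not part of the original loader): the dense list
# comprehension used above to build D costs O(n^2) memory. An equivalent symmetric
# normalization D^{-1/2} (A + I) D^{-1/2} can be formed with sparse diagonals;
# `adj` and `node_weight` are assumed to be the same objects as in the function
# above, and every node is assumed to have nonzero degree, as that code requires.
import numpy as np
import scipy.sparse as sp

def normalize_adj(adj, node_weight):
    d_inv_sqrt = 1.0 / np.sqrt(np.asarray(node_weight, dtype=np.float64))
    D = sp.diags(d_inv_sqrt)
    return D @ (adj + sp.eye(adj.shape[0])) @ D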
print
print factor.todense()


# In[155]:

A_int = second_deriv + factor

if n < 10:
    print A_int.todense()


# In[156]:

A = sps.vstack([
    sps.coo_matrix(([1], ([0], [0])), shape=(1, n)),
    A_int,
    sps.coo_matrix(([1], ([0], [n - 1])), shape=(1, n)),
])
A = sps.csr_matrix(A)

if n < 10:
    print A.todense()


# In[157]:

rhs = np.zeros(n)
def generate_spectral_label(cfg, k, target_dir, n_neighbor, args): """Use spectral clustering to generate labels. The user can specify distance metric in args Parameters ---------- cfg: dict, specifying data path, names we care k: list of int, choices of number of clusters target_dir: directory for storing the data n_neighbor: int, number of neighbors used for graph construction args: arguments, it provides choice of several options args.append means we append new number of clusters to existing problem args.pca means we first use PCA to perform dimensionality reduction args.speuclid means we use euclidean distance in y args.spdydx means we use dy/dx as distance metric args.spvio means we use constraint violation as distance metric """ data = np.load(cfg['file_path']) x, y = data[cfg['x_name']], data[cfg['y_name']] query = Query(x, None, n_neighbor + 1, scale=True) x_scaled = query.A # this is scaled data nn_ind = query.getIndex(x) # Is this why I was wrong? Is pyflann still working fine? n_data = x.shape[0] # build sparse graph based on neighboring distances, I shall use a distance function for evaluation if args.speuclid: def dst_fun(x0, y0, x1, y1): return np.linalg.norm(y0 - y1) out_fnm = os.path.join(target_dir, 'sp_euclid_label.npz') if args.spdydx: def dst_fun(x0, y0, x1, y1): return np.linalg.norm(y0 - y1) / np.linalg.norm(x0 - x1) out_fnm = os.path.join(target_dir, 'sp_dydx_label.npz') if args.spvio: out_fnm = os.path.join(target_dir, 'sp_vio_label.npz') sys.path.insert(0, cfg['script_path']) import libserver if args.pen: libserver.init(cfg['cfg_path']) def dst_fun(x0, y0, x1, y1): xmid = (x0 + x1) / 2 ymid = (y0 + y1) / 2 c = libserver.eval(ymid.astype(np.float64)) return np.linalg.norm(c[1:]) if args.car: solver = libserver.pysolver() solver.initfnm(cfg['cfg_path']) def dst_fun(x0, y0, x1, y1): xmid = (x0 + x1) / 2 ymid = (y0 + y1) / 2 c = solver.constrEval(ymid.astype(np.float64)) return np.linalg.norm(c[1:]) if args.drone: solver = libserver.pysolver() solver.initfnm(cfg['cfg_path']) def dst_fun(x0, y0, x1, y1): xmid = (x0 + x1) / 2 # this is not used, maybe not good but who knows ymid = (y0 + y1) / 2 solver.updateObstacle(xmid[3:]) c = solver.constrEval(ymid.astype(np.float64)) return np.linalg.norm(c[1:]) dist, row, col = construct_distance_graph(x, y, nn_ind, dst_fun, rm_col_one=True) print('distance matrix construction finished') aff_mat = sp.coo_matrix((dist, (row, col)), shape=(n_data, n_data)) # prepare for output if args.append and os.path.exists(out_fnm): result = ddctParse(out_fnm) else: result = {} # perform spectral clustering for k_ in k: print('run spectral clustering with %d' % k_) sc = SpectralClustering(k_, eigen_solver='amg', affinity='precomputed', assign_labels='discretize', n_jobs=-1) sc.fit(aff_mat) label = sc.labels_ result['%d' % k_] = label np.savez(out_fnm, **result)
def raster_2D(poly_xy: np.ndarray, grid_x: np.ndarray, grid_y: np.ndarray) -> np.ndarray: """Draws a polygon onto a 2D grid of pixels. Pixel values equal to the fraction of the pixel area covered by the polygon. This implementation is written for accuracy and works with double precision, in contrast to most other implementations which are written for speed and usually only allow for 256 (and often fewer) possible pixel values without performing (very slow) super-sampling. Args: poly_xy: `2 x N` ndarray containing x,y coordinates for each point in the polygon. grid_x: x-coordinates for the edges of each pixel specified as a 1D array. grid_y: y-coordinates for the edges of each pixel specified as a 1D array. Returns: 2D ndarray with pixel values in the range [0, 1] containing the anti-aliased polygon. Note that the size of the array is `[grid_x.size - 1, grid_y.size - 1]`. Raises: ValueError: If `poly_xy` doesn't have exactly two rows or if `grid_x` or `grid_y` have a size less than 2. """ if poly_xy.shape[0] != 2: raise ValueError( "Expected `poly_xy` to have 2 rows, got {} instead.".format( poly_xy.shape[0])) if grid_x.size < 2 or grid_y.size < 2: raise ValueError( "Expected both `grid_x` and `grid_y` to have atleast 2" " elements, got sizes of {} and {} respectively.".format( grid_x.size, grid_y.size)) # Oversample the polygon by including its intersection with the grid as # new vertices. vertices = _expand_polygon_vertices(poly_xy, grid_x, grid_y) # If the shape fell completely outside our area, just return a blank grid. if vertices.size == 0: return zeros((grid_x.size - 1, grid_y.size - 1)) # Calculate segment cover, area, and corresponding pixel's subscripts. poly = np.hstack((vertices, vertices[0])) endpoint_avg = (poly[:-1] + poly[1:]) * 0.5 # Remove segments along the right and top edges (they correspond to outside # pixels, but couldn't be removed until now because poly_xy stores points, # not segments, and the edge points are needed when creating endpoint_avg). non_edge = np.logical_and( np.real(endpoint_avg) < grid_x[-1], np.imag(endpoint_avg) < grid_y[-1]) endpoint_final = endpoint_avg[non_edge] x_sub = np.digitize(np.real(endpoint_final), grid_x) - 1 y_sub = np.digitize(np.imag(endpoint_final), grid_y) - 1 cover = np.diff(np.imag(poly), axis=0)[non_edge] / np.diff(grid_y)[y_sub] area = (np.real(endpoint_final) - grid_x[x_sub]) * cover / np.diff(grid_x)[x_sub] # Use coo_matrix(...).toarray() to efficiently convert from (x, y, v) pairs # to ndarrays. We can use v = (-area + 1j * cover) followed with calls to # np.real() and np.imag() to improve performance (otherwise we'd have to # call coo_matrix() twice. It's really inefficient because it involves lots # of random memory access, unlike real() and imag()). poly_grid = sparse.coo_matrix( (-area + 1j * cover, (x_sub, y_sub)), shape=(grid_x.size - 1, grid_y.size - 1)).toarray() result_grid = np.real(poly_grid) + np.imag(poly_grid).cumsum(axis=0) return np.abs(result_grid)
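# Hypothetical usage sketch for raster_2D (it assumes the function above and its
# helper _expand_polygon_vertices are importable from the same module): rasterize a
# unit square offset by half a pixel onto a 2x2 grid of unit pixels.
import numpy as np

square = np.array([[0.5, 1.5, 1.5, 0.5],    # x coordinates
                   [0.5, 0.5, 1.5, 1.5]])   # y coordinates
grid_x = np.array([0.0, 1.0, 2.0])
grid_y = np.array([0.0, 1.0, 2.0])

pixels = raster_2D(square, grid_x, grid_y)
print(pixels)   # each of the four pixels should be covered by roughly 0.25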
while s != '': data_samples.append(s.lower()) dataset.append(s) s = file_tweet.readline() file_tweet.close() ##################################### list of tweets # Inverted Index to find root print("Generating IDF to find summary...") t0 = time() count_vec = CountVectorizer(ngram_range=(1, 1), analyzer='word', stop_words="english") X_Train_counts = count_vec.fit_transform(data_samples) X_name = count_vec.get_feature_names() m, n = X_Train_counts.shape cx = coo_matrix(X_Train_counts) freq = [0 for i in range(0, n)] for i, j, v in zip(cx.row, cx.col, cx.data): freq[j] += v words = [] for i in range(0, n): words.append((X_name[i], freq[i])) words = sorted(words, key=itemgetter(1)) freq.clear() root = words[-1][ 0] ###################################################### Topic of summary / root node print("Topic of the Summary is '%s'" % root) # Inverted Index to build dictionary of frequencies of words count_vec = CountVectorizer(ngram_range=(1, 1))
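# Standalone sketch of the term-frequency tally above on a toy corpus: iterate the
# COO triplets of the document-term matrix to accumulate per-word counts and pick
# the most frequent word as the "root".
from operator import itemgetter
from scipy.sparse import coo_matrix
from sklearn.feature_extraction.text import CountVectorizer

docs = ["sparse matrices are useful", "coo matrices are sparse", "sparse sparse sparse"]
vec = CountVectorizer(ngram_range=(1, 1), analyzer='word', stop_words="english")
counts = coo_matrix(vec.fit_transform(docs))
names = vec.get_feature_names_out()   # use get_feature_names() on older scikit-learn

freq = [0] * counts.shape[1]
for i, j, v in zip(counts.row, counts.col, counts.data):
    freq[j] += v

root_word = max(zip(names, freq), key=itemgetter(1))[0]
print(root_word)   # 'sparse'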
def test_plot_connectome(tmpdir): node_color = ['green', 'blue', 'k', 'cyan'] # symmetric up to 1e-3 relative tolerance adjacency_matrix = np.array([[1., -2., 0.3, 0.], [-2.002, 1, 0., 0.], [0.3, 0., 1., 0.], [0., 0., 0., 1.]]) node_coords = np.arange(3 * 4).reshape(4, 3) args = adjacency_matrix, node_coords kwargs = dict(edge_threshold=0.38, title='threshold=0.38', node_size=10, node_color=node_color) plot_connectome(*args, **kwargs) plt.close() # used to speed-up tests for the next plots kwargs['display_mode'] = 'x' # node_coords not an array but a list of tuples plot_connectome(adjacency_matrix, [tuple(each) for each in node_coords], **kwargs) # saving to file filename = str(tmpdir.join('temp.png')) display = plot_connectome(*args, output_file=filename, **kwargs) assert display is None assert os.path.isfile(filename) assert os.path.getsize(filename) > 0 plt.close() # with node_kwargs, edge_kwargs and edge_cmap arguments plot_connectome(*args, edge_threshold='70%', node_size=[10, 20, 30, 40], node_color=np.zeros((4, 3)), edge_cmap='RdBu', colorbar=True, node_kwargs={ 'marker': 'v'}, edge_kwargs={ 'linewidth': 4}) plt.close() # masked array support masked_adjacency_matrix = np.ma.masked_array( adjacency_matrix, np.abs(adjacency_matrix) < 0.5) plot_connectome(masked_adjacency_matrix, node_coords, **kwargs) plt.close() # sparse matrix support sparse_adjacency_matrix = sparse.coo_matrix(adjacency_matrix) plot_connectome(sparse_adjacency_matrix, node_coords, **kwargs) plt.close() # NaN matrix support node_color = ['green', 'blue', 'k'] # Overriding 'node_color' for 3 elements of size 3. kwargs['node_color'] = node_color nan_adjacency_matrix = np.array([[1., np.nan, 0.], [np.nan, 1., 2.], [np.nan, 2., 1.]]) nan_node_coords = np.arange(3 * 3).reshape(3, 3) plot_connectome(nan_adjacency_matrix, nan_node_coords, **kwargs) plt.close() # smoke-test where there is no edge to draw, e.g. when # edge_threshold is too high plot_connectome(*args, edge_threshold=1e12) plt.close() # with colorbar=True plot_connectome(*args, colorbar=True) plt.close() # smoke-test with hemispheric saggital cuts plot_connectome(*args, display_mode='lzry') plt.close() # test node_color as a string with display_mode='lzry' plot_connectome(*args, node_color='red', display_mode='lzry') plt.close() plot_connectome(*args, node_color=['red'], display_mode='lzry') plt.close()
    return b


# iterate over 11 values of lambda
for i in range(11):
    l = pow(2, i - 5)
    # solve the least-squares problem
    ans = np.dot(np.linalg.pinv(create_A(l)), create_B(l))
    # reshape to show the image
    im_recon = ans.reshape((row, col))
    # gradients for computing the error
    dX, dY = np.gradient(im_recon)
    # use sparse matrices to avoid running out of memory
    dX = coo_matrix(dX)
    dY = coo_matrix(dY)
    dX2 = dX.multiply(dX).todense()
    dY2 = dY.multiply(dY).todense()
    # compute the error
    error = (im_recon - im_noise)**2 + l * (dX2 + dY2)
    # show the result
    print('lambda: ', l)
    p1 = plt.subplot(2, 2, 1)
    p1.set_title('original image')
    plt.imshow(im, cmap='gray')
    plt.axis('off')
    p2 = plt.subplot(2, 2, 2)
def create_bow(doc_indices, words, n_docs, vocab_size): return sparse.coo_matrix(([1] * len(doc_indices), (doc_indices, words)), shape=(n_docs, vocab_size)).tocsr()
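# Illustrative use of create_bow above: two documents over a five-word vocabulary,
# with tokens given as parallel (document index, word index) arrays. Duplicate
# (doc, word) pairs are summed when the COO matrix is converted to CSR.
import numpy as np

doc_indices = np.array([0, 0, 0, 1, 1])
words = np.array([2, 4, 4, 0, 1])   # vocabulary ids of the observed tokens
bow = create_bow(doc_indices, words, n_docs=2, vocab_size=5)
print(bow.toarray())
# [[0 0 1 0 2]
#  [1 1 0 0 0]]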
def pairwise_distance_xy_z(data1, data2, rp_max, pi_max, period=None, verbose=False, num_threads=1, approx_cell1_size=None, approx_cell2_size=None): """ Function returns pairs of points separated by a xy-projected distance smaller than or equal to the input ``rp_max`` and z distance ``pi_max``. Note that if data1 == data2 that the `~halotools.mock_observables.pairwise_distance_xy_z` function double-counts pairs. Parameters ---------- data1 : array_like N1 by 3 numpy array of 3-dimensional positions. Values of each dimension should be between zero and the corresponding dimension of the input period. data2 : array_like N2 by 3 numpy array of 3-dimensional positions. Values of each dimension should be between zero and the corresponding dimension of the input period. rp_max : array_like radius of the cylinder to search for neighbors around galaxies in ``data1``. If a single float is given, ``rp_max`` is assumed to be the same for each galaxy in ``data1``. You may optionally pass in an array of length *Npts1*, in which case each point in ``data1`` will have its own individual neighbor-search projected radius. Length units assumed to be in Mpc/h, here and throughout Halotools. pi_max : array_like Half-length of cylinder to search for neighbors around galaxies in ``data1``. If a single float is given, ``pi_max`` is assumed to be the same for each galaxy in ``data1``. You may optionally pass in an array of length *Npts1*, in which case each point in ``data1`` will have its own individual neighbor-search cylinder half-length. Length units assumed to be in Mpc/h, here and throughout Halotools. period : array_like, optional Length-3 array defining the periodic boundary conditions. If only one number is specified, the enclosing volume is assumed to be a periodic cube (by far the most common case). If period is set to None, the default option, PBCs are set to infinity. verbose : Boolean, optional If True, print out information and progress. num_threads : int, optional Number of CPU cores to use in the pair counting. If ``num_threads`` is set to the string 'max', use all available cores. Default is 1 thread for a serial calculation that does not open a multiprocessing pool. approx_cell1_size : array_like, optional Length-3 array serving as a guess for the optimal manner by which the `~halotools.mock_observables.pair_counters.RectangularDoubleMesh` will apportion the ``data`` points into subvolumes of the simulation box. The optimum choice unavoidably depends on the specs of your machine. Default choice is to use 1/10 of the box size in each dimension, which will return reasonable result performance for most use-cases. Performance can vary sensitively with this parameter, so it is highly recommended that you experiment with this parameter when carrying out performance-critical calculations. approx_cell2_size : array_like, optional See comments for ``approx_cell1_size``. Returns ------- distance : `~scipy.sparse.coo_matrix` sparse matrix in COO format containing distances between the ith entry in ``data1`` and jth in ``data2``. Examples -------- For demonstration purposes we create randomly distributed sets of points within a periodic unit cube. >>> Npts1, Npts2, Lbox = 1000, 1000, 250. 
>>> period = [Lbox, Lbox, Lbox] >>> rp_max = 1.0 >>> pi_max = 2.0 >>> x1 = np.random.uniform(0, Lbox, Npts1) >>> y1 = np.random.uniform(0, Lbox, Npts1) >>> z1 = np.random.uniform(0, Lbox, Npts1) >>> x2 = np.random.uniform(0, Lbox, Npts2) >>> y2 = np.random.uniform(0, Lbox, Npts2) >>> z2 = np.random.uniform(0, Lbox, Npts2) We transform our *x, y, z* points into the array shape used by the pair-counter by taking the transpose of the result of `numpy.vstack`. This boilerplate transformation is used throughout the `~halotools.mock_observables` sub-package: >>> data1 = np.vstack([x1, y1, z1]).T >>> data2 = np.vstack([x2, y2, z2]).T >>> perp_dist_matrix, para_dist_matrix = pairwise_distance_xy_z(data1, data2, rp_max, pi_max, period = period) """ # Process the inputs with the helper function result = _pairwise_distance_xy_z_process_args(data1, data2, rp_max, pi_max, period, verbose, num_threads, approx_cell1_size, approx_cell2_size) x1in, y1in, z1in, x2in, y2in, z2in = result[0:6] rp_max, max_rp_max, pi_max, max_pi_max, period, num_threads, PBCs, approx_cell1_size, approx_cell2_size = result[ 6:] xperiod, yperiod, zperiod = period search_xlength, search_ylength, search_zlength = max_rp_max, max_rp_max, max_pi_max # Compute the estimates for the cell sizes approx_cell1_size, approx_cell2_size = (_set_approximate_cell_sizes( approx_cell1_size, approx_cell2_size, period)) approx_x1cell_size, approx_y1cell_size, approx_z1cell_size = approx_cell1_size approx_x2cell_size, approx_y2cell_size, approx_z2cell_size = approx_cell2_size # Build the rectangular mesh double_mesh = RectangularDoubleMesh( x1in, y1in, z1in, x2in, y2in, z2in, approx_x1cell_size, approx_y1cell_size, approx_z1cell_size, approx_x2cell_size, approx_y2cell_size, approx_z2cell_size, search_xlength, search_ylength, search_zlength, xperiod, yperiod, zperiod, PBCs) # Create a function object that has a single argument, for parallelization purposes engine = partial(pairwise_distance_xy_z_engine, double_mesh, x1in, y1in, z1in, x2in, y2in, z2in, rp_max, pi_max) # Calculate the cell1 indices that will be looped over by the engine num_threads, cell1_tuples = _cell1_parallelization_indices( double_mesh.mesh1.ncells, num_threads) if num_threads > 1: pool = multiprocessing.Pool(num_threads) result = pool.map(engine, cell1_tuples) pool.close() else: result = [engine(cell1_tuples[0])] # unpack result d_perp = np.zeros((0, ), dtype='float') d_para = np.zeros((0, ), dtype='float') i_inds = np.zeros((0, ), dtype='int') j_inds = np.zeros((0, ), dtype='int') # unpack the results for i in range(len(result)): d_perp = np.append(d_perp, result[i][0]) d_para = np.append(d_para, result[i][1]) i_inds = np.append(i_inds, result[i][2]) j_inds = np.append(j_inds, result[i][3]) return (coo_matrix((d_perp, (i_inds, j_inds)), shape=(len(data1), len(data2))), coo_matrix((d_para, (i_inds, j_inds)), shape=(len(data1), len(data2))))
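# Small self-contained sketch of the return format documented above: two COO
# matrices sharing the same (row, col) pattern, one holding the projected (rp)
# separations and one the line-of-sight (pi) separations. The values below are
# made up for illustration.
import numpy as np
from scipy.sparse import coo_matrix

i_inds = np.array([0, 0, 3])
j_inds = np.array([2, 5, 1])
perp = coo_matrix((np.array([0.4, 0.9, 0.2]), (i_inds, j_inds)), shape=(4, 6))
para = coo_matrix((np.array([1.1, 0.3, 1.8]), (i_inds, j_inds)), shape=(4, 6))

for i, j, d_rp, d_pi in zip(perp.row, perp.col, perp.data, para.data):
    print("pair (%d, %d): rp = %.2f, pi = %.2f" % (i, j, d_rp, d_pi))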
def convert_labels(y, C=3): Y = sparse.coo_matrix((np.ones_like(y), (y, np.arange(len(y)))), shape=(C, len(y))).toarray() return Y
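# Example use of convert_labels above: three classes and five samples; column i of
# the returned array is the one-hot encoding of label y[i].
import numpy as np

y = np.array([0, 2, 1, 1, 0])
print(convert_labels(y, C=3))
# [[1 0 0 0 1]
#  [0 0 1 1 0]
#  [0 1 0 0 0]]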
def test_connectome_strength(tmpdir): # symmetric up to 1e-3 relative tolerance adjacency_matrix = np.array([[1., -2., 0.3, 0.], [-2.002, 1, 0., 0.], [0.3, 0., 1., 0.], [0., 0., 0., 1.]]) node_coords = np.arange(3 * 4).reshape(4, 3) args = adjacency_matrix, node_coords kwargs = dict() plot_connectome_strength(*args, **kwargs) plt.close() # used to speed-up tests for the net plots kwargs['display_mode'] = 'x' # node_coords not an array but a list of tuples plot_connectome_strength(adjacency_matrix, [tuple(each) for each in node_coords], **kwargs) # saving to file filename = str(tmpdir.join('test.png')) display = plot_connectome_strength( *args, output_file=filename, **kwargs ) assert display is None assert os.path.isfile(filename) assert os.path.getsize(filename) > 0 plt.close() # passing node args plot_connectome_strength(*args, node_size=10, cmap='RdBu') plt.close() plot_connectome_strength(*args, node_size=10, cmap=plt.cm.RdBu) plt.close() # masked array support masked_adjacency_matrix = np.ma.masked_array( adjacency_matrix, np.abs(adjacency_matrix) < 0.5 ) plot_connectome_strength( masked_adjacency_matrix, node_coords, **kwargs ) plt.close() # sparse matrix support sparse_adjacency_matrix = sparse.coo_matrix(adjacency_matrix) plot_connectome_strength( sparse_adjacency_matrix, node_coords, **kwargs ) plt.close() # NaN matrix support nan_adjacency_matrix = np.array([[1., np.nan, 0.], [np.nan, 1., 2.], [np.nan, 2., 1.]]) nan_node_coords = np.arange(3 * 3).reshape(3, 3) plot_connectome_strength(nan_adjacency_matrix, nan_node_coords, **kwargs) plt.close() # smoke-test with hemispheric sagital cuts plot_connectome_strength(*args, display_mode='lzry') plt.close()
import matplotlib.pyplot as plt df = pd.read_csv("data/ml-latest-small/ratings.csv") df['timestamp'] = pd.to_datetime(df.timestamp, unit='s') #Initialize Matrix method 1 # matrix = np.zeros((max(df.userId), max(df.movieId))) # for i in range(df.shape[0]): # matrix[df.iloc[i,0]-1, df.iloc[i,1]-1] = df.iloc[i,2] # #method 2 # mtx = ss.coo_matrix((df.rating, (df.userId, df['movieId'])), shape=(max(df.userId)+1, max(df['movieId'])+1)) #train/validation splinting trainSet, valSet = sklearn.model_selection.train_test_split(df.iloc[:, :3]) trainSet = ss.coo_matrix( (trainSet['rating'], (trainSet['userId'], trainSet['movieId'])), shape=(df['userId'].max() + 1, df['movieId'].max() + 1)) valSet = ss.coo_matrix( (valSet['rating'], (valSet['userId'], valSet['movieId'])), shape=(df['userId'].max() + 1, df['movieId'].max() + 1)) def validation(W, H, valSet): error = 0 for (row, col, data) in zip(valSet.row, valSet.col, valSet.data): error += abs(np.dot(W[row], H[:, col]) - data) return error error = np.zeros(10) trainloss = np.zeros(10)
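# Sketch of how the validation helper above is meant to be used: score factor
# matrices W (users x k) and H (k x movies) against the held-out COO ratings.
# The random factors here are placeholders, not a trained model.
import numpy as np

k = 10
rng = np.random.RandomState(0)
W = rng.rand(valSet.shape[0], k)
H = rng.rand(k, valSet.shape[1])
print(validation(W, H, valSet))   # total absolute error over the validation ratings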
def _fit_transform(self, graph: Graph, return_dataframe: bool = True, verbose: bool = True) -> EmbeddingResult: """Return node embedding.""" matrix = None if self._metric == "Jaccard": edges, weights = graph.get_jaccard_coo_matrix() elif self._metric == "Laplacian": edges, weights = graph.get_laplacian_coo_matrix() elif self._metric == "Modularity": matrix = graph.get_dense_modularity_matrix() elif self._metric == "Left Normalized Laplacian": edges, weights = graph.get_left_normalized_laplacian_coo_matrix() elif self._metric == "Right Normalized Laplacian": edges, weights = graph.get_right_normalized_laplacian_coo_matrix() elif self._metric == "Symmetric Normalized Laplacian": edges, weights = graph.get_symmetric_normalized_laplacian_coo_matrix( ) elif self._metric == "Neighbours Intersection size": edges, weights = graph.get_neighbours_intersection_size_coo_matrix( ) elif self._metric == "Ancestors Jaccard": matrix = graph.get_shared_ancestors_jaccard_adjacency_matrix( graph.get_breadth_first_search_from_node_names( src_node_name=self._root_node_name, compute_predecessors=True), verbose=verbose) elif self._metric == "Ancestors size": matrix = graph.get_shared_ancestors_size_adjacency_matrix( graph.get_breadth_first_search_from_node_names( src_node_name=self._root_node_name, compute_predecessors=True), verbose=verbose) elif self._metric == "Adamic-Adar": edges, weights = graph.get_adamic_adar_coo_matrix() elif self._metric == "Adjacency": edges, weights = graph.get_directed_edge_node_ids(), np.ones( graph.get_number_of_directed_edges()) else: raise NotImplementedError(f"The provided metric {self._metric} " "is not currently supported.") if matrix is None: matrix = coo_matrix((weights, (edges[:, 0], edges[:, 1])), shape=(graph.get_number_of_nodes(), graph.get_number_of_nodes()), dtype=np.float32) U, sigmas, Vt = sparse_svds(matrix, k=int(self._embedding_size / 2)) else: U, sigmas, Vt = randomized_svd(matrix, n_components=int( self._embedding_size / 2)) sigmas = np.diagflat(np.sqrt(sigmas)) left_embedding = np.dot(U, sigmas) right_embedding = np.dot(Vt.T, sigmas) if return_dataframe: node_names = graph.get_node_names() left_embedding = pd.DataFrame(left_embedding, index=node_names) right_embedding = pd.DataFrame(right_embedding, index=node_names) return EmbeddingResult( embedding_method_name=self.model_name(), node_embeddings=[left_embedding, right_embedding])
def main(args): ## Load in args which set parameters for runs epochs = args.epochs points_to_collect = args.points_to_collect #number of repetitions per d lr = args.lr model = args.model dataset = args.dataset opt_alg = args.opt_alg ds_to_explore = [int(d_num) for d_num in args.ds_to_explore] nnz = args.nnz init_iters = args.init_iters block_start = args.block_start # This mainly is so the random seed is different use_sparse = args.use_sparse_multiply jit_grad = args.jit_grad # Hide any GPUs form TensorFlow. Otherwise TF might reserve memory and make # it unavailable to JAX. tf.config.experimental.set_visible_devices([], "GPU") ## Logging # Logger specifications do_log = True do_gitchecks = True do_envchecks = True log_dir = '../lottery-subspace-data' if use_sparse: param_str = '%s_%s_init%i_nnz%i' % (model, dataset, init_iters, nnz) else: param_str = '%s_%s_init%i' % (model, dataset, init_iters) logger = logging.getLogger("my logger") scriptname = os.path.basename(__file__).rstrip('.py') # Get name of script aname, _ = loggingSetup(logger, scriptname, log_dir, do_log=do_log, param_str = param_str) result_file = '%s_results' % (aname) # Outfile name # Print current environment and git status to the log if do_gitchecks: gitstatus(logger) if do_envchecks: envstatus(logger, use_gpu = True) # Start log with experimental parameters logger.info('\n ---Code Output---\n') logger.info('\n') logger.info('[Burn-in Subspace] Random affine subspace at trained parameters: \n') logger.info('\n') logger.info('Dimensions to Explore: %s \n' % str(ds_to_explore)) logger.info('Model: %s \n' % (model)) logger.info('Dataset: %s \n' % (dataset)) logger.info('Optimization Algorithm: %s with learning rate %.2e \n' % (opt_alg, lr)) logger.info('Initial Training Iterations: %s Iterations \n' % str(init_iters)) if use_sparse: logger.info('Sparsity: %s nonzero\n' % str(nnz)) else: logger.info('No sparsity restrictions on projection matrix. \n') logger.info('Collect %i points for each dimension (Random seed starting at %i). \n' % (points_to_collect, block_start)) logger.info('Run optimization for %i epochs. 
\n' % (epochs)) logger.info('\n') ## Setup data if (dataset == 'MNIST'): x_train, full_train_dict, train_ds, test_ds, classes = setupMNIST() input_shape = (1, 28, 28, 1) elif (dataset == 'fashionMNIST'): x_train, full_train_dict, train_ds, test_ds, classes = setupFashionMNIST() input_shape = (1, 28, 28, 1) elif (dataset == 'SVHN'): x_train, full_train_dict, train_ds, test_ds, classes = setupSVHN() input_shape = (1, 32, 32, 3) elif (dataset == 'cifar10'): x_train, full_train_dict, train_ds, test_ds, classes = setupCIFAR10() input_shape = (1, 32, 32, 3) elif (dataset == 'cifar100'): x_train, full_train_dict, train_ds, test_ds, classes = setupCIFAR100() input_shape = (1, 32, 32, 3) else: logging.error('Dataset not recognized \n') test_ds_normalized = dict(test_ds) ## Initialize model global net if (model == 'TinyCNN'): net = SimpleCNN.partial( channels = [16,32], classes = classes, ) elif (model == 'SmallCNN'): net = SimpleCNN.partial( channels = [32,64,64], classes = classes, ) elif (model == 'MediumCNN'): net = SimpleCNN.partial( channels = [32,64,64,128], classes = classes, ) elif (model == 'ResNet_BNotf'): net = KerasResNets.partial( num_classes = classes, use_batch_norm = True, ) elif (model == 'WideResNet'): net = WideResnet.partial( blocks_per_group=2, channel_multiplier=4, num_outputs=100, dropout_rate=0.0 ) else: logger.error('Model type not recognized\n') out = { "model": model, "dataset": dataset, "epochs": epochs, "points_to_collect": points_to_collect, "ds_to_explore": ds_to_explore, "init_iters": init_iters, "nnz": nnz, "full_d": '', "data": { "d": [], "point_id": [], "it": [], "abs_theta": [], "train_loss": [], "train_acc": [], "full_train_loss": [], "full_train_acc": [], "best_train_acc": [], "test_loss": [], "test_acc": [], "best_test_acc": [], "nnz": [], "avg_grad_time": [], "avg_proj_time": [], "epoch_times": [] } } time_per_run = onp.zeros((len(ds_to_explore), points_to_collect, epochs)) loss_grad_full = jax.jit(jax.grad( lambda model, batch: normal_loss_opt( model,batch ) )) # Loop over runs for each dimension for point_id in range(points_to_collect): # Initialize the net, block_start allows us to split the runs up into parts _, initial_params = net.init_by_shape(jax.random.PRNGKey(point_id+block_start+12574),[(input_shape, jnp.float32)]) model = flax.nn.Model(net, initial_params) if init_iters == 0: # This is the intrinsic dimension case trained_params = initial_params else: # This is the burn-in subspace case optimizer = flax.optim.Momentum(learning_rate=lr).create(model) total_it = -1 for batch in train_ds: total_it = total_it + 1 if total_it > init_iters: break optimizer = optimizer.apply_gradient(loss_grad_full(optimizer.target, batch)) # This now are parameters that have been trained for the specified number of iterations trained_params = optimizer.target.params # Loop over dimension to explore for d_num, d in enumerate(ds_to_explore): params_now = trained_params D = jnp.sum(jnp.asarray([onp.prod(x.shape) for x in jax.tree_flatten(initial_params)[0]])) logger.info('\n'+'-'*95+'\n') logger.info("Run Number "+str(point_id)+'\n') logger.info("Number of params = "+str(D)+" subspace d="+str(d)+'\n') # Projection plane if use_sparse: M_unit = generate_projection(d,D,nnz,enforce_no_overlap_if_possible = True) else: M_unit = generate_projection(d,D) if use_sparse: M_unit_transpose_coo = sparse.coo_matrix(M_unit.T) M_unit_transpose_sparse = onp.array((M_unit_transpose_coo.row, M_unit_transpose_coo.col, M_unit_transpose_coo.data)) bytes_string = "M_unit bytes: " + 
sizeof_fmt(M_unit.nbytes) + " M_unit_sparse data bytes: "+ sizeof_fmt(M_unit_transpose_coo.data.nbytes) + " M_unit_sparse total bytes: " + sizeof_fmt(M_unit_transpose_coo.data.nbytes + M_unit_transpose_coo.col.nbytes + M_unit_transpose_coo.row.nbytes) logger.info(bytes_string + '\n') logger.info('-'*95 + '\n') # Important: This now uses the trained parameters leaves0,treedef = jax.tree_flatten(params_now) vec0,shapes_list = flatten_leaves(leaves0) if use_sparse: # Gradient function of the loss (with sparse matrix-vector multiplication) loss_grad_wrt_theta = jax.grad( lambda theta_now, batch: normal_loss( sparse_theta_to_paramstree(theta_now,M_unit_transpose_sparse,vec0,treedef,shapes_list), batch ) ) else: if jit_grad: loss_grad_wrt_theta = jax.jit(jax.grad( lambda theta_now, batch: normal_loss( theta_to_paramstree(theta_now,M_unit,vec0,treedef,shapes_list), batch ) )) else: loss_grad_wrt_theta = jax.grad( lambda theta_now, batch: normal_loss( theta_to_paramstree(theta_now,M_unit,vec0,treedef,shapes_list), batch ) ) # Start at the initial params (vec0), not the global origin theta = jnp.zeros((1,d)) # Parameters and aux variables for Adam beta_1=0.9 beta_2=0.999 epsilon=1e-07 mass = jnp.zeros((1, d)) velocity = jnp.zeros((1, d)) # Reset every loop total_it = -1 best_train_acc = 0 best_test_acc = 0 # Lists to store time for computing grad and projecting theta to full parameter space grad_ts = [] proj_ts = [] ## Train the model # Loop over training data for batch in train_ds: total_it += 1 if total_it / (len(x_train)/128.0) > epochs: break e_float = total_it / (len(x_train)/128.0) # This is the gradient in the hyperplane space grad_t1 = time.time() g_theta = loss_grad_wrt_theta(theta,batch) grad_t2 = time.time() grad_ts.append(grad_t2 - grad_t1) # Take a step in the plane if (opt_alg == 'Adam'): # Approximation of 1st and 2nd moment via exponential averaging mass = beta_1 * mass + (1.0 - beta_1) * g_theta velocity = beta_2 * velocity + (1.0 - beta_2) * (g_theta**2.0) # Bias correction hat_mass = mass / (1.0-beta_1) hat_velocity = velocity / (1.0-beta_2) # Update theta = theta - lr / (jnp.sqrt(hat_velocity) + epsilon) * hat_mass else: theta = theta - lr*g_theta # Get updated parameters proj_t1 = time.time() if use_sparse: params_now = sparse_theta_to_paramstree(theta,M_unit_transpose_sparse,vec0,treedef,shapes_list) else: params_now = theta_to_paramstree(theta,M_unit,vec0,treedef,shapes_list) proj_t2 = time.time() proj_ts.append(proj_t2 - proj_t1) # Batch loss and accuracy loss_out = normal_loss(params_now,batch) accuracy_out = normal_accuracy(params_now,batch) # Print train accuracies once in a while if total_it % 50 == 0 and total_it != 0: logger.info('{:10}{:10}{:15}{:15}{:15}{:15}{:15}'.format(str(round(e_float, 3)),str(total_it),str(onp.linalg.norm(theta)),str(loss_out),str(accuracy_out),'-','-')+'\n') # Test and print stats every epoch if (total_it % int(len(x_train)/128.0)) in [0]: # Test verification test_loss_out = normal_loss(params_now,test_ds_normalized) test_accuracy_out = normal_accuracy(params_now,test_ds_normalized) # Full train accuracy full_loss_out = normal_loss(params_now,full_train_dict) full_accuracy_out = normal_accuracy(params_now,full_train_dict) # Check if this is the best accuracy we've seen if test_accuracy_out > best_test_acc: best_test_acc = test_accuracy_out if full_accuracy_out > best_train_acc: best_train_acc = full_accuracy_out if total_it > 0: t2 = time.time() time_per_run[d_num, point_id, int(total_it / int(len(x_train)/128.0))-1] = t2 - t1 t1 = time.time() 
logger.info('{:10}{:10}{:15}{:15}{:15}{:15}{:15}'.format('epoch','iter','|theta|', 'train loss', 'train acc', 'test loss', 'test acc')+'\n') logger.info('{:10}{:10}{:15}{:15}{:15}{:15}{:15}'.format(str(round(e_float, 3)),str(total_it),str(onp.linalg.norm(theta)),str(full_loss_out),str(full_accuracy_out),str(test_loss_out),str(test_accuracy_out))+'\n') avg_grad_time = onp.mean(grad_ts) avg_proj_time = onp.mean(proj_ts) logger.info('\nTotal time: ' + str(sum(time_per_run[d_num, point_id])) +'\n') logger.info('Avg time to compute gradient: ' + str(avg_grad_time)+'\n') logger.info('Avg time to project theta: ' + str(avg_proj_time)+'\n') # Data out out["full_d"] = D out["data"]["d"].append(d) out["data"]["point_id"].append(point_id) out["data"]["it"].append(str(total_it)) out["data"]["abs_theta"].append(str(onp.linalg.norm(theta))) out["data"]["train_loss"].append(str(loss_out)) out["data"]["train_acc"].append(str(accuracy_out)) out["data"]["full_train_loss"].append(str(full_loss_out)) out["data"]["full_train_acc"].append(str(full_accuracy_out)) out["data"]["best_train_acc"].append(str(best_train_acc)) out["data"]["test_loss"].append(str(test_loss_out)) out["data"]["test_acc"].append(str(test_accuracy_out)) out["data"]["best_test_acc"].append(str(best_test_acc)) out["data"]["nnz"].append(nnz) out["data"]["avg_grad_time"].append(avg_grad_time) out["data"]["avg_proj_time"].append(avg_proj_time) out["data"]["epoch_times"].append(time_per_run[d_num, point_id]) # Write data to file every new dimension save_obj(out, result_file)
def build_matrix(pxl_inds, ints, nrows, ncols):
    # Use integer (floor) division so the row indices stay integral under Python 3's
    # true division; coo_matrix requires integer index arrays.
    return coo_matrix((ints, (pxl_inds // ncols, pxl_inds % ncols)),
                      shape=(nrows, ncols))
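# Example use of build_matrix above: flattened pixel indices and intensities for a
# 2x3 image; the row index is pxl_inds // ncols and the column is pxl_inds % ncols.
import numpy as np

pxl_inds = np.array([0, 4, 5])
ints = np.array([7.0, 1.0, 3.0])
print(build_matrix(pxl_inds, ints, nrows=2, ncols=3).toarray())
# [[7. 0. 0.]
#  [0. 1. 3.]]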
if class_prior is not None: assert_true(class_prior.shape[0] == len(np.unique(y)), 'MockClassifier extra fit_param class_prior.shape[0]' ' is {0}, should be {1}'.format(class_prior.shape[0], len(np.unique(y)))) return self def predict(self, T): return T.shape[0] def score(self, X=None, Y=None): return 1. / (1 + np.abs(self.a)) X = np.ones((10, 2)) X_sparse = coo_matrix(X) y = np.arange(10) // 2 ############################################################################## # Tests def check_valid_split(train, test, n_samples=None): # Use python sets to get more informative assertion failure messages train, test = set(train), set(test) # Train and test split should not overlap assert_equal(train.intersection(test), set()) if n_samples is not None: # Check that the union of train an test split cover all the indices
# In[7]: user_item_train, user_item_test, rating_train, rating_test = train_test_split( user_item.T, rating, test_size=2775344, random_state=42) # 27753444 nnz_train = 24978100 nnz_test = 2775344 # In[8]: #for test data, we need COO format to calculate test RMSE #1-based to 0-based R_test_coo = coo_matrix( (rating_test, (user_item_test[:, 0] - 1, user_item_test[:, 1] - 1))) #scipy does not guarantee coo row-major layout expected by cuSPARSE R_test_coo = R_test_coo.tocsr().tocoo() assert R_test_coo.nnz == nnz_test R_test_coo.data.astype(np.float32).tofile('R_test_coo.data.bin') R_test_coo.row.tofile('R_test_coo.row.bin') R_test_coo.col.tofile('R_test_coo.col.bin') # In[9]: print("max(R_test_coo.data)") print(np.max(R_test_coo.data)) print("max(R_test_coo.row)") print(np.max(R_test_coo.row)) print("max(R_test_coo.col)") print(np.max(R_test_coo.col))
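# Small sketch of why the tocsr().tocoo() round-trip above matters: a plain COO
# matrix keeps entries in insertion order, while the round-trip returns them sorted
# row-major (with duplicates summed), which is the layout cuSPARSE-style consumers expect.
import numpy as np
from scipy.sparse import coo_matrix

m = coo_matrix((np.array([3.0, 1.0, 2.0]),
                (np.array([1, 0, 1]), np.array([0, 2, 1]))), shape=(2, 3))
print(m.row, m.col)        # insertion order: [1 0 1] [0 2 1]
m = m.tocsr().tocoo()
print(m.row, m.col)        # row-major order:  [0 1 1] [2 0 1]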
               - Yshunts[pq_] * U[0, pq_] \
               + (vec_P[pq_] - vec_Q[pq_] * 1j) * X[0, pq_] \
               - prod2[pq_] \
               - np.sum(Ytapslack[pq_, :], axis=1)

valor[pv_] = - prod[pv_] \
             + np.sum(Yslack[pv_, :], axis=1) \
             - Yshunts[pv_] * U[0, pv_] \
             + vec_P[pv_] * X[0, pv_] \
             - prod2[pv_] \
             - np.sum(Ytapslack[pv_, :], axis=1)

RHS = np.r_[valor.real, valor.imag, W[pv_] - 1]  # with the equation for the PV voltage magnitudes

VRE = coo_matrix((2 * U_re[0, pv_], (np.arange(npv), pv_)),
                 shape=(npv, npqpv)).tocsc()  # COO sparse matrix converted to compressed form
VIM = coo_matrix((2 * U_im[0, pv_], (np.arange(npv), pv_)),
                 shape=(npv, npqpv)).tocsc()
XIM = coo_matrix((-X_im[0, pv_], (pv_, np.arange(npv))),
                 shape=(npqpv, npv)).tocsc()
XRE = coo_matrix((X_re[0, pv_], (pv_, np.arange(npv))),
                 shape=(npqpv, npv)).tocsc()
EMPTY = csc_matrix((npv, npv))  # compressed sparse matrix of zeros

MATx = vstack((hstack((G, -B, XIM)),
               hstack((B, G, XRE)),
               hstack((VRE, VIM, EMPTY))), format='csc')
MAT_LU = factorized(MATx.tocsc())  # factorized matrix (only needs to be computed once)
LHS = MAT_LU(RHS)  # obtain the vector of unknowns
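# Generic sketch of the assembly pattern above: build a block matrix from sparse
# pieces with hstack/vstack, factorize it once, and reuse the factorization for
# several right-hand sides. The blocks below are toy matrices, not power-flow data.
import numpy as np
from scipy.sparse import coo_matrix, csc_matrix, hstack, vstack
from scipy.sparse.linalg import factorized

A_blk = coo_matrix(np.array([[4.0, 1.0], [1.0, 3.0]]))
I_blk = csc_matrix(np.eye(2))
Z_blk = csc_matrix((2, 2))

M_blk = vstack((hstack((A_blk, I_blk)), hstack((I_blk, Z_blk))), format='csc')
solve = factorized(M_blk)                 # factorize once
for rhs in (np.ones(4), np.arange(4.0)):
    print(solve(rhs))                     # reuse the factorization for each RHS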
if not os.path.isdir(process_data_dir): os.mkdir(process_data_dir) adj_file = pjoin(process_data_dir, 'adj_{}.npz'.format(args.least_ratio)) node_pos_file = pjoin(process_data_dir, 'node_pos_{}.npy'.format(args.least_ratio)) W = sp.load_npz(adj_file).toarray() node_pos = np.load(node_pos_file) # num of nodes n = node_pos.shape[0] args.n_route = n # Calculate graph kernel L = scaled_laplacian(W) # Alternative approximation method: 1st approx - first_approx(W, n). Lk = cheb_poly_approx(L, Ks, n) Lk_sp = sp.coo_matrix(Lk) # Lk_spt = tf.SparseTensorValue( # indices=np.array([Lk_sp.row, Lk_sp.col], np.int64).T, # values=Lk_sp.data, # dense_shape=Lk_sp.shape) tf.add_to_collection(name='graph_kernel_indices', value=tf.cast( tf.constant(np.array([Lk_sp.row, Lk_sp.col]).T), tf.int64)) tf.add_to_collection(name='graph_kernel_value', value=tf.cast(tf.constant(Lk_sp.data), tf.float32)) tf.add_to_collection(name='graph_kernel_shape', value=tf.cast(tf.constant(Lk_sp.shape), tf.int64))
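# Numpy-only sketch of the COO -> (indices, values, shape) unpacking done above,
# which is the triplet a TensorFlow sparse tensor is built from. The kernel below
# is a stand-in array, not the Chebyshev-approximated graph kernel.
import numpy as np
import scipy.sparse as sp

Lk_demo = np.array([[0.0, 0.5, 0.0],
                    [0.5, 0.0, 0.2],
                    [0.0, 0.2, 0.0]])
Lk_demo_sp = sp.coo_matrix(Lk_demo)

indices = np.array([Lk_demo_sp.row, Lk_demo_sp.col], np.int64).T   # shape (nnz, 2)
values = Lk_demo_sp.data.astype(np.float32)
dense_shape = np.array(Lk_demo_sp.shape, np.int64)
print(indices)
print(values, dense_shape)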
def create(self, system, positions=None): """Return the SOAP output for the given system and given positions. Args: system (:class:`ase.Atoms` | :class:`.System`): Input system. positions (list): Cartesian positions or atomic indices. If specified, the SOAP spectrum will be created for these points. If no positions are defined, the SOAP output will be created for all atoms in the system. Returns: np.ndarray | scipy.sparse.coo_matrix: The SOAP output for the given system and positions. The return type depends on the 'sparse'-attribute. The first dimension is given by the number of positions and the second dimension is determined by the get_number_of_features()-function. """ # Transform the input system into the internal System-object system = self.get_system(system) # Check that the system does not have elements that are not in the list # of atomic numbers zs = set(system.get_atomic_numbers()) if not zs.issubset(self._atomic_number_set): raise ValueError( "The given system has the following atomic numbers not defined " "in the SOAP constructor: {}".format( zs.difference(self._atomic_number_set))) sub_elements = np.array(list(set(system.get_atomic_numbers()))) # Check if periodic is valid if self._periodic: cell = system.get_cell() if np.cross(cell[0], cell[1]).dot(cell[2]) == 0: raise ValueError( "System doesn't have cell to justify periodicity.") # Positions specified, use them if positions is not None: # Check validity of position definitions and create final cartesian # position list list_positions = [] if len(positions) == 0: raise ValueError( "The argument 'positions' should contain a non-empty set of" " atomic indices or cartesian coordinates") for i in positions: if np.issubdtype(type(i), np.integer): list_positions.append(system.get_positions()[i]) elif isinstance(i, list) or isinstance(i, tuple): list_positions.append(i) else: raise ValueError( "Create method requires the argument 'positions', a " "list of atom indices and/or positions") # Determine the SOAPLite function to call based on periodicity and # rbf if self._rbf == "gto": if self._periodic: soap_func = soaplite.get_periodic_soap_locals else: soap_func = soaplite.get_soap_locals soap_mat = soap_func(system, list_positions, self._alphas, self._betas, rCut=self._rcut, nMax=self._nmax, Lmax=self._lmax, crossOver=self._crossover, all_atomtypes=sub_elements.tolist(), eta=self._eta) elif self._rbf == "polynomial": if self._periodic: soap_func = soaplite.get_periodic_soap_locals_poly else: soap_func = soaplite.get_soap_locals_poly soap_mat = soap_func(system, list_positions, rCut=self._rcut, nMax=self._nmax, Lmax=self._lmax, all_atomtypes=sub_elements.tolist(), eta=self._eta) # No positions given, calculate SOAP for all atoms in the structure else: # Determine the SOAPLite function to call based on periodicity and # rbf if self._rbf == "gto": if self._periodic: soap_func = soaplite.get_periodic_soap_structure else: soap_func = soaplite.get_soap_structure soap_mat = soap_func(system, self._alphas, self._betas, rCut=self._rcut, nMax=self._nmax, Lmax=self._lmax, crossOver=self._crossover, all_atomtypes=sub_elements.tolist(), eta=self._eta) elif self._rbf == "polynomial": if self._periodic: soap_func = soaplite.get_periodic_soap_structure_poly else: soap_func = soaplite.get_soap_structure_poly soap_mat = soap_func(system, rCut=self._rcut, nMax=self._nmax, Lmax=self._lmax, all_atomtypes=sub_elements.tolist(), eta=self._eta) # Map the output from subspace of elements to the full space of # elements soap_mat = 
self.get_full_space_output(soap_mat, sub_elements, self._atomic_numbers) # Create the averaged SOAP output if requested. if self._average: soap_mat = soap_mat.mean(axis=0) soap_mat = np.expand_dims(soap_mat, 0) # Make into a sparse array if requested if self._sparse: soap_mat = coo_matrix(soap_mat) return soap_mat
def __init__(self, parent, value, seg_ids=None): ''' Initializes and sets the correct data. ''' # We've classed this so that we can override some of the normal functions and allow indexing via seg_id self.__dict__ = {} # Is this function thread safe? iter_group = parent.data_reader.get_iter_group(value) # iter_group = parent.west['iterations/iter_{num:08d}'.format(num=value)] self.parent = parent current = {} current['iteration'] = value if seg_ids is None: seg_ids = range(0, iter_group['seg_index']['weight'].shape[0]) # Just make these easier to access. current['weights'] = iter_group['seg_index']['weight'][seg_ids] current['pcoord'] = iter_group['pcoord'][...][seg_ids, :, :] try: current['auxdata'] = {} for key in list(iter_group['auxdata'].keys()): current['auxdata'][key] = iter_group['auxdata'][key][...][ seg_ids, :] except Exception: pass current['parents'] = iter_group['seg_index']['parent_id'][seg_ids] current['summary'] = parent.data_reader.data_manager.get_iter_summary( int(value)) current['seg_id'] = np.array( list(range(0, iter_group['seg_index'].shape[0])))[seg_ids] current['walkers'] = current['summary']['n_particles'] current['states'] = parent.assign['trajlabels'][ value - 1, :current['walkers'], :][seg_ids] current['bins'] = parent.assign['assignments'][ value - 1, :current['walkers'], :][seg_ids] # Calculates the bin population for this iteration. nbins = parent.assign['state_map'].shape[0] # We have to take the 'unknown' state into account # nstates = parent.assign['state_labels'].shape[0] + 1 # Temporarily disabled while I sort out the fact that we shouldn't be using data from w_assign for state populations. # current['plot'] = Plotter(parent.direct, parent.reweight, parent.iteration, parent.assign['bin_labels'], parent.assign['state_labels'], current['populations'].states, current['populations'].bins, parent.interface) # Now we'll load up the results of the kinetics analysis. current['direct'] = KineticsIteration(parent.direct, value, parent.assign, value) evolution_datasets = [ 'rate_evolution', 'conditional_flux_evolution', 'state_pop_evolution', 'color_prob_evolution', 'total_fluxes', 'target_flux_evolution', ] # We want to load these up as... oh, who knows, I suppose? try: current['reweight'] = KineticsIteration(parent.reweight, value, parent.assign, value) # We'll make this not a sparse matrix... matrix = parent.reweight['iterations/iter_{:08d}'.format(value)] # Assume color. current['instant_matrix'] = sp.coo_matrix( (matrix['flux'][...], (matrix['rows'][...], matrix['cols'][...])), shape=((nbins - 1) * 2, (nbins - 1) * 2)).todense() reweighting = True except Exception: # This analysis hasn't been enabled, so we'll simply return the default error message. current['reweight'] = parent.reweight['rate_evolution'] current['instant_matrix'] = parent.reweight['bin_populations'] current['matrix'] = parent.reweight['bin_populations'] reweighting = False # Check if the analysis has been enabled. If yes, make them specify dataset dictionaries. If not, return the thing. if reweighting: for key in evolution_datasets: current[key] = WIPIDataset(raw={ 'direct': current['direct'][key], 'reweight': current['reweight'][key] }, key='a') else: for key in evolution_datasets: current[key] = WIPIDataset( raw={'direct': current['direct'][key]}, key='direct') self.raw = current
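# Self-contained sketch of the instant_matrix reconstruction above: rebuild a dense
# flux matrix from separately stored row, col, and flux arrays. The arrays below are
# toy values standing in for the HDF5 datasets read by the class.
import numpy as np
import scipy.sparse as sp

rows = np.array([0, 1, 2, 2])
cols = np.array([1, 0, 0, 3])
flux = np.array([0.10, 0.05, 0.02, 0.30])
nstates = 4

instant = sp.coo_matrix((flux, (rows, cols)), shape=(nstates, nstates)).todense()
print(instant)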