def to_coo_adjacency_matrix(data, simalarity=False, distance_fun=None):
    '''
    Convert the dataset to a sparse COO adjacency matrix.

    :param data: :py:class:`gct.Dataset`
    :rtype: scipy coo_matrix
    '''
    edges = data.get_edges()
    rows = edges['src'].values
    cols = edges['dest'].values
    n = int(max(np.max(rows), np.max(cols))) + 1
    if data.is_weighted():
        weight = edges['weight'].values
    else:
        weight = np.ones_like(rows)
    if not simalarity:  # interpret weights as distances
        if distance_fun is None or distance_fun == 'minus':
            weight = -weight
        elif distance_fun == 'exp_minus':
            weight = np.exp(-weight)
        else:
            raise ValueError("unknown " + distance_fun)
    if data.is_directed():
        return coo_matrix((weight, (rows, cols)), shape=[n, n])
    else:
        # symmetrize by adding each edge in both directions
        newX = np.concatenate([weight, weight])
        newrows = np.concatenate([rows, cols])
        newcols = np.concatenate([cols, rows])
        return coo_matrix((newX, (newrows, newcols)), shape=[n, n])
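# A minimal, self-contained sketch of the same undirected symmetrization trick
# on a toy edge list. The gct.Dataset dependency above is replaced here by
# plain arrays; the edge list and weights are assumptions for illustration.
import numpy as np
from scipy.sparse import coo_matrix

src = np.array([0, 1, 2])
dst = np.array([1, 2, 0])
w = np.array([1.0, 2.0, 3.0])
n = int(max(src.max(), dst.max())) + 1

# stack each edge in both directions to obtain a symmetric adjacency matrix
adj = coo_matrix((np.concatenate([w, w]),
                  (np.concatenate([src, dst]), np.concatenate([dst, src]))),
                 shape=(n, n))
assert (adj.toarray() == adj.toarray().T).all()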
def test_sparse_multiply(self):
    row = np.array([0, 3, 1, 0])
    col = np.array([0, 3, 1, 2])
    data = np.array([4, 5, 7, 9])
    S1 = coo_matrix((data, (row, col)), shape=(4, 5))
    S2 = coo_matrix((data, (row, col)), shape=(5, 4))
    pybammS1 = pybamm.Matrix(S1)
    pybammS2 = pybamm.Matrix(S2)
    D1 = np.ones((4, 5))
    D2 = np.ones((5, 4))
    pybammD1 = pybamm.Matrix(D1)
    pybammD2 = pybamm.Matrix(D2)

    # Multiplication is elementwise
    np.testing.assert_array_equal(
        (pybammS1 * pybammS1).evaluate().toarray(), S1.multiply(S1).toarray())
    np.testing.assert_array_equal(
        (pybammS2 * pybammS2).evaluate().toarray(), S2.multiply(S2).toarray())
    np.testing.assert_array_equal(
        (pybammD1 * pybammS1).evaluate().toarray(), S1.toarray() * D1)
    np.testing.assert_array_equal(
        (pybammS1 * pybammD1).evaluate().toarray(), S1.toarray() * D1)
    np.testing.assert_array_equal(
        (pybammD2 * pybammS2).evaluate().toarray(), S2.toarray() * D2)
    np.testing.assert_array_equal(
        (pybammS2 * pybammD2).evaluate().toarray(), S2.toarray() * D2)
    with self.assertRaisesRegex(pybamm.ShapeError, "inconsistent shapes"):
        (pybammS1 * pybammS2).test_shape()
    with self.assertRaisesRegex(pybamm.ShapeError, "inconsistent shapes"):
        (pybammS2 * pybammS1).test_shape()
    with self.assertRaisesRegex(pybamm.ShapeError, "inconsistent shapes"):
        (pybammS2 * pybammS1).evaluate_ignoring_errors()

    # Matrix multiplication is normal matrix multiplication
    np.testing.assert_array_equal(
        (pybammS1 @ pybammS2).evaluate().toarray(), (S1 * S2).toarray())
    np.testing.assert_array_equal(
        (pybammS2 @ pybammS1).evaluate().toarray(), (S2 * S1).toarray())
    np.testing.assert_array_equal((pybammS1 @ pybammD2).evaluate(), S1 * D2)
    np.testing.assert_array_equal((pybammD2 @ pybammS1).evaluate(), D2 * S1)
    np.testing.assert_array_equal((pybammS2 @ pybammD1).evaluate(), S2 * D1)
    np.testing.assert_array_equal((pybammD1 @ pybammS2).evaluate(), D1 * S2)
    with self.assertRaisesRegex(pybamm.ShapeError, "dimension mismatch"):
        (pybammS1 @ pybammS1).test_shape()
    with self.assertRaisesRegex(pybamm.ShapeError, "dimension mismatch"):
        (pybammS2 @ pybammS2).test_shape()
def apply_threshold(original, threshold):
    """
    Filter the matrix, keeping only the cells whose value is greater than the threshold.

    :param original: The original matrix.
    :param threshold: Numeric threshold applied to each cell.
    :return: COO matrix containing only the cells that exceed the threshold.
    """
    original = coo_matrix(original)  # Ensure COO format
    indices = original.data > threshold
    new_data = original.data[indices]
    # note: the shape of the result is inferred from the surviving indices
    new_matrix = new_data, (original.row[indices], original.col[indices])
    return coo_matrix(new_matrix)
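# Hypothetical usage sketch for apply_threshold (the 0.5 cutoff and the toy
# matrix are assumptions for illustration, and the snippet's own imports of
# numpy/coo_matrix are assumed in scope): values <= threshold are dropped
# from the sparse structure entirely.
import numpy as np
from scipy.sparse import coo_matrix

dense = np.array([[0.2, 0.9],
                  [0.7, 0.1]])
filtered = apply_threshold(coo_matrix(dense), 0.5)
print(filtered.toarray())
# [[0.  0.9]
#  [0.7 0. ]]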
def tocoo(self):
    """ Return a copy of this matrix in COOrdinate format"""
    from scipy.sparse.coo import coo_matrix
    if self.nnz == 0:
        return coo_matrix(self.shape, dtype=self.dtype)
    else:
        idx_dtype = get_index_dtype(maxval=max(self.shape[0], self.shape[1]))
        data = np.asarray(_list(self.values()), dtype=self.dtype)
        indices = np.asarray(_list(self.keys()), dtype=idx_dtype).T
        return coo_matrix((data, indices), shape=self.shape, dtype=self.dtype)
def test_is_matrix_zero(self):
    a = pybamm.Matrix(coo_matrix(np.zeros((10, 10))))
    b = pybamm.Matrix(coo_matrix(np.ones((10, 10))))
    c = pybamm.Matrix(coo_matrix(([1], ([0], [0])), shape=(5, 5)))
    self.assertTrue(pybamm.is_matrix_zero(a))
    self.assertFalse(pybamm.is_matrix_zero(b))
    self.assertFalse(pybamm.is_matrix_zero(c))

    a = pybamm.Matrix(np.zeros((10, 10)))
    b = pybamm.Matrix(np.ones((10, 10)))
    c = pybamm.Matrix([1, 0, 0])
    self.assertTrue(pybamm.is_matrix_zero(a))
    self.assertFalse(pybamm.is_matrix_zero(b))
    self.assertFalse(pybamm.is_matrix_zero(c))
def build_edge_data(args, base_data):
    if args.use_edge:
        adj_ = []
        if len(base_data['edges_in']) == 0:
            adj_.append(
                np.zeros((args.max_nodes, args.max_nodes), dtype=np.float32))
        else:
            adj = coo_matrix((np.ones(len(base_data['edges_in'])),
                              np.array(base_data['edges_in']).T),
                             shape=(args.max_nodes, args.max_nodes),
                             dtype=np.float32).toarray()
            adj_.append(adj)

        if len(base_data['edges_out']) == 0:
            adj_.append(
                np.zeros((args.max_nodes, args.max_nodes), dtype=np.float32))
        else:
            adj = coo_matrix((np.ones(len(base_data['edges_out'])),
                              np.array(base_data['edges_out']).T),
                             shape=(args.max_nodes, args.max_nodes),
                             dtype=np.float32).toarray()
            adj_.append(adj)

        num_candidates = len(base_data['nodes_candidates_id'])
        pad = args.max_nodes - num_candidates
        # "everything else" edges: fully connected candidate block minus the
        # in/out adjacencies and the diagonal
        adj = np.pad(np.ones((num_candidates, num_candidates), dtype=np.float32),
                     ((0, pad), (0, pad)), mode='constant') \
            - adj_[0] - adj_[1] \
            - np.pad(np.eye(num_candidates), ((0, pad), (0, pad)), mode='constant')
        adj_.append(np.clip(adj, 0, 1, dtype=np.float32))

        adj = np.stack(adj_, 0)
        # row-normalize each adjacency slice
        d_ = adj.sum(-1)
        d_[np.nonzero(d_)] **= -1
        adj = adj * np.expand_dims(d_, -1)
        return torch.from_numpy(adj)
    else:
        num_candidates = len(base_data['nodes_candidates_id'])
        pad = args.max_nodes - num_candidates
        # fully connected graph over the candidates, without self-loops
        # (the original referenced an undefined `d`; it should be `base_data`)
        adj = np.pad(np.ones((num_candidates, num_candidates)),
                     ((0, pad), (0, pad)), mode='constant') \
            - np.pad(np.eye(num_candidates), ((0, pad), (0, pad)), mode='constant')
        return torch.from_numpy(adj)
def test_get_term_proportions(dtm, matrix_type):
    if matrix_type == 1:
        dtm = np.matrix(dtm)
        dtm_arr = dtm.A
        dtm_flat = dtm.A1
    elif matrix_type == 2:
        dtm = coo_matrix(dtm)
        dtm_arr = dtm.A
        dtm_flat = dtm.A.flatten()
    else:
        dtm = np.array(dtm)
        dtm_arr = dtm
        dtm_flat = dtm.flatten()

    if dtm.ndim != 2:
        with pytest.raises(ValueError):
            lda_utils.common.get_term_proportions(dtm)
    else:
        tp = lda_utils.common.get_term_proportions(dtm)
        assert tp.ndim == 1
        assert tp.shape == (dtm_arr.shape[1],)

        if len(dtm_flat) > 0:
            assert np.isclose(tp.sum(), 1.0)
            assert all(0 <= v <= 1 for v in tp)
def _assembly_K(mesh, omega):
    build_local_Ke = g.choose_impl(_fwi_ls.build_local_Ke, _build_local_Ke)

    # Prepare Ke for each element, and keep their data to be used in the assembly
    Ke_local_list = []
    for connectivity, points, mu, eta in zip(mesh.connectivity_list,
                                             mesh.points_in_elements,
                                             mesh.mu, mesh.eta):
        Ke_local_list.append(
            (connectivity, build_local_Ke(points, omega, mu, eta)))

    # Assemble the global matrix
    Ke_coo_i = []
    Ke_coo_j = []
    Ke_coo_data = []
    for connectivity, Ke_local in Ke_local_list:
        for k, p1 in enumerate(connectivity):
            for l, p2 in enumerate(connectivity):
                Ke_coo_i.append(p1)
                Ke_coo_j.append(p2)
                Ke_coo_data.append(Ke_local[k, l])

    # Build the sparse data structure
    Ke_global = coo_matrix(
        (Ke_coo_data, (Ke_coo_i, Ke_coo_j)),
        shape=(mesh.n_points, mesh.n_points),
        dtype=complex,  # np.complex is a removed alias of the builtin complex
    )

    return Ke_global
def _compute_table_rank(self, contained):
    logger.log(logging.DEBUG, "Computing tables relations")

    tables_rank = [([], []) for _ in range(6)]

    indices = [
        set(l) for l in np.split(contained.indices, contained.indptr)[1:-1]
    ]

    for root in self.dictionary.roots:
        for t0, t1 in combinations(self.dictionary.roots[root], 2):
            commons = [self.dictionary.index[i]
                       for i in indices[t0.index] & indices[t1.index]]
            ranks = set(map(lambda t: t.rank, commons))
            for rank in ranks:
                tables_rank[rank][0].extend((t0.index, t1.index))
                tables_rank[rank][1].extend((t1.index, t0.index))

    for t in self.dictionary:
        ranks = {self.dictionary.index[i].rank for i in indices[t.index]} - {6}
        for rank in ranks:
            tables_rank[rank][0].append(t.index)
            tables_rank[rank][1].append(t.index)

    # np.bool is a removed alias of the builtin bool
    return [coo_matrix(([True] * len(i), (i, j)), shape=self.shape, dtype=bool)
            for i, j in tables_rank]
def test_add_sparse(self):
    m = self.basic_m
    mm = m + m
    test_m = np.array([[4., 0., 9., 0.],
                       [0., 7., 0., 0.],
                       [0., 0., 0., 0.],
                       [0., 0., 0., 5.]])
    mm2 = test_m * 2
    self.assertIsInstance(mm, CSRMatrix)
    self.assertListEqual(mm.toarray().flatten().tolist(),
                         mm2.flatten().tolist())

    m2 = coo_matrix(
        (self.basic_m.data, (self.basic_m.row, self.basic_m.col)),
        shape=self.basic_m.shape)
    with self.assertRaises(Exception) as context:
        mm = m + m2

    m2 = IdentityMatrix(4)
    mm = m + m2
    test_m = np.array([[5., 0., 9., 0.],
                       [0., 8., 0., 0.],
                       [0., 0., 1., 0.],
                       [0., 0., 0., 6.]])
    mm2 = test_m
    self.assertIsInstance(mm, CSRMatrix)
    self.assertListEqual(mm.toarray().flatten().tolist(),
                         mm2.flatten().tolist())

    mm = m2 + m
    self.assertIsInstance(mm, CSRMatrix)
    self.assertListEqual(mm.toarray().flatten().tolist(),
                         mm2.flatten().tolist())

    D = DiagonalMatrix(np.ones(m.shape[1]))
    mm = m + D
    mm_dense = m.todense() + D.todense()
    self.assertTrue(np.allclose(mm.todense(), mm_dense))
def create_w_from_binary(chosen: list, not_chosen: list, nonzero_binary: list,
                         sparse: bool = True):
    """
    Return W s.t. W dot W.T is the reduced density matrix according to the
    selected bipartition.

    :param chosen: list of chosen qubits
    :param not_chosen: list of qubits to trace away
    :param nonzero_binary: list s.t. nonzero_binary[j] = j-th index in which
        the state is nonzero, represented as a binary string:
        psi = [0,1,0,1,0] -> nonzero_binary = ['001','011']
    :return: W
    """
    # row/column indexes of nonzero elements in W
    rows = [
        aux.to_decimal(aux.select_components(i, chosen)) for i in nonzero_binary
    ]
    cols = [
        aux.to_decimal(aux.select_components(i, not_chosen))
        for i in nonzero_binary
    ]
    number_of_nonzeros = len(nonzero_binary)
    norm = number_of_nonzeros ** (-1 / 2)
    data = np.ones(number_of_nonzeros) * norm

    if sparse:
        return coo_matrix((data, (rows, cols)),
                          shape=(2 ** len(chosen), 2 ** len(not_chosen))).tocsc()

    # the original used an undefined `notchosen` below; it should be `not_chosen`
    flatrow_idx = [i * 2 ** len(not_chosen) + j for i, j in zip(rows, cols)]
    W = np.zeros(2 ** (len(chosen) + len(not_chosen)))
    W[flatrow_idx] = norm
    return W.reshape((2 ** len(chosen), 2 ** len(not_chosen)))
def _init_features(self, features):
    '''
    This built-in feature model treats features as discrete, not distributed
    representations (embeddings). To handle distances between feature vectors,
    it is better to set no_words=True and use a GP label model.

    :param features:
    :return:
    '''
    self.feat_map = {}   # map feature tokens to indices
    self.features = []   # list of mapped index values for each token

    if self.no_features:
        return

    N = len(features)

    for feat in features.flatten():
        if feat not in self.feat_map:
            self.feat_map[feat] = len(self.feat_map)

        self.features.append(self.feat_map[feat])

    self.features = np.array(self.features).astype(int)

    # sparse matrix of one-hot encoding, nfeatures x N, where N is the number
    # of tokens in the dataset
    self.features_mat = coo_matrix(
        (np.ones(len(features)), (self.features, np.arange(N)))).tocsr()
def _prepare_sparse_data(data):
    if not hasattr(data, 'dtype') or not hasattr(data, 'shape') or len(data.shape) != 2:
        raise ValueError(
            '`data` must be a NumPy array/matrix or SciPy sparse matrix of two dimensions'
        )

    # np.int is a removed alias of the builtin int
    if data.dtype == int:
        arr_ctype = ctypes.c_int
    elif data.dtype == np.int32:
        arr_ctype = ctypes.c_int32
    elif data.dtype == np.int64:
        arr_ctype = ctypes.c_int64
    else:
        raise ValueError('dtype of `data` is not supported: `%s`' % data.dtype)

    if not hasattr(data, 'format'):
        # dense matrix -> convert to sparse matrix in COO format
        data = coo_matrix(data)
    elif data.format != 'coo':
        data = data.tocoo()

    sparse_data_base = mp.Array(arr_ctype, data.data)
    sparse_rows_base = mp.Array(ctypes.c_int, data.row)  # TODO: datatype correct?
    sparse_cols_base = mp.Array(ctypes.c_int, data.col)  # TODO: datatype correct?

    logger.info(
        'initializing evaluation with sparse matrix of format `%s` and shape %dx%d'
        % (data.format, data.shape[0], data.shape[1]))

    return sparse_data_base, sparse_rows_base, sparse_cols_base
def generate_adjacency(X, graph):
    """Generate an adjacency matrix from an edge list.

    Inputs:
    -------
    X: tensor, node features of all graphs
    graph: tensor, edge list of all graphs

    Output:
    -------
    adjacency: sparse numpy array, sparse adjacency matrix over all graph nodes
    """
    # number of nodes and edges
    num_nodes = X.size(0)
    num_edges = graph.size(1)

    # convert tensor to numpy array
    graph_np = graph.detach().cpu().numpy()

    # symmetrize the edge list (append reversed edges) and drop duplicates
    edge_index = np.concatenate((graph_np, np.flipud(graph_np)), axis=1)
    edge_index = edge_index.T.tolist()
    sorted_edge_index = sorted(edge_index)
    edge_index = list(k for k, _ in groupby(sorted_edge_index))
    edge_index = np.asarray(edge_index)

    # build the sparse adjacency matrix; use the deduplicated edge count so
    # that data and indices always have matching lengths (the original used
    # num_edges, which only matches when the input is already symmetric)
    adjacency = coo_matrix(
        (np.ones(edge_index.shape[0]), (edge_index[:, 0], edge_index[:, 1])),
        shape=(num_nodes, num_nodes), dtype=float)

    return adjacency
def __init__(self, worker_id, tasks_queue, results_queue, data,
             group=None, target=None, name=None, args=(), kwargs=None):
    super(MultiprocModelsWorkerABC, self).__init__(group, target, name,
                                                   args, kwargs or {})

    logger.debug('worker `%s`: creating worker with ID %d'
                 % (self.name, worker_id))
    self.worker_id = worker_id
    self.tasks_queue = tasks_queue
    self.results_queue = results_queue

    self.data_per_doc = {}
    for doc_label, sparse_mem in data.items():
        sparse_data_base, sparse_row_ind_base, sparse_col_ind_base = sparse_mem
        sparse_data = np.ctypeslib.as_array(sparse_data_base.get_obj())
        sparse_row_ind = np.ctypeslib.as_array(sparse_row_ind_base.get_obj())
        sparse_col_ind = np.ctypeslib.as_array(sparse_col_ind_base.get_obj())
        logger.debug('worker `%s`: creating sparse data matrix for document `%s`'
                     % (self.name, doc_label))
        self.data_per_doc[doc_label] = coo_matrix(
            (sparse_data, (sparse_row_ind, sparse_col_ind)))
def test_mul_sparse_matrix(self):
    # test unsymmetric times unsymmetric
    m = self.basic_m
    dense_m = m.toarray()
    res = m * m
    dense_res = np.matmul(dense_m, dense_m)
    self.assertFalse(res.is_symmetric)
    self.assertTrue(np.allclose(res.toarray(), dense_res))

    # test symmetric result
    m = self.basic_m
    dense_m = m.toarray()
    res = m.transpose() * m
    dense_res = np.matmul(dense_m.transpose(), dense_m)
    self.assertTrue(res.is_symmetric)
    self.assertTrue(np.allclose(res.toarray(), dense_res))

    # test unsymmetric with rectangular
    m = self.basic_m
    dense_m2 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])
    m2 = COOMatrix(dense_m2)
    res = m * m2
    dense_res = np.matmul(m.toarray(), dense_m2)
    self.assertFalse(res.is_symmetric)
    self.assertTrue(np.allclose(res.toarray(), dense_res))

    # test unsymmetric with rectangular scipy coo
    m = self.basic_m
    dense_m2 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])
    m2 = coo_matrix(dense_m2)
    with self.assertRaises(Exception) as context:
        res = m * m2
def probes(self, x: ndarray):
    """Return a matrix which acts on a solution vector to find its values
    at the points `x`.

    The product of this matrix with a finite element function vector is
    like the result of assembling a `Functional`, and it can be thought of
    as the matrix of inner products of the test functions of the basis
    with Dirac deltas at `x`. Because its action is concentrated at points,
    however, it is not assembled with the usual quadratures.
    """
    cells = self.mesh.element_finder(mapping=self.mapping)(*x)
    pts = self.mapping.invF(x[:, :, np.newaxis], tind=cells)
    phis = np.array([
        self.elem.gbasis(self.mapping, pts, k, tind=cells)[0]
        for k in range(self.Nbfun)
    ]).flatten()
    return coo_matrix(
        (
            phis,
            (
                np.tile(np.arange(x.shape[1]), self.Nbfun),
                self.element_dofs[:, cells].flatten(),
            ),
        ),
        shape=(x.shape[1], self.N),
    )
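# A hedged usage sketch of a probes-style evaluation matrix, assuming a recent
# scikit-fem where MeshTri, Basis and ElementTriP1 are exported at the top
# level (version details are an assumption; adapt the imports to the installed
# release). The point coordinates are illustrative only.
import numpy as np
from skfem import MeshTri, Basis, ElementTriP1

mesh = MeshTri().refined(3)
basis = Basis(mesh, ElementTriP1())
x = np.array([[0.3, 0.7],     # x-coordinates of the probe points
              [0.3, 0.2]])    # y-coordinates of the probe points
M = basis.probes(x)           # shape: (number of points, basis.N)
# M @ u evaluates the P1 interpolant of a solution vector u at those points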
def generate_sparse_matrix(n, pl, bw):
    per_line = pl * 2 + 1
    # np.int / np.float are removed aliases; use the builtin types instead
    rows_data = np.empty(per_line * n, dtype=int)
    cols_data = np.empty(per_line * n, dtype=int)
    data_data = np.ones(per_line * n, dtype=float)

    pl1 = pl + 1
    pts = np.linspace(1, 1 + bw, pl, False, dtype=int) - 1
    for i in range(n):
        rs = i + pts
        rs[(rs >= n)] = i
        ls = i - pts
        ls[(ls < 0)] = i

        s = i * per_line
        cols_data[s:s + pl] = ls
        cols_data[s + pl] = i
        cols_data[s + pl1:s + per_line] = rs
        # cols_data[i * per_line:(i+1) * per_line] = np.hstack((ls, [i], rs))
        rows_data[s:(i + 1) * per_line] = i

    matrix = coo_matrix((data_data, (rows_data, cols_data)),
                        shape=(n, n), dtype=np.double)
    return matrix
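# Hypothetical quick check of generate_sparse_matrix on a small case (the
# arguments n=6, pl=2, bw=3 are assumptions for illustration, and numpy /
# coo_matrix are assumed imported as in the snippet above). Each row gets
# per_line entries; coordinates that collide after boundary clipping are
# summed when the COO matrix is materialized, so some entries exceed 1.
small = generate_sparse_matrix(6, 2, 3)
print(small.toarray())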
def calculate_transition_matrix(potential, resolution, beta, bounds=None):
    """Calculate a transition matrix from a potential."""
    if bounds is None:
        bounds = potential.bounds
    grid = plotting.get_grid(bounds, resolution)
    n_states = grid.shape[1] * grid.shape[2]

    # The transition matrix will be constructed as a sparse COO matrix
    t_matrix_rows = np.zeros((0,))
    t_matrix_cols = np.zeros((0,))
    t_matrix_data = np.zeros((0,))

    neighbors = _neighbors()
    potential = potential.potential(grid[0], grid[1])

    for row_i in range(grid.shape[1]):       # xrange in the original (Python 2)
        for col_i in range(grid.shape[2]):
            # Loop through each starting point
            pot = potential[row_i, col_i]
            from_state = _state_id(row_i, col_i, grid.shape)
            normalization = 0.0

            # Only do nearest-neighbor moves
            for neighs in neighbors:
                nrow_i = row_i + neighs[0]
                ncol_i = col_i + neighs[1]

                if (nrow_i < 0 or nrow_i >= grid.shape[1]
                        or ncol_i < 0 or ncol_i >= grid.shape[2]):
                    # Transition probability to states outside our state space
                    # is zero. This is not in the transition matrix
                    pass
                else:
                    to_state = _state_id(nrow_i, ncol_i, grid.shape)
                    delta_pot = potential[nrow_i, ncol_i] - pot
                    t_prob = np.exp(-beta * delta_pot)

                    # Store info for our sparse matrix
                    t_matrix_rows = np.append(t_matrix_rows, from_state)
                    t_matrix_cols = np.append(t_matrix_cols, to_state)
                    t_matrix_data = np.append(t_matrix_data, t_prob)

                    normalization += t_prob

    t_matrix = coo.coo_matrix((t_matrix_data, (t_matrix_rows, t_matrix_cols)),
                              shape=(n_states, n_states)).tocsr()

    # Normalize each row so that outgoing probabilities sum to one
    for trow_i in range(n_states):
        rfrom = t_matrix.indptr[trow_i]
        rto = t_matrix.indptr[trow_i + 1]
        normalization = np.sum(t_matrix.data[rfrom:rto])
        if normalization < EPSILON:
            print("No transitions from %d" % (trow_i))
        else:
            t_matrix.data[rfrom:rto] = t_matrix.data[rfrom:rto] / normalization

    return t_matrix, grid
def _assemble_scipy_csr(
        indices: ndarray,
        data: ndarray,
        shape: Tuple[int, ...],
        local_shape: Optional[Tuple[int, ...]]
):
    K = coo_matrix((data, (indices[0], indices[1])), shape=shape)
    K.eliminate_zeros()
    return K.tocsr()
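# Why COO suits assembly routines like the one above: repeated (row, col)
# pairs are summed when the matrix is compressed, so element contributions can
# simply be appended. A minimal, self-contained check (the toy indices and
# values are assumptions for illustration):
import numpy as np
from scipy.sparse import coo_matrix

rows = np.array([0, 0, 1])
cols = np.array([0, 0, 1])
vals = np.array([1.0, 2.0, 5.0])
A = coo_matrix((vals, (rows, cols)), shape=(2, 2)).tocsr()
print(A.toarray())
# [[3. 0.]
#  [0. 5.]]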
def test_mul_sparse_matrix(self):
    # test symmetric times symmetric
    m = self.g_matrix
    dense_m = m.toarray()
    res = m * m
    dense_res = np.matmul(dense_m, dense_m)
    self.assertTrue(res.is_symmetric)
    self.assertTrue(np.allclose(res.toarray(), dense_res))

    # test symmetric times unsymmetric
    m = self.basic_m
    dense_m2 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])
    m2 = COOMatrix(dense_m2)
    res = m * m2
    dense_res = np.matmul(m.toarray(), dense_m2)
    self.assertFalse(res.is_symmetric)
    self.assertTrue(np.allclose(res.toarray(), dense_res))

    # test symmetric times full symmetric
    m2 = self.full_m
    dense_m = m.toarray()
    dense_m2 = m2.toarray()
    res = m * m2
    dense_res = np.matmul(dense_m, dense_m2)
    self.assertTrue(res.is_symmetric)
    self.assertTrue(np.allclose(res.toarray(), dense_res))

    # test symmetric times full scipy coo
    m2 = coo_matrix((self.full_m.data, (self.full_m.row, self.full_m.col)),
                    shape=self.full_m.shape)
    dense_m = m.toarray()
    dense_m2 = m2.toarray()
    with self.assertRaises(Exception) as context:
        res = m * m2

    m = self.basic_m
    dense_m2 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])
    m2 = coo_matrix(dense_m2)
    with self.assertRaises(Exception) as context:
        res = m * m2
def buildLaplacianMat(rt, userNum, itemNum, adj_type):
    rt_item = rt['itemId'] + userNum
    uiMat = coo_matrix((rt['rating'], (rt['userId'], rt['itemId'])))
    # print('uiMat shape', uiMat.shape)
    uiMat_upperPart = coo_matrix((rt['rating'], (rt['userId'], rt_item)))
    # print('uiMat_upperPart shape', uiMat_upperPart.shape)
    uiMat_lowerPart = uiMat.transpose()
    uiMat_lowerPart.resize((itemNum, userNum + itemNum))

    adj = sparse.vstack([uiMat_upperPart, uiMat_lowerPart])
    selfLoop = sparse.eye(userNum + itemNum)

    def normalize_adj(adj):
        adj = adj.tocsr()
        degree = sparse.csr_matrix(adj.sum(axis=1))
        d_inv_sqrt = degree.power(-0.5)  # csr_matrix (size, 1)
        d_inv_sqrt = np.array(d_inv_sqrt.todense()).reshape(-1)
        D = sparse.diags(d_inv_sqrt)
        L = D.dot(adj).dot(D)  # csr_matrix (size, size)
        return sparse.coo_matrix(L)

    # def normalize_adj(adj):
    #     adj = adj.tocsr()
    #     degree = sparse.csr_matrix(adj.sum(axis=1))
    #     degree = np.array(degree.todense())
    #     d_inv_sqrt = 1.0 / degree
    #     d_inv_sqrt = d_inv_sqrt.reshape(-1)
    #     d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    #     D = sparse.diags(d_inv_sqrt)
    #     L = D.dot(adj)  # csr_matrix (size, size)
    #     return sparse.coo_matrix(L)

    if adj_type == 'ui_mat':
        return uiMat
    elif adj_type == 'plain_adj':
        return adj
    # A' = (D + I)^-1/2 (A + I) (D + I)^-1/2
    elif adj_type == 'norm_adj':
        return normalize_adj(adj + selfLoop)
    # A'' = D^-1/2 A D^-1/2
    elif adj_type == 'mean_adj':
        # return (adj + selfLoop).tocoo(), normalize_adj(adj)
        return normalize_adj(adj)
def merge_sparse(matrix1, matrix2):
    matrix1_coo = matrix1.tocoo()
    matrix2_coo = matrix2.tocoo()

    data = np.concatenate((matrix1_coo.data, matrix2_coo.data))
    rows = np.concatenate((matrix1_coo.row, matrix2_coo.row))
    cols = np.concatenate((matrix1_coo.col, matrix2_coo.col))

    full_coo = coo.coo_matrix((data, (rows, cols)), shape=matrix1.shape)
    return full_coo.tocsr()
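# Note on merge_sparse: where the two matrices have a nonzero at the same
# position, the merged CSR matrix holds the *sum* of the two values, since
# duplicate COO coordinates are added during compression. A small check (the
# toy matrices are assumptions for illustration; the snippet's own `np` and
# `coo` imports are assumed in scope):
import numpy as np
from scipy.sparse import csr_matrix

a = csr_matrix(np.array([[1.0, 0.0], [0.0, 2.0]]))
b = csr_matrix(np.array([[4.0, 3.0], [0.0, 0.0]]))
merged = merge_sparse(a, b)
print(merged.toarray())
# [[5. 3.]
#  [0. 2.]]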
def tocoo(self, copy=True):
    major_dim, minor_dim = self._swap(self.shape)
    minor_indices = self.indices
    major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
    _sparsetools.expandptr(major_dim, self.indptr, major_indices)
    row, col = self._swap((major_indices, minor_indices))

    from scipy.sparse.coo import coo_matrix
    return coo_matrix((self.data, (row, col)), self.shape, copy=copy,
                      dtype=self.dtype)
def test_sparse_divide(self):
    row = np.array([0, 3, 1, 0])
    col = np.array([0, 3, 1, 2])
    data = np.array([4, 5, 7, 9])
    S1 = coo_matrix((data, (row, col)), shape=(4, 5))
    pybammS1 = pybamm.Matrix(S1)
    v1 = np.ones((4, 1))
    pybammv1 = pybamm.Vector(v1)

    np.testing.assert_array_equal(
        (pybammS1 / pybammv1).evaluate().toarray(), S1.toarray() / v1
    )
def buildLaplacianMat(rt, userNum, itemNum, adj_type):
    rt_item = rt['itemId'] + userNum
    uiMat = coo_matrix((rt['rating'], (rt['userId'], rt['itemId'])))
    # print('uiMat shape', uiMat.shape)
    uiMat_upperPart = coo_matrix((rt['rating'], (rt['userId'], rt_item)))
    # print('uiMat_upperPart shape', uiMat_upperPart.shape)
    uiMat = uiMat.transpose()
    uiMat.resize((itemNum, userNum + itemNum))

    adj = sparse.vstack([uiMat_upperPart, uiMat])
    selfLoop = sparse.eye(userNum + itemNum)

    # def normalize_adj(adj):
    #     sumArr = (adj > 0).sum(axis=1)
    #     diag = list(np.array(sumArr.flatten())[0])
    #     diag = np.power(diag, -0.5)
    #     D = sparse.diags(diag)
    #     L = D * adj * D
    #     L = sparse.coo_matrix(L)
    #     return L

    def normalize_adj(adj):
        adj = adj.tocsr()
        degree = sparse.csr_matrix(adj.sum(axis=1))
        d_inv_sqrt = degree.power(-0.5)  # csr_matrix (size, 1)
        d_inv_sqrt = np.array(d_inv_sqrt.todense()).reshape(-1)
        D = sparse.diags(d_inv_sqrt)
        L = D.dot(adj).dot(D)  # csr_matrix (size, size)
        return sparse.coo_matrix(L)

    if adj_type == 'plain_adj':
        return adj
    # A' = (D + I)^-1/2 (A + I) (D + I)^-1/2
    elif adj_type == 'norm_adj':
        return normalize_adj(adj + selfLoop)
    # A'' = D^-1/2 A D^-1/2
    elif adj_type == 'mean_adj':
        return normalize_adj(adj)
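# A minimal illustration of the symmetric normalization D^-1/2 A D^-1/2 used
# by normalize_adj above, on a hypothetical 3x3 adjacency matrix (the toy
# matrix is an assumption for illustration):
import numpy as np
from scipy import sparse

A = sparse.csr_matrix(np.array([[0., 1., 1.],
                                [1., 0., 0.],
                                [1., 0., 0.]]))
deg = np.asarray(A.sum(axis=1)).reshape(-1)          # degrees: [2, 1, 1]
D_inv_sqrt = sparse.diags(np.power(deg, -0.5))
A_norm = D_inv_sqrt.dot(A).dot(D_inv_sqrt)
print(A_norm.toarray())
# the (0, 1) entry becomes 1 / sqrt(2 * 1) ≈ 0.707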
def __init__(self, arg1, shape=None, filename="sparse.spy",
             tablename="dok_matrix", dtype=None, copy=False, commit_freq=1.0):
    spmatrix.__init__(self)
    self.dtype = getdtype(dtype, default=float)

    if isinstance(arg1, tuple) and isshape(arg1):  # (M, N)
        M, N = arg1
        self.shape = (M, N)
    elif isspmatrix(arg1):  # Sparse ctor
        if isspmatrix_dok(arg1) and copy:
            arg1 = arg1.copy()
        else:
            arg1 = arg1.todok()

        if dtype is not None:
            arg1 = arg1.astype(dtype)

        self.shape = arg1.shape
        self.dtype = arg1.dtype
    else:  # Dense ctor
        try:
            arg1 = np.asarray(arg1)
        except:
            raise TypeError('invalid input format')

        if len(arg1.shape) != 2:
            raise TypeError('expected rank <=2 dense array or matrix')

        from scipy.sparse.coo import coo_matrix
        d = coo_matrix(arg1, dtype=dtype).todok()
        self.shape = arg1.shape
        self.dtype = d.dtype

    ddict.__init__(self, filename, tablename=tablename,
                   commit_freq=commit_freq,
                   key_types=("UNSIGNED INTEGER",
                              int_tuple_ser(*self.shape),
                              int_tuple_unser(*self.shape)),
                   val_types=("REAL", float, float))

    if isspmatrix(arg1):  # Sparse ctor
        ddict.update(self, arg1)
    elif not (isinstance(arg1, tuple) and isshape(arg1)):
        ddict.update(self, d)
def alsModel(ratingsPath):
    print("\n***Generating Recommendations using ALS model***")
    userlist = [
        56886, 71026, 73136, 88837, 125582, 25403, 36860, 70402, 132121,
        4363, 56592, 83665, 87073, 99945
    ]
    model_name = "Alternating Least Squares"
    ratingsMatrix = pd.read_csv(ratingsPath)
    ratingsMatrix.rename(columns={
        'product_id_left': 'product_id',
        'ones_left': 'purchases'
    }, inplace=True)

    # map each item and user to a unique numeric value
    ratingsMatrix['user_id'] = ratingsMatrix['user_id'].astype("category")
    ratingsMatrix['product_id'] = ratingsMatrix['product_id'].astype("category")

    # create a sparse matrix of all the user/product/purchases triples
    purchases = coo_matrix((ratingsMatrix['purchases'].astype(float),
                            (ratingsMatrix['product_id'].cat.codes,
                             ratingsMatrix['user_id'].cat.codes)))

    # initialize a model
    print("Initializing model")
    model = implicit.als.AlternatingLeastSquares(factors=50)

    # train the model on a sparse matrix of item/user/confidence weights
    log.debug("training model %s", model_name)
    start = time.time()
    model.fit(purchases)
    log.debug("trained model '%s' in %s", model_name, time.time() - start)
    log.debug("calculating top purchases")

    # recommend items for each user
    user_items = purchases.T.tocsr()
    # recommendations = model.recommend(userid, user_items)
    recommendations = []
    for userid in userlist:
        for itemid, score in model.recommend(userid, user_items):
            record = [userid, itemid, score]
            recommendations.append(record)
            # print("recommendations for", userid, "\n", record)
    # print(recommendations)

    recommendedProductsDf = pd.DataFrame(
        recommendations, columns=["userid", "productid", "score"])
    print(recommendedProductsDf.head(10))
    recommendedProductsDf.to_csv(
        "../../../data/processed/als-recommendations.csv")
def graph_to_sparse_matrix(graph):
    """
    Create a connectivity sparse matrix from the adjacency graph
    (a list of neighbor lists).
    """
    from scipy.sparse.coo import coo_matrix
    n_vox = len(graph)
    ij = [[], []]
    for i in range(n_vox):  # xrange in the original (Python 2)
        ij[0] += [i] * len(graph[i])
        ij[1] += list(graph[i])
    return coo_matrix((np.ones(len(ij[0]), dtype=int), ij))
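# Hypothetical usage of graph_to_sparse_matrix with a tiny neighbor-list graph
# (the toy graph is an assumption for illustration; `np` is assumed imported
# as in the snippet above):
graph = [[1, 2], [0], [0]]       # node 0 touches 1 and 2; nodes 1 and 2 touch 0
conn = graph_to_sparse_matrix(graph)
print(conn.toarray())
# [[0 1 1]
#  [1 0 0]
#  [1 0 0]]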
def build_U(self, f):
    bits = self.bits
    n = 2 ** (2 * bits)
    if bits > 32:
        print("bits = {}, exiting".format(bits))
        exit(1)
    b = np.arange(n)
    bH = b >> bits
    bL = b % 2 ** bits
    aL = f[bH] ^ bL
    a = bH << bits | aL
    data = [1] * n
    U = coo.coo_matrix((data, (a, b)), shape=[n, n])
    return qutip.Qobj(U)
def face_angles_sparse(mesh):
    """
    A sparse matrix representation of the face angles.

    Returns
    ----------
    sparse: scipy.sparse.coo_matrix with:
            dtype: float
            shape: (len(mesh.vertices), len(mesh.faces))
    """
    matrix = coo_matrix((mesh.face_angles.flatten(),
                         (mesh.faces_sparse.row, mesh.faces_sparse.col)),
                        shape=mesh.faces_sparse.shape)
    return matrix
def toarray(self) -> ndarray:
    """Return a dense numpy array."""
    if len(self.shape) == 1:
        return coo_matrix(
            (self.data, (self.indices[0], np.zeros_like(self.indices[0]))),
            shape=self.shape + (1,),
        ).toarray().T[0]
    elif len(self.shape) == 2:
        return self.tocsr().toarray()

    # slow implementation for testing N-tensors
    out = np.zeros(self.shape)
    for itr in range(self.indices.shape[1]):
        out[tuple(self.indices[:, itr])] += self.data[itr]
    return out
def _compute_contains(self):
    logger.log(logging.DEBUG, "Computing contains/contained relations")

    # contain/contained
    i = list(range(len(self.dictionary)))
    j = list(range(len(self.dictionary)))

    for r_p, v in self.dictionary.roots.items():
        paradigms = {t for t in v if t.script.paradigm}

        for p in paradigms:
            _contains = [self.dictionary.terms[ss].index
                         for ss in p.script.singular_sequences] + \
                        [k.index for k in paradigms if k.script in p.script]
            i.extend(repeat(p.index, len(_contains)))
            j.extend(_contains)

    # np.bool is a removed alias of the builtin bool
    return coo_matrix(([True] * len(i), (i, j)), shape=self.shape, dtype=bool)
def test_03_03_color_ijv(self):
    workspace, module, ijv = self.make_workspace_ijv()
    self.assertTrue(isinstance(module, C.ConvertObjectsToImage))
    module.image_mode.value = C.IM_COLOR
    module.run(workspace)
    pixel_data = workspace.image_set.get_image(IMAGE_NAME).pixel_data
    #
    # convert the labels into individual bits (1, 2, 4, 8)
    # the labels matrix is a matrix of the bits that are on
    #
    vbit = 2 ** (ijv[:, 2] - 1)
    vbit_color = np.zeros((np.max(vbit) * 2, 3))
    bits = coo_matrix((vbit, (ijv[:, 0], ijv[:, 1]))).toarray()
    #
    # Get some color for every represented bit combo
    #
    vbit_color[bits[ijv[:, 0], ijv[:, 1]], :] = pixel_data[ijv[:, 0], ijv[:, 1], :]

    self.assertTrue(np.all(pixel_data == vbit_color[bits, :]))
def test_03_02_gray_ijv(self):
    workspace, module, ijv = self.make_workspace_ijv()
    self.assertTrue(isinstance(module, C.ConvertObjectsToImage))
    module.image_mode.value = C.IM_GRAYSCALE
    module.run(workspace)
    pixel_data = workspace.image_set.get_image(IMAGE_NAME).pixel_data

    counts = coo_matrix((np.ones(ijv.shape[0]),
                         (ijv[:, 0], ijv[:, 1]))).toarray()
    self.assertTrue(np.all(pixel_data[counts == 0] == 0))

    pd_values = np.unique(pixel_data)
    pd_labels = np.zeros(pixel_data.shape, int)
    for i in range(1, len(pd_values)):
        pd_labels[pixel_data == pd_values[i]] = i

    dest_v = np.zeros(np.max(ijv[:, 2] + 1), int)
    dest_v[ijv[:, 2]] = pd_labels[ijv[:, 0], ijv[:, 1]]
    pd_ok = np.zeros(pixel_data.shape, bool)
    ok = pd_labels[ijv[:, 0], ijv[:, 1]] == dest_v[ijv[:, 2]]
    pd_ok[ijv[ok, 0], ijv[ok, 1]] = True
    self.assertTrue(np.all(pd_ok[counts > 0]))
def __get_predicted_labels(self, data_matrix, features_to_include):
    """
    Finds the nearest cluster for all data points and adds a new label feature
    to every feature vector in the data matrix. The data matrix is modified in
    place. Returns a new copy of data_matrix restricted to the
    "features_to_include" features.
    """
    feature_names = self.vectorizer.get_feature_names()
    for feature_vector in data_matrix:
        row = [0] * len(feature_names)
        column = range(len(feature_names))
        # wrap in list() so this also works under Python 3, where map() is lazy
        data = list(map(lambda feature_name: feature_vector[feature_name]
                        if feature_name in feature_vector else 0,
                        feature_names))
        feature_csr_matrix = csr_matrix(coo_matrix((data, (row, column))))
        predicted_label = self.k_means_estimator.predict(feature_csr_matrix)
        feature_vector[self.LABEL_FEATURE_KEY] = predicted_label[0]

    expanded_data_matrix = self.__get_expanded_data_matrix(data_matrix)
    if features_to_include:
        return self.__get_filtered_data_matrix(expanded_data_matrix,
                                               features_to_include)
    else:
        return expanded_data_matrix
def _compute_father(self):
    logger.log(logging.DEBUG, "Computing father/child relations")

    def _recurse_script(script):
        result = []
        for sub_s in script.children if isinstance(script, AdditiveScript) else [script]:
            if isinstance(sub_s, NullScript):
                continue

            if sub_s in self.dictionary.terms:
                result.append(self.dictionary.terms[sub_s].index)
            else:
                if sub_s.layer > 0:
                    result.extend(chain.from_iterable(
                        _recurse_script(c) for c in sub_s.children))

        return result

    # father = coo_matrix((3, len(self.dictionary), len(self.dictionary)), dtype=bool)
    father = [([], []) for _ in range(3)]

    for t in self.dictionary.terms.values():
        s = t.script

        for sub_s in s if isinstance(s, AdditiveScript) else [s]:
            if len(sub_s.children) == 0 or isinstance(sub_s, NullScript):
                continue

            for i, s in enumerate(('father_substance', 'father_attribute', 'father_mode')):
                if s in t.inhibitions:
                    continue

                fathers_indexes = _recurse_script(sub_s.children[i])
                father[i][0].extend(repeat(t.index, len(fathers_indexes)))
                father[i][1].extend(fathers_indexes)

    # np.bool is a removed alias of the builtin bool
    return [coo_matrix(([True] * len(i), (i, j)), shape=self.shape, dtype=bool)
            for i, j in father]
def _compute_siblings(self):
    # siblings
    # 1st dim => the sibling type
    #  - 0 opposed
    #  - 1 associated
    #  - 2 crossed
    #  - 3 twin
    def _opposed_sibling(s0, s1):
        return not s0.empty and not s1.empty and \
               s0.cardinal == s1.cardinal and \
               s0.children[0] == s1.children[1] and s0.children[1] == s1.children[0]

    def _associated_sibling(s0, s1):
        return s0.cardinal == s1.cardinal and \
               s0.children[0] == s1.children[0] and \
               s0.children[1] == s1.children[1] and \
               s0.children[2] != s1.children[2]

    def _crossed_sibling(s0, s1):
        return s0.layer >= 2 and \
               s0.cardinal == s1.cardinal and \
               _opposed_sibling(s0.children[0], s1.children[0]) and \
               _opposed_sibling(s0.children[1], s1.children[1])

    siblings = [([], []) for _ in range(4)]

    logger.log(logging.DEBUG, "Computing siblings relations")

    for root in self.dictionary.roots:
        _inhib_opposed = 'opposed' not in root.inhibitions
        _inhib_associated = 'associated' not in root.inhibitions
        _inhib_crossed = 'crossed' not in root.inhibitions
        _inhib_twin = 'twin' not in root.inhibitions

        if root.script.layer == 0:
            continue

        _twins = []

        for i, t0 in enumerate(self.dictionary.roots[root]):
            if not isinstance(t0.script, MultiplicativeScript):
                continue

            if t0.script.children[0] == t0.script.children[1]:
                _twins.append(t0)

            for t1 in [t for j, t in enumerate(self.dictionary.roots[root])
                       if j > i and isinstance(t.script, MultiplicativeScript)]:
                if _inhib_opposed and _opposed_sibling(t0.script, t1.script):
                    siblings[0][0].extend((t0.index, t1.index))
                    siblings[0][1].extend((t1.index, t0.index))

                if _inhib_associated and _associated_sibling(t0.script, t1.script):
                    siblings[1][0].extend((t0.index, t1.index))
                    siblings[1][1].extend((t1.index, t0.index))

                if _inhib_crossed and _crossed_sibling(t0.script, t1.script):
                    siblings[2][0].extend((t0.index, t1.index))
                    siblings[2][1].extend((t1.index, t0.index))

        if _inhib_twin:
            _twins = sorted(_twins, key=lambda t: t.script.cardinal)
            for card, g in groupby(_twins, key=lambda t: t.script.cardinal):
                twin_indexes = [t.index for t in g]

                if len(twin_indexes) > 1:
                    index0, index1 = list(zip(*permutations(twin_indexes, r=2)))
                    siblings[3][0].extend(index0)
                    siblings[3][1].extend(index1)

    # np.bool is a removed alias of the builtin bool
    return [coo_matrix(([True] * len(i), (i, j)), shape=self.shape, dtype=bool)
            for i, j in siblings]
# (fragment: continuation of a binarize call from the preceding section)
                      threshold=boston.target.mean())
new_target[:5]
# array([ 1., 0., 1., 1., 1.])
(boston.target[:5] > boston.target.mean()).astype(int)
# array([1, 0, 1, 1, 1])

bin = preprocessing.Binarizer(boston.target.mean())
new_target = bin.fit_transform(boston.target)
new_target[:5]
# array([ 1., 0., 1., 1., 1.])

# Sparse matrices
from scipy.sparse import coo
spar = coo.coo_matrix(np.random.binomial(1, .25, 100))
preprocessing.binarize(spar, threshold=-1)
# ValueError: Cannot binarize a sparse matrix with threshold < 0

# Working with categorical variables
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Now, with X and y being as they normally will be, we'll operate on the data as one:
# stack X and y into a single matrix
d = np.column_stack((X, y))
# (skipping ahead a bit)
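# By contrast, a non-negative threshold works on sparse input, because the
# implicit zeros can stay zero. A minimal sketch (the toy data and the 0.5
# cutoff are assumptions for illustration):
import numpy as np
from scipy.sparse import coo_matrix
from sklearn import preprocessing

spar = coo_matrix(np.random.binomial(3, .25, (4, 5)).astype(float))
binarized = preprocessing.binarize(spar, threshold=0.5)
print(binarized.toarray())   # entries > 0.5 become 1, the rest stay 0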
    docs, input_type, tokenizer = dataset_small()
    paths = docs
else:
    unchecked_paths, input_type, tokenizer = dataset_mails(doc_path)
    docs, paths = get_document_names(doc_path, unchecked_paths)

print('Loaded ', len(docs), ' files from path: ', doc_path, '.')
print('Extracting features.')

vectorizer = create_vectorizer(input_type, tokenizer=tokenizer,
                               ngram_range=(1, 1))
data = vectorizer.fit_transform(paths)
features = vectorizer.get_feature_names()
data = coo_matrix(data)
data = apply_threshold(data, 0.1)  # Filter out everything that is too weak.

print('Reading headers.')
with codecs.open(rescal_path + '/headers.txt', 'r', encoding='utf8') as f:
    original_headers = f.read().splitlines()

DOCUMENT = 'Need: '
FEATURE = 'Attr: '
new_headers, offsets = new_tensor_slice(original_headers, [
    (DOCUMENT, docs, data.row),
    (FEATURE, features, data.col)]
)
document_offsets = offsets[DOCUMENT]
def sprandn(m, n, density=0.01, format="coo", dtype=None, random_state=None):
    """Generate a sparse matrix of the given shape and density with standard
    normally distributed values.

    Parameters
    ----------
    m, n : int
        shape of the matrix
    density : real
        density of the generated matrix: density equal to one means a full
        matrix, density of 0 means a matrix with no non-zero items.
    format : str
        sparse matrix format.
    dtype : dtype
        type of the returned matrix values.
    random_state : {numpy.random.RandomState, int}, optional
        Random number generator or random seed. If not given, the singleton
        numpy.random will be used.

    Notes
    -----
    Only float types are supported for now.
    """
    if density < 0 or density > 1:
        raise ValueError("density expected to be 0 <= density <= 1")
    if dtype and (dtype not in [np.float32, np.float64, np.longdouble]):
        raise NotImplementedError("type %s not supported" % dtype)

    mn = m * n

    tp = np.intc
    if mn > np.iinfo(tp).max:
        tp = np.int64

    if mn > np.iinfo(tp).max:
        msg = """\
Trying to generate a random sparse matrix such as the product of dimensions is
greater than %d - this is not supported on this machine
"""
        raise ValueError(msg % np.iinfo(tp).max)

    # Number of non zero values
    k = int(density * m * n)

    if random_state is None:
        random_state = np.random
    elif isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    # Use the algorithm from python's random.sample for k < mn/3.
    if mn < 3 * k:
        # We should use this line, but choice is only available in numpy >= 1.7
        # ind = random_state.choice(mn, size=k, replace=False)
        ind = random_state.permutation(mn)[:k]
    else:
        ind = np.empty(k, dtype=tp)
        selected = set()
        for i in range(k):  # xrange in the original (Python 2)
            j = random_state.randint(mn)
            while j in selected:
                j = random_state.randint(mn)
            selected.add(j)
            ind[i] = j

    j = np.floor(ind * 1. / m).astype(tp)
    i = (ind - j * m).astype(tp)
    vals = random_state.randn(k).astype(dtype)
    return coo_matrix((vals, (i, j)), shape=(m, n)).asformat(format)
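# Hypothetical usage of the sprandn defined above (shape, density and seed are
# assumptions for illustration; `np` and `coo_matrix` are assumed imported as
# in the snippet): roughly density * m * n entries are filled with
# standard-normal values and the result is returned in the requested format.
S = sprandn(4, 6, density=0.25, format="csr", random_state=0)
print(S.nnz)        # about 0.25 * 4 * 6 = 6 stored entries
print(S.toarray())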