def evalModel(self, inData, inGt, inImg, gtShape, plot=True):
    """Evaluate the estimator on one batch, optionally logging and plotting.

    Args:
        inData: Input batch accepted by ``sp.find``; it is fed to the graph
            as (row, column) indices plus values.
        inGt: Ground truth for the batch, or ``None``. When given it is fed
            either as indices/values (``self.gtSparse``) or as-is.
        inImg: Forwarded to ``self.evalAndPlotCam`` inside the data tuple.
        gtShape: Indexable with four entries; together with
            ``self.batchSize`` it gives the dense shape a sparse ground
            truth is reshaped to for plotting.
        plot: When true, call ``self.evalAndPlotCam`` with a file name of
            ``self.plotDir + "test_" + str(self.timestep)``.

    Returns:
        The value of ``self.est.eval`` for the constructed feed dict.
    """
    # `is not None` instead of `!= None`: comparing a numpy array to None
    # with `!=` is elementwise and cannot be used as an `if` condition.
    hasGt = inGt is not None

    (dataOutY, dataOutX, dataVals) = sp.find(inData)
    # keep_prob is always fed as 1.0 here (presumably to disable dropout
    # during evaluation -- confirm against the graph definition).
    feedDict = {
        self.dataIndices: [dataOutY, dataOutX],
        self.dataValues: dataVals,
        self.keep_prob: 1.0,
    }
    if hasGt:
        if self.gtSparse:
            (gtOutY, gtOutX, gtVals) = sp.find(inGt)
            feedDict[self.gtIndices] = [gtOutY, gtOutX]
            feedDict[self.gtValues] = gtVals
        else:
            feedDict[self.gt] = inGt

    outVals = self.est.eval(feed_dict=feedDict, session=self.sess)

    # The summary is only evaluated when ground truth was fed.
    if hasGt:
        summary = self.sess.run(self.mergedSummary, feed_dict=feedDict)
        self.test_writer.add_summary(summary, self.timestep)

    # NOTE(review): the original indentation was lost; plotting is assumed
    # to be independent of whether inGt was given -- confirm.
    if plot:
        filename = self.plotDir + "test_" + str(self.timestep)
        if self.gtSparse:
            gt = np.reshape(
                inGt.toarray(),
                (self.batchSize, gtShape[0], gtShape[1], gtShape[2], gtShape[3]))
        else:
            gt = inGt
        data = (inData, inGt, inImg)
        self.evalAndPlotCam(feedDict, data, gt, filename)
    return outVals
def trainModel(self, dataObj, save, plot):
    """Run ``self.innerSteps`` optimisation steps, then optionally save/plot.

    Args:
        dataObj: Object providing ``getData(batchSize)`` (returning a
            sequence whose first two items are the input and the ground
            truth) and, for plotting sparse ground truth, ``gtShape``.
        save: When true, checkpoint the session with ``self.saver``.
        plot: When true, plot the last batch through
            ``self.evalAndPlotCam``.
    """
    # Pre-bind so that innerSteps == 0 does not raise NameError below.
    data = None
    feedDict = None

    for i in range(self.innerSteps):
        # Get data from dataObj
        data = dataObj.getData(self.batchSize)
        (dataOutY, dataOutX, dataVals) = sp.find(data[0])
        feedDict = {
            self.dataIndices: [dataOutY, dataOutX],
            self.dataValues: dataVals,
        }
        if self.gtSparse:
            (gtOutY, gtOutX, gtVals) = sp.find(data[1])
            feedDict[self.gtIndices] = [gtOutY, gtOutX]
            feedDict[self.gtValues] = gtVals
        else:
            feedDict[self.gt] = data[1]

        # Run both optimizers on the same batch.
        self.sess.run(self.optimizerAll, feed_dict=feedDict)
        self.sess.run(self.optimizerBias, feed_dict=feedDict)

        if i % self.writeStep == 0:
            summary = self.sess.run(self.mergedSummary, feed_dict=feedDict)
            self.train_writer.add_summary(summary, self.timestep)
        if i % self.progress == 0:
            # Single-argument form prints identically on Python 2 and 3
            # (two spaces reproduce the old `print "Timestep ", x` output).
            print("Timestep  %s" % (self.timestep,))
        self.timestep += 1

    if save:
        save_path = self.saver.save(self.sess, self.saveFile,
                                    global_step=self.timestep,
                                    write_meta_graph=False)
        print("Model saved in file: %s" % save_path)

    # Nothing to plot if no training step produced a batch.
    if plot and data is not None:
        filename = self.plotDir + "train_" + str(self.timestep)
        gtShape = dataObj.gtShape
        if self.gtSparse:
            gt = np.reshape(
                data[1].toarray(),
                (self.batchSize, gtShape[0], gtShape[1], gtShape[2], gtShape[3]))
        else:
            gt = data[1]
        self.evalAndPlotCam(feedDict, data, gt, filename)
def lookup_arg_freqs(self, word):
    """Sum ``word``'s column of ``self.binary_sparse`` over even and odd rows.

    The column is looked up in ``self.vocabulary``. Returns ``None`` when the
    word is unknown, otherwise ``(even_row_total, odd_row_total)``.
    """
    column = self.vocabulary.get(word)
    if column is None:
        return None
    totals = []
    # first_row 0 selects rows 0, 2, 4, ...; first_row 1 selects 1, 3, 5, ...
    for first_row in (0, 1):
        _, _, values = find(self.binary_sparse[first_row::2, column])
        totals.append(sum(values))
    return totals[0], totals[1]
def lookup_0_freqs(self, word):
    """Return the row total and column total of ``word`` in ``self.zero_sparse``.

    The index is looked up in ``self.vocabulary``. Returns ``None`` when the
    word is unknown, otherwise ``(row_total, column_total)`` for that index.
    """
    idx = self.vocabulary.get(word)
    if idx is None:
        return None
    row_values = find(self.zero_sparse[idx, :])[2]
    col_values = find(self.zero_sparse[:, idx])[2]
    return sum(row_values), sum(col_values)
def learning(self,npres):
    """Return a copy of ``self.connectivity`` modified by the first ``npres`` stimuli.

    For each stimulus, randomly chosen entries are set to 1 ("potentiation")
    and randomly chosen entries are set to 0 ("depression"). The result is a
    LIL matrix; ``self.connectivity`` itself is not modified here.
    """
    learned_connectivity=sparse.lil_matrix(self.connectivity)
    for i in range(npres):
        #Potentiation
        # Column indices of the nonzero entries of stimulus i.
        mydim=len(sparse.find(self.stimulus[i])[1])
        # All ordered pairs of those indices, shifted by +1 so that index 0
        # survives the multiplication by the 0/1 mask and the zero-dropping
        # done by sparse.find below.
        row=np.tile(sparse.find(self.stimulus[i])[1],mydim)+1
        col=np.repeat(sparse.find(self.stimulus[i])[1],mydim)+1
        # One Bernoulli(self.qp) draw per pair; pairs with a 1 are selected.
        pot_entries=np.random.binomial(1,self.qp,mydim*mydim)
        # Keep only the selected pairs and undo the +1 shift.
        row_pot=list(sparse.find(pot_entries*row)[2]-1)
        col_pot=list(sparse.find(pot_entries*col)[2]-1)
        learned_connectivity[row_pot,col_pot]=1
        #pot_matrix=sparse.coo_matrix((pot_entries, (row, col)), shape=(self.N, self.N))
        #Depression
        # -(stimulus - 1) == 1 - stimulus, which is nonzero wherever the
        # stimulus entry differs from 1.
        mydim_col=len(sparse.find(-(self.stimulus[i]-np.ones(self.N)))[1])
        mydim_row=mydim
        # Pairs (row from the stimulus' nonzero indices, column from the
        # indices found above), again shifted by +1.
        col=np.repeat(sparse.find(-(self.stimulus[i]-np.ones(self.N)))[1],mydim_row)+1
        row=np.tile(sparse.find(self.stimulus[i])[1],mydim_col)+1
        # NOTE(review): the original comment says these are entries that
        # won't be depressed, but the pairs where dep_entries == 1 are the
        # ones set to 0 below -- confirm the intended meaning of self.qn.
        dep_entries=np.random.binomial(1,self.qn,mydim_row*mydim_col) #entries that wont be depressed
        row_dep=list(sparse.find(dep_entries*row)[2]-1)
        col_dep=list(sparse.find(dep_entries*col)[2]-1)
        learned_connectivity[row_dep,col_dep]=0
        print 'learning pattern ',i+1,' of ',self.p
    return learned_connectivity
def lookup_bin_freqs(self, word):
    """Return the totals of the two ``self.binary_sparse`` rows owned by ``word``.

    ``self.binary_vocab`` maps the word to an index ``i``; rows ``2*i`` and
    ``2*i + 1`` are summed. Returns ``None`` for unknown words, otherwise
    ``(total_of_row_2i, total_of_row_2i_plus_1)``.
    """
    slot = self.binary_vocab.get(word)
    if slot is None:
        return None
    first_row = 2 * slot
    first_values = find(self.binary_sparse[first_row, :])[2]
    second_values = find(self.binary_sparse[first_row + 1, :])[2]
    return sum(first_values), sum(second_values)
def cosine_similarity(v1,v2):
    """Cosine similarity of two sparse column vectors.

    Only rows that are nonzero in at least one of the vectors are visited.

    Args:
        v1, v2: 2-D single-column containers (shape ``(n, 1)``) that support
            ``sparse.find`` and ``v[(i, 0)]`` indexing.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)``, or ``0.0`` when either vector has
        zero length (previously this raised ``ZeroDivisionError`` or gave
        NaN depending on which vector was empty).
    """
    rows1 = sparse.find(v1)[0]
    rows2 = sparse.find(v2)[0]
    sumxx, sumxy, sumyy = 0, 0, 0
    for i in set(np.append(rows1, rows2)):
        x = v1[(i, 0)]
        y = v2[(i, 0)]
        sumxx += x * x
        sumyy += y * y
        sumxy += x * y
    denominator = math.sqrt(sumxx * sumyy)
    if denominator == 0:
        # At least one vector is all zeros: similarity is undefined.
        return 0.0
    return sumxy / denominator
def array_equal(a1, a2):
    """array_equal that supports sparse and dense arrays with missing values.

    Two dense arrays are compared with ``np.allclose(..., equal_nan=True)``.
    If at least one argument is sparse, both are reduced to their nonzero
    (row, column, value) triples: positions must match exactly and values
    must be close (NaNs compare equal).

    Returns:
        bool: ``False`` for different shapes, different nonzero positions or
        different values; ``True`` otherwise.
    """
    if a1.shape != a2.shape:
        return False

    if not (sp.issparse(a1) or sp.issparse(a2)):  # Both dense: just compare
        return np.allclose(a1, a2, equal_nan=True)

    def _sorted_triples(a):
        # Order triples by (row, column) as a unit. Sorting each column of
        # the stacked triples independently (as was done before) destroys
        # the row/column/value association and can report unequal matrices
        # as equal.
        rows, cols, vals = sp.find(a)
        order = np.lexsort((cols, rows))
        return rows[order], cols[order], vals[order]

    rows1, cols1, vals1 = _sorted_triples(a1)
    rows2, cols2, vals2 = _sorted_triples(a2)
    # Different numbers of nonzeros used to raise a broadcasting error.
    if rows1.shape != rows2.shape:
        return False
    return bool(np.array_equal(rows1, rows2)
                and np.array_equal(cols1, cols2)
                and np.allclose(vals1, vals2, equal_nan=True))
def connectivityMatrixNew(self):
    """Draw Gaussian patterns and build a sparse outer-product connectivity.

    Sets:
        self.patterns: ``(self.p, self.N)`` array of standard-normal draws.
        self.connectivity: ``(self.N, self.N)`` CSR matrix. Positions come
            from ``sparse.random`` with density ``self.c``; the entry at
            (i, j) is ``self.Amp / (self.c * self.N)`` times the sum over
            patterns of ``patterns[:, i] * patterns[:, j]``.
    """
    self.patterns = np.random.normal(0, 1, size=(self.p, self.N))
    # This draw is unused; it is kept so that the global NumPy random
    # stream (and therefore seeded runs) stays exactly as before.
    np.random.binomial(1, 0.5, size=(self.p, self.N))
    print('Patterns created. N patterns: %s' % (self.p,))

    patterns_pre = self.patterns
    patterns_post = self.patterns

    # bernoulli(1) always yields 1, so the random matrix only carries the
    # positions of the connections.
    rv = bernoulli(1).rvs
    rows, cols, _ = sparse.find(
        sparse.random(self.N, self.N, density=self.c, data_rvs=rv))
    print('Connectivity created. N patterns: %s' % (self.p,))

    # Outer-product weights evaluated only at the connected (row, col)
    # pairs: for each pair, sum over patterns of post[:, row] * pre[:, col].
    weights = (self.Amp / (self.c * self.N)) * np.einsum(
        'ij,ij->j', patterns_post[:, rows], patterns_pre[:, cols])
    connectivity = sparse.csr_matrix((weights, (rows, cols)),
                                     shape=(self.N, self.N))
    # This message used to be a bare expression and was never printed.
    print('Connectivity loaded with patterns. N patterns: %s' % (self.p,))
    self.connectivity = connectivity
def find_user_top_match(self, user_index, nbr_recommendations = 5, k = 20, k_min = 10, sim = 0.15, rating_normalisation = True):
    '''
    Predict a rating for every item the user has not rated yet and return
    the best ones.

    * user_index: Internal id of the user
    * nbr_recommendations: Maximum number of results returned [5]
    * k, k_min, sim, rating_normalisation: forwarded unchanged to
      ``self.predict_rating_userbased``

    Returns two parallel lists ordered by decreasing predicted rating: the
    item indices and their ratings. Items whose prediction is exactly zero
    (including failed predictions) are left out.
    '''
    user_ratings = np.zeros(self.nbr_items)
    already_rated = find(self.relationship_matrix_csc[:, user_index])[0]
    # Set lookup instead of scanning the index array for every item.
    # user_index itself is excluded as well, as in the original code.
    excluded = set(already_rated.tolist())
    excluded.add(user_index)

    for i in range(self.nbr_items):
        if i in excluded:
            # Left at zero so the item is dropped from the result below.
            continue
        try:
            user_ratings[i] = self.predict_rating_userbased(
                user_index, i, k, k_min, 'All', sim, rating_normalisation)
        except Error:
            user_ratings[i] = 0.0

    candidates = [(int(item), user_ratings[item])
                  for item in user_ratings.nonzero()[0]]
    candidates.sort(key=itemgetter(1), reverse=True)
    # nbr_recommendations used to be ignored here, unlike the feature-based
    # variant of this method.
    top = candidates[:nbr_recommendations]
    return [item for item, _ in top], [rating for _, rating in top]
def read_options(*args, **kwargs):
    ''' Check/set the options passed by the calling module.

    Options may be given by keyword or positionally, in this order:
    Z, W, rowAttr, colAttr, crossAttr, learner, params, train_loss,
    test_loss, num_cv, K, L, model_filename. Positional values override
    keyword values. ``alphas`` (keyword only) is expanded into a list of
    ``{"alpha": value}`` dicts and takes precedence over ``params``.

    When W is not supplied it defaults to an integer CSR matrix with ones
    at the nonzero positions of Z.

    Returns the list of the thirteen option values in the order above.
    Raises KeyError when Z is given neither positionally nor by keyword.
    '''
    # A single positional argument is Z; the old `len(args) > 1` test fell
    # through to kwargs["Z"] in that case and raised KeyError.
    if args:
        Z = args[0]
    else:
        Z = kwargs["Z"]

    W = kwargs.get("W", None)
    # Skip building the default when W is supplied positionally: it would
    # be overwritten below anyway.
    if W is None and len(args) < 2:
        rows, cols = sp.find(Z)[:2]
        W = sp.coo_matrix(
            (np.ones(len(rows), dtype=int), (rows, cols)),
            shape=Z.shape, dtype=int).tocsr()

    rowAttr = kwargs.get("rowAttr", None)
    colAttr = kwargs.get("colAttr", None)
    crossAttr = kwargs.get("crossAttr", None)
    learner = kwargs.get("learner", "ridge")
    train_loss = kwargs.get("train_loss", "sq_err")
    test_loss = kwargs.get("test_loss", "mse")
    num_cv = kwargs.get("num_cv", default_cv)
    init_K = kwargs.get("K", default_K)
    init_L = kwargs.get("L", default_L)
    model_filename = kwargs.get("model_filename", default_model_filename)

    if "alphas" in kwargs:
        params = [{"alpha": alpha} for alpha in kwargs["alphas"]]
    else:
        params = kwargs.get("params", default_param_list)

    arglist = [Z, W, rowAttr, colAttr, crossAttr, learner, params,
               train_loss, test_loss,
               num_cv, init_K, init_L, model_filename]
    # kwargs get over-written by args
    for ix in range(1, len(args)):
        arglist[ix] = args[ix]
    return arglist
def draw_log_hist(X):
    """Draw the token-frequency histogram on log-log axes.

    For every column of ``X`` the number of rows with a nonzero entry is
    counted (printed as ``column count``); the plot shows, for each such
    count, how many columns have it.

    Args:
        X: 2-D matrix accepted by ``spr.find``; judging by the variable
            names, rows are users and columns are tokens.
    """
    _, cols, _ = spr.find(X)
    n_rows, n_cols = X.shape

    # Nonzero rows per column, computed in one pass instead of rescanning
    # all column indices once per column.
    users_per_token = np.bincount(cols, minlength=n_cols)
    for colNumb, users_count in enumerate(users_per_token):
        print("%d %d" % (colNumb, users_count))

    # A column can be nonzero in every row, so counts range over
    # 0..n_rows and the histogram needs n_rows + 1 bins (the old array of
    # size n_rows raised IndexError in that case).
    tokens_count = np.bincount(users_per_token[users_per_token > 0],
                               minlength=n_rows + 1).astype(float)
    x = np.flatnonzero(tokens_count)
    y = tokens_count[x]
    print(x)
    print(y)

    pl.figure(figsize=(30, 30))
    pl.title("Token frequency distribution")
    pl.xlabel('users_count', size=12)
    pl.ylabel('tokens_count', size=12)
    ax = pl.subplot(111)
    ax.set_xscale('log')
    ax.set_yscale('log')
    pl.scatter(x, y)
    pl.show()
    return
def extract_manifold_distances_knn(D, knn=(3, 4, 5, 7, 10), add_mst=None):
    '''
    Return the distances along a k nearest neighbour graph for the given
    distances D (using dijkstra). It also returns the knn graph itself.

    This is a generator function and will yield one result for each k
    given in knn.

    Optionally you can add the edges from an additional graph (usually the
    mst), in order to ensure full connectedness. Give the graph you want to
    add as add_mst=mst; its edges are only used where the knn graph has no
    entry.

    Parameters:
        D: square 2-D array of pairwise distances.
        knn: iterable of neighbourhood sizes (default is an immutable
            tuple; any iterable of ints works).
        add_mst: optional graph accepted by ``sparse.find``.

    Yields, for each k: (distances along knn graph, knn graph as csc matrix)
    '''
    # K Nearest Neighbours distances
    idxs = np.argsort(D)
    # Explicit index array: a bare range object is not a reliable fancy
    # index across numpy/scipy versions.
    r = np.arange(D.shape[0])
    for k in knn:
        idx = idxs[:, :k]
        _distances = sparse.csc_matrix(D.shape)
        # One column of idx at a time: the n-th nearest neighbour of every row.
        for neighbours in idx.T:
            _distances[r, neighbours] = D[r, neighbours]
        if add_mst is not None:
            for i, j, v in zip(*sparse.find(add_mst)):
                if _distances[i, j] == 0:
                    _distances[i, j] = v
        nearest_neighbour_distances = dijkstra(_distances, directed=False)
        yield nearest_neighbour_distances, _distances
def index_map_to_region_map( hom_mat, reg2gen ):
    """
    Translate a map on generators into a directed graph on regions.

    hom_mat : matrix representing a map on generators (index map); either a
    sparse matrix (anything with an ``nnz`` attribute) or a dense numpy
    matrix.

    reg2gen : dictionary mapping region -> generator(s), i.e. which
    regions support which generators.

    Returns a DiGraph with an edge from region k to the first region listed
    for every generator group containing a generator that k's generators
    map onto.
    """
    gen2reg = invert_dictionary( reg2gen )
    graph = DiGraph()
    is_sparse = hasattr( hom_mat, 'nnz' )

    for region in reg2gen.keys():
        gens = reg2gen[region]
        if is_sparse:
            # a region without generators maps nowhere
            if len( gens ) == 0:
                continue
            targets = sparse.find( hom_mat[:,gens] )[0]
        else:
            # dense matrix case: unwrap the nested list np.matrix produces
            targets = np.where( hom_mat[:,gens] != 0 )[0]
            targets = targets.tolist()[0]

        for target in targets:
            for glist in gen2reg.keys():
                if target in glist:
                    graph.add_edge( region, gen2reg[glist][0] )

    # node labels of the returned graph are the region keys
    return graph
def build_weight_matrix(self, tf_idf_matrix):
    """Return a CSR matrix holding ``self.options['WM']`` at every nonzero of the input.

    The result has the same shape as ``tf_idf_matrix``; positions that are
    zero in the input are not stored.
    """
    rows, cols, nonzero_vals = sparse.find(tf_idf_matrix)
    weights = np.full(nonzero_vals.shape[0], self.options['WM'], dtype=np.float64)
    weighted = sparse.coo_matrix((weights, (rows, cols)), shape=tf_idf_matrix.shape)
    return weighted.tocsr()
def _individuals_erank(score_mat, test_data):
    """Average expected rank of the test data over all individuals.

    For each individual the per-item ranks returned by ``_obj_erank`` are
    averaged, weighted by the observation counts in her test row; the
    result is the mean of these per-individual averages.

     INPUT:
    -------
        1. score_mat:    <(I, L) csr_mat>  users scores. Doesn't have to be
                                           probabilities (for SVD)
        2. test_data:    <(I, L) csr_mat>  users test observations.

     OUTPUT:
    --------
        1. avg_erank:    <float>   avg. erank of all the individuals.
    """
    started = time.time()
    n_users = score_mat.shape[0]
    erank_total = 0

    for user in range(n_users):
        # Rows 1 and 2 of the stacked triples: column indices and counts.
        user_obs = np.vstack(find(test_data[user]))
        items, counts = user_obs[1:]
        weighted = _obj_erank(score_mat[user], items)
        weighted *= counts
        erank_total += np.sum(weighted) / np.sum(counts)

        if user % 200 == 0:
            log.debug('Done testing %d out of %d users' % (user, n_users))

    elapsed = time.time() - started
    log.info('Erank for individuals took %d seconds. %.2f secs on avg for indiv' % (elapsed, elapsed / n_users))
    return erank_total / n_users
def get_vector_length(v):
    """Return the Euclidean length of a sparse vector.

    The length is computed from the nonzero values reported by
    ``sparse.find``. The previous implementation indexed ``v[(i, i)]`` for
    every nonzero row ``i``, which reads the wrong entry (or falls out of
    bounds) for a column vector.

    Args:
        v: vector stored as a 2-D sparse (or dense) matrix.

    Returns:
        float: square root of the sum of squared entries; 0.0 when empty.
    """
    values = sparse.find(v)[2]
    return math.sqrt(sum(x * x for x in values) * 1.0)
def fit(self, X, Y):
    """Fit every classifier in ``self.clfs`` on ``(X, Y)``.

    Also records:
        self.n_topics: number of columns of ``Y``.
        self.mu: number of nonzero entries of ``Y`` divided by the number
            of rows of ``X`` (always a float).

    Returns:
        self
    """
    self.n_topics = Y.shape[1]
    ones = len(sp.find(Y)[2])
    # float() keeps the ratio from being truncated by Python 2 integer
    # division; the result is unchanged on Python 3.
    self.mu = float(ones) / X.shape[0]
    for clf in self.clfs:
        clf.fit(X, Y)
    return self
def connectivityMatrixNew(self):
    """Draw log-normal patterns and build a sparse outer-product connectivity.

    Sets:
        self.patterns: ``(self.p, self.N)`` array of log-normal draws with
            parameters ``self.mu`` and ``self.sigma``.
        self.connectivity: ``(self.N, self.N)`` CSR matrix. Positions are
            Bernoulli(``self.c``) draws; the entry at (i, j) is
            ``dot(f(patterns)[:, i], g(patterns)[:, j])`` divided by
            ``self.c * self.N * self.intg2``.
    """
    self.patterns = np.random.lognormal(self.mu, self.sigma,
                                        size=(self.p, self.N))
    patterns_pre = self.g(self.patterns)
    patterns_post = self.f(self.patterns)

    # creating connectivity: 0/1 mask marking which entries exist
    mask = sparse.csr_matrix(
        1. * np.random.binomial(1, self.c, (self.N, self.N)))
    index_row, index_col, _ = sparse.find(mask)

    # Weights are computed in vectorised chunks rather than by assigning
    # CSR entries one at a time; chunking bounds the size of the
    # (p x chunk) temporaries.
    scale = 1. / (self.c * self.N * self.intg2)
    weights = np.empty(len(index_row))
    chunk = 65536
    for start in range(0, len(index_row), chunk):
        rows = index_row[start:start + chunk]
        cols = index_col[start:start + chunk]
        weights[start:start + chunk] = scale * np.einsum(
            'ij,ij->j', patterns_post[:, rows], patterns_pre[:, cols])

    self.connectivity = sparse.csr_matrix(
        (weights, (index_row, index_col)), shape=(self.N, self.N))
def evalModel(self, inData, inGt, inImg, gtShape, plot=True):
    """Evaluate the estimator on one image batch.

    ``inData`` is fed as ``self.inputImage``. When ``inGt`` is given it is
    fed either as indices/values (``self.gtSparse``) or directly, and the
    merged summary is written to ``self.test_writer``. When
    ``self.augment`` is set, ``self.doAug`` is fed as 0.0 so evaluation is
    not augmented. ``inImg`` and ``gtShape`` are not used by this variant.

    With ``plot`` and ``self.plotFM`` both set, feature maps are plotted to
    ``self.featureMapDir + "test_" + str(self.timestep)``.

    Returns the value of ``self.est.eval``.
    """
    have_gt = inGt is not None

    feedDict = {self.inputImage: inData}
    if have_gt and self.gtSparse:
        (gtOutY, gtOutX, gtVals) = sp.find(inGt)
        feedDict[self.gtIndices] = [gtOutY, gtOutX]
        feedDict[self.gtValues] = gtVals
    elif have_gt:
        feedDict[self.gt] = inGt

    # Do not augment when evaluating
    if self.augment:
        feedDict[self.doAug] = 0.0

    outVals = self.est.eval(feed_dict=feedDict, session=self.sess)

    if have_gt:
        summary = self.sess.run(self.mergedSummary, feed_dict=feedDict)
        self.test_writer.add_summary(summary, self.timestep)

    if plot:
        filename = self.featureMapDir + "test_" + str(self.timestep)
        if self.plotFM:
            self.evalAndPlotFeaturemaps(feedDict, filename)
    return outVals
def feature_training2(self, initialize_model = True, verbose = False):
    '''
    Compute each feature using a Gradient Descent approach.
    This version calls the Cython estimator_subloop() / predictor_subloop()
    functions.

    * initialize_model: when True, reset self.svd_u / self.svd_v to
      self.feature_init; otherwise the existing matrices are reused.
    * verbose: forwarded to estimator_subloop as an int.

    Each feature is trained for at least self.min_epochs epochs and then
    for as long as the RMSE keeps improving by self.min_improvement.
    '''
    rmse = 2.0
    # Bound before the loop so the stopping test is defined even when
    # self.min_epochs is 0 (it used to raise NameError in that case).
    rmse_last = float('inf')

    # Initialize the model with previous results if available
    if initialize_model:
        self.svd_v = np.zeros([self.dimensionality, self.nbr_users]) + self.feature_init
        self.svd_u = np.zeros([self.dimensionality, self.nbr_items]) + self.feature_init

    ratings_cache = np.zeros(self.nbr_users * self.nbr_items, dtype = np.float64)
    ratings_index, ratings = self.get_ratings()

    for f in range(self.dimensionality):
        epoch = 0
        while (epoch < self.min_epochs or rmse <= rmse_last - self.min_improvement):
            rmse_last = rmse
            rmse = estimator_subloop(f, epoch, self.min_improvement,
                                     self.dimensionality, self.feature_init,
                                     self.learning_rate, self.K,
                                     self.svd_u, self.svd_v,
                                     ratings_index, ratings, ratings_cache,
                                     self.nbr_users, self.nbr_items,
                                     int(verbose))
            epoch += 1
        predictor_subloop(f, epoch, self.dimensionality, self.feature_init,
                          self.svd_u, self.svd_v,
                          ratings_index, ratings, ratings_cache,
                          self.nbr_users, self.nbr_items)
def get_feedback(self):
    """Return the (user, feature) pairs of ``self.N`` and a per-user index.

    Returns:
        ratings_id: int32 array of shape ``(nnz, 2)`` filled, in iteration
            order, with the ``(user_index, feature_index)`` pairs yielded
            by ``self.N_iterator()``.
        ratings_hash: int32 array of shape ``(self.nbr_users, 2)``; row
            ``u`` holds ``[seek, span]``: the first row of ``ratings_id``
            belonging to user ``u`` and the number of rows belonging to
            that user, or ``[-1, -1]`` when the user has none.
    """
    nbr_ratings = find(self.N)[2].shape[0]
    ratings_id = np.zeros((nbr_ratings, 2), dtype=np.int32)
    for i, (user_index, feature_index) in enumerate(self.N_iterator()):
        ratings_id[i] = [int(user_index), int(feature_index)]

    # First occurrence and count of every user in one pass, replacing a
    # full scan per user and a bare `except:` for users without ratings.
    # NOTE: callers presumably read rows seek..seek+span, which is only a
    # contiguous block if ratings_id is grouped by user (the original
    # comment says this is assumed).
    ratings_hash = np.full((self.nbr_users, 2), -1, dtype=np.int32)
    users, first_seen, counts = np.unique(
        ratings_id[:, 0], return_index=True, return_counts=True)
    in_range = (users >= 0) & (users < self.nbr_users)
    ratings_hash[users[in_range], 0] = first_seen[in_range]
    ratings_hash[users[in_range], 1] = counts[in_range]

    return ratings_id, ratings_hash
def find_user_top_match(self, user_index, nbr_recommendations = 5):
    '''
    Compute all the feature's rating for a given user, sort the result and
    output the most relevant ones.

    * user_index: Internal id of the user
    * nbr_recommendations: Number of recommendations [5]

    Side effect: refreshes self.relationship_matrix_csc from
    self.relationship_matrix.

    Returns two parallel lists ordered by decreasing predicted rating: the
    item indices and their ratings. Items whose prediction is exactly zero
    (including failed predictions) are left out.
    '''
    user_ratings = np.zeros(self.nbr_items)
    self.relationship_matrix_csc = self.relationship_matrix.T.tocsc()
    already_rated = find(self.relationship_matrix_csc[:, user_index])[0]
    # Set lookup instead of scanning the index array for every item.
    # user_index itself is excluded as well, as in the original code.
    excluded = set(already_rated.tolist())
    excluded.add(user_index)

    for i in range(self.nbr_items):
        if i in excluded:
            # Left at zero so the item is dropped from the result below.
            continue
        try:
            user_ratings[i] = self.predict_rating(i, user_index)
        except Error:
            user_ratings[i] = 0.0

    # A list of pairs replaces dict.iteritems(), which is Python-2-only.
    candidates = [(int(item), user_ratings[item])
                  for item in user_ratings.nonzero()[0]]
    candidates.sort(key=itemgetter(1), reverse=True)
    top = candidates[0:nbr_recommendations]
    return [item for item, _ in top], [rating for _, rating in top]
def main(layers, selector, outfile, threshold=0.55):
    """Write a tab-separated table of climate-layer values at selected cells.

    Args:
        layers: passed to ``read_climate_layers``.
        selector: passed to ``get_selector_raster`` together with
            ``threshold``.
        outfile: output path, or the literal string ``'stdout'``.
        threshold: forwarded to ``get_selector_raster``.

    One row is written per nonzero cell of the selector raster: its two
    indices, its value, then the value of every weighted layer (in sorted
    layer-name order) at that cell. Progress goes to stderr.
    """
    print("Reading climate layers", file=sys.stderr)
    layers = read_climate_layers(layers)
    print("Reading selector raster", file=sys.stderr)
    selector = get_selector_raster(selector, threshold=threshold)
    print("Calculating weighted climate layers", file=sys.stderr)
    layers = get_weighted_layers(layers, selector)

    to_stdout = outfile == 'stdout'
    ofh = sys.stdout if to_stdout else open(outfile, 'w')
    try:
        print("Writing output matrix", file=sys.stderr)
        # Sort the layer names once rather than once per output row.
        layer_names = sorted(layers.keys())
        header = ['x', 'y', 'maxent_weight'] + layer_names
        print(*header, sep='\t', file=ofh)
        xs, ys, ws = sparse.find(csr_matrix(selector))
        for x, y, w in zip(xs, ys, ws):
            line = [x, y, w]
            line.extend(layers[name][x, y] for name in layer_names)
            print(*line, sep='\t', file=ofh)
    finally:
        # Close only a file opened here; closing sys.stdout would break
        # every later write to it by the caller.
        if not to_stdout:
            ofh.close()
    print("Done!", file=sys.stderr)
def threshold_coherence_based_mst(date12_list, coh_list):
    """Return a minimum spanning tree of the network, weighted by inverse coherence.

    Inputs:
        date12_list - list of string in YYMMDD-YYMMDD format
        coh_list    - list of float, average coherence for each interferogram
    Output:
        mst_date12_list - list of string in YYMMDD-YYMMDD format, for the
                          MST network of interferograms
    """
    # coh_list --> coh_mat --> weight_mat (inf where coherence is NaN)
    coh_mat = coherence_matrix(date12_list, coh_list)
    valid = ~np.isnan(coh_mat)
    wei_mat = np.full(coh_mat.shape, np.inf)
    wei_mat[valid] = 1/coh_mat[valid]

    # MST path based on weight matrix
    mst_mat_csr = sparse.csgraph.minimum_spanning_tree(sparse.csr_matrix(wei_mat))

    # Sorted list of all dates that occur in the pairs
    date12_list = ptime.yymmdd_date12(date12_list)
    m_dates = []
    s_dates = []
    for date12 in date12_list:
        m_dates.append(date12.split('-')[0])
        s_dates.append(date12.split('-')[1])
    unique_dates = list(set(m_dates + s_dates))
    date6_list = ptime.yymmdd(sorted(ptime.yyyymmdd(unique_dates)))

    # Convert MST index matrix into date12 list, earlier date first
    s_idx_list, m_idx_list = (index_array.tolist()
                              for index_array in sparse.find(mst_mat_csr)[0:2])
    mst_date12_list = []
    for m_idx, s_idx in zip(m_idx_list, s_idx_list):
        first, second = sorted([m_idx, s_idx])
        mst_date12_list.append(date6_list[first]+'-'+date6_list[second])
    return mst_date12_list
def graph_srw_transition_matrix(A):
    """
    For a graph given by an adjacency matrix A, construct the transition
    matrix of the simple random walk (srw) on the graph.

    A: an adjacency matrix, symmetric. Only the positions of its nonzero
       entries are used, not their values.

    Returns a CSR matrix P of shape (n, n) with P[i, j] = 1 / (number of
    nonzero entries in row i of A) for every nonzero A[i, j]. Rows without
    nonzero entries stay empty.
    """
    import numpy as np

    (I, J, V) = ssp.find(A)
    n = A.shape[0]
    # Count the nonzeros of each row explicitly. The previous
    # implementation scanned I for runs of equal row indices, which is only
    # correct if find() returns entries grouped by row; that ordering is
    # not guaranteed.
    degree = np.bincount(I, minlength=n)
    return ssp.csr_matrix((1. / degree[I], (I, J)), shape=(n, n))
def binarize_coo(coo):
    '''
    Build a new COO matrix of the same shape in which every nonzero entry
    of the input is replaced by 1.0. The input is left untouched.
    '''
    rows, cols, values = sp.find(coo)
    unit_data = np.ones(values.shape)
    binarized = sp.coo_matrix((unit_data, (rows, cols)), shape=coo.shape)
    return binarized
def Find(M):
    """Return the nonzero entries of a 2-D matrix as three flat lists.

    Args:
        M: 2-D numpy array, numpy matrix, or scipy sparse matrix.

    Returns:
        (rows, cols, vals): plain Python lists with the row index, column
        index and value of every nonzero entry.

    Raises:
        AssertionError: if M is neither a numpy array nor a scipy sparse
            matrix.
    """
    if isinstance(M, ndarray):  # numpy array or matrix
        # View as a base ndarray first: indexing an np.matrix keeps results
        # 2-D, which made tolist() return nested lists.
        dense = M.view(ndarray)
        rows, cols = where(dense)
        vals = dense[rows, cols]
    else:
        from scipy import sparse as sp
        assert sp.isspmatrix(M)
        rows, cols, vals = sp.find(M)
    return rows.tolist(), cols.tolist(), vals.tolist()
def _enumMaximumMatching2(g):
    """Find all maximum matchings in an undirected bipartite graph `g`.

    Similar to _enumMaximumMatching but implemented using adjacency matrix
    of graph for slight speed boost.

    Parameters
    ----------
    g:
        Undirected bipartite graph. Nodes are separated by their
        'bipartite' attribute.

    Returns
    -------
    list
        Each element is a list of edges ``(node_a, node_b)`` forming a
        maximum matching of `g`, with ``node_a`` taken from the
        ``bipartite == 0`` side.

    Author
    ------
    guangzhi XU ([email protected]; [email protected])
    Update time: 2017-05-21 20:04:51.
    """
    from scipy import sparse

    s1 = set(n for n, d in g.nodes(data=True) if d['bipartite'] == 0)
    s2 = set(g) - s1
    n1 = len(s1)
    # Fixed node order: the bipartite == 0 side occupies the first n1 slots.
    nodes = list(s1) + list(s2)
    # Position lookup built once instead of nodes.index() per matched node.
    node_pos = {node: pos for pos, node in enumerate(nodes)}

    adj = nx.adjacency_matrix(g, nodes).tolil()
    all_matches = []

    #----------------Find one matching----------------
    match = bipartite.hopcroft_karp_matching(g)

    matchadj = np.zeros(adj.shape).astype('int')
    for kk, vv in match.items():
        matchadj[node_pos[kk], node_pos[vv]] = 1
    matchadj = sparse.lil_matrix(matchadj)

    all_matches.append(matchadj)

    #-----------------Enter recursion-----------------
    all_matches = _enumMaximumMatchingIter2(adj, matchadj, all_matches, n1,
                                            None, True)

    #---------------Re-orient match arcs---------------
    all_matches2 = []
    for ii in all_matches:
        # Only the first n1 rows: edges leaving the bipartite == 0 side.
        match_list = sparse.find(ii[:n1] == 1)
        m1 = [nodes[jj] for jj in match_list[0]]
        m2 = [nodes[jj] for jj in match_list[1]]
        # Materialise the pairs: a bare zip() is a one-shot iterator on
        # Python 3, not the list the docstring promises.
        all_matches2.append(list(zip(m1, m2)))

    print('got all')
    return all_matches2
def save_bow_as_sparse(bow, filename):
    '''
    Write the nonzero entries of ``bow`` to ``filename`` as raw uint32 data.

    The file contains all row indices, then all column indices, then all
    values of the nonzero entries, each cast to uint32, with no header.
    Storing only the nonzeros should decrease disk usage and removes the
    need to explicitly write the number of rows.
    '''
    triplet = sp.find(sp.coo_matrix(bow))
    packed = np.array(triplet, dtype=np.uint32)
    packed.tofile(filename)
def construct_pyg_graph(node_ids, adj, dists, node_features, y, node_label='drnl', use_orig_A=False, directed=False, use_orig_graph=False):
    """Construct a pytorch_geometric graph from a scipy adjacency matrix.

    Args:
        node_ids: node identifiers, converted to a ``LongTensor``.
        adj: scipy sparse adjacency matrix; its nonzero values become edge
            weights when the original graph is used.
        dists: per-node values used as labels when ``node_label == 'hop'``.
        node_features: passed to ``Data`` / the line-graph constructors.
        y: graph label, wrapped as ``torch.tensor([y])``.
        node_label: ``'drnl'`` (``drnl_node_labeling(adj, 0, 1)``) or
            ``'hop'`` (``torch.tensor(dists)``).
        use_orig_A: on the undirected path, also attach a ``Data`` object
            of the original graph as ``o_data``.
        directed: when true (and ``use_orig_graph`` is false) return the
            raw output of ``construct_line_graph_directed``.
        use_orig_graph: when true, return a ``Data`` object of the original
            graph and ignore ``directed`` / ``use_orig_A``.

    Returns:
        A ``Data`` object, or for the directed case the tuple
        ``(L_node_features, L_edges, L_num_nodes, L_node_ids,
        L_node_classes)``.

    Raises:
        ValueError: if a node labelling is needed and ``node_label`` is not
            ``'drnl'`` or ``'hop'``.
    """
    num_nodes = adj.shape[0]
    node_ids = torch.LongTensor(node_ids)
    y = torch.tensor([y])

    def _node_labels():
        # Node labelling selected by `node_label`.
        if node_label == 'drnl':
            return drnl_node_labeling(adj, 0, 1)
        if node_label == 'hop':
            return torch.tensor(dists)
        raise ValueError("unknown node_label: %r" % (node_label,))

    def _orig_graph_data(z):
        # Data object for the original graph, with edges taken from adj.
        u, v, r = ssp.find(adj)
        u, v = torch.LongTensor(u), torch.LongTensor(v)
        r = torch.LongTensor(r)
        edge_index = torch.stack([u, v], 0)
        edge_weight = r.to(torch.float)
        return Data(node_features, edge_index, edge_weight=edge_weight, y=y,
                    z=z, node_id=node_ids, num_nodes=num_nodes)

    if use_orig_graph:
        return _orig_graph_data(_node_labels())

    if not directed:
        z = _node_labels()
        # edge_index / edge_weight used to be undefined on this path, so
        # use_orig_A=True raised NameError; they are now built from adj.
        o_data = _orig_graph_data(z) if use_orig_A else None
        L_node_features, L_edges, L_num_nodes, w, z1, z2, L_node_ids = construct_line_graph_undirected(
            node_ids, adj, z, node_features)
        edge_weight = torch.ones(len(L_edges))
        data = Data(L_node_features, L_edges.t(), edge_weight=edge_weight,
                    y=y, w=torch.LongTensor(w), z1=torch.LongTensor(z1),
                    z2=torch.LongTensor(z2), node_id=L_node_ids,
                    num_nodes=len(L_node_ids), o_data=o_data)
        return data

    L_node_features, L_edges, L_num_nodes, L_node_ids, L_node_classes = construct_line_graph_directed(
        node_ids, adj, node_features)
    return L_node_features, L_edges, L_num_nodes, L_node_ids, L_node_classes
print(e) finally: return m def get_sup_vec(gmm): mu = gmm.means_.flatten() sd = gmm.covariances_.flatten() return np.concatenate([mu, sd]) res = [] for a, artist in tqdm(zip(A.T, artists), ncols=80): fns = [] for tid in tqdm(sp.find(a)[1], ncols=80): if tids[tid] in path_map: fn = os.path.join(song_root, path_map[tids[tid]]) if os.path.exists(fn): fns.append(fn) else: continue else: continue M = filter(lambda x: x is not None, pmap(get_mfcc, fns, n_jobs=16)) if len(M) == 0: res.append((artist, None)) continue
def mkNN(X, k, measure='euclidean'):
    """
    Build the mutual k-nearest-neighbour edge list of X, working in batches.

    An edge between i and j is kept when each is among the other's k
    nearest neighbours; edges of the minimum spanning tree of the kNN
    distance graph are added as well. Every edge appears once as (i, j)
    with i < j.

    Parameters
    ----------
    X : [n_samples, n_dim] array
    k : int
        number of neighbors for each sample in X
    measure : str
        distance metric name; metrics listed by ``distance_metrics()`` go
        through ``pairwise_distances``, all others through
        ``scipy.spatial.distance.cdist``.

    Returns
    -------
    array with one row ``(i, j, value)`` per retained edge
    """
    from scipy.spatial import distance
    from scipy.sparse import csr_matrix, triu, find
    from scipy.sparse.csgraph import minimum_spanning_tree

    n_samples = X.shape[0]
    batch = 10000
    # Partition positions 1..k; position 0 is skipped below.
    kth = tuple(np.arange(1, k + 1))

    nbr_idx = np.zeros((n_samples, k))
    nbr_dist = np.zeros_like(nbr_idx)

    # Distances are computed one batch of rows at a time to bound memory.
    for lo in np.arange(0, n_samples, batch):
        hi = min(lo + batch, n_samples)

        if measure in list(distance_metrics().keys()):
            dists = pairwise_distances(X=X[lo:hi], Y=X, metric=measure, n_jobs=-1)
        else:
            dists = distance.cdist(X[lo:hi], X, measure)

        order = np.argpartition(dists, kth, axis=1)
        nearest = order[:, 1:k + 1]
        nbr_idx[lo:hi, :] = nearest

        # Gather the distance belonging to each selected neighbour.
        row_sel = tuple(np.repeat(np.arange(hi - lo), k))
        col_sel = tuple(nearest.ravel())
        nbr_dist[lo:hi, :] = np.reshape(dists[row_sel, col_sel], (hi - lo, k))
        del dists

    src = np.repeat(np.arange(n_samples), k)
    dst = nbr_idx.ravel()

    P = csr_matrix((np.ones((n_samples * k)), (src.ravel(), dst)),
                   shape=(n_samples, n_samples))
    Q = csr_matrix((nbr_dist.ravel(), (src.ravel(), dst)),
                   shape=(n_samples, n_samples))

    Tcsr = minimum_spanning_tree(Q)
    # Mutual neighbours (present in both directions) plus symmetrised MST.
    P = P.minimum(P.transpose()) + Tcsr.maximum(Tcsr.transpose())
    P = triu(P, k=1)

    return np.asarray(find(P)).T
def _extrude_2d(g: pp.Grid, z: np.ndarray) -> Tuple[pp.Grid, np.ndarray, np.ndarray]:
    """ Extrude a 2d grid into 3d by prismatic extension.

    The original grid is assumed to be in the xy-plane, that is, any existing non-zero
    z-direction is ignored.

    Both the original and the new grid will have their geometry computed.

    Parameters:
        g (pp.Grid): Original grid to be extruded. Should have dimension 2.
        z (np.ndarray): z-coordinates of the nodes in the extruded grid. Should be
            either non-negative or non-positive, and be sorted in increasing or
            decreasing order, respectively.

    Returns:
        pp.Grid: A grid of dimension 3.
        np.array of np.arrays: Cell mappings, so that element ci gives all indices of
            cells in the extruded grid that comes from cell ci in the original grid.
        np.array of np.arrays: Face mappings, so that element fi gives all indices of
            faces in the extruded grid that comes from face fi in the original grid.

    Raises:
        ValueError: If the nodes of a 2d cell cannot be sorted into a
            counterclockwise polygon in either direction.

    """
    g.compute_geometry()

    negative_extrusion = np.all(z <= 0)

    ## Bookkeeping of the number of grid items

    # Number of nodes in the z-direction
    num_node_layers = z.size
    # Number of cell layers, one less than the nodes
    num_cell_layers = num_node_layers - 1

    # Short hand for the number of cells in the 2d grid
    nc_2d = g.num_cells
    nf_2d = g.num_faces
    nn_2d = g.num_nodes

    # The number of nodes in the 3d grid is given by the number of 2d nodes, and the
    # number of node layers
    nn_3d = nn_2d * num_node_layers
    # The 3d cell count is similar to that for the nodes
    nc_3d = nc_2d * num_cell_layers
    # The number of faces is more intricate: In each layer of cells, there will be as
    # many faces as there is in the 2d grid. In addition, in the direction of extrusion
    # there will be one set of faces per node layer, with each layer containing as many
    # faces as there are cells in the 2d grid
    nf_3d = nf_2d * num_cell_layers + nc_2d * num_node_layers

    ## Nodes - only coordinates are needed
    # The nodes in the 2d grid are copied for all layers, with the z-coordinates changed
    # for each layer. This means that for a vertical pillar, the face-node and cell-node
    # relations can be inferred from that in the original 2d grid, with index increments
    # of size nn_2d
    x_layer = g.nodes[0]
    y_layer = g.nodes[1]

    nodes = np.empty((3, 0))
    # Stack the layers of nodes
    for zloc in z:
        nodes = np.hstack((nodes, np.vstack((x_layer, y_layer, zloc * np.ones(nn_2d)))))

    ## Face-node relations
    # The 3d grid has two types of faces: Those formed by faces in the 2d grid, termed
    # 'vertical' below, and those on the top and bottom of the 3d cells, termed
    # horizontal

    # Face-node relation for the 2d grid. We know there are exactly two nodes in each
    # 2d face.
    fn_2d = g.face_nodes.indices.reshape((2, g.num_faces), order="F")

    # Nodes of the faces for the bottom layer of 3d cells. These are formed by
    # connecting nodes in the bottom layer with those immediately above
    fn_layer = np.vstack((fn_2d[0], fn_2d[1], fn_2d[1] + nn_2d, fn_2d[0] + nn_2d))

    # For the vertical cells, the flux direction indicated in cell_face map will be
    # inherited from the 2d grid (see below). The normal vector, which should be
    # consistent with this value, is effectively computed from the ordering of the
    # face-node relation (and the same is true for several other geometric quantities).
    # This requires that the face-nodes are sorted in a CCW order when seen from the
    # side of a positive cell_face value. To sort this out, we need to flip some of the
    # columns in fn_layer

    # Faces, cells and values of the 2d cell-face map
    [fi, ci, sgn] = sps.find(g.cell_faces)
    # Only consider each face once
    _, idx = np.unique(fi, return_index=True)

    # The node ordering in fn_layer will be CCW seen from cell ci if the cell center of
    # ci is CW relative to the line from the first to the second node of the 2d cell.
    #
    # Example: with p0 = [0, 0, 0], p1 = [1, 0, 0], the 3d face will have further nodes
    #               p2 = [1, 0, 1], p3 = [0, 0, 1].
    #    This will be counterclockwise to a 2d cell center of, say, [0.5, -0.5, 0],
    #    (which is CW relative to p0 and p1)
    #
    p0 = g.nodes[:, fn_2d[0, fi[idx]]]
    p1 = g.nodes[:, fn_2d[1, fi[idx]]]
    pc = g.cell_centers[:, ci[idx]]
    ccw_2d = pp.geometry_property_checks.is_ccw_polyline(p0, p1, pc)

    # We should flip those columns in fn_layer where the sign is positive, and the 2d
    # is not ccw (meaning the 3d will be). Similarly, also flip negative signs and 2d
    # ccw.
    flip = np.logical_or(
        np.logical_and(sgn[idx] > 0, np.logical_not(ccw_2d)),
        np.logical_and(sgn[idx] < 0, ccw_2d),
    )

    # Finally, if the extrusion is in the negative direction, the ordering of all
    # face-node relations is the opposite of that indicated above.
    if negative_extrusion:
        flip = np.logical_not(flip)

    fn_layer[:, flip] = fn_layer[np.array([1, 0, 3, 2])][:, flip]

    # The face-node relation for the vertical cells are found by stacking those in the
    # bottom layer, with an appropriate offset. This also implies that the vertical
    # faces of a cell in layer k are the same as the faces of the corresponding 2d cell,
    # with the appropriate adjustments for the number of faces and cells in each layer
    fn_rows_vertical = np.empty((4, 0))
    # Loop over all layers of cells
    for k in range(num_cell_layers):
        fn_rows_vertical = np.hstack((fn_rows_vertical, fn_layer + nn_2d * k))

    # Reshape the node indices into a single array
    fn_rows_vertical = fn_rows_vertical.ravel("F")

    # All vertical faces have exactly four nodes
    nodes_per_face_vertical = 4
    # Aim for a csc-representation of the faces. Column pointers
    fn_cols_vertical = np.arange(
        0, nodes_per_face_vertical * nf_2d * num_cell_layers, nodes_per_face_vertical
    )

    # Next, deal with the horizontal faces. The face-node relation is based on the
    # cell-node relation of the 2d grid.
    # The structure of this construction is a bit more involved than for the vertical
    # faces, since the 2d cells have an unknown, and generally varying, number of nodes
    cn_2d = g.cell_nodes()

    # Short hand for node indices of each cell.
    cn_ind_2d = cn_2d.indices.copy()

    # Similar to the vertical faces, the face-node relation in 3d should match the
    # sign in the cell-face relation, so that the generated normal vector points out of
    # the cell with cf-value 1.
    # This requires a sorting of the nodes for each cell
    # (the loop variable is deliberately not called ci, which already names the cell
    # index array returned by sps.find above)
    for cell_2d in range(nc_2d):
        # Node indices of this 2d cell
        start = cn_2d.indptr[cell_2d]
        stop = cn_2d.indptr[cell_2d + 1]
        ni = cn_ind_2d[start:stop]

        coord = g.nodes[:2, ni]
        # Sort the points.
        # IMPLEMENTATION NOTE: this probably assumes convexity of the 2d cell.
        sort_ind = pp.utils.sort_points.sort_point_plane(
            np.vstack((coord, np.zeros(coord.shape[1]))),
            g.cell_centers[:, cell_2d].reshape((-1, 1)),
        )
        # Indices that sort the nodes. The sort function contains a rotation, which
        # implies that it is unknown whether the ordering is cw or ccw
        # If the sorted points are ccw, we store them, unless the extrusion is negative
        # in which case the ordering should be cw, and the points are turned.
        if pp.geometry_property_checks.is_ccw_polygon(coord[:, sort_ind]):
            if negative_extrusion:
                cn_ind_2d[start:stop] = cn_ind_2d[start:stop][sort_ind[::-1]]
            else:
                cn_ind_2d[start:stop] = cn_ind_2d[start:stop][sort_ind]
        # Else, the ordering should be negative.
        elif pp.geometry_property_checks.is_ccw_polygon(coord[:, sort_ind[::-1]]):
            if negative_extrusion:
                cn_ind_2d[start:stop] = cn_ind_2d[start:stop][sort_ind]
            else:
                cn_ind_2d[start:stop] = cn_ind_2d[start:stop][sort_ind[::-1]]
        else:
            raise ValueError("this should not happen. Is the cell non-convex??")

    # Compressed column storage for horizontal faces: Store node indices
    # (the builtin int replaces the np.int alias, which NumPy no longer provides)
    fn_rows_horizontal = np.array([], dtype=int)
    # .. and pointers to the start of new faces
    fn_cols_horizontal = np.array(0, dtype=int)
    # Loop over all layers of nodes (one more than number of cells)
    # This means that the horizontal faces of a given cell is given by its index (bottom)
    # and its index + the number of 2d cells, both offset with the total number of
    # vertical faces
    for k in range(num_node_layers):
        # The horizontal cell-node relation for this layer is the bottom one, plus an
        # offset of the number of 2d nodes, per layer
        fn_rows_horizontal = np.hstack((fn_rows_horizontal, cn_ind_2d + nn_2d * k))
        # The index pointers are those of the 2d cell-node relation.
        # Adjustment for the vertical faces is done below
        # The leading element of the 2d indptr is dropped for each layer; the end
        # pointer of one layer doubles as the start pointer of the next, and the end
        # pointer of the last layer closes the full array.
        fn_cols_horizontal = np.hstack(
            (fn_cols_horizontal, cn_2d.indptr[1:] + cn_ind_2d.size * k)
        )

    # The horizontal faces are appended to the vertical ones. The node indices are the
    # same, but the face indices must be increased by the number of vertical faces
    num_vertical_faces = nf_2d * num_cell_layers
    fn_cols_horizontal += num_vertical_faces * nodes_per_face_vertical

    # Put together the vertical and horizontal data, create the face-node relation
    indptr = np.hstack((fn_cols_vertical, fn_cols_horizontal)).astype(int)
    indices = np.hstack((fn_rows_vertical, fn_rows_horizontal)).astype(int)
    data = np.ones(indices.size, dtype=int)

    # Finally, construct the face-node sparse matrix
    face_nodes = sps.csc_matrix((data, indices, indptr), shape=(nn_3d, nf_3d))

    ### Next the cell-faces.
    # Similar to the face-nodes, the easiest option is first to deal with the vertical
    # faces, which can be inferred directly from faces in the 2d grid, and then the
    # horizontal direction.
    # IMPLEMENTATION NOTE: Since all cells have both horizontal and vertical faces, and
    # these are found in separate operations, the easiest way to assemble the 3d
    # cell-face matrix is to construct information for a coo-matrix (not compressed
    # storage), and then convert later. This has some overhead, but the alternative
    # is to combine and sort the face indices in the horizontal and vertical components
    # so that all faces of any cell is stored together. This is most conveniently
    # left to scipy sparse .tocsc() function

    ## Vertical faces
    # For the vertical faces, the information from the 2d grid can be copied
    cf_rows_2d = g.cell_faces.indices
    cf_cols_2d = g.cell_faces.indptr
    cf_data_2d = g.cell_faces.data

    cf_rows_vertical = np.array([], dtype=int)
    # For the cells, we will store the number of faces for each cell. This will later
    # be expanded to a full set of cell indices
    cf_vertical_cell_count = np.array([], dtype=int)
    cf_data_vertical = np.array([])

    for k in range(num_cell_layers):
        # The face indices are found from the 2d information, with increments that
        # reflect how many layers of vertical faces there are below
        cf_rows_vertical = np.hstack((cf_rows_vertical, cf_rows_2d + k * nf_2d))
        # The diff here gives the number of faces per cell
        cf_vertical_cell_count = np.hstack(
            (cf_vertical_cell_count, np.diff(cf_cols_2d))
        )
        # The data is just plus and minus ones, no need to adjust
        cf_data_vertical = np.hstack((cf_data_vertical, cf_data_2d))

    # Expand information of the number of faces per cell into a corresponding full set
    # of cell indices
    cf_cols_vertical = pp.utils.matrix_compression.rldecode(
        np.arange(nc_3d), cf_vertical_cell_count
    )

    ## Horizontal faces
    # There is one set of faces per layer of nodes.
    # The cell_face relation will assign -1 to the upper cells, and +1 to lower cells.
    # This corresponds to normal vectors pointing upwards.
    # The bottom and top layers are special, in that they have only one neighboring
    # cell. All other layers have two (they are internal)

    # Bottom layer
    cf_rows_horizontal = num_vertical_faces + np.arange(nc_2d)
    cf_cols_horizontal = np.arange(nc_2d)
    cf_data_horizontal = -np.ones(nc_2d, dtype=int)

    # Intermediate layers
    for k in range(1, num_cell_layers):
        # Face indices are given twice, for the lower and upper neighboring cell
        # The offset of the face index is the number of vertical faces plus the number
        # of horizontal faces in lower layers
        rows_here = (
            num_vertical_faces
            + k * nc_2d
            + np.hstack((np.arange(nc_2d), np.arange(nc_2d)))
        )
        cf_rows_horizontal = np.hstack((cf_rows_horizontal, rows_here))

        # Cell indices, first of the lower layer, then of the upper
        cols_here = np.hstack(
            ((k - 1) * nc_2d + np.arange(nc_2d), k * nc_2d + np.arange(nc_2d))
        )
        cf_cols_horizontal = np.hstack((cf_cols_horizontal, cols_here))
        # Data: +1 for the lower cells, -1 for the upper
        data_here = np.hstack((np.ones(nc_2d), -np.ones(nc_2d)))
        cf_data_horizontal = np.hstack((cf_data_horizontal, data_here))

    # Top layer, with index offset for all other faces
    cf_rows_horizontal = np.hstack(
        (
            cf_rows_horizontal,
            num_vertical_faces + num_cell_layers * nc_2d + np.arange(nc_2d),
        )
    )
    # Similarly, the cell indices of the topmost layer
    cf_cols_horizontal = np.hstack(
        (cf_cols_horizontal, (num_cell_layers - 1) * nc_2d + np.arange(nc_2d))
    )
    # Only +1 in the data (opposite to lowermost layer)
    cf_data_horizontal = np.hstack((cf_data_horizontal, np.ones(nc_2d)))

    # Merge horizontal and vertical layers
    cf_rows = np.hstack((cf_rows_horizontal, cf_rows_vertical))
    cf_cols = np.hstack((cf_cols_horizontal, cf_cols_vertical))
    cf_data = np.hstack((cf_data_horizontal, cf_data_vertical))

    cell_faces = sps.coo_matrix(
        (cf_data, (cf_rows, cf_cols)), shape=(nf_3d, nc_3d)
    ).tocsc()

    tags = _define_tags(g, num_cell_layers)

    # Record the operation in the name history of the new grid. This function
    # extrudes from 2d to 3d, so that is the label attached.
    g_info = g.name.copy()
    g_info.append("Extrude 2d->3d")

    g_new = pp.Grid(3, nodes, face_nodes, cell_faces, g_info, tags=tags)
    g_new.compute_geometry()

    # Mappings between old and new cells and faces
    cell_map, face_map = _create_mappings(g, g_new, num_cell_layers)

    return g_new, cell_map, face_map
def graph_sparsify(M, epsilon, maxiter=10):
    r"""Sparsify a graph (with Spielman-Srivastava).

    Edges are sampled with probability proportional to weight times
    resistance distance. If the sampled graph is disconnected, epsilon is
    moved halfway towards its lower bound and the sampling is repeated, at
    most `maxiter` times.

    Parameters
    ----------
    M : Graph or sparse matrix
        Graph structure or a Laplacian matrix
    epsilon : float
        Sparsification parameter
    maxiter : int
        Maximum number of sampling attempts

    Returns
    -------
    Mnew : Graph or sparse matrix
        New graph structure or sparse matrix (the Laplacian of the
        sparsified graph, in LIL format, when a matrix was given)

    Raises
    ------
    NotImplementedError
        If `M` is a graph whose Laplacian is not combinatorial.
    ValueError
        If `epsilon` is outside ``[1/sqrt(N), 1)``.

    Notes
    -----
    Epsilon should be between 1/sqrt(N) and 1

    Examples
    --------
    >>> from pygsp import graphs, reduction
    >>> G = graphs.Sensor(256, Nc=20, distributed=True)
    >>> epsilon = 0.4
    >>> G2 = reduction.graph_sparsify(G, epsilon)

    References
    ----------
    See :cite:`spielman2011graph`, :cite:`rudelson1999random` and :cite:`rudelson2007sampling`.
    for more informations

    """
    # Test the input parameters
    if isinstance(M, graphs.Graph):
        if not M.lap_type == 'combinatorial':
            raise NotImplementedError
        L = M.L
    else:
        L = M

    N = np.shape(L)[0]

    if not 1. / np.sqrt(N) <= epsilon < 1:
        raise ValueError('GRAPH_SPARSIFY: Epsilon out of required range')

    # Not sparse
    resistance_distances = utils.resistance_distance(L).toarray()

    # Get the Weight matrix
    if isinstance(M, graphs.Graph):
        W = M.W
    else:
        W = np.diag(L.diagonal()) - L.toarray()
        W[W < 1e-10] = 0

    W = sparse.coo_matrix(W)
    W.data[W.data < 1e-10] = 0
    W = W.tocsc()
    W.eliminate_zeros()

    # Only the lower triangle is sampled; the result is symmetrised below.
    start_nodes, end_nodes, weights = sparse.find(sparse.tril(W))

    # Calculate the new weights.
    weights = np.maximum(0, weights)
    Re = np.maximum(0, resistance_distances[start_nodes, end_nodes])
    Pe = weights * Re
    Pe = Pe / np.sum(Pe)

    # Rudelson, 1996 Random Vectors in the Isotropic Position
    # (too hard to figure out actual C0)
    C0 = 1 / 30.
    # Rudelson and Vershynin, 2007, Thm. 3.1
    C = 4 * C0

    edge_sampler = stats.rv_discrete(values=(np.arange(np.shape(Pe)[0]), Pe))

    for i in range(maxiter):
        # The number of draws depends on epsilon, which may shrink between
        # attempts, so it is recomputed on every pass.
        q = round(N * np.log(N) * 9 * C**2 / (epsilon**2))

        results = edge_sampler.rvs(size=int(q))
        # How often each edge was drawn (replaces scipy.stats.itemfreq,
        # which SciPy no longer provides).
        sampled, occurrences = np.unique(results, return_counts=True)
        per_spin_weights = weights / (q * Pe)

        counts = np.zeros(np.shape(weights)[0])
        counts[sampled.astype(int)] = occurrences
        new_weights = counts * per_spin_weights

        sparserW = sparse.csc_matrix((new_weights, (start_nodes, end_nodes)),
                                     shape=(N, N))
        sparserW = sparserW + sparserW.T
        # Combinatorial Laplacian: degrees on the diagonal minus the weights.
        # (Using the diagonal of W itself would yield -W whenever the graph
        # has no self-loops.)
        degrees = np.asarray(sparserW.sum(axis=1)).ravel()
        sparserL = sparse.diags(degrees, 0) - sparserW

        if graphs.Graph(W=sparserW).is_connected():
            break
        elif i == maxiter - 1:
            logger.warning(
                'Despite attempts to reduce epsilon, sparsified graph is disconnected'
            )
        else:
            # Move epsilon halfway towards its lower bound 1/sqrt(N), which
            # increases the number of draws of the next attempt.
            epsilon -= (epsilon - 1 / np.sqrt(N)) / 2.

    if isinstance(M, graphs.Graph):
        # Recover the off-diagonal weights from the Laplacian.
        sparserW = sparse.diags(sparserL.diagonal(), 0) - sparserL
        if not M.is_directed():
            sparserW = (sparserW + sparserW.T) / 2.

        Mnew = graphs.Graph(W=sparserW)
    else:
        Mnew = sparse.lil_matrix(sparserL)

    return Mnew
from numpy import *
from scipy.sparse import block_diag, find, hstack
import string

# Script: write a random block-diagonal test graph to 'graph_test.abc' as
# "<node> <node> <weight>" lines, one per stored entry of the matrix.

# Number of diagonal blocks (fully connected clusters).
nCl = 10
# Random size for every block, each between 1 and 9 nodes.
coos = random.randint(1, 10, size=nCl).tolist()
# Build a real list: a lazy map object can only be iterated once, which
# block_diag does not support on Python 3.
lob = [ones((n, n)) for n in coos]
graph = block_diag(lob)
# One row (row index, column index, value) per nonzero entry.
graph_abc = vstack(find(graph)).T

with open('graph_test.abc', 'w') as fh:
    for i in range(graph_abc.shape[0]):
        ld = graph_abc[i].tolist()
        # Node indices are written as single printable characters; with at
        # most 10 blocks of at most 9 nodes the index stays below 90.
        ld[0] = string.printable[int(ld[0])]
        ld[1] = string.printable[int(ld[1])]
        fh.write('%s %s %.6f\n' % tuple(ld))
def matrix_rhs(self, g, data):
    """
    Return the matrix and right-hand side for a discretization of a second
    order elliptic equation using hybrid dual virtual element method.

    The discretization parameters are read from
    ``data[pp.PARAMETERS][self.keyword]`` under the keys:
    second_order_tensor : permeability tensor; its ``values`` attribute is
        indexed as ``values[i, j, cell]``.
    source : array indexed by cell
        Scalar source term.
    bc : boundary condition object with boolean ``is_dir`` and ``is_neu``
        face masks, or None for no boundary treatment.
    bc_values : array indexed by face
        Values of the boundary conditions.
    aperture : array indexed by cell

    Parameters
    ----------
    g : grid, or a subclass, with geometry fields computed.
    data: dictionary to store the data.

    Return
    ------
    matrix: sparse csr
        Hybrid matrix acting on the face unknowns, obtained by static
        condensation cell by cell. For a 0-d grid an identity matrix of
        size ``self.ndof(g)`` is returned instead.
    rhs: array (g.num_faces)
        Right-hand side which contains the boundary conditions and the
        scalar source term. For a 0-d grid a single zero.

    Examples
    --------
    (illustrative; NOTE(review): the helper names below are not defined in
    this block - confirm against the current API)

    b_faces_neu = ... # id of the Neumann faces
    b_faces_dir = ... # id of the Dirichlet faces
    bnd = bc.BoundaryCondition(g, np.hstack((b_faces_dir, b_faces_neu)),
                            ['dir']*b_faces_dir.size + ['neu']*b_faces_neu.size)
    bnd_val = ... # one value per face

    data = {pp.PARAMETERS: {keyword: {'second_order_tensor': perm,
                                      'source': f, 'bc': bnd,
                                      'bc_values': bnd_val,
                                      'aperture': aperture}}}

    H, rhs = hybrid.matrix_rhs(g, data)
    l = sps.linalg.spsolve(H, rhs)
    u, p = hybrid.compute_up(g, l, data)
    P0u = dual.project_u(g, perm, u)

    """
    # pylint: disable=invalid-name

    # If a 0-d grid is given then we return an identity matrix
    if g.dim == 0:
        return sps.identity(self.ndof(g), format="csr"), np.zeros(1)

    parameter_dictionary = data[pp.PARAMETERS][self.keyword]
    k = parameter_dictionary["second_order_tensor"]
    f = parameter_dictionary["source"]
    bc = parameter_dictionary["bc"]
    bc_val = parameter_dictionary["bc_values"]
    a = parameter_dictionary["aperture"]

    faces, _, sgn = sps.find(g.cell_faces)

    # Map the domain to a reference geometry (i.e. equivalent to compute
    # surface coordinates in 1d and 2d)
    c_centers, f_normals, f_centers, _, _, _ = pp.map_geometry.map_grid(g)

    # Weight for the stabilization term
    diams = g.cell_diameters()
    weight = np.power(diams, 2 - g.dim)

    # Allocate the data to store matrix entries, that's the most efficient
    # way to create a sparse matrix. Every cell contributes a dense block of
    # (number of faces of the cell)^2 entries.
    size = np.sum(
        np.square(g.cell_faces.indptr[1:] - g.cell_faces.indptr[:-1]))
    row = np.empty(size, dtype=int)
    col = np.empty(size, dtype=int)
    # Named `vals` so that the `data` argument is not shadowed.
    vals = np.empty(size)
    rhs = np.zeros(g.num_faces)

    idx = 0
    # Use a dummy keyword to trick the constructor of dualVEM.
    massHdiv = pp.MVEM("dummy").massHdiv

    # define the function to compute the inverse of the permeability matrix
    if g.dim == 1:
        inv_matrix = DualElliptic._inv_matrix_1d
    elif g.dim == 2:
        inv_matrix = DualElliptic._inv_matrix_2d
    elif g.dim == 3:
        inv_matrix = DualElliptic._inv_matrix_3d

    for c in np.arange(g.num_cells):
        # For the current cell retrieve its faces
        loc = slice(g.cell_faces.indptr[c], g.cell_faces.indptr[c + 1])
        faces_loc = faces[loc]
        ndof = faces_loc.size

        # Retrieve permeability and normals assumed outward to the cell.
        sgn_loc = sgn[loc].reshape((-1, 1))
        normals = np.multiply(np.tile(sgn_loc.T, (g.dim, 1)),
                              f_normals[:, faces_loc])

        # Compute the H_div-mass local matrix
        A = massHdiv(
            k.values[0:g.dim, 0:g.dim, c],
            inv_matrix(k.values[0:g.dim, 0:g.dim, c]),
            c_centers[:, c],
            a[c] * g.cell_volumes[c],
            f_centers[:, faces_loc],
            a[c] * normals,
            np.ones(ndof),
            diams[c],
            weight[c],
        )[0]
        # Compute the Div local matrix
        B = -np.ones((ndof, 1))
        # Compute the hybrid local matrix
        C = np.eye(ndof, ndof)

        # Perform the static condensation to compute the hybrid local matrix
        invA = np.linalg.inv(A)
        S = 1 / np.dot(B.T, np.dot(invA, B))
        L = np.dot(np.dot(invA, np.dot(B, np.dot(S, B.T))), invA)
        L = np.dot(np.dot(C.T, L - invA), C)

        # Compute the local hybrid right using the static condensation
        rhs[faces_loc] += np.dot(C.T,
                                 np.dot(invA,
                                        np.dot(B, np.dot(S, f[c]))))[:, 0]

        # Save values for hybrid matrix
        indices = np.tile(faces_loc, (faces_loc.size, 1))
        loc_idx = slice(idx, idx + indices.size)
        row[loc_idx] = indices.T.ravel()
        col[loc_idx] = indices.ravel()
        vals[loc_idx] = L.ravel()
        idx += indices.size

    # construct the global matrices (duplicate (row, col) pairs are summed
    # when the coo matrix is converted)
    H = sps.coo_matrix((vals, (row, col))).tocsr()

    # Apply the boundary conditions
    if bc is not None:

        if np.any(bc.is_dir):
            # Dirichlet rows are replaced by a scaled identity row; scaling
            # by the matrix norm keeps the entry comparable to the others.
            norm = sps.linalg.norm(H, np.inf)
            is_dir = np.where(bc.is_dir)[0]

            H[is_dir, :] *= 0
            H[is_dir, is_dir] = norm
            rhs[is_dir] = norm * bc_val[is_dir]

        if np.any(bc.is_neu):
            # Keep one sign per face (the first occurrence in the cell-face
            # map) so that sgn can be indexed by face number.
            faces, _, sgn = sps.find(g.cell_faces)
            sgn = sgn[np.unique(faces, return_index=True)[1]]

            is_neu = np.where(bc.is_neu)[0]
            rhs[is_neu] += sgn[is_neu] * bc_val[is_neu] * g.face_areas[
                is_neu]

    return H, rhs
def gollapudi1(self, repeat=1, scale=1000):
    """[Gollapudi et. al., 2006](1) is an integer weighted MinHash algorithm,
       which skips much unnecessary hash value computation by employing the idea of "active index".
       S. Gollapudi and R. Panigraphy, "Exploiting Asymmetry in Hierarchical Topic Extraction",
       in CIKM, 2006, pp. 475-482.

    Parameters
    -----------
    repeat: int, default: 1
        the number of repeating the algorithm as the part of the seed of the random number generator

    scale: int, default: 1000
        a large constant to transform real-valued weights into integer ones

    Returns
    -----------
    fingerprints_k: ndarray, shape (n_instances, dimension_num)
        one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

    fingerprints_y: ndarray, shape (n_instances, dimension_num)
        one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

    elapsed: float
        time of hashing data matrix

    Notes
    ----------
    The operations of seeking "active indices" and computing hashing values are implemented by C++
    due to low efficiency of Python. The operations cannot be vectorized in Python so that it would be
    very slow.
    """
    fingerprints_k = np.zeros((self.instance_num, self.dimension_num))
    fingerprints_y = np.zeros((self.instance_num, self.dimension_num))

    start = time.time()

    if self.instance_num == 0:
        # Nothing to hash, so the shared library is not needed at all.
        return fingerprints_k, fingerprints_y, time.time() - start

    # Load the library and declare the C signature once; neither depends on
    # the instance being hashed.
    library = CDLL('./cpluspluslib/gollapudi1_fingerprints.so')
    generate = library.GenerateFingerprintOfInstance
    # NOTE(review): the weight buffer is declared as a c_int array while the
    # array passed below is `scale * weighted_set[...]` - confirm its dtype
    # matches what the library expects.
    generate.argtypes = [
        c_int,
        np.ctypeslib.ndpointer(dtype=c_int, ndim=1, flags="C_CONTIGUOUS"),
        np.ctypeslib.ndpointer(c_int, ndim=1, flags="C_CONTIGUOUS"),
        c_int,
        c_int,
        np.ctypeslib.ndpointer(dtype=c_double, ndim=1, flags="C_CONTIGUOUS"),
        np.ctypeslib.ndpointer(dtype=c_double, ndim=1, flags="C_CONTIGUOUS"),
    ]
    generate.restype = None

    for j_sample in range(self.instance_num):
        # Row indices of the features with positive weight in this column.
        feature_id = sparse.find(self.weighted_set[:, j_sample] > 0)[0]
        feature_id_num = feature_id.shape[0]

        feature_weight = np.array(
            scale * self.weighted_set[feature_id, j_sample].todense())[:, 0]

        # Output buffers filled in place by the library call.
        fingerprint_k = np.zeros(self.dimension_num)
        fingerprint_y = np.zeros(self.dimension_num)

        generate(self.dimension_num, feature_weight, feature_id,
                 feature_id_num, self.seed * repeat,
                 fingerprint_k, fingerprint_y)

        fingerprints_k[j_sample, :] = fingerprint_k
        fingerprints_y[j_sample, :] = fingerprint_y

    elapsed = time.time() - start

    return fingerprints_k, fingerprints_y, elapsed
def cws(self, repeat=1):
    r"""The Consistent Weighted Sampling (CWS) algorithm, as the first of the Consistent Weighted Sampling scheme,
       extends "active indices" from $[0, S]$ in [Gollapudi et. al., 2006](1) to $[0, +\infty]$.
       M. Manasse, F. McSherry, and K. Talwar, "Consistent Weighted Sampling", Unpublished technical report, 2010.

    Parameters
    -----------
    repeat: int, default: 1
        the number of repeating the algorithm as the part of the seed of the random number generator

    Returns
    -----------
    fingerprints_k: ndarray, shape (n_instances, dimension_num)
        one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

    fingerprints_y: ndarray, shape (n_instances, dimension_num)
        one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

    elapsed: float
        time of hashing data matrix

    Notes
    ----------
    The operations of seeking "active indices" and computing hashing values are implemented by C++
    due to low efficiency of Python. The operations cannot be vectorized in Python so that it would be
    very slow.
    """
    fingerprints_k = np.zeros((self.instance_num, self.dimension_num))
    fingerprints_y = np.zeros((self.instance_num, self.dimension_num))

    start = time.time()

    if self.instance_num == 0:
        # Nothing to hash, so the shared library is not needed at all.
        return fingerprints_k, fingerprints_y, time.time() - start

    # Load the library and declare the C signature once; neither depends on
    # the instance being hashed.
    library = CDLL('./cpluspluslib/cws_fingerprints.so')
    generate = library.GenerateFingerprintOfInstance
    generate.argtypes = [
        c_int,
        np.ctypeslib.ndpointer(dtype=c_double, ndim=1, flags="C_CONTIGUOUS"),
        np.ctypeslib.ndpointer(dtype=c_int, ndim=1, flags="C_CONTIGUOUS"),
        c_int,
        c_int,
        np.ctypeslib.ndpointer(dtype=c_double, ndim=1, flags="C_CONTIGUOUS"),
        np.ctypeslib.ndpointer(dtype=c_double, ndim=1, flags="C_CONTIGUOUS"),
    ]
    generate.restype = None

    for j_sample in range(self.instance_num):
        # Row indices of the features with positive weight in this column.
        feature_id = sparse.find(self.weighted_set[:, j_sample] > 0)[0]
        feature_id_num = feature_id.shape[0]

        weights = np.array(
            self.weighted_set[feature_id, j_sample].todense())[:, 0]

        # Output buffers filled in place by the library call.
        fingerprint_k = np.zeros(self.dimension_num)
        fingerprint_y = np.zeros(self.dimension_num)

        generate(self.dimension_num, weights, feature_id, feature_id_num,
                 self.seed * repeat, fingerprint_k, fingerprint_y)

        fingerprints_k[j_sample, :] = fingerprint_k
        fingerprints_y[j_sample, :] = fingerprint_y

    elapsed = time.time() - start

    return fingerprints_k, fingerprints_y, elapsed
def haeupler(self, repeat=1, scale=1000):
    """[Haeupler et. al., 2014] preserves the remaining float part with probability
       after each weight is multiplied by a large constant.
       B. Haeupler, M. Manasse, and K. Talwar, "Consistent Weighted Sampling Made Fast, Small, and Easy",
       arXiv preprint arXiv: 1410.4266, 2014

    Parameters
    ----------
    scale: int, default: 1000
        a large constant to transform real-valued weights into integer ones

    repeat: int, default: 1
        the number of repeating the algorithm as the part of the seed of the random number generator

    Returns
    ----------
    fingerprints: ndarray, shape (n_instances, dimension_num)
        hash codes for data matrix, where row represents a data instance

    elapsed: float
        time of hashing data matrix

    Notes
    ----------
    The operation of expanding the original weighted set by scaling the weights is implemented by C++
    due to low efficiency of Python. The operations cannot be vectorized in Python so that it would be
    very slow.
    """
    fingerprints = np.zeros((self.instance_num, self.dimension_num))

    # The global NumPy random state is reseeded here (side effect).
    np.random.seed(self.seed * np.power(2, repeat - 1))

    # Size of the output buffer handed to the library: 100 times the largest
    # scaled column sum of the weight matrix.
    expanded_set_predefined_size = np.ceil(
        np.max(np.sum(self.weighted_set * scale, axis=0)) * 100).astype(int)

    start = time.time()
    hash_parameters = np.random.randint(1, self.C_PRIME,
                                        (self.dimension_num, 2))

    if self.instance_num == 0:
        # Nothing to hash, so the shared library is not needed at all.
        return fingerprints, time.time() - start

    # Load the library and declare the C signature once; neither depends on
    # the instance being hashed.
    library = CDLL('./cpluspluslib/haeupler_expandset.so')
    generate_expanded_set = library.GenerateExpandedSet
    generate_expanded_set.argtypes = [
        c_int,
        np.ctypeslib.ndpointer(dtype=c_double, ndim=1, flags="C_CONTIGUOUS"),
        np.ctypeslib.ndpointer(dtype=c_int, ndim=1, flags="C_CONTIGUOUS"),
        c_int,
        c_int,
        c_int,
        np.ctypeslib.ndpointer(dtype=c_double, ndim=1, flags="C_CONTIGUOUS"),
    ]
    generate_expanded_set.restype = None

    # NOTE(review): column 1 of hash_parameters supplies both the multiplier
    # and the offset of the hash below; column 0 is never read. Confirm this
    # is intended before changing it, since it determines the fingerprints.
    multiplier = hash_parameters[:, 1]

    for j_sample in range(self.instance_num):
        # Row indices of the features with positive weight in this column.
        feature_id = sparse.find(self.weighted_set[:, j_sample] > 0)[0]
        feature_id_num = feature_id.shape[0]

        feature_weight = np.array(
            scale * self.weighted_set[feature_id, j_sample].todense())[:, 0]

        # Zero-initialised output buffer filled in place by the library.
        expanded_feature_id = np.zeros(expanded_set_predefined_size)
        generate_expanded_set(expanded_set_predefined_size, feature_weight,
                              feature_id, feature_id_num, scale,
                              self.seed * repeat, expanded_feature_id)

        # Entries that are still zero are discarded.
        expanded_feature_id = expanded_feature_id[expanded_feature_id != 0]

        # One hash value per (expanded element, hash function) pair.
        k_hash = np.mod(
            np.outer(expanded_feature_id, multiplier) + multiplier,
            self.C_PRIME)

        # For every hash function keep the element with the smallest hash.
        min_position = np.argmin(k_hash, axis=0)
        fingerprints[j_sample, :] = expanded_feature_id[min_position]

    elapsed = time.time() - start

    return fingerprints, elapsed
def close(self):
    """Log how many nonzero entries ``self.data`` holds."""
    # sparse.find returns (rows, cols, values) of the nonzero entries; the
    # length of any of the three is the entry count.
    row_indices = sparse.find(self.data)[0]
    log.info("Glove matrix has %d entries", len(row_indices))
def _map_in_pool(processes, func, items):
    """Run ``func`` over ``items`` in a worker pool that is always terminated."""
    pool = Pool(processes=processes)
    try:
        return pool.map(func, items)
    finally:
        # Terminate even when a worker raised, so no processes are leaked.
        pool.terminate()


def upload_dtm(run_id, output_path):
    """Upload the results of a dynamic topic model run to the database.

    Reads the model output below ``output_path`` and the vocabulary and
    document ids from the matching input directory (the output path with
    "-output-" replaced by "-input-"), writes the topic terms and the
    document-topic weights in parallel, then marks the run as finished and
    calls the ``update_run`` management command.

    Parameters
    ----------
    run_id : primary key of the ``RunStats`` entry of this run
    output_path : str
        directory that contains the ``lda-seq`` output folder
    """
    stat = RunStats.objects.get(pk=run_id)
    print("upload dtm results to db")

    info = readInfo(os.path.join(output_path, "lda-seq/info.dat"))

    topic_ids = db.add_topics(stat.K, stat.run_id)

    input_path = output_path.replace("-output-", "-input-")

    vocab_ids = []
    with open(os.path.join(input_path, 'foo-vocab.dat'), 'r') as vocab_file:
        for line in vocab_file:
            try:
                vocab_ids.append(int(line.split(':')[0].strip()))
            except ValueError:
                # Best effort: lines without a leading integer id are skipped.
                continue

    ids = []
    docsizes = []
    with open(os.path.join(input_path, 'foo-docids.dat'), 'r') as docid_file:
        for line in docid_file:
            try:
                doc_id, doc_size = [int(x.strip()) for x in line.split(':')]
            except ValueError:
                # Best effort: lines that are not "<int>:<int>" are skipped.
                continue
            ids.append(doc_id)
            docsizes.append(doc_size)

    time_range = sorted([tp.n for tp in stat.periods.all().order_by('n')])

    #################################
    # TopicTerms

    print("writing topic terms")
    topics = range(info['NUM_TOPICS'])
    _map_in_pool(
        8,
        partial(dtm_topic, info=info, topic_ids=topic_ids,
                vocab_ids=vocab_ids, ys=time_range, run_id=run_id,
                output_path=output_path),
        topics)
    gc.collect()

    ######################################
    # Doctopics
    print("writing doctopics")
    gamma = np.fromfile(os.path.join(output_path, 'lda-seq/gam.dat'),
                        dtype=float, sep=" ")
    gamma = gamma.reshape((len(gamma) // stat.K, stat.K))

    # Keep only the nonzero weights, as (row, column, value) arrays.
    gamma = find(csr_matrix(gamma))
    glength = len(gamma[0])

    chunk_size = 100000
    ps = 16

    for i in range(glength // chunk_size + 1):
        first = i * chunk_size
        last = min((i + 1) * chunk_size, glength)
        docs = range(first, last)

        # Distribute the entries of this chunk round-robin over the workers.
        doc_batches = [[x for x in docs if x % ps == p] for p in range(ps)]

        values_list = _map_in_pool(
            ps,
            partial(db.f_gamma_batch, gamma=gamma, docsizes=docsizes,
                    docUTset=ids, topic_ids=topic_ids, run_id=run_id),
            doc_batches)

        # Close the database connections before the insert workers are
        # forked.
        django.db.connections.close_all()

        _map_in_pool(ps, db.insert_many, values_list)

        gc.collect()
        sys.stdout.flush()

    stat = RunStats.objects.get(run_id=run_id)
    stat.last_update = timezone.now()
    stat.status = 3  # 3 = finished
    stat.save()
    management.call_command('update_run', run_id)
def compute_up(self, g, solution, data):
    """
    Recover velocity and pressure from the hybrid (face) variables.

    Parameters
    ----------
    g : grid, or a subclass, with geometry fields computed.
    solution : array (g.num_faces) Hybrid solution of the system.
    data: dictionary holding the parameter object under the key "param".
        See self.matrix_rhs for a detailed description.

    Return
    ------
    u : array (g.num_faces) Velocity at each face.
    p : array (g.num_cells) Pressure at each cell.

    For a 0d grid the pair ``(0, solution[0])`` is returned instead.

    """
    # pylint: disable=invalid-name

    # Nothing to condense on a point grid.
    if g.dim == 0:
        return 0, solution[0]

    param = data["param"]
    perm = param.get_tensor(self)
    source = param.get_source(self)
    aperture = param.aperture

    face_ids, _, signs = sps.find(g.cell_faces)

    # Map the domain to a reference geometry (i.e. equivalent to compute
    # surface coordinates in 1d and 2d)
    cell_centers, face_normals, face_centers, _, _, _ = pp.map_geometry.map_grid(g)

    # Weight for the stabilization term
    diameters = g.cell_diameters()
    stab_weight = np.power(diameters, 2 - g.dim)

    # Output vectors
    p = np.zeros(g.num_cells)
    u = np.zeros(g.num_faces)
    local_mass = pp.DualVEM().massHdiv

    indptr = g.cell_faces.indptr
    for cell in np.arange(g.num_cells):
        # Faces of the current cell
        span = slice(indptr[cell], indptr[cell + 1])
        cell_face_ids = face_ids[span]
        n_local = cell_face_ids.size

        # Normals are flipped where needed so that they point out of the cell.
        sign_col = signs[span].reshape((-1, 1))
        outward = np.multiply(
            np.tile(sign_col.T, (g.dim, 1)), face_normals[:, cell_face_ids]
        )

        # Local H_div mass matrix
        A = local_mass(
            perm.values[0:g.dim, 0:g.dim, cell],
            cell_centers[:, cell],
            aperture[cell] * g.cell_volumes[cell],
            face_centers[:, cell_face_ids],
            aperture[cell] * outward,
            np.ones(n_local),
            diameters[cell],
            stab_weight[cell],
        )[0]
        # Local divergence matrix
        B = -np.ones((n_local, 1))
        # Local hybrid matrix
        C = np.eye(n_local, n_local)

        # Static condensation: pressure first, then the face velocities.
        S = 1 / np.dot(B.T, solve(A, B))
        lam = solution[cell_face_ids].reshape((-1, 1))

        p[cell] = np.dot(S, source[cell] - np.dot(B.T, solve(A, np.dot(C, lam))))
        flux = solve(A, np.dot(B, p[cell]) + np.dot(C, lam))
        u[cell_face_ids] = -np.multiply(sign_col, flux)

    return u, p
def save_sparse_matrix(filename, x):
    """Write the nonzero entries of sparse matrix *x* to a text file.

    Each output line holds ``row col value`` separated by single spaces and
    formatted with ``%u``.

    Parameters
    ----------
    filename : file name (or handle) passed on to ``np.savetxt``.
    x : matrix accepted by ``sp.find``.
    """
    # Single-argument print(...) calls emit the same text on Python 2 and 3,
    # unlike the print statement, which does not parse on Python 3.
    print('finding the sparse entries')
    rowidx, colidx, val = sp.find(x)
    triplets = np.column_stack((rowidx, colidx, val))
    print('saving shuffled matrix ')
    np.savetxt(filename, triplets, fmt='%u', delimiter=' ')
def find_positive_cc(self, margin=False, ignore=False):
    """Label every detected partner in self.cc_partners against ground truth.

    For each component id in self.cc_partners the overlapping ground-truth
    ids are looked up. A partner whose mapped (pre, post) segment ids equal
    the (pre, post) pair of an overlapping ground-truth entry gets
    'label' = 1 and 'gt_id' set; otherwise it is labelled -1. Summary
    counts are printed and self.cc_partners is pickled to
    'saved_cc_partners_with_label.pkl' inside self.trial_name.

    margin -- if true, gt[14:-14, 44:-44, 44:-44] is set to 0.
    ignore -- if true, ground-truth ids above 200 are set to 0.

    NOTE(review): `gt` is the same object as self.gt_cc3d, so both options
    modify that attribute in place -- confirm this is intended.
    NOTE(review): this is Python 2 code (print statements, dict.has_key).
    """
    gt = self.gt_cc3d
    if margin:
        gt[14:-14, 44:-44, 44:-44] = 0
    if ignore:
        gt[gt > 200] = 0
    # NOTE(review): the file handle opened here is never closed explicitly.
    cc3d = pickle.load(
        open(os.path.join(self.trial_name, 'saved_post_cc3d.pkl'), 'rb'))

    # Voxel-count table: rows are segment ids, columns ground-truth segment ids.
    seg_gt_overlap = scipy.sparse.csc_matrix(
        (np.ones_like(self.segvol.ravel()),
         (self.segvol.ravel(), self.seg_gtvol.ravel())))
    # Best-overlapping column for each segment, ignoring column 0; the +1
    # applied below converts back to an index into the full table.
    seg_gt_map = np.argmax(seg_gt_overlap[:, 1:], axis=1)
    # Voxel-count table: rows are ground-truth ids, columns detected ids.
    overlaps = scipy.sparse.csc_matrix(
        (np.ones_like(gt.ravel()), (gt.ravel(), cc3d.ravel())))
    from scipy.sparse import find
    ntp = 0
    nfp = 0
    count = 0
    true_positive_detected = []
    detection_true = 0
    detection_false = 0
    for uid in self.cc_partners.keys():
        rows, dummy, values = find(overlaps[:, uid])
        count = count + len(self.cc_partners[uid])
        # Drop the overlap with id 0.
        # NOTE(review): rows[0] raises IndexError when the column is empty.
        if rows[0] == 0:
            rows = rows[1:]
            values = values[1:]
        if len(values) < 1:
            # No ground-truth overlap at all: every partner is a false detection.
            for ci in range(len(self.cc_partners[uid])):
                self.cc_partners[uid][ci]['label'] = -1
            detection_false = detection_false + len(self.cc_partners[uid])
            continue
        found = 0
        for mi in range(rows.shape[0]):
            match_id = rows[mi]
            match_amt = values[mi]
            print uid, match_id
            for ci in range(len(self.cc_partners[uid])):
                pre_seg_gt_id = seg_gt_map[self.cc_partners[uid][ci]
                                           ['pre_seg']][0, 0] + 1
                post_seg_gt_id = seg_gt_map[self.cc_partners[uid][ci]
                                            ['post_seg']][0, 0] + 1
                if pre_seg_gt_id == 0 or post_seg_gt_id == 0:
                    print "seg id not present in gt"
                    continue
                if (pre_seg_gt_id == self.gt_partners[match_id]['pre']) and \
                        (post_seg_gt_id == self.gt_partners[match_id]['post']):
                    print 'found: {0}, {1}'.format(
                        self.cc_partners[uid][ci]['pre_seg'],
                        self.cc_partners[uid][ci]['post_seg'])
                    print 'gt: {0}, {1}'.format(
                        self.gt_partners[match_id]['pre'],
                        self.gt_partners[match_id]['post'])
                    true_positive_detected.append(match_id)
                    detection_true = detection_true + 1
                    self.cc_partners[uid][ci]['label'] = 1
                    self.cc_partners[uid][ci]['gt_id'] = match_id
                else:
                    # NOTE(review): nesting reconstructed from flattened
                    # source -- the `else` below is read as pairing with the
                    # has_key check (first mismatch of an unlabelled
                    # partner); confirm against the original layout.
                    if self.cc_partners[uid][ci].has_key('label'):
                        if self.cc_partners[uid][ci]['label'] == 1:
                            continue
                    else:
                        detection_false = detection_false + 1
                        self.cc_partners[uid][ci]['label'] = -1
                        self.cc_partners[uid][ci]['gt_id'] = match_id
    # Ground-truth ids that were never matched count as misses.
    unique_gt = np.setdiff1d(np.unique(gt), [0])
    nunique_detected = len(np.unique(true_positive_detected))
    nmiss = len(unique_gt) - nunique_detected
    print 'detected = ', nunique_detected
    print 'missed = {0} ({1})'.format(nmiss, nmiss * 1. / len(unique_gt))
    print 'detection_true = ', detection_true
    print 'detection false = ', detection_false
    # NOTE(review): the output handle is never closed explicitly.
    pickle.dump(
        self.cc_partners,
        open(
            os.path.join(self.trial_name, 'saved_cc_partners_with_label.pkl'),
            'wb'))
def __init__(self, layout, block_size=64, heads=None, mask_callback=None, name=None):
    """Build the per-head block lookup tables for a sparse layout.

    layout        -- 2d (shared by all heads, `heads` required) or 3d
                     (one square layout per head) array of block flags.
    block_size    -- edge length of a block; one of 8, 16, 32, 64.
    heads         -- number of heads; defaults to layout.shape[0].
    mask_callback -- if given, forwarded to self.init_softmax_mask.
    name          -- stored on the instance as self.name.
    """
    shared_layout = len(layout.shape) == 2
    if shared_layout:
        assert heads is not None, "heads must be explicitly specified when using shared layouts per head"
        # broadcast same layout over all heads
        layout = np.expand_dims(layout, 0)

    if heads is None:
        heads = layout.shape[0]

    assert block_size in (
        8, 16, 32, 64), "Block sizes of 8, 16, 32 and 64 currently supported"
    assert len(layout.shape) == 3, "bad layout shape: " + str(layout.shape)
    assert layout.shape[1] == layout.shape[2], "layout should be square"

    self.blk_size = block_size
    self.name = name
    self.heads = heads
    self.lut_heads = layout.shape[0]
    self.ctx_blks = layout.shape[1]
    self.blk_shape = (block_size, block_size)
    self.nn_max = 0
    self.tn_max = 0

    if layout.dtype != np.int32:
        layout = layout.astype(np.int32)

    self.nt_lut = []
    self.nn_lut = []
    self.tn_lut = []
    self.nt_list = []
    self.nn_list = []
    self.tn_list = []

    blocks = None
    for head_idx in range(layout.shape[0]):
        # csr makes python-side iteration over large sparse layouts much cheaper
        head_csr = sparse.csr_matrix(layout[head_idx, :, :])
        row_ids, col_ids, block_vals = sparse.find(head_csr)

        n_head_blocks = len(block_vals)
        if blocks is None:
            blocks = n_head_blocks
        else:
            assert n_head_blocks == blocks, "number of layout blocks must be equal across heads"

        # keep blocks contiguous along the rows (the softmax code relies on it)
        nt_list = sorted(zip(row_ids, col_ids))
        row_ids = [r for r, _ in nt_list]
        col_ids = [c for _, c in nt_list]

        nt_lut = np.array(nt_list, dtype=np.int32)
        nn_lut, nn_list, nn_max = self.xn_lut(row_ids, col_ids, blocks)
        tn_lut, tn_list, tn_max = self.xn_lut(col_ids, row_ids, blocks)

        self.nt_lut.append(nt_lut)
        self.nn_lut.append(nn_lut)
        self.tn_lut.append(tn_lut)
        self.nt_list.append(nt_list)
        self.nn_list.append(nn_list)
        self.tn_list.append(tn_list)
        if nn_max > self.nn_max:
            self.nn_max = nn_max
        if tn_max > self.tn_max:
            self.tn_max = tn_max

    self.blocks = blocks
    # Stack the per-head tables and wrap them as named constants.
    self.nt_lut = get_constant(np.array(self.nt_lut, dtype=np.int32), name="nt")
    self.nn_lut = get_constant(np.array(self.nn_lut, dtype=np.int32), name="nn")
    self.tn_lut = get_constant(np.array(self.tn_lut, dtype=np.int32), name="tn")

    if mask_callback is None:
        self.softmax_mask = None
        self.softmax_mask_np = None
    else:
        self.init_softmax_mask(mask_callback)
def tree_multiresolution(G, Nlevel, reduction_method='resistance_distance',
                         compute_full_eigen=False, root=None):
    r"""Compute a multiresolution of trees

    NOTE(review): this function looks like an unfinished port. Several names
    it reads (``new_non_root_parents``, ``old_weights_to_parents_inds``,
    ``old_weights_parents_to_grandparents_inds``) are only assigned inside
    ``# TODO`` comments, so every code path that reaches them raises
    ``NameError``. See the inline notes for the individual problems.

    Parameters
    ----------
    G : Graph
        Graph structure of a tree.
    Nlevel : int
        Number of times to downsample and coarsen the tree
    root : int
        The index of the root of the tree. If falsy, ``G.root`` is used when
        the graph has that attribute, otherwise 1.
    reduction_method : str
        The graph reduction method: 'unweighted', 'sum' or
        'resistance_distance' (default = 'resistance_distance')
    compute_full_eigen : bool
        To also compute the graph Laplacian eigenvalues for every tree in
        the sequence

    Returns
    -------
    Gs : list
        List starting with G, with each further element containing a graph
        structure representing a reduced tree.
    subsampled_vertex_indices : list
        Indices of the vertices of the previous tree that are kept for the
        subsequent tree.

    Raises
    ------
    ValueError
        If reduction_method is not one of the names above.

    """

    if not root:
        if hasattr(G, 'root'):
            root = G.root
        else:
            root = 1

    Gs = [G]

    if compute_full_eigen:
        Gs[0].compute_fourier_basis()

    subsampled_vertex_indices = []
    depths, parents = _tree_depths(G.A, root)
    old_W = G.W

    for lev in range(Nlevel):
        # Identify the vertices in the even depths of the current tree
        # NOTE(review): builtin round() is applied to `depths`; if that is
        # an array with more than one element this fails -- np.round is
        # probably what was meant.
        down_odd = round(depths) % 2
        down_even = np.ones((Gs[lev].N)) - down_odd
        keep_inds = np.where(down_even == 1)[0]
        subsampled_vertex_indices.append(keep_inds)

        # There will be one undirected edge in the new graph connecting each
        # non-root subsampled vertex to its new parent. Here, we find the new
        # indices of the new parents
        # NOTE(review): np.setdiff1d returns a single array, so this
        # two-name unpacking does not yield (values, indices).
        non_root_keep_inds, new_non_root_inds = np.setdiff1d(keep_inds, root)
        old_parents_of_non_root_keep_inds = parents[non_root_keep_inds]
        old_grandparents_of_non_root_keep_inds = parents[
            old_parents_of_non_root_keep_inds]
        # TODO new_non_root_parents = dsearchn(keep_inds, old_grandparents_of_non_root_keep_inds)

        old_W_i_inds, old_W_j_inds, old_W_weights = sparse.find(old_W)
        # NOTE(review): new_non_root_parents is undefined here (see TODO above).
        i_inds = np.concatenate((new_non_root_inds, new_non_root_parents))
        j_inds = np.concatenate((new_non_root_parents, new_non_root_inds))
        new_N = np.sum(down_even)
        if reduction_method == "unweighted":
            new_weights = np.ones(np.shape(i_inds))

        elif reduction_method == "sum":
            # TODO old_weights_to_parents_inds = dsearchn([old_W_i_inds,old_W_j_inds], [non_root_keep_inds, old_parents_of_non_root_keep_inds]);
            old_weights_to_parents = old_W_weights[old_weights_to_parents_inds]
            # old_W(non_root_keep_inds,old_parents_of_non_root_keep_inds);
            # TODO old_weights_parents_to_grandparents_inds = dsearchn([old_W_i_inds, old_W_j_inds], [old_parents_of_non_root_keep_inds, old_grandparents_of_non_root_keep_inds])
            old_weights_parents_to_grandparents = old_W_weights[
                old_weights_parents_to_grandparents_inds]
            # old_W(old_parents_of_non_root_keep_inds,old_grandparents_of_non_root_keep_inds);
            new_weights = old_weights_to_parents + old_weights_parents_to_grandparents
            # NOTE(review): `new_weights.new_weights` is an attribute access,
            # not a pair; the other branch concatenates
            # [new_weights, new_weights].
            new_weights = np.concatenate((new_weights.new_weights))

        elif reduction_method == "resistance_distance":
            # TODO old_weights_to_parents_inds = dsearchn([old_W_i_inds, old_W_j_inds], [non_root_keep_inds, old_parents_of_non_root_keep_inds])
            # NOTE(review): `old_W_weight` and `sold_weights_to_parents_inds`
            # are not defined anywhere in this function (likely typos of
            # old_W_weights / old_weights_to_parents_inds).
            old_weights_to_parents = old_W_weight[sold_weights_to_parents_inds]
            # old_W(non_root_keep_inds,old_parents_of_non_root_keep_inds);
            # TODO old_weights_parents_to_grandparents_inds = dsearchn([old_W_i_inds, old_W_j_inds], [old_parents_of_non_root_keep_inds, old_grandparents_of_non_root_keep_inds])
            old_weights_parents_to_grandparents = old_W_weights[
                old_weights_parents_to_grandparents_inds]
            # old_W(old_parents_of_non_root_keep_inds,old_grandparents_of_non_root_keep_inds);
            # Reciprocal of the summed reciprocals of the two edge weights.
            new_weights = 1. / (1. / old_weights_to_parents +
                                1. / old_weights_parents_to_grandparents)
            new_weights = np.concatenate(([new_weights, new_weights]))

        else:
            raise ValueError('Unknown graph reduction method.')

        new_W = sparse.csc_matrix((new_weights, (i_inds, j_inds)),
                                  shape=(new_N, new_N))

        # Update parents
        new_root = np.where(keep_inds == root)[0]
        # NOTE(review): the second positional argument of np.zeros is the
        # dtype, so passing two integers here does not create a 2-d array.
        parents = np.zeros(np.shape(keep_inds)[0], np.shape(keep_inds)[0])
        parents[:new_root - 1, new_root:] = new_non_root_parents

        # Update depths
        depths = depths[keep_inds]
        depths = depths / 2.

        # Store new tree
        Gtemp = graphs.Graph(new_W, coords=Gs[lev].coords[keep_inds],
                             limits=G.limits, root=new_root)
        #Gs[lev].copy_graph_attributes(Gtemp, False)

        if compute_full_eigen:
            # NOTE(review): Gs[lev + 1] is read before Gtemp is appended
            # below, so this index does not exist yet.
            Gs[lev + 1].compute_fourier_basis()

        # Replace current adjacency matrix and root
        Gs.append(Gtemp)

        old_W = new_W
        root = new_root

    return Gs, subsampled_vertex_indices
def generate_coarse_grid_single(g, subdiv, face_map):
    """
    Coarsen a single grid in place. Use the common interface instead.

    Cells sharing a label in `subdiv` are merged into one coarse cell; faces
    that occur twice inside a merged cell (internal faces) are dropped and
    the remaining faces and nodes are renumbered. The fields of `g` are
    overwritten with the coarse quantities.

    Parameters
    ----------
    g : grid to coarsen; modified in place.
    subdiv : array-like with one label per cell of `g`.
    face_map : if true, return the map between old and new face indices.

    Returns
    -------
    np.ndarray with rows (old face indices kept, new face indices) when
    `face_map` is true, otherwise None.

    """
    subdiv = np.asarray(subdiv)
    assert subdiv.size == g.num_cells

    # declare the storage array to build the cell_faces map
    cell_faces = np.empty(0, dtype=g.cell_faces.indptr.dtype)
    cells = np.empty(0, dtype=cell_faces.dtype)
    orient = np.empty(0, dtype=g.cell_faces.data.dtype)

    # declare the storage array to build the face_nodes map
    face_nodes = np.empty(0, dtype=g.face_nodes.indptr.dtype)
    nodes = np.empty(0, dtype=face_nodes.dtype)
    # The builtin bool/int replace the np.bool/np.int aliases, which were
    # removed in NumPy 1.24.
    visit = np.zeros(g.num_faces, dtype=bool)

    # compute the face_node indexes
    num_nodes_per_face = g.face_nodes.indptr[1:] - g.face_nodes.indptr[:-1]
    face_node_ind = matrix_compression.rldecode(np.arange(g.num_faces),
                                                num_nodes_per_face)

    cells_list = np.unique(subdiv)
    cell_volumes = np.zeros(cells_list.size)
    cell_centers = np.zeros((3, cells_list.size))

    for cellId, cell in enumerate(cells_list):
        # extract the cells of the original mesh associated to a specific label
        cells_old = np.where(subdiv == cell)[0]

        # compute the volume
        cell_volumes[cellId] = np.sum(g.cell_volumes[cells_old])
        cell_centers[:, cellId] = np.average(g.cell_centers[:, cells_old],
                                             axis=1)

        # reconstruct the cell_faces mapping
        faces_old, _, orient_old = sps.find(g.cell_faces[:, cells_old])
        # mask is True for every repeated occurrence of a face index
        mask = np.ones(faces_old.size, dtype=bool)
        mask[np.unique(faces_old, return_index=True)[1]] = False
        # extract the indexes of the internal edges, to be discarded
        index = np.array(
            [np.where(faces_old == f)[0] for f in faces_old[mask]],
            dtype=int).ravel()
        faces_new = np.delete(faces_old, index)
        cell_faces = np.r_[cell_faces, faces_new]
        cells = np.r_[cells, np.repeat(cellId, faces_new.shape[0])]
        orient = np.r_[orient, np.delete(orient_old, index)]

        # reconstruct the face_nodes mapping
        # consider only the unvisited faces
        not_visit = ~visit[faces_new]
        if not_visit.size == 0 or np.all(~not_visit):
            continue
        # mask to consider only the external faces
        mask = np.atleast_1d(
            np.sum(
                [face_node_ind == f for f in faces_new[not_visit]],
                axis=0,
                dtype=bool,
            ))
        face_nodes = np.r_[face_nodes, face_node_ind[mask]]
        nodes_new = g.face_nodes.indices[mask]
        nodes = np.r_[nodes, nodes_new]
        visit[faces_new] = True

    # Rename the faces
    cell_faces_unique = np.unique(cell_faces)
    cell_faces_id = np.arange(cell_faces_unique.size, dtype=cell_faces.dtype)
    cell_faces = np.array([
        cell_faces_id[np.where(cell_faces_unique == f)[0]] for f in cell_faces
    ]).ravel()
    shape = (cell_faces_unique.size, cells_list.size)
    cell_faces = sps.csc_matrix((orient, (cell_faces, cells)), shape=shape)

    # Rename the nodes
    face_nodes = np.array([
        cell_faces_id[np.where(cell_faces_unique == f)[0]] for f in face_nodes
    ]).ravel()
    nodes_list = np.unique(nodes)
    nodes_id = np.arange(nodes_list.size, dtype=nodes.dtype)
    nodes = np.array([nodes_id[np.where(nodes_list == n)[0]]
                      for n in nodes]).ravel()

    # sort the nodes (stable sort, so nodes keep their order within a face)
    nodes = nodes[np.argsort(face_nodes, kind="mergesort")]
    data = np.ones(nodes.size, dtype=g.face_nodes.data.dtype)
    indptr = np.r_[0, np.cumsum(np.bincount(face_nodes))]
    face_nodes = sps.csc_matrix((data, nodes, indptr))

    # store again the data in the same grid
    g.name.append("coarse")
    g.nodes = g.nodes[:, nodes_list]
    g.num_nodes = g.nodes.shape[1]
    g.face_nodes = face_nodes
    g.num_faces = g.face_nodes.shape[1]
    g.face_areas = g.face_areas[cell_faces_unique]
    g.tags = tags.extract(g.tags, cell_faces_unique, tags.standard_face_tags())
    g.face_normals = g.face_normals[:, cell_faces_unique]
    g.face_centers = g.face_centers[:, cell_faces_unique]
    g.cell_faces = cell_faces
    g.num_cells = g.cell_faces.shape[1]
    g.cell_volumes = cell_volumes
    g.cell_centers = half_space.star_shape_cell_centers(g)
    # Fall back to the averaged centers where the star-shape computation
    # produced NaN.
    is_nan = np.isnan(g.cell_centers[0, :])
    g.cell_centers[:, is_nan] = cell_centers[:, is_nan]

    if face_map:
        return np.array([cell_faces_unique, cell_faces_id])
def extrude_grid_bucket(gb: pp.GridBucket, z: np.ndarray) -> Tuple[pp.GridBucket, Dict]:
    """ Extrude a GridBucket by extending all fixed-dimensional grids in the z-direction.

    In practice, the original grid bucket will be 2d, and the result is 3d.

    The returned GridBucket is fully functional, including mortar grids on the gb edges.
    The data dictionaries on nodes and edges are mainly empty. Data can be transferred from
    the original GridBucket via the returned map between old and new grids.

    Parameters:
        gb (pp.GridBucket): Mixed-dimensional grid to be extruded. Should be 2d.
        z (np.ndarray): z-coordinates of the nodes in the extruded grid. Should be
            either non-negative or non-positive, and be sorted in increasing or
            decreasing order, respectively.

    Returns:
        gb (pp.GridBucket): Mixed-dimensional grid, 3d. The data dictionaries on nodes and
            edges are mostly empty.
        dict: Mapping from individual grids in the old bucket to the corresponding
            extruded grids in the new one. The dictionary values are a namedtuple with
            elements grid (new grid), cell_map and face_map, where the two latter
            describe mapping between the new and old grid, see extrude_grid for details.

    """

    # New GridBucket. to be filled in
    gb_new = pp.GridBucket()

    # Data structure for mapping between old and new grids
    g_map = {}

    # Container for grid information
    Mapping = namedtuple("mapping", ["grid", "cell_map", "face_map"])

    # Loop over all grids in the old bucket, extrude the grid, save mapping information
    for g, _ in gb:
        g_new, cell_map, face_map = extrude_grid(g, z)

        if hasattr(g, "frac_num"):
            g_new.frac_num = g.frac_num

        gb_new.add_nodes([g_new])

        g_map[g] = Mapping(g_new, cell_map, face_map)

    # Loop over all edges in the old grid, create corresponding edges in the new gb.
    # Also define mortar_grids
    for e, d in gb.edges():

        # grids of the old edge, extruded version of each grid
        gl, gh = gb.nodes_of_edge(e)
        gl_new = g_map[gl].grid
        gh_new = g_map[gh].grid

        # Next, we need the cell-face mapping for the new grid.
        # The idea is to first find the old map, then replace each cell-face relation
        # with the set of cells and faces (exploiting first that the new grids are
        # matching due to the extrusion algorithm, and second that the cell-map and
        # face-map stores indices in increasing layer index, so that the first cell
        # and first face both are in the first layer, thus they match, etc.).
        face_cells_old = d["face_cells"]

        # cells (in low-dim grid) and faces in high-dim grid that define the same
        # geometric quantity
        cells, faces, _ = sps.find(face_cells_old)

        # Cell-map for the low-dimensional grid, face-map for the high-dim
        cell_map = g_map[gl].cell_map
        face_map = g_map[gh].face_map

        # Data structure for the new face-cell map.
        # The builtin int/bool replace the np.int/np.bool aliases, which were
        # removed in NumPy 1.24.
        rows = np.empty(0, dtype=int)
        cols = np.empty(0, dtype=int)

        # The standard MortarGrid __init__ assumes that when faces are split because of
        # a fracture, the faces are ordered with one side first, then the other. This
        # will not be True for this layered construction. Instead, keep track of all
        # faces that should be moved to the other side.
        face_on_other_side = np.empty(0, dtype=int)

        # The median does not change inside the loop, so compute it once. The
        # value is never read when there are no faces (the loop body is skipped).
        face_median = np.median(faces) if faces.size else 0

        # Loop over cells in gl would not have been as clean, as each cell is associated
        # with faces on both sides
        # Faces are found from the high-dim grid, cells in the low-dim grid
        for idx in range(faces.size):
            rows = np.hstack((rows, cell_map[cells[idx]]))
            cols = np.hstack((cols, face_map[faces[idx]]))

            # Here, we tacitly assume that the original grid had its faces split in the
            # standard way, that is, all faces on one side have index lower than any
            # face on the other side.
            if faces[idx] > face_median:
                face_on_other_side = np.hstack(
                    (face_on_other_side, face_map[faces[idx]])
                )

        data = np.ones(rows.size, dtype=bool)
        # Create new face-cell map
        face_cells_new = sps.coo_matrix(
            (data, (rows, cols)), shape=(gl_new.num_cells, gh_new.num_faces)
        ).tocsc()

        # Define the new edge
        e = (gh_new, gl_new)
        # Add to new gb, together with the new face-cell map
        gb_new.add_edge(e, face_cells_new)

        # Create a mortar grid, add to data of new edge
        side_g = {
            mortar_grid.LEFT_SIDE: gl_new.copy(),
            mortar_grid.RIGHT_SIDE: gl_new.copy(),
        }

        # Construct mortar grid, with instructions on which faces belong to which side
        mg = pp.MortarGrid(
            gl_new.dim, side_g, face_cells_new, face_duplicate_ind=face_on_other_side
        )

        d_new = gb_new.edge_props(e)

        d_new["mortar_grid"] = mg

    return gb_new, g_map
def create_partition(A, seeds=None, **kwargs):
    """
    Create the partition based on an input matrix using the algebraic multigrid
    method coarse/fine-splittings based on direct couplings. The standard values
    for cdepth and epsilon are taken from the following reference.

    For more information see: U. Trottenberg, C. W. Oosterlee, and A. Schuller.
    Multigrid. Academic press, 2000.

    Parameters
    ----------
    A: sparse matrix used for the agglomeration
    cdepth: (keyword, default 2) the greater it is, the more intense the
        aggregation will be, e.g. fewer cells if it is used combined with
        generate_coarse_grid
    epsilon: (keyword, default 0.25) weight for the off-diagonal entries to
        define the "strong negative coupling"
    seeds: (optional) to define a-priori coarse cells

    Returns
    -------
    out: agglomeration indices. For a matrix without stored entries
        np.zeros(1) is returned.

    How to use
    ----------
    part = create_partition(tpfa_matrix(g))
    g = generate_coarse_grid(g, part)

    """

    cdepth = int(kwargs.get("cdepth", 2))
    epsilon = kwargs.get("epsilon", 0.25)

    if A.size == 0:
        return np.zeros(1)
    Nc = A.shape[0]

    # For each node, which other nodes are strongly connected to it.
    # The builtin bool/int replace the np.bool/np.int aliases, which were
    # removed in NumPy 1.24.
    ST = sps.lil_matrix((Nc, Nc), dtype=bool)

    # In the first instance, all cells are strongly connected to each other
    At = A.T

    for i in np.arange(Nc):
        loc = slice(At.indptr[i], At.indptr[i + 1])
        ci, vals = At.indices[loc], At.data[loc]
        neg = vals < 0.0
        nvals = vals[neg]
        nci = ci[neg]
        # NOTE(review): np.argmin raises on an empty array, i.e. when this
        # slice holds no negative entry.
        minId = np.argmin(nvals)
        ind = -nvals >= epsilon * np.abs(nvals[minId])
        ST[nci[ind], i] = True

    # Extend the connections one level of depth per pass
    for _ in np.arange(2, cdepth + 1):
        # Temporary field, stores the connections from the previous pass
        STold = ST.copy()
        for j in np.arange(Nc):
            rowj = np.array(STold.rows[j])
            if rowj.size == 0:
                continue
            row = np.hstack([STold.rows[r] for r in rowj])
            ST[j, np.concatenate((rowj, row))] = True
        # Released inside the loop so that cdepth < 2 (no pass at all)
        # cannot reference an unbound name.
        del STold

    ST.setdiag(False)
    lmbda = np.array([len(s) for s in ST.rows])

    # Define coarse nodes
    candidate = np.ones(Nc, dtype=bool)
    is_fine = np.zeros(Nc, dtype=bool)
    is_coarse = np.zeros(Nc, dtype=bool)

    # cells that are not important for any other cells are on the fine scale.
    for row_id, row in enumerate(ST.rows):
        if not row:
            is_fine[row_id] = True
            candidate[row_id] = False

    ST = ST.tocsr()
    it = 0
    while np.any(candidate):
        i = np.argmax(lmbda)
        is_coarse[i] = True
        j = ST.indices[ST.indptr[i]:ST.indptr[i + 1]]
        jf = j[candidate[j]]
        is_fine[jf] = True
        candidate[np.r_[i, jf]] = False
        loop = ST.indices[mcolon.mcolon(ST.indptr[jf], ST.indptr[jf + 1])]
        for row in np.unique(loop):
            s = ST.indices[ST.indptr[row]:ST.indptr[row + 1]]
            lmbda[row] = s[candidate[s]].size + 2 * s[is_fine[s]].size
        lmbda[np.logical_not(candidate)] = -1
        it = it + 1

        # Something went wrong during aggregation
        assert it <= Nc

    del lmbda, ST

    if seeds is not None:
        is_coarse[seeds] = True
        is_fine[seeds] = False

    # If two neighbors are coarse, eliminate one of them without touching the
    # seeds
    c2c = np.abs(A) > 0
    c2c_rows, _, _ = sps.find(c2c)

    pairs = np.empty((0, 2), dtype=int)
    for it in np.where(is_coarse)[0]:
        loc = slice(c2c.indptr[it], c2c.indptr[it + 1])
        ind = np.setdiff1d(c2c_rows[loc], it)
        cind = ind[is_coarse[ind]]
        new_pair = np.stack((np.repeat(it, cind.size), cind), axis=-1)
        pairs = np.append(pairs, new_pair, axis=0)

    # Remove one of the neighbors cells
    if pairs.size:
        pairs = setmembership.unique_rows(np.sort(pairs, axis=1))[0]
        for ij in pairs:
            A_val = np.array(A[ij, ij]).ravel()
            ids = ij[np.argsort(A_val)]
            ids = np.setdiff1d(ids, seeds, assume_unique=True)
            if ids.size:
                is_coarse[ids[0]] = False
                is_fine[ids[0]] = True

    coarse = np.where(is_coarse)[0]

    # Primal grid
    NC = coarse.size
    primal = sps.lil_matrix((NC, Nc), dtype=bool)
    primal[np.arange(NC), coarse[np.arange(NC)]] = True

    # Off-diagonal entries scaled by the diagonal entry of the same slice
    connection = sps.lil_matrix((Nc, Nc), dtype=np.double)
    for it in np.arange(Nc):
        n = np.setdiff1d(c2c_rows[c2c.indptr[it]:c2c.indptr[it + 1]], it)
        loc = slice(A.indptr[it], A.indptr[it + 1])
        A_idx, A_row = A.indices[loc], A.data[loc]
        mask = A_idx != it
        connection[it, n] = np.abs(A_row[mask] / A_row[np.logical_not(mask)])

    connection = connection.tocsr()

    candidates_rep = np.ediff1d(connection.indptr)
    candidates_idx = np.repeat(is_coarse, candidates_rep)
    candidates = np.stack(
        (
            connection.indices[candidates_idx],
            np.repeat(np.arange(NC), candidates_rep[is_coarse]),
        ),
        axis=-1,
    )

    connection_idx = mcolon.mcolon(connection.indptr[coarse],
                                   connection.indptr[coarse + 1])
    vals = sps.csr_matrix(
        accumarray.accum(candidates,
                         connection.data[connection_idx],
                         size=[Nc, NC]))
    del candidates_rep, candidates_idx, connection_idx

    it = NC
    not_found = np.logical_not(is_coarse)
    # Process the strongest connection globally
    while np.any(not_found):
        # NOTE(review): the result is unused; kept because the call raises
        # when no entries are stored, which stops the loop early in that
        # case -- confirm whether that guard is intended.
        np.argmax(vals.data)
        mcind = np.atleast_1d(np.squeeze(np.asarray(vals.argmax(axis=0))))
        mcval = -np.inf * np.ones(mcind.size)
        for c, r in enumerate(mcind):
            loc = slice(vals.indptr[r], vals.indptr[r + 1])
            vals_idx, vals_data = vals.indices[loc], vals.data[loc]
            mask = vals_idx == c
            if vals_idx.size == 0 or not np.any(mask):
                continue
            mcval[c] = vals_data[mask]
        mi = np.argmax(mcval)
        nadd = mcind[mi]

        primal[mi, nadd] = True
        it = it + 1
        # Safety stop against a non-terminating aggregation
        if it > Nc + 5:
            break

        not_found[nadd] = False
        vals.data[vals.indptr[nadd]:vals.indptr[nadd + 1]] = 0

        loc = slice(connection.indptr[nadd], connection.indptr[nadd + 1])
        nc = connection.indices[loc]
        af = not_found[nc]
        nc = nc[af]
        nv = mcval[mi] * connection[nadd, :]
        nv = nv.data[af]
        if len(nc) > 0:
            vals += sps.csr_matrix((nv, (nc, np.repeat(mi, len(nc)))),
                                   shape=(Nc, NC))

    coarse, fine = primal.tocsr().nonzero()
    return coarse[np.argsort(fine)]
data = np.array(data,dtype=int) l1 = np.array(l1,dtype=int) #mtx = csr_matrix((data, (l1, newList)),shape=(len(mv_index_fn),len(l1))).toarray() mtx = csr_matrix((data, (l1, newList))) #tmp = mtx[mtx!=0] print(mtx.shape) #******** *************************** Part 2 ***************************** #time_part_2= time.time() list1=[] [D,E,data_one] = find(mtx) F = np.unique(E,return_counts=True) k=0 user_key_index={} j=0 for k in range(mtx.shape[1]): user_key_index[k]= (D[j: j + F[1][k]]).tolist() j=j+F[1][k] def jacc_distance_new(num1,num2): #print("Time 1:", (time.time()-start_time6)) user1 = user_key_index[num1] #print("Time 2:", (time.time()-start_time6)) user2 = user_key_index[num2] #print("Time 3:", (time.time()-start_time6)) m11 = len(set(user1) & set(user2))
def jacobianstructure(self):
    """Return the (row, col) positions of the nonzeros of the Jacobian.

    The structure of the Jacobian is assumed not to change between
    iterations, so it is probed numerically: for each pair of neighbouring
    nodes the model derivatives are evaluated at a few random points and
    summed, and the positions of the nonzero sums are recorded with the
    sparse `find` helper.

    Returns
    -------
    (row, col) : tuple of two 1-d arrays of equal length.
    """
    n_probe = 3
    n_states = self.num_states
    n_cons = self.num_cons
    n_close = self.num_con_close

    # First column of each variable group that follows the state block.
    close_start = self.num_nodes*n_states
    lopen_start = close_start + n_close
    ropen_start = lopen_start + self.num_stanceL*2
    ankle_start = ropen_start + self.num_stanceR*2

    # Seeded with an empty float array so the stacked result keeps the
    # dtype it would get from repeatedly extending np.array([]).
    row_parts = [np.array([])]
    col_parts = [np.array([])]

    left_seen = 0
    right_seen = 0
    for node in range(self.num_nodes-1):
        jac_state = np.zeros((n_cons, 2*n_states))
        jac_close = np.zeros((n_cons, n_close))
        jac_lopen = np.zeros((n_cons, 2))
        jac_ropen = np.zeros((n_cons, 2))
        jac_ankle = np.zeros((n_cons, 2))

        iniL = self.initial_L[node+1, :]
        iniR = self.initial_R[node+1, :]

        # Sum the derivatives over several random evaluation points.
        for _ in range(n_probe):
            np.random.seed()
            x_prev = 0.5 - np.random.random(n_states)
            np.random.seed()
            x_now = 0.5 - np.random.random(n_states)
            np.random.seed()
            vs_now = 0.1*np.random.random(2)
            np.random.seed()
            con_close = 1 - 2*np.random.random(n_close)
            np.random.seed()
            u_ankle = 100*np.random.random(2)
            np.random.seed()
            u_stanceL = 100*np.random.random(2)
            np.random.seed()
            u_stanceR = 100*np.random.random(2)

            derivs = self.gait2dpi_u(x_now, (x_now - x_prev)/self.interval,
                                     vs_now, con_close, u_stanceL,
                                     u_stanceR, u_ankle, iniL, iniR)
            _, dfdx, dfdxdot, d_close, d_lopen, d_ropen, d_ankle = derivs

            jac_state[:, :n_states] -= dfdxdot/self.interval
            jac_state[:, n_states:2*n_states] += (dfdx + dfdxdot/self.interval)
            jac_close += d_close
            jac_lopen += d_lopen
            jac_ropen += d_ropen
            jac_ankle += d_ankle

        # (matrix, column offset) pairs in the order they are recorded.
        groups = (
            (jac_state, node*n_states),
            (jac_close, close_start),
            (jac_lopen, lopen_start + left_seen*2),
            (jac_ropen, ropen_start + right_seen*2),
            (jac_ankle, ankle_start + node*2),
        )
        for con in range(n_cons):
            row_offset = node*n_cons + con
            for jac, col_offset in groups:
                nz_row, nz_col, _ = find(jac[con, :])
                row_parts.append(nz_row + row_offset)
                col_parts.append(nz_col + col_offset)

        if iniL[0]:
            left_seen += 1
        if iniR[0]:
            right_seen += 1

    return (np.hstack(row_parts), np.hstack(col_parts))
def refine_grid_1d(g, ratio=2):
    """ Refine cells in a 1d grid.

    Parameters:
        g (grid): A 1d grid, to be refined.
        ratio (int): Number of new cells each old cell is split into
            (ratio - 1 nodes are inserted per old cell).

    Returns:
        grid: New grid, with finer cells.

    """

    # Implementation note: The main part of the function is the construction of
    # the new cell-face relation. Since the grid is 1d, nodes and faces are
    # equivalent, and notation used mostly refers to nodes instead of faces.

    # Cell-node relation
    cell_nodes = g.cell_nodes()

    # Every cell will contribute (ratio - 1) new nodes
    num_new_nodes = (ratio - 1) * g.num_cells + g.num_nodes
    x = np.zeros((3, num_new_nodes))
    # Coordinates for splitting of cells
    theta = np.arange(1, ratio) / float(ratio)
    pos = 0
    shift = 0

    # Array that indicates whether an item in the cell-node relation represents
    # a node not listed before (e.g. whether this is the first or second
    # occurrence of the cell).
    # The builtin bool/int replace the np.bool/np.int aliases, which were
    # removed in NumPy 1.24.
    if_add = np.r_[1, np.ediff1d(cell_nodes.indices)].astype(bool)

    indices = np.empty(0, dtype=int)
    # Template array of node indices for refined cells
    ind = np.vstack((np.arange(ratio), np.arange(ratio) + 1)).flatten("F")
    nd = np.r_[np.diff(cell_nodes.indices)[1::2], 0]

    # Loop over all old cells and refine them.
    for c in np.arange(g.num_cells):
        # Find start and end nodes of the old cell
        loc = slice(cell_nodes.indptr[c], cell_nodes.indptr[c + 1])
        start, end = cell_nodes.indices[loc]

        # Flags for whether this is the first occurrence of the nodes of
        # the old cell. If so, they should be added to the new node array
        if_add_loc = if_add[loc]

        # Local cell-node (thus cell-face) relations of the new grid
        indices = np.r_[indices, shift + ind]

        # Add coordinate of the startpoint to the node array if relevant
        if if_add_loc[0]:
            x[:, pos:(pos + 1)] = g.nodes[:, start, np.newaxis]
            pos += 1

        # Add coordinates of the internal nodes
        # NOTE(review): theta weights the start node and (1 - theta) the end
        # node, so for ratio > 2 the first inserted node is the one closest
        # to `end` -- confirm this ordering is intended.
        x[:, pos:(pos + ratio - 1)] = (
            g.nodes[:, start, np.newaxis] * theta
            + g.nodes[:, end, np.newaxis] * (1 - theta)
        )
        pos += ratio - 1
        shift += ratio + (2 - np.sum(if_add_loc) * (1 - nd[c])) - nd[c]

        # Add coordinate to the endpoint, if relevant
        if if_add_loc[1]:
            x[:, pos:(pos + 1)] = g.nodes[:, end, np.newaxis]
            pos += 1

    # For 1d grids, there is a 1-1 relation between faces and nodes
    face_nodes = sps.identity(x.shape[1], format="csc")
    # Two faces per cell, hence the stride of 2 in the index pointer
    cell_faces = sps.csc_matrix((
        np.ones(indices.size, dtype=bool),
        indices,
        np.arange(0, indices.size + 1, 2),
    ))
    g = Grid(1, x, face_nodes, cell_faces, "Refined 1d grid")
    g.compute_geometry()

    return g
writer.add_summary(summary_, iteration) # save the model saver.save(sess, os.path.join(root_savedir, "model.ckpt")) # close the file writer writer.close() if __name__ == '__main__': N = 200 X = np.random.rand(N, N) < 0.4 from scipy.sparse import find rows, cols, _ = find(X) root_savedir = "/Users/Koa/github-repos/bayes-nnet-mf/saved/vi_binary" root_logdir = os.path.join(root_savedir, "tf_logs") if os.path.exists(root_savedir): shutil.rmtree(root_savedir) model = VIBinaryNNetMF() model.train(N, rows, cols, miss_rows=None, miss_cols=None, n_factors=4, hidden_layer_sizes=[10, 8],
def i2cws(self, repeat=1):
    """The Improved Improved Consistent Weighted Sampling (I$^2$CWS) algorithm, samples the two special
       "active indices", $y_k$ and $z_k$, independently by avoiding the equation of $y_k$ and $z_k$ in ICWS.

       W. Wu, B. Li, L. Chen, C. Zhang and P. S. Yu, "Improved Consistent Weighted Sampling Revisited",
       DOI: 10.1109/TKDE.2018.2876250, 2018.

    Note that this method re-seeds NumPy's global random number generator
    (``np.random.seed``) on every call.

    Parameters
    ----------
    repeat: int, default: 1
        the number of repeating the algorithm as the part of the seed of the random number generator
        (the seed used is ``self.seed * 2 ** (repeat - 1)``)

    Returns
    -----------
    fingerprints_k: ndarray, shape (n_instances, dimension_num)
        the $k$ component of the hash codes $(k, y_k)$ for the data matrix (the selected feature index),
        where a row represents a data instance

    fingerprints_y: ndarray, shape (n_instances, dimension_num)
        the $y_k$ component of the hash codes $(k, y_k)$ for the data matrix,
        where a row represents a data instance

    elapsed: float
        wall-clock time (seconds) spent drawing the random matrices and hashing the data matrix
    """
    fingerprints_k = np.zeros((self.instance_num, self.dimension_num))
    fingerprints_y = np.zeros((self.instance_num, self.dimension_num))

    np.random.seed(self.seed * np.power(2, repeat - 1))
    start = time.time()

    # One uniform(0, 1) draw per (feature, hash dimension) for every random
    # quantity. The order of these calls determines the generated values.
    beta1 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
    beta2 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
    u1 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
    u2 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
    u3 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
    u4 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
    v1 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
    v2 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))

    for j_sample in range(0, self.instance_num):
        # Row indices (features) with a positive weight in column j_sample.
        feature_id = sparse.find(self.weighted_set[:, j_sample] > 0)[0]

        # First stage: pick, per hash dimension, the feature minimising
        # a_matrix.
        r2 = -np.log(np.multiply(u3[feature_id, :], u4[feature_id, :]))
        # The log-weights of the active features are repeated across all
        # hash dimensions before being combined with r2 and beta2.
        t_matrix = np.floor(
            np.divide(
                np.matlib.repmat(
                    np.log(self.weighted_set[feature_id, j_sample].todense()),
                    1, self.dimension_num),
                r2) + beta2[feature_id, :])
        z_matrix = np.exp(
            np.multiply(r2, (t_matrix - beta2[feature_id, :] + 1)))
        a_matrix = np.divide(
            -np.log(np.multiply(v1[feature_id, :], v2[feature_id, :])),
            z_matrix)

        # Position (within feature_id) of the column-wise minimum.
        # NOTE(review): the indexing below (r1[0], np.diag) presumably relies
        # on min_position being 2-D, i.e. on .todense() yielding np.matrix
        # objects - confirm before changing array types.
        min_position = np.argmin(a_matrix, axis=0)
        fingerprints_k[j_sample, :] = feature_id[min_position]

        # Second stage: compute the y component for the selected features
        # using the independent draws u1, u2 and beta1. np.diag pairs the
        # d-th selected feature with the d-th hash dimension.
        r1 = -np.log(
            np.multiply(u1[feature_id[min_position], :],
                        u2[feature_id[min_position], :]))
        # NOTE(review): gamma1 applies -log to r1, which is itself
        # -log(u1 * u2); confirm that this double logarithm is intended.
        gamma1 = np.array([-np.log(np.diag(r1[0]))])

        b = np.array([np.diag(beta1[feature_id[min_position], :][0])])
        t_matrix = np.floor(
            np.divide(
                np.log(
                    np.transpose(self.weighted_set[
                        feature_id[min_position], j_sample].todense())),
                gamma1) + b)
        fingerprints_y[j_sample, :] = np.exp(
            np.multiply(gamma1, (t_matrix - b)))

    elapsed = time.time() - start

    return fingerprints_k, fingerprints_y, elapsed
def subgraph_extraction_labeling(ind, A, h=1, sample_ratio=1.0,
                                 max_nodes_per_hop=None, u_features=None,
                                 v_features=None, class_values=None):
    """Extract the h-hop enclosing subgraph around the link ``ind``.

    Starting from the target row node ``ind[0]`` and column node ``ind[1]``
    of ``A``, the fringe is expanded hop by hop (via ``neighbors``), with
    optional random sub-sampling of every hop. The induced sub-matrix is
    turned into a bipartite networkx graph in which the row nodes come first.

    Parameters
    ----------
    ind : pair
        ``(row, column)`` index of the target link in ``A``.
    A : sparse matrix
        Matrix indexed as ``A[rows, :][:, columns]``. Per the original
        comments its non-zero values are rating labels + 1.
    h : int, default 1
        Number of hops to expand.
    sample_ratio : float, default 1.0
        If below 1.0, only ``int(sample_ratio * len(fringe))`` randomly
        chosen nodes of each new fringe are kept.
    max_nodes_per_hop : int or None, default None
        If given, upper bound on the number of nodes kept per hop and side.
    u_features, v_features : indexable or None
        Optional per-node features, indexed with the list of selected nodes.
    class_values : unused
        Accepted for interface compatibility only.

    Returns
    -------
    g : networkx.Graph
        Bipartite graph of the subgraph; the edge attribute ``'type'`` holds
        the matrix value minus one. The target link itself is removed.
    node_labels : list of int
        ``2 * hop`` for row nodes followed by ``2 * hop + 1`` for column
        nodes, where ``hop`` is the hop at which the node was reached.
    node_features : list or None
        ``[u_features[0], v_features[0]]`` (features of the two target nodes)
        when both feature arguments are given, otherwise ``None``.
    """
    u_nodes, v_nodes = [ind[0]], [ind[1]]
    u_dist, v_dist = [0], [0]
    u_visited, v_visited = {ind[0]}, {ind[1]}
    u_fringe, v_fringe = {ind[0]}, {ind[1]}
    for dist in range(1, h + 1):
        # The neighbours of the row fringe form the new column fringe and
        # vice versa; both are computed from the previous fringes.
        v_fringe, u_fringe = (neighbors(u_fringe, A, True),
                              neighbors(v_fringe, A, False))
        u_fringe = u_fringe - u_visited
        v_fringe = v_fringe - v_visited
        u_visited = u_visited.union(u_fringe)
        v_visited = v_visited.union(v_fringe)
        # random.sample() requires a sequence: sampling directly from a set
        # is an error on Python >= 3.11, hence the list() conversions.
        if sample_ratio < 1.0:
            u_fringe = random.sample(list(u_fringe),
                                     int(sample_ratio * len(u_fringe)))
            v_fringe = random.sample(list(v_fringe),
                                     int(sample_ratio * len(v_fringe)))
        if max_nodes_per_hop is not None:
            if max_nodes_per_hop < len(u_fringe):
                u_fringe = random.sample(list(u_fringe), max_nodes_per_hop)
            if max_nodes_per_hop < len(v_fringe):
                v_fringe = random.sample(list(v_fringe), max_nodes_per_hop)
        if len(u_fringe) == 0 and len(v_fringe) == 0:
            break
        u_nodes = u_nodes + list(u_fringe)
        v_nodes = v_nodes + list(v_fringe)
        u_dist = u_dist + [dist] * len(u_fringe)
        v_dist = v_dist + [dist] * len(v_fringe)
    subgraph = A[u_nodes, :][:, v_nodes]
    # remove link between target nodes
    subgraph[0, 0] = 0

    # construct nx graph: row nodes are 0..len(u_nodes)-1, column nodes follow
    g = nx.Graph()
    g.add_nodes_from(range(len(u_nodes)), bipartite='u')
    g.add_nodes_from(range(len(u_nodes), len(u_nodes) + len(v_nodes)),
                     bipartite='v')
    u, v, r = ssp.find(subgraph)  # r is 1, 2... (rating labels + 1)
    r = r.astype(int)
    v += len(u_nodes)
    g.add_edges_from(zip(u, v))
    # transform r back to rating label
    edge_types = dict(zip(zip(u, v), r - 1))
    nx.set_edge_attributes(g, name='type', values=edge_types)

    # get structural node labels
    node_labels = [x * 2 for x in u_dist] + [x * 2 + 1 for x in v_dist]

    # get node features
    if u_features is not None:
        u_features = u_features[u_nodes]
    if v_features is not None:
        v_features = v_features[v_nodes]

    # Only the features of the target row/column nodes are output (the
    # disabled padded-feature and one-hot alternatives were unreachable).
    node_features = None
    if u_features is not None and v_features is not None:
        node_features = [u_features[0], v_features[0]]

    return g, node_labels, node_features
def mutual_info_score(labels_true, labels_pred, *, contingency=None):
    """Mutual Information between two clusterings.

    The Mutual Information is a measure of the similarity between two labels of
    the same data. Where :math:`|U_i|` is the number of the samples
    in cluster :math:`U_i` and :math:`|V_j|` is the number of the
    samples in cluster :math:`V_j`, the Mutual Information
    between clusterings :math:`U` and :math:`V` is given as:

    .. math::

        MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}
        \\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}

    This metric is independent of the absolute values of the labels:
    a permutation of the class or cluster label values won't change the
    score value in any way.

    This metric is furthermore symmetric: switching ``label_true`` with
    ``label_pred`` will return the same score value. This can be useful to
    measure the agreement of two independent label assignments strategies
    on the same dataset when the real ground truth is not known.

    Read more in the :ref:`User Guide <mutual_info_score>`.

    Parameters
    ----------
    labels_true : int array-like of shape (n_samples,)
        A clustering of the data into disjoint subsets.

    labels_pred : int array-like of shape (n_samples,)
        A clustering of the data into disjoint subsets.

    contingency : {None, array, sparse matrix} of shape \
            (n_classes_true, n_classes_pred), default=None
        A contingency matrix given by the :func:`contingency_matrix` function.
        If value is ``None``, it will be computed, otherwise the given value is
        used, with ``labels_true`` and ``labels_pred`` ignored.

    Returns
    -------
    mi : float
       Mutual information, a non-negative value. It is exactly ``0.0`` when
       either clustering consists of a single cluster.

    Raises
    ------
    ValueError
        If ``contingency`` is neither a NumPy array nor a sparse matrix after
        validation.

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).

    See Also
    --------
    adjusted_mutual_info_score : Adjusted against chance Mutual Information.
    normalized_mutual_info_score : Normalized Mutual Information.
    """
    if contingency is None:
        labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
        contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
    else:
        contingency = check_array(contingency,
                                  accept_sparse=['csr', 'csc', 'coo'],
                                  dtype=[int, np.int32, np.int64])

    if isinstance(contingency, np.ndarray):
        # For an array
        nzx, nzy = np.nonzero(contingency)
        nz_val = contingency[nzx, nzy]
    elif sp.issparse(contingency):
        # For a sparse matrix
        nzx, nzy, nz_val = sp.find(contingency)
    else:
        raise ValueError("Unsupported type for 'contingency': %s" %
                         type(contingency))

    contingency_sum = contingency.sum()
    pi = np.ravel(contingency.sum(axis=1))
    pj = np.ravel(contingency.sum(axis=0))

    # Since MI <= min(H(U), H(V)), a labelling with zero entropy (a single
    # cluster) implies MI = 0. Returning early avoids reporting a spurious
    # tiny positive value caused by floating-point rounding below.
    if pi.size == 1 or pj.size == 1:
        return 0.0

    log_contingency_nm = np.log(nz_val)
    contingency_nm = nz_val / contingency_sum
    # Don't need to calculate the full outer product, just for non-zeroes
    outer = (pi.take(nzx).astype(np.int64, copy=False)
             * pj.take(nzy).astype(np.int64, copy=False))
    log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
    mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
          contingency_nm * log_outer)
    # Rounding can push the sum slightly below zero; clip it.
    return np.clip(mi.sum(), 0.0, None)
def _sparse_num_jac(fun, t, y, f, h, factor, y_scale, structure, groups):
    """Estimate a sparse Jacobian of ``fun`` with respect to ``y`` by
    finite differences, perturbing all variables of a group at once.

    Parameters
    ----------
    fun : callable
        Called as ``fun(t, Y)`` with ``Y`` a 2-D array holding one perturbed
        state per column; the result is indexed as ``[component, column]``.
    t : scalar
        Passed through to ``fun`` unchanged.
    y : ndarray, shape (n,)
        State at which the differences are taken.
    f : ndarray, shape (n,)
        Reference value subtracted from the perturbed evaluations
        (presumably ``fun(t, y)`` - confirm against the caller).
    h : ndarray, shape (n,)
        Step for every variable. Modified in place where a larger step is
        accepted.
    factor : ndarray, shape (n,)
        Per-variable step factors. Modified in place; the returned array is
        a new one (see Returns).
    y_scale : ndarray, shape (n,)
        Scale multiplied by the increased factor to build a retry step.
    structure : sparse matrix, shape (n, n)
        Its non-zero positions (as reported by ``find``) define which
        Jacobian entries are computed.
    groups : ndarray of int, shape (n,)
        Group id of every variable (column); ids run from 0 to
        ``max(groups)``.

    Returns
    -------
    diff : csc matrix, shape (n, n)
        Finite-difference quotients on the given sparsity pattern.
    factor : ndarray, shape (n,)
        Updated factors, bounded below by ``NUM_JAC_MIN_FACTOR``.
    """
    n = y.shape[0]
    n_groups = np.max(groups) + 1
    # Row `group` of h_vecs carries the steps of the variables in that group
    # and zeros elsewhere; after the transpose each column is one
    # perturbation vector.
    h_vecs = np.empty((n_groups, n))
    for group in range(n_groups):
        e = np.equal(group, groups)
        h_vecs[group] = h * e
    h_vecs = h_vecs.T

    # Evaluate all perturbed states in a single call.
    f_new = fun(t, y[:, None] + h_vecs)
    df = f_new - f[:, None]

    # For every structural non-zero (i, j) take the change of component i
    # obtained when perturbing the group that contains variable j.
    i, j, _ = find(structure)
    diff = coo_matrix((df[i, groups[j]], (i, j)), shape=(n, n)).tocsc()
    # Per column: row of the largest absolute difference, its magnitude and
    # the magnitude of the function values it is compared against.
    max_ind = np.array(abs(diff).argmax(axis=0)).ravel()
    r = np.arange(n)
    max_diff = np.asarray(np.abs(diff[max_ind, r])).ravel()
    scale = np.maximum(np.abs(f[max_ind]),
                       np.abs(f_new[max_ind, groups[r]]))

    # Columns whose largest difference is too small relative to `scale` are
    # recomputed with an increased step.
    diff_too_small = max_diff < NUM_JAC_DIFF_REJECT * scale
    if np.any(diff_too_small):
        ind, = np.nonzero(diff_too_small)
        new_factor = NUM_JAC_FACTOR_INCREASE * factor[ind]
        # Computed as (y + step) - y so that h_new is the step actually
        # representable in floating point.
        h_new = (y[ind] + new_factor * y_scale[ind]) - y[ind]
        h_new_all = np.zeros(n)
        h_new_all[ind] = h_new

        # Only groups containing a rejected column are re-evaluated;
        # groups_map translates a group id into its column of the new
        # evaluation (entries for other groups stay uninitialised and are
        # never read).
        groups_unique = np.unique(groups[ind])
        groups_map = np.empty(n_groups, dtype=int)
        h_vecs = np.empty((groups_unique.shape[0], n))
        for k, group in enumerate(groups_unique):
            e = np.equal(group, groups)
            h_vecs[k] = h_new_all * e
            groups_map[group] = k
        h_vecs = h_vecs.T

        f_new = fun(t, y[:, None] + h_vecs)
        df = f_new - f[:, None]
        # `j` now indexes into `ind` (columns of the restricted structure).
        i, j, _ = find(structure[:, ind])
        diff_new = coo_matrix((df[i, groups_map[groups[ind[j]]]],
                               (i, j)), shape=(n, ind.shape[0])).tocsc()

        max_ind_new = np.array(abs(diff_new).argmax(axis=0)).ravel()
        r = np.arange(ind.shape[0])
        max_diff_new = np.asarray(np.abs(diff_new[max_ind_new, r])).ravel()
        scale_new = np.maximum(
            np.abs(f[max_ind_new]),
            np.abs(f_new[max_ind_new, groups_map[groups[ind]]]))

        # Keep the retry only where it improves the difference-to-scale
        # ratio (compared by cross-multiplication, avoiding a division).
        update = max_diff[ind] * scale_new < max_diff_new * scale[ind]
        if np.any(update):
            update, = np.nonzero(update)
            update_ind = ind[update]
            # In-place updates: the caller's `factor` and `h` arrays change.
            factor[update_ind] = new_factor[update]
            h[update_ind] = h_new[update]
            diff[:, update_ind] = diff_new[:, update]
            scale[update_ind] = scale_new[update]
            max_diff[update_ind] = max_diff_new[update]

    # Divide every stored entry by the step of its column: in CSC format
    # np.diff(indptr) is the number of stored entries per column.
    diff.data /= np.repeat(h, np.diff(diff.indptr))

    # Adapt the factors for the next call (in place), then apply the lower
    # bound, which creates a new array.
    factor[max_diff < NUM_JAC_DIFF_SMALL * scale] *= NUM_JAC_FACTOR_INCREASE
    factor[max_diff > NUM_JAC_DIFF_BIG * scale] *= NUM_JAC_FACTOR_DECREASE
    factor = np.maximum(factor, NUM_JAC_MIN_FACTOR)

    return diff, factor
def construct_line_graph_directed(node_ids, A, node_features):
    """Build the directed line graph of the graph stored in ``A``.

    Every non-zero entry ``A[u, v]`` contributes the two directed edges
    ``(u, v)`` and ``(v, u)``, both described with the weight ``A[u, v]``.
    Each directed edge becomes a node of the line graph.

    Parameters
    ----------
    node_ids : unused
        Accepted for interface compatibility only; the returned ids are
        generated for the line-graph nodes.
    A : sparse matrix
        Adjacency matrix with integer weights. Each weight is used as an
        index into a label vector of length 53.
    node_features : array-like with ``tolist()``
        Per-node feature (class), indexed by node number.

    Returns
    -------
    L_node_features : torch.LongTensor
        One row per line-graph node: a one-hot encoding of the edge weight
        in positions 0..51, with the last position flagging whether the two
        end nodes have different features (a weight of 52 overwrites that
        flag, exactly as in the original implementation).
    edge_list : torch.LongTensor
        Pairs of line-graph node numbers, one per line-graph edge.
    num_nodes : int
        Number of line-graph nodes.
    node_ids : torch.LongTensor
        ``0 .. num_nodes - 1``.
    f : torch.LongTensor
        ``[feature of source, feature of target]`` for every line-graph node.
    """
    u, v, r = ssp.find(A)
    # max() of an empty sequence raises, so only report when edges exist.
    if len(r):
        print(f'max_weight: {max(r)}')
    node_features = node_features.tolist()

    A_edges_forward = list(zip(u, v))
    A_edges_reverse = list(zip(v, u))

    info = {}
    node_class = {}
    # The weights returned by find() line up with both edge lists, which
    # avoids one sparse lookup A[src, end] per edge. Reverse edges are
    # processed last so that, as before, they win when both directions of a
    # pair are stored in A.
    for edges in (A_edges_forward, A_edges_reverse):
        for (src, end), weight in zip(edges, r):
            f_src, f_end = node_features[src], node_features[end]
            edge_label = [0] * 52 + [f_src != f_end]
            edge_label[weight] = 1
            info[(src, end)] = edge_label
            node_class[(src, end)] = [f_src, f_end]

    G = nx.DiGraph()
    G.add_edges_from(A_edges_forward)
    G.add_edges_from(A_edges_reverse)

    L = nx.line_graph(G)
    num_nodes = L.number_of_nodes()
    L_nodes = list(L.nodes)

    # Number the line-graph nodes consecutively in iteration order.
    index = {node: k for k, node in enumerate(L_nodes)}
    L_node_features = [info[node] for node in L_nodes]
    f = [node_class[node] for node in L_nodes]
    new_node_ids = list(range(len(L_nodes)))
    edge_list = [[index[v1], index[v2]] for v1, v2 in L.edges]

    return (torch.LongTensor(L_node_features), torch.LongTensor(edge_list),
            num_nodes, torch.LongTensor(new_node_ids), torch.LongTensor(f))
def cfl(self, g_h, g_l, data_h, data_l, data_edge, d_name="mortar_solution"):
    """
    Return the time step according to the CFL condition.
    Note: the vector field is assumed to be given as the normal velocity,
    weighted with the face area, at each face.

    The name of data in the input dictionary (data) are:
    discharge : array (g.num_faces)
        Normal velocity at each face, weighted by the face area.

    Parameters:
        g_h: grid of higher dimension
        g_l: grid of lower dimension
        data_h: dictionary which stores the data for the higher dimensional
            grid
        data_l: dictionary which stores the data for the lower dimensional
            grid
        data_edge: dictionary which stores the data for the edges of the grid
            bucket; read for the keys "mortar_grid", "face_cells" and d_name
        d_name: key in data_edge under which the mortar flux is stored.
            Defaults to "mortar_solution".

    Return:
        deltaT: time step according to CFL condition; infinity when all
            relevant discharges are zero.

    Note: the design of this function has not been updated according
    to the mortar structure. Instead, mg.high_to_mortar_int.nonzero()[1]
    is used to map the 'mortar_solution' (one flux for each mortar dof) to
    the old discharge (one flux for each g_h face).
    """
    # Retrieve the discharge, which is mandatory
    aperture_h = data_h["param"].get_aperture()
    aperture_l = data_l["param"].get_aperture()
    phi_l = data_l["param"].get_porosity()
    mg = data_edge["mortar_grid"]
    discharge = np.zeros(g_h.num_faces)
    discharge[mg.high_to_mortar_int.nonzero()[1]] = data_edge[d_name]
    if g_h.dim == g_l.dim:
        # More or less same as below, except we have cell_cells in the place
        # of face_cells (see grid_bucket.duplicate_without_dimension).
        phi_h = data_h["param"].get_porosity()
        cells_l, cells_h = data_edge["face_cells"].nonzero()
        not_zero = ~np.isclose(
            np.zeros(discharge.shape), discharge, atol=0)
        if not np.any(not_zero):
            # np.inf (lower case): the np.Inf alias no longer exists in
            # NumPy 2.0.
            return np.inf

        diff = g_h.cell_centers[:, cells_h] - g_l.cell_centers[:, cells_l]
        dist = np.linalg.norm(diff, 2, axis=0)

        # Use minimum of cell values for convenience
        phi_l = phi_l[cells_l]
        phi_h = phi_h[cells_h]
        apt_h = aperture_h[cells_h]
        apt_l = aperture_l[cells_l]
        coeff = np.minimum(phi_h, phi_l) * np.minimum(apt_h, apt_l)
        return np.amin(np.abs(np.divide(dist, discharge)) * coeff)

    # Recover the information for the grid-grid mapping
    cells_l, faces_h, _ = sps.find(data_edge["face_cells"])

    # Detect and remove the faces which have zero in "discharge"
    not_zero = ~np.isclose(
        np.zeros(faces_h.size), discharge[faces_h], atol=0)
    if not np.any(not_zero):
        return np.inf

    cells_l = cells_l[not_zero]
    faces_h = faces_h[not_zero]
    # Mapping from faces_h to cell_h
    cell_faces_h = g_h.cell_faces.tocsr()[faces_h, :]
    # NOTE(review): `not_zero` is applied a second time here although
    # `faces_h` has already been filtered with it - confirm that the mask
    # length still matches when some faces were removed.
    cells_h = cell_faces_h.nonzero()[1][not_zero]
    # Retrieve and map additional data
    aperture_h = aperture_h[cells_h]
    aperture_l = aperture_l[cells_l]
    phi_l = phi_l[cells_l]
    # Compute discrete distance cell to face centers for the lower
    # dimensional grid
    dist = 0.5 * np.divide(aperture_l, aperture_h)
    # Since discharge is multiplied by the aperture weighted face areas, we
    # divide through that quantity to get velocities in [length/time]
    velocity = np.divide(discharge[faces_h],
                         g_h.face_areas[faces_h] * aperture_h)
    # deltaT is deltaX/velocity with coefficient
    return np.amin(np.abs(np.divide(dist, velocity)) * phi_l)