Example #1
    def evalModel(self, inData, inGt, inImg, gtShape, plot=True):
        (dataOutY, dataOutX, dataVals) = sp.find(inData)
        if(inGt is not None):
            if(self.gtSparse):
                (gtOutY, gtOutX, gtVals) = sp.find(inGt)
                feedDict = {self.dataIndices:[dataOutY, dataOutX], self.dataValues:dataVals,
                        self.gtIndices:[gtOutY, gtOutX], self.gtValues:gtVals,
                        self.keep_prob:1.0
                        }
            else:
                feedDict = {self.dataIndices:[dataOutY, dataOutX], self.dataValues:dataVals,
                        self.gt:inGt,
                       self.keep_prob:1.0
                       }
        else:
            feedDict = {self.dataIndices:[dataOutY, dataOutX], self.dataValues:dataVals,
                    self.keep_prob:1.0
                    }

        outVals = self.est.eval(feed_dict=feedDict, session=self.sess)
        if(inGt is not None):
            summary = self.sess.run(self.mergedSummary, feed_dict=feedDict)
            self.test_writer.add_summary(summary, self.timestep)
        if(plot):
            filename = self.plotDir + "test_" + str(self.timestep)
            if(self.gtSparse):
                gt = np.reshape(inGt.toarray(), (self.batchSize, gtShape[0], gtShape[1], gtShape[2], gtShape[3]))
            else:
                gt = inGt
            data = (inData, inGt, inImg)
            self.evalAndPlotCam(feedDict, data, gt, filename)

        return outVals
Example #2
    def trainModel(self, dataObj, save, plot):
        for i in range(self.innerSteps):
            #Get data from dataObj
            data = dataObj.getData(self.batchSize)
            (dataOutY, dataOutX, dataVals) = sp.find(data[0])

            if(self.gtSparse):
                (gtOutY, gtOutX, gtVals) = sp.find(data[1])
                feedDict = {self.dataIndices:[dataOutY, dataOutX], self.dataValues:dataVals,
                            self.gtIndices:[gtOutY, gtOutX], self.gtValues:gtVals}
            else:
                feedDict = {self.dataIndices:[dataOutY, dataOutX], self.dataValues:dataVals,
                        self.gt:data[1]}

            #feedDict = {self.inputImage: data[0], self.gt: data[1]}
            #Run optimizer
            self.sess.run(self.optimizerAll, feed_dict=feedDict)
            self.sess.run(self.optimizerBias, feed_dict=feedDict)
            if(i%self.writeStep == 0):
                summary = self.sess.run(self.mergedSummary, feed_dict=feedDict)
                self.train_writer.add_summary(summary, self.timestep)
            if(i%self.progress == 0):
                print "Timestep ", self.timestep
            self.timestep+=1
        if(save):
            save_path = self.saver.save(self.sess, self.saveFile, global_step=self.timestep, write_meta_graph=False)
            print("Model saved in file: %s" % save_path)
        if(plot):
            filename = self.plotDir + "train_" + str(self.timestep)
            gtShape = dataObj.gtShape
            if(self.gtSparse):
                gt = np.reshape(data[1].toarray(), (self.batchSize, gtShape[0], gtShape[1], gtShape[2], gtShape[3]))
            else:
                gt = data[1]
            self.evalAndPlotCam(feedDict, data, gt, filename)
Example #3
 def lookup_arg_freqs(self, word):
     i = self.vocabulary.get(word)
     if i is None:
         return None
     sum1 = sum(find(self.binary_sparse[::2, i])[2])
     sum2 = sum(find(self.binary_sparse[1::2, i])[2])
     return sum1, sum2
Example #4
 def lookup_0_freqs(self, word):
     i = self.vocabulary.get(word)
     if i is None:
         return None
     out_sum = sum(find(self.zero_sparse[i, :])[2])
     in_sum = sum(find(self.zero_sparse[:, i])[2])
     return out_sum, in_sum
Example #5
	def learning(self,npres):
		learned_connectivity=sparse.lil_matrix(self.connectivity)
		for i in range(npres):
			
			#Potentiation
			mydim=len(sparse.find(self.stimulus[i])[1])
			row=np.tile(sparse.find(self.stimulus[i])[1],mydim)+1
			col=np.repeat(sparse.find(self.stimulus[i])[1],mydim)+1
			pot_entries=np.random.binomial(1,self.qp,mydim*mydim)
			row_pot=list(sparse.find(pot_entries*row)[2]-1)
			col_pot=list(sparse.find(pot_entries*col)[2]-1)
			learned_connectivity[row_pot,col_pot]=1
			
			#pot_matrix=sparse.coo_matrix((pot_entries, (row, col)), shape=(self.N, self.N))
			
			
			#Depression
			mydim_col=len(sparse.find(-(self.stimulus[i]-np.ones(self.N)))[1])
			mydim_row=mydim
			col=np.repeat(sparse.find(-(self.stimulus[i]-np.ones(self.N)))[1],mydim_row)+1
			row=np.tile(sparse.find(self.stimulus[i])[1],mydim_col)+1
			dep_entries=np.random.binomial(1,self.qn,mydim_row*mydim_col) #entries that wont be depressed
			row_dep=list(sparse.find(dep_entries*row)[2]-1)
			col_dep=list(sparse.find(dep_entries*col)[2]-1)
			learned_connectivity[row_dep,col_dep]=0
			
			
			
			print('learning pattern ', i+1, ' of ', self.p)
		
		return learned_connectivity
Example #6
 def lookup_bin_freqs(self, word):
     i = self.binary_vocab.get(word)
     if i is None:
         return None
     sum1 = sum(find(self.binary_sparse[2*i, :])[2])
     sum2 = sum(find(self.binary_sparse[2*i+1, :])[2])
     return sum1, sum2
Example #7
import math
import numpy as np
from scipy import sparse

def cosine_similarity(v1,v2):
    """fast cosine similarity for sparse vectors"""

    v1_idxs, _, _ = sparse.find(v1)
    v2_idxs, _, _ = sparse.find(v2)

    sumxx, sumxy, sumyy = 0, 0, 0
    for i in set(np.append(v1_idxs, v2_idxs)):
        x = v1[(i,0)]; y = v2[(i,0)]
        sumxx += x*x
        sumyy += y*y
        sumxy += x*y
    return sumxy/math.sqrt(sumxx*sumyy)
Example #8
import numpy as np
import scipy.sparse as sp

def array_equal(a1, a2):
    """array_equal that supports sparse and dense arrays with missing values"""
    if a1.shape != a2.shape:
        return False

    if not (sp.issparse(a1) or sp.issparse(a2)):  # Both dense: just compare
        return np.allclose(a1, a2, equal_nan=True)

    v1 = np.vstack(sp.find(a1)).T
    v2 = np.vstack(sp.find(a2)).T
    if not (sp.issparse(a1) and sp.issparse(a2)):  # Any dense: order indices
        v1.sort(axis=0)
        v2.sort(axis=0)
    return np.allclose(v1, v2, equal_nan=True)
Example #9
	def connectivityMatrixNew(self):

		self.patterns =np.random.normal(0,1, size=(self.p,self.N))
		mybin=np.random.binomial(1,0.5,size=(self.p,self.N))
		#self.patterns =np.multiply(mybin,np.random.normal(-1,1, size=(self.p,self.N)))+np.multiply(1-mybin,np.random.normal(1,1,size=(self.p,self.N)))
		#mu1=0.0
		#sigma1=1.0
		#self.patterns =np.random.lognormal(mu1,sigma1, size=(self.p,self.N))-np.exp(mu1+(sigma1**2)/2.)
		print('Patterns created. N patterns:', self.p)
		patterns_pre=self.patterns
		patterns_post=self.patterns
		
		#creating  connectivity with sparse matrices
		rv=bernoulli(1).rvs
		#connectivity=sparse.csr_matrix(sparse.random(self.N,self.N,density=self.c,data_rvs=rv))
		indexes=sparse.find(sparse.random(self.N,self.N,density=self.c,data_rvs=rv))
		print('Connectivity created. N patterns:', self.p)
		
		#finding the non zero entries
		#index_row=sparse.find(connectivity)[0]
		#index_col=sparse.find(connectivity)[1]
		
		# smart way to write down the outer product learning
		connectivity=(self.Amp/(self.c*self.N))*np.einsum('ij,ij->j',patterns_post[:,indexes[0]],patterns_pre[:,indexes[1]])
		connectivity=sparse.csr_matrix((connectivity,(indexes[0],indexes[1])),shape=(self.N,self.N))
		print('Connectivity loaded with patterns. N patterns:', self.p)
		self.connectivity=connectivity
Example #10
 def find_user_top_match(self, user_index, nbr_recommendations = 5, k = 20, k_min = 10, sim = 0.15, rating_normalisation = True):
     '''
     Compute all the item ratings for a given user and output the most relevant
     '''
     user_ratings = np.zeros(self.nbr_items)
     already_rated = find(self.relationship_matrix_csc[:,user_index])[0]
     already_rated = np.r_[already_rated,user_index]
     
     for i, rating in enumerate(user_ratings):
         if i not in already_rated:
             try:
                 rating = self.predict_rating_userbased(user_index, i, k, k_min, 'All', sim, rating_normalisation)
             except Error:
                 rating = 0.0
         else:
             # The rating is not actually zero, we put zero for excluding them from the result
             rating = 0.0
     
         user_ratings[i] = rating
     
     top_results = {}
     nonzero_index = user_ratings.nonzero()[0]
     
     for item in nonzero_index:
         top_results[item] = user_ratings[item]
     
     sorted_top_results = sorted(top_results.items(), key=itemgetter(1), reverse = True)
     
     return [int(i[0]) for i in sorted_top_results], [i[1] for i in sorted_top_results]
Example #11
def read_options(*args, **kwargs):
    ''' checks/sets options read from calling module'''
    if len(args)>1:
        Z = args[0]
    else:
        Z = kwargs["Z"]
    W = kwargs.get("W", None)
    if W is None:
        rows, cols = sp.find(Z)[:2]
        W = sp.coo_matrix( (np.ones(len(rows), dtype=int), (rows, cols)), shape=Z.shape, dtype=int).tocsr()
    rowAttr = kwargs.get("rowAttr", None)
    colAttr = kwargs.get("colAttr", None)
    crossAttr = kwargs.get("crossAttr", None)
    
    learner = kwargs.get("learner", "ridge")
    train_loss = kwargs.get("train_loss", "sq_err")
    test_loss = kwargs.get("test_loss", "mse")
    num_cv = kwargs.get("num_cv", default_cv)
    init_K = kwargs.get("K", default_K) 
    init_L = kwargs.get("L", default_L) 
    model_filename = kwargs.get("model_filename", default_model_filename)
    
    if "alphas" in kwargs:
        params = [{"alpha":alpha} for alpha in kwargs["alphas"]]
    else:
        params = kwargs.get("params", default_param_list) 
    
    # kwargs get over-written by args
    arglist = [Z, W, rowAttr, colAttr, crossAttr, learner, params, \
               train_loss, test_loss, \
               num_cv, init_K, init_L, model_filename]
    for ix in range(1, len(args)):
        arglist[ix] = args[ix]
    return arglist
Example #12
import numpy as np
import pylab as pl
import scipy.sparse as spr

def draw_log_hist(X):
    """Draw tokens histogram in log scales"""
    i, j, v = spr.find(X)
    tokens_count = np.zeros(X.shape[0])
    indexes = np.arange(0,X.shape[0])
    for colNumb in range(0,X.shape[1]):
        users_count = len(np.extract( j == colNumb, j))
        print(colNumb, users_count)
        if (users_count):
            tokens_count[users_count] += 1
    nonzero_ind = np.nonzero(tokens_count)
    x = np.take(indexes,nonzero_ind)
    y = np.take(tokens_count,nonzero_ind)
    print(x)
    print(y)
    pl.figure(figsize=(30,30))
    pl.title("Token frequency distribution")
    pl.xlabel('users_count', size = 12)
    pl.ylabel('tokens_count', size = 12)
    ax = pl.subplot(111)
    ax.set_xscale('log')
    ax.set_yscale('log')
    pl.scatter(x, y)
    pl.show()
    return
Example #13
import numpy as np
from scipy import sparse
from scipy.sparse.csgraph import dijkstra

def extract_manifold_distances_knn(D, knn=[3,4,5,7,10], add_mst=None):
    '''
    Return the distances along a k nearest neighbour graph for the given
    distances D (Using dijkstra). It also returns the knn graph itself.
    This is a generator function and will return an iterator for each k given in knn.

    Optionally you can add the edges from an additional graph (usually mst), in
    order to ensure full connectedness. Give the graph you want to add as add_mst=mst.

    returns iterator for each k: iter([distances along knn, knn])
    '''
    # K Nearest Neighbours distances
    idxs = np.argsort(D)
    r = range(D.shape[0])
    for k in knn:
        idx = idxs[:, :k]
        _distances = sparse.csc_matrix(D.shape)
        for neighbours in idx.T:
            _distances[r, neighbours] = D[r, neighbours]
        if add_mst is not None:
            for i,j,v in zip(*sparse.find(add_mst)):
                if _distances[i,j] == 0:
                    _distances[i,j] = v
        nearest_neighbour_distances = dijkstra(_distances, directed=False)
        yield nearest_neighbour_distances, _distances
Example #14
def index_map_to_region_map( hom_mat, reg2gen ):
    """
    hom_mat : numpy matrix representing a map on generators (index map).

    reg2gen : dictionary mapping region -> generator(s). (I.e, which
    regions support which generators)

    Returns a DiGraph object of the map on regions in phase space.
    """
    H = hom_mat
    R = reg2gen
    Rinv = invert_dictionary( R )
    G = DiGraph()
    
    # find where region k maps to based the index map
    for k in R.keys():
        # find generator connections
        if hasattr( H, 'nnz' ):
            if len( R[k] ) == 0:
                continue
            gen_conns, _J, _V = sparse.find( H[:,R[k]] )
        else:
            # dense matrix case
            gen_conns = np.where( H[:,R[k]] != 0 )[0]
            gen_conns = gen_conns.tolist()[0] # fix matrix formatting
        for edge in gen_conns:
            for glist in Rinv.keys():
                if edge in glist:
                    G.add_edge( k, Rinv[glist][0] )

    # return the graph so that we have access to the nodes labels that
    # correspond directly to regions with generators.
    return G
Example #15
    def build_weight_matrix(self, tf_idf_matrix):
        nnz_i, nnz_j, elems = sparse.find(tf_idf_matrix)
        value = np.zeros(elems.shape[0])
        value.fill(self.options['WM'])

        r = sparse.coo_matrix((value, (nnz_i, nnz_j)), shape=tf_idf_matrix.shape)
        return r.tocsr()
Example #16
def _individuals_erank(score_mat, test_data):
    """
    Computes the expected rank of the test data. The returned value is the average across all individuals.
    For each individual we first average the rank over their test data.

     INPUT:
    -------
        1. score_mat:    <(I, L) csr_mat>  users scores. Doesn't have to be probabilities (for SVD)
        2. test_data:    <(I, L) csr_mat>  users test observations.

     OUTPUT:
    --------
        1. avg_erank:   <float>  avg. erank of all the individual.
    """
    avg_erank = 0

    start = time.time()
    I = score_mat.shape[0]
    for i in range(I):
        i_test, i_counts = np.vstack(find(test_data[i]))[1:]
        i_erank = _obj_erank(score_mat[i], i_test)
        i_erank *= i_counts

        avg_erank += np.sum(i_erank) / np.sum(i_counts)

        if i % 200 == 0:
            log.debug('Done testing %d out of %d users' % (i, I))

    total = time.time() - start
    log.info('Erank for individuals took %d seconds. %.2f secs on avg for indiv' % (total, total / I))
    return avg_erank / I
Example #17
def get_vector_length(v):
    idxs, _, value = sparse.find(v)
    sumxx = 0
    for i in idxs:
        x = v[i, 0]  # v is assumed to be an (n, 1) sparse column vector
        sumxx += x*x
    return math.sqrt(sumxx*1.0)
Example #18
 def fit(self, X, Y):
     self.n_topics = Y.shape[1]
     ones = len(sp.find(Y)[2])
     self.mu = ones / X.shape[0]
     for clf in self.clfs:
         clf.fit(X, Y)
     return self
Example #19
	def connectivityMatrixNew(self):
		
		self.patterns =np.random.lognormal(self.mu,self.sigma, size=(self.p,self.N))
		
		patterns_pre=self.g(self.patterns)
		patterns_post=self.f(self.patterns)

		#creating  connectivity
		connectivity=sparse.csr_matrix(1.*np.random.binomial(1,self.c,(self.N,self.N)))
		index_row=sparse.find(connectivity)[0]
		index_col=sparse.find(connectivity)[1]
	
		for i in range(len(index_row)):
			connectivity[index_row[i],index_col[i]]= (1./(self.c*self.N*self.intg2))*patterns_post[:,index_row[i]].dot(patterns_pre[:,index_col[i]])
			#print 'Porcentage of the Connectivity Matrix Built: ',100.*round(float(i)/len(index_row),1)
		self.connectivity=connectivity
Example #20
    def evalModel(self, inData, inGt, inImg, gtShape, plot=True):

        if(inGt is not None):
            if(self.gtSparse):
                (gtOutY, gtOutX, gtVals) = sp.find(inGt)
                feedDict = {self.inputImage:inData,
                            self.gtIndices:[gtOutY, gtOutX], self.gtValues:gtVals,
                            }
            else:
                feedDict = {self.inputImage:inData,
                        self.gt:inGt}
        else:
            feedDict ={self.inputImage:inData}

        #Do not augment when evaluating
        if(self.augment):
            feedDict[self.doAug] = 0.0

        outVals = self.est.eval(feed_dict=feedDict, session=self.sess)
        if(inGt is not None):
            summary = self.sess.run(self.mergedSummary, feed_dict=feedDict)
            self.test_writer.add_summary(summary, self.timestep)
        if(plot):
            filename = self.featureMapDir + "test_" + str(self.timestep)
            #if(self.gtSparse):
            #    gt = np.reshape(inGt.toarray(), (self.batchSize, gtShape[0], gtShape[1], gtShape[2], gtShape[3]))
            #else:
            #    gt = inGt
            #data = (inData, inGt, inImg)
            #self.evalAndPlotCam(feedDict, data, gt, filename)
            if(self.plotFM):
                self.evalAndPlotFeaturemaps(feedDict, filename)

        return outVals
Example #21
 def feature_training2(self, initialize_model = True, verbose = False):
     '''
     Compute each features using a Gradient Descent approach
     This version call the Cython estimator_loop2() function
     '''
     
     rmse = 2.0
     
     # Initialize the model with previous results if available
     if initialize_model:
         self.svd_v = np.zeros([self.dimensionality, self.nbr_users]) + self.feature_init
         self.svd_u = np.zeros([self.dimensionality, self.nbr_items]) + self.feature_init
     
     nbr_ratings = find(self.relationship_matrix)[2].shape[0]
     ratings_cache = np.zeros(self.nbr_users * self.nbr_items, dtype = np.float64)
     
     ratings_index, ratings = self.get_ratings()
     
     for f in range(self.dimensionality):
         epoch = 0
         
         while (epoch < self.min_epochs or rmse <= rmse_last - self.min_improvement):
             rmse_last = rmse
             rmse = estimator_subloop(f, epoch, self.min_improvement, self.dimensionality, self.feature_init, self.learning_rate,
                     self.K, self.svd_u, self.svd_v, ratings_index, ratings, ratings_cache, self.nbr_users, self.nbr_items, int(verbose))
             
             epoch += 1
             
         predictor_subloop(f, epoch, self.dimensionality, self.feature_init, self.svd_u, self.svd_v, ratings_index, ratings, ratings_cache,
                             self.nbr_users, self.nbr_items)
Example #22
    def get_feedback(self):
        
        nbr_ratings = find(self.N)[2].shape[0]
        ratings_id = np.zeros([nbr_ratings,2], dtype = np.int32)
        ratings_hash = np.zeros([self.nbr_users,2], dtype = np.int32)

        for i, (user_index, feature_index) in enumerate(self.N_iterator()):
            ratings_id[i] = [int(user_index), int(feature_index)]
        
        index = np.arange(nbr_ratings)

        # Add sort here
        
        # We assume ratings_id is sorted
        for u in range(self.nbr_users):
            position = np.where(ratings_id[:,0] == u)[0]
            try:
                seek = position[0]
                span = len(position)
            except IndexError:
                seek = -1
                span = -1
            
            ratings_hash[u] = [seek, span]
            
        return ratings_id[index], ratings_hash
Example #23
    def find_user_top_match(self, user_index, nbr_recommendations = 5):
        '''
        Compute all the feature ratings for a given user, sort the result and output the most relevant ones.

            * user_index: Internal id of the user
            * nbr_recommendations: Number of recommendations [5]
        '''
        user_ratings = np.zeros(self.nbr_items)
        self.relationship_matrix_csc = self.relationship_matrix.T.tocsc()

        already_rated = find(self.relationship_matrix_csc[:,user_index])[0]
        already_rated = np.r_[already_rated,user_index]
        
        for i, rating in enumerate(user_ratings):
            if i not in already_rated:
                try:
                    rating = self.predict_rating(i, user_index)
                except Error:
                    rating = 0.0
            else:
                # The rating is not actually zero, we put zero for excluding them from the result
                rating = 0.0
        
            user_ratings[i] = rating
        
        top_results = {}
        nonzero_index = user_ratings.nonzero()[0]
        
        for item in nonzero_index:
            top_results[item] = user_ratings[item]
        
        sorted_top_results = sorted(top_results.items(), key=itemgetter(1), reverse = True)
        
        return [int(i[0]) for i in sorted_top_results[0:nbr_recommendations]], [i[1] for i in sorted_top_results[0:nbr_recommendations]]
Example #24
def main(layers, selector, outfile, threshold=0.55):
    print("Reading climate layers", file=sys.stderr)
    layers = read_climate_layers(layers)
    print("Reading selector raster", file=sys.stderr)
    selector = get_selector_raster(selector, threshold=threshold)
    print("Calculating weighted climate layers", file=sys.stderr)
    layers = get_weighted_layers(layers, selector)

    if outfile == 'stdout':
        ofh = sys.stdout
    else:
        ofh = open(outfile, 'w')

    print("Writing output matrix", file=sys.stderr)
    header = ['x', 'y', 'maxent_weight', ] + list(sorted(layers.keys()))
    print(*header, sep='\t', file=ofh)
    xs, ys, ws = sparse.find(csr_matrix(selector))
    for x, y, w in zip(xs, ys, ws):
        line = [x, y, w,]
        for cl, layer in sorted(layers.items()):
            line.append(layer[x, y])
        print(*line, sep='\t', file=ofh)

    ofh.close()
    print("Done!", file=sys.stderr)
Example #25
def threshold_coherence_based_mst(date12_list, coh_list):
    """Return a minimum spanning tree of network based on the coherence inverse.
    Inputs:
        date12_list - list of string in YYMMDD-YYMMDD format
        coh_list    - list of float, average coherence for each interferogram
    Output:
        mst_date12_list - list of string in YYMMDD-YYMMDD format, for MST network of interferograms 
    """
    # coh_list --> coh_mat --> weight_mat
    coh_mat = coherence_matrix(date12_list, coh_list)
    mask = ~np.isnan(coh_mat)
    wei_mat = np.zeros(coh_mat.shape)
    wei_mat[:] = np.inf
    wei_mat[mask] = 1/coh_mat[mask]

    # MST path based on weight matrix
    wei_mat_csr = sparse.csr_matrix(wei_mat)
    mst_mat_csr = sparse.csgraph.minimum_spanning_tree(wei_mat_csr)

    # Get date6_list
    date12_list = ptime.yymmdd_date12(date12_list)
    m_dates = [date12.split('-')[0] for date12 in date12_list]
    s_dates = [date12.split('-')[1] for date12 in date12_list]
    date6_list = ptime.yymmdd(sorted(ptime.yyyymmdd(list(set(m_dates + s_dates)))))

    # Convert MST index matrix into date12 list
    [s_idx_list, m_idx_list] = [date_idx_array.tolist()
                                for date_idx_array in sparse.find(mst_mat_csr)[0:2]]
    mst_date12_list = []
    for i in range(len(m_idx_list)):
        idx = sorted([m_idx_list[i], s_idx_list[i]])
        date12 = date6_list[idx[0]]+'-'+date6_list[idx[1]]
        mst_date12_list.append(date12)
    return mst_date12_list
Example #26
import scipy.sparse as ssp

def graph_srw_transition_matrix(A):
	"""
	For a graph given by an adjacency matrix A, construct the 
	transition matrix of the srw on the graph.

	A: an adjacency matrix, symmetric
	"""
	(I,J,V) = ssp.find(A)
	n = A.shape[0]

	P = ssp.lil_matrix((n,n))
	nnz = I.shape[0]

	row_start = 0
	while row_start < nnz:
		row = I[row_start]

		# find the end of the row
		row_end = row_start
		while row_end < nnz and I[row_end] == row:
			row_end = row_end+1

		# srw probability
		p = 1. / (row_end-row_start)

		# fill P
		for row_entry in range(row_start, row_end):
			P[row, J[row_entry]] = p

		# continue with the next row
		row_start = row_end

	return P.tocsr()
Example #27
import numpy as np
import scipy.sparse as sp

def binarize_coo(coo):
    '''
    Returns a copy of a coo matrix whose nonzero
    entries have been mapped to 1
    '''
    i, j, d = sp.find(coo)
    d = np.ones(d.shape)
    return sp.coo_matrix((d,(i,j)),shape=coo.shape)
Example #28
from numpy import ndarray, where

def Find(M):
    if isinstance(M, ndarray): # numpy array or matrix
        rows, cols = where(M)
        vals = M[rows,cols]
    else:
        from scipy import sparse as sp
        assert sp.isspmatrix(M)
        rows, cols, vals = sp.find(M)
    return rows.tolist(), cols.tolist(), vals.tolist()
Example #29
def _enumMaximumMatching2(g):
    """Find all maximum matchings in an undirected bipartite graph `g`.
    Similar to _enumMaximumMatching but implemented using adjacency matrix
    of graph for slight speed boost.

    Parameters
    ----------
    g: 
        Undirected bipartite graph. Nodes are separated by their
        'bipartite' attribute.

    Returns
    -------
    list
        Each is a list of edges forming a maximum matching of `g`. 

    Author
    ------
    guangzhi XU ([email protected]; [email protected])
    Update time: 2017-05-21 20:04:51.

    """
    from scipy import sparse

    s1 = set(n for n, d in g.nodes(data=True) if d['bipartite'] == 0)
    s2 = set(g) - s1
    n1 = len(s1)
    nodes = list(s1) + list(s2)

    adj = nx.adjacency_matrix(g, nodes).tolil()
    all_matches = []

    #----------------Find one matching----------------
    match = bipartite.hopcroft_karp_matching(g)

    matchadj = np.zeros(adj.shape).astype('int')
    for kk, vv in match.items():
        matchadj[nodes.index(kk), nodes.index(vv)] = 1
    matchadj = sparse.lil_matrix(matchadj)

    all_matches.append(matchadj)

    #-----------------Enter recursion-----------------
    all_matches = _enumMaximumMatchingIter2(adj, matchadj, all_matches, n1, None, True)

    #---------------Re-orient match arcs---------------
    all_matches2 = []
    for ii in all_matches:
        match_list = sparse.find(ii[:n1] == 1)
        m1 = [nodes[jj] for jj in match_list[0]]
        m2 = [nodes[jj] for jj in match_list[1]]
        match_list = list(zip(m1, m2))

        all_matches2.append(match_list)

    print('got all')
    return all_matches2
Example #30
import numpy as np
import scipy.sparse as sp

def save_bow_as_sparse(bow, filename):
  '''
  Using a sparse matrix for storage should decrease disk usage,
  and make loading much easier (removing the need to explicitly
  write the number of rows
  '''
  to_save = np.array( sp.find(sp.coo_matrix(bow)), dtype=np.uint32 ) 

  to_save.tofile(filename)
Example #31
def construct_pyg_graph(node_ids,
                        adj,
                        dists,
                        node_features,
                        y,
                        node_label='drnl',
                        use_orig_A=False,
                        directed=False,
                        use_orig_graph=False):
    # Construct a pytorch_geometric graph from a scipy csr adjacency matrix.
    #u, v, r = ssp.find(adj)
    num_nodes = adj.shape[0]

    node_ids = torch.LongTensor(node_ids)
    #u, v = torch.LongTensor(u), torch.LongTensor(v)

    #r = torch.LongTensor(r)
    #edge_index = torch.stack([u, v], 0)
    #edge_weight = r.to(torch.float)

    y = torch.tensor([y])
    if use_orig_graph:
        u, v, r = ssp.find(adj)
        num_nodes = adj.shape[0]

        u, v = torch.LongTensor(u), torch.LongTensor(v)
        r = torch.LongTensor(r)
        edge_index = torch.stack([u, v], 0)
        edge_weight = r.to(torch.float)

        if node_label == 'drnl':
            z = drnl_node_labeling(adj, 0, 1)
        elif node_label == 'hop':
            z = torch.tensor(dists)
        data = Data(node_features,
                    edge_index,
                    edge_weight=edge_weight,
                    y=y,
                    z=z,
                    node_id=node_ids,
                    num_nodes=num_nodes)
        return data
    elif not directed:
        if node_label == 'drnl':
            z = drnl_node_labeling(adj, 0, 1)
        elif node_label == 'hop':
            z = torch.tensor(dists)

        if use_orig_A:
            # edge_index / edge_weight are only built in the use_orig_graph
            # branch above, so rebuild them here from the adjacency matrix
            u, v, r = ssp.find(adj)
            u, v = torch.LongTensor(u), torch.LongTensor(v)
            r = torch.LongTensor(r)
            edge_index = torch.stack([u, v], 0)
            edge_weight = r.to(torch.float)
            o_data = Data(node_features,
                          edge_index,
                          edge_weight=edge_weight,
                          y=y,
                          z=z,
                          node_id=node_ids,
                          num_nodes=num_nodes)
        else:
            o_data = None

        L_node_features, L_edges, L_num_nodes, w, z1, z2, L_node_ids = construct_line_graph_undirected(
            node_ids, adj, z, node_features)
        edge_weight = torch.ones(len(L_edges))
        #print(L_edges)
        data = Data(L_node_features,
                    L_edges.t(),
                    edge_weight=edge_weight,
                    y=y,
                    w=torch.LongTensor(w),
                    z1=torch.LongTensor(z1),
                    z2=torch.LongTensor(z2),
                    node_id=L_node_ids,
                    num_nodes=len(L_node_ids),
                    o_data=o_data)
        return data
    else:
        L_node_features, L_edges, L_num_nodes, L_node_ids, L_node_classes = construct_line_graph_directed(
            node_ids, adj, node_features)

        return L_node_features, L_edges, L_num_nodes, L_node_ids, L_node_classes
Example #32
        print(e)
    finally:
        return m


def get_sup_vec(gmm):
    mu = gmm.means_.flatten()
    sd = gmm.covariances_.flatten()
    return np.concatenate([mu, sd])


res = []
for a, artist in tqdm(zip(A.T, artists), ncols=80):

    fns = []
    for tid in tqdm(sp.find(a)[1], ncols=80):
        if tids[tid] in path_map:
            fn = os.path.join(song_root, path_map[tids[tid]])
            if os.path.exists(fn):
                fns.append(fn)
            else:
                continue
        else:
            continue

    M = list(filter(lambda x: x is not None, pmap(get_mfcc, fns, n_jobs=16)))

    if len(M) == 0:
        res.append((artist, None))
        continue
Example #33
def mkNN(X, k, measure='euclidean'):
    """
    Construct mutual_kNN for large scale dataset

    If j is one of i's closest neighbors and i is also one of j's closest members,
    the edge will appear once with (i,j) where i < j.

    Parameters
    ----------
    X : [n_samples, n_dim] array
    k : int
      number of neighbors for each sample in X
    """
    from scipy.spatial import distance
    from scipy.sparse import csr_matrix, triu, find
    from scipy.sparse.csgraph import minimum_spanning_tree
    from sklearn.metrics import pairwise_distances
    from sklearn.metrics.pairwise import distance_metrics

    samples = X.shape[0]
    batchsize = 10000
    b = np.arange(k + 1)
    b = tuple(b[1:].ravel())

    z = np.zeros((samples, k))
    weigh = np.zeros_like(z)

    # This loop speeds up the computation by operating in batches
    # This can be parallelized to further utilize CPU/GPU resource
    for x in np.arange(0, samples, batchsize):
        start = x
        end = min(x + batchsize, samples)

        parallelized_metrics = list(distance_metrics().keys())

        if (measure in parallelized_metrics):
            w = pairwise_distances(X=X[start:end],
                                   Y=X,
                                   metric=measure,
                                   n_jobs=-1)
        else:
            w = distance.cdist(X[start:end], X, measure)

        y = np.argpartition(w, b, axis=1)

        z[start:end, :] = y[:, 1:k + 1]
        weigh[start:end, :] = np.reshape(
            w[tuple(np.repeat(np.arange(end - start), k)),
              tuple(y[:, 1:k + 1].ravel())], (end - start, k))
        del (w)

    ind = np.repeat(np.arange(samples), k)

    P = csr_matrix((np.ones((samples * k)), (ind.ravel(), z.ravel())),
                   shape=(samples, samples))
    Q = csr_matrix((weigh.ravel(), (ind.ravel(), z.ravel())),
                   shape=(samples, samples))

    Tcsr = minimum_spanning_tree(Q)
    P = P.minimum(P.transpose()) + Tcsr.maximum(Tcsr.transpose())
    P = triu(P, k=1)

    return np.asarray(find(P)).T
Example #34
def _extrude_2d(g: pp.Grid, z: np.ndarray) -> Tuple[pp.Grid, np.ndarray, np.ndarray]:
    """ Extrude a 2d grid into 3d by prismatic extension.

    The original grid is assumed to be in the xy-plane, that is, any existing non-zero
    z-direction is ignored.

    Both the original and the new grid will have their geometry computed.

    Parameters:
        g (pp.Grid): Original grid to be extruded. Should have dimension 2.
        z (np.ndarray): z-coordinates of the nodes in the extruded grid. Should be
            either non-negative or non-positive, and be sorted in increasing or
            decreasing order, respectively.

    Returns:
        pp.Grid: A grid of dimension 3.
        np.array of np.arrays: Cell mappings, so that element ci gives all indices of
            cells in the extruded grid that come from cell ci in the original grid.
        np.array of np.arrays: Face mappings, so that element fi gives all indices of
            faces in the extruded grid that come from face fi in the original grid.

    """

    g.compute_geometry()

    negative_extrusion = np.all(z <= 0)

    ## Bookkeeping of the number of grid items

    # Number of nodes in the z-direction
    num_node_layers = z.size
    # Number of cell layers, one less than the nodes
    num_cell_layers = num_node_layers - 1

    # Short hand for the number of cells in the 2d grid
    nc_2d = g.num_cells
    nf_2d = g.num_faces
    nn_2d = g.num_nodes

    # The number of nodes in the 3d grid is given by the number of 2d nodes, and the
    # number of node layers
    nn_3d = nn_2d * num_node_layers
    # The 3d cell count is similar to that for the nodes
    nc_3d = nc_2d * num_cell_layers
    # The number of faces is more intricate: In each layer of cells, there will be as
    # many faces as there is in the 2d grid. In addition, in the direction of extrusion
    # there will be one set of faces per node layer, with each layer containing as many
    # faces as there are cells in the 2d grid
    nf_3d = nf_2d * num_cell_layers + nc_2d * num_node_layers

    ## Nodes - only coordinates are needed
    # The nodes in the 2d grid are copied for all layers, with the z-coordinates changed
    # for each layer. This means that for a vertical pillar, the face-node and cell-node
    # relations can be inferred from that in the original 2d grid, with index increments
    # of size nn_2d
    x_layer = g.nodes[0]
    y_layer = g.nodes[1]

    nodes = np.empty((3, 0))
    # Stack the layers of nodes
    for zloc in z:
        nodes = np.hstack((nodes, np.vstack((x_layer, y_layer, zloc * np.ones(nn_2d)))))

    ## Face-node relations
    # The 3d grid has two types of faces: Those formed by faces in the 2d grid, termed
    # 'vertical' below, and those on the top and bottom of the 3d cells, termed
    # horizontal

    # Face-node relation for the 2d grid. We know there are exactly two nodes in each
    # 2d face.
    fn_2d = g.face_nodes.indices.reshape((2, g.num_faces), order="F")

    # Nodes of the faces for the bottom layer of 3d cells. These are formed by
    # connecting nodes in the bottom layer with those immediately above
    fn_layer = np.vstack((fn_2d[0], fn_2d[1], fn_2d[1] + nn_2d, fn_2d[0] + nn_2d))

    # For the vertical cells, the flux direction indicated in cell_face map will be
    # inherited from the 2d grid (see below). The normal vector, which should be
    # consistent with this value, is effectively computed from the ordering of the
    # face-node relation (and the same is true for several other geometric quantities).
    # This requires that the face-nodes are sorted in a CCW order when seen from the
    # side of a positive cell_face value. To sort this out, we need to flip some of the
    # columns in fn_layer

    # Faces, cells and values of the 2d cell-face map
    [fi, ci, sgn] = sps.find(g.cell_faces)
    # Only consider each face once
    _, idx = np.unique(fi, return_index=True)

    # The node ordering in fn_layer will be CCW seen from cell ci if the cell center of
    # ci is CW relative to the line from the first to the second node of the 2d cell.
    #
    # Example: with p0 = [0, 0, 0], p1 = [1, 0, 0], the 3d face will have further nodes
    #               p2 = [1, 0, 1], p3 = [0, 0, 1].
    # This will be counterclockwise to a 2d cell center of, say, [0.5, -0.5, 0],
    #  (which is CW relative to p0 and p1)
    #
    p0 = g.nodes[:, fn_2d[0, fi[idx]]]
    p1 = g.nodes[:, fn_2d[1, fi[idx]]]
    pc = g.cell_centers[:, ci[idx]]
    ccw_2d = pp.geometry_property_checks.is_ccw_polyline(p0, p1, pc)

    # We should flip those columns in fn_layer where the sign is positive, and the 2d
    # is not ccw (meaning the 3d will be). Similarly, also flip negative signs and 2d
    # ccw.
    flip = np.logical_or(
        np.logical_and(sgn[idx] > 0, np.logical_not(ccw_2d)),
        np.logical_and(sgn[idx] < 0, ccw_2d),
    )

    # Finally, if the extrusion is in the negative direction, the ordering of all
    # face-node relations is the opposite of that indicated above.
    if negative_extrusion:
        flip = np.logical_not(flip)

    fn_layer[:, flip] = fn_layer[np.array([1, 0, 3, 2])][:, flip]

    # The face-node relation for the vertical cells are found by stacking those in the
    # bottom layer, with an appropriate offset. This also implies that the vertical
    # faces of a cell in layer k are the same as the faces of the corresponding 2d cell,
    # with the appropriate adjustments for the number of faces and cells in each layer
    fn_rows_vertical = np.empty((4, 0))
    # Loop over all layers of cells
    for k in range(num_cell_layers):
        fn_rows_vertical = np.hstack((fn_rows_vertical, fn_layer + nn_2d * k))

    # Reshape the node indices into a single array
    fn_rows_vertical = fn_rows_vertical.ravel("F")

    # All vertical faces have exactly four nodes
    nodes_per_face_vertical = 4
    # Aim for a csc-representation of the faces. Column pointers
    fn_cols_vertical = np.arange(
        0, nodes_per_face_vertical * nf_2d * num_cell_layers, nodes_per_face_vertical
    )

    # Next, deal with the horizontal faces. The face-node relation is based on the
    # cell-node relation of the 2d grid.
    # The structure of this construction is a bit more involved than for the vertical
    # faces, since the 2d cells have an unknown, and generally varying, number of nodes
    cn_2d = g.cell_nodes()

    # Short hand for node indices of each cell.
    cn_ind_2d = cn_2d.indices.copy()

    # Similar to the vertical faces, the face-node relation in 3d should match the
    # sign in the cell-face relation, so that the generated normal vector points out of
    # the cell with cf-value 1.
    # This requires a sorting of the nodes for each cell
    for ci in range(nc_2d):
        # Node indices of this 2d cell
        start = cn_2d.indptr[ci]
        stop = cn_2d.indptr[ci + 1]
        ni = cn_ind_2d[start:stop]

        coord = g.nodes[:2, ni]
        # Sort the points.
        # IMPLEMENTATION NOTE: this probably assumes convexity of the 2d cell.
        sort_ind = pp.utils.sort_points.sort_point_plane(
            np.vstack((coord, np.zeros(coord.shape[1]))),
            g.cell_centers[:, ci].reshape((-1, 1)),
        )
        # Indices that sort the nodes. The sort function contains a rotation, which
        # implies that it is unknown whether the ordering is cw or ccw
        # If the sorted points are ccw, we store them, unless the extrusion is negative
        # in which case the ordering should be cw, and the points are turned.
        if pp.geometry_property_checks.is_ccw_polygon(coord[:, sort_ind]):
            if negative_extrusion:
                cn_ind_2d[start:stop] = cn_ind_2d[start:stop][sort_ind[::-1]]
            else:
                cn_ind_2d[start:stop] = cn_ind_2d[start:stop][sort_ind]
        # Else, the ordering should be negative.
        elif pp.geometry_property_checks.is_ccw_polygon(coord[:, sort_ind[::-1]]):
            if negative_extrusion:
                cn_ind_2d[start:stop] = cn_ind_2d[start:stop][sort_ind]
            else:
                cn_ind_2d[start:stop] = cn_ind_2d[start:stop][sort_ind[::-1]]
        else:
            raise ValueError("this should not happen. Is the cell non-convex??")

    # Compressed column storage for horizontal faces: Store node indices
    fn_rows_horizontal = np.array([], dtype=np.int)
    # .. and pointers to the start of new faces
    fn_cols_horizontal = np.array(0, dtype=np.int)
    # Loop over all layers of nodes (one more than number of cells)
    # This means that the horizontal faces of a given cell is given by its index (bottom)
    # and its index + the number of 2d cells, both offset with the total number of
    # vertical faces
    for k in range(num_node_layers):
        # The horizontal cell-node relation for this layer is the bottom one, plus an
        # offset of the number of 2d nodes, per layer
        fn_rows_horizontal = np.hstack((fn_rows_horizontal, cn_ind_2d + nn_2d * k))
        # The index pointers are those of the 2d cell-node relation.
        # Adjustment for the vertical faces is done below
        # Drop the final element of the 2d indptr, which effectively signifies the end
        # of this array (we will add the corresponding element for the full array below)
        fn_cols_horizontal = np.hstack(
            (fn_cols_horizontal, cn_2d.indptr[1:] + cn_ind_2d.size * k)
        )

    # Add the final element which marks the end of the array
    # fn_cols_horizontal = np.hstack((fn_cols_horizontal, fn_rows_horizontal.size))
    # The horizontal faces are appended to the vertical ones. The node indices are the
    # same, but the face indices must be increased by the number of vertical faces
    num_vertical_faces = nf_2d * num_cell_layers
    fn_cols_horizontal += num_vertical_faces * nodes_per_face_vertical

    # Put together the vertical and horizontal data, create the face-node relation
    indptr = np.hstack((fn_cols_vertical, fn_cols_horizontal)).astype(np.int)
    indices = np.hstack((fn_rows_vertical, fn_rows_horizontal)).astype(np.int)
    data = np.ones(indices.size, dtype=np.int)

    # Finally, construct the face-node sparse matrix
    face_nodes = sps.csc_matrix((data, indices, indptr), shape=(nn_3d, nf_3d))

    ### Next the cell-faces.
    # Similar to the face-nodes, the easiest option is first to deal with the vertical
    # faces, which can be inferred directly from faces in the 2d grid, and then the
    # horizontal direction.
    # IMPLEMENTATION NOTE: Since all cells have both horizontal and vertical faces, and
    # these are found in separate operations, the easiest way to assemble the 3d
    # cell-face matrix is to construct information for a coo-matrix (not compressed
    # storage), and then convert later. This has some overhead, but the alternative
    # is to combine and sort the face indices in the horizontal and vertical components
    # so that all faces of any cell is stored together. This is most conveniently
    # left to scipy sparse .tocsc() function

    ## Vertical faces
    # For the vertical faces, the information from the 2d grid can be copied

    cf_rows_2d = g.cell_faces.indices
    cf_cols_2d = g.cell_faces.indptr
    cf_data_2d = g.cell_faces.data

    cf_rows_vertical = np.array([], dtype=np.int)
    # For the cells, we will store the number of faces for each cell. This will later
    # be expanded to a full set of cell indices
    cf_vertical_cell_count = np.array([], dtype=np.int)
    cf_data_vertical = np.array([])

    for k in range(num_cell_layers):
        # The face indices are found from the 2d information, with increments that
        # reflect how many layers of vertical faces there are below
        cf_rows_vertical = np.hstack((cf_rows_vertical, cf_rows_2d + k * nf_2d))
        # The diff here gives the number of faces per cell
        cf_vertical_cell_count = np.hstack(
            (cf_vertical_cell_count, np.diff(cf_cols_2d))
        )
        # The data is just plus and minus ones, no need to adjust
        cf_data_vertical = np.hstack((cf_data_vertical, cf_data_2d))

    # Expand information of the number of faces per cell into a corresponding full set
    # of cell indices
    cf_cols_vertical = pp.utils.matrix_compression.rldecode(
        np.arange(nc_3d), cf_vertical_cell_count
    )

    ## Horizontal faces
    # There is one set of faces per layer of nodes.
    # The cell_face relation will assign -1 to the upper cells, and +1 to lower cells.
    # This corresponds to normal vectors pointing upwards.
    # The bottom and top layers are special, in that they have only one neighboring
    # cell. All other layers have two (they are internal)

    # Bottom layer
    cf_rows_horizontal = num_vertical_faces + np.arange(nc_2d)
    cf_cols_horizontal = np.arange(nc_2d)
    cf_data_horizontal = -np.ones(nc_2d, dtype=np.int)

    # Intermediate layers
    for k in range(1, num_cell_layers):
        # Face indices are given twice, for the lower and upper neighboring cell
        # The offset of the face index is the number of vertical faces plus the number
        # of horizontal faces in lower layers
        rows_here = (
            num_vertical_faces
            + k * nc_2d
            + np.hstack((np.arange(nc_2d), np.arange(nc_2d)))
        )
        cf_rows_horizontal = np.hstack((cf_rows_horizontal, rows_here))

        # Cell indices, first of the lower layer, then of the upper
        cols_here = np.hstack(
            ((k - 1) * nc_2d + np.arange(nc_2d), k * nc_2d + np.arange(nc_2d))
        )
        cf_cols_horizontal = np.hstack((cf_cols_horizontal, cols_here))
        # Data: +1 for the lower cells, -1 for the upper
        data_here = np.hstack((np.ones(nc_2d), -np.ones(nc_2d)))
        cf_data_horizontal = np.hstack((cf_data_horizontal, data_here))

    # Top layer, with index offset for all other faces
    cf_rows_horizontal = np.hstack(
        (
            cf_rows_horizontal,
            num_vertical_faces + num_cell_layers * nc_2d + np.arange(nc_2d),
        )
    )
    # Similarly, the cell indices of the topmost layer
    cf_cols_horizontal = np.hstack(
        (cf_cols_horizontal, (num_cell_layers - 1) * nc_2d + np.arange(nc_2d))
    )
    # Only +1 in the data (opposite to lowermost layer)
    cf_data_horizontal = np.hstack((cf_data_horizontal, np.ones(nc_2d)))

    # Merge horizontal and vertical layers
    cf_rows = np.hstack((cf_rows_horizontal, cf_rows_vertical))
    cf_cols = np.hstack((cf_cols_horizontal, cf_cols_vertical))
    cf_data = np.hstack((cf_data_horizontal, cf_data_vertical))

    cell_faces = sps.coo_matrix(
        (cf_data, (cf_rows, cf_cols)), shape=(nf_3d, nc_3d)
    ).tocsc()

    tags = _define_tags(g, num_cell_layers)

    g_info = g.name.copy()
    g_info.append("Extrude 2d->3d")

    g_new = pp.Grid(3, nodes, face_nodes, cell_faces, g_info, tags=tags)
    g_new.compute_geometry()

    # Mappings between old and new cells and faces
    cell_map, face_map = _create_mappings(g, g_new, num_cell_layers)

    return g_new, cell_map, face_map
Example #35
def graph_sparsify(M, epsilon, maxiter=10):
    r"""Sparsify a graph (with Spielman-Srivastava).

    Parameters
    ----------
    M : Graph or sparse matrix
        Graph structure or a Laplacian matrix
    epsilon : float
        Sparsification parameter

    Returns
    -------
    Mnew : Graph or sparse matrix
        New graph structure or sparse matrix

    Notes
    -----
    Epsilon should be between 1/sqrt(N) and 1

    Examples
    --------
    >>> from pygsp import reduction
    >>> G = graphs.Sensor(256, Nc=20, distributed=True)
    >>> epsilon = 0.4
    >>> G2 = reduction.graph_sparsify(G, epsilon)

    References
    ----------
    See :cite:`spielman2011graph`, :cite:`rudelson1999random` and :cite:`rudelson2007sampling`
    for more information.

    """
    # Test the input parameters
    if isinstance(M, graphs.Graph):
        if not M.lap_type == 'combinatorial':
            raise NotImplementedError
        L = M.L
    else:
        L = M

    N = np.shape(L)[0]

    if not 1. / np.sqrt(N) <= epsilon < 1:
        raise ValueError('GRAPH_SPARSIFY: Epsilon out of required range')

    # Not sparse
    resistance_distances = utils.resistance_distance(L).toarray()
    # Get the Weight matrix
    if isinstance(M, graphs.Graph):
        W = M.W
    else:
        W = np.diag(L.diagonal()) - L.toarray()
        W[W < 1e-10] = 0

    W = sparse.coo_matrix(W)
    W.data[W.data < 1e-10] = 0
    W = W.tocsc()
    W.eliminate_zeros()

    start_nodes, end_nodes, weights = sparse.find(sparse.tril(W))

    # Calculate the new weights.
    weights = np.maximum(0, weights)
    Re = np.maximum(0, resistance_distances[start_nodes, end_nodes])
    Pe = weights * Re
    Pe = Pe / np.sum(Pe)

    for i in range(maxiter):
        # Rudelson, 1996 Random Vectors in the Isotropic Position
        # (too hard to figure out actual C0)
        C0 = 1 / 30.
        # Rudelson and Vershynin, 2007, Thm. 3.1
        C = 4 * C0
        q = round(N * np.log(N) * 9 * C**2 / (epsilon**2))

        results = stats.rv_discrete(values=(np.arange(np.shape(Pe)[0]),
                                            Pe)).rvs(size=int(q))
        # per-value occurrence counts, columns: [value, count]
        spin_counts = np.column_stack(np.unique(results, return_counts=True)).astype(int)
        per_spin_weights = weights / (q * Pe)

        counts = np.zeros(np.shape(weights)[0])
        counts[spin_counts[:, 0]] = spin_counts[:, 1]
        new_weights = counts * per_spin_weights

        sparserW = sparse.csc_matrix((new_weights, (start_nodes, end_nodes)),
                                     shape=(N, N))
        sparserW = sparserW + sparserW.T
        sparserL = sparse.diags(sparserW.diagonal(), 0) - sparserW

        if graphs.Graph(W=sparserW).is_connected():
            break
        elif i == maxiter - 1:
            logger.warning(
                'Despite attempts to reduce epsilon, sparsified graph is disconnected'
            )
        else:
            epsilon -= (epsilon - 1 / np.sqrt(N)) / 2.

    if isinstance(M, graphs.Graph):
        sparserW = sparse.diags(sparserL.diagonal(), 0) - sparserL
        if not M.is_directed():
            sparserW = (sparserW + sparserW.T) / 2.

        Mnew = graphs.Graph(W=sparserW)
        #M.copy_graph_attributes(Mnew)
    else:
        Mnew = sparse.lil_matrix(sparserL)

    return Mnew
Example #36
from numpy import *
from scipy.sparse import block_diag, find, hstack
import string


nCl = 10
coos = random.randint(1, 10, size=nCl).tolist()
lob = [ones((n, n)) for n in coos]
graph = block_diag(lob)
graph_abc = vstack(find(graph)).T
with open('graph_test.abc', 'w') as fh:
    for i in range(graph_abc.shape[0]):
        ld = graph_abc[i].tolist()
        ld[0] = string.printable[int(ld[0])]
        ld[1] = string.printable[int(ld[1])]
        fh.write('%s %s %.6f\n' % tuple(ld))
Example #37
    def matrix_rhs(self, g, data):
        """
        Return the matrix and right-hand side for a discretization of a second
        order elliptic equation using hybrid dual virtual element method.
        The name of data in the input dictionary (data) are:
        perm : tensor.SecondOrderTensor
            Permeability defined cell-wise. If not given, an identity permeability
            is assumed and a warning is raised.
        source : array (self.g.num_cells)
            Scalar source term defined cell-wise. If not given, a zero source
            term is assumed and a warning is raised.
        bc : boundary conditions (optional)
        bc_val : dictionary (optional)
            Values of the boundary conditions. The dictionary has at most the
            following keys: 'dir' and 'neu', for Dirichlet and Neumann boundary
            conditions, respectively.

        Parameters
        ----------
        g : grid, or a subclass, with geometry fields computed.
        data: dictionary to store the data.

        Return
        ------
        matrix: sparse csr (g.num_faces+g_num_cells, g.num_faces+g_num_cells)
            Saddle point matrix obtained from the discretization.
        rhs: array (g.num_faces+g_num_cells)
            Right-hand side which contains the boundary conditions and the scalar
            source term.

        Examples
        --------
        b_faces_neu = ... # id of the Neumann faces
        b_faces_dir = ... # id of the Dirichlet faces
        bnd = bc.BoundaryCondition(g, np.hstack((b_faces_dir, b_faces_neu)),
                                ['dir']*b_faces_dir.size + ['neu']*b_faces_neu.size)
        bnd_val = {'dir': fun_dir(g.face_centers[:, b_faces_dir]),
                   'neu': fun_neu(f.face_centers[:, b_faces_neu])}

        data = {'perm': perm, 'source': f, 'bc': bnd, 'bc_val': bnd_val}

        H, rhs = hybrid.matrix_rhs(g, data)
        l = sps.linalg.spsolve(H, rhs)
        u, p = hybrid.compute_up(g, l, data)
        P0u = dual.project_u(g, perm, u)

        """
        # pylint: disable=invalid-name

        # If a 0-d grid is given then we return an identity matrix
        if g.dim == 0:
            return sps.identity(self.ndof(g), format="csr"), np.zeros(1)

        parameter_dictionary = data[pp.PARAMETERS][self.keyword]
        k = parameter_dictionary["second_order_tensor"]
        f = parameter_dictionary["source"]
        bc = parameter_dictionary["bc"]
        bc_val = parameter_dictionary["bc_values"]
        a = parameter_dictionary["aperture"]

        faces, _, sgn = sps.find(g.cell_faces)

        # Map the domain to a reference geometry (i.e. equivalent to compute
        # surface coordinates in 1d and 2d)
        c_centers, f_normals, f_centers, _, _, _ = pp.map_geometry.map_grid(g)

        # Weight for the stabilization term
        diams = g.cell_diameters()
        weight = np.power(diams, 2 - g.dim)

        # Allocate the data to store matrix entries, that's the most efficient
        # way to create a sparse matrix.
        size = np.sum(
            np.square(g.cell_faces.indptr[1:] - g.cell_faces.indptr[:-1]))
        row = np.empty(size, dtype=np.int)
        col = np.empty(size, dtype=np.int)
        data = np.empty(size)
        rhs = np.zeros(g.num_faces)

        idx = 0
        # Use a dummy keyword to trick the constructor of dualVEM.
        massHdiv = pp.MVEM("dummy").massHdiv

        # define the function to compute the inverse of the permeability matrix
        if g.dim == 1:
            inv_matrix = DualElliptic._inv_matrix_1d
        elif g.dim == 2:
            inv_matrix = DualElliptic._inv_matrix_2d
        elif g.dim == 3:
            inv_matrix = DualElliptic._inv_matrix_3d

        for c in np.arange(g.num_cells):
            # For the current cell retrieve its faces
            loc = slice(g.cell_faces.indptr[c], g.cell_faces.indptr[c + 1])
            faces_loc = faces[loc]
            ndof = faces_loc.size

            # Retrieve permeability and normals assumed outward to the cell.
            sgn_loc = sgn[loc].reshape((-1, 1))
            normals = np.multiply(np.tile(sgn_loc.T, (g.dim, 1)),
                                  f_normals[:, faces_loc])

            # Compute the H_div-mass local matrix
            A = massHdiv(
                k.values[0:g.dim, 0:g.dim, c],
                inv_matrix(k.values[0:g.dim, 0:g.dim, c]),
                c_centers[:, c],
                a[c] * g.cell_volumes[c],
                f_centers[:, faces_loc],
                a[c] * normals,
                np.ones(ndof),
                diams[c],
                weight[c],
            )[0]
            # Compute the Div local matrix
            B = -np.ones((ndof, 1))
            # Compute the hybrid local matrix
            C = np.eye(ndof, ndof)

            # Perform the static condensation to compute the hybrid local matrix
            invA = np.linalg.inv(A)
            S = 1 / np.dot(B.T, np.dot(invA, B))
            L = np.dot(np.dot(invA, np.dot(B, np.dot(S, B.T))), invA)
            L = np.dot(np.dot(C.T, L - invA), C)

            # Compute the local hybrid right using the static condensation
            rhs[faces_loc] += np.dot(C.T,
                                     np.dot(invA,
                                            np.dot(B, np.dot(S, f[c]))))[:, 0]

            # Save values for hybrid matrix
            indices = np.tile(faces_loc, (faces_loc.size, 1))
            loc_idx = slice(idx, idx + indices.size)
            row[loc_idx] = indices.T.ravel()
            col[loc_idx] = indices.ravel()
            data[loc_idx] = L.ravel()
            idx += indices.size

        # construct the global matrices
        H = sps.coo_matrix((data, (row, col))).tocsr()

        # Apply the boundary conditions
        if bc is not None:

            if np.any(bc.is_dir):
                norm = sps.linalg.norm(H, np.inf)
                is_dir = np.where(bc.is_dir)[0]

                H[is_dir, :] *= 0
                H[is_dir, is_dir] = norm
                rhs[is_dir] = norm * bc_val[is_dir]

            if np.any(bc.is_neu):
                faces, _, sgn = sps.find(g.cell_faces)
                sgn = sgn[np.unique(faces, return_index=True)[1]]

                is_neu = np.where(bc.is_neu)[0]
                rhs[is_neu] += sgn[is_neu] * bc_val[is_neu] * g.face_areas[
                    is_neu]

        return H, rhs
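
A note on the assembly pattern above: local contributions are written into preallocated row/col/data arrays and the global matrix is built in a single call to coo_matrix, which sums duplicate entries for faces shared by two cells. A minimal, self-contained sketch of that pattern with toy numbers (not PorePy data structures):

import numpy as np
import scipy.sparse as sps

# Local contributions from two "cells" that share global face 1
row = np.array([0, 0, 1, 1, 1, 1, 2, 2])
col = np.array([0, 1, 0, 1, 1, 2, 1, 2])
data = np.array([2.0, -1.0, -1.0, 2.0, 2.0, -1.0, -1.0, 2.0])

# coo_matrix sums the duplicated (1, 1) entries, exactly as in the loop above
H = sps.coo_matrix((data, (row, col))).tocsr()

# sps.find recovers the nonzero triplets of the assembled matrix
i, j, v = sps.find(H)
print(H.toarray())
print(i, j, v)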
Beispiel #38
0
    def gollapudi1(self, repeat=1, scale=1000):
        """[Gollapudi et. al., 2006](1) is an integer weighted MinHash algorithm,
           which skips much unnecessary hash value computation by employing the idea of "active index".
           S. Gollapudi and R. Panigraphy, "Exploiting Asymmetry in Hierarchical Topic Extraction",
           in CIKM, 2006, pp. 475-482.

        Parameters
        -----------
        repeat: int, default: 1
            the number of repeating the algorithm as the part of the seed of the random number generator

        scale: int, default: 1000
            a large constant to transform real-valued weights into integer ones

        Returns
        -----------
        fingerprints_k: ndarray, shape (n_instances, dimension_num)
            one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

        fingerprints_y: ndarray, shape (n_instances, dimension_num)
            one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

        elapsed: float
            time of hashing data matrix

        Notes
        ----------
        The operations of seeking "active indices" and computing hash values are implemented in C++
        due to the low efficiency of Python. The operations cannot be vectorized in Python, so a pure
        Python implementation would be very slow.
        """
        fingerprints_k = np.zeros((self.instance_num, self.dimension_num))
        fingerprints_y = np.zeros((self.instance_num, self.dimension_num))

        start = time.time()

        for j_sample in range(0, self.instance_num):

            feature_id = sparse.find(self.weighted_set[:, j_sample] > 0)[0]
            feature_id_num = feature_id.shape[0]

            fingerprints = CDLL('./cpluspluslib/gollapudi1_fingerprints.so')
            fingerprints.GenerateFingerprintOfInstance.argtypes = [
                c_int,
                np.ctypeslib.ndpointer(dtype=c_int,
                                       ndim=1,
                                       flags="C_CONTIGUOUS"),
                np.ctypeslib.ndpointer(c_int, ndim=1, flags="C_CONTIGUOUS"),
                c_int, c_int,
                np.ctypeslib.ndpointer(dtype=c_double,
                                       ndim=1,
                                       flags="C_CONTIGUOUS"),
                np.ctypeslib.ndpointer(dtype=c_double,
                                       ndim=1,
                                       flags="C_CONTIGUOUS")
            ]
            fingerprints.GenerateFingerprintOfInstance.restype = None
            feature_weight = np.array(
                scale * self.weighted_set[feature_id, j_sample].todense())[:,
                                                                           0]
            fingerprint_k = np.zeros((1, self.dimension_num))[0]
            fingerprint_y = np.zeros((1, self.dimension_num))[0]

            fingerprints.GenerateFingerprintOfInstance(
                self.dimension_num, feature_weight, feature_id, feature_id_num,
                self.seed * repeat, fingerprint_k, fingerprint_y)

            fingerprints_k[j_sample, :] = fingerprint_k
            fingerprints_y[j_sample, :] = fingerprint_y

        elapsed = time.time() - start

        return fingerprints_k, fingerprints_y, elapsed
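
Each pass of the loop above first extracts the ids of the nonzero features of one instance from the sparse data matrix with sparse.find. A small sketch of just that step on a toy column-wise matrix (the names here are illustrative, not the class attributes used above):

import numpy as np
from scipy import sparse

# Toy weighted data: 5 features x 3 instances, stored column-wise
weighted_set = sparse.csc_matrix(np.array([
    [0.0, 1.5, 0.0],
    [2.0, 0.0, 0.0],
    [0.0, 0.3, 0.7],
    [0.0, 0.0, 0.0],
    [1.0, 0.0, 0.2],
]))

j_sample = 1
# sparse.find on the boolean mask returns (rows, cols, values);
# the rows are the ids of features with nonzero weight in this instance
feature_id = sparse.find(weighted_set[:, j_sample] > 0)[0]
print(feature_id)           # -> [0 2]
print(feature_id.shape[0])  # number of active features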
Beispiel #39
0
    def cws(self, repeat=1):
        """The Consistent Weighted Sampling (CWS) algorithm, as the first of the Consistent Weighted Sampling scheme,
           extends "active indices" from $[0, S]$ in [Gollapudi et. al., 2006](1) to $[0, +\infty]$.
           M. Manasse, F. McSherry, and K. Talwar, "Consistent Weighted Sampling", Unpublished technical report, 2010.

        Parameters
        -----------
        repeat: int, default: 1
            the number of repeating the algorithm as the part of the seed of the random number generator

        Returns
        -----------
        fingerprints_k: ndarray, shape (n_instances, dimension_num)
            one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

        fingerprints_y: ndarray, shape (n_instances, dimension_num)
            one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

        elapsed: float
            time of hashing data matrix

        Notes
        ----------
        The operations of seeking "active indices" and computing hash values are implemented in C++
        due to the low efficiency of Python. The operations cannot be vectorized in Python, so a pure
        Python implementation would be very slow.
        """

        fingerprints_k = np.zeros((self.instance_num, self.dimension_num))
        fingerprints_y = np.zeros((self.instance_num, self.dimension_num))

        start = time.time()

        for j_sample in range(0, self.instance_num):

            feature_id = sparse.find(self.weighted_set[:, j_sample] > 0)[0]
            feature_id_num = feature_id.shape[0]

            fingerprints = CDLL('./cpluspluslib/cws_fingerprints.so')
            fingerprints.GenerateFingerprintOfInstance.argtypes = [
                c_int,
                np.ctypeslib.ndpointer(dtype=c_double,
                                       ndim=1,
                                       flags="C_CONTIGUOUS"),
                np.ctypeslib.ndpointer(dtype=c_int,
                                       ndim=1,
                                       flags="C_CONTIGUOUS"), c_int, c_int,
                np.ctypeslib.ndpointer(dtype=c_double,
                                       ndim=1,
                                       flags="C_CONTIGUOUS"),
                np.ctypeslib.ndpointer(dtype=c_double,
                                       ndim=1,
                                       flags="C_CONTIGUOUS")
            ]
            fingerprints.GenerateFingerprintOfInstance.restype = None
            weights = np.array(self.weighted_set[feature_id,
                                                 j_sample].todense())[:, 0]
            fingerprint_k = np.zeros((1, self.dimension_num))[0]
            fingerprint_y = np.zeros((1, self.dimension_num))[0]

            fingerprints.GenerateFingerprintOfInstance(
                self.dimension_num, weights, feature_id, feature_id_num,
                self.seed * repeat, fingerprint_k, fingerprint_y)

            fingerprints_k[j_sample, :] = fingerprint_k
            fingerprints_y[j_sample, :] = fingerprint_y

        elapsed = time.time() - start

        return fingerprints_k, fingerprints_y, elapsed
Beispiel #40
0
    def haeupler(self, repeat=1, scale=1000):
        """[Haeupler et. al., 2014] preserves the remaining float part with probability
           after each weight is multiplied by a large constant.
           B. Haeupler, M. Manasse, and K. Talwar, "Consistent Weighted Sampling Made Fast, Small, and Easy",
           arXiv preprint arXiv: 1410.4266, 2014

        Parameters
        ----------
        scale: int, default: 1000
            a large constant to transform real-valued weights into integer ones

        repeat: int, default: 1
            the number of repeating the algorithm as the part of the seed of the random number generator

        Returns
        ----------
        fingerprints: ndarray, shape (n_instances, dimension_num)
            hash codes for data matrix, where row represents a data instance

        elapsed: float
            time of hashing data matrix

        Notes
        ----------
        The operation of expanding the original weighted set by scaling the weights is implemented in C++
        due to the low efficiency of Python. The operation cannot be vectorized in Python, so a pure
        Python implementation would be very slow.
        """
        fingerprints = np.zeros((self.instance_num, self.dimension_num))

        np.random.seed(self.seed * np.power(2, repeat - 1))

        expanded_set_predefined_size = np.ceil(
            np.max(np.sum(self.weighted_set * scale, axis=0)) *
            100).astype(int)
        start = time.time()
        hash_parameters = np.random.randint(1, self.C_PRIME,
                                            (self.dimension_num, 2))

        for j_sample in range(0, self.instance_num):

            expanded_feature_id = np.zeros((1, expanded_set_predefined_size))
            feature_id = sparse.find(self.weighted_set[:, j_sample] > 0)[0]
            feature_id_num = feature_id.shape[0]

            expanded_set = CDLL('./cpluspluslib/haeupler_expandset.so')
            expanded_set.GenerateExpandedSet.argtypes = [
                c_int,
                np.ctypeslib.ndpointer(dtype=c_double,
                                       ndim=1,
                                       flags="C_CONTIGUOUS"),
                np.ctypeslib.ndpointer(dtype=c_int,
                                       ndim=1,
                                       flags="C_CONTIGUOUS"), c_int, c_int,
                c_int,
                np.ctypeslib.ndpointer(dtype=c_double,
                                       ndim=1,
                                       flags="C_CONTIGUOUS")
            ]
            expanded_set.GenerateExpandedSet.restype = None
            feature_weight = np.array(
                scale * self.weighted_set[feature_id, j_sample].todense())[:,
                                                                           0]
            expanded_feature_id = expanded_feature_id[0, :]
            expanded_set.GenerateExpandedSet(expanded_set_predefined_size,
                                             feature_weight, feature_id,
                                             feature_id_num, scale,
                                             self.seed * repeat,
                                             expanded_feature_id)

            expanded_feature_id = expanded_feature_id[expanded_feature_id != 0]
            expanded_feature_id_num = expanded_feature_id.shape[0]
            k_hash = np.mod(
                np.dot(np.transpose(np.array([expanded_feature_id])),
                       np.array([np.transpose(hash_parameters[:, 1])])) +
                np.dot(np.ones((expanded_feature_id_num, 1)),
                       np.array([np.transpose(hash_parameters[:, 1])])),
                self.C_PRIME)
            min_position = np.argmin(k_hash, axis=0)
            fingerprints[j_sample, :] = expanded_feature_id[min_position]

        elapsed = time.time() - start

        return fingerprints, elapsed
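
The last step above hashes every expanded index once per output dimension and keeps the index attaining the minimum hash, i.e. a plain MinHash over the integer-expanded set. Below is a minimal sketch of that step using the conventional universal hash (a*x + b) mod p; the prime and the array sizes are illustrative assumptions, not values taken from the class above:

import numpy as np

rng = np.random.RandomState(0)
C_PRIME = 2147483647          # a large prime, assumed for illustration
dimension_num = 4

# One (a, b) pair per hash dimension
hash_parameters = rng.randint(1, C_PRIME, (dimension_num, 2)).astype(np.int64)

# Expanded integer ids of one instance (each feature repeated according to its scaled weight)
expanded_feature_id = np.array([3, 3, 3, 7, 7, 12], dtype=np.int64)

# Universal hash h_d(x) = (a_d * x + b_d) mod p for every id and dimension
k_hash = np.mod(
    np.outer(expanded_feature_id, hash_parameters[:, 0]) + hash_parameters[:, 1],
    C_PRIME)

# The fingerprint keeps, per dimension, the id attaining the minimum hash value
min_position = np.argmin(k_hash, axis=0)
fingerprint = expanded_feature_id[min_position]
print(fingerprint)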
Beispiel #41
0
    def close(self):
        i, j, v = sparse.find(self.data)
        log.info("Glove matrix has %d entries", len(i))
Beispiel #42
0
def upload_dtm(run_id, output_path):
    stat = RunStats.objects.get(pk=run_id)
    print("upload dtm results to db")

    info = readInfo(os.path.join(output_path, "lda-seq/info.dat"))

    topic_ids = db.add_topics(stat.K, stat.run_id)

    vocab_ids = []
    input_path = output_path.replace("-output-", "-input-")
    with open(os.path.join(input_path, 'foo-vocab.dat'), 'r') as f:
        for l in f:
            try:
                vocab_ids.append(int(l.split(':')[0].strip()))
            except:
                pass

    ids = []
    docsizes = []
    with open(os.path.join(input_path, 'foo-docids.dat'), 'r') as f:
        for l in f:
            try:
                id, s = [int(x.strip()) for x in l.split(':')]
                ids.append(id)
                docsizes.append(s)
            except:
                pass

    time_range = sorted([tp.n for tp in stat.periods.all().order_by('n')])

    #################################
    # TopicTerms

    print("writing topic terms")
    topics = range(info['NUM_TOPICS'])
    pool = Pool(processes=8)
    pool.map(
        partial(dtm_topic,
                info=info,
                topic_ids=topic_ids,
                vocab_ids=vocab_ids,
                ys=time_range,
                run_id=run_id,
                output_path=output_path), topics)
    pool.terminate()
    gc.collect()

    ######################################
    # Doctopics
    print("writing doctopics")
    gamma = np.fromfile(os.path.join(output_path, 'lda-seq/gam.dat'),
                        dtype=float,
                        sep=" ")
    gamma = gamma.reshape((int(len(gamma) / stat.K), stat.K))

    gamma = find(csr_matrix(gamma))
    glength = len(gamma[0])
    chunk_size = 100000
    ps = 16
    parallel_add = True

    all_dts = []

    make_t = 0
    add_t = 0

    for i in range(glength // chunk_size + 1):
        dts = []
        values_list = []
        f = i * chunk_size
        l = (i + 1) * chunk_size
        if l > glength:
            l = glength
        docs = range(f, l)
        doc_batches = []
        for p in range(ps):
            doc_batches.append([x for x in docs if x % ps == p])
        pool = Pool(processes=ps)
        make_t0 = time()
        values_list.append(
            pool.map(
                partial(db.f_gamma_batch,
                        gamma=gamma,
                        docsizes=docsizes,
                        docUTset=ids,
                        topic_ids=topic_ids,
                        run_id=run_id), doc_batches))
        pool.terminate()
        make_t += time() - make_t0
        django.db.connections.close_all()

        add_t0 = time()
        values_list = [item for sublist in values_list for item in sublist]

        pool = Pool(processes=ps)
        pool.map(db.insert_many, values_list)
        pool.terminate()

        add_t += time() - add_t0
        gc.collect()
        sys.stdout.flush()

    stat = RunStats.objects.get(run_id=run_id)
    stat.last_update = timezone.now()
    stat.status = 3  # 3 = finished
    stat.save()
    management.call_command('update_run', run_id)
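
The doc-topic stage above flattens the dense gamma array into (row, col, value) triplets with find and then walks the triplets in fixed-size chunks before handing them to worker pools. A stripped-down sketch of that chunking pattern with toy sizes and no database calls:

import numpy as np
from scipy.sparse import csr_matrix, find

K = 5
gamma = np.random.rand(23, K)
gamma[gamma < 0.7] = 0.0          # sparsify so find() has entries to skip

rows, cols, vals = find(csr_matrix(gamma))
glength = len(rows)

chunk_size = 10
for i in range(glength // chunk_size + 1):
    f = i * chunk_size
    l = min((i + 1) * chunk_size, glength)
    if f >= l:
        break
    # each chunk of (doc, topic, score) triplets would be handed to a worker pool above
    print("chunk", i, "-> docs", rows[f:l], "topics", cols[f:l])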
Beispiel #43
0
    def compute_up(self, g, solution, data):
        """
        Return the velocity and pressure computed from the hybrid variables.

        Parameters
        ----------
        g : grid, or a subclass, with geometry fields computed.
        solution : array (g.num_faces) Hybrid solution of the system.
        data: dictionary to store the data. See self.matrix_rhs for a detailed
            description.

        Return
        ------
        u : array (g.num_faces) Velocity at each face.
        p : array (g.num_cells) Pressure at each cell.

        """
        # pylint: disable=invalid-name

        if g.dim == 0:
            return 0, solution[0]

        param = data["param"]
        k = param.get_tensor(self)
        f = param.get_source(self)
        a = param.aperture

        faces, _, sgn = sps.find(g.cell_faces)

        # Map the domain to a reference geometry (i.e. equivalent to compute
        # surface coordinates in 1d and 2d)
        c_centers, f_normals, f_centers, _, _, _ = pp.map_geometry.map_grid(g)

        # Weight for the stabilization term
        diams = g.cell_diameters()
        weight = np.power(diams, 2 - g.dim)

        # Allocation of the pressure and velocity vectors
        p = np.zeros(g.num_cells)
        u = np.zeros(g.num_faces)
        massHdiv = pp.DualVEM().massHdiv

        for c in np.arange(g.num_cells):
            # For the current cell retrieve its faces
            loc = slice(g.cell_faces.indptr[c], g.cell_faces.indptr[c + 1])
            faces_loc = faces[loc]
            ndof = faces_loc.size

            # Retrieve permeability and normals assumed outward to the cell.
            sgn_loc = sgn[loc].reshape((-1, 1))
            normals = np.multiply(np.tile(sgn_loc.T, (g.dim, 1)),
                                  f_normals[:, faces_loc])

            # Compute the H_div-mass local matrix
            A = massHdiv(
                k.values[0:g.dim, 0:g.dim, c],
                c_centers[:, c],
                a[c] * g.cell_volumes[c],
                f_centers[:, faces_loc],
                a[c] * normals,
                np.ones(ndof),
                diams[c],
                weight[c],
            )[0]
            # Compute the Div local matrix
            B = -np.ones((ndof, 1))
            # Compute the hybrid local matrix
            C = np.eye(ndof, ndof)

            # Perform the static condensation to compute the pressure and velocity
            S = 1 / np.dot(B.T, solve(A, B))
            l_loc = solution[faces_loc].reshape((-1, 1))

            p[c] = np.dot(S, f[c] - np.dot(B.T, solve(A, np.dot(C, l_loc))))
            u[faces_loc] = -np.multiply(
                sgn_loc, solve(A,
                               np.dot(B, p[c]) + np.dot(C, l_loc)))

        return u, p
Beispiel #44
0
def save_sparse_matrix(filename, x):
    print('finding the sparse entries')
    [rowidx, colidx, val] = sp.find(x)
    y = np.column_stack((rowidx, colidx, val))
    print('saving shuffled matrix')
    np.savetxt(filename, y, fmt='%u', delimiter=' ')
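
The text file written above holds one (row, col, value) triple per line, so it can be read straight back into a sparse matrix. A small round-trip sketch (note that the '%u' format discards any fractional part, so the values are made integer-valued first):

import numpy as np
import scipy.sparse as sp

x = sp.random(6, 4, density=0.3, format='csr', random_state=1)
x.data = np.ceil(10 * x.data)     # integer-valued entries, matching fmt='%u'

rowidx, colidx, val = sp.find(x)
np.savetxt('matrix.txt', np.column_stack((rowidx, colidx, val)), fmt='%u', delimiter=' ')

# Reload: columns come back as floats, so cast the indices before rebuilding
y = np.loadtxt('matrix.txt')
x2 = sp.coo_matrix((y[:, 2], (y[:, 0].astype(int), y[:, 1].astype(int))), shape=x.shape)
assert (x != x2).nnz == 0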
Beispiel #45
0
    def find_positive_cc(self, margin=False, ignore=False):

        #cc3d = self.cc3d[14:-14,44:-44,44:-44]
        gt = self.gt_cc3d
        if margin:
            gt[14:-14, 44:-44, 44:-44] = 0

        if ignore:
            gt[gt > 200] = 0

        cc3d = pickle.load(
            open(os.path.join(self.trial_name, 'saved_post_cc3d.pkl'), 'rb'))
        #post = (self.gtvol)*((self.gtvol%2)==0)
        #gt = post
        #cc3d = cc3d[11:-11,192/2:-192/2,192/2:-192/2]
        #gt = gt[11:-11,192/2:-192/2,192/2:-192/2]

        #pdb.set_trace()

        seg_gt_overlap = scipy.sparse.csc_matrix(
            (np.ones_like(self.segvol.ravel()), (self.segvol.ravel(),
                                                 self.seg_gtvol.ravel())))
        seg_gt_map = np.argmax(seg_gt_overlap[:, 1:], axis=1)

        overlaps = scipy.sparse.csc_matrix(
            (np.ones_like(gt.ravel()), (gt.ravel(), cc3d.ravel())))
        from scipy.sparse import find
        ntp = 0
        nfp = 0
        count = 0
        #unique_ccid = np.unique(cc3d)
        #for ii in range(1,unique_ccid.shape[0]):
        #uid = unique_ccid[ii]
        #pdb.set_trace()
        true_positive_detected = []
        detection_true = 0
        detection_false = 0
        for uid in self.cc_partners.keys():

            rows, dummy, values = find(overlaps[:, uid])
            count = count + len(self.cc_partners[uid])
            if rows[0] == 0:
                rows = rows[1:]
                values = values[1:]
            if len(values) < 1:
                for ci in range(len(self.cc_partners[uid])):
                    self.cc_partners[uid][ci]['label'] = -1

                detection_false = detection_false + len(self.cc_partners[uid])
                continue

        #match_id = rows[np.argmax(values)]
            found = 0
            for mi in range(rows.shape[0]):
                match_id = rows[mi]
                match_amt = values[mi]
                #print 'found: {0}, {1}'.format(self.cc_partners[uid]['pre'], self.cc_partners[uid]['post'])
                #print 'gt: {0}, {1}'.format(self.gt_partners[match_id]['pre'], self.gt_partners[match_id]['post'])
                print(uid, match_id)
                #if uid==3805 and match_id==185:
                #pdb.set_trace()
                for ci in range(len(self.cc_partners[uid])):

                    pre_seg_gt_id = seg_gt_map[self.cc_partners[uid][ci]
                                               ['pre_seg']][0, 0] + 1
                    post_seg_gt_id = seg_gt_map[self.cc_partners[uid][ci]
                                                ['post_seg']][0, 0] + 1
                    if pre_seg_gt_id == 0 or post_seg_gt_id == 0:
                        print "seg id not present in gt"
                        #pdb.set_trace()
                        continue

                    if (pre_seg_gt_id == self.gt_partners[match_id]['pre']) and \
                    (post_seg_gt_id == self.gt_partners[match_id]['post']):

                        #print 'true positive ', uid
                        print('found: {0}, {1}'.format(
                            self.cc_partners[uid][ci]['pre_seg'],
                            self.cc_partners[uid][ci]['post_seg']))
                        print('gt: {0}, {1}'.format(
                            self.gt_partners[match_id]['pre'],
                            self.gt_partners[match_id]['post']))

                        true_positive_detected.append(match_id)
                        detection_true = detection_true + 1

                        self.cc_partners[uid][ci]['label'] = 1
                        self.cc_partners[uid][ci]['gt_id'] = match_id

                    else:
                        if 'label' in self.cc_partners[uid][ci]:
                            if self.cc_partners[uid][ci]['label'] == 1:
                                continue
                        else:
                            #break
                            detection_false = detection_false + 1
                            self.cc_partners[uid][ci]['label'] = -1
                            self.cc_partners[uid][ci]['gt_id'] = match_id
                        #break

        unique_gt = np.setdiff1d(np.unique(gt), [0])
        nunique_detected = len(np.unique(true_positive_detected))

        nmiss = len(unique_gt) - nunique_detected

        print('detected = ', nunique_detected)
        print('missed = {0} ({1})'.format(nmiss, nmiss * 1. / len(unique_gt)))
        print('detection_true = ', detection_true)
        print('detection false = ', detection_false)

        #pdb.set_trace()
        pickle.dump(
            self.cc_partners,
            open(
                os.path.join(self.trial_name,
                             'saved_cc_partners_with_label.pkl'), 'wb'))
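
The overlap bookkeeping above uses a common trick: a contingency matrix between two label volumes is built by passing the flattened label arrays as (row, col) coordinates with unit data, and single columns are then queried with find. A minimal sketch of that trick on toy label arrays (names are illustrative):

import numpy as np
import scipy.sparse
from scipy.sparse import find

gt = np.array([0, 1, 1, 2, 2, 2, 0, 1])   # ground-truth label per voxel
cc = np.array([0, 5, 5, 7, 7, 5, 0, 0])   # predicted component label per voxel

# Entry (i, j) counts voxels carrying ground-truth label i and predicted label j
overlaps = scipy.sparse.csc_matrix((np.ones_like(gt), (gt, cc)))

# Ground-truth labels overlapping predicted component 5, with their voxel counts
rows, dummy, values = find(overlaps[:, 5])
print(rows, values)    # -> [1 2] [2 1]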
Beispiel #46
0
    def __init__(self,
                 layout,
                 block_size=64,
                 heads=None,
                 mask_callback=None,
                 name=None):

        if len(layout.shape) == 2:
            assert heads is not None, "heads must be explicitly specified when using shared layouts per head"
            # broadcast same layout over all heads
            layout = np.expand_dims(layout, 0)

        if heads is None:
            heads = layout.shape[0]

        assert block_size in (
            8, 16, 32,
            64), "Block sizes of 8, 16, 32 and 64 currently supported"
        assert len(layout.shape) == 3, "bad layout shape: " + str(layout.shape)
        assert layout.shape[1] == layout.shape[2], "layout should be square"

        #self.layout       = layout > 0  # save boolean version for serialization purposes, TODO: save packbits or csr version
        self.blk_size = block_size
        self.name = name
        self.heads = heads
        self.lut_heads = layout.shape[0]
        self.ctx_blks = layout.shape[1]
        self.blk_shape = (block_size, block_size)
        self.nn_max = 0
        self.tn_max = 0

        if layout.dtype != np.int32:
            layout = layout.astype(np.int32)

        self.nt_lut = list()
        self.nn_lut = list()
        self.tn_lut = list()
        self.nt_list = list()
        self.nn_list = list()
        self.tn_list = list()
        blocks = None
        for head in range(layout.shape[0]):

            # convert to csr for vastly more efficient python iteration on large sparse layouts
            csr = sparse.csr_matrix(layout[head, :, :])
            ys, xs, bs = sparse.find(csr)  # xs is in sorted order by default
            if blocks is None:
                blocks = len(bs)
            else:
                assert len(
                    bs
                ) == blocks, "number of layout blocks must be equal across heads"

            # make blocks contiguous along the rows (softmax code leverages this for increased performance)
            nt_list = sorted(zip(ys, xs))
            ys = [b[0] for b in nt_list]
            xs = [b[1] for b in nt_list]

            nt_lut = np.array(nt_list, dtype=np.int32)
            nn_lut, nn_list, nn_max = self.xn_lut(ys, xs, blocks)
            tn_lut, tn_list, tn_max = self.xn_lut(xs, ys, blocks)

            self.nt_lut.append(nt_lut)
            self.nn_lut.append(nn_lut)
            self.tn_lut.append(tn_lut)
            self.nt_list.append(nt_list)
            self.nn_list.append(nn_list)
            self.tn_list.append(tn_list)
            self.nn_max = max(self.nn_max, nn_max)
            self.tn_max = max(self.tn_max, tn_max)

        self.blocks = blocks
        self.nt_lut = get_constant(np.array(self.nt_lut, dtype=np.int32),
                                   name="nt")
        self.nn_lut = get_constant(np.array(self.nn_lut, dtype=np.int32),
                                   name="nn")
        self.tn_lut = get_constant(np.array(self.tn_lut, dtype=np.int32),
                                   name="tn")

        if mask_callback is not None:
            self.init_softmax_mask(mask_callback)
        else:
            self.softmax_mask = None
            self.softmax_mask_np = None
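
The constructor above converts each head's block layout to CSR and uses sparse.find to enumerate the (row, col) coordinates of the active blocks, then re-sorts them so that blocks are contiguous along the rows. A compact sketch of that conversion on a toy layout, independent of the surrounding attention code:

import numpy as np
from scipy import sparse

# Toy 4x4 block layout for one head: 1 marks an attention block to keep
layout = np.array([
    [1, 0, 0, 0],
    [1, 1, 0, 0],
    [0, 1, 1, 0],
    [1, 0, 1, 1],
], dtype=np.int32)

csr = sparse.csr_matrix(layout)
ys, xs, bs = sparse.find(csr)        # coordinates of the nonzero blocks

# Sort blocks row-major, as the softmax code above expects
nt_list = sorted(zip(ys, xs))
nt_lut = np.array(nt_list, dtype=np.int32)
print(len(bs), "blocks")
print(nt_lut)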
Beispiel #47
0
def tree_multiresolution(G,
                         Nlevel,
                         reduction_method='resistance_distance',
                         compute_full_eigen=False,
                         root=None):
    r"""Compute a multiresolution of trees

    Parameters
    ----------
    G : Graph
        Graph structure of a tree.
    Nlevel : Number of times to downsample and coarsen the tree
    root : int
        The index of the root of the tree. (default = 1)
    reduction_method : str
        The graph reduction method (default = 'resistance_distance')
    compute_full_eigen : bool
        To also compute the graph Laplacian eigenvalues for every tree in the sequence

    Returns
    -------
    Gs : ndarray
        Ndarray, with each element containing a graph structure representing a reduced tree.
    subsampled_vertex_indices : ndarray
        Indices of the vertices of the previous tree that are kept for the subsequent tree.

    """

    if not root:
        if hasattr(G, 'root'):
            root = G.root
        else:
            root = 1

    Gs = [G]

    if compute_full_eigen:
        Gs[0].compute_fourier_basis()

    subsampled_vertex_indices = []
    depths, parents = _tree_depths(G.A, root)
    old_W = G.W

    for lev in range(Nlevel):
        # Identify the vertices in the even depths of the current tree
        down_odd = np.round(depths) % 2
        down_even = np.ones((Gs[lev].N)) - down_odd
        keep_inds = np.where(down_even == 1)[0]
        subsampled_vertex_indices.append(keep_inds)

        # There will be one undirected edge in the new graph connecting each
        # non-root subsampled vertex to its new parent. Here, we find the new
        # indices of the new parents
        non_root_keep_inds, new_non_root_inds = np.setdiff1d(keep_inds, root)
        old_parents_of_non_root_keep_inds = parents[non_root_keep_inds]
        old_grandparents_of_non_root_keep_inds = parents[
            old_parents_of_non_root_keep_inds]
        # TODO new_non_root_parents = dsearchn(keep_inds, old_grandparents_of_non_root_keep_inds)

        old_W_i_inds, old_W_j_inds, old_W_weights = sparse.find(old_W)
        i_inds = np.concatenate((new_non_root_inds, new_non_root_parents))
        j_inds = np.concatenate((new_non_root_parents, new_non_root_inds))
        new_N = np.sum(down_even)

        if reduction_method == "unweighted":
            new_weights = np.ones(np.shape(i_inds))

        elif reduction_method == "sum":
            # TODO old_weights_to_parents_inds = dsearchn([old_W_i_inds,old_W_j_inds], [non_root_keep_inds, old_parents_of_non_root_keep_inds]);
            old_weights_to_parents = old_W_weights[old_weights_to_parents_inds]
            # old_W(non_root_keep_inds,old_parents_of_non_root_keep_inds);
            # TODO old_weights_parents_to_grandparents_inds = dsearchn([old_W_i_inds, old_W_j_inds], [old_parents_of_non_root_keep_inds, old_grandparents_of_non_root_keep_inds])
            old_weights_parents_to_grandparents = old_W_weights[
                old_weights_parents_to_grandparents_inds]
            # old_W(old_parents_of_non_root_keep_inds,old_grandparents_of_non_root_keep_inds);
            new_weights = old_weights_to_parents + old_weights_parents_to_grandparents
            new_weights = np.concatenate((new_weights, new_weights))

        elif reduction_method == "resistance_distance":
            # TODO old_weights_to_parents_inds = dsearchn([old_W_i_inds, old_W_j_inds], [non_root_keep_inds, old_parents_of_non_root_keep_inds])
            old_weights_to_parents = old_W_weights[old_weights_to_parents_inds]
            # old_W(non_root_keep_inds,old_parents_of_non_root_keep_inds);
            # TODO old_weights_parents_to_grandparents_inds = dsearchn([old_W_i_inds, old_W_j_inds], [old_parents_of_non_root_keep_inds, old_grandparents_of_non_root_keep_inds])
            old_weights_parents_to_grandparents = old_W_weights[
                old_weights_parents_to_grandparents_inds]
            # old_W(old_parents_of_non_root_keep_inds,old_grandparents_of_non_root_keep_inds);
            new_weights = 1. / (1. / old_weights_to_parents +
                                1. / old_weights_parents_to_grandparents)
            new_weights = np.concatenate(([new_weights, new_weights]))

        else:
            raise ValueError('Unknown graph reduction method.')

        new_W = sparse.csc_matrix((new_weights, (i_inds, j_inds)),
                                  shape=(new_N, new_N))
        # Update parents
        new_root = np.where(keep_inds == root)[0]
        parents = np.zeros((np.shape(keep_inds)[0], np.shape(keep_inds)[0]))
        parents[:new_root - 1, new_root:] = new_non_root_parents

        # Update depths
        depths = depths[keep_inds]
        depths = depths / 2.

        # Store new tree
        Gtemp = graphs.Graph(new_W,
                             coords=Gs[lev].coords[keep_inds],
                             limits=G.limits,
                             root=new_root)
        #Gs[lev].copy_graph_attributes(Gtemp, False)

        if compute_full_eigen:
            Gs[lev + 1].compute_fourier_basis()

        # Replace current adjacency matrix and root
        Gs.append(Gtemp)

        old_W = new_W
        root = new_root

    return Gs, subsampled_vertex_indices
Beispiel #48
0
def generate_coarse_grid_single(g, subdiv, face_map):
    """
    Specific function for a single grid. Use the common interface instead.
    """

    subdiv = np.asarray(subdiv)
    assert subdiv.size == g.num_cells

    # declare the storage array to build the cell_faces map
    cell_faces = np.empty(0, dtype=g.cell_faces.indptr.dtype)
    cells = np.empty(0, dtype=cell_faces.dtype)
    orient = np.empty(0, dtype=g.cell_faces.data.dtype)

    # declare the storage array to build the face_nodes map
    face_nodes = np.empty(0, dtype=g.face_nodes.indptr.dtype)
    nodes = np.empty(0, dtype=face_nodes.dtype)
    visit = np.zeros(g.num_faces, dtype=np.bool)

    # compute the face_node indexes
    num_nodes_per_face = g.face_nodes.indptr[1:] - g.face_nodes.indptr[:-1]
    face_node_ind = matrix_compression.rldecode(np.arange(g.num_faces),
                                                num_nodes_per_face)

    cells_list = np.unique(subdiv)
    cell_volumes = np.zeros(cells_list.size)
    cell_centers = np.zeros((3, cells_list.size))

    for cellId, cell in enumerate(cells_list):
        # extract the cells of the original mesh associated to a specific label
        cells_old = np.where(subdiv == cell)[0]

        # compute the volume
        cell_volumes[cellId] = np.sum(g.cell_volumes[cells_old])
        cell_centers[:, cellId] = np.average(g.cell_centers[:, cells_old],
                                             axis=1)

        # reconstruct the cell_faces mapping
        faces_old, _, orient_old = sps.find(g.cell_faces[:, cells_old])
        mask = np.ones(faces_old.size, dtype=np.bool)
        mask[np.unique(faces_old, return_index=True)[1]] = False
        # extract the indexes of the internal edges, to be discarded
        index = np.array(
            [np.where(faces_old == f)[0] for f in faces_old[mask]],
            dtype=np.int).ravel()
        faces_new = np.delete(faces_old, index)
        cell_faces = np.r_[cell_faces, faces_new]
        cells = np.r_[cells, np.repeat(cellId, faces_new.shape[0])]
        orient = np.r_[orient, np.delete(orient_old, index)]

        # reconstruct the face_nodes mapping
        # consider only the unvisited faces
        not_visit = ~visit[faces_new]
        if not_visit.size == 0 or np.all(~not_visit):
            continue
        # mask to consider only the external faces
        mask = np.atleast_1d(
            np.sum(
                [face_node_ind == f for f in faces_new[not_visit]],
                axis=0,
                dtype=np.bool,
            ))
        face_nodes = np.r_[face_nodes, face_node_ind[mask]]

        nodes_new = g.face_nodes.indices[mask]
        nodes = np.r_[nodes, nodes_new]
        visit[faces_new] = True

    # Rename the faces
    cell_faces_unique = np.unique(cell_faces)
    cell_faces_id = np.arange(cell_faces_unique.size, dtype=cell_faces.dtype)
    cell_faces = np.array([
        cell_faces_id[np.where(cell_faces_unique == f)[0]] for f in cell_faces
    ]).ravel()
    shape = (cell_faces_unique.size, cells_list.size)
    cell_faces = sps.csc_matrix((orient, (cell_faces, cells)), shape=shape)

    # Rename the nodes
    face_nodes = np.array([
        cell_faces_id[np.where(cell_faces_unique == f)[0]] for f in face_nodes
    ]).ravel()
    nodes_list = np.unique(nodes)
    nodes_id = np.arange(nodes_list.size, dtype=nodes.dtype)
    nodes = np.array([nodes_id[np.where(nodes_list == n)[0]]
                      for n in nodes]).ravel()

    # sort the nodes
    nodes = nodes[np.argsort(face_nodes, kind="mergesort")]
    data = np.ones(nodes.size, dtype=g.face_nodes.data.dtype)
    indptr = np.r_[0, np.cumsum(np.bincount(face_nodes))]
    face_nodes = sps.csc_matrix((data, nodes, indptr))

    # store again the data in the same grid
    g.name.append("coarse")

    g.nodes = g.nodes[:, nodes_list]
    g.num_nodes = g.nodes.shape[1]

    g.face_nodes = face_nodes
    g.num_faces = g.face_nodes.shape[1]
    g.face_areas = g.face_areas[cell_faces_unique]
    g.tags = tags.extract(g.tags, cell_faces_unique, tags.standard_face_tags())
    g.face_normals = g.face_normals[:, cell_faces_unique]
    g.face_centers = g.face_centers[:, cell_faces_unique]

    g.cell_faces = cell_faces
    g.num_cells = g.cell_faces.shape[1]
    g.cell_volumes = cell_volumes
    g.cell_centers = half_space.star_shape_cell_centers(g)
    is_nan = np.isnan(g.cell_centers[0, :])
    g.cell_centers[:, is_nan] = cell_centers[:, is_nan]

    if face_map:
        return np.array([cell_faces_unique, cell_faces_id])
Beispiel #49
0
def extrude_grid_bucket(gb: pp.GridBucket, z: np.ndarray) -> Tuple[pp.GridBucket, Dict]:
    """ Extrude a GridBucket by extending all fixed-dimensional grids in the z-direction.

    In practice, the original grid bucket will be 2d, and the result is 3d.

    The returned GridBucket is fully functional, including mortar grids on the gb edges.
    The data dictionaries on nodes and edges are mainly empty. Data can be transferred from
    the original GridBucket via the returned map between old and new grids.

    Parameters:
        gb (pp.GridBucket): Mixed-dimensional grid to be extruded. Should be 2d.
        z (np.ndarray): z-coordinates of the nodes in the extruded grid. Should be
            either non-negative or non-positive, and be sorted in increasing or
            decreasing order, respectively.

    Returns:
        gb (pp.GridBucket): Mixed-dimensional grid, 3d. The data dictionaries on nodes and
            edges are mostly empty.
        dict: Mapping from individual grids in the old bucket to the corresponding
            extruded grids in the new one. The dictionary values are a namedtuple with
            elements grid (new grid), cell_map and face_map, where the two latter
            describe mapping between the new and old grid, see extrude_grid for details.

    """

    # New GridBucket, to be filled in
    gb_new = pp.GridBucket()

    # Data structure for mapping between old and new grids
    g_map = {}

    # Container for grid information
    Mapping = namedtuple("mapping", ["grid", "cell_map", "face_map"])

    # Loop over all grids in the old bucket, extrude the grid, save mapping information
    for g, _ in gb:
        g_new, cell_map, face_map = extrude_grid(g, z)

        if hasattr(g, "frac_num"):
            g_new.frac_num = g.frac_num

        gb_new.add_nodes([g_new])

        g_map[g] = Mapping(g_new, cell_map, face_map)

    # Loop over all edges in the old grid, create corresponding edges in the new gb.
    # Also define mortar_grids
    for e, d in gb.edges():

        # grids of the old edge, extruded version of each grid
        gl, gh = gb.nodes_of_edge(e)
        gl_new = g_map[gl].grid
        gh_new = g_map[gh].grid

        # Next, we need the cell-face mapping for the new grid.
        # The idea is to first find the old map, then replace each cell-face relation
        # with the set of cells and faces (exploiting first that the new grids are
        # matching due to the extrusion algorithm, and second that the cell-map and
        # face-map stores indices in increasing layer index, so that the first cell
        # and first face both are in the first layer, thus they match, etc.).
        face_cells_old = d["face_cells"]

        # cells (in low-dim grid) and faces in high-dim grid that define the same
        # geometric quantity
        cells, faces, _ = sps.find(face_cells_old)

        # Cell-map for the low-dimensional grid, face-map for the high-dim
        cell_map = g_map[gl].cell_map
        face_map = g_map[gh].face_map

        # Data structure for the new face-cell map
        rows = np.empty(0, dtype=np.int)
        cols = np.empty(0, dtype=np.int)

        # The standard MortarGrid __init__ assumes that when faces are split because of
        # a fracture, the faces are ordered with one side first, then the other. This
        # will not be True for this layered construction. Instead, keep track of all
        # faces that should be moved to the other side.
        face_on_other_side = np.empty(0, dtype=np.int)

        # Loop over cells in gl would not have been as clean, as each cell is associated
        # with faces on both sides
        # Faces are found from the high-dim grid, cells in the low-dim grid
        for idx in range(faces.size):
            rows = np.hstack((rows, cell_map[cells[idx]]))
            cols = np.hstack((cols, face_map[faces[idx]]))

            # Here, we tacitly assume that the original grid had its faces split in the
            # standard way, that is, all faces on one side have index lower than any
            # face on the other side.
            if faces[idx] > np.median(faces):
                face_on_other_side = np.hstack(
                    (face_on_other_side, face_map[faces[idx]])
                )

        data = np.ones(rows.size, dtype=np.bool)
        # Create new face-cell map
        face_cells_new = sps.coo_matrix(
            (data, (rows, cols)), shape=(gl_new.num_cells, gh_new.num_faces)
        ).tocsc()

        # Define the new edge
        e = (gh_new, gl_new)
        # Add to new gb, together with the new face-cell map
        gb_new.add_edge(e, face_cells_new)

        # Create a mortar grid, add to data of new edge
        side_g = {
            mortar_grid.LEFT_SIDE: gl_new.copy(),
            mortar_grid.RIGHT_SIDE: gl_new.copy(),
        }

        # Construct mortar grid, with instructions on which faces belong to which side
        mg = pp.MortarGrid(
            gl_new.dim, side_g, face_cells_new, face_duplicate_ind=face_on_other_side
        )

        d_new = gb_new.edge_props(e)

        d_new["mortar_grid"] = mg

    return gb_new, g_map
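
The edge handling above unpacks the old face-cell relation into matched (cell, face) index pairs with sps.find and then expands every pair through the extrusion maps. A toy sketch of that unpacking and rebuild, with plain dictionaries standing in for the cell_map/face_map entries (sizes and index values are made up for illustration):

import numpy as np
import scipy.sparse as sps

# Old face-cell relation: 2 low-dimensional cells x 6 high-dimensional faces
face_cells_old = sps.csc_matrix(
    (np.ones(2, dtype=bool), ([0, 1], [2, 3])), shape=(2, 6))

cells, faces, _ = sps.find(face_cells_old)

# Hypothetical extrusion maps: each old cell/face becomes two new ones (two layers)
cell_map = {0: np.array([0, 1]), 1: np.array([2, 3])}
face_map = {2: np.array([10, 11]), 3: np.array([12, 13])}

rows = np.empty(0, dtype=int)
cols = np.empty(0, dtype=int)
for idx in range(faces.size):
    rows = np.hstack((rows, cell_map[cells[idx]]))
    cols = np.hstack((cols, face_map[faces[idx]]))

face_cells_new = sps.coo_matrix(
    (np.ones(rows.size, dtype=bool), (rows, cols)), shape=(4, 20)).tocsc()
print(face_cells_new.nnz)   # -> 4 matched pairs in the extruded relation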
Beispiel #50
0
def create_partition(A, seeds=None, **kwargs):
    """
    Create the partition based on an input matrix using the algebraic multigrid
    method coarse/fine-splittings based on direct couplings. The standard values
    for cdepth and epsilon are taken from the following reference.

    For more information see: U. Trottenberg, C. W. Oosterlee, and A. Schuller.
    Multigrid. Academic press, 2000.

    Parameters
    ----------
    A: sparse matrix used for the agglomeration
    cdepth: the greater it is, the more aggressive the aggregation will be, e.g. fewer
        cells if it is used in combination with generate_coarse_grid
    epsilon: weight for the off-diagonal entries to define the "strong
        negative coupling"
    seeds: (optional) to define a priori coarse cells

    Returns
    -------
    out: agglomeration indices

    How to use
    ----------
    part = create_partition(tpfa_matrix(g))
    g = generate_coarse_grid(g, part)

    """

    cdepth = int(kwargs.get("cdepth", 2))
    epsilon = kwargs.get("epsilon", 0.25)

    if A.size == 0:
        return np.zeros(1)
    Nc = A.shape[0]

    # For each node, which other nodes are strongly connected to it
    ST = sps.lil_matrix((Nc, Nc), dtype=np.bool)

    # In the first instance, all cells are strongly connected to each other
    At = A.T

    for i in np.arange(Nc):
        loc = slice(At.indptr[i], At.indptr[i + 1])
        ci, vals = At.indices[loc], At.data[loc]
        neg = vals < 0.0
        nvals = vals[neg]
        nci = ci[neg]
        minId = np.argmin(nvals)
        ind = -nvals >= epsilon * np.abs(nvals[minId])
        ST[nci[ind], i] = True

    # Temporary field, will store connections of depth 1
    for _ in np.arange(2, cdepth + 1):
        STold = ST.copy()
        for j in np.arange(Nc):
            rowj = np.array(STold.rows[j])
            if rowj.size == 0:
                continue
            row = np.hstack([STold.rows[r] for r in rowj])
            ST[j, np.concatenate((rowj, row))] = True

    del STold

    ST.setdiag(False)
    lmbda = np.array([len(s) for s in ST.rows])

    # Define coarse nodes
    candidate = np.ones(Nc, dtype=np.bool)
    is_fine = np.zeros(Nc, dtype=np.bool)
    is_coarse = np.zeros(Nc, dtype=np.bool)

    # cells that are not important for any other cells are on the fine scale.
    for row_id, row in enumerate(ST.rows):
        if not row:
            is_fine[row_id] = True
            candidate[row_id] = False

    ST = ST.tocsr()
    it = 0
    while np.any(candidate):
        i = np.argmax(lmbda)
        is_coarse[i] = True
        j = ST.indices[ST.indptr[i]:ST.indptr[i + 1]]
        jf = j[candidate[j]]
        is_fine[jf] = True
        candidate[np.r_[i, jf]] = False
        loop = ST.indices[mcolon.mcolon(ST.indptr[jf], ST.indptr[jf + 1])]
        for row in np.unique(loop):
            s = ST.indices[ST.indptr[row]:ST.indptr[row + 1]]
            lmbda[row] = s[candidate[s]].size + 2 * s[is_fine[s]].size
        lmbda[np.logical_not(candidate)] = -1
        it = it + 1

        # Something went wrong during aggregation
        assert it <= Nc

    del lmbda, ST

    if seeds is not None:
        is_coarse[seeds] = True
        is_fine[seeds] = False

    # If two neighbors are coarse, eliminate one of them without touching the
    # seeds
    c2c = np.abs(A) > 0
    c2c_rows, _, _ = sps.find(c2c)

    pairs = np.empty((0, 2), dtype=np.int)
    for idx, it in enumerate(np.where(is_coarse)[0]):
        loc = slice(c2c.indptr[it], c2c.indptr[it + 1])
        ind = np.setdiff1d(c2c_rows[loc], it)
        cind = ind[is_coarse[ind]]
        new_pair = np.stack((np.repeat(it, cind.size), cind), axis=-1)
        pairs = np.append(pairs, new_pair, axis=0)

    # Remove one of the neighbors cells
    if pairs.size:
        pairs = setmembership.unique_rows(np.sort(pairs, axis=1))[0]
        for ij in pairs:
            A_val = np.array(A[ij, ij]).ravel()
            ids = ij[np.argsort(A_val)]
            ids = np.setdiff1d(ids, seeds, assume_unique=True)
            if ids.size:
                is_coarse[ids[0]] = False
                is_fine[ids[0]] = True

    coarse = np.where(is_coarse)[0]

    # Primal grid
    NC = coarse.size
    primal = sps.lil_matrix((NC, Nc), dtype=np.bool)
    primal[np.arange(NC), coarse[np.arange(NC)]] = True

    connection = sps.lil_matrix((Nc, Nc), dtype=np.double)
    for it in np.arange(Nc):
        n = np.setdiff1d(c2c_rows[c2c.indptr[it]:c2c.indptr[it + 1]], it)
        loc = slice(A.indptr[it], A.indptr[it + 1])
        A_idx, A_row = A.indices[loc], A.data[loc]
        mask = A_idx != it
        connection[it, n] = np.abs(A_row[mask] / A_row[np.logical_not(mask)])

    connection = connection.tocsr()

    candidates_rep = np.ediff1d(connection.indptr)
    candidates_idx = np.repeat(is_coarse, candidates_rep)
    candidates = np.stack(
        (
            connection.indices[candidates_idx],
            np.repeat(np.arange(NC), candidates_rep[is_coarse]),
        ),
        axis=-1,
    )

    connection_idx = mcolon.mcolon(connection.indptr[coarse],
                                   connection.indptr[coarse + 1])
    vals = sps.csr_matrix(
        accumarray.accum(candidates,
                         connection.data[connection_idx],
                         size=[Nc, NC]))
    del candidates_rep, candidates_idx, connection_idx

    it = NC
    not_found = np.logical_not(is_coarse)
    # Process the strongest connection globally
    while np.any(not_found):

        np.argmax(vals.data)
        vals.argmax(axis=0)
        mcind = np.atleast_1d(np.squeeze(np.asarray(vals.argmax(axis=0))))
        mcval = -np.inf * np.ones(mcind.size)
        for c, r in enumerate(mcind):
            loc = slice(vals.indptr[r], vals.indptr[r + 1])
            vals_idx, vals_data = vals.indices[loc], vals.data[loc]
            mask = vals_idx == c
            if vals_idx.size == 0 or not np.any(mask):
                continue
            mcval[c] = vals_data[mask]

        mi = np.argmax(mcval)
        nadd = mcind[mi]

        primal[mi, nadd] = True
        it = it + 1
        if it > Nc + 5:
            break

        not_found[nadd] = False
        vals.data[vals.indptr[nadd]:vals.indptr[nadd + 1]] = 0

        loc = slice(connection.indptr[nadd], connection.indptr[nadd + 1])
        nc = connection.indices[loc]
        af = not_found[nc]
        nc = nc[af]
        nv = mcval[mi] * connection[nadd, :]
        nv = nv.data[af]
        if len(nc) > 0:
            vals += sps.csr_matrix((nv, (nc, np.repeat(mi, len(nc)))),
                                   shape=(Nc, NC))

    coarse, fine = primal.tocsr().nonzero()
    return coarse[np.argsort(fine)]
Beispiel #51
0

data = np.array(data,dtype=int)
l1 = np.array(l1,dtype=int) 
#mtx = csr_matrix((data, (l1, newList)),shape=(len(mv_index_fn),len(l1))).toarray()
mtx = csr_matrix((data, (l1, newList)))
#tmp = mtx[mtx!=0]
print(mtx.shape)
   

#******** *************************** Part 2 *****************************

#time_part_2= time.time()

list1=[]
[D,E,data_one] = find(mtx)
F = np.unique(E,return_counts=True)
k=0
user_key_index={}
j=0
for k in range(mtx.shape[1]):
    user_key_index[k]= (D[j: j + F[1][k]]).tolist()
    j=j+F[1][k]

def jacc_distance_new(num1,num2):
    #print("Time 1:", (time.time()-start_time6))
    user1 = user_key_index[num1]
    #print("Time 2:", (time.time()-start_time6))
    user2 = user_key_index[num2]
    #print("Time 3:", (time.time()-start_time6))
    m11 = len(set(user1) & set(user2))
    def jacobianstructure(self):
        # Assume the structure of the Jacobian will not change between iterations
        # (as verified in another project); sparsity functions are used to get
        # the structure of the Jacobian.

        # A random initial guess is used to evaluate the Jacobian and its structure;
        # theoretically, the structure only needs to be computed once.
        
        num_test = 3
    
        row = np.array([])
        col = np.array([])
        
        k = 0
        j = 0

        for p in range(self.num_nodes-1):
            Jac_x = np.zeros((self.num_cons, 2*self.num_states))
            Jac_con_close = np.zeros((self.num_cons, self.num_con_close))
            Jac_con_Lopen = np.zeros((self.num_cons, 2))
            Jac_con_Ropen = np.zeros((self.num_cons, 2))
            Jac_con_ankle = np.zeros((self.num_cons, 2))
            
            iniL = self.initial_L[p+1, :]
            iniR = self.initial_R[p+1, :]
        
            for q in range(num_test):
                np.random.seed()
                x_p = 0.5 - np.random.random(self.num_states)
                np.random.seed()
                x_a = 0.5 - np.random.random(self.num_states)
                np.random.seed()
                vs_a = 0.1*np.random.random(2)
                np.random.seed()
                con_close = 1 -2*np.random.random(self.num_con_close)
                np.random.seed()
                u_ankle = 100*np.random.random(2)
                
                np.random.seed()
                u_stanceL = 100*np.random.random(2)
                np.random.seed()
                u_stanceR = 100*np.random.random(2)
                
                f, dfdx, dfdxdot, df_dConClose, df_dConLOpen, df_dConROpen, df_dConAnkle =\
                self.gait2dpi_u(x_a, (x_a - x_p)/self.interval, vs_a, con_close,
                            u_stanceL, u_stanceR, u_ankle, iniL, iniR)
                
                Jac_x[:, :self.num_states] -= dfdxdot/self.interval
                
                Jac_x[:, self.num_states:2*self.num_states] +=\
                (dfdx + dfdxdot/self.interval)
                
                Jac_con_close += df_dConClose
                Jac_con_Lopen += df_dConLOpen
                Jac_con_Ropen += df_dConROpen
                Jac_con_ankle += df_dConAnkle
                
            for r in range(self.num_cons):
                
                row_x, col_x, RA_Jac_x = find(Jac_x[r, :])
                row_c, col_c, RA_Jac_c = find(Jac_con_close[r, :])
                row_lo, col_lo, RA_Jac_lo = find(Jac_con_Lopen[r, :])
                row_ro, col_ro, RA_Jac_ro = find(Jac_con_Ropen[r, :])
                row_a, col_a, RA_Jac_a = find(Jac_con_ankle[r, :])

                
                row_xf = row_x + p*self.num_cons + r
                row_cf = row_c + p*self.num_cons + r
                row_lof = row_lo + p*self.num_cons + r
                row_rof = row_ro + p*self.num_cons + r
                row_af = row_a + p*self.num_cons + r
                
                col_xf = col_x + p*self.num_states
                col_cf = col_c + self.num_nodes*self.num_states
                col_lof = col_lo + self.num_nodes*self.num_states + self.num_con_close + k*2
                col_rof = col_ro + self.num_nodes*self.num_states + self.num_con_close + self.num_stanceL*2 + j*2
                col_af = col_a + self.num_nodes*self.num_states + self.num_con_close + self.num_stanceL*2 + self.num_stanceR*2 + p*2
					
                row = np.hstack((row, row_xf, row_cf, row_lof, row_rof, row_af))
                col = np.hstack((col, col_xf, col_cf, col_lof, col_rof, col_af))
                    
            if iniL[0]:
                k += 1
            if iniR[0]:
                j += 1
		
        return (row, col)
Beispiel #53
0
def refine_grid_1d(g, ratio=2):
    """ Refine cells in a 1d grid.

    Parameters:
        g (grid): A 1d grid, to be refined.
        ratio (int):

    Returns:
        grid: New grid, with finer cells.

    """

    # Implementation note: The main part of the function is the construction of
    # the new cell-face relation. Since the grid is 1d, nodes and faces are
    # equivalent, and notation used mostly refers to nodes instead of faces.

    # Cell-node relation
    cell_nodes = g.cell_nodes()
    nodes, cells, _ = sps.find(cell_nodes)

    # Every cell will contribute (ratio - 1) new nodes
    num_new_nodes = (ratio - 1) * g.num_cells + g.num_nodes
    x = np.zeros((3, num_new_nodes))
    # Coordinates for splitting of cells
    theta = np.arange(1, ratio) / float(ratio)
    pos = 0
    shift = 0

    # Array that indicates whether an item in the cell-node relation represents
    # a node not listed before (e.g. whether this is the first or second
    # occurrence of the cell)
    if_add = np.r_[1, np.ediff1d(cell_nodes.indices)].astype(np.bool)

    indices = np.empty(0, dtype=np.int)
    # Template array of node indices for refined cells
    ind = np.vstack((np.arange(ratio), np.arange(ratio) + 1)).flatten("F")
    nd = np.r_[np.diff(cell_nodes.indices)[1::2], 0]

    # Loop over all old cells and refine them.
    for c in np.arange(g.num_cells):
        # Find start and end nodes of the old cell
        loc = slice(cell_nodes.indptr[c], cell_nodes.indptr[c + 1])
        start, end = cell_nodes.indices[loc]

        # Flags for whether this is the first occurrence of the nodes of
        # the old cell. If so, they should be added to the new node array
        if_add_loc = if_add[loc]

        # Local cell-node (thus cell-face) relations of the new grid
        indices = np.r_[indices, shift + ind]

        # Add coordinate of the startpoint to the node array if relevant
        if if_add_loc[0]:
            x[:, pos:(pos + 1)] = g.nodes[:, start, np.newaxis]
            pos += 1

        # Add coordinates of the internal nodes
        x[:, pos:(
            pos + ratio -
            1)] = g.nodes[:, start,
                          np.newaxis] * theta + g.nodes[:, end, np.newaxis] * (
                              1 - theta)
        pos += ratio - 1
        shift += ratio + (2 - np.sum(if_add_loc) * (1 - nd[c])) - nd[c]

        # Add coordinate to the endpoint, if relevant
        if if_add_loc[1]:
            x[:, pos:(pos + 1)] = g.nodes[:, end, np.newaxis]
            pos += 1

    # For 1d grids, there is a 1-1 relation between faces and nodes
    face_nodes = sps.identity(x.shape[1], format="csc")
    cell_faces = sps.csc_matrix((
        np.ones(indices.size, dtype=np.bool),
        indices,
        np.arange(0, indices.size + 1, 2),
    ))
    g = Grid(1, x, face_nodes, cell_faces, "Refined 1d grid")
    g.compute_geometry()

    return g
                        writer.add_summary(summary_, iteration)

            # save the model
            saver.save(sess, os.path.join(root_savedir, "model.ckpt"))

        # close the file writer
        writer.close()


if __name__ == '__main__':

    N = 200
    X = np.random.rand(N, N) < 0.4

    from scipy.sparse import find
    rows, cols, _ = find(X)

    root_savedir = "/Users/Koa/github-repos/bayes-nnet-mf/saved/vi_binary"
    root_logdir = os.path.join(root_savedir, "tf_logs")

    if os.path.exists(root_savedir):
        shutil.rmtree(root_savedir)

    model = VIBinaryNNetMF()
    model.train(N,
                rows,
                cols,
                miss_rows=None,
                miss_cols=None,
                n_factors=4,
                hidden_layer_sizes=[10, 8],
Beispiel #55
0
    def i2cws(self, repeat=1):
        """The Improved Improved Consistent Weighted Sampling (I$^2$CWS) algorithm, samples the two special
           "active indices", $y_k$ and $z_k$, independently by avoiding the equation of $y_k$ and $z_k$ in ICWS.
           W. Wu, B. Li, L. Chen, C. Zhang and P. S. Yu, "Improved Consistent Weighted Sampling Revisited",
           DOI: 10.1109/TKDE.2018.2876250, 2018.

        Parameters
        ----------
        repeat: int, default: 1
            the number of repeating the algorithm as the part of the seed of the random number generator

        Returns
        -----------
        fingerprints_k: ndarray, shape (n_instances, dimension_num)
            one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

        fingerprints_y: ndarray, shape (n_instances, dimension_num)
            one component of hash codes $(k, y_k)$ for data matrix, where row represents a data instance

        elapsed: float
            time of hashing data matrix
        """

        fingerprints_k = np.zeros((self.instance_num, self.dimension_num))
        fingerprints_y = np.zeros((self.instance_num, self.dimension_num))

        np.random.seed(self.seed * np.power(2, repeat - 1))
        start = time.time()

        beta1 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
        beta2 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
        u1 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
        u2 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
        u3 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
        u4 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
        v1 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))
        v2 = np.random.uniform(0, 1, (self.feature_num, self.dimension_num))

        for j_sample in range(0, self.instance_num):
            feature_id = sparse.find(self.weighted_set[:, j_sample] > 0)[0]

            r2 = -np.log(np.multiply(u3[feature_id, :], u4[feature_id, :]))
            t_matrix = np.floor(
                np.divide(
                    np.matlib.repmat(
                        np.log(self.weighted_set[feature_id, j_sample].todense(
                        )), 1, self.dimension_num), r2) + beta2[feature_id, :])
            z_matrix = np.exp(
                np.multiply(r2, (t_matrix - beta2[feature_id, :] + 1)))
            a_matrix = np.divide(
                -np.log(np.multiply(v1[feature_id, :], v2[feature_id, :])),
                z_matrix)

            min_position = np.argmin(a_matrix, axis=0)
            fingerprints_k[j_sample, :] = feature_id[min_position]

            r1 = -np.log(
                np.multiply(u1[feature_id[min_position], :],
                            u2[feature_id[min_position], :]))
            gamma1 = np.array([-np.log(np.diag(r1[0]))])

            b = np.array([np.diag(beta1[feature_id[min_position], :][0])])
            t_matrix = np.floor(
                np.divide(
                    np.log(
                        np.transpose(self.weighted_set[
                            feature_id[min_position],
                            j_sample].todense())), gamma1) + b)
            fingerprints_y[j_sample, :] = np.exp(
                np.multiply(gamma1, (t_matrix - b)))

        elapsed = time.time() - start

        return fingerprints_k, fingerprints_y, elapsed
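As context for the method above (not part of the original class): fingerprints from CWS-style schemes are typically compared by counting the hash dimensions in which the full $(k, y_k)$ pair collides, which estimates the generalized (weighted) Jaccard similarity. A hedged usage sketch, assuming an already constructed hasher object cws that exposes i2cws() as defined above:

import numpy as np

def estimate_weighted_jaccard(k1, y1, k2, y2):
    # Fraction of hash dimensions where both components of (k, y_k) collide.
    return np.mean(np.logical_and(k1 == k2, np.isclose(y1, y2)))

# Hypothetical usage (cws is assumed to be an instance of the class above):
# fk, fy, elapsed = cws.i2cws(repeat=1)
# similarity_01 = estimate_weighted_jaccard(fk[0], fy[0], fk[1], fy[1])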
Beispiel #56
0
def subgraph_extraction_labeling(ind,
                                 A,
                                 h=1,
                                 sample_ratio=1.0,
                                 max_nodes_per_hop=None,
                                 u_features=None,
                                 v_features=None,
                                 class_values=None):
    # extract the h-hop enclosing subgraph around link 'ind'
    dist = 0
    u_nodes, v_nodes = [ind[0]], [ind[1]]
    u_dist, v_dist = [0], [0]
    u_visited, v_visited = set([ind[0]]), set([ind[1]])
    u_fringe, v_fringe = set([ind[0]]), set([ind[1]])
    for dist in range(1, h + 1):
        v_fringe, u_fringe = neighbors(u_fringe, A,
                                       True), neighbors(v_fringe, A, False)
        u_fringe = u_fringe - u_visited
        v_fringe = v_fringe - v_visited
        u_visited = u_visited.union(u_fringe)
        v_visited = v_visited.union(v_fringe)
        if sample_ratio < 1.0:
            u_fringe = random.sample(u_fringe,
                                     int(sample_ratio * len(u_fringe)))
            v_fringe = random.sample(v_fringe,
                                     int(sample_ratio * len(v_fringe)))
        if max_nodes_per_hop is not None:
            if max_nodes_per_hop < len(u_fringe):
                u_fringe = random.sample(u_fringe, max_nodes_per_hop)
            if max_nodes_per_hop < len(v_fringe):
                v_fringe = random.sample(v_fringe, max_nodes_per_hop)
        if len(u_fringe) == 0 and len(v_fringe) == 0:
            break
        u_nodes = u_nodes + list(u_fringe)
        v_nodes = v_nodes + list(v_fringe)
        u_dist = u_dist + [dist] * len(u_fringe)
        v_dist = v_dist + [dist] * len(v_fringe)
    subgraph = A[u_nodes, :][:, v_nodes]
    # remove link between target nodes
    subgraph[0, 0] = 0
    # construct nx graph
    g = nx.Graph()
    g.add_nodes_from(range(len(u_nodes)), bipartite='u')
    g.add_nodes_from(range(len(u_nodes),
                           len(u_nodes) + len(v_nodes)),
                     bipartite='v')
    u, v, r = ssp.find(subgraph)  # r is 1, 2... (rating labels + 1)
    r = r.astype(int)
    v += len(u_nodes)
    #g.add_weighted_edges_from(zip(u, v, r))
    g.add_edges_from(zip(u, v))

    edge_types = dict(zip(zip(u, v),
                          r - 1))  # transform r back to rating label
    nx.set_edge_attributes(g, name='type', values=edge_types)
    # get structural node labels
    node_labels = [x * 2 for x in u_dist] + [x * 2 + 1 for x in v_dist]

    # get node features
    if u_features is not None:
        u_features = u_features[u_nodes]
    if v_features is not None:
        v_features = v_features[v_nodes]
    node_features = None
    if False:
        # directly use padded node features
        if u_features is not None and v_features is not None:
            u_extended = np.concatenate([
                u_features,
                np.zeros([u_features.shape[0], v_features.shape[1]])
            ], 1)
            v_extended = np.concatenate([
                np.zeros([v_features.shape[0], u_features.shape[1]]),
                v_features
            ], 1)
            node_features = np.concatenate([u_extended, v_extended], 0)
    if False:
        # use identity features (one-hot encodings of node idxes)
        u_ids = one_hot(u_nodes, A.shape[0] + A.shape[1])
        v_ids = one_hot([x + A.shape[0] for x in v_nodes],
                        A.shape[0] + A.shape[1])
        node_ids = np.concatenate([u_ids, v_ids], 0)
        #node_features = np.concatenate([node_features, node_ids], 1)
        node_features = node_ids
    if True:
        # only output node features for the target user and item
        if u_features is not None and v_features is not None:
            node_features = [u_features[0], v_features[0]]

    return g, node_labels, node_features
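The function above relies on a neighbors helper that is not included in this excerpt. The sketch below is an assumption about its typical behavior for a sparse bipartite rating matrix (rows = users, columns = items), not the original implementation:

import scipy.sparse as ssp

def neighbors(fringe, A, row=True):
    # Collect all column (or row) indices adjacent to the given set of
    # row (or column) indices in the sparse bipartite adjacency matrix A.
    res = set()
    for node in fringe:
        if row:
            _, nb, _ = ssp.find(A[node, :])   # items rated by user `node`
        else:
            nb, _, _ = ssp.find(A[:, node])   # users who rated item `node`
        res = res.union(set(nb))
    return res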
Beispiel #57
0
def mutual_info_score(labels_true, labels_pred, *, contingency=None):
    """Mutual Information between two clusterings.

    The Mutual Information is a measure of the similarity between two labels of
    the same data. Where :math:`|U_i|` is the number of the samples
    in cluster :math:`U_i` and :math:`|V_j|` is the number of the
    samples in cluster :math:`V_j`, the Mutual Information
    between clusterings :math:`U` and :math:`V` is given as:

    .. math::

        MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}
        \\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}

    This metric is independent of the absolute values of the labels:
    a permutation of the class or cluster label values won't change the
    score value in any way.

    This metric is furthermore symmetric: switching ``labels_true`` with
    ``labels_pred`` will return the same score value. This can be useful to
    measure the agreement of two independent label assignment strategies
    on the same dataset when the real ground truth is not known.

    Read more in the :ref:`User Guide <mutual_info_score>`.

    Parameters
    ----------
    labels_true : int array, shape = [n_samples]
        A clustering of the data into disjoint subsets.

    labels_pred : int array-like of shape (n_samples,)
        A clustering of the data into disjoint subsets.

    contingency : {None, array, sparse matrix}, \
                  shape = [n_classes_true, n_classes_pred]
        A contingency matrix given by the :func:`contingency_matrix` function.
        If value is ``None``, it will be computed, otherwise the given value is
        used, with ``labels_true`` and ``labels_pred`` ignored.

    Returns
    -------
    mi : float
       Mutual information, a non-negative value

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).

    See also
    --------
    adjusted_mutual_info_score: Adjusted against chance Mutual Information
    normalized_mutual_info_score: Normalized Mutual Information
    """
    if contingency is None:
        labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
        contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
    else:
        contingency = check_array(contingency,
                                  accept_sparse=['csr', 'csc', 'coo'],
                                  dtype=[int, np.int32, np.int64])

    if isinstance(contingency, np.ndarray):
        # For an array
        nzx, nzy = np.nonzero(contingency)
        nz_val = contingency[nzx, nzy]
    elif sp.issparse(contingency):
        # For a sparse matrix
        nzx, nzy, nz_val = sp.find(contingency)
    else:
        raise ValueError("Unsupported type for 'contingency': %s" %
                         type(contingency))

    contingency_sum = contingency.sum()
    pi = np.ravel(contingency.sum(axis=1))
    pj = np.ravel(contingency.sum(axis=0))
    log_contingency_nm = np.log(nz_val)
    contingency_nm = nz_val / contingency_sum
    # Don't need to calculate the full outer product, just for non-zeroes
    outer = (pi.take(nzx).astype(np.int64, copy=False) *
             pj.take(nzy).astype(np.int64, copy=False))
    log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
    mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
          contingency_nm * log_outer)
    return np.clip(mi.sum(), 0.0, None)
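A short usage example of the function above; the score is symmetric and unchanged by permuting the label values:

from sklearn.metrics import mutual_info_score

labels_a = [0, 0, 1, 1]
labels_b = [1, 1, 0, 0]

# Identical clusterings up to a relabeling: MI equals ln(2), about 0.693.
print(mutual_info_score(labels_a, labels_b))
# Symmetry: swapping the arguments returns the same value.
print(mutual_info_score(labels_b, labels_a))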
Beispiel #58
0
def _sparse_num_jac(fun, t, y, f, h, factor, y_scale, structure, groups):
    n = y.shape[0]
    n_groups = np.max(groups) + 1
    h_vecs = np.empty((n_groups, n))
    for group in range(n_groups):
        e = np.equal(group, groups)
        h_vecs[group] = h * e
    h_vecs = h_vecs.T

    f_new = fun(t, y[:, None] + h_vecs)
    df = f_new - f[:, None]

    i, j, _ = find(structure)
    diff = coo_matrix((df[i, groups[j]], (i, j)), shape=(n, n)).tocsc()
    max_ind = np.array(abs(diff).argmax(axis=0)).ravel()
    r = np.arange(n)
    max_diff = np.asarray(np.abs(diff[max_ind, r])).ravel()
    scale = np.maximum(np.abs(f[max_ind]), np.abs(f_new[max_ind, groups[r]]))

    diff_too_small = max_diff < NUM_JAC_DIFF_REJECT * scale
    if np.any(diff_too_small):
        ind, = np.nonzero(diff_too_small)
        new_factor = NUM_JAC_FACTOR_INCREASE * factor[ind]
        h_new = (y[ind] + new_factor * y_scale[ind]) - y[ind]
        h_new_all = np.zeros(n)
        h_new_all[ind] = h_new

        groups_unique = np.unique(groups[ind])
        groups_map = np.empty(n_groups, dtype=int)
        h_vecs = np.empty((groups_unique.shape[0], n))
        for k, group in enumerate(groups_unique):
            e = np.equal(group, groups)
            h_vecs[k] = h_new_all * e
            groups_map[group] = k
        h_vecs = h_vecs.T

        f_new = fun(t, y[:, None] + h_vecs)
        df = f_new - f[:, None]
        i, j, _ = find(structure[:, ind])
        diff_new = coo_matrix((df[i, groups_map[groups[ind[j]]]], (i, j)),
                              shape=(n, ind.shape[0])).tocsc()

        max_ind_new = np.array(abs(diff_new).argmax(axis=0)).ravel()
        r = np.arange(ind.shape[0])
        max_diff_new = np.asarray(np.abs(diff_new[max_ind_new, r])).ravel()
        scale_new = np.maximum(
            np.abs(f[max_ind_new]),
            np.abs(f_new[max_ind_new, groups_map[groups[ind]]]))

        update = max_diff[ind] * scale_new < max_diff_new * scale[ind]
        if np.any(update):
            update, = np.nonzero(update)
            update_ind = ind[update]
            factor[update_ind] = new_factor[update]
            h[update_ind] = h_new[update]
            diff[:, update_ind] = diff_new[:, update]
            scale[update_ind] = scale_new[update]
            max_diff[update_ind] = max_diff_new[update]

    diff.data /= np.repeat(h, np.diff(diff.indptr))

    factor[max_diff < NUM_JAC_DIFF_SMALL * scale] *= NUM_JAC_FACTOR_INCREASE
    factor[max_diff > NUM_JAC_DIFF_BIG * scale] *= NUM_JAC_FACTOR_DECREASE
    factor = np.maximum(factor, NUM_JAC_MIN_FACTOR)

    return diff, factor
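The groups argument above encodes the standard trick for sparse finite-difference Jacobians: columns whose sparsity patterns do not overlap can share one perturbation vector, so a single extra function evaluation recovers several Jacobian columns at once. SciPy ships its own grouping routine; the greedy sketch below is only meant to illustrate the idea, not to reproduce it:

import numpy as np
from scipy.sparse import csc_matrix

def greedy_group_columns(structure):
    # Assign columns with non-overlapping sparsity patterns to the same group.
    S = csc_matrix(structure)
    n = S.shape[1]
    groups = -np.ones(n, dtype=int)
    group_rows = []          # union of row patterns already used by each group
    for col in range(n):
        rows = set(S[:, col].nonzero()[0])
        for g, used in enumerate(group_rows):
            if not rows & used:
                groups[col] = g
                used |= rows
                break
        else:
            groups[col] = len(group_rows)
            group_rows.append(rows)
    return groups

# Tridiagonal structure: columns {0, 3} and {1, 4} can share perturbations.
structure = np.eye(5) + np.eye(5, k=1) + np.eye(5, k=-1)
print(greedy_group_columns(structure))   # e.g. [0 1 2 0 1]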
Beispiel #59
0
def construct_line_graph_directed(node_ids, A, node_features):

    u, v, r = ssp.find(A)
    print(f'max_weight: {max(r)}')

    #print(f'num_edges_khop: {len(u)}')
    #print(f'num_nodes_khop: {node_ids.size()}')

    node_ids = node_ids.tolist()
    node_features = node_features.tolist()

    G = nx.DiGraph()
    #G.add_nodes_from(node_ids)
    rows, cols = A.nonzero()
    A_edges_forward = list(zip(u, v))
    A_edges_reverse = list(zip(v, u))

    info = {}
    node_class = {}
    for edge in A_edges_forward:
        src, end = edge[0], edge[1]
        weight = A[src, end]
        edge_label = [0] * 52 + [node_features[src] != node_features[end]]
        #print(weight)
        edge_label[weight] = 1

        f1, f2 = node_features[src], node_features[end]
        info[(src, end)] = edge_label
        node_class[(src, end)] = [f1, f2]

    for edge in A_edges_reverse:
        src, end = edge[0], edge[1]
        weight = A[end, src]
        edge_label = [0] * 52 + [node_features[src] != node_features[end]]
        #print(weight)
        edge_label[weight] = 1

        f1, f2 = node_features[src], node_features[end]
        info[(src, end)] = edge_label
        node_class[(src, end)] = [f1, f2]

    G.add_edges_from(A_edges_forward)
    G.add_edges_from(A_edges_reverse)

    L = nx.line_graph(G)
    num_nodes = L.number_of_nodes()

    L_node_ids = list(L.nodes)
    L_edges = list(L.edges)

    L_node_features = []

    index = {}
    node_ids, f = [], []
    value = 0
    for node in L_node_ids:
        node_ids.append(value)
        L_node_features.append(info[node])
        f.append(node_class[node])
        index[node] = value
        value += 1

    edge_list = []
    for edge in L_edges:
        v1, v2 = edge[0], edge[1]
        n1, n2 = index[v1], index[v2]
        edge_list.append([n1, n2])

    return torch.LongTensor(L_node_features), torch.LongTensor(
        edge_list), num_nodes, torch.LongTensor(node_ids), torch.LongTensor(f)
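For readers unfamiliar with nx.line_graph, here is a minimal illustration of the transformation used above: every directed edge of G becomes a node of L, and L has an edge wherever two edges of G chain head-to-tail (output ordering may vary between networkx versions):

import networkx as nx

# Tiny illustration of the line-graph step used above.
G = nx.DiGraph()
G.add_edges_from([(0, 1), (1, 2), (1, 0)])
L = nx.line_graph(G)
print(list(L.nodes))   # the edges of G, e.g. [(0, 1), (1, 2), (1, 0)]
print(list(L.edges))   # e.g. [((0, 1), (1, 2)), ((0, 1), (1, 0)), ((1, 0), (0, 1))]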
Beispiel #60
0
    def cfl(self,
            g_h,
            g_l,
            data_h,
            data_l,
            data_edge,
            d_name="mortar_solution"):
        """
        Return the time step according to the CFL condition.
        Note: the vector field is assumed to be given as the normal velocity,
        weighted with the face area, at each face.

        The names of the data entries in the input dictionaries are:
        discharge : array (g.num_faces)
            Normal velocity at each face, weighted by the face area.

        Parameters:
            g_h: grid of higher dimension
            g_l: grid of lower dimension
            data_h: dictionary which stores the data for the higher dimensional
                grid
            data_l: dictionary which stores the data for the lower dimensional
                grid
            data_edge: dictionary which stores the data for the edges of the
                grid bucket
            d_name: key under which the mortar solution is stored in data_edge
                (default "mortar_solution")

        Return:
            deltaT: time step according to CFL condition.

        Note: the design of this function has not been updated according
        to the mortar structure. Instead, mg.high_to_mortar_int.nonzero()[1]
        is used to map the 'mortar_solution' (one flux for each mortar dof) to
        the old discharge (one flux for each g_h face).

        """
        # Retrieve the discharge, which is mandatory

        aperture_h = data_h["param"].get_aperture()
        aperture_l = data_l["param"].get_aperture()
        phi_l = data_l["param"].get_porosity()
        mg = data_edge["mortar_grid"]
        discharge = np.zeros(g_h.num_faces)
        discharge[mg.high_to_mortar_int.nonzero()[1]] = data_edge[d_name]
        if g_h.dim == g_l.dim:
            # More or less same as below, except we have cell_cells in the place
            # of face_cells (see grid_bucket.duplicate_without_dimension).
            phi_h = data_h["param"].get_porosity()
            cells_l, cells_h = data_edge["face_cells"].nonzero()
            not_zero = ~np.isclose(
                np.zeros(discharge.shape), discharge, atol=0)
            if not np.any(not_zero):
                return np.inf

            diff = g_h.cell_centers[:, cells_h] - g_l.cell_centers[:, cells_l]
            dist = np.linalg.norm(diff, 2, axis=0)

            # Use minimum of cell values for convenience
            phi_l = phi_l[cells_l]
            phi_h = phi_h[cells_h]
            apt_h = aperture_h[cells_h]
            apt_l = aperture_l[cells_l]
            coeff = np.minimum(phi_h, phi_l) * np.minimum(apt_h, apt_l)
            return np.amin(np.abs(np.divide(dist, discharge)) * coeff)

        # Recover the information for the grid-grid mapping
        cells_l, faces_h, _ = sps.find(data_edge["face_cells"])

        # Detect and remove the faces which have zero in "discharge"
        not_zero = ~np.isclose(
            np.zeros(faces_h.size), discharge[faces_h], atol=0)
        if not np.any(not_zero):
            return np.inf

        cells_l = cells_l[not_zero]
        faces_h = faces_h[not_zero]
        # Mapping from faces_h to cell_h
        cell_faces_h = g_h.cell_faces.tocsr()[faces_h, :]
        cells_h = cell_faces_h.nonzero()[1][not_zero]
        # Retrieve and map additional data
        aperture_h = aperture_h[cells_h]
        aperture_l = aperture_l[cells_l]
        phi_l = phi_l[cells_l]
        # Compute discrete distance cell to face centers for the lower
        # dimensional grid
        dist = 0.5 * np.divide(aperture_l, aperture_h)
        # Since discharge is multiplied by the aperture-weighted face areas, we
        # divide by that quantity to get velocities in [length/time]
        velocity = np.divide(discharge[faces_h],
                             g_h.face_areas[faces_h] * aperture_h)
        # deltaT is deltaX/velocity with coefficient
        return np.amin(np.abs(np.divide(dist, velocity)) * phi_l)
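As a standalone sanity check of the final formula (not tied to the grid and parameter classes above), the returned time step is the minimum over faces of |distance / velocity| scaled by porosity. A numeric sketch with made-up values:

import numpy as np

# Hypothetical face data (illustrative values only).
discharge = np.array([2.0e-3, 5.0e-4, 1.0e-3])   # flux, already area- and aperture-weighted
face_areas = np.array([1.0, 0.5, 2.0])
aperture_h = np.array([1.0, 1.0, 1.0])
aperture_l = np.array([1.0e-2, 1.0e-2, 1.0e-2])
phi_l = np.array([0.2, 0.25, 0.2])

dist = 0.5 * aperture_l / aperture_h                 # cell-to-face distance estimate
velocity = discharge / (face_areas * aperture_h)     # back to [length/time]
delta_t = np.amin(np.abs(dist / velocity) * phi_l)
print(delta_t)                                       # 0.5 for these values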