Code example #1
File: graph_utils.py Project: bgruening/bcfind
def compute_connected_components(adjacency_matrix):
    """
    Given an adjacency matrix of a graph, computes the number of connected components.

    Parameters
    ----------
    adjacency_matrix: adjacency matrix of the graph

    Returns
    -------
    cc: number of connected components in the graph represented by the adjacency matrix
    """

    if issparse(adjacency_matrix):
        difference_matrix = adjacency_matrix - adjacency_matrix.transpose()
        is_symmetric_p = np.all(1e-10 > difference_matrix.data)
        is_symmetric_n = np.all(difference_matrix.data > -1e-10)
    else:
        difference_matrix = adjacency_matrix - adjacency_matrix.T
        is_symmetric_p = np.all(1e-10 > difference_matrix)
        is_symmetric_n = np.all(difference_matrix > -1e-10)
    if is_symmetric_p and is_symmetric_n:
        return connected_components(adjacency_matrix, directed=False)
    else:
        return connected_components(adjacency_matrix, directed=True)
Code example #2
File: COFIBA.py Project: qw2ky/CoLinUCB_Revised
	def updateItemClusters(self, userID, chosenItem, itemClusterNum, articlePool):
		m = self.itemNum
		n = len(self.users)
		#UserNeighbor = {}
		for a in articlePool:
			if self.IGraph[chosenItem.id][a.id] == 1:
				#UserNeighbor[a.id] = np.ones([n,n])
				for i in range(n):
					diff = math.fabs(np.dot( self.users[userID].UserTheta, a.featureVector )- np.dot( self.users[i].UserTheta, a.featureVector))
					CB = self.alpha_2* (np.sqrt(np.dot(np.dot(a.featureVector, self.users[userID].AInv),  a.featureVector)) + np.sqrt(np.dot(np.dot(a.featureVector, self.users[i].AInv),  a.featureVector))) * np.sqrt(np.log10(self.time+1))
					if diff > CB:
						self.UserNeighbor[a.id][userID][i] = 0
						self.UserNeighbor[a.id][i][userID] = 0
				if not np.array_equal(self.UserNeighbor[a.id], self.UGraph[itemClusterNum]):
					self.IGraph[chosenItem.id][a.id] = 0
					self.IGraph[a.id][chosenItem.id] = 0
					#print 'delete edge'
		self.N_components_Item, component_list_Item = connected_components(csr_matrix(self.IGraph))
		self.Iclusters = component_list_Item

		# For each new item cluster, allocate a new connected graph over users representing a single user cluster
		self.UGraph = []
		self.Uclusters = []
		for i in range(self.N_components_Item):
			if self.cluster_init =='Erdos-Renyi':
				p = 3 * math.log(len(self.users))/len(self.users)
				self.UGraph.append(np.random.choice([0, 1], size=(len(self.users),len(self.users)), p=[1-p, p]))
			else:
				self.UGraph.append(np.ones([len(self.users), len(self.users)]) ) 
			self.Uclusters.append([])
			N_components_U, components_U = connected_components(csr_matrix(self.UGraph[i]))
			self.Uclusters[i] = components_U
		return self.N_components_Item
Code example #3
def test_strong_connections():
    X1de = np.array([[0, 1, 0],
                     [0, 0, 0],
                     [0, 0, 0]])
    X2de = X1de + X1de.T

    X1sp = csgraph.csgraph_from_dense(X1de, null_value=0)
    X2sp = csgraph.csgraph_from_dense(X2de, null_value=0)

    for X in X1sp, X1de:
        n_components, labels =\
            csgraph.connected_components(X, directed=True,
                                         connection='strong')

        assert_equal(n_components, 3)
        labels.sort()
        assert_array_almost_equal(labels, [0, 1, 2])

    for X in X2sp, X2de:
        n_components, labels =\
            csgraph.connected_components(X, directed=True,
                                         connection='strong')

        assert_equal(n_components, 2)
        labels.sort()
        assert_array_almost_equal(labels, [0, 0, 1])
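For comparison, a small sketch (my addition, not part of the test suite) of how 'weak' and 'strong' connectivity differ on the same single-edge graph used above:

import numpy as np
from scipy.sparse import csgraph

X = np.array([[0, 1, 0],
              [0, 0, 0],
              [0, 0, 0]])
print(csgraph.connected_components(X, directed=True, connection='weak')[0])    # 2
print(csgraph.connected_components(X, directed=True, connection='strong')[0])  # 3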
Code example #4
File: CLUB.py Project: e-hu/CoLinUCB_Revised
	def __init__(self,dimension,alpha,lambda_,n,alpha_2, cluster_init="Complete"):
		self.time = 0
		#N_LinUCBAlgorithm.__init__(dimension = dimension, alpha=alpha,lambda_ = lambda_,n=n)
		self.users = []
		#algorithm have n users, each user has a user structure
		for i in range(n):
			self.users.append(CLUBUserStruct(dimension,lambda_, i)) 

		self.dimension = dimension
		self.alpha = alpha
		self.alpha_2 = alpha_2
		if (cluster_init=="Erdos-Renyi"):
			p = 3*math.log(n)/n
			self.Graph = np.random.choice([0, 1], size=(n,n), p=[1-p, p])
			self.clusters = []
			g = csr_matrix(self.Graph)
			N_components, components = connected_components(g)
		else:
			self.Graph = np.ones([n,n]) 
			self.clusters = []
			g = csr_matrix(self.Graph)
			N_components, components = connected_components(g)

		self.CanEstimateCoUserPreference = False
		self.CanEstimateUserPreference = False
		self.CanEstimateW = False
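A rough standalone sketch (hypothetical n, not taken from CoLinUCB_Revised) of the Erdos-Renyi initialization used above: with edge probability p = 3*log(n)/n the random graph is usually, though not always, a single connected component.

import math
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

n = 50
p = 3 * math.log(n) / n
Graph = np.random.choice([0, 1], size=(n, n), p=[1 - p, p])
N_components, components = connected_components(csr_matrix(Graph))
print(N_components)   # typically 1 for this p, but not guaranteed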
Code example #5
File: test_image.py Project: NelleV/scikit-learn
def test_grid_to_graph():
    # Checking that the function works with graphs containing no edges
    size = 2
    roi_size = 1
    # Generating two convex parts with one vertex
    # Thus, edges will be empty in _to_graph
    mask = np.zeros((size, size), dtype=np.bool)
    mask[0:roi_size, 0:roi_size] = True
    mask[-roi_size:, -roi_size:] = True
    mask = mask.reshape(size ** 2)
    A = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray)
    assert_true(connected_components(A)[0] == 2)

    # Checking that the function works whatever the type of mask is
    mask = np.ones((size, size), dtype=np.int16)
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask)
    assert_true(connected_components(A)[0] == 1)

    # Checking dtype of the graph
    mask = np.ones((size, size))
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.bool)
    assert_true(A.dtype == np.bool)
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.int)
    assert_true(A.dtype == np.int)
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask,
                      dtype=np.float64)
    assert_true(A.dtype == np.float64)
Code example #6
File: util.py Project: knaaptime/region
def _randomly_divide_connected_graph(adj, n_regions):
    """
    Divide the provided connected graph into `n_regions` regions.

    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix.
    n_regions : int
        The desired number of clusters. Must be > 0 and <= number of nodes.

    Returns
    -------
    labels : :class:`numpy.ndarray`
        Each element (an integer in {0, ..., `n_regions` - 1}) specifies the
        region an area (defined by the index in the array) belongs to.

    Examples
    --------
    >>> from scipy.sparse import diags
    >>> n_nodes = 10
    >>> adj_diagonal = [1] * (n_nodes-1)
    >>> # 10x10 adjacency matrix representing the path 0-1-2-...-8-9
    >>> adj = diags([adj_diagonal, adj_diagonal], offsets=[-1, 1])
    >>> n_regions_desired = 4
    >>> labels = _randomly_divide_connected_graph(adj, n_regions_desired)
    >>> n_regions_obtained = len(set(labels))
    >>> n_regions_desired == n_regions_obtained
    True
    """
    if not n_regions > 0:
        msg = "n_regions is {} but must be positive.".format(n_regions)
        raise ValueError(msg)
    n_areas = adj.shape[0]
    if not n_regions <= n_areas:
        msg = ("n_regions is {} but must be less than or equal to "
               "the number of nodes, which is {}").format(n_regions, n_areas)
        raise ValueError(msg)
    mst = csg.minimum_spanning_tree(adj)
    for _ in range(n_regions - 1):
        # try different links to cut and pick the one leading to the most
        # balanced solution
        best_link = None
        max_region_size = float("inf")
        for __ in range(5):
            mst_copy = mst.copy()
            nonzero_i, nonzero_j = mst_copy.nonzero()
            random_position = random.randrange(len(nonzero_i))
            i, j = nonzero_i[random_position], nonzero_j[random_position]
            mst_copy[i, j] = 0
            mst_copy.eliminate_zeros()
            labels = csg.connected_components(mst_copy, directed=False)[1]
            max_size = max(np.unique(labels, return_counts=True)[1])
            if max_size < max_region_size:
                best_link = (i, j)
                max_region_size = max_size
        mst[best_link[0], best_link[1]] = 0
        mst.eliminate_zeros()
    return csg.connected_components(mst)[1]
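The key step above is that cutting a single edge of a spanning tree splits it into exactly one more connected component. A minimal standalone sketch with a made-up path graph:

import numpy as np
from scipy.sparse import csr_matrix
import scipy.sparse.csgraph as csg

# path graph 0-1-2-3
adj = csr_matrix(np.array([[0, 1, 0, 0],
                           [1, 0, 1, 0],
                           [0, 1, 0, 1],
                           [0, 0, 1, 0]]))
mst = csg.minimum_spanning_tree(adj)
nonzero_i, nonzero_j = mst.nonzero()
mst[nonzero_i[0], nonzero_j[0]] = 0   # cut one tree edge
mst.eliminate_zeros()
print(csg.connected_components(mst, directed=False)[0])   # 2 regions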
Code example #7
File: workspace.py Project: Erotemic/ibeis
def intra_encounter_matching():
    import numpy as np
    from scipy.sparse import coo_matrix, csgraph
    qreq_, cm_list = testdata_workflow()
    # qaids = [cm.qaid for cm in cm_list]
    # top_aids = [cm.get_top_aids(5) for cm in cm_list]
    aid_pairs = np.array([(cm.qaid, daid)
                          for cm in cm_list for daid in cm.get_top_aids(5)])
    top_scores = ut.flatten([cm.get_top_scores(5) for cm in cm_list])

    N = aid_pairs.max() + 1
    mat = coo_matrix((top_scores, aid_pairs.T), shape=(N, N))
    csgraph.connected_components(mat)
    tree = csgraph.minimum_spanning_tree(mat)  # NOQA
    import plottool as pt
    dense = mat.todense()
    pt.imshow(dense / dense.max() * 255)
    pt.show_if_requested()

    # baseline jobid
    import opengm
    # https://github.com/opengm/opengm/blob/master/src/interfaces/python/examples/tutorial/OpenGM%20tutorial.ipynb
    numVar = 10
    unaries = np.ones([numVar, 3], dtype=opengm.value_type)
    gm = opengm.gm(np.ones(numVar, dtype=opengm.label_type) * 3)
    unary_fids = gm.addFunctions(unaries)
    gm.addFactors(unary_fids, np.arange(numVar))
    infParam = opengm.InfParam(
        workflow=ut.ensure_ascii('(IC)(TTC-I,CC-I)'),
    )
    inf = opengm.inference.Multicut(gm, parameter=infParam)
    visitor = inf.verboseVisitor(printNth=1, multiline=False)
    inf.infer(visitor)
    arg = inf.arg()

    # gridVariableIndices = opengm.secondOrderGridVis(img.shape[0], img.shape[1])
    # fid = gm.addFunction(regularizer)
    # gm.addFactors(fid, gridVariableIndices)
    # regularizer = opengm.pottsFunction([3, 3], 0.0, beta)
    # gridVariableIndices = opengm.secondOrderGridVis(img.shape[0], img.shape[1])
    # fid = gm.addFunction(regularizer)
    # gm.addFactors(fid, gridVariableIndices)

    unaries = np.random.rand(10, 10, 2)
    potts = opengm.PottsFunction([2, 2], 0.0, 0.4)
    gm = opengm.grid2d2Order(unaries=unaries, regularizer=potts)

    inf = opengm.inference.GraphCut(gm)
    inf.infer()
    arg = inf.arg()  # NOQA
    """
Code example #8
File: test_image.py Project: NelleV/scikit-learn
def test_connect_regions_with_grid():
    try:
        face = sp.face(gray=True)
    except AttributeError:
        # Newer versions of scipy have face in misc
        from scipy import misc
        face = misc.face(gray=True)
    mask = face > 50
    graph = grid_to_graph(*face.shape, mask=mask)
    assert_equal(ndimage.label(mask)[1], connected_components(graph)[0])

    mask = face > 150
    graph = grid_to_graph(*face.shape, mask=mask, dtype=None)
    assert_equal(ndimage.label(mask)[1], connected_components(graph)[0])
Code example #9
def join_CCs_simple(X, W, num_ccs=1, verbose=False):
  """Old method for connecting the graph. Use join_CCs now."""
  n, labels = connected_components(W, directed=False, return_labels=True)
  CC_labels = labels
  while n > num_ccs:
    if verbose:
      print n, 'connected components'
    Dcenter, min_edge_idxs = inter_cluster_distance(X, n, labels)
    p_inds,q_inds = min_k_indices(Dcenter, 2).T  # self + 1 == 2
    ii,jj = min_edge_idxs[p_inds,q_inds].T
    W[ii,jj] = 1
    W[jj,ii] = 1
    n, labels = connected_components(W, directed=False, return_labels=True)
  return CC_labels
Code example #10
    def visCC(self):
        """fix me.... :/"""

        """to visualize the neighbours"""
        if isVisualize:
            fig888 = plt.figure()
            ax     = plt.subplot(1,1,1)

        """ visualization, see if connected components make sense"""
        s111,c111 = connected_components(sparsemtx)  # s111 is the number of connected components, c111 is the label array
        color     = np.array([np.random.randint(0,255) for _ in range(3*int(s111))]).reshape(s111,3)
        fig888    = plt.figure(888)
        ax        = plt.subplot(1,1,1)
        # im = plt.imshow(np.zeros([528,704,3]))
        for i in range(s111):
            ind = np.where(c111==i)[0]
            print ind
            for jj in range(len(ind)):
                startlimit = np.min(np.where(x[ind[jj],:]!=0))
                endlimit = np.max(np.where(x[ind[jj],:]!=0))
                # lines = ax.plot(x[ind[jj],startlimit:endlimit], y[ind[jj],startlimit:endlimit],color = (0,1,0),linewidth=2)
                lines = ax.plot(x[ind[jj],startlimit:endlimit], y[ind[jj],startlimit:endlimit],color = (color[i-1].T)/255.,linewidth=2)
                fig888.canvas.draw()
            plt.pause(0.0001) 
        plt.show()
Code example #11
def markov_stationary_components(P, tol=1e-12):
    """
    Split the chain first to connected components, and solve the
    stationary state for the smallest one
    """
    n = P.shape[0]

    # 0. Drop zero edges
    P = P.tocsr()
    P.eliminate_zeros()

    # 1. Separate to connected components
    n_components, labels = csgraph.connected_components(P, directed=True, connection='strong')

    # 2. Pick the smallest one
    sizes = [(labels == j).sum() for j in range(n_components)]
    min_j = np.argmin(sizes)
    indices = np.flatnonzero(labels == min_j)

    #print("Solving for component {0}/{1} of size {2}".format(min_j, n_components, indices.size))

    # 3. Solve stationary state for it
    p = np.zeros(n)
    if indices.size == 1:
        # Simple case
        p[indices] = 1
    else:
        p[indices] = markov_stationary_one(P[indices,:][:,indices], tol=tol)

    return p
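A small illustration (with a hypothetical 3-state chain, not from the original module) of the splitting step: connection='strong' separates the recurrent class from the transient states.

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import csgraph

# states 0 and 1 leak into the absorbing state 2
P = csr_matrix(np.array([[0.5, 0.4, 0.1],
                         [0.3, 0.6, 0.1],
                         [0.0, 0.0, 1.0]]))
n_components, labels = csgraph.connected_components(P, directed=True, connection='strong')
print(n_components)   # 2 strongly connected components: {0, 1} and {2}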
Code example #12
 def assertSingleClass(self,P):
     """ 
     Check whether the rate/probability matrix consists of a single connected class.
     Otherwise, the steady state distribution is not well defined.
     """
     components, _ = csgraph.connected_components(P, directed=True, connection='weak')   
     assert components==1, "The Markov chain has %r communicating classes. Make sure there is a single communicating class." %components
Code example #13
 def __init__(self, g_map=None, poly_map=None, connect_using_avg_resistances=False, connect_four_neighbors_only=False, g_graph=None, node_names=None):
     if g_map is not None:
         self.is_network = False
         self.g_map = g_map
         self.poly_map = poly_map
         self.connect_using_avg_resistances = connect_using_avg_resistances
         self.connect_four_neighbors_only = connect_four_neighbors_only
         
         self.node_map = HabitatGraph._construct_node_map(g_map, poly_map)
         (component_map, components) = HabitatGraph._construct_component_map(g_map, self.node_map, connect_using_avg_resistances, connect_four_neighbors_only)
         self.component_map = component_map
         self.components = components
         
         self.num_components = components.max()
         self.num_nodes = self.node_map.max()
     else:
         self.is_network = True
         self.g_graph = g_graph          # is the sparse CSR matrix 
         self.node_map = node_names    # list of node names
         
         (_num_components, C) = connected_components(g_graph)
         C += 1
         self.components = C
         
         self.num_components = C.max()
         self.num_nodes = self.node_map.size
Code example #14
File: citation_network.py Project: damirah/wsdm_cup
 def get_cc(self):
     """
     :return:
     """
     return csgraph.connected_components(self.edges, directed=True,
                                         connection='weak',
                                         return_labels=True)
Code example #15
File: problem.py Project: duckworthd/cvxcluster
  def clusters(self):
    from scipy.sparse.csgraph import connected_components
    from scipy.sparse import csr_matrix

    n_samples  = self.problem.n_samples
    n_features = self.problem.n_features

    # two centers are "the same" if they're much closer to each other than any
    # 2 data points are (excepting overlapping points)
    norm      = self.problem.norm
    X, w      = self.problem.X, self.problem.w
    distances = [norm(X[i] - X[j]) for (i, j, _) in iterrows(w)]
    distances = [d for d in distances if d > 0]
    if len(distances) == 0:
      epsilon = 1e-5  # arbitrary
    else:
      epsilon = min(d for d in distances if d > 0) * 1e-2

    edgelist = []
    for l,(i,j,_) in enumerate(iterrows(self.problem.w)):
      if np.linalg.norm(self.v[l]) / n_features < epsilon:
        edgelist.append( (1, i, j) )
        edgelist.append( (1, j, i) )
    if len(edgelist) > 0:
      vals, rows, cols = zip(*edgelist)
    else:
      vals, rows, cols = [], [], []
    adjacency            = csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples))
    n_components, labels = connected_components(adjacency, directed=False)

    return labels
Code example #16
File: connectivity.py Project: wmayner/pyphi
def _connected(cm, nodes, connection):
    """Test connectivity for the connectivity matrix."""
    if nodes is not None:
        cm = cm[np.ix_(nodes, nodes)]

    num_components, _ = connected_components(cm, connection=connection)
    return num_components < 2
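A quick sanity check (my own, with a made-up connectivity matrix; assumes _connected above is in scope):

import numpy as np

cm = np.array([[0, 1, 0],
               [0, 0, 1],
               [1, 0, 0]])                  # a directed 3-cycle
print(_connected(cm, None, 'strong'))       # True: a single strongly connected component
print(_connected(cm, (0, 1), 'strong'))     # False: restricting to nodes 0 and 1 breaks the cycle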
Code example #17
File: candidates.py Project: gzuidhof/luna16
def merge_candidates_scan(candidates, seriesuid, distance=5.):
    distances = pdist(candidates, metric='euclidean')
    adjacency_matrix = squareform(distances)

    # Determine nodes within distance, replace by 1 (=adjacency matrix)
    adjacency_matrix = np.where(adjacency_matrix<=distance,1,0)

    # Determine all connected components in the graph
    n, labels = connected_components(adjacency_matrix)
    new_candidates = np.zeros((n,3))

    # Take the mean for these connected components
    for cluster_i in range(n):
        points = candidates[np.where(labels==cluster_i)]
        center = np.mean(points,axis=0)
        new_candidates[cluster_i,:] = center

    x = new_candidates[:,0]
    y = new_candidates[:,1]
    z = new_candidates[:,2]
    labels = [seriesuid]*len(x)
    class_name = [0]*len(x)

    data= zip(labels,x,y,z,class_name)

    new_candidates = pd.DataFrame(data,columns=CANDIDATES_COLUMNS)

    return new_candidates
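The clustering step above can be sketched in isolation with made-up coordinates (the points and the 5-unit threshold below are hypothetical):

import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.sparse.csgraph import connected_components

candidates = np.array([[0.0, 0.0, 0.0],
                       [3.0, 0.0, 0.0],
                       [50.0, 0.0, 0.0]])
adjacency_matrix = np.where(squareform(pdist(candidates, metric='euclidean')) <= 5., 1, 0)
n, labels = connected_components(adjacency_matrix)
print(n, labels)   # 2 clusters: the first two candidates are merged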
Code example #18
File: util.py Project: frnsys/galaxy
def split_dist_matrix(dist_matrix, overwrite=False):
    # Create the minimum spanning tree.
    # `overwrite=True` will make changes in place, which is more efficient.
    mst = minimum_spanning_tree(csr_matrix(dist_matrix), overwrite=overwrite)
    mst = mst.toarray()

    # Get the index of the maximum value.
    # `argmax` returns the index of the _flattened_ array;
    # `unravel_index` converts it back.
    idx = np.unravel_index(mst.argmax(), mst.shape)

    # Clear out the maximum value to split the tree.
    mst[idx] = 0

    # Label connected components.
    num_graphs, labels = connected_components(mst, directed=False)

    # We should have two trees.
    assert(num_graphs == 2)

    # Use indices as node ids and group them according to their graph.
    results = [[] for i in range(max(labels) + 1)]
    for idx, label in enumerate(labels):
        results[label].append(idx)

    return results
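A usage sketch with a made-up distance matrix (assumes split_dist_matrix above and its imports, csr_matrix, minimum_spanning_tree and connected_components, are in scope):

import numpy as np

# two tight pairs of points that are far from each other
dist_matrix = np.array([[0, 1, 9, 9],
                        [1, 0, 9, 9],
                        [9, 9, 0, 1],
                        [9, 9, 1, 0]])
print(split_dist_matrix(dist_matrix))   # expected: [[0, 1], [2, 3]]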
Code example #19
def compute_Ws(X, num_ccs):
  with Timer('Calculating pairwise distances...'):
    D = pairwise_distances(X, metric='sqeuclidean')
  np.save('mnist_D.npy', D)
  # k-nn
  with Timer('Calculating knn graph...'):
    for k in xrange(1,10):
      Wknn = neighbor_graph(D, precomputed=True, k=k, symmetrize=True)
      n = connected_components(Wknn, directed=False, return_labels=False)
      if n <= num_ccs:
        break
    else:
      assert False, 'k too low'
  np.save('mnist_Wknn.npy', Wknn)
  print 'knn (k=%d)' % k

  # b-matching
  with Timer('Calculating b-matching graph...'):
    # using 8 decimal places kills the disk
    Wbma = hacky_b_matching(D, k, fmt='%.1f')
  np.save('mnist_Wbma.npy', Wbma)

  # msg
  with Timer('Calculating MSG graph...'):
    Wmsg = manifold_spanning_graph(X, 2, num_ccs=num_ccs)
  np.save('mnist_Wmsg.npy', Wmsg)

  return D, Wknn, Wbma, Wmsg
Code example #20
File: linalg.py Project: bhmm/bhmm-nopreserve
def is_connected(C, directed=True):
    r"""Return true, if the input count matrix is completely connected.
    Effectively checking if the number of connected components equals one.
    (EMMA function)

    Parameters
    ----------
    C : scipy.sparse matrix or numpy ndarray
        Count matrix specifying edge weights.
    directed : bool, optional
       Whether to compute connected components for a directed  or
       undirected graph. Default is True.

    Returns
    -------
    connected : boolean, returning true only if C is connected.


    """
    from scipy.sparse import csr_matrix
    from scipy.sparse.sputils import isdense
    import scipy.sparse.csgraph as csgraph
    if isdense(C):
        C = csr_matrix(C)
    nc=csgraph.connected_components(C, directed=directed, connection='strong', return_labels=False)
    return nc == 1
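A short usage sketch with made-up count matrices (assumes the is_connected defined above):

import numpy as np

C_connected = np.array([[10, 1, 0],
                        [2, 0, 3],
                        [0, 5, 20]])
print(is_connected(C_connected))   # True: 0 <-> 1 <-> 2 form one strongly connected class

C_split = np.array([[1, 0],
                    [0, 1]])       # two isolated states
print(is_connected(C_split))       # False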
Code example #21
File: util.py Project: knaaptime/region
def assert_feasible(solution, adj, n_regions=None):
    """
    Parameters
    ----------
    solution : :class:`numpy.ndarray`
        Array of region labels.
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix representing the contiguity relation.
    n_regions : `int` or `None`
        An `int` represents the desired number of regions.
        If `None`, then the number of regions is not checked.

    Raises
    ------
    exc : `ValueError`
        A `ValueError` is raised if clustering is not spatially contiguous.
        Given the `n_regions` argument is not `None`, a `ValueError` is raised
        also if the number of regions is not equal to the `n_regions` argument.
    """
    if n_regions is not None:
        if len(set(solution)) != n_regions:
            raise ValueError("The number of regions is {} but should be "
                             "{}".format(len(set(solution)), n_regions))
    for region_label in set(solution):
        _, comp_labels = csg.connected_components(adj)
        # check whether equal region_label implies equal comp_label
        comp_labels_in_region = comp_labels[solution == region_label]
        if not all_elements_equal(comp_labels_in_region):
            raise ValueError("Region {} is not spatially "
                             "contiguous.".format(region_label))
Code example #22
File: assessment.py Project: greglever/PyEMMA
def is_ergodic(T, tol):
    """
    checks if T is 'ergodic'
    
    Parameters
    ----------
    T : scipy.sparse matrix
        Transition matrix
    tol : float
        tolerance
        
    Returns
    -------
    Truth value : bool
    True, if # strongly connected components = 1
    False, otherwise
    """
    if isdense(T):
        T = csr_matrix(T)
    if not is_transition_matrix(T, tol):
        raise ValueError("given matrix is not a valid transition matrix.")
    
    num_components = connected_components(T, directed=True, \
                                          connection='strong', \
                                          return_labels=False)
    
    return num_components == 1
Code example #23
    def _spectral_embedding(self, affinity_matrix):
        """ Computes spectral embedding.
            First calculates normalized laplacian
            Then does the eigenvalue decomposition
        """
        numComponents, labels = connected_components(affinity_matrix)

        if numComponents > 1:
            # for each component figure out embedding, return the complete embedding
            embedding = []
            connected_component = np.zeros(affinity_matrix.shape)
            for i in xrange(numComponents):
                for j in xrange(affinity_matrix.shape[0]):
                    if labels[j] == i:
                        connected_component[:, i]
                embedding.append(self._spectral_embedding(connected_component))
            return embedding

        self.n_components += 1
        L, diag_vector = laplacian(affinity_matrix, normed=True, return_diag=True)

        D = np.diag(diag_vector)
        # eigvals, eigvects = eigsh(-L, k=self.n_components, sigma=1.0, which='LM')

        eigvals, eigvects = eigh(L)
        embedding = eigvects.T[: self.n_components] * diag_vector

        return embedding[1 : self.n_components].T
Code example #24
File: csgraph_utils.py Project: knaaptime/region
def is_connected(adj):
    """
    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix.

    Returns
    -------
    connected : `bool`
        `True` if the graph defined by adjacency matrix `adj` is connected.
        `False` otherwise.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.sparse import csr_matrix
    >>> connected = csr_matrix(np.array([[0, 1],
    ...                                  [1, 0]]))
    >>> is_connected(connected)
    True
    >>> disconnected = csr_matrix(np.array([[0, 0],
    ...                                     [0, 0]]))
    >>> is_connected(disconnected)
    False
    """
    n_connected_components = csg.connected_components(adj, directed=False,
                                                      return_labels=False)
    return n_connected_components == 1
Code example #25
File: rena.py Project: ahoyosid/ReNA
def nearest_neighbor_grouping(connectivity, data_matrix, n_clusters, thr):
    """ Cluster according to nn and reduce the data and connectivity
    """
    # Nearest neighbor connectivity
    nn_connectivity = _nn_connectivity(connectivity, thr)

    n_features = connectivity.shape[0]

    n_labels = n_features - (nn_connectivity + nn_connectivity.T).nnz / 2

    if n_labels < n_clusters:
        # cut some links to achieve the desired number of clusters
        alpha = n_features - n_clusters

        nn_connectivity = nn_connectivity + nn_connectivity.T

        edges_ = np.array(nn_connectivity.nonzero())

        plop = edges_[0] - edges_[1]

        select = np.argsort(plop)[:alpha]

        nn_connectivity = coo_matrix(
            (np.ones(2 * alpha),
             np.hstack((edges_[:, select], edges_[::-1, select]))),
            (n_features, n_features))

    # Clustering step: getting the connected components of the nn matrix
    n_labels, labels = csgraph.connected_components(nn_connectivity)

    # Reduction step: reduction by averaging
    reduced_connectivity, reduced_data_matrix = reduce_data_and_connectivity(
        labels, n_labels, connectivity, data_matrix, thr)

    return reduced_connectivity, reduced_data_matrix, labels
Code example #26
File: test_big_phi.py Project: tmprd/pyphi
def test_strongly_connected():
    # A disconnected matrix.
    cm1 = np.array([[0, 0, 1],
                    [0, 1, 0],
                    [1, 0, 0]])
    # A strongly connected matrix.
    cm2 = np.array([[0, 1, 0],
                    [0, 0, 1],
                    [1, 0, 0]])
    # A weakly connected matrix.
    cm3 = np.array([[0, 1, 0],
                    [0, 0, 1],
                    [0, 1, 0]])
    assert connected_components(csr_matrix(cm1), connection='strong')[0] > 1
    assert connected_components(csr_matrix(cm2), connection='strong')[0] == 1
    assert connected_components(csr_matrix(cm3), connection='strong')[0] > 1
Code example #27
File: cleaning.py Project: ParsonsRD/ctapipe
def number_of_islands(geom, mask):
    """
    Search a given pixel mask for connected clusters.
    This can be used to separate gamma from hadronic showers.

    Parameters
    ----------
    geom: `~ctapipe.instrument.CameraGeometry`
        Camera geometry information
    mask: ndarray
        input mask (array of booleans)

    Returns
    -------
    num_islands: int
        Total number of clusters
    island_labels: ndarray
        Contains cluster membership of each pixel.
        Dimension equals that of the input mask.
        Entries range from 0 (not in the pixel mask) to num_islands.
    """
    # compress sparse neighbor matrix
    neighbor_matrix_compressed = geom.neighbor_matrix_sparse[mask][:, mask]
    # pixels in no cluster have label == 0
    island_labels = np.zeros(geom.n_pixels)

    num_islands, island_labels_compressed = connected_components(
        neighbor_matrix_compressed,
        directed=False
    )

    # count clusters from 1 onwards
    island_labels[mask] = island_labels_compressed + 1

    return num_islands, island_labels
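The masking trick above can be reproduced without ctapipe using a hand-built neighbour matrix (the 5-pixel chain "camera" below is made up for illustration):

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

# 5 pixels arranged in a chain 0-1-2-3-4
neighbor_matrix_sparse = csr_matrix(np.array([[0, 1, 0, 0, 0],
                                              [1, 0, 1, 0, 0],
                                              [0, 1, 0, 1, 0],
                                              [0, 0, 1, 0, 1],
                                              [0, 0, 0, 1, 0]]))
mask = np.array([True, True, False, True, True])   # pixel 2 did not survive cleaning
compressed = neighbor_matrix_sparse[mask][:, mask]
num_islands, labels_compressed = connected_components(compressed, directed=False)
island_labels = np.zeros(5)
island_labels[mask] = labels_compressed + 1
print(num_islands, island_labels)   # 2 islands, labels [1. 1. 0. 2. 2.]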
Code example #28
def steadystate_solve(K):
    # Reformulate K to remove sink/source states
    n_components, component_assignments = csgraph.connected_components(K, connection="strong")
    largest_component = Counter(component_assignments).most_common(1)[0][0]
    components = np.where(component_assignments == largest_component)[0]

    ii = np.ix_(components, components)
    K_mod = K[ii]
    K_mod = normalize(K_mod)

    eigvals, eigvecs = np.linalg.eig(K_mod.T)
    eigvals = np.real(eigvals)
    eigvecs = np.real(eigvecs)

    maxi = np.argmax(eigvals)
    if not np.allclose(np.abs(eigvals[maxi]), 1.0):
        print('WARNING: Steady-state undetermined for current iteration')
        bin_prob = K.diagonal().copy()
        bin_prob = bin_prob / np.sum(bin_prob)
        return bin_prob

    sub_bin_prob = eigvecs[:, maxi] / np.sum(eigvecs[:, maxi])

    bin_prob = np.zeros(K.shape[0])
    bin_prob[components] = sub_bin_prob

    return bin_prob
Code example #29
def tree_information_sparse(forest, n_features):
    """Computes mutual information objective from forest.

    Parameters
    ----------
    forest: sparse matrix
        graph containing trees representing cluster
    n_features: int
        dimensionality of input space.
    """
    entropy = 0
    sym_forest = forest + forest.T
    n_components, components = connected_components(sym_forest)
    if np.any(components < 0):
        # there is a lonely node
        entropy -= 1e10
    # n_samples = len(components)

    for i in range(n_components):
        inds = np.where(components == i)[0]
        subforest = forest[inds[:, np.newaxis], inds]
        L = subforest.sum()
        n_samples_c = len(inds)
        if L == 0:
            warnings.warn("L is zero. This means there are identical points in"
                          " the dataset")
            L = 1e-10
        entropy += (n_samples_c * ((n_features - 1) * np.log(n_samples_c) -
                                   n_features * np.log(L)))
    return entropy
Code example #30
 def generate_threshold_mesh(self, min_value=0.0, max_value=1.0e9):
     r"""
     Generates a mesh excluding all blocks below the min_value arg. Regions
     that are isolated by the thresholding are also automatically removed.
     """
     #
     # thresholding the data and then checking for isolated clusters
     self._field.threshold_data(min_value, max_value, repl=0.0)
     self._field.copy_data(self)
     #
     adj_matrix = self._field.create_adjacency_matrix()
     num_cs, cs_ids = csgraph.connected_components(csgraph=adj_matrix,
                                                   directed=False)
     # only saving the largest cluster
     if num_cs > 1:
         cs_count = sp.zeros(num_cs, dtype=int)
         for cs_num in cs_ids:
             cs_count[cs_num] += 1
         self.data_vector[sp.where(cs_ids != sp.argmax(cs_count))[0]] = 0.0
         self.data_map = sp.reshape(self.data_vector, (self.nz, self.nx))
     #
     self._field.data_map = self.data_map
     self._field.data_vector = sp.ravel(self.data_map)
     #
     # generating blocks and vertices
     mask = self.data_map > 0.0
     self._generate_masked_mesh(cell_mask=mask)
Code example #31
 def mergeable(self, clusters, parent=None):
     clusters = list(clusters)
     return connected_components(self._neighbours.loc[clusters, clusters],
                                 directed=False,
                                 return_labels=False) == 1
Code example #32
    def reduction_implication_network(self, rounding_parameter):
        """
        Use an implication network to reduce the current problem instance.
        :param rounding_parameter: the projection data will be truncated to [rounding_parameter], in order to be able
               to use integer capacities in the implication network.
        """
        current_N = self.A.shape[1]
        A_csc = self.A.tocsc()
        B = 10**rounding_parameter * A_csc.transpose() * A_csc
        int_y = (10**rounding_parameter * self.y +
                 0.1 * np.ones_like(self.y)).astype(int)
        diagonal = B.diagonal()
        B -= csr_matrix(np.diag(diagonal))
        A_y = self.A.transpose() @ int_y
        outeredges = csr_matrix(2 * A_y - diagonal)
        Adjacency_matrix = 2 * sparse.bmat(
            [[None, 2 * B, None, None],
             [None, None, None, outeredges.transpose()],
             [outeredges, None, None, None], [None, None, 0, None]],
            format='csr')
        max_flow_output = csgraph.maximum_flow(Adjacency_matrix, 2 * current_N,
                                               2 * current_N + 1)

        flow = max_flow_output.residual
        symmetric_central_flow = flow[:current_N, current_N:2 *
                                      current_N] + flow[:current_N,
                                                        current_N:2 *
                                                        current_N].transpose()
        symmetric_central_flow.data //= 2
        flow[:current_N, current_N:2 * current_N] = symmetric_central_flow
        flow[current_N:2 * current_N, :current_N] = -symmetric_central_flow
        symmetric_outer_flow = \
            flow[2*current_N, :current_N] + flow[current_N: 2 * current_N, 2*current_N + 1].transpose()
        symmetric_outer_flow.data //= 2
        flow[2 * current_N, :current_N] = symmetric_outer_flow
        flow[current_N:2 * current_N,
             2 * current_N + 1] = symmetric_outer_flow.transpose()

        residual = Adjacency_matrix - flow
        residual.eliminate_zeros()
        n_components, labels = csgraph.connected_components(
            residual, connection='strong')

        component_type = np.zeros(n_components, dtype=int)
        # Type 14: u and 1 - u are both contained in the component
        # Type 15: u is contained in the component, 1 - u is not, no path from u to 1 - u
        # Type 16: u is contained in the component, 1 - u is not, there exists a path from u to 1 - u
        indices = []
        vals = []
        for i in range(current_N):
            component = labels[i]
            if component_type[component] == 0:
                if component == labels[i + current_N]:
                    component_type[component] = 14
                else:
                    reachable = csgraph.breadth_first_order(
                        residual, i, return_predecessors=False)
                    if i + current_N in reachable:
                        component_type[component] = 16
                    else:
                        component_type[component] = 15
            if component_type[component] == 15:
                indices.append(i)
                vals.append(1)
            elif component_type[component] == 16:
                indices.append(i)
                vals.append(0)

        no_reductions = len(indices)
        order = np.array(indices).argsort()
        for j in range(no_reductions):
            i = order[no_reductions - j - 1]
            self.problem_reduction_single(indices[i], vals[i])
        return 0
Code example #33
def mergeROIS(Y_res,
              A,
              b,
              C,
              f,
              d1,
              d2,
              P_,
              thr=0.8,
              mx=50,
              sn=None,
              deconv_method='spgl1',
              min_size=3,
              max_size=8,
              dist=3,
              method_exp='ellipse',
              expandCore=iterate_structure(generate_binary_structure(2, 1),
                                           2).astype(int)):
    """
    Merging of spatially overlapping components that have highly correlated temporal activity.
    % The correlation threshold for merging overlapping components is user specified in P.merge_thr (default value 0.85)
    % Inputs:
    % Y_res:        residual movie after subtracting all found components
    % A:            matrix of spatial components
    % b:            spatial background
    % C:            matrix of temporal components
    % f:            temporal background
    % P:            parameter struct
    
    % Outputs:
    % A:            matrix of new spatial components
    % C:            matrix of new temporal components
    % nr:           new number of components
    % merged_ROIs:  list of old components that were merged
    
    % Written by:
    % Andrea Giovannucci from implementation of Eftychios A. Pnevmatikakis, Simons Foundation, 2015
    """

    #%

    nr = A.shape[1]
    [d, T] = np.shape(Y_res)
    C_corr = np.corrcoef(C[:nr, :], C[:nr, :])[:nr, :nr]
    FF1 = C_corr >= thr
    #find graph of strongly correlated temporal components
    A_corr = A.T * A
    A_corr.setdiag(0)
    FF2 = A_corr > 0  # % find graph of overlapping spatial components
    FF3 = np.logical_and(FF1, FF2.todense())
    FF3 = coo_matrix(FF3)
    c, l = csgraph.connected_components(FF3)  # % extract connected components

    p = len(P_[0]['gn'])
    MC = []
    for i in range(c):
        if np.sum(l == i) > 1:
            MC.append((l == i).T)
    MC = np.asarray(MC).T

    if MC.ndim > 1:
        cor = np.zeros((np.shape(MC)[1], 1))

        for i in range(np.size(cor)):
            fm = np.where(MC[:, i])[0]
            for j1 in range(np.size(fm)):
                for j2 in range(j1 + 1, np.size(fm)):
                    print j1, j2
                    cor[i] = cor[i] + C_corr[fm[j1], fm[j2]]

        Y_res = Y_res + np.dot(b, f)
        if np.size(cor) > 1:
            ind = np.argsort(np.squeeze(cor))[::-1]
        else:
            ind = [0]

        nm = min((np.size(ind), mx))  # number of merging operations

        A_merged = coo_matrix((d, nm)).tocsr()
        C_merged = np.zeros((nm, T))

        P_merged = []
        merged_ROIs = []
        #%
        for i in range(nm):
            P_cycle = dict()
            merged_ROI = np.where(MC[:, ind[i]])[0]
            merged_ROIs.append(merged_ROI)
            nC = np.sqrt(np.sum(C[merged_ROI, :]**2, axis=1))
            #        A_merged[:,i] = np.squeeze((A[:,merged_ROI]*spdiags(nC,0,len(nC),len(nC))).sum(axis=1))
            A_merged[:, i] = csr_matrix(
                (A[:, merged_ROI] *
                 spdiags(nC, 0, len(nC), len(nC))).sum(axis=1))

            Y_res = Y_res + A[:, merged_ROI] * C[merged_ROI, :]

            aa_1 = scipy.sparse.linalg.spsolve(
                spdiags(nC, 0, len(nC), len(nC)), C[merged_ROI, :])
            aa_2 = (aa_1).mean(axis=0)

            ff = np.nonzero(A_merged[:, i])[0]

            cc, _, _, Ptemp = update_temporal_components(
                np.asarray(Y_res[ff, :]),
                A_merged[ff, i],
                b[ff],
                aa_2,
                f,
                p=p,
                deconv_method=deconv_method)

            aa, bb, cc = update_spatial_components(np.asarray(Y_res),
                                                   cc,
                                                   f,
                                                   A_merged[:, i],
                                                   d1=d1,
                                                   d2=d2,
                                                   sn=sn,
                                                   min_size=min_size,
                                                   max_size=max_size,
                                                   dist=dist,
                                                   method=method_exp,
                                                   expandCore=expandCore)

            A_merged[:, i] = aa.tocsr()

            cc, _, _, Ptemp = update_temporal_components(
                Y_res[ff, :],
                A_merged[ff, i],
                bb[ff],
                cc,
                f,
                p=p,
                deconv_method=deconv_method)

            P_cycle = P_[merged_ROI[0]].copy()
            P_cycle['gn'] = Ptemp[0]['gn']
            P_cycle['b'] = Ptemp[0]['b']
            P_cycle['c1'] = Ptemp[0]['c1']
            P_cycle['neuron_sn'] = Ptemp[0]['neuron_sn']
            P_merged.append(P_cycle)
            C_merged[i, :] = cc
            if i + 1 < nm:
                Y_res[ff, :] = Y_res[ff, :] - A_merged[ff, i] * cc

        #%
        neur_id = np.unique(np.hstack(merged_ROIs))

        good_neurons = np.setdiff1d(range(nr), neur_id)

        A = scipy.sparse.hstack((A[:, good_neurons], A_merged.tocsc()))
        C = np.vstack((C[good_neurons, :], C_merged))

        #    P_new=list(P_[good_neurons].copy())
        P_new = [P_[pp] for pp in good_neurons]

        for p in P_merged:
            P_new.append(p)

        nr = nr - len(neur_id) + nm

    else:
        warnings.warn('No neurons merged!')
        merged_ROIs = []
        P_new = P_

    return A, C, nr, merged_ROIs, P_new
Code example #34
def merge_quantified_calls(args, dbo_args, endpoint_args):

    myprint('merging candidate calls')

    quantified_svcall_list = read_object_file(args.quantified_bk_pair_file,
                                              QuantifiedBKCand)

    edge_list = list()

    myprint('building edges for candidate calls')

    frm_id_set_list = list()
    for i in range(0, len(quantified_svcall_list)):
        frm_id_set = quantified_svcall_list[i].all_frm_id_set()
        frm_id_set_list.append(frm_id_set)

    for i in range(0, len(quantified_svcall_list)):
        for j in range(i + 1, len(quantified_svcall_list)):
            frm_id_set1 = frm_id_set_list[i]
            frm_id_set2 = frm_id_set_list[j]
            shared_frm_id_set = frm_id_set1.intersection(frm_id_set2)
            n_frm_id1 = len(frm_id_set1)
            n_frm_id2 = len(frm_id_set2)
            n_shared_frm = len(shared_frm_id_set)
            if n_shared_frm >= min(n_frm_id1, n_frm_id2) / 2:
                edge_list.append((i, j))
                edge_list.append((j, i))

    row = list()
    col = list()
    data = list()

    for edge in edge_list:
        row.append(edge[0])
        col.append(edge[1])
        data.append(1)

    n_node = len(quantified_svcall_list)
    myprint('connected components')
    bedpe_csr_matrix = csr_matrix((data, (row, col)), shape=[n_node, n_node])
    n_components, label_list = connected_components(bedpe_csr_matrix,
                                                    directed=False)

    component_element_db = [0] * n_components

    for i in range(0, len(component_element_db)):
        component_element_db[i] = list()
    # component_element_db[component_id] = list of bedpe index
    for i in range(0, len(label_list)):
        component_element_db[label_list[i]].append(i)

    merged_call_list = list()
    for component_id in range(0, len(component_element_db)):
        bedpe_merge_group = list()
        for index in component_element_db[component_id]:
            bedpe_merge_group.append(quantified_svcall_list[index])

        merged_call = merge1call_group(bedpe_merge_group)
        merged_call_list.append(merged_call)

    merged_call_bedpe_file = args.merged_bedpe_file

    merged_call_bedpe_fp = open(merged_call_bedpe_file, 'w')
    for merged_call in merged_call_list:
        if merged_call.score < 20: continue
        merged_call_bedpe_fp.write(merged_call.output_core() + endl)

    merged_call_bedpe_fp.close()

    return
Code example #35
        for proto, conns in node['connections'].items():
            for conn in conns:
                if isinstance(conn, str):
                    conn = {'id': conn, 'quality': 1.0, 'state': 'open'}
                if conn['id'] not in ids or conn['state'] != 'open':
                    continue
                if proto == 'webrtc-stream':
                    stream_df.at[node['id'], conn['id']] = 1
                else:
                    mesh_df.at[node['id'], conn['id']] = conn['quality']

    mesh_path = csg.dijkstra(mesh_df, directed=False, unweighted=True)
    dm = pd.DataFrame(mesh_path, index=ids, columns=ids)
    dm = clean_frame(dm)
    if dm.get(router, pd.Series([])).any():
        connected_components, labels = csg.connected_components(dm.values)
        average_distance_to_router = dm.get(router).sum() / len(dm)
        components, _ = np.histogram(labels.T, labels.max() + 1)
        component_distribution = ','.join(str(c) for c in sorted(components))
        largest_component = max(components)
        analysis.append({
            'average_distance_to_router': average_distance_to_router,
            'number_of_connected_nodes': len(dm),
            'number_of_total_nodes': len(mesh),
            'connected_components': connected_components,
            'component_distribution': component_distribution,
            'largest_component': largest_component
        })

        if mesh_index == len(mesh_series) - 1:
            for node_id, distance in dm.get(router).to_dict().items():
Code example #36
def filter_hypothesis(hypothesisPoints,
                      votingDirection,
                      scoreSum,
                      pointsDirection,
                      votingScore,
                      similiarityThresh,
                      neighThresh,
                      scoreThres,
                      minClusterSize,
                      returnCopies=False):
    # Filter hypothesis according to minimum number of neighbour, minimum score
    # and minimum size of cluster they belong to.

    if returnCopies:
        print(
            'gagggggggg\ngagagagawwge\ngawegewagagaew\ngwagawegaweg\ngawegawgwgeaag'
        )
        hypothesisPoints = hypothesisPoints.clone()
        votingDirection = votingDirection.clone()
        scoreSum = scoreSum.clone()
        pointsDirection = pointsDirection.clone()
        votingScore = votingScore.clone()

    # Keep hypotheses with enough score
    hasEnoughScore = scoreSum > scoreThres
    hypothesisPoints = hypothesisPoints[hasEnoughScore]
    votingDirection = votingDirection[hasEnoughScore]
    scoreSum = scoreSum[hasEnoughScore]
    pointsDirection = pointsDirection[hasEnoughScore]
    votingScore = votingScore[hasEnoughScore]

    # Keep removing hypotheses until all fulfill the different minimum thresholds.
    keepHypotheses = torch.tensor([0]).byte()
    while not torch.all(keepHypotheses):
        # Find hypotheses with enough neighbours

        pointsDistances = torch.from_numpy(squareform(
            pdist(hypothesisPoints))).cuda()
        votingDistances = torch.from_numpy(squareform(
            pdist(votingDirection))).cuda()
        affinity_matrix = torch.exp(-(pointsDistances * 1 / 5)**2) * torch.exp(
            -(votingDistances * 2)**2)

        adjMatrix = (affinity_matrix > similiarityThresh) & ~torch.eye(
            affinity_matrix.shape[0]).byte().cuda()

        nNeighbours = torch.sum(adjMatrix, dim=1)
        hasEnoughNeighbours = nNeighbours > neighThresh

        # Partition hypotheses into subclusters using the adjacency matrix
        n_clusters, clusterLabels = connected_components(csgraph=adjMatrix,
                                                         directed=False,
                                                         return_labels=True)

        # Determine the size of the subclusters
        clusterLabels = torch.from_numpy(clusterLabels).cuda()
        uniqueClusterLabels = torch.unique(clusterLabels)
        clusterVotes = clusterLabels == uniqueClusterLabels[:, None]
        clusterSizes = torch.sum(clusterVotes, dim=1)

        # Find which clusters are big enough and keep the belonging hypotheses
        isBigCluster = clusterSizes > minClusterSize
        uniqueClusterLabelsKeep = uniqueClusterLabels[isBigCluster]
        hasEnoughClusterSize = torch.any(
            clusterLabels == uniqueClusterLabelsKeep[:, None], dim=0)

        # Keep hypotheses which are both belonging to a big enough cluster and have
        # enough neighbours
        keepHypotheses = hasEnoughNeighbours & hasEnoughClusterSize
        hypothesisPoints = hypothesisPoints[keepHypotheses]
        votingDirection = votingDirection[keepHypotheses]
        scoreSum = scoreSum[keepHypotheses]
        pointsDirection = pointsDirection[keepHypotheses]
        votingScore = votingScore[keepHypotheses]
        clusterLabels = clusterLabels[keepHypotheses]

    # Create weights for hypotheses according to 1/(cluster size they belong to)
    clusterWeight = torch.sum(clusterVotes.float() /
                              clusterSizes[:, None].float(),
                              dim=0)

    # Create weights for hypotheses according to their inlierCount/(total inlierCount of cluster they belong to)
    weight = clusterVotes.float() * scoreSum[None]
    scoreAndClusterWeight = torch.sum(weight /
                                      torch.sum(weight, dim=1)[:, None],
                                      dim=0)

    if returnCopies:
        return hypothesisPoints, votingDirection, scoreSum, pointsDirection, votingScore, adjMatrix, nNeighbours, clusterWeight, scoreAndClusterWeight, clusterLabels
    else:
        return adjMatrix, nNeighbours, clusterWeight, scoreAndClusterWeight, clusterLabels
Code example #37
import numpy as np
from scipy.sparse.csgraph import connected_components
from scipy.sparse import csr_matrix

l = [[0, 1, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 1],
     [0, 0, 0, 0, 0]]

# n, labels = connected_components(l)
# AttributeError: 'list' object has no attribute 'dtype'

a = np.array(l)
print(type(a))
# <class 'numpy.ndarray'>

n, labels = connected_components(a)

print(n)
# 2

print(labels)
# [0 0 0 1 1]

csr = csr_matrix(l)
print(csr)
#   (0, 1)	1
#   (0, 2)	1
#   (1, 2)	1
#   (3, 4)	1

print(type(csr))
# <class 'scipy.sparse.csr.csr_matrix'>
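A possible continuation of the snippet above (my addition): the csr matrix can be passed directly, and switching to strong connectivity changes the count.

n_csr, labels_csr = connected_components(csr)
print(n_csr)
# 2

n_strong, labels_strong = connected_components(csr, directed=True, connection='strong')
print(n_strong)
# 5 (no directed cycles, so every node is its own strongly connected component)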
Code example #38
def test_planted_distribution_model():
    """ Tests the main graph generator with statistics and visualized degree distribution and edge adjacency matrix
    """
    print("\n--- 'planted_distribution_model_H', 'planted_distribution_model_P', 'number_of_connectedComponents', 'create_blocked_matrix_from_graph' --")
    CHOICE = 21
    print("CHOICE:", CHOICE)
    debug = 0

    # directed = True                     # !!! TODO: not yet clear what undirected means here, only P accepts directed
    backEdgesAllowed = True             # ??? should be enforced in code
    sameInAsOutDegreeRanking = False
    distribution = 'powerlaw'
    exponent = -0.3
    VERSION_P = True


    # --- AAAI figures ---
    if CHOICE in [1, 2, 3, 4, 5, 6]:
        n = 120
        alpha0 = [1/6, 1/3, 1/2]
        h = 8
        P = np.array([[1, h, 1],
                      [1, 1, h],
                      [h, 1, 1]])

    if CHOICE == 1:                     # P (equivalent to 2), AAAI 2
        m = 1080

    elif CHOICE == 2:                   # H (equivalent to 1)
        H0 = row_normalize_matrix(P)
        d_vec = [18, 9, 6]
        VERSION_P = False

    elif CHOICE == 3:                   # H (equivalent to 4), AAAI 3
        H0 = row_normalize_matrix(P)
        d_vec = 9
        VERSION_P = False

    elif CHOICE == 4:                   # P (equivalent to 3)
        P = np.array([[1, h, 1],
                      [2, 2, 2*h],
                      [3*h, 3, 3]])
        m = 1080

    elif CHOICE == 5:                   # H (equivalent to 2), but backedges=False
        H0 = row_normalize_matrix(P)
        d_vec = [18, 9, 6]
        VERSION_P = False
        backEdgesAllowed = False

    elif CHOICE == 6:                   # P undirected, AAAI 4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        directed = False
        backEdgesAllowed = False
        m = 540

    # --- AGAIN DIRECTED ---
    if CHOICE == 12:
        n = 1001
        alpha0 = [0.6, 0.2, 0.2]
        P = np.array([[0.1, 0.8, 0.1],
                      [0.8, 0.1, 0.1],
                      [0.1, 0.1, 0.8]])
        m = 3000
        distribution = 'uniform'    # uniform powerlaw
        exponent = None
        backEdgesAllowed = False    # ??? should be enforced in code

    if CHOICE == 13:
        # Nice for block matrix visualization
        n = 1000
        alpha0 = [0.334, 0.333, 0.333]
        h = 2
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        m = 2000
        distribution = 'uniform'    # uniform powerlaw
        exponent = None
        backEdgesAllowed = False    # ??? should be enforced in code

    if CHOICE == 14:
        n = 1000
        alpha0 = [0.3334, 0.3333, 0.3333]
        h = 10
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        m = 10000
        exponent = -0.55


    # --- UNDIRECTED ---
    if CHOICE == 20:
        n = 100
        alpha0 = [0.6, 0.2, 0.2]
        h = 1.4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        H0 = row_normalize_matrix(P)
        d_vec = 5
        directed = False
        exponent = -0.3
        VERSION_P = False

    elif CHOICE == 21:
        n = 1001
        alpha0 = [0.6, 0.2, 0.2]
        h = 4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        H0 = row_normalize_matrix(P)
        d_vec = 3.4                   # don't specify vector for undirected
        distribution = 'uniform'    # uniform powerlaw
        exponent = -0.5
        directed = False
        backEdgesAllowed = True             # ignored in code for undirected
        VERSION_P = False
        sameInAsOutDegreeRanking = True     # ignored in code for undirected

    elif CHOICE == 22:
        n = 1000
        m = 3000
        alpha0 = [0.6, 0.2, 0.2]
        h = 4
        P = np.array([[1, 3*h, 1],
                      [2*h, 1, 1],
                      [1, 1, h]])
        distribution = 'uniform'    # uniform powerlaw
        exponent = -0.5
        directed = False
        backEdgesAllowed = False             # ignored in code for undirected
        sameInAsOutDegreeRanking = True     # ignored in code for undirected
        debug=0

        VERSION_P = True
        H0 = row_normalize_matrix(P)


    # --- Create the graph
    start = time.time()
    if VERSION_P:
        W, Xd = planted_distribution_model(n, alpha=alpha0, P=P, m=m,
                                           distribution=distribution, exponent=exponent,
                                           directed=directed,
                                           backEdgesAllowed=backEdgesAllowed, sameInAsOutDegreeRanking=sameInAsOutDegreeRanking,
                                           debug=debug)
    else:
        W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d_vec,
                                                  distribution=distribution, exponent=exponent,
                                                  directed=directed, backEdgesAllowed=backEdgesAllowed, sameInAsOutDegreeRanking=sameInAsOutDegreeRanking,
                                                  debug=debug)

    time_est = time.time()-start
    print("Time for graph generation: {}".format(time_est))

    # - Undirected degrees: In + Out
    W_und = W.multiply(W.transpose())
    """if backEdgesAllowed then there can be edges in both directions."""
    # W_und.data[:] = np.sign(W_und.data)  # W contains weighted edges -> unweighted before counting edges with Ptot
    print("Fraction of edges that go in both directions: {}".format(np.sum(W_und.data) / np.sum(W.data)))

    # --- Statistics on created graph
    print("\n- 'calculate_Ptot_from_graph':")
    P_tot = calculate_Ptot_from_graph(W, Xd)
    print("P_tot:\n{}".format(P_tot))
    print("sum(P_tot): {}".format(np.sum(P_tot)))
    print("P (normalized to sum=1):\n{}".format(1. * P_tot / np.sum(P_tot)))           # Potential: normalized sum = 1
    H = row_normalize_matrix(P_tot)
    print("H (row-normalized):\n{}".format(H))

    print("\n- 'calculate_nVec_from_Xd':")
    n_vec = calculate_nVec_from_Xd(Xd)
    print("n_vec: {}".format(n_vec))
    print("alpha: {}".format(1.*n_vec / sum(n_vec)))

    print("\n- Average Out/Indegree 'calculate_average_outdegree_from_graph' (assumes directed for total; for undirected the totals are incorrect):")
    print("Average outdegree: {}".format(calculate_average_outdegree_from_graph(W)))
    print("Average indegree: {}".format(calculate_average_outdegree_from_graph(W.transpose())))
    print("Average total degree: {}".format(calculate_average_outdegree_from_graph(W + W.transpose())))
    print("Average outdegree per class: {}".format(calculate_average_outdegree_from_graph(W, Xd)))
    print("Average indegree per class: {}".format(calculate_average_outdegree_from_graph(W.transpose(), Xd)))
    print("Average total degree per class: {}".format(calculate_average_outdegree_from_graph(W + W.transpose(), Xd)))

    # - Overall degree distribution: In / out
    print("\n- Overall Out/In/Total degree distribution 'calculate_outdegree_distribution_from_graph':")
    print("Overall Out and Indegree distribution:")
    d_out_vec_tot = calculate_outdegree_distribution_from_graph(W, Xd=None)
    d_in_vec_tot = calculate_outdegree_distribution_from_graph(W.transpose(), Xd=None)
    print("Outdegree distribution (degree / number):\n{}".format(np.array([d_out_vec_tot.keys(), d_out_vec_tot.values()])))
    print("Indegree distribution (degree / number):\n{}".format(np.array([d_in_vec_tot.keys(), d_in_vec_tot.values()])))

    # - Overall degree distribution: In + Out
    d_tot_vec_tot = calculate_outdegree_distribution_from_graph(W + W.transpose(), Xd=None)
    print("Total degree distribution (degree / number):\n{}".format(np.array([d_tot_vec_tot.keys(), d_tot_vec_tot.values()])))

    # - Per-class degree distribution: In / out
    print("\n- Per-class Out/In/Total degree distribution 'calculate_outdegree_distribution_from_graph':")
    print("\nOutdegree distribution per class:")
    d_out_vec = calculate_outdegree_distribution_from_graph(W, Xd)
    for i in range(len(d_out_vec)):
        print("Class {}:".format(i))
        print(np.array([list(d_out_vec[i].keys()), list(d_out_vec[i].values())]))
    print("Indegree distribution per class:")
    d_in_vec = calculate_outdegree_distribution_from_graph(W.transpose(), Xd)
    for i in range(len(d_in_vec)):
        print("Class {}:".format(i))
        print(np.array([list(d_in_vec[i].keys()), list(d_in_vec[i].values())]))

    # - per-class degree distribution: In + out
    print("\nTotal degree distribution per class:")
    d_vec_und = calculate_outdegree_distribution_from_graph(W + W.transpose(), Xd)
    for i in range(len(d_vec_und)):
        print("Class {}:".format(i))
        print(np.array([list(d_vec_und[i].keys()), list(d_vec_und[i].values())]))

    print("\n- number of weakly connected components':")
    print("Number of weakly connected components: {}".format(connected_components(W, directed=True, connection='weak', return_labels=False)))


    # --- convergence boundary
    # print("\n- '_out_eps_convergence_directed_linbp', 'eps_convergence_linbp'")
    # if directed:
    #     eps_noEcho = _out_eps_convergence_directed_linbp(P, W, echo=False)
    #     eps_Echo = _out_eps_convergence_directed_linbp(P, W, echo=True)
    # else:
    Hc = to_centering_beliefs(H)
    eps_noEcho = eps_convergence_linbp(Hc, W, echo=False)
    eps_Echo = eps_convergence_linbp(Hc, W, echo=True)
    print("Eps (w/ echo): {}".format(eps_Echo))
    print("Eps (no echo): {}".format(eps_noEcho))


    # --- Fig1: Draw degree distributions
    print("\n- Fig1: Draw degree distributions")
    params = {'backend': 'pdf',
              'lines.linewidth': 4,
              'font.size': 10,
              'axes.labelsize': 24,  # fontsize for x and y labels (was 10)
              'axes.titlesize': 22,
              'xtick.labelsize': 20,
              'ytick.labelsize': 20,
              'legend.fontsize': 8,
              'figure.figsize': [5, 4],
              'font.family': 'sans-serif'
    }
    mpl.rcdefaults()
    mpl.rcParams.update(params)
    fig = plt.figure(1)
    ax = fig.add_axes([0.15, 0.15, 0.8, 0.8])  # main axes
    ax.xaxis.labelpad = -12
    ax.yaxis.labelpad = -12

    # A: Draw directed degree distribution
    y_vec = []
    for i in range(len(d_out_vec)):
        y = np.repeat(list(d_out_vec[i].keys()), list(d_out_vec[i].values()) )    # !!! np.repeat
        y = -np.sort(-y)
        y_vec.append(y)
        # print ("Class {}:\n{}".format(i,y))
    y_tot = np.repeat(list(d_out_vec_tot.keys()), list(d_out_vec_tot.values()))             # total outdegree
    y_tot = -np.sort(-y_tot)
    plt.loglog(range(1, len(y_vec[0])+1), y_vec[0], lw=4, color='orange', label=r"A out", linestyle='-')        # !!! plot default index starts from 0 otherwise
    plt.loglog(range(1, len(y_vec[1])+1), y_vec[1], lw=4, color='blue', label=r"B out", linestyle='--')
    plt.loglog(range(1, len(y_vec[2])+1), y_vec[2], lw=4, color='green', label=r"C out", linestyle=':')
    plt.loglog(range(1, len(y_tot)+1), y_tot, lw=1, color='black', label=r"tot out", linestyle='-')

    # B: Draw total (undirected, in + out) degree distribution
    y_vec = []
    for i in range(len(d_vec_und)):
        y = np.repeat(list(d_vec_und[i].keys()), list(d_vec_und[i].values()) )    # !!! np.repeat
        y = -np.sort(-y)
        y_vec.append(y)
        # print ("Class {}:\n{}".format(i,y))
    y_tot = np.repeat(list(d_tot_vec_tot.keys()), list(d_tot_vec_tot.values()))             # total degree (in + out)
    y_tot = -np.sort(-y_tot)
    plt.loglog(range(1, len(y_vec[0])+1), y_vec[0], lw=4, color='orange', label=r"A", linestyle='-')
    plt.loglog(range(1, len(y_vec[1])+1), y_vec[1], lw=4, color='blue', label=r"B", linestyle='--')
    plt.loglog(range(1, len(y_vec[2])+1), y_vec[2], lw=4, color='green', label=r"C", linestyle=':')
    plt.loglog(range(1, len(y_tot)+1), y_tot, lw=1, color='black', label=r"tot", linestyle='-')

    plt.legend(loc='upper right', labelspacing=0)
    filename = 'figs/Fig_test_planted_distribution_model1_{}.pdf'.format(CHOICE)
    plt.savefig(filename, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', format='pdf',
                transparent=True, bbox_inches='tight', pad_inches=0.1,
                # frameon=None,                 # TODO: frameon and papertype are deprecated in newer matplotlib
                )
    os.system("open " + filename)


    # --- Fig2: Draw block matrix
    print("\n- Fig2: 'create_blocked_matrix_from_graph'")
    W_new, Xd_new = create_blocked_matrix_from_graph(W, Xd)

    fig = plt.figure(2)
    row, col = W_new.nonzero()                      # transform the sparse W back to row col format
    plt.plot(col, row, 'o', color='r', markersize=2, markeredgewidth=2, lw=0, zorder=3)    # Notice (col, row) because first axis is vertical in matrices
    # plt.matshow(W_new.todense(), cmap=plt.cm.Greys)  # cmap=plt.cm.gray / Blues   # alternative that does not work as well
    plt.gca().invert_yaxis()    # invert the y-axis to start on top and go down

    # Show quadrants
    d1 = alpha0[0] * n
    d2 = (alpha0[0] + alpha0[1]) * n
    plt.grid(which='major', color='0.7', linestyle='-', linewidth=1)
    plt.xticks([0, d1, d2, n])
    plt.yticks([0, d1, d2, n])
    plt.xlabel('to', labelpad=-1)
    plt.ylabel('from', rotation=90, labelpad=0)

    frame = plt.gca()
    # frame.axes.xaxis.set_ticklabels([])       # would hide the labels
    # frame.axes.yaxis.set_ticklabels([])
    frame.tick_params(direction='inout', width=1, length=10)

    filename = 'figs/Fig_test_planted_distribution_model2_{}.pdf'.format(CHOICE)
    plt.savefig(filename, dpi=None, facecolor='w', edgecolor='w',
            orientation='portrait', format='pdf',
            transparent=True, bbox_inches='tight', pad_inches=0.1)
    os.system("open " + filename)
コード例 #39
0
ファイル: scc_npsp.py プロジェクト: k-harada/AtCoder
def scc(n, m, edge_array):
    tmp = np.ones(m, dtype=np.int32).T
    graph = csr_matrix((tmp, (edge_array[:])), (n, n))

    return connected_components(graph, directed=True, connection='strong')
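
A small usage sketch for scc(); the (2, m) layout of edge_array (row 0 = sources, row 1 = targets) is an assumption inferred from the csr_matrix call above:

import numpy as np

# 4 nodes, 4 directed edges: the cycle 0 -> 1 -> 2 -> 0 plus 2 -> 3
edge_array = np.array([[0, 1, 2, 2],
                       [1, 2, 0, 3]])
n_comp, labels = scc(4, 4, edge_array)
print(n_comp)   # 2 strongly connected components: {0, 1, 2} and {3}
print(labels)   # nodes 0-2 share one label, node 3 gets another
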
コード例 #40
0
ファイル: __init__.py プロジェクト: vikram-saha/scanpy
    def compute_neighbors(
        self,
        n_neighbors: int = 30,
        knn: bool = True,
        n_pcs: Optional[int] = None,
        use_rep: Optional[str] = None,
        method: _Method = 'umap',
        random_state: AnyRandom = 0,
        write_knn_indices: bool = False,
        metric: _Metric = 'euclidean',
        metric_kwds: Mapping[str, Any] = MappingProxyType({}),
    ) -> None:
        """\
        Compute distances and connectivities of neighbors.

        Parameters
        ----------
        n_neighbors
             Use this number of nearest neighbors.
        knn
             Restrict result to `n_neighbors` nearest neighbors.
        {n_pcs}
        {use_rep}

        Returns
        -------
        Writes sparse graph attributes `.distances` and `.connectivities`.
        Also writes `.knn_indices` and `.knn_distances` if
        `write_knn_indices==True`.
        """
        from sklearn.metrics import pairwise_distances
        start_neighbors = logg.debug('computing neighbors')
        if n_neighbors > self._adata.shape[0]:  # very small datasets
            n_neighbors = 1 + int(0.5*self._adata.shape[0])
            logg.warning(f'n_obs too small: adjusting to `n_neighbors = {n_neighbors}`')
        if method == 'umap' and not knn:
            raise ValueError('`method = \'umap\'` only with `knn = True`.')
        if method == 'rapids' and metric != 'euclidean':
            raise ValueError("`method` 'rapids' only supports the 'euclidean' `metric`.")
        if method not in {'umap', 'gauss', 'rapids'}:
            raise ValueError("`method` needs to be 'umap', 'gauss', or 'rapids'.")
        if self._adata.shape[0] >= 10000 and not knn:
            logg.warning('Using high n_obs without `knn=True` takes a lot of memory...')
        self.n_neighbors = n_neighbors
        self.knn = knn
        X = _choose_representation(self._adata, use_rep=use_rep, n_pcs=n_pcs)
        # neighbor search
        use_dense_distances = (metric == 'euclidean' and X.shape[0] < 8192) or knn == False
        if use_dense_distances:
            _distances = pairwise_distances(X, metric=metric, **metric_kwds)
            knn_indices, knn_distances = _get_indices_distances_from_dense_matrix(
                _distances, n_neighbors)
            if knn:
                self._distances = _get_sparse_matrix_from_indices_distances_numpy(
                    knn_indices, knn_distances, X.shape[0], n_neighbors)
            else:
                self._distances = _distances
        elif method == 'rapids':
            knn_indices, knn_distances = compute_neighbors_rapids(X, n_neighbors)
        else:
            # non-euclidean case and approx nearest neighbors
            if X.shape[0] < 4096:
                X = pairwise_distances(X, metric=metric, **metric_kwds)
                metric = 'precomputed'
            knn_indices, knn_distances, forest = compute_neighbors_umap(
                X, n_neighbors, random_state, metric=metric, metric_kwds=metric_kwds)
            # very cautious here
            try:
                if forest:
                    self._rp_forest = _make_forest_dict(forest)
            except:
                pass
        # write indices as attributes
        if write_knn_indices:
            self.knn_indices = knn_indices
            self.knn_distances = knn_distances
        start_connect = logg.debug('computed neighbors', time=start_neighbors)
        if not use_dense_distances or method in {'umap', 'rapids'}:
            # we need self._distances also for method == 'gauss' if we didn't
            # use dense distances
            self._distances, self._connectivities = _compute_connectivities_umap(
                knn_indices,
                knn_distances,
                self._adata.shape[0],
                self.n_neighbors,
            )
        # overwrite the umap connectivities if method is 'gauss'
        # self._distances is unaffected by this
        if method == 'gauss':
            self._compute_connectivities_diffmap()
        logg.debug('computed connectivities', time=start_connect)
        self._number_connected_components = 1
        if issparse(self._connectivities):
            from scipy.sparse.csgraph import connected_components
            self._connected_components = connected_components(self._connectivities)
            self._number_connected_components = self._connected_components[0]
コード例 #41
0
    def set_saturation_level_imposed_joined_coarse(self):

        levels = self.data_impress['LEVEL'].copy()
        dual_flag = self.data_impress['DUAL_1'].copy()
        gid1 = self.data_impress['GID_1']
        gid0 = self.data_impress['GID_0']
        level_0_ini = set(gid0[levels==0])
        saturation = self.data_impress['saturation']
        all_wells = set(self.all_wells_ids)
        gids_lv1_sat = set()
        gidsc = np.unique(gid1)
        internal_faces = self.elements_lv0['internal_faces']
        v0 = self.elements_lv0['neig_internal_faces']

        ds = saturation[v0]
        ds = np.absolute(ds[:,1] - ds[:,0])

        inds = ds >= self.delta_sat_max

        levels[v0[inds][:,0]] = 0
        levels[v0[inds][:,1]] = 0

        all_lv0 = set(gid0[levels==0])
        for gidc in gidsc:
            gids0 = gid0[gid1==gidc]
            vertex = gids0[dual_flag[gids0]==3]
            if (levels[gids0].max()-levels[gids0].min())>0:
                facs = np.unique(np.concatenate(self.elements_lv0['volumes_face_faces'][gids0]))
                facs = np.intersect1d(facs, internal_faces)
                ad = np.vstack(self.elements_lv0['faces_face_volumes'][facs])
                ad0 = ad[:, 0]
                ad1 = ad[:, 1]
                l0 = levels[ad0]
                l1 = levels[ad1]
                map_lid = -np.ones(max(gid0) + 1)
                map_lid[gids0] = np.arange(len(gids0))
                l0[map_lid[ad0] < 0] = -1
                l1[map_lid[ad1] < 0] = -1
                fadj1 = l0 + l1 >= 0
                # import pdb; pdb.set_trace()
                lines = map_lid[ad0[fadj1]].astype(int)
                cols = map_lid[ad1[fadj1]].astype(int)
                data = np.ones(len(lines))
                graph = csc_matrix((data, (lines, cols)), shape=(len(gids0), len(gids0)))
                n_l, labels = csgraph.connected_components(graph, connection='weak')
                groups = [gids0[labels == k] for k in range(n_l)]
                ls = np.array([len(g) for g in groups])
                print(ls.max())
                if ls.max() > 1:
                    vols_nv1 = np.array(groups)[ls == ls.max()][0]
                    levels[np.setdiff1d(gids0, vols_nv1)] = 0

                # import pdb; pdb.set_trace()


        for gidc in gidsc:
            gids0 = gid0[gid1==gidc]
            if set(gids0) & all_lv0:
                gids_fora = np.array(list(set(gids0) - all_lv0))
                if len(gids_fora) > 0:
                    levels[gids_fora] = 1
                gids_lv1_sat.add(gidc)

        cids_neigh = self.ml_data['coarse_id_neig_face_level_'+str(1)]
        cids_level = self.ml_data['coarse_primal_id_level_'+str(1)]

        for gidc in gids_lv1_sat:
            vizs = cids_neigh[cids_level==gidc][0]
            for viz in vizs:
                if set([viz]) & gids_lv1_sat:
                    continue
                gids0 = gid0[gid1==viz]
                if set(gids0) & all_lv0:
                    gids_fora = np.array(list(set(gids0) - all_lv0))
                    levels[gids_fora] = 1
                else:
                    levels[gids0] = 1

        self.data_impress['LEVEL'] = levels.copy()
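
The grouping step inside the first loop is easier to see in isolation; a minimal sketch with made-up local adjacency data (the cell ids and pairs below are synthetic stand-ins):

import numpy as np
from scipy.sparse import csc_matrix
from scipy.sparse import csgraph

gids0 = np.array([10, 11, 12, 13, 14])   # hypothetical fine-scale cell ids of one coarse volume
lines = np.array([0, 1, 3])               # local indices of adjacent cell pairs
cols = np.array([1, 0, 4])
data = np.ones(len(lines))

graph = csc_matrix((data, (lines, cols)), shape=(len(gids0), len(gids0)))
n_l, labels = csgraph.connected_components(graph, connection='weak')
groups = [gids0[labels == k] for k in range(n_l)]
ls = np.array([len(g) for g in groups])

# Keep only (one of) the largest connected group(s); the rest would be demoted to level 0
largest = groups[int(np.argmax(ls))]
print(largest)   # e.g. [10 11] (ties broken by label order)
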
コード例 #42
0
def get_separable_problems(problem):
    """Return a list of separable problems whose sum is the original one.

    Parameters
    ----------
    problem : Problem
        A problem that consists of separable (sub)problems.

    Returns
    -------
    List
        A list of problems which are separable whose sum is the original one.
    """
    # obj_terms contains the terms in the objective functions. We have to
    # deal with the special case where the objective function is not a sum.
    if isinstance(problem.objective.args[0], cvxtypes.add_expr()):
        obj_terms = problem.objective.args[0].args
    else:
        obj_terms = [problem.objective.args[0]]
    # Remove constant terms, which will be appended to the first separable
    # problem.
    constant_terms = [term for term in obj_terms if term.is_constant()]
    obj_terms = [term for term in obj_terms if not term.is_constant()]

    constraints = problem.constraints
    num_obj_terms = len(obj_terms)
    num_terms = len(obj_terms) + len(constraints)

    # Objective terms and constraints are indexed from 0 to num_terms - 1.
    var_sets = [frozenset(func.variables()) for func in obj_terms + constraints
                ]
    all_vars = frozenset().union(*var_sets)

    adj_matrix = dok_matrix((num_terms, num_terms), dtype=bool)
    for var in all_vars:
        # Find all functions that contain this variable
        term_ids = [i for i, var_set in enumerate(var_sets) if var in var_set]
        # Add an edge between any two objective terms/constraints sharing
        # this variable.
        if len(term_ids) > 1:
            for i, j in itertools.combinations(term_ids, 2):
                adj_matrix[i, j] = adj_matrix[j, i] = True
    num_components, labels = csgraph.connected_components(adj_matrix,
                                                          directed=False)

    # After splitting, construct subproblems from appropriate objective
    # terms and constraints.
    term_ids_per_subproblem = [[] for _ in range(num_components)]
    for i, label in enumerate(labels):
        term_ids_per_subproblem[label].append(i)
    problem_list = []
    for index in range(num_components):
        terms = [obj_terms[i] for i in term_ids_per_subproblem[index]
                 if i < num_obj_terms]
        # If we just call sum, we'll have an extra 0 in the objective.
        obj = sum(terms[1:], terms[0]) if terms else Constant(0)
        constrs = [constraints[i - num_obj_terms]
                   for i in term_ids_per_subproblem[index]
                   if i >= num_obj_terms]
        problem_list.append(Problem(problem.objective.copy([obj]), constrs))
    # Append constant terms to the first separable problem.
    if constant_terms:
        # Avoid adding an extra 0 in the objective
        sum_constant_terms = sum(constant_terms[1:], constant_terms[0])
        if problem_list:
            problem_list[0].objective.args[0] += sum_constant_terms
        else:
            problem_list.append(Problem(problem.objective.copy(
                [sum_constant_terms])))
    return problem_list
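
The separability test above is just connected components over a term-adjacency matrix; a standalone sketch of that core, with plain strings standing in for cvxpy Variables:

import itertools
from scipy.sparse import dok_matrix
from scipy.sparse import csgraph

# Four objective terms / constraints, each touching a set of variables
var_sets = [frozenset({'x'}), frozenset({'x', 'y'}), frozenset({'z'}), frozenset({'z', 'w'})]
num_terms = len(var_sets)

adj_matrix = dok_matrix((num_terms, num_terms), dtype=bool)
for var in frozenset().union(*var_sets):
    term_ids = [i for i, vs in enumerate(var_sets) if var in vs]
    if len(term_ids) > 1:
        for i, j in itertools.combinations(term_ids, 2):
            adj_matrix[i, j] = adj_matrix[j, i] = True

num_components, labels = csgraph.connected_components(adj_matrix, directed=False)
print(num_components, labels)   # 2 components: terms {0, 1} and terms {2, 3}
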
コード例 #43
0
ファイル: utils.py プロジェクト: sverdoot/netgan-without-gan
def train_val_test_split_adjacency(A,
                                   p_val=0.10,
                                   p_test=0.05,
                                   seed=0,
                                   neg_mul=1,
                                   every_node=True,
                                   connected=False,
                                   undirected=False,
                                   use_edge_cover=True,
                                   set_ops=True,
                                   asserts=False):
    """
    Split the edges of the adjacency matrix into train, validation and test edges
    and randomly samples equal amount of validation and test non-edges.
    Parameters
    ----------
    A : scipy.sparse.spmatrix
        Sparse unweighted adjacency matrix
    p_val : float
        Percentage of validation edges. Default p_val=0.10
    p_test : float
        Percentage of test edges. Default p_test=0.05
    seed : int
        Seed for numpy.random. Default seed=0
    neg_mul : int
        What multiplicity of negative samples (non-edges) to have in the test/validation set
        w.r.t. the number of edges, i.e. len(non-edges) = neg_mul * len(edges). Default neg_mul=1
    every_node : bool
        Make sure each node appears at least once in the train set. Default every_node=True
    connected : bool
        Make sure the training graph is still connected after the split
    undirected : bool
        Whether to make the split undirected, that is if (i, j) is in val/test set then (j, i) is there as well.
        Default undirected=False
    use_edge_cover: bool
        Whether to use (approximate) edge_cover to find the minimum set of edges that cover every node.
        Only active when every_node=True. Default use_edge_cover=True
    set_ops : bool
        Whether to use set operations to construct the test zeros. Default set_ops=True.
        Otherwise use a while loop.
    asserts : bool
        Unit test like checks. Default asserts=False
    Returns
    -------
    train_ones : array-like, shape [n_train, 2]
        Indices of the train edges
    val_ones : array-like, shape [n_val, 2]
        Indices of the validation edges
    val_zeros : array-like, shape [n_val, 2]
        Indices of the validation non-edges
    test_ones : array-like, shape [n_test, 2]
        Indices of the test edges
    test_zeros : array-like, shape [n_test, 2]
        Indices of the test non-edges
    """
    assert p_val + p_test > 0
    assert A.max() == 1  # no weights
    assert A.min() == 0  # no negative edges
    assert A.diagonal().sum() == 0  # no self-loops
    #assert not np.any(A.sum(0).A1 + A.sum(1).A1 == 0)  # no dangling nodes

    is_undirected = (A != A.T).nnz == 0

    if undirected:
        assert is_undirected  # make sure the graph is really undirected
        A = sp.tril(A).tocsr()  # keep only the lower triangle (one copy of each undirected edge)
        A.eliminate_zeros()
    else:
        if is_undirected:
            warnings.warn(
                'Graph appears to be undirected. Did you forget to set undirected=True?'
            )

    np.random.seed(seed)

    E = A.nnz
    N = A.shape[0]
    s_train = int(E * (1 - p_val - p_test))

    idx = np.arange(N)

    # hold some edges so each node appears at least once
    if every_node:
        if connected:
            #assert connected_components(A)[0] == 1  # make sure original graph is connected
            A_hold = minimum_spanning_tree(A)
        else:
            A.eliminate_zeros(
            )  # makes sure A.tolil().rows contains only indices of non-zero elements
            d = A.sum(1).A1

            if use_edge_cover:
                hold_edges = np.array(list(nx.maximal_matching(nx.DiGraph(A))))
                not_in_cover = np.array(
                    list(set(range(N)).difference(hold_edges.flatten())))

                # makes sure the training percentage is not smaller than N/E when every_node is set to True
                min_size = hold_edges.shape[0] + len(not_in_cover)
                if min_size > s_train:
                    raise ValueError(
                        'Training percentage too low to guarantee every node. Min train size needed {:.2f}'
                        .format(min_size / E))

                d_nic = d[not_in_cover]

                hold_edges_d1 = np.column_stack(
                    (not_in_cover[d_nic > 0],
                     np.row_stack(
                         map(np.random.choice,
                             A[not_in_cover[d_nic > 0]].tolil().rows))))

                if np.any(d_nic == 0):
                    hold_edges_d0 = np.column_stack((np.row_stack(
                        map(np.random.choice,
                            A[:, not_in_cover[d_nic == 0]].T.tolil().rows)),
                                                     not_in_cover[d_nic == 0]))
                    hold_edges = np.row_stack(
                        (hold_edges, hold_edges_d0, hold_edges_d1))
                else:
                    hold_edges = np.row_stack((hold_edges, hold_edges_d1))

            else:
                # makes sure the training percentage is not smaller than N/E when every_node is set to True
                if N > s_train:
                    raise ValueError(
                        'Training percentage too low to guarantee every node. Min train size needed {:.2f}'
                        .format(N / E))

                hold_edges_d1 = np.column_stack(
                    (idx[d > 0],
                     np.row_stack(map(np.random.choice,
                                      A[d > 0].tolil().rows))))

                if np.any(d == 0):
                    hold_edges_d0 = np.column_stack((np.row_stack(
                        map(np.random.choice,
                            A[:, d == 0].T.tolil().rows)), idx[d == 0]))
                    hold_edges = np.row_stack((hold_edges_d0, hold_edges_d1))
                else:
                    hold_edges = hold_edges_d1

            if asserts:
                assert np.all(A[hold_edges[:, 0], hold_edges[:, 1]])
                assert len(np.unique(hold_edges.flatten())) == N

            A_hold = edges_to_sparse(hold_edges, N)

        A_hold[A_hold > 1] = 1
        A_hold.eliminate_zeros()
        A_sample = A - A_hold

        s_train = s_train - A_hold.nnz
    else:
        A_sample = A

    idx_ones = np.random.permutation(A_sample.nnz)
    ones = np.column_stack(A_sample.nonzero())
    train_ones = ones[idx_ones[:s_train]]
    test_ones = ones[idx_ones[s_train:]]

    # return back the held edges
    if every_node:
        train_ones = np.row_stack(
            (train_ones, np.column_stack(A_hold.nonzero())))

    n_test = len(test_ones) * neg_mul
    if set_ops:
        # generate slightly more completely random non-edge indices than needed and discard any that hit an edge
        # much faster compared to a while loop
        # in the future: estimate the multiplicity (currently fixed 1.3/2.3) based on A_obs.nnz
        if undirected:
            random_sample = np.random.randint(0, N, [int(2.3 * n_test), 2])
            random_sample = random_sample[random_sample[:,
                                                        0] > random_sample[:,
                                                                           1]]
        else:
            random_sample = np.random.randint(0, N, [int(1.3 * n_test), 2])
            random_sample = random_sample[random_sample[:,
                                                        0] != random_sample[:,
                                                                            1]]

        test_zeros = random_sample[A[random_sample[:, 0],
                                     random_sample[:, 1]].A1 == 0]
        test_zeros = np.row_stack(test_zeros)[:n_test]
        #assert test_zeros.shape[0] == n_test
    else:
        test_zeros = []
        while len(test_zeros) < n_test:
            i, j = np.random.randint(0, N, 2)
            if A[i, j] == 0 and (not undirected
                                 or i > j) and (i, j) not in test_zeros:
                test_zeros.append((i, j))
        test_zeros = np.array(test_zeros)

    # split the test set into validation and test set
    s_val_ones = int(len(test_ones) * p_val / (p_val + p_test))
    s_val_zeros = int(len(test_zeros) * p_val / (p_val + p_test))

    val_ones = test_ones[:s_val_ones]
    test_ones = test_ones[s_val_ones:]

    val_zeros = test_zeros[:s_val_zeros]
    test_zeros = test_zeros[s_val_zeros:]

    if undirected:
        # put (j, i) edges for every (i, j) edge in the respective sets and form back original A
        symmetrize = lambda x: np.row_stack(
            (x, np.column_stack((x[:, 1], x[:, 0]))))
        train_ones = symmetrize(train_ones)
        val_ones = symmetrize(val_ones)
        val_zeros = symmetrize(val_zeros)
        test_ones = symmetrize(test_ones)
        test_zeros = symmetrize(test_zeros)
        A = A.maximum(A.T)

    if asserts:
        set_of_train_ones = set(map(tuple, train_ones))
        assert train_ones.shape[0] + test_ones.shape[0] + val_ones.shape[
            0] == A.nnz
        assert (edges_to_sparse(
            np.row_stack((train_ones, test_ones, val_ones)), N) != A).nnz == 0
        assert set_of_train_ones.intersection(set(map(tuple,
                                                      test_ones))) == set()
        assert set_of_train_ones.intersection(set(map(tuple,
                                                      val_ones))) == set()
        assert set_of_train_ones.intersection(set(map(tuple,
                                                      test_zeros))) == set()
        assert set_of_train_ones.intersection(set(map(tuple,
                                                      val_zeros))) == set()
        assert len(set(map(tuple, test_zeros))) == len(test_ones) * neg_mul
        assert len(set(map(tuple, val_zeros))) == len(val_ones) * neg_mul
        assert not connected or connected_components(A_hold)[0] == 1
        assert not every_node or ((A_hold - A) > 0).sum() == 0

    return train_ones, val_ones, val_zeros, test_ones, test_zeros
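
A usage sketch, assuming the module's edges_to_sparse helper and imports are in scope; the ring-plus-chords graph is purely illustrative, and use_edge_cover=False keeps the sketch free of the networkx matching step:

import numpy as np
import scipy.sparse as sp

# Synthetic undirected graph: a ring of 100 nodes plus second-neighbor chords (200 edges)
n = 100
nodes = np.arange(n)
rows = np.concatenate([nodes, nodes])
cols = np.concatenate([(nodes + 1) % n, (nodes + 2) % n])
A = sp.csr_matrix((np.ones(len(rows), dtype=int), (rows, cols)), shape=(n, n))
A = ((A + A.T) > 0).astype(int)   # symmetrize and keep entries binary

train_ones, val_ones, val_zeros, test_ones, test_zeros = train_val_test_split_adjacency(
    A, p_val=0.10, p_test=0.05, undirected=True, use_edge_cover=False)
print(train_ones.shape, val_ones.shape, test_ones.shape)
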
コード例 #44
0
ファイル: skater.py プロジェクト: pysal/region
    def find_cut(self,
                 MSF,
                 data=None,
                 quorum=-np.inf,
                 labels=None,
                 target_label=None,
                 make=False,
                 verbose=False):
        """
        Find the best cut from the MSF.
        
        MSF: (N,N) scipy sparse matrix with zero elements removed. 
             Represents the adjacency matrix for the minimum spanning forest. 
             Constructed from scipy.sparse.csgraph.csgraph_from_dense or by calling MSF.eliminate_zeros().
             You MUST remove zero entries for this to work, otherwise they are considered no-cost paths. 
        data: (N,p) attribute matrix. If not provided, replaced with (N,1) vector of ones. 
        quorum: int denoting the minimum number of elements in the region
        labels: (N,) flat vector of labels for each point. Represents the "cluster labels" 
                for disconnected components of the graph. 
        target_label: int from the labels array to subset the MSF. If passed along with `labels`, then a cut
                      will be found that is restricted to that subset of the MSF.
        make: bool, whether or not to modify the input MSF in order to make the best cut that was found. 
        verbose: bool/int, denoting how much output to provide to the user, in terms
                 of print statements or progressbars
                      
        Returns a namedtuple with in_node, out_node, and score. 
        """
        if data is None:
            data = np.ones((MSF.shape[0], 1))  # (N, 1) vector of ones, as documented above

        if (labels is None) != (target_label is None):
            raise ValueError("Both labels and target_label must be supplied! Only {} provided."\
                             .format(['labels', 'target_label'][int(labels is None)]))
        if verbose:
            try:
                from tqdm import tqdm
            except ImportError:

                def tqdm(noop, desc=''):
                    return noop
        else:

            def tqdm(noop, desc=''):
                return noop

        zero_in = (labels is not None) and (target_label is not None)
        current_n_subtrees, current_labels = cg.connected_components(
            MSF, directed=False)
        best_deletion = deletion(np.nan, np.nan, np.inf)
        for in_node, out_node in tqdm(
                np.vstack(MSF.nonzero()).T,
                desc='finding cut...'):  # iterate over MSF edges
            if zero_in:
                if labels[in_node] != target_label:
                    continue

            local_MSF = copy.deepcopy(MSF)

            # delete a candidate edge
            local_MSF[in_node, out_node] = 0
            local_MSF.eliminate_zeros()

            # get the connected components
            local_n_subtrees, local_labels = cg.connected_components(
                local_MSF, directed=False)

            if local_n_subtrees <= current_n_subtrees:
                raise Exception('Malformed MSF!')

            # compute the score of these components
            score = self.score(data, labels=local_labels, quorum=quorum)

            # if the score is lower than the best score and quorum is met
            if score < best_deletion.score:
                best_deletion = deletion(in_node, out_node, score)
        if make:
            return self.make_cut(*best_deletion, MSF=MSF)
        return best_deletion
コード例 #45
0
ファイル: skater.py プロジェクト: pysal/region
    def fit(self,
            n_clusters,
            W,
            data=None,
            quorum=-np.inf,
            trace=False,
            islands='increase',
            verbose=False):
        """
        n_clusters : int of clusters wanted
        W : pysal W object expressing the neighbor relationships between observations. 
            Should be symmetric and binary, so Queen/Rook, DistanceBand, or a symmetrized KNN.
        data: np.ndarray of (N,P) shape with N observations and P features
        quorum: floor on the size of regions.
        trace: bool denoting whether to store intermediate
               labelings as the tree gets pruned
        islands: string describing what to do with islands. 
                 If "ignore", will discover `n_clusters` regions, treating islands as their own regions.
                 If "increase", will discover `n_clusters` regions, treating islands as separate from n_clusters. 

        verbose: bool/int describing how much output to provide to the user, 
                 in terms of print statements or progressbars.
        
        NOTE: Optimization occurs with respect to a *dissimilarity* metric, so the problem *minimizes* 
              the map dissimilarity. So, lower scores are better. 
        """
        if trace:
            self._trace = []
        if data is None:
            attribute_kernel = np.ones((W.n, W.n))
            data = np.ones((W.n, 1))
        else:
            attribute_kernel = self.metric(data)
        W.transform = 'b'
        W = W.sparse
        start = time.time()

        super_verbose = verbose > 1
        start_W = time.time()
        dissim = W.multiply(attribute_kernel)
        dissim.eliminate_zeros()
        end_W = time.time() - start_W

        if super_verbose:
            print('Computing Affinity Kernel took {:.2f}s'.format(end_W))

        tree_time = time.time()
        MSF = cg.minimum_spanning_tree(dissim)
        tree_time = time.time() - tree_time
        if super_verbose:
            print('Computing initial MST took {:.2f}s'.format(tree_time))

        initial_component_time = time.time()
        current_n_subtrees, current_labels = cg.connected_components(
            MSF, directed=False)
        initial_component_time = time.time() - initial_component_time

        if super_verbose:
            print('Computing connected components took {:.2f}s.'.format(
                initial_component_time))

        if current_n_subtrees > 1:
            island_warnings = ['Increasing `n_clusters` from {} to {} in order to account for islands.'\
                              .format(n_clusters, n_clusters+current_n_subtrees),
                              'Counting islands towards the remaining {} clusters.'\
                              .format(n_clusters - (current_n_subtrees))]
            ignoring_islands = int(islands.lower() == 'ignore')
            chosen_warning = island_warnings[ignoring_islands]
            warn("By default, the graph is disconnected! {}".format(
                chosen_warning),
                 OptimizeWarning,
                 stacklevel=2)
            if not ignoring_islands:
                n_clusters += (current_n_subtrees)
            _, island_populations = np.unique(current_labels,
                                              return_counts=True)
            if (island_populations < quorum).any():
                raise ValueError(
                    "Islands must be larger than the quorum. If not, drop the small islands and solve for"
                    " clusters in the remaining field.")
        if trace:
            self._trace.append(
                (current_labels, deletion(np.nan, np.nan, np.inf)))
            if super_verbose:
                print(self._trace[-1])
        while current_n_subtrees < n_clusters:  # while we don't have enough regions
            best_deletion = self.find_cut(MSF,
                                          data,
                                          quorum=quorum,
                                          labels=None,
                                          target_label=None,
                                          verbose=verbose)

            if np.isfinite(best_deletion.score):  # if our search succeeds
                # accept the best move as *the* move
                if super_verbose:
                    print('making cut {}...'.format(best_deletion))
                MSF, current_n_subtrees, current_labels = self.make_cut(
                    *best_deletion, MSF=MSF)
            else:  # otherwise, it means the MSF admits no further cuts (no backtracking here)
                current_n_subtrees, current_labels = cg.connected_components(
                    MSF, directed=False)
                warn(
                    "MSF contains no valid moves after finding {} subtrees."
                    "Decrease the size of your quorum to find the remaining {} subtrees."
                    .format(current_n_subtrees,
                            n_clusters - current_n_subtrees),
                    OptimizeWarning,
                    stacklevel=2)
                self.current_labels_ = current_labels
                self.minimum_spanning_forest_ = MSF
                self._elapsed_time = time.time() - start
                return self
            if trace:
                self._trace.append((current_labels, best_deletion))

        self.current_labels_ = current_labels
        self.minimum_spanning_forest_ = MSF
        self._elapsed_time = time.time() - start
        return self
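
Stripped of the pysal bookkeeping, the core of fit() is: weight a sparse adjacency by attribute dissimilarity, build a minimum spanning tree, and track connected components as edges are cut. A minimal sketch of that pipeline (sklearn's pairwise_distances stands in for self.metric):

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import csgraph as cg
from sklearn.metrics import pairwise_distances

# 4 observations on a path graph 0-1-2-3, one attribute each
W = csr_matrix(np.array([[0, 1, 0, 0],
                         [1, 0, 1, 0],
                         [0, 1, 0, 1],
                         [0, 0, 1, 0]], dtype=float))
data = np.array([[0.0], [0.1], [5.0], [5.1]])

dissim = csr_matrix(W.multiply(pairwise_distances(data)))  # edge weight = attribute distance
dissim.eliminate_zeros()
MSF = cg.minimum_spanning_tree(dissim)
n_subtrees, labels = cg.connected_components(MSF, directed=False)
print(n_subtrees, labels)   # a single spanning tree before any cuts
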
コード例 #46
0
def transitive_closure(M):  # WARNING: Not for large M!
    labels = connected_components(M)[1]
    closure = csr(labels == labels[:, None])
    return closure
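
A quick demo of transitive_closure, assuming csr is bound to scipy.sparse.csr_matrix and connected_components is imported at module level, as the snippet implies:

import numpy as np
from scipy.sparse import csr_matrix as csr
from scipy.sparse.csgraph import connected_components

# Edge 0 -> 1; node 2 is isolated, so the weak components are {0, 1} and {2}
M = csr(np.array([[0, 1, 0],
                  [0, 0, 0],
                  [0, 0, 0]]))
print(transitive_closure(M).toarray().astype(int))
# [[1 1 0]
#  [1 1 0]
#  [0 0 1]]
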
コード例 #47
0
    def __init__(self, adata: AnnData, n_dcs: Optional[int] = None):
        self._adata = adata
        self._init_iroot()
        # use the graph in adata
        info_str = ''
        self.knn: Optional[bool] = None
        self._distances: Union[np.ndarray, csr_matrix, None] = None
        self._connectivities: Union[np.ndarray, csr_matrix, None] = None
        self._transitions_sym: Union[np.ndarray, csr_matrix, None] = None
        self._number_connected_components: Optional[int] = None
        self._rp_forest: Optional[RPForestDict] = None
        if 'neighbors' in adata.uns:
            if 'distances' in adata.uns['neighbors']:
                self.knn = issparse(adata.uns['neighbors']['distances'])
                self._distances = adata.uns['neighbors']['distances']
            if 'connectivities' in adata.uns['neighbors']:
                self.knn = issparse(adata.uns['neighbors']['connectivities'])
                self._connectivities = adata.uns['neighbors']['connectivities']
            if 'rp_forest' in adata.uns['neighbors']:
                self._rp_forest = adata.uns['neighbors']['rp_forest']
            if 'params' in adata.uns['neighbors']:
                self.n_neighbors = adata.uns['neighbors']['params'][
                    'n_neighbors']
            else:

                def count_nonzero(a: Union[np.ndarray, csr_matrix]) -> int:
                    return a.count_nonzero() if issparse(
                        a) else np.count_nonzero(a)

                # estimating n_neighbors
                if self._connectivities is None:
                    self.n_neighbors = int(
                        count_nonzero(self._distances) /
                        self._distances.shape[0])
                else:
                    self.n_neighbors = int(
                        count_nonzero(self._connectivities) /
                        self._connectivities.shape[0] / 2)
            info_str += '`.distances` `.connectivities` '
            self._number_connected_components = 1
            if issparse(self._connectivities):
                from scipy.sparse.csgraph import connected_components
                self._connected_components = connected_components(
                    self._connectivities)
                self._number_connected_components = self._connected_components[
                    0]
        if 'X_diffmap' in adata.obsm_keys():
            self._eigen_values = _backwards_compat_get_full_eval(adata)
            self._eigen_basis = _backwards_compat_get_full_X_diffmap(adata)
            if n_dcs is not None:
                if n_dcs > len(self._eigen_values):
                    raise ValueError(
                        'Cannot instantiate using `n_dcs`={}. '
                        'Compute diffmap/spectrum with more components first.'.
                        format(n_dcs))
                self._eigen_values = self._eigen_values[:n_dcs]
                self._eigen_basis = self._eigen_basis[:, :n_dcs]
            self.n_dcs = len(self._eigen_values)
            info_str += '`.eigen_values` `.eigen_basis` `.distances_dpt`'
        else:
            self._eigen_values = None
            self._eigen_basis = None
            self.n_dcs = None
        if info_str != '':
            logg.debug(f'    initialized {info_str}')
コード例 #48
0
def autoSegment2(mol,
                 sel='(protein or resname ACE NME)',
                 basename='P',
                 fields=('segid', ),
                 residgaps=False,
                 residgaptol=1,
                 chaingaps=True,
                 mode='alphanumeric',
                 _logger=True):
    """ Detects bonded segments in a selection and assigns incrementing segid to each segment

    Parameters
    ----------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The Molecule object
    sel : str
        Atom selection string on which to check for gaps.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    basename : str
        The basename for segment ids. For example if given 'P' it will name the segments 'P1', 'P2', ...
    fields : tuple of strings
        Field to fix. Can be "segid" (default) or any other Molecule field or combinations thereof.
    residgaps : bool
        Set to True to consider gaps in resids as structural gaps. Set to False to ignore resids
    residgaptol : int
        Above what resid difference is considered a gap. I.e. with residgaptol 1, 235-233 = 2 > 1 hence is a gap. We set
        default to 2 because in many PDBs single residues are missing in the proteins without any gaps.
    chaingaps : bool
        Set to True to consider changes in chains as structural gaps. Set to False to ignore chains
    mode : str
        If set to 'numeric' it will use numbers for segment IDs.
        If set to 'alphabetic' it will use letters for segment IDs.
        If set to 'alphanumeric' it will use both numbers and letters for segment IDs.

    Returns
    -------
    newmol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        A new Molecule object with modified segids

    Example
    -------
    >>> newmol = autoSegment2(mol)
    """
    from scipy.sparse import csr_matrix
    from scipy.sparse.csgraph import connected_components

    if isinstance(fields, str):
        fields = (fields, )

    sel += ' and backbone or (resname NME ACE and name N C O CH3)'  # Looking for bonds only over the backbone of the protein
    idx = mol.atomselect(
        sel, indexes=True
    )  # Keep the original atom indexes to map from submol to mol
    submol = mol.copy(
    )  # We filter out everything not on the backbone to calculate only those bonds
    submol.filter(sel, _logger=False)
    bonds = submol._getBonds()  # Calculate both file and guessed bonds

    if residgaps:
        # Remove bonds between residues without continuous resids
        bondresiddiff = np.abs(submol.resid[bonds[:, 0]] -
                               submol.resid[bonds[:, 1]])
        bonds = bonds[bondresiddiff <= residgaptol, :]
    else:
        # Warning about bonds bonding non-continuous resids
        bondresiddiff = np.abs(submol.resid[bonds[:, 0]] -
                               submol.resid[bonds[:, 1]])
        if _logger and np.any(bondresiddiff > 1):
            for i in np.where(bondresiddiff > residgaptol)[0]:
                logger.warning(
                    'Bonds found between resid gaps: resid {} and {}'.format(
                        submol.resid[bonds[i, 0]], submol.resid[bonds[i, 1]]))
    if chaingaps:
        # Remove bonds between residues without same chain
        bondsamechain = submol.chain[bonds[:, 0]] == submol.chain[bonds[:, 1]]
        bonds = bonds[bondsamechain, :]
    else:
        # Warning about bonds bonding different chains
        bondsamechain = submol.chain[bonds[:, 0]] == submol.chain[bonds[:, 1]]
        if _logger and np.any(bondsamechain == False):
            for i in np.where(bondsamechain == False)[0]:
                logger.warning(
                    'Bonds found between chain gaps: resid {}/{} and {}/{}'.
                    format(submol.resid[bonds[i, 0]], submol.chain[bonds[i,
                                                                         0]],
                           submol.resid[bonds[i, 1]], submol.chain[bonds[i,
                                                                         1]]))

    # Calculate connected components using the bonds
    sparsemat = csr_matrix(
        (
            np.ones(bonds.shape[0] * 2),  # Values
            (
                np.hstack((bonds[:, 0], bonds[:, 1])),  # Rows
                np.hstack((bonds[:, 1], bonds[:, 0])))),
        shape=[submol.numAtoms, submol.numAtoms])  # Columns
    numcomp, compidx = connected_components(sparsemat, directed=False)

    # Letters to be used for chains, if free: 0123456789abcd...ABCD..., minus chain symbols already used
    used_chains = set(mol.chain)
    chain_alphabet = _getChainAlphabet(mode)
    available_chains = [x for x in chain_alphabet if x not in used_chains]

    mol = mol.copy()
    prevsegres = None
    for i in range(numcomp):  # For each connected component / segment
        segid = basename + str(i)
        backboneSegIdx = idx[compidx == i]  # The backbone atoms of the segment
        segres = mol.atomselect('same residue as index {}'.format(' '.join(
            map(str, backboneSegIdx))))  # Get whole residues

        # Warning about separating segments with continuous resids
        if _logger and i > 0 and (np.min(mol.resid[segres]) -
                                  np.max(mol.resid[prevsegres])) == 1:
            logger.warning(
                'Separated segments {} and {}, despite continuous resids, due to lack of bonding.'
                .format(basename + str(i - 1), segid))

        # Add the new segment ID to all fields the user specified
        for f in fields:
            if f != 'chain':
                if np.any(mol.__dict__[f] == segid):
                    raise RuntimeError(
                        'Segid {} already exists in the molecule. Please choose different prefix.'
                        .format(segid))
                mol.__dict__[f][
                    segres] = segid  # Assign the segid to the correct atoms
            else:
                mol.__dict__[f][segres] = available_chains[
                    i % len(available_chains)]
        if _logger:
            logger.info('Created segment {} between resid {} and {}.'.format(
                segid, np.min(mol.resid[segres]), np.max(mol.resid[segres])))
        prevsegres = segres  # Store old segment atom indexes for the warning about continuous resids

    return mol
コード例 #49
0
def calc_scc(graph: csr_matrix):
    return connected_components(csgraph=graph,
                                directed=True,
                                connection="strong",
                                return_labels=True)[1]
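
A tiny usage sketch for calc_scc; the graph below is synthetic:

import numpy as np
from scipy.sparse import csr_matrix

# Directed cycle 0 -> 1 -> 2 -> 0 plus an extra edge 2 -> 3
graph = csr_matrix((np.ones(4), ([0, 1, 2, 2], [1, 2, 0, 3])), shape=(4, 4))
labels = calc_scc(graph)
print(labels)   # nodes 0-2 share one SCC label, node 3 has its own (exact label values are implementation-defined)
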
コード例 #50
0
def _fix_connectivity(X, connectivity, affinity):
    """
    Fixes the connectivity matrix.

    The different steps are:

    - copies it
    - makes it symmetric
    - converts it to LIL if necessary
    - completes it if necessary.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix representing `n_samples` samples to be clustered.

    connectivity : sparse matrix, default=None
        Connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is `None`, i.e, the Ward algorithm is unstructured.

    affinity : {"euclidean", "precomputed"}, default="euclidean"
        Which affinity to use. At the moment `precomputed` and
        ``euclidean`` are supported. `euclidean` uses the
        negative squared Euclidean distance between points.

    Returns
    -------
    connectivity : sparse matrix
        The fixed connectivity matrix.

    n_connected_components : int
        The number of connected components in the graph.
    """
    n_samples = X.shape[0]
    if connectivity.shape[0] != n_samples or connectivity.shape[1] != n_samples:
        raise ValueError(
            "Wrong shape for connectivity matrix: %s when X is %s"
            % (connectivity.shape, X.shape)
        )

    # Make the connectivity matrix symmetric:
    connectivity = connectivity + connectivity.T

    # Convert connectivity matrix to LIL
    if not sparse.isspmatrix_lil(connectivity):
        if not sparse.isspmatrix(connectivity):
            connectivity = sparse.lil_matrix(connectivity)
        else:
            connectivity = connectivity.tolil()

    # Compute the number of nodes
    n_connected_components, labels = connected_components(connectivity)

    if n_connected_components > 1:
        warnings.warn(
            "the number of connected components of the "
            "connectivity matrix is %d > 1. Completing it to avoid "
            "stopping the tree early." % n_connected_components,
            stacklevel=2,
        )
        # XXX: Can we do without completing the matrix?
        connectivity = _fix_connected_components(
            X=X,
            graph=connectivity,
            n_connected_components=n_connected_components,
            component_labels=labels,
            metric=affinity,
            mode="connectivity",
        )

    return connectivity, n_connected_components
コード例 #51
0
def getMoves():
    updateIsMunched()
    updateAdjMatrix()
    n_comp, labels = connected_components(adjmatrix, False, 'weak', True)
    numMunchersToDeploy = getNumMunchersToDeploy()
    print numMunchersToDeploy
    updateMunched()
    munched.update(myMuncherPositions)
    nodesToCheck = []
    program_opp = []
    program_gen = []
    program = []
    if len(otherLiveMunchers) > 0:
        nodesToCheck = getOpponentsNeighborNodes()
        if len(nodesToCheck) > 0:
            munchers, prog, pathCount = getMunchers(nodesToCheck)
            munch = []
            program = []
            count = []
            for i in xrange(len(munchers)):
                if munchers[i] not in munch:
                    munch.append(munchers[i])
                    program.append(prog[i])
                    count.append(pathCount[i])
            program = zip(munch, program, count)
            program_opp = sorted(program,
                                 key=lambda prog: prog[2],
                                 reverse=True)
        #if len(nodesToCheck) < numMunchersToDeploy:
        components = getNLargestConnectedComponents(numMunchersToDeploy,
                                                    n_comp, labels)
        nodesToCheck = []
        for component in components:
            if len(component) > 2:
                nodesToCheck.append(getNodesWithMinWeight(component))
            else:
                nodesToCheck.append(component)
        munchers, prog, pathCount = getMunchers(nodesToCheck)
        munch = []
        program = []
        count = []
        for i in xrange(len(munchers)):
            if munchers[i] not in munch:
                munch.append(munchers[i])
                program.append(prog[i])
                count.append(pathCount[i])
        program = zip(munch, program, count)
        program_gen = sorted(program, key=lambda prog: prog[2], reverse=True)
        print program_opp
        print program_gen
        if len(program_opp) > 0:
            program = []
            temp_opp = []
            temp_gen = []
            for prog in program_opp:
                if prog[2] > 0:
                    program.append(prog)
                else:
                    temp_opp.append(prog)
            for prog in program_gen:
                if prog[2] > 0:
                    program.append(prog)
                else:
                    temp_gen.append(prog)
            for prog in temp_opp:
                program.append(prog)
            for prog in temp_gen:
                program.append(prog)
        print program
    else:
        components = getNLargestConnectedComponents(numMunchersToDeploy,
                                                    n_comp, labels)
        for component in components:
            if len(component) > 2:
                nodesToCheck.append(getNodesWithMinWeight(component))
            else:
                nodesToCheck.append(component)
        munchers, prog, pathCount = getMunchers(nodesToCheck)
        munch = []
        program = []
        count = []
        for i in xrange(len(munchers)):
            if munchers[i] not in munch:
                munch.append(munchers[i])
                program.append(prog[i])
                count.append(pathCount[i])
        program = zip(munch, program, count)
        program = sorted(program, key=lambda prog: prog[2], reverse=True)
    prog = []
    for item in program:
        prog.append(str(item[0]) + '/' + str(item[1]))
    #prog = ['{}/{}'.format(a, b) for a, b in zip(munchers, prog)]
    if len(otherLiveMunchers) > 0:
        if remainingStuff[0] >= numMunchersToDeploy:
            prog = prog[:numMunchersToDeploy]
        else:
            prog = prog[:remainingStuff[0]]
    else:
        prog = prog[:1]
    prog = str(len(prog)) + ':' + ','.join(prog)
    print prog
    return prog
コード例 #52
0
ファイル: friendsuggestions.py プロジェクト: yamahagi/atcoder
    a, b = map(int, input().split())
    a -= 1
    b -= 1
    abl.append((a, b))
    adict[a] += 1
    adict[b] += 1
    A[a][b] = 1
    A[b][a] = 1
cdl = []
for _ in range(k):
    c, d = map(int, input().split())
    c -= 1
    d -= 1
    cdl.append((c, d))

kk, labels = connected_components(np.array(A))
ldict = [0 for _ in range(max(labels) + 1)]
for label in labels:
    ldict[label] += 1
cdict = [0 for _ in range(n)]
for (c, d) in cdl:
    if labels[c] == labels[d]:
        if c not in cdict:
            cdict[c] = 1
        else:
            cdict[c] += 1
        if d not in cdict:
            cdict[d] = 1
        else:
            cdict[d] += 1
s = ""
コード例 #53
0
ファイル: random.py プロジェクト: dfhljf/pyswarms
    def __compute_neighbors(self, swarm, k):
        """Helper method to compute the adjacency matrix of the topology

        This method computes the adjacency matrix of the topology using
        the randomized algorithm proposed in [TSWJ2013]. The resulting
        topology is a connected graph. This is achieved by creating three
        matrices:

            * adj_matrix :  The adjacency matrix of the generated graph.
                            It's initialized as an identity matrix to
                            make sure that every particle has itself as
                            a neighbour. This matrix is the return
                            value of the method.
            * neighbor_matrix : The matrix of randomly generated neighbors.
                                This matrix is a matrix of shape
                                :code:`(swarm.n_particles, k)`:
                                with randomly generated elements. It's used
                                to create connections in the adj_matrix.
            * dist_matrix : The distance matrix computed with Dijkstra's
                            algorithm. It is used to determine where the
                            graph needs edges to change it to a connected
                            graph.

        Parameters
        ----------
        swarm : pyswarms.backend.swarms.Swarm
            a Swarm instance
        k : int
            number of neighbors to be considered. Must be a
            positive integer less than :code:`n_particles-1`

        Returns
        -------
        numpy.ndarray
            Adjacency matrix of the topology
        """

        adj_matrix = np.identity(swarm.n_particles, dtype=int)

        neighbor_matrix = np.array([
            np.random.choice(
                # Exclude i from the array
                np.setdiff1d(np.arange(swarm.n_particles), np.array([i])),
                k,
                replace=False,
            ) for i in range(swarm.n_particles)
        ])

        # Set random elements to one using the neighbor matrix
        adj_matrix[np.arange(swarm.n_particles).reshape(swarm.n_particles, 1),
                   neighbor_matrix, ] = 1
        adj_matrix[
            neighbor_matrix,
            np.arange(swarm.n_particles).reshape(swarm.n_particles, 1), ] = 1

        dist_matrix = dijkstra(
            adj_matrix,
            directed=False,
            return_predecessors=False,
            unweighted=True,
        )

        # Generate connected graph.
        while (connected_components(
                adj_matrix, directed=False, return_labels=False) != 1):
            for i, j in itertools.product(range(swarm.n_particles), repeat=2):
                if dist_matrix[i][j] == np.inf:
                    adj_matrix[i][j] = 1

        return adj_matrix
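
A minimal standalone sketch of the repair idea above (toy matrix and names, not pyswarms' API): when connected_components reports more than one component, every pair at infinite Dijkstra distance gets a direct edge.

import numpy as np
from scipy.sparse.csgraph import connected_components, dijkstra

adj = np.array([[1, 1, 0, 0],   # two disconnected blocks
                [1, 1, 0, 0],
                [0, 0, 1, 1],
                [0, 0, 1, 1]])
if connected_components(adj, directed=False, return_labels=False) != 1:
    dist = dijkstra(adj, directed=False, unweighted=True,
                    return_predecessors=False)
    adj[np.isinf(dist)] = 1     # connect every currently unreachable pair
print(connected_components(adj, directed=False, return_labels=False))  # 1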
コード例 #54
0
def simplify_links(n):
    ## Complex multi-node links are folded into end-points
    logger.info("Simplifying connected link components")

    if n.links.empty:
        return n, n.buses.index.to_series()

    # Determine connected link components, ignore all links but DC
    adjacency_matrix = n.adjacency_matrix(
        branch_components=['Link'],
        weights=dict(Link=(n.links.carrier == 'DC').astype(float)))

    _, labels = connected_components(adjacency_matrix, directed=False)
    labels = pd.Series(labels, n.buses.index)

    G = n.graph()

    def split_links(nodes):
        nodes = frozenset(nodes)

        seen = set()
        supernodes = {
            m
            for m in nodes if len(G.adj[m]) > 2 or (set(G.adj[m]) - nodes)
        }

        for u in supernodes:
            for m, ls in iteritems(G.adj[u]):
                if m not in nodes or m in seen: continue

                buses = [u, m]
                links = [list(ls)]  # [name for name in ls]

                while m not in (supernodes | seen):
                    seen.add(m)
                    for m2, ls in iteritems(G.adj[m]):
                        if m2 in seen or m2 == u: continue
                        buses.append(m2)
                        links.append(list(ls))  # [name for name in ls]
                        break
                    else:
                        # stub
                        break
                    m = m2
                if m != u:
                    yield pd.Index((u, m)), buses, links
            seen.add(u)

    busmap = n.buses.index.to_series()

    connection_costs_per_link = _prepare_connection_costs_per_link(n)
    connection_costs_to_bus = pd.DataFrame(
        0., index=n.buses.index, columns=list(connection_costs_per_link))

    for lbl in labels.value_counts().loc[lambda s: s > 2].index:

        for b, buses, links in split_links(labels.index[labels == lbl]):
            if len(buses) <= 2: continue

            logger.debug('nodes = {}'.format(labels.index[labels == lbl]))
            logger.debug('b = {}\nbuses = {}\nlinks = {}'.format(
                b, buses, links))

            m = sp.spatial.distance_matrix(
                n.buses.loc[b, ['x', 'y']], n.buses.loc[buses[1:-1],
                                                        ['x', 'y']])
            busmap.loc[buses] = b[np.r_[0, m.argmin(axis=0), 1]]
            connection_costs_to_bus.loc[
                buses] += _compute_connection_costs_to_bus(
                    n, busmap, connection_costs_per_link, buses)

            all_links = [i for _, i in sum(links, [])]

            p_max_pu = snakemake.config['links'].get('p_max_pu', 1.)
            lengths = n.links.loc[all_links, 'length']
            name = lengths.idxmax() + '+{}'.format(len(links) - 1)
            params = dict(carrier='DC',
                          bus0=b[0],
                          bus1=b[1],
                          length=sum(n.links.loc[[i for _, i in l],
                                                 'length'].mean()
                                     for l in links),
                          p_nom=min(n.links.loc[[i for _, i in l],
                                                'p_nom'].sum() for l in links),
                          underwater_fraction=sum(
                              lengths / lengths.sum() *
                              n.links.loc[all_links, 'underwater_fraction']),
                          p_max_pu=p_max_pu,
                          p_min_pu=-p_max_pu,
                          underground=False,
                          under_construction=False)

            logger.info(
                "Joining the links {} connecting the buses {} to simple link {}"
                .format(", ".join(all_links), ", ".join(buses), name))

            n.mremove("Link", all_links)

            static_attrs = n.components["Link"]["attrs"].loc[
                lambda df: df.static]
            for attr, default in static_attrs.default.iteritems():
                params.setdefault(attr, default)
            n.links.loc[name] = pd.Series(params)

            # n.add("Link", **params)

    logger.debug("Collecting all components using the busmap")

    _aggregate_and_move_components(n, busmap, connection_costs_to_bus)
    return n, busmap
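
For reference, a toy, PyPSA-free sketch (invented bus names) of the grouping step above: component labels from connected_components are wrapped in a pandas Series indexed by bus, so multi-bus link components can later be selected by label.

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

buses = pd.Index(['b0', 'b1', 'b2', 'b3'], name='Bus')
dc_adjacency = csr_matrix(np.array([[0, 1, 0, 0],   # b0-b1-b2 form a DC chain
                                    [0, 0, 1, 0],
                                    [0, 0, 0, 0],
                                    [0, 0, 0, 0]]))
_, labels = connected_components(dc_adjacency, directed=False)
labels = pd.Series(labels, buses)
print(labels.value_counts())  # label 0 holds b0, b1, b2; b3 is on its own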
コード例 #55
0
def generate_process_mining(dataframe,
                            user_id_column='user_id',
                            time_column='time',
                            event_label_columns=['event_name', 'event_type'],
                            types_to_include=None,
                            filter_encoding_dict={},
                            num_nodes=15,
                            edge_weight_lower_bound=5):

    df_f = dataframe
    abbreviation_dict = filter_encoding_dict

    # node name preprocessing
    print('Creating the column "node_name"...')

    if types_to_include:
        type_df_filter = pd.DataFrame({'event_type': types_to_include})
        df_f = df_f.merge(type_df_filter, how='inner', on='event_type')

    df_f = df_f.sort_values(by=[user_id_column, time_column]).reset_index(
        drop=True)

    num_users = df_f[user_id_column].nunique()
    num_actions = len(df_f[user_id_column])

    df_freq = df_f.groupby([
        'event_name', 'event_type'
    ]).size().reset_index(name='counts').sort_values(by='counts',
                                                     ascending=False)

    # filter out overly prevalent actions (appearing more than 2x the number of users)
    df_low_count = df_freq[df_freq.counts < (num_users * 2)]
    df_f_f1 = df_f.merge(df_low_count,
                         how='inner',
                         on=['event_name', 'event_type'])

    # Apply abbreviations only when an abbreviation dict was provided
    Apply_ABB = len(abbreviation_dict) > 0

    # Create The column node_name
    if isinstance(event_label_columns, str):
        df_f['node_name'] = df_f[event_label_columns]
    else:
        df_f['node_name'] = df_f[event_label_columns[0]]

        for i in range(1, len(event_label_columns)):
            df_f['node_name'] = df_f['node_name'] + ' - ' + df_f[
                event_label_columns[i]]

    # Node name edits and abbreviations
    if Apply_ABB:
        # replace() already handles the whole abbreviation dict in one call
        df_f['node_name'] = df_f['node_name'].replace(
            abbreviation_dict, regex=True)

    # choose top k most frequent events
    k = num_nodes

    df_enc_event = df_f[[user_id_column, time_column, 'node_name']]
    df_enc_event = df_enc_event.rename(columns={
        user_id_column: 'enc_id',
        time_column: 'time_diff'
    })

    # drop duplicate node_names (might be of different time_diff)
    df_enc_event = df_enc_event.drop_duplicates(subset=['enc_id', 'node_name'])

    # find the sorted event types
    def my_f(x):
        d = []
        d.append(len(x['node_name']))
        d.append(x['time_diff'].median())
        return pd.Series(d)  #, index=[['count', 'time_avg']])

    df_sorted = df_enc_event.groupby('node_name').apply(
        my_f).reset_index().rename(columns={
            0: 'count',
            1: 'time_avg'
        }).sort_values('count',
                       ascending=False)
    df_sorted = df_sorted.head(k)

    # filter the events to include top k events
    df_enc_event = df_enc_event.merge(df_sorted, how='inner', on='node_name')[[
        'enc_id', 'node_name', 'time_diff', 'time_avg'
    ]].sort_values(['enc_id', 'time_diff'],
                   ascending=True).reset_index(drop=True)
    df_enc_event = df_enc_event[df_enc_event.time_diff >= 0].reset_index(
        drop=True)

    # Create table of unique encounter id

    unq_PC_enc_event = df_enc_event.groupby([
        'enc_id'
    ])['node_name'].apply(list).reset_index().rename(columns={
        'enc_id': 'enc_id',
        0: 'event_list'
    }).sort_values('enc_id', ascending=True)
    unq_PC_enc_time = df_enc_event.groupby([
        'enc_id'
    ])['time_diff'].apply(list).reset_index().rename(columns={
        'enc_id': 'enc_id',
        0: 'time_diff'
    }).sort_values('enc_id', ascending=True)

    unq_PC_enc = unq_PC_enc_event.merge(unq_PC_enc_time,
                                        how='left',
                                        on='enc_id')

    superlist = unq_PC_enc['node_name'].tolist()
    superlist_time = unq_PC_enc['time_diff'].tolist()

    # Get consecutive pairs

    A = {}
    A['node1'] = []
    A['node2'] = []
    A['time_weight'] = []
    A['node1_time'] = []
    A['node2_time'] = []

    df_g = pd.DataFrame(A)

    for i in range(len(superlist)):
        t = 0
        for x, y in zip(superlist[i], superlist[i][1:]):
            t += 1
            td = superlist_time[i][t] - superlist_time[i][t - 1]
            df_temp = pd.DataFrame({
                'node1': [x],
                'node2': [y],
                'time_weight': [td],
                'node1_time':
                [df_sorted[df_sorted.node_name == x].time_avg.iloc[0]],
                'node2_time':
                [df_sorted[df_sorted.node_name == y].time_avg.iloc[0]]
            })
            df_g = pd.concat([df_g, df_temp])

    # Save the weighted edges

    E = {}  # weighted edge frequency
    T = {}
    Node1_Time = {}
    Node2_Time = {}

    for i in range(len(superlist)):
        t = 0
        for x, y in zip(superlist[i], superlist[i][1:]):
            t += 1
            td = superlist_time[i][t] - superlist_time[i][t - 1]
            t1 = df_sorted[df_sorted.node_name == x].time_avg.iloc[0]
            t2 = df_sorted[df_sorted.node_name == y].time_avg.iloc[0]

            if (x, y) not in E.keys():
                E[(x, y)] = 1
                T[(x, y)] = td
                Node1_Time[(x, y)] = t1
                Node2_Time[(x, y)] = t2
            else:
                E[(x, y)] += 1
                T[(x, y)] += td

    # Filter edges and save to a list of edges with different weight types

    alpha = edge_weight_lower_bound  # lower bound on an edge's frequency weight to appear in the final graph

    A = []
    E_max = 0  # maximum frequency of the edges
    for e in E.keys():

        if E[e] > alpha and e[0] != e[1]:

            t1 = Node1_Time[e]  #/E[e]
            t2 = Node2_Time[e]  #/E[e]
            rtw = T[e] / E[e]  # average relative time over all edges of this type
            tw = t2 - t1

            e_type = (e[0], e[1], tw, E[e], t1, t2, rtw)

            if True:  #t1<=t2 and tw<up_threshold and
                A.append(e_type)

                # find maximum freq of edges in order to normalize the freq
                if E[e] > E_max:
                    E_max = E[e]

    # Find Adjacency matrix

    # assign index for the vertices
    I = {}  # index dict
    i = 0
    for e in A:
        if e[0] not in I.keys():
            I[e[0]] = i
            i += 1

        if e[1] not in I.keys():
            I[e[1]] = i
            i += 1

    #######################
    # create adjacency matrix
    I_inv = {value: key for (key, value) in I.items()}

    Adj = np.zeros((i, i))
    Adj_lag = np.zeros((i, i))
    for e in A:
        Adj[I[e[0]], I[e[1]]] = e[3]
        Adj_lag[I[e[0]], I[e[1]]] = e[6]

    I_org = I

    # Normalize the weights to be probability
    for i in range(Adj.shape[0]):
        if np.sum(Adj[i]) > 0:
            Adj[i] = Adj[i] / np.sum(Adj[i])

    if True:  #not node_clustering:
        dot = graphviz.Digraph()
        for e in A:

            prob = Adj[I[e[0]], I[
                e[1]]]  # probability weight from the adjacency matrix
            lag = Adj_lag[I[e[0]], I[
                e[1]]]  # time-lag weight from the lag matrix
            label = '(' + str(int(prob * 100) / 100) + ', ' + str(
                int(lag * 100) / 100) + ')'
            dot.edge(e[0], e[1], label=label,
                     penwidth=str(3 * prob))  #, penwidth=1)
            #dot.edge.attr['penwidth'] = 1

        engin = sorted(graphviz.ENGINES)[0]
        dot.render('process_graph_no_clustering.gv', view=False)

    ### Apply graph clustering

    #############################
    # Create A_eta: threshold graph
    eta = 0.1  # threshold for grouping nodes into connected subgraphs

    # find edges whose time weight is below the threshold and whose endpoints share the same type prefix
    A_eta = [
        e for e in A
        if (e[2] <= eta and e[0].split("-")[0] == e[1].split("-")[0])
    ]

    #############################
    # assign index for the vertices

    I = {}  # index dict
    i = 0
    for e in A_eta:
        if e[0] not in I.keys():
            I[e[0]] = i
            i += 1

        if e[1] not in I.keys():
            I[e[1]] = i
            i += 1

    #######################
    # create adjacency matrix
    I_inv = {value: key for (key, value) in I.items()}
    Adj_sh = np.zeros((i, i))
    for e in A_eta:
        Adj_sh[I[e[0]], I[e[1]]] = 1
        Adj_sh[I[e[1]], I[e[0]]] = 1

    ########################
    #### extract connected components
    graph = csr_matrix(Adj_sh)
    n_components, labels = connected_components(csgraph=graph,
                                                directed=False,
                                                return_labels=True)

    #Create Node names for the clustered components:
    W = {}

    for i in range(np.max(labels) + 1):
        I_eq = np.arange(labels.shape[0])[labels == i]
        W[i] = I_inv[I_eq[0]].split("-")[0]  # cluster name starts with the shared type prefix
        W[i] = W[i] + ' ['
        k = 0
        for j in I_eq:
            if k > 0:
                W[i] = W[i] + ' ,\n'
            k += 1
            W[i] = W[i] + I_inv[j].split("-")[1]
        W[i] = W[i] + ' ]'
        W[i] = W[i].replace("&", "&amp;")
        W[i] = W[i].replace(":", "")

    ##### Assign hashing from graph nodes to the component labels
    C = {}  # maps each original event name to the name of its aggregated cluster node
    for i in range(labels.shape[0]):
        C[I_inv[i]] = W[labels[i]]

    ###################
    ###### Create a new graph (list of edges) with the shrunken (merged) nodes
    A_new = []

    for e in A:

        # consider the edges from the original graph, if one node is in the connected components,
        # then create a new edge with the new aggregated nodes
        e0, e1 = e[0], e[1]  # nodes of an edge
        e0_org = e0
        e1_org = e1

        if e0 in C:
            e0 = C[e0]  # new node
        if e1 in C:
            e1 = C[e1]  # new node

        # add the edge if not a self-loop and not already existing
        if e0 != e1:

            # compute the weights
            prob = Adj[I_org[e0_org], I_org[
                e1_org]]  # probability weight from the adjacency matrix
            lag = Adj_lag[I_org[e0_org], I_org[
                e1_org]]  # time-lag weight from the lag matrix

            # check if the edge is already in the list
            isinlist = False
            for i in range(len(A_new)):
                if A_new[i][0] == e0 and A_new[i][1] == e1:
                    isinlist = True
                    # accumulate the probability and keep a running average of the lag
                    new_prob = np.min([A_new[i][2] + prob, 1])
                    new_lag = (
                        (A_new[i][3] * A_new[i][4]) + lag) / (1 + A_new[i][4])
                    new_count = 1 + A_new[i][4]
                    A_new[i] = (e0, e1, new_prob, new_lag, new_count)

            if isinlist == False:
                count = 1
                A_new.append((e0, e1, prob, lag, count))

    #################
    ####### draw the new graph
    if True:
        dot = graphviz.Digraph()

        for e in A_new:
            label = '(' + str(int(e[2] * 100) / 100) + ', ' + str(
                int(e[3] * 100) / 100) + ')'
            dot.edge(e[0], e[1], label=label)

        engin = sorted(graphviz.ENGINES)[0]
        dot.render('process_graph_with_clustering.gv', view=True)

    print('Process Mining File Saved!')

    # Conformity Scores

    list_of_the_scores = []

    for i in range(1, len(unq_PC_enc)):
        total_cost = 0
        total_edge_count = 0

        L = unq_PC_enc[unq_PC_enc.enc_id == i].node_name.to_numpy()[0]
        # loop over all of the nodes of user i
        for j in range(len(L) - 1):

            e0 = L[j]
            e1 = L[j + 1]
            # is there any aggregation?
            if e0 in C:
                e0 = C[e0]  # new node
            if e1 in C:
                e1 = C[e1]  # new node

            total_edge_count += 1

            for e in A_new:
                if e[0] == e0 and e[1] == e1:
                    total_cost += e[2]

        total_edge_count = max(total_edge_count, 1)

        list_of_the_scores.append(total_cost / total_edge_count)

    list_of_the_scores = np.array(list_of_the_scores)

    # Some printing functions
    def return_node_list(i):
        edge_list = []
        L = unq_PC_enc[unq_PC_enc.enc_id == i].node_name.to_numpy()[0]
        # loop over all of the nodes of user i
        node_list = []
        for j in range(len(L)):

            e0 = L[j]
            # is there any aggregation?
            if e0 in I_org:
                if e0 in C:
                    e0 = C[e0]  # new node
                node_list.append(e0)

        return node_list

    def return_edge_list(i):
        edge_list = []
        node_list = return_node_list(i)

        for j in range(len(node_list) - 1):

            e0 = node_list[j]
            e1 = node_list[j + 1]

            edge_list.append((e0, e1))

        return edge_list

    def print_major_events(i):
        print(df_enc_event[df_enc_event.enc_id == i])
        return 0

    def print_event_log(i):
        print(dataframe[dataframe[user_id_column] == i])
        return 0

    # print the event log of the user with the highest conformity score
    i = np.argmax(list_of_the_scores) + 1
    max_path = return_node_list(i)
    print('The user index with the highest conformity score: ', i)
    print('Conformity score: ', list_of_the_scores[i - 1])
    print(
        'Event log (major events) of the user with the highest conformity score:'
    )
    print_major_events(i)

    ####### draw the new graph, highlighting the edges on the max-conformity path
    max_path_edges = set(zip(max_path, max_path[1:]))
    G = nx.DiGraph()
    for e in A_new:
        label = '(' + str(int(e[2] * 100) / 100) + ', ' + str(
            int(e[3] * 100) / 100) + ')'
        if (e[0], e[1]) in max_path_edges:
            G.add_edge(e[0], e[1], label=label, penwidth="3",
                       color="green")

        else:
            G.add_edge(e[0], e[1], label=label)

    new_G = nx.nx_agraph.to_agraph(G)
    display(new_G)
    new_G.draw('process_graph_with_max_path.png', prog='dot')

    # print the event log of the user with the lowest conformity score
    i = np.argmin(list_of_the_scores) + 1
    min_path = return_node_list(i)
    print('The user index with the lowest conformity score: ', i)
    print('Conformity score: ', list_of_the_scores[i - 1])
    print(
        'Event log (major events) of the user with the lowest conformity score:'
    )
    print_major_events(i)
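
The node-clustering step buried in the long function above can be summarised with a short, hypothetical sketch (invented event names): edges whose time weight is below eta and whose endpoints share a type prefix form an undirected graph, and its connected components become the merged nodes.

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

edges = [('login - web', 'login - app', 0.05),     # (node1, node2, time weight)
         ('login - app', 'login - sso', 0.02),
         ('search - web', 'checkout - web', 0.40)]
eta = 0.1
nodes = sorted({n for e in edges for n in e[:2]})
index = {name: i for i, name in enumerate(nodes)}
adj = np.zeros((len(nodes), len(nodes)))
for n1, n2, tw in edges:
    # keep only fast transitions between events of the same type prefix
    if tw <= eta and n1.split('-')[0] == n2.split('-')[0]:
        adj[index[n1], index[n2]] = adj[index[n2], index[n1]] = 1
n_comp, labels = connected_components(csr_matrix(adj), directed=False)
print(n_comp, dict(zip(nodes, labels)))  # the three login events share a label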
コード例 #56
0
mat.sum_duplicates()
print(mat)
print()

# Converting from csr to csc with the tocsc() method
newarr = csr_matrix(arr).tocsc()
print(newarr)
print()

# SciPy Graphs
# Connected Components
from scipy.sparse.csgraph import connected_components, dijkstra, floyd_warshall, bellman_ford, depth_first_order, breadth_first_order
from scipy.sparse import csr_matrix
arr = np.array([[0, 1, 2], [1, 0, 0], [2, 0, 0]])
newarr = csr_matrix(arr)
print(connected_components(newarr))
print()

# Dijkstra
print(dijkstra(newarr, return_predecessors=True, indices=0))
print()

# Floyd Warshall
print(floyd_warshall(newarr, return_predecessors=True))
print()

# Bellman Ford
print(bellman_ford(newarr, return_predecessors=True, indices=0))
print()

# Depth First Order
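The listing stops at the final heading; given the traversal functions imported above, the continuation was presumably along these lines:

print(depth_first_order(newarr, 1))
print()

# Breadth First Order
print(breadth_first_order(newarr, 1))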
コード例 #57
0
    def compute_neighbors(self,
                          n_neighbors: int = 30,
                          knn: bool = True,
                          random_state: Optional[Union[RandomState, int]] = 0,
                          write_knn_indices: bool = False,
                          metric: str = 'euclidean',
                          metric_kwds: Mapping[str, Any] = {},
                          smoothknn: bool = True) -> None:
        """\
        Compute distances and connectivities of neighbors.

        Parameters
        ----------
        n_neighbors
             Use this number of nearest neighbors.
        knn
             Restrict result to `n_neighbors` nearest neighbors.


        Returns
        -------
        Writes sparse graph attributes `.distances` and `.connectivities`.
        Also writes `.knn_indices` and `.knn_distances` if
        `write_knn_indices==True`.
        """
        if n_neighbors > self._data.shape[0]:  # very small datasets
            n_neighbors = 1 + int(0.5 * self._data.shape[0])
            print('Warning: n_obs too small: adjusting to `n_neighbors = {}`'.
                  format(n_neighbors))

        if self._data.shape[0] >= 10000 and not knn:
            print(
                'Warning: Using high n_obs without `knn=True` takes a lot of memory...'
            )
        self.n_neighbors = n_neighbors
        self.knn = knn
        X = self._data
        # neighbor search

        knn_indices, knn_distances = compute_neighbors_umap(
            X,
            n_neighbors,
            random_state,
            metric=metric,
            metric_kwds=metric_kwds)
        # write indices as attributes
        if write_knn_indices:
            self.knn_indices = knn_indices
            self.knn_distances = knn_distances

        if smoothknn:
            # we need self._distances also for method == 'gauss' if we didn't
            # use dense distances
            self._distances, self._connectivities = compute_connectivities_umap(
                knn_indices, knn_distances, self._data.shape[0],
                self.n_neighbors)
        else:
            s = np.repeat(np.arange(knn_indices.shape[0]),
                          knn_indices.shape[1])
            t = knn_indices.flatten()
            w = np.ones(t.shape)
            # np.int was removed from recent NumPy releases; plain int behaves the same here
            self._connectivities = scipy.sparse.csr_matrix(
                (w, (s.astype(int), t.astype(int))),
                (X.shape[0], X.shape[0]))
            self._distances = scipy.sparse.csr_matrix(
                (knn_distances.flatten(),
                 (s.astype(int), t.astype(int))),
                (X.shape[0], X.shape[0]))

        self._number_connected_components = 1
        if issparse(self._connectivities):
            from scipy.sparse.csgraph import connected_components
            self._connected_components = connected_components(
                self._connectivities)
            self._number_connected_components = self._connected_components[0]
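
A toy standalone check (made-up matrix, not scanpy's objects) of what the final lines compute: the first element returned by connected_components is the number of components of the sparse connectivity graph.

import numpy as np
import scipy.sparse
from scipy.sparse.csgraph import connected_components

connectivities = scipy.sparse.csr_matrix(np.array([[0., 1., 0., 0.],
                                                   [1., 0., 0., 0.],
                                                   [0., 0., 0., 1.],
                                                   [0., 0., 1., 0.]]))
print(connected_components(connectivities)[0])  # 2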
コード例 #58
0
ファイル: merging.py プロジェクト: peimengsui/CaImAn
def merge_components(Y,
                     A,
                     b,
                     C,
                     f,
                     S,
                     sn_pix,
                     temporal_params,
                     spatial_params,
                     dview=None,
                     thr=0.85,
                     fast_merge=True,
                     mx=1000,
                     bl=None,
                     c1=None,
                     sn=None,
                     g=None):
    """ Merging of spatially overlapping components that have highly correlated temporal activity

    The correlation threshold for merging overlapping components is user specified in thr

Parameters:
-----------     

Y: np.ndarray
     residual movie after subtracting all found components (Y_res = Y - A*C - b*f) (d x T)

A: sparse matrix
     matrix of spatial components (d x K)

b: np.ndarray
     spatial background (vector of length d)

C: np.ndarray
     matrix of temporal components (K x T)

f:     np.ndarray
     temporal background (vector of length T)     

S:     np.ndarray
     matrix of deconvolved activity (spikes) (K x T)

sn_pix: ndarray
     noise standard deviation for each pixel

temporal_params: dictionary
     all the parameters that can be passed to the update_temporal_components function

spatial_params: dictionary
     all the parameters that can be passed to the update_spatial_components function     

thr:   scalar between 0 and 1
     correlation threshold for merging (default 0.85)

mx:    int
     maximum number of merging operations (default 1000)

sn_pix:    nd.array
     noise level for each pixel (vector of length d)

fast_merge: bool
    if true perform rank 1 merging, otherwise takes best neuron

bl:        
     baseline for fluorescence trace for each row in C
c1:        
     initial concentration for each row in C
g:         
     discrete time constant for each row in C
sn:        
     noise level for each row in C

Returns:
--------

A:     sparse matrix
        matrix of merged spatial components (d x K)

C:     np.ndarray
        matrix of merged temporal components (K x T)

nr:    int
    number of components after merging

merged_ROIs: list
    index of components that have been merged     

S:     np.ndarray
        matrix of merged deconvolved activity (spikes) (K x T)

bl: float
    baseline for fluorescence trace

c1: float
    initial concentration

g:  float
    discrete time constant

sn: float
    noise level

    Raise:
    -----
    Exception("The number of elements of bl\c1\g\sn must match the number of components")


    See Also:
    --------
    """
    #tests and initialization
    nr = A.shape[1]
    if bl is not None and len(bl) != nr:
        raise Exception(
            "The number of elements of bl must match the number of components")
    if c1 is not None and len(c1) != nr:
        raise Exception(
            "The number of elements of c1 must match the number of components")
    if sn is not None and len(sn) != nr:
        raise Exception(
            "The number of elements of sn must match the number of components")
    if g is not None and len(g) != nr:
        raise Exception(
            "The number of elements of g must match the number of components")

    [d, t] = np.shape(Y)

    # % find graph of overlapping spatial components
    A_corr = scipy.sparse.triu(A.T * A)
    A_corr.setdiag(0)
    A_corr = A_corr.tocsc()
    FF2 = A_corr > 0
    C_corr = scipy.sparse.csc_matrix(A_corr.shape)
    for ii in range(nr):
        overlap_indeces = A_corr[ii, :].nonzero()[1]
        if len(overlap_indeces) > 0:
            # we check the correlation of the calcium traces for each pair of overlapping components
            corr_values = [
                scipy.stats.pearsonr(C[ii, :], C[jj, :])[0]
                for jj in overlap_indeces
            ]
            C_corr[ii, overlap_indeces] = corr_values

    FF1 = (C_corr + C_corr.T) > thr
    FF3 = FF1.multiply(FF2)

    nb, connected_comp = csgraph.connected_components(
        FF3)  # % extract connected components

    p = temporal_params['p']
    list_conxcomp = []
    for i in range(nb):  # keep only components containing more than one neuron
        if np.sum(connected_comp == i) > 1:
            list_conxcomp.append((connected_comp == i).T)
    list_conxcomp = np.asarray(list_conxcomp).T

    if list_conxcomp.ndim > 1:
        cor = np.zeros((np.shape(list_conxcomp)[1], 1))
        for i in range(np.size(cor)):
            fm = np.where(list_conxcomp[:, i])[0]
            for j1 in range(np.size(fm)):
                for j2 in range(j1 + 1, np.size(fm)):
                    cor[i] = cor[i] + C_corr[fm[j1], fm[j2]]

#        if not fast_merge:
#            Y_res = Y - A.dot(C) #residuals=background=noise
        if np.size(cor) > 1:
            ind = np.argsort(np.squeeze(cor))[::-1]  # indices of merge groups sorted by total correlation, descending
        else:
            ind = [0]

        nbmrg = min((np.size(ind), mx))  # number of merging operations

        #we initialize the values
        A_merged = lil_matrix((d, nbmrg))
        C_merged = np.zeros((nbmrg, t))
        S_merged = np.zeros((nbmrg, t))
        bl_merged = np.zeros((nbmrg, 1))
        c1_merged = np.zeros((nbmrg, 1))
        sn_merged = np.zeros((nbmrg, 1))
        g_merged = np.zeros((nbmrg, p))
        merged_ROIs = []

        for i in range(nbmrg):
            merged_ROI = np.where(list_conxcomp[:, ind[i]])[0]
            merged_ROIs.append(merged_ROI)

            # l2-norm of the traces, used as normalization values
            C_to_norm = np.sqrt(
                [computedC.dot(computedC) for computedC in C[merged_ROI]])
            #            fast_merge = False

            # from here we are computing initial values for C and A
            Acsc = A.tocsc()[:, merged_ROI]
            Ctmp = np.array(C)[merged_ROI, :]
            print((merged_ROI.T))

            # a combined normalization value for each of the merged neurons
            C_to_norm = np.sqrt(
                np.ravel(Acsc.power(2).sum(axis=0)) * np.sum(Ctmp**2, axis=1))
            indx = np.argmax(C_to_norm)

            if fast_merge:
                # we normalize the different A's so they can be compared, then sum them
                computedA = Acsc.dot(
                    scipy.sparse.diags(
                        C_to_norm, 0,
                        (len(C_to_norm), len(C_to_norm)))).sum(axis=1)

                for _ in range(
                        10
                ):  # we operate a rank one NMF, refining it multiple times (see cnmf demos )
                    computedC = np.maximum(
                        Acsc.T.dot(computedA).T.dot(Ctmp) /
                        (computedA.T * computedA), 0)
                    computedA = np.maximum(
                        Acsc.dot(Ctmp.dot(computedC.T)) /
                        (computedC * computedC.T), 0)
            else:
                print('Simple Merging Take Best Neuron')
                computedC = Ctmp[indx]
                computedA = Acsc[:, indx]

            # then we de-normalize them using A_to_norm
            A_to_norm = np.sqrt(
                computedA.T.dot(computedA)[0, 0] / Acsc.power(2).sum(0).max())
            computedA /= A_to_norm
            computedC *= A_to_norm

            # we then deconvolve the trace to obtain a clean C with the noise left in the background
            if g is not None:
                computedC, bm, cm, gm, sm, ss, lam_ = constrained_foopsi(
                    np.array(computedC).squeeze(),
                    g=g[merged_ROI[indx]],
                    **temporal_params)
            else:
                computedC, bm, cm, gm, sm, ss, lam_ = constrained_foopsi(
                    np.array(computedC).squeeze(), g=None, **temporal_params)

            A_merged[:, i] = computedA
            C_merged[i, :] = computedC
            S_merged[i, :] = ss[:t]
            bl_merged[i] = bm
            c1_merged[i] = cm
            sn_merged[i] = sm
            g_merged[i, :] = gm

        # remove the merged neurons from the original set and append the merged components
        neur_id = np.unique(np.hstack(merged_ROIs))
        good_neurons = np.setdiff1d(list(range(nr)), neur_id)
        A = scipy.sparse.hstack((A.tocsc()[:, good_neurons], A_merged.tocsc()))
        C = np.vstack((C[good_neurons, :], C_merged))
        # do the same for the remaining per-component variables
        if S is not None:
            S = np.vstack((S[good_neurons, :], S_merged))
        if bl is not None:
            bl = np.hstack((bl[good_neurons], np.array(bl_merged).flatten()))
        if c1 is not None:
            c1 = np.hstack((c1[good_neurons], np.array(c1_merged).flatten()))
        if sn is not None:
            sn = np.hstack((sn[good_neurons], np.array(sn_merged).flatten()))
        if g is not None:
            g = np.vstack((np.vstack(g)[good_neurons], g_merged))
        nr = nr - len(neur_id) + nbmrg

    else:
        print('No neurons merged!')
        merged_ROIs = []

    return A, C, nr, merged_ROIs, S, bl, c1, sn, g
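
A toy sketch (synthetic matrices, not CaImAn's data structures) of how the merge groups above are derived: components are merged when they both overlap spatially and correlate temporally above thr, and the groups are the connected components of that intersection graph.

import numpy as np
import scipy.sparse
from scipy.sparse import csgraph

overlaps = np.array([[0, 1, 0],      # like FF2: spatial overlap graph (upper triangle)
                     [0, 0, 1],
                     [0, 0, 0]])
correlated = np.array([[0, 1, 0],    # like FF1: temporal correlation above thr
                       [0, 0, 0],
                       [0, 0, 0]])
merge_graph = scipy.sparse.csr_matrix(overlaps * correlated)
nb, connected_comp = csgraph.connected_components(merge_graph)
groups = [np.where(connected_comp == i)[0] for i in range(nb)
          if np.sum(connected_comp == i) > 1]
print(groups)  # [array([0, 1])] -> components 0 and 1 would be merged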
コード例 #59
0
def get_visible_points(verPredOrig,
                       classMask,
                       filterSize=8,
                       normThresh=0.07,
                       pointsDistanceThresh=5):
    nKeypoints = verPredOrig.shape[1] // 2

    # Create conv layer which does mean operation over a (filterSize x filterSize) pixel area
    meanFilter = torch.nn.Conv2d(nKeypoints * 2,
                                 nKeypoints * 2,
                                 filterSize,
                                 stride=1,
                                 padding=filterSize // 2,
                                 dilation=1,
                                 groups=nKeypoints * 2,
                                 bias=False).cuda()
    meanFilter.state_dict()['weight'][:] = 1 / filterSize**2

    # Use mean filter on vertex field, exclude image border element to make
    # shapes match (since an even filterSize should be used).
    filteredVerPred = meanFilter(verPredOrig)[:, :, :-1, :-1]
    _, _, height, width = filteredVerPred.shape

    maskedPixels, _ = matrixToIndices(classMask)
    maskedPixels = torch.index_select(maskedPixels, 1,
                                      torch.tensor([1, 0]).cuda())

    verPred = torch.reshape(verPredOrig,
                            [nKeypoints, 2, height, width]).squeeze()
    verPredPixels = verPred[:, :, maskedPixels[:, 1], maskedPixels[:, 0]]
    verPredPixels = verPredPixels / torch.norm(verPredPixels, dim=1)[:, None]
    verPredPixels = verPredPixels.permute(0, 2, 1)

    filteredVerPredAlt = torch.reshape(filteredVerPred,
                                       [nKeypoints, 2, height, width])

    # Calculate norm of the mean-filtered vertex field
    verNorms = torch.norm(filteredVerPredAlt, dim=1)
    verNorms_np = verNorms.cpu().detach().numpy()

    # Find pixels within segmented areas whose filtered norms are small
    visibilityMatrix = (verNorms < normThresh) & classMask
    visibilityMatrix_np = visibilityMatrix.cpu().detach().numpy()

    visiblePointsList = []

    for iKeypoint in range(nKeypoints):
        # Reshape visible pixels from matrix to point form
        #visibleClusterPoints=np.stack(np.where(visibilityMatrix_np[iKeypoint])).T
        visibleClusterPoints, _ = matrixToIndices(visibilityMatrix[iKeypoint])

        if len(visibleClusterPoints) == 0:
            visiblePointsList.append([])
            continue
        # Find connectivity between visible points
        adjMatrix = radius_neighbors_graph(
            visibleClusterPoints,
            radius=pointsDistanceThresh,
            include_self=False,
            mode='connectivity').toarray().astype(bool)

        # Cluster points that are connected, i.e. belong to the same GT point
        n_components, labels = connected_components(csgraph=adjMatrix,
                                                    directed=False,
                                                    return_labels=True)
        labels = torch.from_numpy(labels).cuda()
        # For each different cluster of visible points, find the one with the smallest norm
        # TODO: change to the one with most inlier counts?
        visiblePoints = torch.zeros((n_components, 2))

        for iFeature in range(n_components):
            # Find points belonging to current cluster
            isLabel = labels == (iFeature)

            # Get filtered vertex norms for current cluster points
            pointCluster = visibleClusterPoints[isLabel]
            #pointClusterNorms = verNorms[iKeypoint][pointCluster[:,0],pointCluster[:,1]]

            pointsDirection = getPointDirections(pointCluster,
                                                 maskedPixels,
                                                 normalized=True)
            votingFunction = lambda x, y: innerProductExponentiated(
                x,
                y,
                innerProductExponent=1,
                frequencyMultiplierExponent=0,
                threshold=0.999)
            votingScore = getVotingScore(pointsDirection,
                                         verPredPixels[iKeypoint],
                                         votingFunction)
            scoreSum, _ = getScoreSum(votingScore, pointsDirection)

            biggestScoreIdx = torch.argmax(scoreSum)
            bestPoint = pointCluster[biggestScoreIdx]

            visiblePoints[iFeature] = torch.index_select(
                bestPoint, 0,
                torch.tensor([1, 0]).cuda()) - 0.5
            # Select point with smallest norm (deprecated)
            #verNormSMallIdx = np.argmin(pointClusterNorms)
            #visiblePoints[iFeature] = pointCluster[verNormSMallIdx,::-1] - 0.5

        # Create a list of arrays which holds the detected visible points
        visiblePointsList.append(visiblePoints)

    return visiblePointsList
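
A minimal CPU-only sketch (made-up points, assumed threshold) of the clustering used above: points within pointsDistanceThresh of each other land in the same connected component of the radius-neighbors graph.

import numpy as np
from sklearn.neighbors import radius_neighbors_graph
from scipy.sparse.csgraph import connected_components

points = np.array([[0, 0], [1, 0], [0, 1],      # one blob
                   [50, 50], [51, 50]])         # another blob
adj = radius_neighbors_graph(points, radius=5, include_self=False,
                             mode='connectivity')
n_clusters, labels = connected_components(csgraph=adj, directed=False,
                                           return_labels=True)
print(n_clusters, labels)  # 2 [0 0 0 1 1]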
コード例 #60
0
ファイル: __init__.py プロジェクト: spadavec/scanpy
    def compute_neighbors(self,
                          n_neighbors: int = 30,
                          knn: bool = True,
                          n_pcs: Optional[int] = None,
                          use_rep: Optional[str] = None,
                          method: str = 'umap',
                          random_state: Optional[Union[RandomState, int]] = 0,
                          write_knn_indices: bool = False,
                          metric: str = 'euclidean',
                          metric_kwds: Mapping[str, Any] = {}) -> None:
        """\
        Compute distances and connectivities of neighbors.

        Parameters
        ----------
        n_neighbors
             Use this number of nearest neighbors.
        knn
             Restrict result to `n_neighbors` nearest neighbors.
        {n_pcs}
        {use_rep}

        Returns
        -------
        Writes sparse graph attributes `.distances` and `.connectivities`.
        Also writes `.knn_indices` and `.knn_distances` if
        `write_knn_indices==True`.
        """
        if n_neighbors > self._adata.shape[0]:  # very small datasets
            n_neighbors = 1 + int(0.5 * self._adata.shape[0])
            logg.warn(
                'n_obs too small: adjusting to `n_neighbors = {}`'.format(
                    n_neighbors))
        if method == 'umap' and not knn:
            raise ValueError('`method = \'umap\'` only with `knn = True`.')
        if method not in {'umap', 'gauss'}:
            raise ValueError('`method` needs to be \'umap\' or \'gauss\'.')
        if self._adata.shape[0] >= 10000 and not knn:
            logg.warn(
                'Using high n_obs without `knn=True` takes a lot of memory...')
        self.n_neighbors = n_neighbors
        self.knn = knn
        X = choose_representation(self._adata, use_rep=use_rep, n_pcs=n_pcs)
        # neighbor search
        use_dense_distances = (metric == 'euclidean'
                               and X.shape[0] < 8192) or knn == False
        if use_dense_distances:
            _distances = pairwise_distances(X, metric=metric, **metric_kwds)
            knn_indices, knn_distances = get_indices_distances_from_dense_matrix(
                _distances, n_neighbors)
            if knn:
                self._distances = get_sparse_matrix_from_indices_distances_numpy(
                    knn_indices, knn_distances, X.shape[0], n_neighbors)
            else:
                self._distances = _distances
        else:
            # non-euclidean case and approx nearest neighbors
            if X.shape[0] < 4096:
                X = pairwise_distances(X, metric=metric, **metric_kwds)
                metric = 'precomputed'
            knn_indices, knn_distances = compute_neighbors_umap(
                X,
                n_neighbors,
                random_state,
                metric=metric,
                metric_kwds=metric_kwds)
        # write indices as attributes
        if write_knn_indices:
            self.knn_indices = knn_indices
            self.knn_distances = knn_distances
        logg.msg('computed neighbors', t=True, v=4)
        if not use_dense_distances or method == 'umap':
            # we need self._distances also for method == 'gauss' if we didn't
            # use dense distances
            self._distances, self._connectivities = compute_connectivities_umap(
                knn_indices, knn_distances, self._adata.shape[0],
                self.n_neighbors)
        # overwrite the umap connectivities if method is 'gauss'
        # self._distances is unaffected by this
        if method == 'gauss':
            self._compute_connectivities_diffmap()
        logg.msg('computed connectivities', t=True, v=4)
        self._number_connected_components = 1
        if issparse(self._connectivities):
            from scipy.sparse.csgraph import connected_components
            self._connected_components = connected_components(
                self._connectivities)
            self._number_connected_components = self._connected_components[0]