Example #1
def eig_calc_OLD(G,normalize=None):
    import scipy as sp
    #print '\n\t~~~~~~ eig_calc ~~~~~~'; startTime = datetime.now(); sys.stdout.flush()
    startTime = datetime.now() 
    n = G.number_of_nodes()
    if n == 1:
        eigvec = np.array([1])
    elif n == 2:     # for handling ValueError: k must be less than ndim(A)-1, k=1
        return power_iteration(G,normalize=normalize)
    else:    # the graph contains more than 2 nodes
        A=nx.to_scipy_sparse_matrix(G)
        '''print '--- eig_calc: is sub graph stochastic? ' + str(gm.check_if_stochastic_matrix(nx.to_numpy_matrix(G)))#; sys.stdout.flush()
        print '--- eig_calc: is sub graph strongly connected? ' + str(nx.is_strongly_connected(G))#; sys.stdout.flush()
        print '--- eig_calc: is sub graph aperiodic? ' + str(nx.is_aperiodic(G));# sys.stdout.flush()
        print '--- eig_calc: debug step took: '+str(datetime.now()-tmpTime); tmpTime = datetime.now(); sys.stdout.flush()
        '''
        try:
            eigval,eigvec = sp.sparse.linalg.eigen.arpack.eigs(A.T, k=1, sigma=1, which='LM')
        except RuntimeError:    
            B=nx.to_scipy_sparse_matrix(add_noise(G))
            eigval,eigvec = sp.sparse.linalg.eigen.arpack.eigs(B.T, k=1, sigma=1, which='LM')
        #eigval,eigvec = sp.sparse.linalg.eigen.arpack.eigs(A.T, k=1, which='LM')
        #print '--- eig_calc: eigs took: '+str(datetime.now()-tmpTime); sys.stdout.flush()
        #print '--- eig_calc: sub graph eigval- '+str(eigval)
    eigvec = eigvec/eigvec.sum()
    if normalize:
        norm_factor = float(n)/normalize
        eigvec = eigvec*norm_factor    
    #if (eigvec.imag.sum() != 0. ):
    #    print '##### COMPLEX VECTOR!!!! returning the real part only!!! #####'; #sys.stdout.flush(
    results_dict = dict(zip(G.nodes(),map(float,eigvec.real)))
    if n > 100 and normalize: print '--- eig_calc: calc of class contains '+str(n)+' nodes, ('+str(float(n)/normalize)+'% of the non-isolate nodes from the graph) took-'+str(datetime.now()-startTime); sys.stdout.flush()
    return results_dict
Example #2
def gRa(g, w):
    '''w is the number of edges in the graph, i.e. the number of edges that remain in the data after the edge-deletion perturbation p'''
    tg = g.copy()
    Rq = nx.to_scipy_sparse_matrix(g)
    Rq = Rq.toarray()

    bw = nx.edge_betweenness_centrality(g, normalized=False)
    norm = sum(bw.values())
    e_num = len(g.edges())

    n = len(g)
    N = (n * (n - 1)) / 2
    for k, v in bw.items():
        g.add_edge(*k, weight=v)
#    print g.edges(data=True)
    R = nx.to_scipy_sparse_matrix(g, weight='weight')
    Rp = R.toarray()

    Rp = w * Rp * 2.0 / Rp.sum()

    q = float(e_num - w) / (N - e_num)

    for i, each in enumerate(Rq):
        for j, e in enumerate(each):
            if e == 0:
                Rp[i, j] = q  # roundabout: use Rq to locate non-edges and write q into Rp at those positions
    for i in range(n):
        Rp[i,i]=0 # clear the q values on the diagonal
    return Rp
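A minimal usage sketch, assuming scipy.stats is imported as stats as these snippets expect: gRa writes betweenness weights onto its argument's edges, so pass a copy; the returned matrix R can then drive r_perturbR from Example #8 below.

import networkx as nx

g = nx.karate_club_graph()        # 78 edges
R = gRa(g.copy(), w=70)           # per-pair retention/addition probabilities
g_perturbed = r_perturbR(g, R)    # see Example #8 below
print(len(g_perturbed.edges()))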
 def test_weight_keyword(self):
     WP4 = nx.Graph()
     WP4.add_edges_from((n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3))
     P4 = path_graph(4)
     A = nx.to_scipy_sparse_matrix(P4)
     np_assert_equal(A.todense(), nx.to_scipy_sparse_matrix(WP4, weight=None).todense())
     np_assert_equal(0.5 * A.todense(), nx.to_scipy_sparse_matrix(WP4).todense())
     np_assert_equal(0.3 * A.todense(), nx.to_scipy_sparse_matrix(WP4, weight="other").todense())
def dists(G, nbunch = None):
    
    G = G.copy()
    
    if nbunch is None:
        nbunch = G.nodes()
    
    try:
        out_degree = G.out_degree(nbunch = nbunch)
        in_degree = G.in_degree(nbunch = nbunch)
        gross_out_weight = G.out_degree(weighted = True, nbunch = nbunch)
        gross_in_weight = G.in_degree(weighted = True, nbunch = nbunch)

    except TypeError:
        out_degree = G.out_degree(nbunch = nbunch)
        in_degree = G.in_degree(nbunch = nbunch)
        gross_out_weight = G.out_degree(weight = 'weight', nbunch = nbunch)
        gross_in_weight = G.in_degree(weight = 'weight', nbunch = nbunch)

        
    A = nx.to_scipy_sparse_matrix(G, nodelist = nbunch)
    i, j, grosscells = extract.find(A)

    selfloops = G.selfloop_edges(data = True)
    G.remove_edges_from(selfloops)
    
    
    try:
        net_out_weight = G.out_degree(weighted = True, nbunch = nbunch)
        net_in_weight = G.in_degree(weighted = True, nbunch = nbunch)

    except TypeError:
        net_out_weight = G.out_degree(weight = 'weight', nbunch = nbunch)
        net_in_weight = G.in_degree(weight = 'weight', nbunch = nbunch)


    A = nx.to_scipy_sparse_matrix(G, nodelist = nbunch)
    i, j, netcells = extract.find(A)

    dists = {
    'out-degree': 
    np.array([out_degree[i] for i in nbunch],dtype = np.float32), 
    'in-degree': 
    np.array([in_degree[i] for i in nbunch],dtype = np.float32), 
    'gross_out-weight': 
    np.array([gross_out_weight[i] for i in nbunch],dtype = np.float32), 
    'gross_in-weight': 
    np.array([gross_in_weight[i] for i in nbunch],dtype = np.float32),  
    'net_out-weight': 
    np.array([net_out_weight[i] for i in nbunch],dtype = np.float32), 
    'net_in-weight': 
    np.array([net_in_weight[i] for i in nbunch],dtype = np.float32),  
    'gross_cells': grosscells,
    'net_cells': netcells
    }
    
    return dists
Example #5
def to_sparse( G ):
    """
    DiGraph to scipy sparse matrix.
    """
    try:
        return nx.to_scipy_sparse_matrix( G.graph, dtype=int, format='csr' )
    # fall back when the caller passed the graph itself rather than a wrapper with a .graph attribute.
    except AttributeError:
        return nx.to_scipy_sparse_matrix( G, dtype=int, format='csr' )        
Example #6
def eig_calc(G,normalize=None,nstart_norm=None):
    '''
    Calculates the dominant eigenvector of the graph (the one associated with eigenvalue 1).
    Parameters
    ----------
        G - networkx directed graph, in our case the strongly connected component (subGraph)
        normalize - int, the number of nodes in the original (entire) graph, for normalizing the resulting eigenvector by the proportion of the component within the entire (original) graph
        nstart_norm - float, a weight in [0,1] for normalizing the resulting eigenvector (reflecting the risk proportion of the component within the entire (original) graph).
        *NOTE: normalize and nstart_norm are mutually exclusive; at most one of them may differ from None.
    Returns
    -------
        results_dict - a dict holding the (normalized) dominant eigenvector (keyed by G's node names, typically integers)
    '''
    import scipy as sp
    #print '\n\t~~~~~~ eig_calc ~~~~~~'; startTime = datetime.now(); sys.stdout.flush()
    startTime = datetime.now() 
    n = G.number_of_nodes()
    if n == 1:
        eigvec = np.array([1])
    elif n == 2:     # for handling ValueError: k must be less than ndim(A)-1, k=1
        return power_iteration(G,normalize=normalize,nstart_norm=nstart_norm)
    else:    # the graph contains more than 2 nodes
        A=nx.to_scipy_sparse_matrix(G)
        '''print '--- eig_calc: is sub graph stochastic? ' + str(gm.check_if_stochastic_matrix(nx.to_numpy_matrix(G)))#; sys.stdout.flush()
        print '--- eig_calc: is sub graph strongly connected? ' + str(nx.is_strongly_connected(G))#; sys.stdout.flush()
        print '--- eig_calc: is sub graph aperiodic? ' + str(nx.is_aperiodic(G));# sys.stdout.flush()
        print '--- eig_calc: debug step took: '+str(datetime.now()-tmpTime); tmpTime = datetime.now(); sys.stdout.flush()
        '''
        try:
            eigval,eigvec = sp.sparse.linalg.eigen.arpack.eigs(A.T, k=1, sigma=1, which='LM')
        except RuntimeError:    
            B=nx.to_scipy_sparse_matrix(add_noise(G))
            eigval,eigvec = sp.sparse.linalg.eigen.arpack.eigs(B.T, k=1, sigma=1, which='LM')
        #eigval,eigvec = sp.sparse.linalg.eigen.arpack.eigs(A.T, k=1, which='LM')
        #print '--- eig_calc: eigs took: '+str(datetime.now()-tmpTime); sys.stdout.flush()
        #print '--- eig_calc: sub graph eigval- '+str(eigval)
    eigvec = eigvec/eigvec.sum()
    if normalize:
        norm_factor = float(n)/normalize
        eigvec = eigvec*norm_factor  
        
        if n > 100: print '--- eig_calc: calc of class contains ',n,' nodes, (',float(n)/normalize,'% of the non-isolate nodes from the graph) took-',datetime.now()-startTime; sys.stdout.flush()
    
    elif nstart_norm != None:
        eigvec = eigvec*nstart_norm
        
        if n > 100: print '--- eig_calc: calc of class contains ',n,' nodes, took-',datetime.now()-startTime; sys.stdout.flush()
    #if (eigvec.imag.sum() != 0. ):
    #    print '##### COMPLEX VECTOR!!!! returning the real part only!!! #####'; #sys.stdout.flush(
    results_dict = dict(zip(G.nodes(),map(float,eigvec.real)))
    
    return results_dict
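A minimal usage sketch, assuming np/nx are imported as above and a SciPy version old enough to still expose sp.sparse.linalg.eigen.arpack. The input should be a strongly connected, row-stochastic graph, so that eigenvalue 1 exists (weights chosen so the shift-invert factorization at sigma=1 is not exactly singular):

import networkx as nx

scc = nx.DiGraph()
scc.add_weighted_edges_from([(0, 1, 0.3), (0, 2, 0.7),
                             (1, 0, 1.0), (2, 0, 1.0)])
print(eig_calc(scc, normalize=10))  # values sum to 3/10, the component's share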
Example #7
def unroll_adjacency_matrix(G):
    '''"Unrolls" the adjacency matrix of the input graph into a vector. This
    is done by extracting all off-diagonal elements of the nxn adjacency matrix
    and concatenating them into an n(n - 1)/2 dimensional array.
    Example:
    [[0, 1, 0],
     [1, 0, 1],
     [0, 1, 0]]
     gives [1, 0, 1].'''

    # Number of nodes in the graph
    n = len(G)
    # Length of the unrolled matrix
    dim = n*(n - 1)//2
    # Sparse matrix to hold the results
    result = sp.sparse.lil_matrix((1, dim))
    # Adjacency matrix for the graph
    M = nx.to_scipy_sparse_matrix(G, format = "coo")

    for i,j,v in zip(M.row, M.col, M.data):
        # Only care about the upper triangle of the matrix
        if not j > i:
            continue
        ind = i*n - (i*(i+1))//2 + j - i - 1 # Nothing to see here, move along.
        # Add the encountered element at the appropriate index of result
        result[0, ind] = v
    
    return result
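A quick sanity check of the docstring example, assuming scipy/networkx are imported as sp/nx as in the snippet:

import networkx as nx

G = nx.path_graph(3)              # adjacency [[0,1,0],[1,0,1],[0,1,0]]
vec = unroll_adjacency_matrix(G)  # a 1 x n(n-1)/2 sparse row vector
print(vec.toarray())              # -> [[1. 0. 1.]]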
Example #8
def r_perturbR(g,R):
    '''Random perturbation with a variable (per-edge) parameter matrix R'''
    A=nx.to_scipy_sparse_matrix(g)
    B=sparse.triu(A).toarray()
    #print B
    n=len(g)
    i = 0
    ts=0

    while i<n:
        j=i+1
        while j<n:
            if(B[i,j]==1):
                if R[i,j]<1:
                    B[i,j] = stats.bernoulli.rvs(R[i,j]) # R[i,j] is the success probability p of the Bernoulli trial
                else:
                    B[i, j] = stats.bernoulli.rvs(1)  # could actually be removed
                ts=ts + 1
                #print "+",ts, ":", i, ",", j, ",", B[i, j]
            else:
                if R[i,j]<1:
                    B[i,j] = stats.bernoulli.rvs(R[i,j]) # R[i,j] is the success probability q of the Bernoulli trial
                else:
                    B[i, j] = stats.bernoulli.rvs(0)  # could actually be removed
                ts=ts + 1
                #print "-",ts, ":", i, ",", j, ",", B[i, j]
            j = j + 1
        i=i+1

    return nx.from_numpy_matrix(B,create_using=nx.Graph()) # rebuild and return a Graph-type object
Example #9
def page_rank_scipy(G,alpha=0.85,max_iter=100,tol=1.0e-4,nodelist=None):
    """Return a numpy array of the PageRank of G.
    
    PageRank computes the dominant eigenvector (eigenvalue 1) of the
    stochastic adjacency matrix of G.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.   

    A starting vector for the power iteration can be given in the
    dictionary nstart.

    This implementation requires scipy.

    """
    import scipy.sparse
    M=NX.to_scipy_sparse_matrix(G,nodelist=nodelist)
    (n,m)=M.shape # should be square
    S=scipy.array(M.sum(axis=1)).flatten()
    index=scipy.where(S!=0)[0]
    for i in index:
        M[i,:]*=1.0/S[i]
    x=scipy.ones((n))/n  # initial guess
    dangle=scipy.array(scipy.where(M.sum(axis=1)==0,1.0/n,0)).flatten()
    for i in range(max_iter):
        xlast=x
        x=alpha*(M.rmatvec(x)+scipy.dot(dangle,xlast))+(1-alpha)*xlast.sum()/n
        # check convergence, l1 norm            
        err=scipy.absolute(x-xlast).sum()
        if err < n*tol:
            return x

    raise NetworkXError("page_rank: power iteration failed to converge in %d iterations."%(i+1))
def modularity_matrix(G, nodelist=None, weight=None):
    """Return the modularity matrix of G.

    The modularity matrix is the matrix B = A - <A>, where A is the adjacency
    matrix and <A> is the average adjacency matrix, assuming that the graph
    is described by the configuration model.

    More specifically, the element B_ij of B is defined as
        B_ij = A_ij - k_i k_j / (2 * m)
    where k_i is the degree of node i and m is the number of edges in the
    graph. When weight is set to the name of an edge attribute, A_ij, k_i,
    k_j and m are computed using its value.

    Parameters
    ----------
    G : Graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().
    
    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used for
       the edge weight.  If None then all edge weights are 1.

    Returns
    -------
    B : Numpy matrix
      The modularity matrix of G.

    Examples
    --------
    >>> import networkx as nx
    >>> k =[3, 2, 2, 1, 0]
    >>> G = nx.havel_hakimi_graph(k)
    >>> B = nx.modularity_matrix(G)


    See Also
    --------
    to_numpy_matrix
    adjacency_matrix
    laplacian_matrix
    directed_modularity_matrix

    References
    ----------
    .. [1] M. E. J. Newman, "Modularity and community structure in networks",
       Proc. Natl. Acad. Sci. USA, vol. 103, pp. 8577-8582, 2006.
    """
    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  format='csr')
    k = A.sum(axis=1)
    m = k.sum() * 0.5
    # Expected adjacency matrix
    X = k * k.transpose() / (2 * m)
    return A - X
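One property worth checking: every row of B sums to zero, since sum_j A_ij = k_i and sum_j k_i k_j / (2m) = k_i. A minimal sketch, reusing the docstring's graph:

import networkx as nx

G = nx.havel_hakimi_graph([3, 2, 2, 1, 0])
B = modularity_matrix(G)
print(B.sum(axis=1))  # every row sums to ~0 (up to floating point)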
    def compute_slice_matrices(self,G):
        #Create node and edge layers
        node_layer = defaultdict(list)
        for n in G.nodes():
            node_layer[n[0]].append(n)

        edge_layer = defaultdict(list)
        for e in G.edges(data=True):
            edge_layer[e[2]['etype']].append(e)

        ALLNTYPES = [ntype for ntype in node_layer] 
        ALLETYPES = [etype for etype in edge_layer]

        #### Transform everything into linear algebra...

        self.OrderedNodes=[]
        for ntype in ALLNTYPES:
            self.OrderedNodes = self.OrderedNodes + node_layer[ntype]
        self.NodeIndex = {}
        for idx,n in enumerate(self.OrderedNodes):
            self.NodeIndex[n]=idx

        #Construct Adjacency Matrices for various slices (single edge type)
        self.AdjMat = {}
        self.Degs = {} # Degrees
        #Invdegs = {}
        for etype in ALLETYPES:
            print '--computing slice for edge type "'+etype+'"'
            H = graph_slice(G,etypes=etype)
            self.AdjMat[etype] = nx.to_scipy_sparse_matrix(H,self.OrderedNodes,format='csr')
            self.Degs[etype] = np.array([[max(1,float(H.degree(n)))] for n in self.OrderedNodes])
Example #12
def r_perturbSa(g,p=None):
    '''Random perturbation with a fixed parameter; p is the success probability of the Bernoulli trial'''
    A=nx.to_scipy_sparse_matrix(g)
    B=sparse.triu(A).toarray()
    #print B
    n=len(g)
    e_num=len(g.edges()) # number of edges present in the graph

    q = e_num * (1 - p) / ((n * (n - 1)) // 2 - e_num)
    #print q
    i = 0
    ts=0
    listp=stats.bernoulli.rvs(p,size=e_num)
    listp=listp.tolist()
    listq=stats.bernoulli.rvs(q,size=(n * (n - 1)) // 2 - e_num)
    listq=listq.tolist()

    while i<n:
        j=i+1 # skip the zeros on the diagonal
        while j<n:
            if(B[i,j]==1):
                B[i,j] = listp.pop() # p is the success probability of the Bernoulli trial
                #ts=ts + 1
                # print "+",ts, ":", i, ",", j, ",", B[i, j]
            else:
                B[i,j] = listq.pop() # q is the success probability of the Bernoulli trial
                #ts=ts + 1
                # print "-",ts, ":", i, ",", j, ",", B[i, j]
            j = j + 1
        i=i+1

    return nx.from_numpy_matrix(B,create_using=nx.Graph()) # rebuild and return a Graph-type object
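A minimal usage sketch, assuming scipy.stats as stats and scipy.sparse as sparse, per the imports the snippet relies on. With retention probability p, roughly p of the existing edges survive, and the q-trials add about e_num*(1-p) new edges in expectation:

import networkx as nx

g = nx.karate_club_graph()
g_anon = r_perturbSa(g, p=0.9)
print(len(g.edges()), '->', len(g_anon.edges()))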
 def test_ordering(self):
     G = nx.DiGraph()
     G.add_edge(1,2)
     G.add_edge(2,3)
     G.add_edge(3,1)
     M = nx.to_scipy_sparse_matrix(G,nodelist=[3,2,1])
     np_assert_equal(M.todense(), np.matrix([[0,0,1],[1,0,0],[0,1,0]]))
def classify_samples(data, labels, unmarked_idxs,
                     sample_size, n_runs, n_clusters):
    unmarked_point_probs = {}
    all_idxs = range(len(unmarked_idxs))
    random.shuffle(all_idxs)
    keep_raw_idxs = sorted(all_idxs[:sample_size])
    delete_raw_idxs = sorted(all_idxs[sample_size:])
    keep_idxs, delete_idxs = (unmarked_idxs[keep_raw_idxs],
                              unmarked_idxs[delete_raw_idxs])

    bagging_graph = nx.from_scipy_sparse_matrix(data)
    bagging_graph.remove_nodes_from(delete_idxs)
    bagging_adj_matrix = nx.to_scipy_sparse_matrix(bagging_graph)
    bagging_labels = np.delete(labels, delete_idxs, 0)
    bagging_unmarked_idxs = np.where(
        bagging_labels[:, 0] == -1)[0]

    clf = TransductiveClassifier(n_runs, n_clusters)
    clf.fit(bagging_adj_matrix, bagging_labels)
    assert len(keep_idxs) == len(bagging_unmarked_idxs)
    for i, idx in enumerate(keep_idxs):
        unmarked_point_probs[idx] = clf.transduction_[
            bagging_unmarked_idxs[i]]

    return unmarked_point_probs
Example #15
def mypr(G, alpha=0.85, personalization=None,
       max_iter=100, tol=1.0e-6, weight='weight'):

    nodelist=G.nodes()
    M=nx.to_scipy_sparse_matrix(G,nodelist=nodelist,weight=weight)
    (n,m)=M.shape # should be square

    S = scipy.array(M.sum(axis=1))
    S[S>0] = 1.0 / S[S>0]
    Sm = scipy.sparse.lil_matrix((n,n))
    Sm.setdiag(S.flat)
    Sm = Sm.tocsr()
    M = Sm * M

    ## Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    ## M = Q * M
    
    x=scipy.ones((n))/n  # initial guess
    dangle=scipy.array(scipy.where(M.sum(axis=1)==0,1.0/n,0)).flatten()
    # add "teleportation"/personalization
    v=x
    i=0
    while i <= max_iter:
        # power iteration: make up to max_iter iterations
        xlast=x
        x=alpha*(x*M+scipy.dot(dangle,xlast))+(1-alpha)*v
        x=x/x.sum()
        # check convergence, l1 norm            
        err=scipy.absolute(x-xlast).sum()
        if err < n*tol:
            r = dict(zip(nodelist,x))
            return r
        i+=1
    
    print 'Failed to converge'
def _backward(T, edge_to_P, root, root_prior_distn1d, node_to_data_lmap):
    """
    This is the first pass of a forward-backward algorithm.

    Parameters
    ----------
    {params}

    """
    # Define a toposort node ordering and a corresponding csr matrix.
    nodes = nx.topological_sort(T, [root])
    node_to_idx = dict((na, i) for i, na in enumerate(nodes))
    m = nx.to_scipy_sparse_matrix(T, nodes)

    # Stack the transition matrices into a single array.
    nnodes = len(nodes)
    nstates = root_prior_distn1d.shape[0]
    trans = np.empty((nnodes-1, nstates, nstates), dtype=float)
    for (na, nb), P in edge_to_P.items():
        edge_idx = node_to_idx[nb] - 1
        trans[edge_idx, :, :] = P

    # Stack the data into a single array.
    data = np.empty((nnodes, nstates), dtype=float)
    for i, na in enumerate(nodes):
        data[i, :] = node_to_data_lmap[na]

    # Compute the partial likelihoods.
    lhood = np.empty((nnodes, nstates), dtype=float)
    validation = 0
    _wrapped_first_pass(m.indices, m.indptr, trans, data, lhood, validation)
    lhood[0, :] *= root_prior_distn1d

    # Convert the output into a dictionary.
    return dict((na, lhood[i, :]) for i, na in enumerate(nodes))
Example #17
def compute_pagerank(network : nx.DiGraph, damping : float=0.85):
    Adj = nx.to_scipy_sparse_matrix(network, dtype='float', format='csr')
    deg = np.ravel(Adj.sum(axis=1))
    Dinv = sparse.diags(1 / deg)
    Trans = (Dinv @ Adj).T
    pr = pagerank_power(Trans, damping=damping)
    return pr
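A minimal usage sketch; the Dinv step above divides by out-degree, so every node needs at least one out-edge. pagerank_power is an external helper not shown here; the sketch assumes it accepts the transition matrix and a damping keyword, exactly as the call above implies, with sparse/np bound to scipy.sparse/numpy.

import networkx as nx

net = nx.DiGraph([(0, 1), (1, 2), (2, 0), (2, 1)])
print(compute_pagerank(net))  # 1-d array of PageRank scores summing to ~1.0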
    def configuration_model(self, return_copy=False):
        """ Reads AdjMatrixSequence Object and returns an edge randomized version.
            Result is written to txt file.
        """
        if self.is_directed:
            nx_creator = nx.DiGraph()
        else:
            nx_creator = nx.Graph()

        if return_copy:
            x = self[:]
        else:
            x = self

        # t_edges=[]
        for i in range(len(self)):
            print "configuration model: ", i
            graphlet = nx.from_scipy_sparse_matrix(x[i], create_using=nx_creator)
            graphlet = gwh.randomize_network(graphlet)
            x[i] = nx.to_scipy_sparse_matrix(graphlet, dtype="int")
            # for u,v in graphlet.edges():
            #    t_edges.append((u,v,i))

        # gwh.write_array(t_edges,"Configuration_model.txt")

        if return_copy:
            return x
        else:
            return
def main111():
  if 1:
    G = nx.read_edgelist(infname)
    print nx.info(G)
    # Graph adj matix
    A = nx.to_scipy_sparse_matrix(G)
    print type(A)
    from scipy import sparse, io
    io.mmwrite("Results/test.mtx", A)
    exit()
    # write to disk clustering coeffs for this graph
    snm.get_clust_coeff([G], 'orig', 'mmonth')
    # write to disk egienvalue
    snm.network_value_distribution([G], [], 'origMmonth')

  if 0:
    edgelist = np.loadtxt(infname, dtype=str, delimiter='\t')
    print edgelist[:4]
    idx = np.arange(len(edgelist))
    np.random.shuffle(idx)
    subsamp_edgelist = edgelist[idx[:100]]
    G = nx.Graph()
    G.add_edges_from([(long(x), long(y)) for x, y in subsamp_edgelist])

  # visualize this graph
  # visualize_graph(G)
  exit()

  G = nx.Graph()
  G.add_edges_from([(long(x), long(y)) for x, y in edgelist])
  print nx.info(G)
  print 'Done'
def list_directed_cc (H):
    adj_matrix = nx.to_scipy_sparse_matrix(H) # Return the graph adjacency matrix as a SciPy sparse matrix
 
    list_cc = sp.sparse.csgraph.connected_components(adj_matrix, directed=True, connection='weak', return_labels=True)

    print(" All cc: ", list_cc)

    return list_cc
Example #21
def get_matrix_norm_by_col(G,sparse=False):
    """Return the transition matrix normalized by its columns- so each column sums to 1."""
    # L is the transition matrix
    if(sparse==True):
        L=nx.to_scipy_sparse_matrix(G,nodelist=G.nodes())
    else:
        L=nx.to_numpy_matrix(G,nodelist=G.nodes())
    return (normalize(L.copy().T, axis=1, norm='l1')).T
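A quick check, assuming the `normalize` used above is sklearn.preprocessing.normalize: each column of the returned matrix sums to 1.

import networkx as nx

M = get_matrix_norm_by_col(nx.path_graph(3))
print(M.sum(axis=0))  # every column sums to 1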
Example #22
def get_graph(filepath):
    """
    Load the matrix saved at filepath.

    Args:
    filepath: path to file holding a sparse matrix

    Returns:
    A: SciPy CSR matrix

    """
    filename, ending = os.path.splitext(filepath)
    if ending == '.mat':
        from scipy import io
        A = sparse.csr_matrix(io.loadmat(filepath)['mat'], dtype=float)
    elif ending == '.csv':
        A = sparse.csr_matrix(np.genfromtxt(filepath, delimiter=','), 
                              dtype=float)
    elif ending == '.gml':
        import networkx as nx 
        A = nx.to_scipy_sparse_matrix(nx.read_gml(filepath), dtype=float)
    elif ending == '.dat':
        adjlist = np.genfromtxt(filepath)
        
        if adjlist.shape[1] == 2:
            data = np.ones(adjlist.shape[0])
            if np.min(adjlist) == 1:
                adjlist -= 1 # 0 indexing
        else:
            data = adjlist[:, 2]
            if np.min(adjlist[:, :-1]) == 1:
                adjlist[:, :-1] -= 1 # 0 indexing
        A = sparse.coo_matrix((data, 
                              (np.array(adjlist[:,0], dtype=int),
                              np.array(adjlist[:,1], dtype=int))),
                              dtype=float).tocsr()            

    elif ending == '.gz' or ending == '.txt':
        filename = os.path.splitext(filename)[0]
        import networkx as nx
        A = nx.to_scipy_sparse_matrix(
                nx.read_weighted_edgelist(filepath, delimiter =' '),
                dtype=float)  
    else:
        raise IOError("Could not parse file")
    return A
Example #23
def get_overlay_topology(N, f_name=None):
    g = nx.scale_free_graph(N)
    g = g.to_undirected()
    cg_list = nx.connected_component_subgraphs(g)
    if f_name:
        save_graph(cg_list[0], f_name)
    # zdump(np.asarray(nx.to_numpy_matrix(cg_list[0])), 'overlay_adj.pkz')
    return nx.to_scipy_sparse_matrix(cg_list[0])
Example #24
def test_partition_graph(rand_lowrank_g, spark_context):
    print(nx.to_scipy_sparse_matrix(rand_lowrank_g, weight='sign').todense())
    labels = partition_graph(rand_lowrank_g,
                             k=rank,
                             sc=spark_context,
                             iterations=20, lambda_=0.1,
                             seed=random_seed)
    assert adjusted_rand_score(labels, true_labels) == 1.0
Example #25
    def predict(self, beta=0.001, max_power=5, weight='weight', dtype=None):
        """Predict by Katz (1953) measure

        Let `A` be an adjacency matrix for the directed network `G`.
        Then, each element `a_{ij}` of `A^k` (the `k`-th power of `A`) has a
        value equal to the number of walks with length `k` from `i` to `j`.

        The probability of a link rapidly decreases as the walks grow longer.
        Katz therefore introduces an extra parameter (here beta) to weigh
        longer walks less.

        Parameters
        ----------
        beta : a float
            the value of beta in the formula of the Katz equation

        max_power : an int
            the maximum number of powers to take into account

        weight : string or None
            The edge attribute that holds the numerical value used for
            the edge weight.  If None then treat as unweighted.

        dtype : a data type
            data type of edge weights (default numpy.int32)

        """
        from itertools import izip

        if dtype is None:
            import numpy
            dtype = numpy.int32

        nodelist = self.G.nodes()
        adj = nx.to_scipy_sparse_matrix(
            self.G, dtype=dtype, weight=weight)
        res = Scoresheet()

        for k in progressbar(range(1, max_power + 1),
                             "Computing matrix powers: "):
            # The below method is found to be fastest for iterating through a
            # sparse matrix, see
            # http://stackoverflow.com/questions/4319014/iterating-through-a-scipy-sparse-vector-or-matrix
            matrix = (adj ** k).tocoo()
            for i, j, d in izip(matrix.row, matrix.col, matrix.data):
                if i == j:
                    continue
                u, v = nodelist[i], nodelist[j]
                if self.eligible(u, v):
                    w = d * (beta ** k)
                    res[(u, v)] += w

        # We count double in case of undirected networks ((i, j) and (j, i))
        if not self.G.is_directed():
            for pair in res:
                res[pair] /= 2

        return res
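The loop above accumulates the truncated Katz score, score(i, j) = sum over k = 1..max_power of beta**k * (A**k)[i, j]. A tiny dense sanity check of that sum (a sketch in plain numpy, independent of the class above):

import numpy as np

A = np.array([[0, 1, 0],
              [1, 0, 1],
              [0, 1, 0]], dtype=float)
beta, max_power = 0.1, 5
S = sum(beta ** k * np.linalg.matrix_power(A, k)
        for k in range(1, max_power + 1))
print(S[0, 2])  # ~beta**2, dominated by the single two-step walk 0-1-2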
def get_iid_lhoods(T, edge_to_P, root, root_prior_distn1d, node_to_data_lmaps):
    """
    Get the likelihood of this combination of parameters.

    Parameters
    ----------
    T : directed networkx tree graph
        Edge and node annotations are ignored.
    edge_to_P : dict of 2d float ndarrays
        A map from directed edges of the tree graph
        to 2d float ndarrays representing state transition probabilities.
    root : hashable
        This is the root node.
        Following networkx convention, this may be anything hashable.
    root_prior_distn1d : 1d ndarray
        Prior state distribution at the root.
    node_to_data_lmaps : sequence of dicts of 1d float ndarrays
        Observed data.
        For each iid site, a dict mapping each node to a 1d array
        giving the observation likelihood for each state.
        This parameter is similar to the sample_histories output.

    Returns
    -------
    lhoods : 1d float array
        Likelihood for each iid site.

    """
    nsites = len(node_to_data_lmaps)

    # Define a toposort node ordering and a corresponding csr matrix.
    nodes = nx.topological_sort(T, [root])
    node_to_idx = dict((na, i) for i, na in enumerate(nodes))
    m = nx.to_scipy_sparse_matrix(T, nodes)

    # Stack the transition matrices into a single array.
    nnodes = len(nodes)
    nstates = root_prior_distn1d.shape[0]
    trans = np.empty((nnodes-1, nstates, nstates), dtype=float)
    for (na, nb), P in edge_to_P.items():
        edge_idx = node_to_idx[nb] - 1
        trans[edge_idx, :, :] = P

    # Stack the data into a single array.
    data = np.empty((nsites, nnodes, nstates), dtype=float)
    for i, node_to_data_lmap in enumerate(node_to_data_lmaps):
        for j, na in enumerate(nodes):
            data[i, j, :] = node_to_data_lmap[na]

    # Compute the likelihoods.
    lhoods = np.empty(nsites, dtype=float)
    validation = 0
    iid_likelihoods(m.indices, m.indptr,
        trans, data, root_prior_distn1d, lhoods, validation)

    # Return the dense array that contains the likelihood at each iid site.
    return lhoods
Example #27
 def __test_save_and_load_graph_npz(self, x):
     '''Test save and load a Networkx DiGraph in npz format with np-array wrapping.'''
     out_file = tempfile.TemporaryFile()
     np.savez(out_file, x=np.array([nx.to_scipy_sparse_matrix(x)]))
     out_file.seek(0) # Only needed here to simulate closing & reopening file
     x2 = np.load(out_file)
     y = nx.from_scipy_sparse_matrix(x2['x'][0], nx.DiGraph())
     assert_equal(x.nodes(), y.nodes(), 'Saving and loading did not restore the original object')
     assert_equal(x.edges(), y.edges(), 'Saving and loading did not restore the original object')
def nx_graph_nbrw(G):
    import networkx as nx

    A = nx.to_scipy_sparse_matrix(G)
    P = mkm.graph_nbrw_transition_matrix(A)
    mc = mkm.MarkovChain(P)
    mc.set_stationary_distribution(mkm.uniform_distribution(mc.get_n()))

    return mc
def sparse_laplacian(G, weight='weight'):
    import scipy.sparse
    nodelist = G.nodes()
    A = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  format='csr')
    n,m = A.shape
    diags = A.sum(axis=1)
    D = scipy.sparse.spdiags(diags.flatten(), [0], m, n, format='csr')
    return D - A
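A quick check of L = D - A (a minimal sketch): the diagonal carries the degrees and every row sums to zero.

import networkx as nx

L = sparse_laplacian(nx.path_graph(4))
print(L.toarray())  # diagonal [1, 2, 2, 1], all row sums 0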
Example #30
def adjacency_matrix(G, nodelist=None, weight='weight'):
    """Returns adjacency matrix of G.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to provide each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    A : SciPy sparse matrix
      Adjacency matrix representation of G.

    Notes
    -----
    For directed graphs, entry i,j corresponds to an edge from i to j.

    If you want a pure Python adjacency matrix representation try
    networkx.convert.to_dict_of_dicts which will return a
    dictionary-of-dictionaries format that can be addressed as a
    sparse matrix.

    For MultiGraph/MultiDiGraph with parallel edges the weights are summed.
    See to_numpy_matrix for other options.

    The convention used for self-loop edges in graphs is to assign the
    diagonal matrix entry value to the edge weight attribute
    (or the number 1 if the edge has no weight attribute).  If the
    alternate convention of doubling the edge weight is desired the
    resulting Scipy sparse matrix can be modified as follows:

    >>> import scipy as sp
    >>> G = nx.Graph([(1,1)])
    >>> A = nx.adjacency_matrix(G)
    >>> print(A.todense())
    [[1]]
    >>> A.setdiag(A.diagonal()*2)
    >>> print(A.todense())
    [[2]]

    See Also
    --------
    to_numpy_matrix
    to_scipy_sparse_matrix
    to_dict_of_dicts
    adjacency_spectrum
    """
    return nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight)
Example #31
 def adj_mat(graph, weight=None):
     return to_scipy_sparse_matrix(graph, weight=weight)
graph = nx.DiGraph()
graph.add_nodes_from(nodes)
graph.add_edges_from(edges)
nx.draw(graph, with_labels=True)
plt.show()
data = {
    0: np.arange(24) + 0,
    1: np.arange(24) + 1,
    2: np.arange(24) + 2,
    3: np.arange(24) + 3,
    4: np.arange(24) + 4,
    5: np.arange(24) + 5
}
val_data = {6: np.arange(24) + 6, 7: np.arange(24) + 7}
# dense_adjacency = nx.to_pandas_adjacency(graph)
sparse_adj = nx.to_scipy_sparse_matrix(graph).tocoo()
sparse_adj_in_coo_format = np.stack([sparse_adj.row, sparse_adj.col])
sparse_adj_in_coo_format_tensor = torch.tensor(sparse_adj_in_coo_format,
                                               dtype=torch.long).cuda()

frame_data = pd.DataFrame.from_dict(data)
valframe = pd.DataFrame.from_dict(val_data)
data_graphs = []
for i in range(len(frame_data) - 1):
    x = torch.tensor([frame_data.iloc[i]], dtype=torch.double).cuda()
    x = x.permute(1, 0)  # nodes, features
    y = torch.tensor([frame_data.iloc[i + 1]], dtype=torch.double).cuda()
    y = y.permute(1, 0)  # nodes, features
    data_entry = Data(x=x, y=y, edge_index=sparse_adj_in_coo_format_tensor)
    data_graphs.append(data_entry)
loader = DataLoader(data_graphs, batch_size=1)
Example #33
def directed_modularity_matrix(G, nodelist=None, weight=None):
    """Return the directed modularity matrix of G.

    The modularity matrix is the matrix B = A - <A>, where A is the adjacency
    matrix and <A> is the expected adjacency matrix, assuming that the graph
    is described by the configuration model.

    More specifically, the element B_ij of B is defined as
        B_ij = A_ij - k_i(out) k_j(in) / m
    where k_i(out) is the out-degree of node i, and k_j(in) is the in-degree
    of node j, with m the number of edges in the graph. When weight is set
    to the name of an edge attribute, A_ij, k_i, k_j and m are computed
    using its value.

    Parameters
    ----------
    G : DiGraph
       A NetworkX DiGraph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used for
       the edge weight.  If None then all edge weights are 1.

    Returns
    -------
    B : Numpy matrix
      The modularity matrix of G.

    Examples
    --------
    >>> import networkx as nx
    >>> G = nx.DiGraph()
    >>> G.add_edges_from(((1,2), (1,3), (3,1), (3,2), (3,5), (4,5), (4,6),
    ...                   (5,4), (5,6), (6,4)))
    >>> B = nx.directed_modularity_matrix(G)


    Notes
    -----
    NetworkX defines the element A_ij of the adjacency matrix as 1 if there
    is a link going from node i to node j. Leicht and Newman use the opposite
    definition. This explains the different expression for B_ij.

    See Also
    --------
    to_numpy_matrix
    adjacency_matrix
    laplacian_matrix
    modularity_matrix

    References
    ----------
    .. [1] E. A. Leicht, M. E. J. Newman, 
       "Community structure in directed networks",
        Phys. Rev Lett., vol. 100, no. 11, p. 118703, 2008.
    """
    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  format='csr')
    k_in = A.sum(axis=0)
    k_out = A.sum(axis=1)
    m = k_in.sum()
    # Expected adjacency matrix
    X = k_out * k_in / m
    return A - X
def transversal_network_mucha_original(dyn_graph: tn.DynGraphSN,
                                       om=0.5,
                                       form="local",
                                       elapsed_time=False,
                                       matlab_session=None):
    """
    Multiplex community detection, Mucha et al.

    Algorithm described in : `Mucha, P. J., Richardson, T., Macon, K., Porter, M. A., & Onnela, J. P. (2010). Community structure in time-dependent, multiscale, and multiplex networks. science, 328(5980), 876-878.`

    Brief summary: a single network is created by linking copies of the same node across snapshots. A modified modularity optimization algorithm is then run
    on this network.

    For this function, Matlab must be installed,
    and the Matlab engine for Python must be set up; see
    https://fr.mathworks.com/help/matlab/matlab_external/install-the-matlab-engine-for-python.html
    (you can find the value of matlabroot by typing matlabroot in your matlab console)


    :param dyn_graph: dynamic network
    :param om:
    :param form:
    :param elapsed_time:
    :param matlab_session:
    :return:
    """
    print("preprocessing MUCHA ")

    #Original example on genlouvain website
    #N = length(A{1});
    #T = length(A);
    #B = spalloc(N * T, N * T, N * N * T + 2 * N * T);
    #twomu = 0;
    #for s=1:T
    #     k = sum(A
    #     {s});
    #     twom = sum(k);
    #     twomu = twomu + twom;
    #     indx = [1:N]+(s - 1) * N;
    #     B(indx, indx) = A
    #     {s} - gamma * k
    #     '*k/twom;
    #
    #
    # end
    # twomu = twomu + 2 * omega * N * (T - 1);
    # B = B + omega * spdiags(ones(N * T, 2), [-N, N], N * T, N * T);
    # [S, Q] = genlouvain(B);
    # Q = Q / twomu
    # S = reshape(S, N, T);

    graphs = dyn_graph.snapshots()

    nodeOrderAllSN = []
    listModularityMatrices = []

    #for each graph in order
    for t, gT in enumerate(graphs):
        g = graphs[gT]
        nodeOrder = list(g.nodes())
        if len(nodeOrder) > 0:
            nodeOrderAllSN += [(t, n) for n in nodeOrder]

            gmat = nx.to_scipy_sparse_matrix(g,
                                             nodelist=nodeOrder,
                                             format="dok")
            k = gmat.sum(axis=0)  #degrees of nodes
            twom = k.sum(axis=1)  #sum of degrees
            nullModel = k.transpose() * k / twom
            listModularityMatrices.append(gmat - nullModel)

    #Concatenate all null modularity matrices
    #B = scipy.sparse.block_diag(*listModularityMatrices)
    B = scipy.sparse.block_diag(listModularityMatrices, format="dok")
    listModularityMatrices = None

    #B = scipy.sparse.dok_matrix(B)

    #add the link between same nodes in different timestamps
    multipleAppearances = {}  #for each node, list of indices where it appears

    ordered_real_times = dyn_graph.snapshots_timesteps()
    for (i, (t, n)) in enumerate(nodeOrderAllSN):
        multipleAppearances.setdefault(n, []).append((i, t))

    if form == "global":
        for (n, nAppearences) in multipleAppearances.items():
            for (i, t) in nAppearences:
                for (j, t) in nAppearences:
                    if i != j:
                        B[i, j] = om
    if form == "local":
        #print(multipleAppearances)
        for (n, orderedAppearences) in multipleAppearances.items():
            #print(orderedAppearences)
            for i in range(0, len(orderedAppearences) - 1):
                #BE CAREFUL, modified recently
                ii, t = orderedAppearences[i]
                ii_next, t_next = orderedAppearences[i + 1]
                #index_t = ordered_real_times.index(t)

                if ordered_real_times[t + 1] == ordered_real_times[t_next]:
                    B[ii, ii_next] = om

    if form == "local_relaxed":
        #print(multipleAppearances)
        for (n, orderedAppearences) in multipleAppearances.items():
            for i in range(0, len(orderedAppearences) - 1):
                ii, t = orderedAppearences[i]
                ii_next, t_next = orderedAppearences[i + 1]
                B[ii, ii_next] = om

    #print("saving temp file")
    #numpy.savetxt("test.csv", B, fmt="%.2f", delimiter=",")
    #print("file saved")

    #B = scipy.sparse.coo_matrix(B)
    print("calling external code")

    (S, duration) = _runMatlabCode(B, matlab_session=matlab_session)
    #print("transforming back to dynamic net")

    DCSN = tn.DynCommunitiesSN()
    times = dyn_graph.snapshots_timesteps()
    for i in range(len(S)):
        DCSN.add_affiliation(nodeOrderAllSN[i][1], S[i],
                             times[nodeOrderAllSN[i][0]])

    print("sucessfully finished MUCHA ")

    if elapsed_time:
        return (DCSN, {"total": duration})
    return DCSN
Example #35
def pagerank_scipy(G, alpha=0.85, personalization=None,
                   max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
                   dangling=None):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specfiied, a nodes personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  dtype=float)
    S = scipy.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M

    # initial vector
    if nstart is None:
        x = scipy.repeat(1.0 / N, N)
    else:
        x = scipy.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x = x / x.sum()

    # Personalization vector
    if personalization is None:
        p = scipy.repeat(1.0 / N, N)
    else:
        p = scipy.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = scipy.array([dangling.get(n, 0) for n in nodelist],
                                       dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = scipy.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights) + \
            (1 - alpha) * p
        # check convergence, l1 norm
        err = scipy.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
Example #36
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
import pickle
from node2vec import node2vec
from gensim.models import Word2Vec
from node2vec.preprocessing import mask_test_edges

network_dir = './GraphPickle/HPO-Orphanet.pkl'

with open(network_dir, 'rb') as f:
    adj, features = pickle.load(f)

g = nx.Graph(adj)

np.random.seed(0)
adj_sparse = nx.to_scipy_sparse_matrix(g)

# Perform train-test split
adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = mask_test_edges(adj_sparse, test_frac=.3, val_frac=.1)

g_train = nx.from_scipy_sparse_matrix(adj_train)

P = 1  # Return hyperparameter
Q = 0.05  # In-out hyperparameter
WINDOW_SIZE = 10  # Context size for optimization
NUM_WALKS = 10  # Number of walks per source
WALK_LENGTH = 5  # Length of walk per source
DIMENSIONS = 128  # Embedding dimension
DIRECTED = False  # Graph directed/undirected
WORKERS = 8  # Num. parallel workers
Example #37
 def test_empty(self):
     G = nx.Graph()
     G.add_node(1)
     M = nx.to_scipy_sparse_matrix(G)
     npt.assert_equal(M.todense(), np.matrix([[0]]))
Example #38
 def test_identity_graph_matrix(self):
     "Conversion from graph to sparse matrix to graph."
     A = nx.to_scipy_sparse_matrix(self.G1)
     self.identity_conversion(self.G1, A, nx.Graph())
Example #39
def gen_er(args):
    g = nx.fast_gnp_random_graph(args.er_n, args.er_p)
    csr = nx.to_scipy_sparse_matrix(g, weight=None, format='csr')
    graph_io.save_graph(args.out, csr)
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 29 00:48:07 2018

@author: Sami
"""
import networkx as nx
hep_graph = nx.read_gml('C:/Users/samiu/Desktop/8009 LAB/hep-th.gml')
hep_graph.remove_nodes_from(list(nx.isolates(hep_graph)))
gMatrix = nx.to_scipy_sparse_matrix(hep_graph)
#graph_dict = nx.to_dict_of_lists(hep_graph)
dense_matrix = gMatrix.todense()  # todense() returns a dense matrix
def prim(graph, root):
    assert type(graph)==dict

    nodes = list(graph)
    nodes.remove(root)
    
    visited = [root]
    path = []
    next = None

    while nodes:
        distance = float('inf')
        for s in visited:
            for d in graph[s]:
                if d in visited or s == d:
                    continue
                if graph[s][d] < distance:
                    # remember the cheapest edge leaving the visited set
                    distance = graph[s][d]
                    pre = s
                    next = d
        # grow the tree along the cheapest edge found
        path.append((pre, next))
        visited.append(next)
        nodes.remove(next)
    return path
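A usage sketch for the dict-of-dicts input format the function assumes:

graph = {'a': {'b': 1, 'c': 4},
         'b': {'a': 1, 'c': 2},
         'c': {'a': 4, 'b': 2}}
print(prim(graph, 'a'))  # -> [('a', 'b'), ('b', 'c')]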
Example #41
 def test_identity_digraph_matrix(self):
     "Conversion from digraph to sparse matrix to digraph."
     A = nx.to_scipy_sparse_matrix(self.G2)
     self.identity_conversion(self.G2, A, nx.DiGraph())
Example #42
 def test_identity_weighted_digraph_matrix(self):
     """Conversion from weighted digraph to sparse matrix to weighted digraph."""
     A = nx.to_scipy_sparse_matrix(self.G4)
     self.identity_conversion(self.G4, A, nx.DiGraph())
Example #43
def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    try:
        import numpy as np
    except ImportError as e:
        raise ImportError("hits_scipy() requires SciPy and NumPy:"
                          "http://scipy.org/ http://numpy.org/") from e
    if len(G) == 0:
        return {}, {}
    M = nx.to_scipy_sparse_matrix(G, nodelist=list(G))
    (n, m) = M.shape  # should be square
    A = M.T * M  # authority matrix
    x = np.ones((n, 1)) / n  # initial guess
    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = A * x
        x = x / x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        i += 1

    a = np.asarray(x).flatten()
    # h=M*a
    h = np.asarray(M * a).flatten()
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G, map(float, h)))
    authorities = dict(zip(G, map(float, a)))
    return hubs, authorities
def spectral_layout(G, weight='weight', scale=1, center=None, dim=2):
    """Position nodes using the eigenvectors of the graph Laplacian.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spectral_layout(G)

    Notes
    -----
    Directed graphs will be considered as undirected graphs when
    positioning the nodes.

    For larger graphs (>500 nodes) this will use the SciPy sparse
    eigenvalue solver (ARPACK).
    """
    # handle some special cases that break the eigensolvers
    import numpy as np

    G, center = _process_params(G, center, dim)

    if len(G) <= 2:
        if len(G) == 0:
            pos = np.array([])
        elif len(G) == 1:
            pos = np.array([center])
        else:
            pos = np.array([np.zeros(dim), np.array(center) * 2.0])
        return dict(zip(G, pos))
    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype='d')
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _sparse_spectral(A, dim)
    except (ImportError, ValueError):
        # Dense matrix
        A = nx.to_numpy_matrix(G, weight=weight)
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _spectral(A, dim)

    pos = rescale_layout(pos, scale) + center
    pos = dict(zip(G, pos))
    return pos
def calculate(min_degree,
              file_path="graph.graph",
              analyse="no",
              classifier='SVM'):
    graph = nx.read_edgelist(file_path, delimiter=" ")
    nodes = [
        node for node, degree in graph.degree().items() if degree >= min_degree
    ]
    graph = graph.subgraph(nodes)
    connected_components = nx.connected_components(graph)
    largest_cc_nodes = max(connected_components, key=len)
    graph = graph.subgraph(largest_cc_nodes)

    adj_sparse = nx.to_scipy_sparse_matrix(graph)
    adj = nx.to_numpy_matrix(graph)
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = mask_test_edges(adj_sparse, test_frac=.3, val_frac=.1)

    g_train = nx.from_scipy_sparse_matrix(
        adj_train)  # new graph object with only non-hidden edges
    aa_matrix = np.zeros(adj.shape)
    for u, v, p in nx.adamic_adar_index(
            g_train):  # (u, v) = node indices, p = Adamic-Adar index
        aa_matrix[u][v] = p
        aa_matrix[v][u] = p  # make sure it's symmetric

    # Normalize array
    aa_matrix = aa_matrix / aa_matrix.max()
    aa_roc, aa_ap = get_roc_score(adj_sparse, test_edges, test_edges_false,
                                  aa_matrix)

    jc_matrix = np.zeros(adj.shape)
    for u, v, p in nx.jaccard_coefficient(
            g_train):  # (u, v) = node indices, p = Jaccard coefficient
        jc_matrix[u][v] = p
        jc_matrix[v][u] = p  # make sure it's symmetric

    jc_matrix = jc_matrix / jc_matrix.max()

    # Calculate ROC AUC and Average Precision
    jc_roc, jc_ap = get_roc_score(adj_sparse, test_edges, test_edges_false,
                                  jc_matrix)

    pa_matrix = np.zeros(adj.shape)
    for u, v, p in nx.preferential_attachment(
            g_train):  # (u, v) = node indices, p = Jaccard coefficient
        pa_matrix[u][v] = p
        pa_matrix[v][u] = p  # make sure it's symmetric

    # Normalize array
    pa_matrix = pa_matrix / pa_matrix.max()

    # Calculate ROC AUC and Average Precision
    pa_roc, pa_ap = get_roc_score(adj_sparse, test_edges, test_edges_false,
                                  pa_matrix)
    import time
    time_before_node2vec32 = time.time()
    model_factory = ModelFactory(g_train)
    model = model_factory.get_model("node2vec_32")
    time_after_node2vec32 = time.time()

    node2vec32_time = time_after_node2vec32 - time_before_node2vec32

    #TODO: refactor these three calls. Make a function out of it
    # Store embeddings mapping
    time_before_stacking_embedding = time.time()
    emb_mappings = model.wv
    emb_list = []
    for node_index in range(0, adj_sparse.shape[0]):
        node_str = str(node_index)
        node_emb = emb_mappings[node_str]
        emb_list.append(node_emb)
    emb_matrix = np.vstack(emb_list)
    time_after_stacking_embedding = time.time()
    time_before_UMAP16 = time.time()
    umap_obj = model_factory.get_model("UMAP_16")
    emb_mappings_umap = umap_obj.fit_transform(emb_matrix)
    time_after_UMAP16 = time.time()

    umap16_time = time_after_UMAP16 - time_before_UMAP16

    emb_list_umap = []
    for node_index in range(0, adj_sparse.shape[0]):
        node_emb = emb_mappings_umap[node_index]
        emb_list_umap.append(node_emb)
    emb_matrix_umap = np.vstack(emb_list_umap)
    time_before_PCA = time.time()
    pca_obj = model_factory.get_model("PCA_16")
    emb_mappings_pca = pca_obj.fit_transform(emb_matrix)
    time_after_PCA = time.time()

    pca16_time = time_after_PCA - time_before_PCA

    emb_list_pca = []
    for node_index in range(0, adj_sparse.shape[0]):
        node_emb = emb_mappings_pca[node_index]
        emb_list_pca.append(node_emb)
    emb_matrix_pca = np.vstack(emb_list_pca)

    time_before_node2vec16 = time.time()
    node2vec16_model = model_factory.get_model("node2vec_16")
    emb_mappings_node2vec16 = node2vec16_model.wv
    time_after_node2vec16 = time.time()

    node2vec16_time = time_after_node2vec16 - time_before_node2vec16

    emb_list_node2vec_16 = []
    for node_index in range(0, adj_sparse.shape[0]):
        node_str = str(node_index)
        node_emb = emb_mappings_node2vec16[node_str]
        emb_list_node2vec_16.append(node_emb)
    emb_matrix_node2vec16 = np.vstack(emb_list_node2vec_16)
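
    # A helper like this (a hypothetical sketch, covering the TODO above) would
    # replace the three stacking loops: each one builds an (n, d) matrix from a
    # per-node embedding lookup, differing only in whether keys are strings.
    def stack_embeddings(mappings, n_nodes, str_keys=False):
        rows = [mappings[str(i)] if str_keys else mappings[i]
                for i in range(n_nodes)]
        return np.vstack(rows)
    # e.g. emb_matrix = stack_embeddings(emb_mappings, adj_sparse.shape[0],
    #                                    str_keys=True)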

    lp_arg = LP_arguments(emb_mappings=emb_mappings, adj_sparse = adj_sparse, train_edges = train_edges, \
     train_edges_false = train_edges_false, val_edges = val_edges, val_edges_false = val_edges_false, \
     test_edges = test_edges, test_edges_false = test_edges_false, matrix=emb_matrix)

    lp_arg_umap = LP_arguments(emb_mappings=emb_mappings_umap, adj_sparse=adj_sparse, train_edges = train_edges, \
     train_edges_false = train_edges_false, val_edges = val_edges, val_edges_false = val_edges_false, \
     test_edges = test_edges, test_edges_false = test_edges_false, matrix=emb_matrix_umap)

    lp_arg_pca = LP_arguments(emb_mappings=emb_mappings_pca, adj_sparse=adj_sparse, train_edges = train_edges, \
     train_edges_false = train_edges_false, val_edges = val_edges, val_edges_false = val_edges_false, \
     test_edges = test_edges, test_edges_false = test_edges_false, matrix=emb_matrix_pca)

    lp_arg_node2vec16 = LP_arguments(emb_mappings=emb_mappings_node2vec16, adj_sparse=adj_sparse,
    train_edges = train_edges, train_edges_false = train_edges_false, val_edges = val_edges, val_edges_false = val_edges_false, \
     test_edges = test_edges, test_edges_false = test_edges_false, matrix=emb_matrix_node2vec16)

    methods = {
        "node2vec (32)": lp_arg,
        "node2vec (16)": lp_arg_node2vec16,
        "node2vec+UMAP (16)": lp_arg_umap,
        "node2vec+PCA (16)": lp_arg_pca
    }

    adamic_adard_result = MethodResult('Adamic-Adar', aa_roc, aa_ap)
    jc_result = MethodResult('Jaccard Coefficient', jc_roc, jc_ap)
    pa_result = MethodResult('Preferential Attachment', pa_roc, pa_ap)
    lime = False
    if analyse in ['y', 'yes', 'true']:
        lime = True

    methods_list = [adamic_adard_result, jc_result, pa_result]
    lime_results = []
    for key, value in methods.items():
        val_roc, val_ap, test_roc, test_ap, lime_explanations,\
            training_time = link_prediction_on_embedding(key, value, lime, classifier)
        methods_list.append(MethodResult(key, test_roc, test_ap))
        lime_results.append(lime_explanations)

    if lime:
        import os
        if not os.path.exists('plots'):
            os.makedirs('plots')
        lime_plotter = LimeExplainer.LimeExplainerPlotter(
            lime_results, adj_sparse.shape[0])
        lime_plotter.plot_feature_importance()

    if file_path == "graph.graph":
        caption = "Link prediction on Wikipedia dataset containing"
    elif file_path == "soc_hamsterster.edges":
        caption = "Link prediction on network of the friendships between users of hamsterster.com"
    elif file_path == "external_graph.csv":
        caption = "Link prediction on DBLP dataset"
    else:
        caption = "Unknown caption"
    result = ModelAccuracyResults(adj_sparse.shape[0], len(train_edges),
                                  len(test_edges), methods_list, caption,
                                  classifier, training_time)

    with open("latex_results.txt", "a") as file:
        file.write(result.get_latex_representation())

    with open("csv_results.txt", "a") as file:
        file.write(result.get_csv_representation())

    methods_time = [
        MethodTime("nodevec (32)", node2vec32_time),
        MethodTime("node2vec (16)", node2vec16_time),
        MethodTime("node2vec+UMAP (16)", umap16_time),
        MethodTime("node2vec+PCA (16)", pca16_time)
    ]

    time_results = ModelTimeResults(
        methods_time, adj_sparse.shape[0], len(train_edges), len(test_edges),
        "Time of the training of algorithms on Wikipedia dataset", classifier,
        training_time)

    with open("latex_time.txt", "a") as file:
        file.write(time_results.get_latex_representation())

    with open("csv_time.txt", "a") as file:
        file.write(time_results.get_csv_representation())
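
# `get_roc_score` is not defined in this excerpt; a minimal sketch of the usual
# link-prediction scorer (hypothetical, assuming scikit-learn is available):
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

def get_roc_score(adj_sparse, edges_pos, edges_neg, score_matrix):
    # adj_sparse is kept only for signature compatibility with the calls above.
    preds_pos = [score_matrix[u, v] for u, v in edges_pos]
    preds_neg = [score_matrix[u, v] for u, v in edges_neg]
    preds = np.concatenate([preds_pos, preds_neg])
    labels = np.concatenate([np.ones(len(preds_pos)),
                             np.zeros(len(preds_neg))])
    return roc_auc_score(labels, preds), average_precision_score(labels, preds)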
Exemple #46
0
def spectral_layout(G, dim=2, weighted=True, scale=1):
    """Position nodes using the eigenvectors of the graph Laplacian. 

    Parameters
    ----------
    G : NetworkX graph 

    dim : int 
       Dimension of layout

    weighted : boolean
        If True, use edge weights in layout 

    scale : float
        Scale factor for positions 

    Returns
    -------
    dict : 
       A dictionary of positions keyed by node

    Examples
    --------
    >>> G=nx.path_graph(4)
    >>> pos=nx.spectral_layout(G)

    Notes
    -----
    Directed graphs will be considered as undirected graphs when
    positioning the nodes.

    For larger graphs (>500 nodes) this will use the SciPy sparse
    eigenvalue solver (ARPACK).
    """
    # handle some special cases that break the eigensolvers
    try:
        import numpy as np
    except ImportError:
        raise ImportError(
            "spectral_layout() requires numpy: http://scipy.org/ ")
    if len(G) <= 2:
        if len(G) == 0:
            pos = np.array([])
        elif len(G) == 1:
            pos = np.array([[1, 1]])
        else:
            pos = np.array([[0, 0.5], [1, 0.5]])
        return dict(list(zip(G, pos)))
    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G)
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _sparse_spectral(A, dim=dim, weighted=weighted)
    except (ImportError, ValueError):
        # Dense matrix
        A = nx.to_numpy_matrix(G)
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _spectral(A, dim=dim, weighted=weighted)

    pos = _rescale_layout(pos, scale=scale)
    return dict(list(zip(G, pos)))
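
# Usage sketch for the function above (via the public networkx API, since the
# private helpers it calls are not shown here): positions come back as a dict
# keyed by node, one length-`dim` coordinate array each.
import networkx as nx

G = nx.path_graph(10)
pos = nx.spectral_layout(G, dim=2)
assert set(pos) == set(G)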
Exemple #47
0
 def test_null_raise(self):
     with pytest.raises(nx.NetworkXError):
         nx.to_scipy_sparse_matrix(nx.Graph())
def b_lexrank(G,
              baseline_score,
              alpha=0.85,
              personalization=None,
              max_iter=100,
              tol=1.0e-6,
              weight='weight',
              seed_weight=1):
    """ Return the biased Lexrank scores of the nodes in the graph

		This program is based upon the pagerank_scipy program from the networkx 
		source.

	Parameters
	___________
	G: graph
		A NetworkX graph

	alpha: float, optional
		A damping parameter for PageRank, default = 0.85

	personalization: dict, optional
		The "personalization vector" consisting of a dictionary with a
		key for every graph node and nonzero personalization value for each node.

	max_iter : integer, optional
		Maximum number of iterations in power method eigenvalue solver.

	tol : float, optional
		Error tolerance used to check convergence in power method solver.

	weight : key, optional
		Edge data key to use as weight.  If None weights are set to 1.
	
	baseline_score: vector, float
		similarity scores between the seed and sentences within the graph



	Returns
	-------
	pagerank : dictionary
		Dictionary of nodes with PageRank as value

	Examples
	--------
		>>> G=nx.DiGraph(nx.path_graph(4))
		>>> pr=nx.pagerank_scipy(G,alpha=0.9)

	Notes
	-----
	The eigenvector calculation uses power iteration with a SciPy
	sparse matrix representation.


		References
		----------
		.. [1] A. Langville and C. Meyer,
		   "A survey of eigenvector methods of web information retrieval."
		   http://citeseer.ist.psu.edu/713792.html
		.. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
		   The PageRank citation ranking: Bringing order to the Web. 1999
		   http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
		   [3] Otterbacher, Erkan and Radev, Biased LexRank: Passage Retrieval using Random
		   Walks with Question-Based Priors (2008)
		"""

    try:
        import scipy.sparse
        import networkx as nx
        from numpy import diag
        from networkx.exception import NetworkXError
    except ImportError:
        raise ImportError("pagerank_scipy() requires SciPy: http://scipy.org/")
    if len(G) == 0:
        return {}
    # choose ordering in matrix
    if personalization is None:  # use G.nodes() ordering
        nodelist = G.nodes()
    elif personalization == 'biased':
        nodelist = G.nodes()
    else:  # use personalization "vector" ordering
        nodelist = personalization.keys()
    M = nx.to_scipy_sparse_matrix(G,
                                  nodelist=nodelist,
                                  weight=weight,
                                  dtype='f')
    (n, m) = M.shape  # should be square
    S = scipy.array(M.sum(axis=1)).flatten()
    #    for i, j, v in zip( *scipy.sparse.find(M) ):
    #        M[i,j] = v / S[i]
    S[S > 0] = 1.0 / S[S > 0]
    #creates a sparse diagonal matrix with normalization values
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    x = scipy.ones((n)) / n  # initial guess
    dangle = scipy.array(scipy.where(M.sum(axis=1) == 0, 1.0 / n, 0)).flatten()
    # add "teleportation"/personalization
    if personalization == 'biased':
        v = scipy.array(baseline_score)
        v = v / v.sum()
        v = seed_weight * v / v.sum()
        #print v.shape

    elif personalization is not None:
        v = scipy.array(list(personalization.values()), dtype=float)
        v = v / v.sum()
    else:
        v = x
        #print v.shape

    i = 0
    while i <= max_iter:
        # power iteration: make up to max_iter iterations
        xlast = x
        x = alpha * (x * M + scipy.dot(dangle, xlast)) + (1 - alpha) * v
        x = x / x.sum()
        # check convergence, l1 norm
        err = scipy.absolute(x - xlast).sum()
        if err < n * tol:
            return dict(zip(nodelist, map(float, x)))
        i += 1
    raise NetworkXError('pagerank_scipy: power iteration failed to converge '
                        'in %d iterations.' % (i + 1))
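
# Usage sketch for b_lexrank (illustrative values): baseline_score carries one
# seed-similarity score per node, in G.nodes() order, and with
# personalization='biased' it biases the teleportation step of the walk.
import networkx as nx

G = nx.DiGraph(nx.path_graph(4))
baseline = [0.1, 0.4, 0.4, 0.1]
scores = b_lexrank(G, baseline, alpha=0.85, personalization='biased')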
def fruchterman_reingold_layout(G,
                                k=None,
                                pos=None,
                                fixed=None,
                                iterations=50,
                                threshold=1e-4,
                                weight='weight',
                                scale=1,
                                center=None,
                                dim=2,
                                random_state=None):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    k : float (default=None)
        Optimal distance between nodes.  If None the distance is set to
        1/sqrt(n) where n is the number of nodes.  Increase this value
        to move nodes farther apart.

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        random initial positions.

    fixed : list or None  optional (default=None)
        Nodes to keep fixed at initial position.

    iterations : int  optional (default=50)
        Maximum number of iterations taken

    threshold: float optional (default = 1e-4)
        Threshold for relative error in node position changes.
        The iteration stops if the error is below this threshold.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions. Not used unless `fixed is None`.

    center : array-like or None
        Coordinate pair around which to center the layout.
        Not used unless `fixed is None`.

    dim : int
        Dimension of layout.

    random_state : int, RandomState instance or None  optional (default=None)
        Set the random state for deterministic node layouts.
        If int, `random_state` is the seed used by the random number generator,
        if numpy.random.RandomState instance, `random_state` is the random
        number generator,
        if None, the random number generator is the RandomState instance used
        by numpy.random.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spring_layout(G)

    # The same using longer but equivalent function name
    >>> pos = nx.fruchterman_reingold_layout(G)
    """
    import numpy as np

    G, center = _process_params(G, center, dim)

    if fixed is not None:
        nfixed = dict(zip(G, range(len(G))))
        fixed = np.asarray([nfixed[v] for v in fixed])

    if pos is not None:
        # Determine size of existing domain to adjust initial positions
        dom_size = max(coord for pos_tup in pos.values() for coord in pos_tup)
        if dom_size == 0:
            dom_size = 1
        shape = (len(G), dim)
        pos_arr = random_state.rand(*shape) * dom_size + center

        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {nx.utils.arbitrary_element(G.nodes()): center}

    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype='f')
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _sparse_fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                           threshold, dim, random_state)
    except (ImportError, ValueError):
        A = nx.to_numpy_matrix(G, weight=weight)
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                    threshold, dim, random_state)
    if fixed is None:
        pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
Exemple #50
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        S = nx.to_scipy_sparse_matrix(graph)
        t1 = time()
        S = (S + S.T) / 2
        self._node_num = graph.number_of_nodes()

        # Generate encoder, decoder and autoencoder
        self._num_iter = self._n_iter
        # If cannot use previous step information, initialize new models
        self._encoder = get_encoder(self._node_num, self._d, self._K,
                                    self._n_units, self._nu1, self._nu2,
                                    self._actfn)
        self._decoder = get_decoder(self._node_num, self._d, self._K,
                                    self._n_units, self._nu1, self._nu2,
                                    self._actfn)
        self._autoencoder = get_autoencoder(self._encoder, self._decoder)

        # Initialize self._model
        # Input
        x_in = Input(shape=(2 * self._node_num, ), name='x_in')
        x1 = Lambda(lambda x: x[:, 0:self._node_num],
                    output_shape=(self._node_num, ))(x_in)
        x2 = Lambda(lambda x: x[:, self._node_num:2 * self._node_num],
                    output_shape=(self._node_num, ))(x_in)
        # Process inputs
        [x_hat1, y1] = self._autoencoder(x1)
        [x_hat2, y2] = self._autoencoder(x2)
        # Outputs
        x_diff1 = merge([x_hat1, x1],
                        mode=lambda ab: ab[0] - ab[1],
                        output_shape=lambda L: L[1])
        x_diff2 = merge([x_hat2, x2],
                        mode=lambda ab: ab[0] - ab[1],
                        output_shape=lambda L: L[1])
        y_diff = merge([y2, y1],
                       mode=lambda ab: ab[0] - ab[1],
                       output_shape=lambda L: L[1])

        # Objectives
        def weighted_mse_x(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
                y_pred: Contains x_hat - x
                y_true: Contains [b, deg]
            '''
            return KBack.sum(KBack.square(
                y_pred * y_true[:, 0:self._node_num]),
                             axis=-1) / y_true[:, self._node_num]

        def weighted_mse_y(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
            y_pred: Contains y2 - y1
            y_true: Contains s12
            '''
            min_batch_size = KBack.shape(y_true)[0]
            return KBack.reshape(KBack.sum(KBack.square(y_pred), axis=-1),
                                 [min_batch_size, 1]) * y_true

        # Model
        self._model = Model(input=x_in, output=[x_diff1, x_diff2, y_diff])
        sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
        # adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        self._model.compile(
            optimizer=sgd,
            loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
            loss_weights=[1, 1, self._alpha])

        self._model.fit_generator(
            generator=batch_generator_sdne(S, self._beta, self._n_batch, True),
            nb_epoch=self._num_iter,
            samples_per_epoch=S.nonzero()[0].shape[0] // self._n_batch,
            verbose=1)
        # Get embedding for all points
        self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
        t2 = time()
        # Save the autoencoder and its weights
        if (self._weightfile is not None):
            saveweights(self._encoder, self._weightfile[0])
            saveweights(self._decoder, self._weightfile[1])
        if (self._modelfile is not None):
            savemodel(self._encoder, self._modelfile[0])
            savemodel(self._decoder, self._modelfile[1])
        if (self._savefilesuffix is not None):
            saveweights(self._encoder,
                        'encoder_weights_' + self._savefilesuffix + '.hdf5')
            saveweights(self._decoder,
                        'decoder_weights_' + self._savefilesuffix + '.hdf5')
            savemodel(self._encoder,
                      'encoder_model_' + self._savefilesuffix + '.json')
            savemodel(self._decoder,
                      'decoder_model_' + self._savefilesuffix + '.json')
            # Save the embedding
            np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)

        # Block executed when the dataset's node IDs are not compact:
        # scatter each embedding row to the slot of its original node ID.
        listNodes = graph.nodes()
        listNodes = list(set(listNodes))  # remove duplicates from the list
        listNodes.sort()  # sort the IDs contained in the original graph
        nA = np.asarray(listNodes, dtype=int)
        dE = self._d
        nR = nA.max() + 1
        XX = np.zeros((nR, dE))
        for i in range(len(nA)):
            XX[nA[i]] = cp.copy(self._Y[i])
        self._Y = cp.copy(XX)

        return self._Y, (t2 - t1)
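
# The ID-compaction block above, as a standalone helper (a sketch): rows of
# the (n, d) embedding matrix are scattered into a larger array indexed
# directly by the original, non-contiguous node IDs.
import numpy as np

def expand_to_node_ids(Y, node_ids):
    ids = np.asarray(sorted(set(node_ids)), dtype=int)
    out = np.zeros((ids.max() + 1, Y.shape[1]))
    out[ids] = Y  # row i of Y goes to slot ids[i]
    return out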
Exemple #51
0
def geodesic_matrix(x, tn_ids=None, directed=False, weight='weight'):
    """ Generates geodesic ("along-the-arbor") distance matrix for treenodes
    of given neuron.

    Parameters
    ----------
    x :         CatmaidNeuron | CatmaidNeuronList
                If list, must contain a SINGLE neuron.
    tn_ids :    list | numpy.ndarray, optional
                Treenode IDs. If provided, will compute distances only FROM
                this subset to all other nodes.
    directed :  bool, optional
                If True, pairs without a child->parent path will be returned
                with ``distance = "inf"``.
    weight :    'weight' | None, optional
                If ``'weight'``, distances are given as physical length.
                If ``None``, distance is the number of nodes.

    Returns
    -------
    pd.SparseDataFrame
                Geodesic distance matrix. Distances in nanometres.

    See Also
    --------
    :func:`~pymaid.distal_to`
        Check if a node A is distal to node B.
    :func:`~pymaid.dist_between`
        Get point-to-point geodesic distances.
    """

    if isinstance(x, core.CatmaidNeuronList):
        if len(x) == 1:
            x = x[0]
        else:
            raise ValueError('Cannot process more than a single neuron.')
    elif isinstance(x, core.CatmaidNeuron):
        pass
    else:
        raise ValueError(
            'Unable to process data of type "{0}"'.format(type(x)))

    if x.igraph and config.use_igraph:
        nodeList = x.igraph.vs.get_attribute_values('node_id')

        # Matrix is ordered by vertex number
        m = _igraph_to_sparse(x.igraph, weight_attr=weight)
    else:
        nodeList = tuple(x.graph.nodes())

        m = nx.to_scipy_sparse_matrix(x.graph, nodeList,
                                      weight=weight)

    if not isinstance(tn_ids, type(None)):
        tn_ids = set(utils._make_iterable(tn_ids))
        tn_indices = tuple(i for i, node in enumerate(
            nodeList) if node in tn_ids)
        ix = [nodeList[i] for i in tn_indices]
    else:
        tn_indices = None
        ix = nodeList

    dmat = csgraph.dijkstra(m,
                            directed=directed, indices=tn_indices)

    return pd.SparseDataFrame(dmat, columns=nodeList, index=ix,
                              default_fill_value=float('inf'))
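
# The core pattern of geodesic_matrix on a toy graph (a minimal sketch):
# convert to a sparse adjacency matrix, then run Dijkstra from a node subset.
import networkx as nx
from scipy.sparse import csgraph

G = nx.path_graph(5)
nodelist = list(G.nodes())
m = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=None)
dmat = csgraph.dijkstra(m, directed=False, indices=[0])  # distances from node 0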
Exemple #52
0
    def test_format_keyword(self):
        WP4 = nx.Graph()
        WP4.add_edges_from(
            (n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3))
        P4 = path_graph(4)
        # Every sparse format should produce the same (unweighted) matrix.
        for fmt in ("csr", "csc", "coo", "bsr", "lil", "dia", "dok"):
            A = nx.to_scipy_sparse_matrix(P4, format=fmt)
            npt.assert_equal(
                A.todense(),
                nx.to_scipy_sparse_matrix(WP4, weight=None).todense())
Exemple #53
0
def fruchterman_reingold_layout(G,
                                dim=2,
                                pos=None,
                                fixed=None,
                                iterations=50,
                                weighted=True,
                                scale=1):
    """Position nodes using Fruchterman-Reingold force-directed algorithm. 

    Parameters
    ----------
    G : NetworkX graph 

    dim : int 
       Dimension of layout

    pos : dict
       Initial positions for nodes as a dictionary with node as keys
       and values as a list or tuple.  

    fixed : list
      Nodes to keep fixed at initial position.


    iterations : int
       Number of iterations of spring-force relaxation 

    weighted : boolean
        If True, use edge weights in layout 

    scale : float
        Scale factor for positions 

    Returns
    -------
    dict : 
       A dictionary of positions keyed by node

    Examples
    --------
    >>> G=nx.path_graph(4)
    >>> pos=nx.spring_layout(G)

    # The same using longer function name
    >>> pos=nx.fruchterman_reingold_layout(G)
    
    """
    try:
        import numpy as np
    except ImportError:
        raise ImportError(
            "fruchterman_reingold_layout() requires numpy: http://scipy.org/ ")
    if fixed is not None:
        nfixed = dict(list(zip(G, list(range(len(G))))))
        fixed = np.asarray([nfixed[v] for v in fixed])

    if pos is not None:
        pos_arr = np.asarray(np.random.random((len(G), dim)))
        for n, i in zip(G, list(range(len(G)))):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {G.nodes()[0]: (1, ) * dim}

    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G)
        pos = _sparse_fruchterman_reingold(A,
                                           pos=pos_arr,
                                           fixed=fixed,
                                           dim=dim,
                                           iterations=iterations,
                                           weighted=weighted)
    except (ImportError, ValueError):
        A = nx.to_numpy_matrix(G)
        pos = _fruchterman_reingold(A,
                                    pos=pos_arr,
                                    fixed=fixed,
                                    dim=dim,
                                    iterations=iterations,
                                    weighted=weighted)
    if fixed is None:
        pos = _rescale_layout(pos, scale=scale)
    return dict(list(zip(G, pos)))
Exemple #54
0
print("Loaded labels (" + str(len(Config.labels)) + " classes): ", end='')
print(Config.labels)

# In[93]:

threshold = 0.75
adjmat = sim.reshape((-1, )).copy()
adjmat[adjmat > threshold] = 0
#adjmat[adjmat > 0] = 1
print("{} out of {} values set to zero".format(len(adjmat[adjmat == 0]),
                                               len(adjmat)))
adjmat = adjmat.reshape(sim.shape)

# In[94]:

G = make_graph(adjmat, labels=Config.labels)
nx.draw_spring(G, with_labels=True)

# In[95]:

matrix = nx.to_scipy_sparse_matrix(G)
result = mc.run_mcl(matrix, inflation=2)  # run MCL with default parameters
clusters = mc.get_clusters(result)  # get clusters
print("There are {} clusters.".format(len(clusters)))
mc.draw_graph(matrix, clusters, with_labels=True, edge_color="silver")

# In[77]:

ref = np.genfromtxt(labelfilename, delimiter=',', dtype=None)
print(ref[19])
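
# `make_graph` is not shown in this snippet; a plausible minimal version
# (hypothetical) builds a labeled graph from the thresholded similarity matrix:
import networkx as nx

def make_graph(adjmat, labels=None):
    G = nx.from_numpy_matrix(adjmat)
    if labels is not None:
        G = nx.relabel_nodes(G, dict(enumerate(labels)))
    return G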
Exemple #55
0
@author: Seokyong Hong
'''
import os
import time
import networkx
from community.SCAN import SCAN

input_path = '/Input/Com-Amazon.txt'

if __name__ == '__main__':
    start = time.time()

    digraph = networkx.read_edgelist(path=input_path,
                                     delimiter='\t',
                                     create_using=networkx.DiGraph())
    graph = networkx.to_scipy_sparse_matrix(digraph.to_undirected())

    t0 = time.time()
    labels = SCAN().detect(graph=graph, epsilon=0.7, mu=2)
    print 'Community Detection Time: ' + str(time.time() - t0)
    '''
    community_labels = set()
    community_count = 0
    hub_count = 0
    outlier_count = 0
    community = {}
    max_label = int(max(labels)) + 1
  
    for index in range(len(labels)):
        if labels[index] >= 0:
            community[list(digraph.nodes(data = False))[index]] = int(labels[index])
Exemple #56
0
    def learn_embeddings(self, graph=None, edge_f=None):
        # TensorFlow wizardry
        config = tf.ConfigProto()
        # Don't pre-allocate memory; allocate as-needed
        config.gpu_options.allow_growth = True
        # Only allow a total of half the GPU memory to be allocated
        config.gpu_options.per_process_gpu_memory_fraction = 0.1
        # Create a session with the above options specified.
        KBack.tensorflow_backend.set_session(tf.Session(config=config))

        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

        S = nx.to_scipy_sparse_matrix(graph)
        self._node_num = graph.number_of_nodes()
        t1 = time()

        # Generate encoder, decoder and autoencoder
        self._num_iter = self._n_iter
        self._encoder = get_encoder(self._node_num, self._d,
                                    self._n_units,
                                    self._nu1, self._nu2,
                                    self._actfn)
        self._decoder = get_decoder(self._node_num, self._d,
                                    self._n_units,
                                    self._nu1, self._nu2,
                                    self._actfn)
        self._autoencoder = get_autoencoder(self._encoder, self._decoder)

        # Initialize self._model
        # Input
        x_in = Input(shape=(self._node_num,), name='x_in')
        # Process inputs
        [x_hat, y] = self._autoencoder(x_in)
        # Outputs
        x_diff = Subtract()([x_hat, x_in])

        # Objectives
        def weighted_mse_x(y_true, y_pred):
            """ Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
                y_pred: Contains x_hat - x
                y_true: Contains b
            """
            return KBack.sum(
                KBack.square(y_pred * y_true[:, 0:self._node_num]),
                axis=-1
            )

        # Model
        self._model = Model(input=x_in, output=x_diff)
        sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
        adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        self._model.compile(optimizer=sgd, loss=weighted_mse_x)

        history = self._model.fit_generator(
            generator=batch_generator_ae(S, self._beta, self._n_batch, True),
            nb_epoch=self._num_iter,
            samples_per_epoch=S.shape[0] // self._n_batch,
            verbose=1,
            # callbacks=[tensorboard]
            # callbacks=[callbacks.TerminateOnNaN()]
        )
        loss = history.history['loss']
        # Get embedding for all points
        if loss[0] == np.inf or np.isnan(loss[0]):
            print('Model diverged. Assigning random embeddings')
            self._Y = np.random.randn(self._node_num, self._d)
        else:
            try:
                self._Y, self._next_adj = model_batch_predictor_v2(self._autoencoder, S, self._n_batch)
            except:
                pdb.set_trace()
        t2 = time()
        # Save the autoencoder and its weights
        """
        if self._weightfile is not None:
            saveweights(self._encoder, self._weightfile[0])
            saveweights(self._decoder, self._weightfile[1])
        if self._modelfile is not None:
            savemodel(self._encoder, self._modelfile[0])
            savemodel(self._decoder, self._modelfile[1])
        if self._savefilesuffix is not None:
            saveweights(self._encoder,
                        'encoder_weights_' + self._savefilesuffix + '.hdf5')
            saveweights(self._decoder,
                        'decoder_weights_' + self._savefilesuffix + '.hdf5')
            savemodel(self._encoder,
                      'encoder_model_' + self._savefilesuffix + '.json')
            savemodel(self._decoder,
                      'decoder_model_' + self._savefilesuffix + '.json')
            # Save the embedding
            np.savetxt('embedding_' + self._savefilesuffix + '.txt',
                       self._Y)
        """
        return self._Y, (t2 - t1)
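
# The y_true "argument smuggling" used in weighted_mse_x, in plain numpy
# (a sketch): the penalty weights b ride along inside y_true so Keras will
# hand them to the loss function alongside the prediction.
import numpy as np

def weighted_mse_np(x, x_hat, b):
    return np.sum(np.square((x_hat - x) * b), axis=-1)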
Exemple #57
0
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from networkx.drawing.nx_pydot import graphviz_layout
import matplotlib
import copy
import qsw_mpi  # assumed to expose qsw_mpi.operators and qsw_mpi.MPI
from mpi4py import MPI

# Matplotlib parameters.
matplotlib.use("Agg")

comm = MPI.COMM_WORLD
"""
Steady state of an L-QSW on a full binary tree of depth 5:
"""
Graph = nx.balanced_tree(2, 5)
G = nx.to_scipy_sparse_matrix(Graph)

H = qsw_mpi.operators.transition(1.0, G)
"""
Local-interaction Lindblad operators are derived from the
canonical Markov chain transition matrix.
"""
M = qsw_mpi.operators.markov_chain(G)
L = qsw_mpi.operators.local_lindblads(M)

omega = 0.5
QSW = qsw_mpi.MPI.LQSW(omega, H, L, comm)
"""
The system begins in a maximally mixed state.
"""
QSW.initial_state('mixed')
Exemple #58
0
def gen_ba(args):
    g = nx.barabasi_albert_graph(args.ba_n, args.ba_m)
    csr = nx.to_scipy_sparse_matrix(g, weight=None, format='csr')
    graph_io.save_graph(args.out, csr)
import networkx as nx
import torch
import scipy.sparse as sp

g = nx.karate_club_graph().to_undirected().to_directed()
src = []
dst = []
for u, v in g.edges():
    src.append(u)
    dst.append(v)

with open('edges.txt', 'w') as f:
    for u, v in zip(src, dst):
        f.write('{} {}\n'.format(u, v))

torch.save(torch.tensor(src), 'src.pt')
torch.save(torch.tensor(dst), 'dst.pt')

spmat = nx.to_scipy_sparse_matrix(g)
print(spmat)
sp.save_npz('scipy_adj.npz', spmat)

from networkx.readwrite import json_graph
import json

with open('adj.json', 'w') as f:
    json.dump(json_graph.adjacency_data(g), f)

node_feat = torch.randn((34, 5)) / 10.
edge_feat = torch.ones((156, ))
torch.save(node_feat, 'node_feat.pt')
torch.save(edge_feat, 'edge_feat.pt')
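
# Reloading the artifacts written above (a minimal sketch; same file names):
import json
import torch
import scipy.sparse as sp
from networkx.readwrite import json_graph

src = torch.load('src.pt')
dst = torch.load('dst.pt')
spmat = sp.load_npz('scipy_adj.npz')
with open('adj.json') as f:
    g = json_graph.adjacency_graph(json.load(f))
node_feat = torch.load('node_feat.pt')
edge_feat = torch.load('edge_feat.pt')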
Exemple #60
0
def fruchterman_reingold_layout(
    G,
    k=None,
    pos=None,
    fixed=None,
    iterations=50,
    threshold=1e-4,
    weight="weight",
    scale=1,
    center=None,
    dim=2,
    seed=None,
):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    The algorithm simulates a force-directed representation of the network
    treating edges as springs holding nodes close, while treating nodes
    as repelling objects, sometimes called an anti-gravity force.
    Simulation continues until the positions are close to an equilibrium.

    There are some hard-coded values: minimal distance between
    nodes (0.01) and "temperature" of 0.1 to ensure nodes don't fly away.
    During the simulation, `k` helps determine the distance between nodes,
    though `scale` and `center` determine the size and place after
    rescaling occurs at the end of the simulation.

    Fixing some nodes doesn't allow them to move in the simulation.
    It also turns off the rescaling feature at the simulation's end.
    In addition, setting `scale` to `None` turns off rescaling.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    k : float (default=None)
        Optimal distance between nodes.  If None the distance is set to
        1/sqrt(n) where n is the number of nodes.  Increase this value
        to move nodes farther apart.

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        random initial positions.

    fixed : list or None  optional (default=None)
        Nodes to keep fixed at initial position.
        ValueError raised if `fixed` specified and `pos` not.

    iterations : int  optional (default=50)
        Maximum number of iterations taken

    threshold: float optional (default = 1e-4)
        Threshold for relative error in node position changes.
        The iteration stops if the error is below this threshold.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number or None (default: 1)
        Scale factor for positions. Not used unless `fixed is None`.
        If scale is None, no rescaling is performed.

    center : array-like or None
        Coordinate pair around which to center the layout.
        Not used unless `fixed is None`.

    dim : int
        Dimension of layout.

    seed : int, RandomState instance or None  optional (default=None)
        Set the random state for deterministic node layouts.
        If int, `seed` is the seed used by the random number generator,
        if numpy.random.RandomState instance, `seed` is the random
        number generator,
        if None, the random number generator is the RandomState instance used
        by numpy.random.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spring_layout(G)

    # The same using longer but equivalent function name
    >>> pos = nx.fruchterman_reingold_layout(G)
    """
    import numpy as np

    G, center = _process_params(G, center, dim)

    if fixed is not None:
        if pos is None:
            raise ValueError("nodes are fixed without positions given")
        for node in fixed:
            if node not in pos:
                raise ValueError("nodes are fixed without positions given")
        nfixed = {node: i for i, node in enumerate(G)}
        fixed = np.asarray([nfixed[node] for node in fixed])

    if pos is not None:
        # Determine size of existing domain to adjust initial positions
        dom_size = max(coord for pos_tup in pos.values() for coord in pos_tup)
        if dom_size == 0:
            dom_size = 1
        pos_arr = seed.rand(len(G), dim) * dom_size + center

        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None
        dom_size = 1

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {nx.utils.arbitrary_element(G.nodes()): center}

    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype="f")
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _sparse_fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                           threshold, dim, seed)
    except ValueError:
        A = nx.to_numpy_array(G, weight=weight)
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                    threshold, dim, seed)
    if fixed is None and scale is not None:
        pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
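
# Usage sketch for the layout above: pin node 0 at the origin and let the
# force-directed simulation place the remaining nodes around it.
import networkx as nx

G = nx.path_graph(4)
pos = nx.spring_layout(G, pos={0: (0, 0)}, fixed=[0], seed=42)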