Example #1
    def test_google_matrix(self):
        G = self.G
        M = networkx.google_matrix(G, alpha=0.9)
        e, ev = numpy.linalg.eig(M.T)
        p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
        for (a, b) in zip(p, self.G.pagerank.values()):
            assert_almost_equal(a, b)

        personalize = dict((n, random.random()) for n in G)
        M = networkx.google_matrix(G, alpha=0.9, personalization=personalize)
        personalize.pop(1)
        assert_raises(networkx.NetworkXError, networkx.google_matrix, G,
                      personalization=personalize)
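The tests in this example (and several below) reference a fixture graph self.G that carries a precomputed pagerank attribute. A minimal sketch of such a fixture, assuming pytest-style setup; the edge list is illustrative and not the one used by the NetworkX test suite:

import networkx

class TestGoogleMatrixExample:
    def setup_method(self):
        # small directed graph; any graph with known PageRank values works
        G = networkx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4), (4, 1)])
        # store the reference PageRank (alpha=0.9, matching the tests) on the graph
        G.pagerank = networkx.pagerank(G, alpha=0.9)
        self.G = G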
Example #2
 def test_google_matrix(self):
     G = self.G
     M = networkx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
     e, ev = numpy.linalg.eig(M.T)
     p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
     for (a, b) in zip(p, self.G.pagerank.values()):
         assert_almost_equal(a, b)
Example #3
 def test_google_matrix(self):
     G = self.G
     M = nx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
     e, ev = np.linalg.eig(M.T)
     p = np.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
     for (a, b) in zip(p, self.G.pagerank.values()):
         assert a == pytest.approx(b, abs=1e-7)
Example #4
def LRW_Index(matrix, graph, n):
    nodes_num = graph.number_of_nodes()  # node count used by the vectors and loops below
    P = nx.google_matrix(graph)
    trans = np.transpose(P)
    pi = []
    vector = np.zeros(nodes_num)
    vector[0] = 1
    new_v = copy.copy(vector)
    pi.append(new_v)

    for i in range(1, nodes_num):
        vector[i-1] = 0
        vector[i] = 1
        new_v = copy.copy(vector)
        pi.append(new_v)

    for i in range(0, n):
        for x in range(0, nodes_num):
            pi[x] = pi[x].dot(trans)

    s = (nodes_num, nodes_num)
    S_LRW = np.zeros(s)
    degree_matrix = nx.laplacian_matrix(graph) + matrix
    for x in range(0, nodes_num):
        q_x = degree_matrix[x, x] / len(graph.edges)
        for y in range(0, nodes_num):
            q_y = degree_matrix[y, y] / len(graph.edges)
            S_LRW[x, y] = q_x  * pi[x].tolist()[0][y] +q_y  * pi[y].tolist()[0][x]
    return S_LRW
Example #5
def RWR_Index (graph, nodes_num, c):
    # google matrix == the transition matrix from the paper
    P = nx.google_matrix(graph)
    idn = np.identity(nodes_num)
    trans = np.transpose(P)
    core = (1 - c) * LAD.inv((idn - c * trans))
    vector = np.zeros(nodes_num)
    qs = []
    vector[0] = 1
    new_v = copy.copy(vector)
    # vector <==> the basis (one-hot) vector
    qs.append(core.dot(new_v))

    for i in range(1, nodes_num):
        vector[i - 1] = 0
        vector[i] = 1
        new_v = copy.copy(vector)
        qs.append(core.dot(new_v))

    s = (nodes_num, nodes_num)
    S_RWR = np.zeros(s)
    for x in range(0, nodes_num):
        for y in range(0, nodes_num):
            S_RWR[x, y] = qs[x].tolist()[0][y] + qs[y].tolist()[0][x]
    return S_RWR
Example #6
def page_rank(data):
    import pandas as pd
    import numpy as np
    import networkx as nx
    df = pd.DataFrame(data)
    graph = nx.DiGraph()
    h = nx.path_graph(pd.Series.count(np.unique(df[0])))
    graph.add_nodes_from(h)
    graph.add_node(h)
    for i in range(len(df[0])):
        graph.add_edge(df[0][i], df[1][i])
        graph[df[0][i]][df[1][i]]['weight'] = df[3][i]
    transition_matrix = nx.google_matrix(graph)
    n = min(transition_matrix.shape)
    p0 = np.repeat(1 / n, n)
    pi = np.matmul(p0, transition_matrix)
    eps = 0.0000025
    i = 1
    while np.sum(np.abs(pi - p0)) >= eps:
        p0 = pi
        pi = np.matmul(pi, transition_matrix)
        print(i)
        print(pi)
        i = i + 1
    print('The final rank is :', pi)
Example #7
 def test_google_matrix(self):
     G = self.G
     M = networkx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
     e, ev = numpy.linalg.eig(M.T)
     p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
     for (a, b) in zip(p, self.G.pagerank.values()):
         assert_almost_equal(a, b)
Example #8
def pagerank_numpy(G,
                   alpha=0.85,
                   personalization=None,
                   weight='weight',
                   dangling=None):
    """Return the PageRank of the nodes in the graph.
    """

    if len(G) == 0:
        return {}

    M = nx.google_matrix(G,
                         alpha,
                         personalization=personalization,
                         weight=weight,
                         dangling=dangling)

    # use numpy LAPACK solver
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
    ind = eigenvalues.argsort()

    # eigenvector of largest eigenvalue at ind[-1], normalized
    largest = np.array(eigenvectors[:, ind[-1]]).flatten().real
    norm = float(largest.sum())

    return dict(zip(G, map(float, largest / norm)))
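A quick usage sketch for the helper above (the graph is an arbitrary example; nx and np are assumed to be the usual NetworkX and NumPy aliases used in the function body):

import networkx as nx
import numpy as np

G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (1, 3)])
pr = pagerank_numpy(G, alpha=0.85)
# the returned scores form a probability distribution over the nodes
assert abs(sum(pr.values()) - 1.0) < 1e-9
print(pr)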
Example #9
 def test_empty(self):
     try:
         import numpy
     except ImportError:
         raise SkipTest('numpy not available.')
     G = networkx.Graph()
     assert_equal(networkx.pagerank(G), {})
     assert_equal(networkx.pagerank_numpy(G), {})
     assert_equal(networkx.google_matrix(G).shape, (0, 0))
Example #10
 def test_empty(self):
     try:
         import numpy
     except ImportError:
         raise SkipTest("numpy not available.")
     G = networkx.Graph()
     assert_equal(networkx.pagerank(G), {})
     assert_equal(networkx.pagerank_numpy(G), {})
     assert_equal(networkx.google_matrix(G).shape, (0, 0))
Example #11
 def test_dangling_matrix(self):
     """
     Tests that the google_matrix doesn't change except for the dangling
     nodes.
     """
     G = self.G
     dangling = self.dangling_edges
     dangling_sum = float(sum(dangling.values()))
     M1 = nx.google_matrix(G, personalization=dangling)
     M2 = nx.google_matrix(G, personalization=dangling, dangling=dangling)
     for i in range(len(G)):
         for j in range(len(G)):
             if i == self.dangling_node_index and (j + 1) in dangling:
                 assert almost_equal(M2[i, j],
                                     dangling[j + 1] / dangling_sum,
                                     places=4)
             else:
                 assert almost_equal(M2[i, j], M1[i, j], places=4)
Example #12
 def test_dangling_matrix(self):
     """
     Tests that the google_matrix doesn't change except for the dangling
     nodes.
     """
     G = self.G
     dangling = self.dangling_edges
     dangling_sum = float(sum(dangling.values()))
     M1 = networkx.google_matrix(G, personalization=dangling)
     M2 = networkx.google_matrix(G, personalization=dangling,
                                 dangling=dangling)
     for i in range(len(G)):
         for j in range(len(G)):
             if i == self.dangling_node_index and (j + 1) in dangling:
                 assert_almost_equal(M2[i, j],
                                     dangling[j + 1] / dangling_sum,
                                     places=4)
             else:
                 assert_almost_equal(M2[i, j], M1[i, j], places=4)
Example #13
 def test_google_matrix(self):
     try:
         import numpy.linalg
     except ImportError:
         raise SkipTest('numpy not available.')
     G=self.G
     M=networkx.google_matrix(G,alpha=0.9)
     e,ev=numpy.linalg.eig(M.T)
     p=numpy.array(ev[:,0]/ev[:,0].sum())[:,0]
     for (a,b) in zip(p,self.G.pagerank.values()):
         assert_almost_equal(a,b)
Example #14
def calc(g,negative,alpha,M,beta=1):
	epsilon = 0.000000001
	print "start calc pagetrust, epsilon =",epsilon
	N = len(g)
	x = np.ones(N)
	x = x * 1/N
	visualize("x",x)
	P,tildeP = initialize_P(g,negative)
	t = 0
	G = nx.google_matrix(g)
	pagerank = nx.pagerank(g,alpha=alpha)
	visualize("Google matrix",G)
	t = 0
	while True:
		t += 1
		#build the transition matrix T
		print "***"
		print "*** iteration start, time = ",t
		print "***"
		T = build_transition_matrix(alpha,x,g,G,M)
		tildeP = np.dot(T,P)
		visualize("P",P)
		visualize("tildeP",tildeP)
		x2 = np.zeros(N)
		for i in range(N):
			p = 0
			for k in range(N):
				p += G[k,i]*x[k]
			x2[i] = (1 - tildeP[i][i])**beta*p
			for j in range(N):
				if (i,j) in negative:
					P[i,j] = 1
				elif i == j:
					P[i,j] = 0
				else:
					P[i,j] = tildeP[i,j]
		#normalization
		tmpl = 0
		for l in range(N):
			tmpl += x2[l]
		for o in range(N):
			x2[o] = x2[o] / tmpl
		visualize("x2",x2)
		e = is_converged(x,x2)
		print "e:",e
		if e < epsilon:
			#visualize('pagerank',pagerank)
			break
		else:
			#x <- x(t+1)
			for p in range(N):
				x[p] = x2[p]
	print x2
	return x2
Example #15
 def test_google_matrix(self):
     try:
         import numpy.linalg
     except ImportError:
         raise SkipTest('numpy not available.')
     G = self.G
     M = networkx.google_matrix(G, alpha=0.9)
     e, ev = numpy.linalg.eig(M.T)
     p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
     for (a, b) in zip(p, self.G.pagerank.values()):
         assert_almost_equal(a, b)
Example #16
def calc(g, negative, alpha, M, beta=1):
    epsilon = 0.000000001
    print "start calc pagetrust, epsilon =", epsilon
    N = len(g)
    x = np.ones(N)
    x = x * 1 / N
    visualize("x", x)
    P, tildeP = initialize_P(g, negative)
    t = 0
    G = nx.google_matrix(g)
    pagerank = nx.pagerank(g, alpha=alpha)
    visualize("Google matrix", G)
    t = 0
    while True:
        t += 1
        #build the transition matrix T
        print "***"
        print "*** iteration start, time = ", t
        print "***"
        T = build_transition_matrix(alpha, x, g, G, M)
        tildeP = np.dot(T, P)
        visualize("P", P)
        visualize("tildeP", tildeP)
        x2 = np.zeros(N)
        for i in range(N):
            p = 0
            for k in range(N):
                p += G[k, i] * x[k]
            x2[i] = (1 - tildeP[i][i])**beta * p
            for j in range(N):
                if (i, j) in negative:
                    P[i, j] = 1
                elif i == j:
                    P[i, j] = 0
                else:
                    P[i, j] = tildeP[i, j]
        #normalization
        tmpl = 0
        for l in range(N):
            tmpl += x2[l]
        for o in range(N):
            x2[o] = x2[o] / tmpl
        visualize("x2", x2)
        e = is_converged(x, x2)
        print "e:", e
        if e < epsilon:
            #visualize('pagerank',pagerank)
            break
        else:
            #x <- x(t+1)
            for p in range(N):
                x[p] = x2[p]
    print x2
    return x2
Example #17
 def test_google_matrix(self):
     G=self.G
     try:
         import numpy.linalg
         M=networkx.google_matrix(G,alpha=0.9)
         e,ev=numpy.linalg.eig(M.T)
         p=numpy.array(ev[:,0]/ev[:,0].sum())[:,0]
         for (a,b) in zip(p,self.G.pagerank):
             assert_almost_equal(a,b)
     except ImportError:
         print "Skipping google_matrix test"
Example #18
def graph_structure(graph, content_graph, path):
    """ Builds interaction graph with structure based weighs
        similarity metric, and save it at the given path """
    A = scipy.sparse.csr_matrix(
        nx.google_matrix(graph, alpha=1, weight='weight'))
    W = nx.to_scipy_sparse_matrix(content_graph)
    W2 = (W.dot(A) + A.transpose().dot(W)) * 0.5
    # rescaling in range 0-1
    max_val = W2.max()
    W2 = W2.multiply(1.0 / max_val)

    save_matrix_to_edgelist(W2, path)
Example #19
def exhaustive_set(G, query_nodes, target_nodes, n_edges, start_dist):
    """Exaustively searches all the combinations of k links between 
    a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. 
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    ac_scores: list
        The set of scores of adding the links
    """
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(G, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum())[0,0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if G.has_edge(candidates[i][0], candidates[i][1]) == False]
    ac_scores = [row_sums]
    exhaustive_links = []
    for L in range(1, n_edges+1):
        print('\t Number of edges {}'.format(L))
        round_min = -1
        best_combination = [] 
        for subset in combinations(eligible, L):
            H = G.copy()
            F_modified = F.copy()
            for links_to_add in subset:
                F_updated = update_fundamental_mat(F_modified, H, map_query_to_org, links_to_add[0])
                H.add_edge(links_to_add[0], links_to_add[1])
                F_modified = F_updated            
            abs_cen = start_dist.dot( F_updated.sum(axis = 1))[0,0]
            if abs_cen < round_min or round_min == -1:
                best_combination = subset
                round_min = abs_cen
        exhaustive_links.append(best_combination)
        ac_scores.append(round_min)              
    return exhaustive_links, ac_scores
Example #20
def page_rank(graph, jump_probability=.15, weighted=False):
    if weighted:
        wt = 'weight'
    else:
        wt = None
    alpha = 1 - jump_probability
    M = _nx.google_matrix(graph, alpha, weight=wt)
    _, v = _eigs(M.T, k=1)

    r = v.flatten().real
    r /= r.sum()

    return _pd.Series(r, index=graph.nodes).sort_index()
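A sketch of how this helper might be called; the private-looking aliases are assumptions about the surrounding module's imports (_nx = networkx, _pd = pandas, _eigs = scipy.sparse.linalg.eigs):

import networkx as _nx
import pandas as _pd
from scipy.sparse.linalg import eigs as _eigs

G = _nx.gnp_random_graph(30, 0.2, directed=True, seed=0)
scores = page_rank(G, jump_probability=0.15, weighted=False)
print(scores.head())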
Example #21
def linkrank(G):
    c = arange(len(G.nodes()))
    goo = nx.google_matrix(G)
    goo = array(goo)
    m = nx.pagerank_numpy(G)
    m = m.items()
    m = [i[1] for i in m]
    m = array([m])
    m = m.T
    L = tile(m,[1,len(goo)])*goo
    Q = 0
    mm = tile(m,[1,len(goo)])*tile(m.T,[len(goo),1])
    Qlr = L - mm
    return greedyMax(Qlr,c,0)
Example #22
def _google_matrix(graph, anchors, normalize):
    aut = {}
    node_to_num = dict((node, i) for i, node in enumerate(graph.nodes()))
    num_to_node = dict(enumerate(graph.nodes()))
    aut_mat = nx.google_matrix(graph)
    for num, node in enumerate(graph.nodes()):
        d = []
        for anchor in anchors:
            a_num = node_to_num[anchor]
            d.append(aut_mat[num, a_num])
        if normalize:
            d = normalized(d)
        aut[node] = np.array(d)
    return aut
Example #23
def linkrank(G):
    c = arange(len(G.nodes()))
    goo = nx.google_matrix(G)
    goo = array(goo)
    m = nx.pagerank_numpy(G)
    m = m.items()
    m = [i[1] for i in m]
    m = array([m])
    m = m.T
    L = tile(m, [1, len(goo)]) * goo
    Q = 0
    mm = tile(m, [1, len(goo)]) * tile(m.T, [len(goo), 1])
    Qlr = L - mm
    return greedyMax(Qlr, c, 0)
Example #24
def pagerank(g):
    import numpy as np
    import networkx as nx
    trm = nx.google_matrix(g)
    n = min(trm.shape)
    p0 = np.repeat(1 / n, n)
    pi = np.matmul(p0, trm)
    i = 1
    eps = 0.00015
    while np.sum(np.abs(pi - p0)) >= eps:
        p0 = pi
        pi = np.matmul(pi, trm)
        i = i + 1
        if i == 10000:
            break
    return pi
Example #25
def pagerank_numpy(G,alpha=0.85,max_iter=100,tol=1.0e-6,nodelist=None):
    """Return a NumPy array of the PageRank of G.
    """
    import numpy
    import networkx
    M=networkx.google_matrix(G,alpha,nodelist)   
    (n,m)=M.shape # should be square
    x=numpy.ones((n))/n
    for i in range(max_iter):
        xlast=x
        x=numpy.dot(x,M)
        # check convergence, l1 norm            
        err=numpy.abs(x-xlast).sum()
        if err < n*tol:
            return numpy.asarray(x).flatten()

    raise NetworkXError("pagerank: power iteration failed to converge in %d iterations."%(i+1))
Example #26
def random_links(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a random set of links between a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. 
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    ac_scores: list
        The set of scores of adding the links
    """
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(G, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum())[0,0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if G.has_edge(candidates[i][0], candidates[i][1]) == False]
    links_to_add = sample(eligible, n_edges)
    
    ac_scores = []
    ac_scores.append(row_sums)
    i = 0
    while i < n_edges:
        F_updated = update_fundamental_mat(F, G, map_query_to_org, links_to_add[i][0])
        G.add_edge(links_to_add[i][0], links_to_add[i][1])
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        F = F_updated            
        ac_scores.append(abs_cen)
        i += 1
    return links_to_add, ac_scores
Example #27
def pagerank_numpy(G, alpha=0.85, max_iter=100, tol=1.0e-6, nodelist=None):
    """Return a NumPy array of the PageRank of G.
    """
    import numpy
    import networkx
    M = networkx.google_matrix(G, alpha, nodelist)
    (n, m) = M.shape  # should be square
    x = numpy.ones((n)) / n
    for i in range(max_iter):
        xlast = x
        x = numpy.dot(x, M)
        # check convergence, l1 norm
        err = numpy.abs(x - xlast).sum()
        if err < n * tol:
            return numpy.asarray(x).flatten()

    raise NetworkXError(
        "pagerank: power iteration failed to converge in %d iterations." %
        (i + 1))
Example #28
def cluster(seed_nodes, graph):

    # Compute adjacency matrix
    A = nx.adjacency_matrix(graph, weight='weight').todense()

    # Compute the initial transition matrix
    M = A / A.sum(axis=1)
    M[np.isnan(M)] = 0

    # Compute the initial google matrix
    P = nx.google_matrix(graph, weight='weight')
    PF = np.copy(P)
    # Compute random walk for t steps
    t = 3
    for i in range(2, t + 1):
        PF += np.linalg.matrix_power(P, i)

    P_degree = np.diag(PF)
    P_weight = cosine_similarity(PF)

    coms = set()
    membership = {}

    # Sort the nodes array in decreasing value of weights
    sorted_P_deg = np.sort(P_degree)[::-1]
    sorted_P_deg_indices = np.argsort(P_degree)
    nodes_list = list(graph.nodes)
    sorted_nodes = [nodes_list[i] for i in sorted_P_deg_indices]

    # Take the P-degree of N/4th node as threshold
    Pt = sorted_P_deg[graph.number_of_nodes() // 4]

    # Loop over the nodes and check if P-degree of node > Pt
    # If P-degree of a node > Pt and P-degree is not in a sub region,
    # keep the node as a seed node and map the nodes connected to as a
    # community
    # If P-degree of a node < Pt and P-degree is in a sub-region, skip

    for i, node in enumerate(sorted_nodes):
        if sorted_P_deg[i] > Pt and node not in coms:
            coms |= {node, *graph.neighbors(node)}
            membership[node] = [*graph.neighbors(node)]
Example #29
def M(path, graph, a, personal):
    
    if path is None and graph is None:
        return
    
    g = nx.read_gpickle(path) if path is not None else graph
    
    P = nx.google_matrix(g, alpha=1, dangling=personal)  # Transition matrix (row-stochastic). Returns a NumPy matrix, which behaves differently from an ndarray!
    
    P_transp = np.transpose(P)
    
    I = np.identity(len(g.nodes()))

    m = np.subtract(I,np.dot(a,P_transp))

    m_inv = linalg.inv(m)
        
    p_array = np.array(P_transp)

    return (m_inv,p_array)
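A sketch of invoking the helper above on an in-memory graph, assuming np is NumPy and linalg is numpy.linalg (the snippet itself does not show its imports):

import networkx as nx
import numpy as np
from numpy import linalg

g = nx.karate_club_graph()
m_inv, p_array = M(path=None, graph=g, a=0.85, personal=None)
# (I - a * P^T) is invertible for a < 1, so both results are (n, n) arrays
print(m_inv.shape, p_array.shape)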
Example #30
def sim(n=4, m=15):
    I_graph = nx.DiGraph()
    nodes = ["FB", "Amazon", "HCP", "LinkedIn"]
    # nodes = range(n)
    edges = np.random.choice(nodes,2*m).reshape(m,2)
    # I_graph.add_edges_from(edges)
    I_graph.add_edge("FB", "HCP")
    I_graph.add_edge("Amazon", "HCP")
    I_graph.add_edge("LinkedIn", "HCP")

    # visualize
    graphVizWrapper.graph(I_graph.edges(), 'di')

    # google transition matrix
    G = nx.google_matrix(I_graph, alpha=1.0)
    # make indexing consistent
    G = np.array(G)
    surfer = Markov_process(I_graph.nodes(), G)
    print(surfer.sample_game(stop=10))
    print(G)
    print(I_graph.node)
    surfer.sim_distribution()
Example #31
def random_walk(a, i, iters):
    walk = np.zeros(iters + 1)  # one slot for the start node plus one per step
    walk[0] = i
    elements = np.arange(a.shape[0]) # for our graph [0,1,2,3]
    c_index = i # current index for this iteration
    for k in range(iters):
        count = 0 # count of transitions
        probs = a[c_index].reshape((-1,))  # probability of transitions
        # sample from probs
        sample = np.random.choice(elements,p=probs) # sample a target using probs
        index = sample # go to target
        walk[k+1] = index
        c_index = index
    return walk

# print(pd.DataFrame(nx.adj_matrix(G).todense()))
walk_length = 1000000
markov_matrix = np.array(nx.google_matrix(G, alpha=1))
nodes = G.nodes()
vocab = {f"node_{node}":node for node in nodes}
n2voc = {node:name for name, node in vocab.items()}
starting_point = np.random.choice(nodes)
walk = random_walk(markov_matrix, starting_point, walk_length)
walk


# %%
sliding_windows = np.vstack((walk, np.roll(walk, -1), np.roll(walk, -2), np.roll(walk, -3), np.roll(walk, -4))).astype(int)
sliding_windows
# %%
cooccurence_matrix = np.zeros_like(markov_matrix)
center_node_pos = int(sliding_windows.shape[0]/2)
for position in range(walk_length):
Example #32
	def get_d_erH(self, alpha = 1, gamma = 1, N = 100):
		p_mtx = nx.google_matrix(self.graph, alpha = alpha)
		r_mtx = -1 * np.eye(self.graph.order()) + p_mtx
		d_erH = (np.eye(self.graph.order()) + (float(gamma) / N ) * r_mtx) ** N
		return(d_erH)
Example #33
def get_trans_matrix(graph):
    P = nx.google_matrix(graph, alpha=1)
    return P.T
Example #34
 def test_empty(self):
     G = networkx.Graph()
     assert_equal(networkx.pagerank(G), {})
     assert_equal(networkx.pagerank_numpy(G), {})
     assert_equal(networkx.google_matrix(G).shape, (0, 0))
Example #35
 def test_empty(self):
     G = networkx.Graph()
     assert networkx.pagerank(G) == {}
     assert networkx.pagerank_numpy(G) == {}
     assert networkx.google_matrix(G).shape == (0, 0)
Example #36
def greedy_navigation(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a set of links with a greedy descent algorithm that reduce the 
    absorbing RW centrality between a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. The query and target set 
    must be a 'viable' partition of the graph.
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    P : Scipy matrix
        The transition matrix of the graph G
    F : Scipy matrix
        The fundamental matrix for the graph G with the given set of absorbing
        random walk nodes
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    """
    H = G.copy()
    prng = RandomState()
    query_set_size = len(query_nodes)
    target_set_size = len(target_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))

    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0,0]
    best_F = zeros(F.shape)
    optimal_set = []
    ac_scores = []
    ac_scores.append(row_sums)
    
    while n_edges > 0:
        round_min = -1
        best_node = -1
        
        for i in query_nodes:
            abs_neighbours = [l for l in H.neighbors(i) if l in target_nodes]
            if len(abs_neighbours) == target_set_size:
                continue
            
            F_updated = update_fundamental_mat(F, H, map_query_to_org, i)
            abs_cen = start_dist.dot( F_updated.sum(axis = 1))[0,0]
            if abs_cen < round_min or round_min == -1:
                best_node = i
                round_min = abs_cen
                best_F = F_updated
        F = best_F            
        ac_scores.append(round_min)
        optimal_candidate_edges = [(best_node, k, round_min) 
                                   for k in target_nodes 
                                   if H.has_edge(best_node, k) == False ]
        
        try:
            edge_idx = prng.randint(0, len(optimal_candidate_edges))
        except ValueError:
            print(H.neighbors(best_node))
            print([l for l in H.neighbors(best_node) if l in target_nodes])
            print(best_node)
            print(optimal_candidate_edges)
            print(target_nodes)
        H.add_edge(optimal_candidate_edges[edge_idx][0], 
                   optimal_candidate_edges[edge_idx][1])
        optimal_set.append(optimal_candidate_edges[edge_idx])
        n_edges -= 1

    return optimal_set, ac_scores
Example #37
def compute_security(star_dict, edge_dict, num_seeds, num_iterations):
    
    #build up a nx graph
    galaxy = networkx.Graph()
    for v, vertex in star_dict.iteritems():
        galaxy.add_node(v)
    
    for v, neighbors in edge_dict.iteritems():
        for n in neighbors:
            galaxy.add_edge(v,n)
            
    #use the centrality measures already computed to find seeds
    
    #find the top 25% vertices of each centrality measure
    betweenness_limit = int(len(star_dict)* 0.4)
    sorted_betweenness = sorted((v['betweenness'], k) for k, v in star_dict.iteritems())
    top_betweenness = {k for (value, k) in sorted_betweenness[-betweenness_limit:]}
    
    closeness_limit = int(len(star_dict)* 0.15)
    sorted_closeness = sorted((v['closeness'], k) for k, v in star_dict.iteritems())
    top_closeness = {k for (value, k) in sorted_closeness[-closeness_limit:]}
    
    pagerank_limit = int(len(star_dict)* 0.4)
    sorted_pagerank = sorted((v['pagerank'], k) for k, v in star_dict.iteritems())
    top_pagerank = {k for (value, k) in sorted_pagerank[-pagerank_limit:]}
    
    #take the intersection of all the top measures. this will be our pool to choose seeds from
    seed_pool = top_betweenness & top_closeness & top_pagerank
    print len(seed_pool)
    
    seeds = set()
    #loop until we have num_seeds or the seed pool is exhausted
    while(len(seed_pool) > 0 and len(seeds) < num_seeds):
        
        #pick a random vertex and remove it from the seed pool
        current_seed = random.choice(list(seed_pool))
        seed_pool.remove(current_seed)
        
        #find all vertices within 10 jumps
        close_vertices = networkx.single_source_shortest_path(galaxy, source=current_seed, cutoff=15)
        
        #if none of the current seeds were found within 10 jumps, add this as a seed
        if(len(seeds.intersection(close_vertices.iterkeys())) == 0):
            seeds.add(current_seed)
    print len(seeds)
    
    #apply the random walk algorithm, aka pagerank with alpha=1
    personalization_dict = {k: v['closeness']**10 for k,v in star_dict.iteritems()}
    mat = networkx.google_matrix(galaxy, alpha=1, personalization=personalization_dict, nodelist=star_dict.keys())
    mat = csr_matrix(mat)
    
    #for the initial array, set the seeds to 1 and everything else to 0
    weight_array = numpy.empty(len(star_dict), dtype=mat.dtype)
    weight_array.fill(0.0001)
    for i,k in enumerate(star_dict.iterkeys()):
        if(k in seeds):
            weight_array[i] = 1
            
    #iterate that shit
    for i in xrange(num_iterations):
        weight_array = weight_array * mat
        
    #create a security dict to normalize
    security_dict = {k:weight_array[i] for i, k in enumerate(star_dict.iterkeys())}
    security_dict = normalize(security_dict)
    
    #apply some data transformations - the square root will create more hisec, and the subtraction will create nullsec
    for key,value in security_dict.iteritems():
        security_dict[key] = value ** 0.025 - 0.94
        
    security_dict = normalize(security_dict)
    
    
    #copy the result into the security field
    for k,v in star_dict.iteritems():
        v['security'] = security_dict[k]
Example #38
import networkx as nx
import numpy as np
import sys
sys.path.append('./src')
from dataset import createFromDataset
from stationary import statDist

G = createFromDataset()

# Specify the matrix P_ (P bar) for the PageRank model, using alpha = 0.1.
P_ = nx.google_matrix(G, alpha=0.1)
print(P_)

#Considering k = 100, apply the power method and compare the result with the one obtained in item b).
#  Are the stationary distributions obtained in b) and c) the same or different?

# stationary distribution, item C
wdict = nx.pagerank(G, alpha=0.1, max_iter=100)
witemC = [0. for i in range(len(P_))]

for key in wdict:
    witemC[key - 1] = wdict[key]

# stationary distribution, item B
witemB = statDist(G, 100)

# Compare the vectors:
isDif = False  # boolean flag: the vectors differ
contDif = 0  # counter of differing entries

for i in range(len(witemB)):
Example #39
                        help='pagerank algorithm type. naive for pagerank(), numpy for pagerank_numpy(), scipy for pagerank_scipy(), google for google_matrix()')
    arg = parser.parse_args()
    print("pagerank begins!")

    begintime = time.time()
    G = nx.DiGraph()
    G = buildGFromFile(G)
    print("building Graph elapsed time: ", time.time() - begintime)
    if arg.type == 'naive':
        beginTime = time.time()
        PageRankResult = nx.pagerank(G, alpha=1 - arg.rate, max_iter=int(arg.num_iter), tol=arg.eps)
        print("PageRank elapsed time: ", time.time() - beginTime)
    elif arg.type == 'numpy':
        beginTime = time.time()
        PageRankResult = nx.pagerank_numpy(G, alpha=1 - arg.rate, max_iter=int(arg.num_iter), tol=arg.eps)
        print("pagerank_numpy elapsed time: ", time.time() - beginTime)
    elif arg.type == 'scipy':
        beginTime = time.time()
        PageRankResult = nx.pagerank_scipy(G, alpha=1 - arg.rate)
        print("PageRank_scipy elapsed time: ", time.time() - beginTime)
    elif arg.type == 'google':
        beginTime = time.time()
        PageRankResult = nx.google_matrix(G, alpha=1 - arg.rate)
        print("Google Matrix elapsed time: ", time.time() - beginTime)

    pr_res = json.dumps(PageRankResult)

    f = open('rank_save.tsv', 'w')
    f.write(pr_res)
    f.close()
Example #40
def getTransMatrix(graph):
    P = nx.google_matrix(graph, alpha=1)
    # P /= P.sum(axis=1)
    P = P.T
    return P
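The helper above relies on the fact that google_matrix with alpha=1 is just the row-stochastic random-walk transition matrix (with dangling rows filled by the teleport distribution); transposing it switches to the column-stochastic convention. A small check of that property on an illustrative graph:

import networkx as nx
import numpy as np

G = nx.path_graph(4)
P = np.asarray(nx.google_matrix(G, alpha=1))
# rows of the Google matrix sum to 1 (row-stochastic) ...
assert np.allclose(P.sum(axis=1), 1.0)
# ... so after transposing, each column sums to 1 instead
assert np.allclose(P.T.sum(axis=0), 1.0)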
Example #41
 def test_empty(self):
     G = nx.Graph()
     assert nx.pagerank(G) == {}
     assert _pagerank_python(G) == {}
     assert nx.pagerank_numpy(G) == {}
     assert nx.google_matrix(G).shape == (0, 0)
Example #42
def link_prediction(G, query_nodes, target_nodes, n_edges, start_dist, alg = "ra"):
    """Selects a random set of links between based on the scores calculated by 
    a standard link-prediction algorithm from networkx library
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    alg: string
        A string describing the link-prediction algorithm to be used
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    ac_scores: list
        The set of scores of adding the links
    """
    assert alg in ["ra", "pa", "jaccard", "aa"], "alg must be one of [\"ra\", \"pa\", \"jaccard\", \"aa\"]."
          
    H = G.copy()
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum())[0,0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    links_to_add = []
    if alg == 'ra':
        preds = nx.resource_allocation_index(H, eligible)
    elif alg == 'jaccard':
        preds = nx.jaccard_coefficient(H, eligible)
    elif alg == 'aa':
        preds = nx.adamic_adar_index(H, eligible)
    elif alg == 'pa':
        preds = nx.preferential_attachment(H, eligible)
        
    for u,v,p in preds:
        links_to_add.append((u,v,p))
    links_to_add.sort(key=lambda x: x[2], reverse = True)
    
    ac_scores = []
    ac_scores.append(row_sums)
    i = 0
    while i < n_edges:
        F_updated = update_fundamental_mat(F, H, map_query_to_org, links_to_add[i][0])
        H.add_edge(links_to_add[i][0], links_to_add[i][1])
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        F = F_updated            
        ac_scores.append(abs_cen)
        i += 1
    return links_to_add, ac_scores
Example #43
 def googleMatrix(self):
     fname = self.DIR + '/googleMatricx.txt'
     google_matrix = nx.google_matrix(self.graph)
     numpy.savetxt(fname, google_matrix)
     print(fname)
Example #44
def get_approx_boundary(G, query_nodes, target_nodes, n_edges, start_dist):
    """
    Used to calculate an approximation guarantee for greedy algorithm
    """
    
    H = G.copy() # GET A COPY OF THE GRAPH
    query_set_size = len(query_nodes) 
    target_set_size = len(target_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    
    candidates = list(product(query_nodes, target_nodes))
    # ALL minus existing in G
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    
    # CALCULATE MARGINAL GAIN TO EMPTY SET FOR ALL NODES IN STEEPNESS FUNCTION
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums_empty = start_dist.dot(F.sum(axis=1))[0,0] # F(\emptyset)
    # candidates = list(product(query_nodes, target_nodes))
    ac_marginal_empty   = []
    ac_marginal_full    = []
    source_idx_empty = []
    node_processed = -1
    for out_edge in eligible:
        abs_cen = -1
        source_node = out_edge[0]
        if(node_processed == source_node):
            # skip updating matrix because this updates the F matrix in the same way
            continue
        node_processed = source_node           
        F_updated = update_fundamental_mat(F, H, map_query_to_org, source_node)
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        ac_marginal_empty.append(abs_cen)
        source_idx_empty.append(source_node)
        
    sorted_indexes_empty = [i[0] for i in sorted(enumerate(source_idx_empty), key=lambda x:x[1])]
    ac_marginal_empty = [ac_marginal_empty[i] for i in sorted_indexes_empty]   
    # CALCULATE MARGINAL GAIN FOR FULL SET

    H.add_edges_from(eligible)
    P_all = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs_all = P_all[list(query_nodes),:][:,list(query_nodes)]
    F_all = compute_fundamental(P_abs_all)
    
    row_sums_all = start_dist.dot(F_all.sum(axis=1))[0,0]
    node_prcessed   = -1
    source_idx = []
    for out_edge in eligible:
        abs_cen = -1
        source_node = out_edge[0]
        if(node_prcessed == source_node):
            # skip updating matrix because this updates the F matrix in the same way
            continue
        node_prcessed = source_node
        F_all_updated = update_rev_fundamental_mat(F_all, H, map_query_to_org, source_node)
        abs_cen   = start_dist.dot(F_all_updated.sum(axis = 1))[0,0]
        ac_marginal_full.append(abs_cen)
        source_idx.append(source_node)   
    
    sorted_indexes = [i[0] for i in sorted(enumerate(source_idx), key=lambda x:x[1])]
    ac_marginal_full = [ac_marginal_full[i] for i in sorted_indexes]
    
    assert sorted_indexes == sorted_indexes_empty , "Something is wrong with the way scores are appended"
    
    all_steepness = (asarray(ac_marginal_full) - row_sums_all) / (row_sums_empty-asarray(ac_marginal_empty))
    s = min(all_steepness)
    node_max = argmin(all_steepness)
    return 1-s, sorted_indexes[node_max]
Example #45
def localPartitioningAttempt(alpha, beta, node):
	gamma = alpha + beta - alpha * beta
	global nodeIntDict
	nodeIntDict = getNodeIntDict(DG)
	
	#This returns the transition matrix, which I think is the random walk matrix
	M = nx.google_matrix(DG)
	print "M"
	print M
	print "about to page rank"

	# Compute the two global page rank vectors
	prBeta  = nx.pagerank(DG, alpha=beta)
 	prGamma = nx.pagerank(DG, alpha=gamma)

 	#starting vector for local page rank with all probability on node
 	localDict = dict.fromkeys(DG.nodes(), 0)
 	localDict[node] = 1

 	localPR = nx.pagerank(DG, alpha=gamma, nstart=localDict, personalization=localDict)
 	
 	#linear combination
 	#this would maybe be faster if I used actual arrays instead of dictionaries
 	p = {}
 	for key in localPR.keys():
 		p[key] = (alpha/gamma)*localPR[key] + (((1-alpha)*beta)/gamma)*prGamma[key]
 		p[key] = p[key]/prBeta[key]
 	
 	#create a list of tuples sorted in non-increasing order by value
 	sortedP = sorted(p.iteritems(), key = operator.itemgetter(1), reverse=True)
 	sortedLocalPR = sorted(localPR.iteritems(), key = operator.itemgetter(1), reverse=True)
 	print "node is " + str(node)
 	print "sortedLocalPR is "
  	print sortedLocalPR[0:20]	
 	print "sortedP" 
 	print sortedP[0:20]

 	print "Conductance Loop"
 	S = []
 	notS = DG.nodes()
 	j = 0
 	S.append(sortedP[j][0])
 	notS.remove(sortedP[j][0])
 	minConducance = calculateConductance(S, notS)
 	minJ = j
 	for j in range(2,len(sortedP)):
 		S.append(sortedP[j][0])
 		notS.remove(sortedP[j][0])
 		tempConductance = calculateConductance(S, notS)
 		#print "cond " + str(tempConductance)
 		if tempConductance < minConducance:
 			minConducance = tempConductance
 			minJ = j
 	print minJ
 	print minConducance


 	minSet = sortedP[0:j]
 	print "minset"
 	print minSet
 	print "length of minset " + str(len(minSet))
 	return minSet 	
 	"""
Example #46
 def test_empty(self):
     G = networkx.Graph()
     assert_equal(networkx.pagerank(G), {})
     assert_equal(networkx.pagerank_numpy(G), {})
     assert_equal(networkx.google_matrix(G).shape, (0, 0))
Example #47

n3graph = nx.read_edgelist('../3node2SPs1direct.csv',
                           create_using=nx.DiGraph())
'''
Simplest Pagerank, use defaults (but print google matrix first)
'''
n3res = pagerank(n3graph,
                 alpha=0.85,
                 personalization=None,
                 dangling=None,
                 max_iter=1000,
                 nstart=None,
                 weight=None)
print("Pagerank, NO teleport set:")
print(google_matrix(n3graph, alpha=0.85, weight=None))
print(n3res)
print("Numpy version:")
n3res = pagerank_numpy(n3graph, alpha=0.85)
print(n3res)
print("Scipy version:")
n3res = pagerank_scipy(n3graph, alpha=0.85)
print(n3res)
print("\n=============================\n")
'''
We only need to provide values for the nodes we want considered (rather than all nodes)
Personalization values are normalized automatically
'''
pValues = {'A': 1}
n3res = pagerank(n3graph,
                 alpha=0.85,
Example #48
def pagerank(M, iters, d):
    N = M.shape[1]
    v = np.random.rand(N, 1)
    v = v / np.linalg.norm(v, 1)
    M_hat = (d * M + (1 - d) / N)
    start = time.process_time()
    for i in range(iters):
        v = M_hat @ v
    end = time.process_time()
    return v


# experiment parameters

N = 10000
iters = 25

# prepare pagerank matrix

print("constructing pagerank matrix...")
# use nx to create representation of a web graph
d = 0.85
G = nx.scale_free_graph(N, alpha=0.41, beta=0.49, gamma=0.1, delta_in=0)
M = nx.google_matrix(G, alpha=1).T

start = time.process_time()
v = pagerank(M, 25, 0.85)
end = time.process_time()
print("numpy:")
print(end - start)
print(v[1], v[2], v[3])
Example #49
def reverse_greedy(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a set of links with a reverse greedy descent algorithm that reduce the 
    absorbing RW centrality between a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. The query and target set 
    must be a 'viable' partition of the graph.
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    P : Scipy matrix
        The transition matrix of the graph G
    F : Scipy matrix
        The fundamental matrix for the graph G with the given set of absorbing
        random walk nodes
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    """
    H = G.copy()
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    H.add_edges_from(eligible)
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0,0]
    # candidates = list(product(query_nodes, target_nodes))
    worst_F = zeros(F.shape)
    worst_set = []
    optimal_set = []
    ac_scores = []
#     ac_scores.append(row_sums)
    
    while len(eligible) > 0:
        round_min       = -1
        worst_link      = (-1,-1)
        node_prcessed   = -1
        for out_edge in eligible:
            source_node = out_edge[0]
            if(node_prcessed == source_node):
                # skip updating matrix because this updates the F matrix in the same way
                continue
            node_prcessed = source_node
            F_updated = update_rev_fundamental_mat(F, H, map_query_to_org, source_node)
            abs_cen   = start_dist.dot(F_updated.sum(axis = 1))[0,0]
            if abs_cen < round_min or round_min == -1:
                worst_link  = out_edge
                round_min   = abs_cen
                worst_F     = F_updated
        F = worst_F
        H.remove_edge(*worst_link)
        worst_set.append(worst_link) 
        eligible.remove(worst_link)
        if (len(eligible) <= n_edges):           
            ac_scores.append(round_min)
            optimal_set.append(worst_link)
        
    return list(reversed(optimal_set)), list(reversed(ac_scores))