Esempio n. 1
0
 def test_biadjacency_matrix_weight(self):
     """Check that ``weight`` selects the edge attribute stored in the matrix.

     Edge (0, 1) of a 5-node path carries ``weight=2`` and ``other=4``.
     With rows ``[1, 3]`` the test expects that edge at entry ``[0, 0]``.
     """
     G = nx.path_graph(5)
     G.add_edge(0, 1, weight=2, other=4)
     rows = [1, 3]
     # One pass per attribute name instead of two copy-pasted call/assert pairs.
     for attr, expected in (('weight', 2), ('other', 4)):
         M = bipartite.biadjacency_matrix(G, rows, weight=attr)
         assert_equal(M[0, 0], expected)
Esempio n. 2
0
 def test_biadjacency_matrix_weight(self):
     """Check that ``weight`` selects the edge attribute stored in the matrix.

     The test is skipped when SciPy cannot be imported.  Edge (0, 1) of a
     5-node path carries ``weight=2`` and ``other=4``; with rows ``[1, 3]``
     the test expects that edge at entry ``[0, 0]``.
     """
     try:
         import scipy
     except ImportError:
         raise SkipTest('SciPy not available.')
     G = nx.path_graph(5)
     G.add_edge(0, 1, weight=2, other=4)
     rows = [1, 3]
     # One pass per attribute name instead of two copy-pasted call/assert pairs.
     for attr, expected in (('weight', 2), ('other', 4)):
         M = bipartite.biadjacency_matrix(G, rows, weight=attr)
         assert_equal(M[0, 0], expected)
Esempio n. 3
0
 def test_biadjacency_matrix_order(self):
     """Check that explicit row and column orders position the entries.

     With rows ``[3, 1]`` and columns ``[4, 2, 0]`` the weighted edge
     (0, 1) is expected at entry ``[1, 2]``.
     """
     graph = nx.path_graph(5)
     graph.add_edge(0, 1, weight=2)
     row_nodes, col_nodes = [3, 1], [4, 2, 0]
     matrix = bipartite.biadjacency_matrix(
         graph, row_nodes, col_nodes, weight='weight')
     assert_equal(matrix[1, 2], 2)
def _normalized_entropy(probs):
  """
  Return the Shannon entropy of ``probs`` divided by ``log(len(probs))``.

  Zero entries are skipped (0 * log 0 is taken as 0) so that they do not
  turn the whole sum into NaN.  A one-element distribution yields 0.
  """
  probs = np.asarray(probs, dtype=float).ravel()
  if probs.size == 1:
      return 0
  positive = probs[probs > 0]
  return -np.sum(positive * np.log(positive)) / np.log(probs.size)


def dominantRatio(G):
  """
  Compute the dominant ratio of a community.

  Nodes whose ``bipartite`` attribute is 0 form the rows (hosts) and nodes
  whose attribute is 1 form the columns (fqdns) of the biadjacency matrix.
  The matrix is normalized to sum to one, and the normalized entropy of
  each marginal distribution is returned.

  Returns
  -------
  (dhr, dfr) : tuple
      ``dhr`` is the normalized entropy of the row (host) marginal and
      ``dfr`` that of the column (fqdn) marginal.  Each is 0 when its side
      has a single node.  Both are NaN when the matrix sums to zero.
  """

  allHosts = set(n for n,d in G.nodes(data=True) if d['bipartite']==0)
  allfqdns = set(n for n,d in G.nodes(data=True) if d['bipartite']==1)

  A = bipartite.biadjacency_matrix(G,allHosts,allfqdns)

  # Work on dense 1-D marginals so the result does not depend on whether
  # the library returned np.matrix, a sparse matrix or a sparse array.
  row_sums = np.asarray(A.sum(axis=1), dtype=float).ravel()
  col_sums = np.asarray(A.sum(axis=0), dtype=float).ravel()
  total = row_sums.sum()
  if total == 0:
      # No edge weight at all: the distributions are undefined.
      return float('nan'), float('nan')

  #compute dhr
  dhr = _normalized_entropy(row_sums / total)

  #compute dfr
  dfr = _normalized_entropy(col_sums / total)

  return dhr, dfr
def GraphSVD(G,row_nodes,column_nodes):
    """Return the SVD of the degree-normalized biadjacency matrix of ``G``.

    The biadjacency matrix ``A`` (rows ``row_nodes``, columns
    ``column_nodes``) is scaled to ``An = D1^(-1/2) A D2^(-1/2)``, where
    ``D1`` and ``D2`` are the diagonal matrices of row and column sums.

    Returns
    -------
    (U, S, V, D1, D2) : tuple
        ``U`` and ``S`` as returned by ``np.linalg.svd(An)``; ``V`` is the
        transpose of the third factor returned by ``np.linalg.svd``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If any row or column sum is zero, since ``D1`` or ``D2`` is then
        singular.
    """
    A = bipartite.biadjacency_matrix(G,row_order=row_nodes,column_order=column_nodes)
    # Densify first: with sparse arrays or plain ndarrays ``*`` is
    # element-wise, so the normalization must not rely on operator
    # overloading of whatever type the library returned.
    dense = np.asarray(A.toarray() if hasattr(A, 'toarray') else A)
    # ravel() rather than squeeze() so that a single row or column still
    # yields a 1-D degree vector.
    row_deg = np.asarray(dense.sum(axis=1)).ravel()
    col_deg = np.asarray(dense.sum(axis=0)).ravel()
    if np.any(row_deg == 0) or np.any(col_deg == 0):
        raise np.linalg.LinAlgError('singular degree matrix: zero row or column sum')
    D1 = np.diag(row_deg)
    D2 = np.diag(col_deg)
    # For diagonal D, D^(-1/2) A D^(-1/2) is entry-wise division by
    # sqrt(row_deg[i] * col_deg[j]); no matrix inverse or sqrtm is needed.
    An = dense / np.sqrt(np.outer(row_deg, col_deg))
    U,S,V = np.linalg.svd(An)
    V = V.T
    return (U,S,V,D1,D2)
Esempio n. 6
0
 def test_biadjacency_matrix(self):
     """Check the matrix shape for random bipartite graphs of several sizes.

     Rows are the nodes whose ``bipartite`` attribute is 0; the shape is
     expected to be (number of top nodes, number of bottom nodes).
     """
     for n_top, n_bottom in zip([2,5,10], [5,10,15]):
         graph = nx.bipartite_random_graph(n_top, n_bottom, 0.2)
         top_nodes = [node for node, attrs in graph.nodes(data=True)
                      if attrs['bipartite']==0]
         matrix = bipartite.biadjacency_matrix(graph, top_nodes)
         assert_equal(matrix.shape[0], n_top)
         assert_equal(matrix.shape[1], n_bottom)
Esempio n. 7
0
 def test_biadjacency_matrix_order(self):
     """Check that explicit row and column orders position the entries.

     The test is skipped when SciPy cannot be imported.  With rows
     ``[3, 1]`` and columns ``[4, 2, 0]`` the weighted edge (0, 1) is
     expected at entry ``[1, 2]``.
     """
     try:
         import scipy
     except ImportError:
         raise SkipTest('SciPy not available.')
     graph = nx.path_graph(5)
     graph.add_edge(0, 1, weight=2)
     row_nodes, col_nodes = [3, 1], [4, 2, 0]
     matrix = bipartite.biadjacency_matrix(
         graph, row_nodes, col_nodes, weight='weight')
     assert_equal(matrix[1, 2], 2)
Esempio n. 8
0
def call_bipartite_edge_swap( G, xs, ys, Q ):
    """Return a new graph built from the edge-swapped biadjacency matrix of ``G``.

    ``xs`` label the rows and ``ys`` the columns of the matrix handed to
    ``bipartite_edge_swap``; ``Q`` multiplies the edge count to give the
    number of swaps requested.  The returned graph contains one edge
    ``(xs[u], ys[v])`` for every entry ``[u, v]`` of the result equal to 1.
    """
    # Compute the desired pieces of the graph structure
    M = bipartite.biadjacency_matrix(G, row_order=xs, column_order=ys, dtype=np.int32)
    # np.array() on a SciPy sparse matrix gives a 0-d object array, so
    # densify explicitly when the library returns a sparse container.
    A = np.array(M.toarray() if hasattr(M, 'toarray') else M)

    # Set up and call the permute matrix function
    max_tries = 2**31-1
    seed      = random.randint(0, 2**31-1)
    nswap     = len(G.edges()) * Q
    B = bipartite_edge_swap(A, nswap, max_tries, seed=seed, verbose=True)
    H = nx.Graph()
    H.add_edges_from([ (xs[u], ys[v]) for u, v in zip(*np.where(B == 1)) ])
    return H
Esempio n. 9
0
 def test_biadjacency_matrix(self):
     """Check the matrix shape for random bipartite graphs of several sizes.

     The test is skipped when SciPy cannot be imported.  Rows are the nodes
     whose ``bipartite`` attribute is 0; the shape is expected to be
     (number of top nodes, number of bottom nodes).
     """
     try:
         import scipy
     except ImportError:
         raise SkipTest('SciPy not available.')
     for n_top, n_bottom in zip([2,5,10], [5,10,15]):
         graph = bipartite.random_graph(n_top, n_bottom, 0.2)
         top_nodes = [node for node, attrs in graph.nodes(data=True)
                      if attrs['bipartite']==0]
         matrix = bipartite.biadjacency_matrix(graph, top_nodes)
         assert_equal(matrix.shape[0], n_top)
         assert_equal(matrix.shape[1], n_bottom)
Esempio n. 10
0
def permute_mutation_data(G, genes, patients, seed, Q=100):
    """Permute the gene/patient graph ``G`` and convert it to mutation data.

    When ``fortranBindings`` is truthy the biadjacency matrix (rows
    ``genes``, columns ``patients``) is passed to ``bipartite_edge_swap``
    and a new graph is rebuilt from the entries equal to 1.  Otherwise a
    copy of ``G`` is rewired in place by ``bipartite_double_edge_swap``
    after seeding ``random``.  In both cases ``Q`` times the number of
    edges is the number of swaps requested.

    ``genes`` and ``patients`` must support ``+`` with each other and
    integer indexing (the code concatenates and indexes them).
    """
    if fortranBindings:
        # Compute the desired pieces of the graph structure
        M = biadjacency_matrix(G, row_order=genes, column_order=patients, dtype=np.int32)
        # np.array() on a SciPy sparse matrix gives a 0-d object array, so
        # densify explicitly when the library returns a sparse container.
        A = np.array(M.toarray() if hasattr(M, 'toarray') else M, dtype=np.int32)

        # Set up and call the permute matrix function
        B = bipartite_edge_swap(A, nswap=len(G.edges()) * Q, max_tries=2**31-1, seed=seed, verbose=False)
        H = nx.Graph()
        H.add_nodes_from( genes + patients ) # some patients/genes may have zero mutations
        H.add_edges_from([ (genes[u], patients[v]) for u, v in zip(*np.where(B == 1)) ])
    else:
        H = G.copy()
        random.seed(seed)
        bipartite_double_edge_swap(H, genes, patients, nswap=Q * len( G.edges() ))
    return graph_to_mutation_data(H, genes, patients)
Esempio n. 11
0
def call_bipartite_edge_swap( G, xs, ys, Q ):
    """Return a new graph built from the edge-swapped biadjacency matrix of ``G``.

    ``xs`` and ``ys`` are sorted IN PLACE by decreasing degree in ``G``
    before the matrix is built, so the caller's lists are reordered.
    ``Q`` multiplies the edge count to give the number of swaps requested.
    The returned graph contains one edge ``(xs[u], ys[v])`` for every entry
    ``[u, v]`` of the result equal to 1.
    """
    # Compute the desired pieces of the graph structure
    # The degree function must be the sort *key*.  As a bare positional
    # argument it was taken as a Python 2 ``cmp`` function, and it is a
    # TypeError on Python 3.
    xs.sort(key=G.degree, reverse=True)
    ys.sort(key=G.degree, reverse=True)
    x_degrees = [ G.degree(x) for x in xs ]
    y_degrees = [ G.degree(y) for y in ys ]
    M = bipartite.biadjacency_matrix(G, row_order=xs, column_order=ys, dtype=np.int32)
    # np.array() on a SciPy sparse matrix gives a 0-d object array, so
    # densify explicitly when the library returns a sparse container.
    A = np.array(M.toarray() if hasattr(M, 'toarray') else M)

    # Set up and call the permute matrix function
    max_tries = 1e75
    seed      = random.randint(0, 2**32-1)
    nswap     = len(G.edges()) * Q
    B = bipartite_edge_swap(A, x_degrees, y_degrees, nswap, max_tries, seed)
    H = nx.Graph()
    H.add_edges_from([ (xs[u], ys[v]) for u, v in zip(*np.where(B == 1)) ])
    return H
Esempio n. 12
0
def permute_mutation_data(G, genes, patients, seed, Q=100):
    """Permute the gene/patient graph ``G`` and convert it to mutation data.

    When ``fortranBindings`` is truthy, genes and patients are ordered by
    decreasing degree, the biadjacency matrix in that order is passed to
    ``bipartite_edge_swap`` together with the degree sequences, and a new
    graph is rebuilt from the entries equal to 1.  Otherwise a copy of
    ``G`` is rewired in place by ``bipartite_double_edge_swap`` after
    seeding ``random``.  In both cases ``Q`` times the number of edges is
    the number of swaps requested.
    """
    if fortranBindings:
        # Compute the desired pieces of the graph structure
        xs = sorted(genes, key=G.degree, reverse=True)
        ys = sorted(patients, key=G.degree, reverse=True)
        x_degrees = [ G.degree(x) for x in xs ]
        y_degrees = [ G.degree(y) for y in ys ]
        M = biadjacency_matrix(G, row_order=xs,
                               column_order=ys,
                               dtype=np.int32)
        # np.array() on a SciPy sparse matrix gives a 0-d object array, so
        # densify explicitly when the library returns a sparse container.
        A = np.array(M.toarray() if hasattr(M, 'toarray') else M)

        # Set up and call the permute matrix function
        B = bipartite_edge_swap(A, x_degrees, y_degrees, len(G.edges()) * Q, 1e75, seed=seed)
        H = nx.Graph()
        # Keep genes/patients without any edge as isolated nodes, so this
        # branch does not drop nodes that the copy-based branch keeps.
        H.add_nodes_from(xs)
        H.add_nodes_from(ys)
        H.add_edges_from([ (xs[u], ys[v]) for u, v in zip(*np.where(B == 1)) ])
    else:
        H = G.copy()
        random.seed(seed)
        bipartite_double_edge_swap(H, genes, patients, nswap=Q * len( G.edges() ))
    # Indentation is spaces-only here; the original mixed a tab with
    # spaces, which Python 3 rejects with TabError.
    return graph_to_mutation_data(H, genes, patients)
Esempio n. 13
0
 def test_format_keyword_fail(self):
     """Request the matrix in the format named 'foo'.

     NOTE(review): the ``_fail`` suffix suggests an expected-exception
     decorator that is not visible here - confirm.
     """
     single_edge = nx.Graph([(1,0)])
     bipartite.biadjacency_matrix(single_edge, [0], format='foo')
Esempio n. 14
0
 def test_duplicate_col_fail(self):
     """Request a matrix whose column order lists node 1 twice.

     NOTE(review): the ``_fail`` suffix suggests an expected-exception
     decorator that is not visible here - confirm.
     """
     single_edge = nx.Graph([(1,0)])
     repeated_columns = [1,1]
     bipartite.biadjacency_matrix(single_edge, [0], repeated_columns)
Esempio n. 15
0
 def test_empty_fail(self):
     """Request a matrix with an empty row order on a one-edge graph.

     NOTE(review): the ``_fail`` suffix suggests an expected-exception
     decorator that is not visible here - confirm.
     """
     single_edge = nx.Graph([(1,0)])
     no_rows = []
     bipartite.biadjacency_matrix(single_edge, no_rows)
Esempio n. 16
0
 def test_null_fail(self):
     """Request a matrix with an empty row order on a graph without nodes.

     NOTE(review): the ``_fail`` suffix suggests an expected-exception
     decorator that is not visible here - confirm.
     """
     null_graph = nx.Graph()
     no_rows = []
     bipartite.biadjacency_matrix(null_graph, no_rows)
def sample_data(seed=123):
    """Build six normalized weight matrices from small random networks.

    Three single-type networks P, D and C (4, 6 and 8 nodes, twice as many
    edges as nodes) and three bipartite networks PD, PC and DC (edge
    probability 0.8) are generated with the given ``seed``.

    Returns
    -------
    tuple
        ``(norm_P, norm_D, norm_C, norm_PD, norm_PC, norm_DC)``, each a
        ``sp.csr_matrix`` of dtype float64.
    """
    #### load networks (hypothetical) #######
    n_p, n_d, n_c = 4, 6, 8

    def make_homogeneous(n_nodes):
        # Random single-type network with 2 * n_nodes edges.
        return nx.gnm_random_graph(n_nodes, n_nodes * 2, seed=seed)

    def make_bipartite(n_rows, n_cols):
        # Random two-type network with edge probability 0.8.
        return bipartite.random_graph(n_rows, n_cols, 0.8, seed=seed)

    def normalize_homogeneous(graph):
        # Adjacency divided entry-wise by sqrt(deg^T . deg)
        # (assuming no node with degree 0).
        adj = nx.adjacency_matrix(graph)
        deg = np.sum(adj, axis=0)
        scale = np.sqrt(np.dot(deg.T, deg))
        return sp.csr_matrix(adj / scale, dtype=np.float64)

    def normalize_bipartite(graph, n_rows):
        # Biadjacency (rows are nodes 0..n_rows-1) divided entry-wise by
        # sqrt(row_deg . col_deg); NaN entries are zeroed and then dropped
        # from the sparse structure.
        biadj = bipartite.biadjacency_matrix(graph, row_order=range(n_rows))
        row_deg = np.sum(biadj, axis=1)
        col_deg = np.sum(biadj, axis=0)
        scale = np.sqrt(np.dot(row_deg, col_deg))
        normalized = sp.csr_matrix(biadj / scale, dtype=np.float64)
        normalized.data[np.isnan(normalized.data)] = 0.0
        normalized.eliminate_zeros()
        return normalized

    # Homogeneous networks
    graph_p = make_homogeneous(n_p)
    graph_d = make_homogeneous(n_d)
    graph_c = make_homogeneous(n_c)
    # Heterogeneous networks
    graph_pd = make_bipartite(n_p, n_d)
    graph_pc = make_bipartite(n_p, n_c)
    graph_dc = make_bipartite(n_d, n_c)

    ### Weight matrices for network propagation
    return (
        normalize_homogeneous(graph_p),
        normalize_homogeneous(graph_d),
        normalize_homogeneous(graph_c),
        normalize_bipartite(graph_pd, n_p),
        normalize_bipartite(graph_pc, n_p),
        normalize_bipartite(graph_dc, n_d),
    )
Esempio n. 18
0
    for i, j in zip(a_node, b_node):
        temp = (i, j)
        edge_list.append(temp)

    # print edge list
    print("** Edge list 작성이 완료되었습니다.")

    # 네트워크 분석용 데이터 변환
    user_nodes = list(set(a_node))
    repo_nodes = list(set(b_node))
    nodes = user_nodes + repo_nodes
    print(repo_nodes)
    print(user_nodes)

    G = nx.Graph()
    G.add_nodes_from(user_nodes, bipartite=0)
    G.add_nodes_from(repo_nodes, bipartite=1)
    G.add_edges_from(edge_list)

    # biadjacency_matrix 작성
    adjacency_matrix = bipartite.biadjacency_matrix(G,row_order=repo_nodes, column_order=user_nodes)
    # print(adjacency_matrix.todense())

    # co_occurence=incidence_matrix(G,nodelist=nodes, edgelist= edge_list, oriented=False)
    # print(co_occurence.todense())


    # # 데이터 프레임 형태로 변환, 행렬 연산 후 변환 주의
    # df_adjacency = pd.DataFrame(adjacency_matrix.todense(), index=repo_nodes, columns=user_nodes)
    # print(df_adjacency)

import matplotlib.pyplot as plt
import networkx as nx
import networkx.algorithms.bipartite as bipartite

# Load the Davis Southern women graph; its two node sets are stored in the
# graph attributes 'top' and 'bottom'.
G = nx.davis_southern_women_graph()
women, clubs = G.graph['top'], G.graph['bottom']

print("Biadjacency matrix")
biadjacency = bipartite.biadjacency_matrix(G, women, clubs)
print(biadjacency)

# Unweighted projection onto the women nodes.
W = bipartite.projected_graph(G, women)
print('')
print("#Friends, Member")
for member in women:
    friend_count = W.degree(member)
    print('%d %s' % (friend_count, member))

# Weighted projection onto the women nodes: the weighted degree printed
# below is computed with the 'weight' edge attribute.
W = bipartite.weighted_projected_graph(G, women)
print('')
print("#Friend meetings, Member")
for member in women:
    meeting_count = W.degree(member, weight='weight')
    print('%d %s' % (meeting_count, member))

nx.draw(G)
plt.show()
Esempio n. 20
0
 def test_format_keyword(self):
     """Request the matrix in the format named 'foo' for a one-edge graph."""
     single_edge = nx.Graph([(1,0)])
     bipartite.biadjacency_matrix(single_edge, [0], format='foo')
Esempio n. 21
0
 def test_from_biadjacency_roundtrip(self):
     """Convert a path graph to a biadjacency matrix and back.

     The rebuilt graph is expected to be isomorphic to the original.
     """
     original = nx.path_graph(5)
     even_nodes = [0,2,4]
     matrix = bipartite.biadjacency_matrix(original, even_nodes)
     rebuilt = bipartite.from_biadjacency_matrix(matrix)
     assert_true(nx.is_isomorphic(original, rebuilt))
Esempio n. 22
0
 def test_duplicate_col(self):
     """Request a matrix whose column order lists node 1 twice."""
     single_edge = nx.Graph([(1,0)])
     repeated_columns = [1,1]
     bipartite.biadjacency_matrix(single_edge, [0], repeated_columns)
Esempio n. 23
0
 def test_empty_graph(self):
     """Request a matrix with an empty row order on a one-edge graph."""
     single_edge = nx.Graph([(1,0)])
     no_rows = []
     bipartite.biadjacency_matrix(single_edge, no_rows)
Esempio n. 24
0
 def test_null_graph(self):
     """Request a matrix with an empty row order on a graph without nodes."""
     null_graph = nx.Graph()
     no_rows = []
     bipartite.biadjacency_matrix(null_graph, no_rows)
Esempio n. 25
0
 def test_from_biadjacency_roundtrip(self):
     """Convert a path graph to a biadjacency matrix and back.

     The rebuilt graph is expected to be isomorphic to the original.
     """
     original = nx.path_graph(5)
     even_nodes = [0,2,4]
     matrix = bipartite.biadjacency_matrix(original, even_nodes)
     rebuilt = bipartite.from_biadjacency_matrix(matrix)
     assert_true(nx.is_isomorphic(original, rebuilt))
Esempio n. 26
0
cc = nx.bipartite.closeness_centrality(bg, l)
bc = nx.bipartite.betweenness_centrality(bg, l)

DictLargestValue(dc, list(l), "degree")
DictLargestValue(cc, list(l), "closeness")
DictLargestValue(bc, list(l), "betweenness")



###################################
''' STEP 3 '''
###################################
# Make a bipartite matrix
row_order = sorted(list(l))	#Rows are critics
col_order = sorted(list(r))	#Cols are movies
numpyMatrix = bipartite.biadjacency_matrix(bg, row_order, column_order=col_order)

# Create an event by actor matrix to determine movies that have been seen by 3 or more critics
M = numpyMatrix.A	#.A gets us an ndarray object
#print(row_order)	#Edna, Homer, Krusty, Lisa, Marge, Moe, Ned 				(Top to bottom)
#print(col_order)	#Cold, Eyes, Far, Into, Jack, Jerry, Live, Prada, Hours, Others	(Left to Right)

timesViewed = 0	#Increment number of times a movie has been seen for each movie. Reset to zero for each movie.
print("Movies seen by three or more critics: ")
for i in range(len(col_order)):
	timesViewed = 0
	for q in range(len(row_order)):
		timesViewed = timesViewed + M[q][i]
	
	#Output the name of the film if it has been viewed by three or more critics.
	if(timesViewed >= 3):
Esempio n. 27
0
                del preds[v]
                for u in L:
                    if u in pred:
                        pu = pred[u]
                        del pred[u]
                        if pu is unmatched or recurse(pu):
                            matching[v] = u
                            return 1
            return 0

        for v in unmatched:
            recurse(v)


if __name__ == '__main__':
    # Micro-benchmark: time one bipartiteMatch call on a random bipartite
    # graph converted to a dense list-of-lists biadjacency matrix.
    import timeit
    import numpy as np
    # NOTE: inside this block ``nx`` is the networkx *bipartite* subpackage,
    # not the top-level networkx package.
    from networkx.algorithms import bipartite as nx
    from operator import itemgetter

    G = nx.random_graph(1000, 2000, 0.2)
    # Take the row partition from the 'bipartite' node attribute rather
    # than from the first endpoint of each edge.  That keeps isolated
    # nodes, does not depend on how edges() orients each pair, and gives a
    # deterministic row order.
    partition_l = sorted(n for n, d in G.nodes(data=True) if d['bipartite'] == 0)

    content = nx.biadjacency_matrix(G, partition_l).toarray().tolist()

    start = timeit.default_timer()
    bipartiteMatch(content)
    stop = timeit.default_timer()

    print('Time: ', stop - start)
Esempio n. 28
0
def main():
    """Train and evaluate two classifiers on a user/entity follow graph.

    Command-line inputs: ``sys.argv[1]`` is the graph file, ``sys.argv[2]``
    a tab-separated entity table and ``sys.argv[3]`` a JSON file of users.

    For each of two filtered copies of the graph ("Vince" and "All") the
    function fits a naive Bayes style model and an SVD + logistic model,
    writes histograms and ROC plots under ``charts/``, and prints
    cross-validated ROC AUC scores.  Nearly all intermediate values are
    stored in module-level globals.

    NOTE(review): this is Python 2 code (print statements).
    NOTE(review): the pipeline infers a sensitive personal attribute
    (sexual orientation) about individual users; confirm that consent,
    data handling and ethics review cover this use before running or
    extending it.
    """
    global likely_orientation
    global where_dey_is
    global pipe
    global v
    global cf
    global likeliness
    global u
    global ents
    global ids
    global gay_graph_vince
    global straight_graph_vince
    global allusers
    global naive_gay
    global naive_straight
    global gay_users
    global straight_users
    global conf
    global entity_apriori
    global entities
    global entitylist
    global userlist
    global entity_given_gay
    global entity_given_straight
    global neighbors
    global pr_gay
    global pr_straight
    global users
    global labeled_users
    global gay_apriori
    global straight_apriori
    global m_bi
    global m_bi2
    global svd
    global linear_model
    global fpr
    global tpr
    global thresholds
    global w
    global X
    global y
    global scores
    global gnb
    global testing_users

    # Entity table: tab-separated, one header row, four typed columns.
    ents = numpy.genfromtxt(sys.argv[2],
                            delimiter="\t",
                            skip_header=1,
                            dtype=[('username', 'S32'),
                                   ('num_followers', 'i4'),
                                   ('entity_type', 'S2'), ('id', 'i8')])
    ids = set(
        ents['id'][0:-1]
    )  # need to remove last entity because it currently has no id label

    print "Loading users"
    f = open(sys.argv[3])
    users = json.load(f)
    f.close()

    print "Finding labeled users"
    # Convert the JSON object keys to integer ids.
    users = {int(k): v for k, v in users.items()}

    allusers = users
    labeled_users = {k: v for k, v in users.items() if "orientation" in v}
    # split() is defined elsewhere.  NOTE(review): presumably it returns a
    # (training, held-out) pair of user dicts - confirm.
    (users, testing_users) = split(labeled_users, "orientation",
                                   {"Straight", "Gay"})
    straight_users = {
        k: v
        for k, v in users.items() if v["orientation"] == "Straight"
    }
    gay_users = {k: v for k, v in users.items() if v["orientation"] == "Gay"}

    # Class priors: the share of each label among the retained users.
    gay_apriori = len(gay_users) / float(len(straight_users) + len(gay_users))
    straight_apriori = len(straight_users) / float(
        len(straight_users) + len(gay_users))
    print "Reading graph"
    f = open(sys.argv[1])
    v = read_json_graph_nonrecip_restricted_to(f, users.keys(), ids)
    #v = read_json_graph_with_ids(f, users.keys())
    f.close()

    print "Reading vince graph"
    u = nx.DiGraph(v)
    # Gives us all entities defined as those who don't recipocate
    u.remove_nodes_from([
        k for k, w in v.node.items()
        if (not k in users) and (w["role"] != "entity")
    ])
    vincy = u.copy()
    allzy = u.copy()
    # Gives us all entities as defined by Vince
    vincy.remove_nodes_from(
        [k for k, w in v.node.items() if (not k in users) and (not k in ids)])
    vincy.graph['name'] = "Vince"
    # Gives us all entities as defined by Vince
    allzy.remove_edges_from([(x, y) for (x, y) in v.edges()
                             if v.node[x]["role"] == v.node[y]["role"]])
    allzy.graph['name'] = "All"

    # NOTE(review): the loop variable rebinds the global ``u`` that was
    # assigned above; after the loop ``u`` refers to ``allzy``.
    for u in [vincy, allzy]:
        print "Finding useful features"

        # Subgraphs holding every entity node plus the users of one label.
        gay_graph_vince = u.subgraph([
            k for k, w in u.node.items()
            if (w["role"] == "entity") or (k in gay_users)
        ])
        straight_graph_vince = u.subgraph([
            k for k, w in u.node.items()
            if (w["role"] == "entity") or (k in straight_users)
        ])
        # Gives us all entities defined as those who don't recipocate
        entities = [
            x for x, y in u.node.items()
            if y["role"] == "entity" and gay_graph_vince.degree([x])[x] > 0
            and straight_graph_vince.degree([x])[x] > 0
        ]
        # Gives us all entities as defined by Vince
        #entities = [x for x in list(ids.intersection(u.nodes())) if u.degree([x])[x] > 0]

        # NOTE(review): len(u.predecessors(y)) needs predecessors() to
        # return a sized container - confirm against the NetworkX version
        # in use.
        cf = [(y,
               scipy.stats.bayes_mvs([
                   0 if users[x]['orientation'] == 'Straight' else 1
                   for x in u.predecessors(y)
               ])[0][1]) for y in entities if len(u.predecessors(y)) > 1]

        #entities = [x for x,y in cf if y > gay_apriori]
        #entities = [x for (x,(y,z)) in cf if y > .2 or z < 1 - .2]
        entity_count = float(sum([u.degree([x])[x] for x in entities]))
        # Per-entity frequencies: degree within each label's subgraph
        # divided by the number of users carrying that label.
        entity_given_gay = {
            x: ((gay_graph_vince.degree([x])[x]) / float(len(gay_users)))
            for x in entities
        }
        entity_given_straight = {
            x: ((straight_graph_vince.degree([x])[x]) /
                float(len(straight_users)))
            for x in entities
        }

        entity_apriori = {
            x: (u.degree([x])[x]) / float(len(users))
            for x in entities
        }

        #test a naive classifier on the training set to see how well it fits the observations
        for k in set(users.keys()).intersection(u.nodes()):
            neighbors = set(u.neighbors(k))
            # Each product multiplies, over all entities, the entity's
            # probability when it is a neighbor of k and its complement
            # otherwise.
            e_apriori = reduce(
                lambda x, y: x * y,
                map(
                    lambda z: entity_apriori[z]
                    if z in neighbors else 1 - entity_apriori[z], entities))
            e_given_g = reduce(
                lambda x, y: x * y,
                map(
                    lambda z: entity_given_gay[z]
                    if z in neighbors else 1 - entity_given_gay[z], entities))
            e_given_s = reduce(
                lambda x, y: x * y,
                map(
                    lambda z: entity_given_straight[z]
                    if z in neighbors else 1 - entity_given_straight[z],
                    entities))
            # pr_gay / pr_straight are not created in this function; they
            # must already exist as module-level mappings.
            pr_gay[k] = 0 if e_apriori == 0 else (e_given_g * gay_apriori /
                                                  e_apriori)
            pr_straight[k] = 0 if e_apriori == 0 else (e_given_s *
                                                       straight_apriori /
                                                       e_apriori)
    # currently the graph with

        # NOTE(review): the sorted list is not bound to a name, so this
        # statement has no lasting effect.
        sorted([(z, entity_given_straight[z] / entity_apriori[z])
                for z in entities],
               key=operator.itemgetter(1))

        # Replace non-positive pr_straight values by the smallest positive
        # one so the ratio below never divides by zero.
        min_straight = min([
            pr_straight[z] for z in set(users.keys()).intersection(u.nodes())
            if pr_straight[z] > 0
        ])
        pr_straight = {
            z: (pr_straight[z] if pr_straight[z] > 0 else min_straight)
            for z in set(users.keys()).intersection(u.nodes())
        }
        likeliness = {
            z: pr_gay[z] / pr_straight[z]
            for z in set(users.keys()).intersection(u.nodes())
        }
        likely_orientation = [
            (x, y, users[x]["orientation"])
            for x, y in sorted(likeliness.items(), key=operator.itemgetter(1))
        ]
        where_dey_is = [
            x for x, y in enumerate(likely_orientation) if y[2] == "Gay"
        ]
        likeliness_gay = [
            math.log(y) for x, y in likeliness.items()
            if users[x]["orientation"] == "Gay" and y > 0
        ]
        likeliness_straight = [
            math.log(y) for x, y in likeliness.items()
            if users[x]["orientation"] == "Straight" and y > 0
        ]
        uzers = list(set(users.keys()).intersection(u.nodes()))
        uzers_est = [likeliness[x] for x in uzers]

        uzers_real = [
            1 if users[x]["orientation"] == 'Gay' else 0 for x in uzers
        ]

        fpr, tpr, thresholds = roc_curve(uzers_real, uzers_est)
        roc_auc = auc(fpr, tpr)
        plot_roc(fpr, tpr, roc_auc, "charts/naive_roc.pdf", "(Naive Bayes)")
        print "%s: Naive bayes: Area under the ROC curve : %f" % (
            u.graph["name"], roc_auc)

        # NOTE(review): ``normed`` is rejected by newer matplotlib releases
        # in favour of ``density`` - confirm the pinned version.
        plt.hist([likeliness_gay, likeliness_straight],
                 20,
                 normed=1,
                 histtype='bar',
                 color=['crimson', 'burlywood'],
                 label=['Gay', 'Straight'],
                 log=True)
        plt.ylabel("Frequency (Log)")
        plt.xlabel("Log-Likeliness")
        plt.title("Naive Bayes Model")
        plt.legend()
        plt.savefig('charts/naive-bayes-normed-%s.pdf' % u.graph["name"],
                    bbox_inches='tight')

        plt.clf()

        w = u.to_undirected()
        userlist = uzers
        entitylist = list(set(entities).intersection(w.nodes()))

        #m_bi = bim(w, entitylist, userlist)
        # Rows are entities, columns are users.
        m_bi = bp.biadjacency_matrix(w, entitylist, userlist)

        # Transposed so that each row corresponds to one user.
        m_bi2 = np.matrix.transpose(m_bi)

        #m_bi2 = normalize(m_bi2)

        #gayrowi=[x for x,y in enumerate(userlist) if users[y]["orientation"] == "Gay"]
        #gayrows = m_bi2[gayrowi]

        #for i in range(11):
        #    m_bi2 = np.concatenate((m_bi2, gayrows))

        #U, s, Vh = np.linalg.svd(m_bi2)

        # ``svd``, ``logistic``, ``dims``, ``pipe`` and ``gnb`` are not
        # created in this function; they are expected at module level.
        X = svd.fit_transform(m_bi2)
        y = [1 if users[x]["orientation"] == "Gay" else 0
             for x in userlist]  # + [1]*11*len(gayrows)

        logistic.fit(X, y)
        yp = logistic.decision_function(X)

        fpr, tpr, thresholds = roc_curve(y, yp)
        roc_auc = auc(fpr, tpr)
        print "%s: SVD -> Logit: Area under the ROC curve : %f" % (
            u.graph["name"], roc_auc)

        #roc_auc_score(y, yp)
        likeliness_gay = [
            z for x, z in zip(userlist, yp) if users[x]['orientation'] == 'Gay'
        ]
        likeliness_straight = [
            z for x, z in zip(userlist, yp)
            if users[x]['orientation'] == 'Straight'
        ]

        plt.hist([likeliness_gay, likeliness_straight],
                 30,
                 normed=1,
                 histtype='bar',
                 color=['crimson', 'burlywood'],
                 label=['Gay', 'Straight'])
        plt.ylabel("Frequency")
        plt.xlabel("Confidence")
        plt.title("SVD Plus Logistic Regression")
        plt.legend()
        plt.savefig('charts/svd-logit-normed-%s.pdf' % u.graph["name"],
                    bbox_inches='tight')
        plt.clf()

        plt.hist([likeliness_gay, likeliness_straight],
                 30,
                 histtype='bar',
                 log=True,
                 color=['crimson', 'burlywood'],
                 label=['Gay', 'Straight'])
        plt.ylabel("Frequency")
        plt.xlabel("Confidence")
        plt.title("SVD Plus Logistic Regression")
        plt.legend()
        plt.savefig('charts/svd-logit-%s.pdf' % u.graph["name"],
                    bbox_inches='tight')
        plt.clf()

        plot_roc(fpr, tpr, roc_auc,
                 "charts/svd-logit_roc-%s.pdf" % u.graph["name"],
                 "(SVD-Logit, %d Dims)" % dims)

        # 10-fold cross-validated ROC AUC of ``pipe`` on the user matrix.
        scores = cross_validation.cross_val_score(pipe,
                                                  m_bi2,
                                                  y,
                                                  cv=10,
                                                  scoring='roc_auc')
        # Yo! You might want to call '%pylab' if you exit into the ipython environment

        print "%s: svd->logit" % u.graph['name']
        print scores
        print scores.mean()

        #gnb.fit(m_bi2, y)

        # Same evaluation for ``gnb``.
        scores = cross_validation.cross_val_score(gnb,
                                                  m_bi2,
                                                  y,
                                                  cv=10,
                                                  scoring='roc_auc')
        # Yo! You might want to call '%pylab' if you exit into the ipython environment

        print "%s: naive" % u.graph['name']
        print scores
        print scores.mean()
    # Runs once after the loop, so ``entitylist`` is the one from the last
    # graph processed; ``new_users`` is a local that is not used afterwards.
    new_users = find_gay(entitylist, pipe, sys.argv[1], allusers)
Esempio n. 29
0
# Weighted projection of N onto the ``foods`` node set.
# NOTE(review): the third positional argument is presumably the ``ratio``
# flag of weighted_projected_graph - confirm.
fw_graph = bipartite.weighted_projected_graph(N, foods, True)

# Edge width represents weights
dzcnapy.attrs["width"] = [d['weight'] * 75 for n1, n2, d in
                          fw_graph.edges(data=True)]
dzcnapy.thick_attrs["width"] = 10

# NOTE(review): the widths above come from fw_graph, while the layout and
# the drawing below use f_graph (defined before this fragment) - confirm
# that both graphs list their edges in the same order.
pos = graphviz_layout(f_graph)
nx.draw_networkx_edges(f_graph, pos, **dzcnapy.thick_attrs)
nx.draw_networkx_nodes(f_graph, pos, **dzcnapy.attrs)
nx.draw_networkx_labels(f_graph, pos, **dzcnapy.attrs)
dzcnapy.set_extent(pos, plt)
dzcnapy.plot("projected_foods")

# Dense biadjacency matrix with one row per node of f_graph.
adj = bipartite.biadjacency_matrix(N, f_graph).toarray()
# Pairwise Pearson correlation between matrix rows.  From here on
# ``foods`` is rebound from the node collection used above to this
# DataFrame.
foods = pd.DataFrame([[stats.pearsonr(x, y)[0] for x in adj]
                      for y in adj], columns=f_graph, index=f_graph)

# Keep the (row, column) label pairs whose correlation reaches the
# threshold and use them as the edges of a new graph.
SLICING_THRESHOLD = 0.375
stacked = foods.stack()
edges = stacked[stacked >= SLICING_THRESHOLD].index.tolist()
f_pearson = nx.Graph(edges)

# Edges come from the correlation graph; nodes and labels are still drawn
# from f_graph with the layout computed earlier.
nx.draw_networkx_edges(f_pearson, pos, **dzcnapy.thick_attrs)
nx.draw_networkx_nodes(f_graph, pos, **dzcnapy.attrs)
nx.draw_networkx_labels(f_graph, pos, **dzcnapy.attrs)
dzcnapy.set_extent(pos, plt)
dzcnapy.plot("pearson_foods")

from generalized import generalized_similarity
Esempio n. 30
0
def add_knowledge_complexity(network: nx.Graph, max_iteration=25):
    """Attach iterated knowledge-complexity scores to every node of ``network``.

    ``network`` must be bipartite.  One node set holds IPC codes and the
    other holds cities; the set whose first element matches the pattern
    ``\\w\\d\\d\\w`` is taken to be the IPC set.

    Starting from the weighted row and column sums of the city-by-IPC
    biadjacency matrix, scores are updated iteratively over a 0/1 matrix
    of entries whose relative advantage is at least 1.  Iteration stops
    after ``max_iteration - 1`` updates, or earlier once at least 80% of
    the ranks equal those from two iterations before.

    Each stored iteration ``i`` is written to the node attribute
    ``'Knowledge_Complexity_<i>'``.  The same ``network`` object is
    returned after being modified in place.
    """
    print('正在计算知识复杂度……')
    assert nx.is_bipartite(network)

    # Take the two node sets of the knowledge-city bipartite network and
    # turn them into lists.
    set1, set2 = bipartite.sets(network)
    set1, set2 = list(set1), list(set2)

    # Decide which list holds IPC codes by testing the first element of
    # set1 against the IPC pattern.
    pattern = re.compile(r'\w\d\d\w')
    if pattern.match(str(set1[0])) is not None:
        ipc_list, city_list = set1, set2
    else:
        ipc_list, city_list = set2, set1

    # Sort both lists so that scores can be matched back to nodes by
    # position later on.
    ipc_list.sort()
    city_list.sort()

    # Weighted biadjacency matrix: rows are cities, columns are IPC codes.
    matrix = biadjacency_matrix(network, city_list, ipc_list,
                                weight='weight').toarray()

    # Total weighted amount of knowledge.
    total = matrix.sum()

    # kc0: row sums, one value per city (diversity of a city's output).
    kc0 = matrix.sum(1)
    # kp0: column sums, one value per IPC code (ubiquity of the knowledge).
    kp0 = matrix.sum(0)
    # Share of each IPC code in the overall total.
    product_share = kp0 / total

    # matrix2: 0/1 matrix marking entries whose relative advantage (the
    # city's share of the code divided by the code's overall share) is at
    # least 1.
    matrix2 = (
        (matrix / kc0.reshape(-1, 1)) / product_share.reshape(1, -1)) >= 1
    matrix2 = matrix2 * 1

    # Iteration 0 holds the initial values.
    kc = [kc0]
    kp = [kp0]

    for i in range(1, max_iteration):
        # Vectorized update: matrix products replace explicit loops.
        kci = np.matmul(matrix2, np.transpose(kp[i - 1])) / kc[0]
        kpi = np.matmul(np.transpose(kc[i - 1]), matrix2) / kp[0]

        # Compare the new ranking with the one from two iterations before
        # and stop once it has (mostly) stabilized.
        kci_rank = ss.rankdata(kci)
        kpi_rank = ss.rankdata(kpi)

        if i > 1:
            pre_kci_rank = ss.rankdata(kc[i - 2])
            if np.mean(kci_rank == pre_kci_rank) >= 0.8:
                print('排名未发生太大变化,停止迭代,i=', i - 1)
                break

            pre_kpi_rank = ss.rankdata(kp[i - 2])
            if np.mean(kpi_rank == pre_kpi_rank) >= 0.8:
                print('排名未发生太大变化,停止迭代,i=', i - 1)
                break

        kc.append(kci)
        kp.append(kpi)

    # kc and kp always have the same length; write one attribute per
    # stored iteration, covering both cities and IPC codes.
    for i, (city_scores, ipc_scores) in enumerate(zip(kc, kp)):
        knowledge_complexity = dict(zip(city_list, city_scores))
        knowledge_complexity.update(zip(ipc_list, ipc_scores))
        nx.set_node_attributes(network, knowledge_complexity,
                               'Knowledge_Complexity_' + str(i))

    return network
Esempio n. 31
0
Shows how to make unipartite projections of the graph and compute the
properties of those graphs.

These data were collected by Davis et al. in the 1930s.  
They represent observed attendance at 14 social events by 18 Southern women.  
The graph is bipartite (clubs, women).
"""
import networkx as nx
import networkx.algorithms.bipartite as bipartite

# Load the Davis Southern women graph; its two node sets are stored in the
# graph attributes 'top' and 'bottom'.
G = nx.davis_southern_women_graph()
women, clubs = G.graph['top'], G.graph['bottom']

print("Biadjacency matrix")
biadjacency = bipartite.biadjacency_matrix(G, women, clubs)
print(biadjacency)

# Unweighted projection onto the women nodes.
W = bipartite.projected_graph(G, women)
print('')
print("#Friends, Member")
for member in women:
    friend_count = W.degree(member)
    print('%d %s' % (friend_count, member))

# Weighted projection onto the women nodes: the weighted degree printed
# below is computed with the 'weight' edge attribute.
W = bipartite.weighted_projected_graph(G, women)
print('')
print("#Friend meetings, Member")
for member in women:
    meeting_count = W.degree(member, weight='weight')
    print('%d %s' % (meeting_count, member))
Esempio n. 32
0
 def test_duplicate_col(self):
     """A column order listing node 1 twice must raise NetworkXError."""
     single_edge = nx.Graph([(1, 0)])
     repeated_columns = [1, 1]
     with pytest.raises(nx.NetworkXError):
         bipartite.biadjacency_matrix(single_edge, [0], repeated_columns)
Esempio n. 33
0
 def test_null_graph(self):
     """An empty row order on a graph without nodes must raise NetworkXError."""
     null_graph = nx.Graph()
     no_rows = []
     with pytest.raises(nx.NetworkXError):
         bipartite.biadjacency_matrix(null_graph, no_rows)
Esempio n. 34
0
 def test_format_keyword(self):
     """The format name "foo" must raise NetworkXError."""
     single_edge = nx.Graph([(1, 0)])
     with pytest.raises(nx.NetworkXError):
         bipartite.biadjacency_matrix(single_edge, [0], format="foo")
Esempio n. 35
0
 def test_empty_graph(self):
     """An empty row order on a one-edge graph must raise NetworkXError."""
     single_edge = nx.Graph([(1, 0)])
     no_rows = []
     with pytest.raises(nx.NetworkXError):
         bipartite.biadjacency_matrix(single_edge, no_rows)
Esempio n. 36
0
def to_adjacency_matrix(data):
    """Build a directed graph from edge pairs and return its biadjacency matrix.

    ``data`` is iterated twice: once to add the edges and once to collect
    the first element of every pair, which becomes the row node set.

    Returns
    -------
    (row_nodes, matrix) : tuple
        The set of first elements, and the dense array obtained from
        ``biadjacency_matrix(graph, row_nodes).toarray()``.
    """
    graph = nx.DiGraph()
    graph.add_edges_from(data)
    row_nodes = {pair[0] for pair in data}
    dense = biadjacency_matrix(graph, row_nodes).toarray()
    return row_nodes, dense