Example #1
 def test_bipartite_weighted_degrees(self):
     G = nx.path_graph(5)
     G.add_edge(0, 1, weight=0.1, other=0.2)
     X = {1, 3}
     Y = {0, 2, 4}
     u, d = bipartite.degrees(G, Y, weight="weight")
     assert dict(u) == {1: 1.1, 3: 2}
     assert dict(d) == {0: 0.1, 2: 2, 4: 1}
     u, d = bipartite.degrees(G, Y, weight="other")
     assert dict(u) == {1: 1.2, 3: 2}
     assert dict(d) == {0: 0.2, 2: 2, 4: 1}
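The test snippets on this page are excerpts from a test class, so their imports are omitted. Below is a minimal, self-contained sketch of the same weighted call, assuming NetworkX 2.x or later, where bipartite.degrees returns a pair of degree views rather than plain dicts.

import networkx as nx
from networkx.algorithms import bipartite

G = nx.path_graph(5)                      # path 0-1-2-3-4, bipartite sets {1, 3} and {0, 2, 4}
G.add_edge(0, 1, weight=0.1, other=0.2)   # attach two attributes to the existing edge (0, 1)

Y = {0, 2, 4}
u, d = bipartite.degrees(G, Y, weight="weight")   # (degrees of the other set, degrees of Y)
print(dict(u))   # {1: 1.1, 3: 2}
print(dict(d))   # {0: 0.1, 2: 2, 4: 1}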
Example #2
 def test_bipartite_weighted_degrees(self):
     G=nx.path_graph(5)
     G.add_edge(0,1,weight=0.1,other=0.2)
     X=set([1,3])
     Y=set([0,2,4])
     u,d=bipartite.degrees(G,Y,weight='weight')
     assert_equal(u,{1:1.1,3:2})
     assert_equal(d,{0:0.1,2:2,4:1})
     u,d=bipartite.degrees(G,Y,weight='other')
     assert_equal(u,{1:1.2,3:2})
     assert_equal(d,{0:0.2,2:2,4:1})
Example #3
 def test_bipartite_weighted_degrees(self):
     G = nx.path_graph(5)
     G.add_edge(0, 1, weight=0.1, other=0.2)
     X = set([1, 3])
     Y = set([0, 2, 4])
     u, d = bipartite.degrees(G, Y, weight='weight')
     assert_equal(u, {1: 1.1, 3: 2})
     assert_equal(d, {0: 0.1, 2: 2, 4: 1})
     u, d = bipartite.degrees(G, Y, weight='other')
     assert_equal(u, {1: 1.2, 3: 2})
     assert_equal(d, {0: 0.2, 2: 2, 4: 1})
Example #4
def partition_strings_2set(X, C, X_file, C_file, params):
    """

    """

    G_star = graphs.construct_exact_2set_nearest_neighbor_bipartite_graph(
        X, C, X_file, C_file, params)
    # G_star, alignment_graph = graphs.construct_2set_nearest_neighbor_bipartite_graph(X, C, X_file, C_file)
    G_star_transposed = nx.reverse(G_star)  #functions.transpose(G_star)
    partition = {}  # dict with a center as key and a set of all sequences assigned to that partition

    # candidate_nodes, read_nodes = bipartite.sets(G_star_transposed)

    read_nodes = set(n for n, d in G_star_transposed.nodes(data=True)
                     if d['bipartite'] == 0)
    candidate_nodes = set(G_star_transposed) - read_nodes

    read_deg, cand_deg = bipartite.degrees(G_star_transposed, candidate_nodes)
    # print(len(read_nodes), len(candidate_nodes))
    # print(read_deg)
    # print(cand_deg)

    ######################
    while len(candidate_nodes) > 0:
        read_deg, cand_deg = bipartite.degrees(G_star_transposed,
                                               candidate_nodes)
        read_deg, cand_deg = dict(read_deg), dict(cand_deg)
        # print(type(read_deg), read_deg)
        # print(type(cand_deg), cand_deg)
        # print("reads left:", len(read_deg))
        # print("cands left:", len(cand_deg))
        m = max(sorted(cand_deg), key=lambda key: cand_deg[key])
        reads_supporting_m = list(G_star_transposed.neighbors(m))
        partition[m] = set(reads_supporting_m)
        G_star_transposed.remove_node(m)
        G_star_transposed.remove_nodes_from(reads_supporting_m)

        read_nodes = set(n for n, d in G_star_transposed.nodes(data=True)
                         if d['bipartite'] == 0)
        candidate_nodes = set(G_star_transposed) - read_nodes
        # candidate_nodes, read_nodes = bipartite.sets(G_star_transposed)

        # print("total nodes left after removal:", len(G_star_transposed.nodes()), "tot candidate nodes left:", candidate_nodes)
        # print(read_nodes, [G_star[node] for node in read_nodes])
        # print(len(reads_supporting_m) , len(G_star_transposed.nodes()), G_star_transposed.nodes() )

    # print([ (m,len(partition[m])) for m in partition] )
    #####################

    return G_star, partition
Example #5
def collaborativeness(B):
    """Compute a (collaborativeness, diversity) score for each bottom node of the bipartite graph B."""

    # splitting the types of nodes of the graph object B
    top_nodes = set(node for node, d in B.nodes(data=True)
                    if d['bipartite'] == 0)  #set of top nodes
    bottom_nodes = set(B) - top_nodes  #set of bottom nodes
    deg_top, deg_bottom = bipartite.degrees(
        B, bottom_nodes)  #dictionary: nodes as keys, degrees as values

    # creating simple graph and multigraph bottom projections
    G = bipartite.projected_graph(B, bottom_nodes)
    Gm = bipartite.projected_graph(B, bottom_nodes, multigraph=True)

    col_dict = {}
    #ratio_dict = {}
    #div_dict = {}

    for node in bottom_nodes:
        if G.degree(node) > 0:
            gamma = 0
            shared = 0
            for nbr in B[node]:
                gamma += math.log(B.degree(nbr))
                if B.degree(nbr) > 1:
                    shared += 1

            col_dict[node] = ((float(shared) / B.degree(node)) * gamma,
                              float(G.degree(node)) / Gm.degree(node))
            #ratio_dict[node] = (float(shared)/B.degree(node))
            #diversity_dict[node] = float(G.degree(node))/Gm.degree(node)

    return col_dict
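The function above expects every node of B to carry a 0/1 'bipartite' attribute and relies on math and NetworkX's bipartite module being imported. A hypothetical toy driver, as a sketch under those assumptions (the author/paper graph below is invented purely for illustration):

import math
import networkx as nx
from networkx.algorithms import bipartite

# Hypothetical toy graph: papers are the top nodes (bipartite=0),
# authors are the bottom nodes (bipartite=1).
B = nx.Graph()
B.add_nodes_from(["p1", "p2", "p3"], bipartite=0)
B.add_nodes_from(["alice", "bob", "carol"], bipartite=1)
B.add_edges_from([("alice", "p1"), ("bob", "p1"), ("bob", "p2"), ("carol", "p3")])

print(collaborativeness(B))
# e.g. {'alice': (0.6931..., 1.0), 'bob': (0.3465..., 1.0)}
# 'carol' shares no paper with anyone, so she is skipped.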
Example #6
 def test_bipartite_degrees(self):
     G = nx.path_graph(5)
     X = {1, 3}
     Y = {0, 2, 4}
     u, d = bipartite.degrees(G, Y)
     assert dict(u) == {1: 2, 3: 2}
     assert dict(d) == {0: 1, 2: 2, 4: 1}
Example #7
 def test_bipartite_degrees(self):
     G = nx.path_graph(5)
     X = set([1, 3])
     Y = set([0, 2, 4])
     u, d = bipartite.degrees(G, Y)
     assert dict(u) == {1: 2, 3: 2}
     assert dict(d) == {0: 1, 2: 2, 4: 1}
Example #8
 def test_bipartite_degrees(self):
     G = nx.path_graph(5)
     X = set([1, 3])
     Y = set([0, 2, 4])
     u, d = bipartite.degrees(G, Y)
     assert_equal(u, {1: 2, 3: 2})
     assert_equal(d, {0: 1, 2: 2, 4: 1})
Example #9
 def test_bipartite_degrees(self):
     G=nx.path_graph(5)
     X=set([1,3])
     Y=set([0,2,4])
     u,d=bipartite.degrees(G,Y)
     assert_equal(u,{1:2,3:2})
     assert_equal(d,{0:1,2:2,4:1})
Example #10
def degree_distribution(G, nodes, weight):

	if weight == True:
		degree_values_users, degree_values_posts = bipartite.degrees(G,nodes,'weight')
	else:
		degree_values_users, degree_values_posts = bipartite.degrees(G,nodes)	

	#take a look at first 20 entries in each
	def take(n, iterable):
		"Return first n items of the iterable as a list"
		return list(islice(iterable, n))
	#print take(20,degree_values_posts.iteritems())
	#print take(20,degree_values_users.iteritems())


	vals_posts=degree_values_posts.values()
	average_degree_p = sum(vals_posts)/float(len(vals_posts))
	#crop out larger values
	vals_posts= [s for s in vals_posts if s<=2000]
	data_posts=Counter(vals_posts)

	"""	
	vals_users=degree_values_users.values()
	average_degree_u = sum(vals_users)/float(len(vals_users))
	#crop out larger values
	vals_users=[s for s in vals_users if s<=100]
	data_users=Counter(vals_users)
	"""


	plt.figure(1)

	plt.subplot(211)
	plt.hist(data_posts.keys(),label = "Average Degree = %s" %(average_degree_p))
	plt.title("Degree Distribution (post-projection) Histogram")
	plt.xlabel("Degree")
	plt.ylabel("Number of nodes with degree")
	plt.legend(loc='best')
	"""
	plt.subplot(212)
	plt.hist(data_users.keys(),label = "Average Degree = %s" %(average_degree_u))
	plt.title("Degree Distribution (user-projection) Histrogram")
	plt.xlabel("Degree")
	plt.ylabel("Number of nodes with degree")
	plt.legend(loc='best')
	"""
	plt.show()
Example #11
def get_deg_list_by_partition(graph, partition):
    """
    Get degree distribution for given partition of bipartite graph
    """
    if not bipartite.is_bipartite(graph):
        return []

    nodes = [
        node for node in graph.nodes
        if graph.nodes[node]['bipartite'] == partition
    ]
    return bipartite.degrees(graph, nodes)[1]
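A short usage sketch for the helper above, assuming the graph's nodes carry an integer 'bipartite' attribute as the function expects; the small graph below is hypothetical:

import networkx as nx
from networkx.algorithms import bipartite

graph = nx.Graph()
graph.add_nodes_from([1, 2, 3], bipartite=0)
graph.add_nodes_from(["a", "b"], bipartite=1)
graph.add_edges_from([(1, "a"), (2, "a"), (2, "b"), (3, "b")])

# Degree view for the partition-0 nodes; dict(...) gives {1: 1, 2: 2, 3: 1}
print(dict(get_deg_list_by_partition(graph, 0)))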
Example #12
def suffle_edges_lc(G):

    # Get the largest component
    print("Getting largest component ...")
    components = sorted(nx.connected_components(G), key=len, reverse=True)
    largest_component = components[0]
    C = G.subgraph(largest_component)

    degX, degY = bipartite.degrees(C, nodes_0)
    degATC = dict(degX).values()
    degCIE = dict(degY).values()
    counterATC = collections.Counter(degATC)
    counterCIE = collections.Counter(degCIE)

    nodes_0_c = []
    nodes_1_c = []
    for n in C.nodes(data=True):
        if n[1]['bipartite'] == 0:
            nodes_0_c.append(n[0])
        if n[1]['bipartite'] == 1:
            nodes_1_c.append(n[0])

    print("Shuffling edges ... ")
    unfrozen_graph = nx.Graph(C)
    #    C_shuffled = copy.copy(C)

    k = 0
    iter = 2 * unfrozen_graph.size()
    while k < iter:
        r1 = random.choice(sorted(dict(degY).keys()))
        d1 = random.choice(list(unfrozen_graph.neighbors(r1)))

        r2 = random.choice(sorted(dict(degY).keys()))
        d2 = random.choice(list(unfrozen_graph.neighbors(r2)))

        if not unfrozen_graph.has_edge(r1, d2) and not unfrozen_graph.has_edge(r2, d1):
            unfrozen_graph.add_edge(r1, d2)
            unfrozen_graph.remove_edge(r1, d1)
            unfrozen_graph.add_edge(r2, d1)
            unfrozen_graph.remove_edge(r2, d2)
            #            print(k)
            k = k + 1

    return (unfrozen_graph, counterATC, counterCIE, dict(degX), dict(degY),
            nodes_0_c, nodes_1_c)
Example #13
def from_bipartitegraph_to_mergesplitmetric(bipart_graph, sv_nodes):
    degX, degY = bipartite.degrees(bipart_graph, sv_nodes)
    num_of_merges = 0
    SVids_merger = []
    skeleton_groups = []
    for node_id, node_degree in dict(degY).items():
        if node_degree > 1:
            num_of_merges += node_degree - 1
            SVids_merger.append(node_id)
            skeletons = bipart_graph[node_id].keys()  # all the nodes this node is connected to
            skeletons = [int(id, 16) for id in skeletons]
            skeleton_groups.append(skeletons)

    num_of_splits = 0
    for node_id, node_degree in dict(degX).items():
        if node_degree > 1:
            num_of_splits += node_degree - 1
    return num_of_merges, num_of_splits, skeleton_groups, SVids_merger
Example #14
def commit_here(username):
    G = nx.Graph()

    users = set()
    users.update(user
                 for user in followers(username, page="1", followers_list=[]))
    users.update(user
                 for user in following(username, page="1", following_list=[]))

    users = list(users)
    repos = set()
    user2repo = []
    for user in users:
        if user == username:
            continue
        user_repos = get_repos(user, url="", repo_list=[])
        repos.update(repo + "_repo" for repo in user_repos)
        user2repo += [(user, repo + "_repo") for repo in user_repos]
    repos = list(repos)
    G.add_nodes_from(users, bipartite="users")
    G.add_nodes_from(repos, bipartite="repos")
    G.add_edges_from(user2repo)

    for repo in repos:
        degX, degY = bipartite.degrees(G, repo)
        break
    degX = list(degX)

    repos_only = []
    for ind in degX:
        if ind[0] not in users:
            repos_only.append(ind)

    repos_only.sort(key=lambda x: x[-1], reverse=True)
    repos_only = [list(repo) for repo in repos_only]
    return repos_only[:10]
Example #15
    # use one less process to be a little more stable
    # p = multiprocessing.Pool(processes=multiprocessing.cpu_count() - 5)
    p = multiprocessing.Pool()
    #timing it...
    start = time.time()
    #    for file in names:
    #        p.apply_async(multip, [file])

    for i in range(1, 5, 1):
        print("Shuffle edges ... iteration " + str(i))
        #H = add_and_remove_edges(C, type_proj, dict(degX), dict(degY))
        C, counterATC, counterCIE, degX, degY, nodes_0_c, nodes_1_c = suffle_edges_lc(
            G)
        #        suffle_edges(C, sorted(dict(degX).keys()), sorted(dict(degY).keys()))
        degX_sh, degY_sh = bipartite.degrees(C, nodes_0_c)
        degATC_sh = dict(degX_sh).values()
        degCIE_sh = dict(degY_sh).values()
        counterATC_sh = collections.Counter(degATC_sh)
        counterCIE_sh = collections.Counter(degCIE_sh)
        nx.write_graphml(C, 'networks/bipartite_sh_' + str(i) + '.graphml')

        print("Apply threshold analysis to shuffled graph ... " + str(i))
        for th_icd in sorted(list(counterCIE_sh.keys())):
            p.apply_async(threshold_analysis, [C, th_icd, 0, len(degY_sh), i])

        for th_atc in sorted(list(counterATC_sh.keys())):
            p.apply_async(threshold_analysis, [C, th_atc, 1, len(degX_sh), i])

#        if type_proj == 0:
#            for th in sorted(list(counterCIE_sh.keys())):
Example #16
            if u in M:
                del_edges.append((u, v))
        B.remove_edges_from(del_edges)
        sys.stderr.write("\nedges after deletion are: " + str(B.edges()))
        if nx.is_bipartite(B):
            logging.info("\n ... graph is bipartite.")
        else:
            logging.info("\n ... graph is NOT bipartite.")
        numOfNodes = B.number_of_nodes()
        numOfEdges = B.number_of_edges()
        k = numOfEdges
        logging.info(
            '\n...done. Created a bipartite graph with %d nodes and %d edges' %
            (numOfNodes, numOfEdges))

        degN, degM = bipartite.degrees(B, M)
        degrees = dict(degN)
        print("degree dict: " + str(degrees))
        descending_degrees = sorted(degrees.values(), reverse=True)
        print("degrees descending: " + str(descending_degrees))
        # sort node ids by descending degree (dict views are not indexable in Python 3)
        sorted_nodes = sorted(degrees, key=lambda n: degrees[n], reverse=True)
        print("nodes sorted by descending degrees: " + str(sorted_nodes))
        max_degree = descending_degrees[0]
        min_degree = descending_degrees[-1]
        mean = (sum(degrees.values()) * 1.0) / len(degrees)
        std_dev = np.sqrt((sum([(degrees[i] - mean)**2
                                for i in degrees]) * 1.0) / len(degrees))
        logging.info(
            '\nMax degree: %s, min degree: %s, mean: %s, and standard deviation: %s.'
            % (max_degree, min_degree, mean, std_dev))
Example #17
def threshold(G, bp):
    degX, degY = bipartite.degrees(G, nodes_0)
    degATC = dict(degX).values()
    degCIE = dict(degY).values()
    counterATC = collections.Counter(degATC)
    counterCIE = collections.Counter(degCIE)
    c_list = []
    nc_list = []
    nuc_list = []
    if bp == 0:
        for th in sorted(list(counterCIE.keys())):
            #th = 1
            H = nx.Graph()
            #for v in G.nodes(data = True):
            #    if v[1]['bipartite'] == 0:
            #        H.add_node(v[0])

            for n in G.nodes(data=True):
                if n[1]['bipartite'] == 0:
                    sourceNode = n[0]
                    s_neighbors = set(G.neighbors(n[0]))
                    for m in G.nodes(data=True):
                        if m[1]['bipartite'] == 0:  #### Change to 1 to change the projection to active ingredient
                            targetNode = m[0]
                            t_neighbors = set(G.neighbors(m[0]))
                            if sourceNode != targetNode:
                                if len(s_neighbors & t_neighbors) >= th:
                                    H.add_node(sourceNode)
                                    H.add_node(targetNode)
                                    H.add_edge(sourceNode, targetNode)
            components = sorted(nx.connected_components(H),
                                key=len,
                                reverse=True)
            #sum(list(map(lambda c: len(c), components)))
            c_list.append(len(components))
            nodes_connected = sum(list(map(lambda c: len(c), components)))
            nc_list.append(nodes_connected)
            nuc_list.append(len(nodes_0) - nodes_connected)
            #nx.write_graphml(H,'proCIE_th_'+str(th)+'.graphml')
    else:

        for th in sorted(list(counterATC.keys())):
            #th = 136
            H = nx.Graph()
            #for v in G.nodes(data = True):
            #    if v[1]['bipartite'] == 1:
            #        H.add_node(v[0])

            for n in G.nodes(data=True):
                if n[1]['bipartite'] == 1:
                    sourceNode = n[0]
                    s_neighbors = set(G.neighbors(n[0]))
                    for m in G.nodes(data=True):
                        if m[1]['bipartite'] == 1:  #### This branch projects onto the active-ingredient side
                            targetNode = m[0]
                            t_neighbors = set(G.neighbors(m[0]))
                            if sourceNode != targetNode:
                                if len(s_neighbors & t_neighbors) >= th:
                                    #print(len(s_neighbors & t_neighbors))
                                    #print(sourceNode + " " + targetNode)
                                    H.add_node(sourceNode)
                                    H.add_node(targetNode)
                                    H.add_edge(sourceNode, targetNode)

            components = sorted(nx.connected_components(H),
                                key=len,
                                reverse=True)
            c_list.append(len(components))
            nodes_connected = sum(list(map(lambda c: len(c), components)))
            nc_list.append(nodes_connected)
            nuc_list.append(len(nodes_1) - nodes_connected)
        #nx.write_graphml(H,'proATC_th_'+str(th)+'.graphml')
    #degXH,degYH=bipartite.degrees(H,nodes_0)
    #degATCH = dict(degXH).values()
    #degCIEH = dict(degYH).values()
    #counterATCH = collections.Counter(degATCH)
    #counterCIEH = collections.Counter(degCIEH)
    return c_list, nc_list, nuc_list, counterATC, counterCIE
Example #18
user2repo = []
for user in users:
    if user == username:  # Don't recommend self repos
        continue
    user_repos = get_repos(user, url="", repo_list=[])
    repos.update(repo + "_repo" for repo in user_repos)
    user2repo += [(user, repo + "_repo") for repo in user_repos]
repos = list(repos)
G.add_nodes_from(users, bipartite="users")
G.add_nodes_from(repos, bipartite="repos")
G.add_edges_from(user2repo)

nx.write_edgelist(G, "imro8_bipartite_recommending_repo.edgelist")

# G = nx.read_edgelist('imro8_bipartite_recommending_repo.edgelist')
# print(G.edges())

# repos  = ['teach-flask-through-doing_repo', 'Hades_App_repo']
for repo in repos:
    degX, degY = bipartite.degrees(G, repo)
    break
degX = list(degX)

repos_only = []
for ind in degX:
    if ind[0] not in users:
        repos_only.append(ind)

repos_only.sort(key=lambda x: x[-1], reverse=True)
print(repos_only[:10])
Example #19
    G.add_nodes_from(nodes_0, bipartite=0)  # Add the node attribute "bipartite": disease
    G.add_nodes_from(nodes_1, bipartite=1)  # active substance

    # Add edges without weight
    for m in vdmdata.iterrows():
        enfermedad = m[1][0]  # disease
        #peso = m[1][3];  # weight
        sustancia = m[1][1]  # active substance
        G.add_edge(enfermedad, sustancia)

    print("Getting largest component ...")
    components = sorted(nx.connected_components(G), key=len, reverse=True)
    largest_component = components[0]
    C = G.subgraph(largest_component)

    degX, degY = bipartite.degrees(C, nodes_0)
    degATC = dict(degX).values()
    degCIE = dict(degY).values()
    counterATC = collections.Counter(degATC)
    counterCIE = collections.Counter(degCIE)

    df_icd = pd.DataFrame(dict(degY).items(), columns=['node', 'degree'])
    df_atc = pd.DataFrame(dict(degX).items(), columns=['node', 'degree'])

    df_icd = df_icd.sort_values(by=['degree', 'node'], ascending=False)
    df_atc = df_atc.sort_values(by=['degree', 'node'], ascending=False)

    nodes_0_c = []
    nodes_1_c = []
    for n in C.nodes(data=True):
        if n[1]['bipartite'] == 0:
Example #20
#--------------------------------------------------------------------------
# Calculate the density of the bipartite graph
#--------------------------------------------------------------------------
print(round(bipartite.density(B, bottom_nodes), 3))

#--------------------------------------------------------------------------
# Calculate the degree distribution of universities from the bipartite projected graph.
# A weighted projection is used so that the edge weights count the shared mappings
#--------------------------------------------------------------------------
U = bipartite.weighted_projected_graph(B, bottom_nodes)
#--------------------------------------------------------------------------
# Calculate the degree for the top and bottom nodes of the bipartite graph
#--------------------------------------------------------------------------
import operator

degX, degY = bipartite.degrees(U, top_nodes, weight='weight')
# universities
a, b = zip(*degX)
degreeCount = collections.Counter(b)
deg, cnt = zip(*degreeCount.items())

#The degree seq below does not take into account the weights of the edges
#degree_sequence = sorted([d for n, d in U.degree()], reverse=True)  # degree sequence
#degreeCount = collections.Counter(degree_sequence)
plt.rcParams['axes.facecolor'] = 'grey'
plt.rcParams['savefig.facecolor'] = 'grey'
fig, ax = plt.subplots(figsize=(10, 8))

#plt.title("Degree Histogram and Graph of Universities",fontsize=11)
plt.ylabel("Count of links", fontsize=14)
plt.xlabel("Degree", fontsize=14)
Example #21
B = nx.Graph()
edges = []
x = 0
for i in range(len(top_nodes)):
    edges.append((top_nodes[i], bottom_nodes[i], weight[i]))
B.add_weighted_edges_from(edges)
# print(B.edges())


# Convert the two-mode network to a one-mode projection
# G=bipartite.projected_graph(B,bottom_nodes,multigraph=True)
# print(G.edges(keys=True))

# Compute the degrees of the two-mode network
degX, degY = bipartite.degrees(B, bottom_nodes, weight='weight')
# Compute the degree centrality of the two-mode network
D = bipartite.degree_centrality(B, bottom_nodes)
degX_dict, degY_dict, D_dict = dict(degX), dict(degY), dict(D)
print('Mode-1 degrees:', len(degX), degX_dict, 'Mode-2 degrees:', len(degY), degY_dict, 'Mode-2 degree centrality:', len(D), D_dict, sep='\n')


arcpy.env.overwriteOutput = True
targetsource1 = targetsource + 'output'
print(targetsource)
arcpy.Copy_management(in_data=targetsource, out_data=targetsource1)


mo2fieldname, mo1fieldname, mo2centraldegree = 'mo2degree', 'mo1degree', 'mo2centraldegree'
arcpy.AddField_management(targetsource1, mo2fieldname, "FLOAT", field_alias=mo2fieldname, field_is_nullable="NULLABLE")
arcpy.AddField_management(targetsource1, mo1fieldname, "FLOAT", field_alias=mo1fieldname, field_is_nullable="NULLABLE")