def test_directed_projection(self):
    """Check simple, weighted and multigraph projections of digraphs onto 'AB'."""
    # Case 1: A -> 1 -> B is the only directed two-step route between the
    # projected nodes (B -> 2 points away from B), so one edge of weight 1.
    one_route = nx.DiGraph()
    one_route.add_edges_from([('A', 1), (1, 'B'), ('A', 2), ('B', 2)])

    simple = bipartite.projected_graph(one_route, 'AB')
    assert_edges_equal(list(simple.edges()), [('A', 'B')])
    weighted = bipartite.weighted_projected_graph(one_route, 'AB')
    assert_edges_equal(list(weighted.edges()), [('A', 'B')])
    assert_equal(weighted['A']['B']['weight'], 1)
    multi = bipartite.projected_graph(one_route, 'AB', multigraph=True)
    assert_edges_equal(list(multi.edges()), [('A', 'B')])

    # Case 2: both A -> 1 -> B and A -> 2 -> B exist, so the weight is 2 and
    # the multigraph projection carries two parallel edges.
    two_routes = nx.DiGraph()
    two_routes.add_edges_from([('A', 1), (1, 'B'), ('A', 2), (2, 'B')])

    simple = bipartite.projected_graph(two_routes, 'AB')
    assert_edges_equal(list(simple.edges()), [('A', 'B')])
    weighted = bipartite.weighted_projected_graph(two_routes, 'AB')
    assert_edges_equal(list(weighted.edges()), [('A', 'B')])
    assert_equal(weighted['A']['B']['weight'], 2)
    multi = bipartite.projected_graph(two_routes, 'AB', multigraph=True)
    assert_edges_equal(list(multi.edges()), [('A', 'B'), ('A', 'B')])
def collaborativeness(B):
    """Score every connected bottom node of the bipartite graph ``B``.

    Nodes whose ``'bipartite'`` attribute equals 0 are treated as top nodes;
    all remaining nodes are bottom nodes.

    Parameters
    ----------
    B : networkx graph
        Bipartite graph whose nodes all carry a ``'bipartite'`` attribute.

    Returns
    -------
    dict
        Maps each bottom node that has at least one neighbour in the bottom
        projection to a 2-tuple:

        * ``(shared / deg) * gamma`` where ``deg`` is the node's degree in
          ``B``, ``shared`` is the number of its neighbours with degree > 1
          and ``gamma`` is the sum of ``log(degree)`` over its neighbours;
        * the node's degree in the simple projection divided by its degree
          in the multigraph projection.
    """
    top_nodes = {node for node, d in B.nodes(data=True) if d['bipartite'] == 0}
    bottom_nodes = set(B) - top_nodes

    # Simple and multigraph projections onto the bottom nodes.
    G = bipartite.projected_graph(B, bottom_nodes)
    Gm = bipartite.projected_graph(B, bottom_nodes, multigraph=True)

    col_dict = {}
    for node in bottom_nodes:
        # Nodes isolated in the projection share nothing with anyone; skipping
        # them also keeps both divisions below away from zero.
        if not G.degree(node) > 0:
            continue
        nbr_degrees = [B.degree(nbr) for nbr in B[node]]
        gamma = sum(math.log(k) for k in nbr_degrees)
        shared = sum(1 for k in nbr_degrees if k > 1)
        col_dict[node] = (
            (float(shared) / B.degree(node)) * gamma,
            float(G.degree(node)) / Gm.degree(node),
        )
    return col_dict
def test_directed_projection(self):
    """Directed projections onto "AB": one shared route, then two."""
    targets = "AB"
    single_edge = [("A", "B")]

    # Only A -> 1 -> B links the two projected nodes here.
    digraph = nx.DiGraph()
    digraph.add_edges_from([("A", 1), (1, "B"), ("A", 2), ("B", 2)])
    plain = bipartite.projected_graph(digraph, targets)
    assert_equal(sorted(plain.edges()), single_edge)
    weighted = bipartite.weighted_projected_graph(digraph, targets)
    assert_equal(sorted(weighted.edges()), single_edge)
    assert_equal(weighted["A"]["B"]["weight"], 1)
    multi = bipartite.projected_graph(digraph, targets, multigraph=True)
    assert_equal(sorted(multi.edges()), single_edge)

    # Both intermediate nodes now forward A to B.
    digraph = nx.DiGraph()
    digraph.add_edges_from([("A", 1), (1, "B"), ("A", 2), (2, "B")])
    plain = bipartite.projected_graph(digraph, targets)
    assert_equal(sorted(plain.edges()), single_edge)
    weighted = bipartite.weighted_projected_graph(digraph, targets)
    assert_equal(sorted(weighted.edges()), single_edge)
    assert_equal(weighted["A"]["B"]["weight"], 2)
    multi = bipartite.projected_graph(digraph, targets, multigraph=True)
    assert_equal(sorted(multi.edges()), [("A", "B"), ("A", "B")])
def test_directed_projection(self):
    """Exercise the three projection flavours on two small digraphs."""

    def digraph_from(edges):
        # Build a fresh DiGraph, adding the edges in the given order.
        D = nx.DiGraph()
        D.add_edges_from(edges)
        return D

    # First graph: B -> 2 points away from B, so only node 1 joins A to B.
    first = digraph_from([('A', 1), (1, 'B'), ('A', 2), ('B', 2)])
    projection = bipartite.projected_graph(first, 'AB')
    assert_edges_equal(list(projection.edges()), [('A', 'B')])
    projection = bipartite.weighted_projected_graph(first, 'AB')
    assert_edges_equal(list(projection.edges()), [('A', 'B')])
    assert_equal(projection['A']['B']['weight'], 1)
    projection = bipartite.projected_graph(first, 'AB', multigraph=True)
    assert_edges_equal(list(projection.edges()), [('A', 'B')])

    # Second graph: nodes 1 and 2 both join A to B.
    second = digraph_from([('A', 1), (1, 'B'), ('A', 2), (2, 'B')])
    projection = bipartite.projected_graph(second, 'AB')
    assert_edges_equal(list(projection.edges()), [('A', 'B')])
    projection = bipartite.weighted_projected_graph(second, 'AB')
    assert_edges_equal(list(projection.edges()), [('A', 'B')])
    assert_equal(projection['A']['B']['weight'], 2)
    projection = bipartite.projected_graph(second, 'AB', multigraph=True)
    assert_edges_equal(list(projection.edges()), [('A', 'B'), ('A', 'B')])
def test_directed_projection(self):
    """Table-driven check of directed projections onto the nodes "AB"."""
    # (edges of the digraph, expected A-B weight, expected multigraph edges)
    cases = [
        (
            [("A", 1), (1, "B"), ("A", 2), ("B", 2)],
            1,
            [("A", "B")],
        ),
        (
            [("A", 1), (1, "B"), ("A", 2), (2, "B")],
            2,
            [("A", "B"), ("A", "B")],
        ),
    ]
    for edges, expected_weight, expected_multi_edges in cases:
        digraph = nx.DiGraph()
        digraph.add_edges_from(edges)

        plain = bipartite.projected_graph(digraph, "AB")
        assert_edges_equal(list(plain.edges()), [("A", "B")])

        weighted = bipartite.weighted_projected_graph(digraph, "AB")
        assert_edges_equal(list(weighted.edges()), [("A", "B")])
        assert weighted["A"]["B"]["weight"] == expected_weight

        multi = bipartite.projected_graph(digraph, "AB", multigraph=True)
        assert_edges_equal(list(multi.edges()), expected_multi_edges)
def test_path_projected_graph(self):
    """Project the 4-node path onto its odd and then its even nodes."""
    path = nx.path_graph(4)

    odd = bipartite.projected_graph(path, [1, 3])
    assert_nodes_equal(list(odd), [1, 3])
    assert_edges_equal(list(odd.edges()), [(1, 3)])

    even = bipartite.projected_graph(path, [0, 2])
    assert_nodes_equal(list(even), [0, 2])
    assert_edges_equal(list(even.edges()), [(0, 2)])
def test_path_projected_graph(self):
    """Each half of the path 0-1-2-3 projects to a single edge."""
    path = nx.path_graph(4)
    # (nodes to keep, expected edges of the projection)
    expectations = (
        ([1, 3], [(1, 3)]),
        ([0, 2], [(0, 2)]),
    )
    for kept_nodes, expected_edges in expectations:
        projection = bipartite.projected_graph(path, kept_nodes)
        assert_equal(sorted(projection.nodes()), kept_nodes)
        assert_equal(sorted(projection.edges()), expected_edges)
def test_project_multigraph(self):
    """Two shared neighbours give one simple edge but two multigraph edges."""
    bigraph = nx.Graph()
    bigraph.add_edges_from([("a", 1), ("b", 1), ("a", 2), ("b", 2)])

    simple = bipartite.projected_graph(bigraph, "ab")
    assert_edges_equal(simple.edges(), [("a", "b")])

    weighted = bipartite.weighted_projected_graph(bigraph, "ab")
    assert_edges_equal(weighted.edges(), [("a", "b")])

    # One parallel edge per shared neighbour (1 and 2).
    multi = bipartite.projected_graph(bigraph, "ab", multigraph=True)
    assert_edges_equal(multi.edges(), [("a", "b"), ("a", "b")])
def test_project_multigraph(self):
    """Compare simple, weighted and multigraph projections onto "ab"."""
    shared = nx.Graph()
    for endpoint_pair in [("a", 1), ("b", 1), ("a", 2), ("b", 2)]:
        shared.add_edge(*endpoint_pair)

    one_edge = [("a", "b")]
    assert_edges_equal(
        list(bipartite.projected_graph(shared, "ab").edges()), one_edge
    )
    assert_edges_equal(
        list(bipartite.weighted_projected_graph(shared, "ab").edges()), one_edge
    )
    # The multigraph projection keeps one edge per common neighbour.
    assert_edges_equal(
        list(bipartite.projected_graph(shared, "ab", multigraph=True).edges()),
        [("a", "b"), ("a", "b")],
    )
def test_path_projected_properties_graph(self):
    """Node attributes must survive projection of the 4-node path graph."""
    G = nx.path_graph(4)
    G.add_node(1, name='one')
    G.add_node(2, name='two')

    P = bipartite.projected_graph(G, [1, 3])
    assert_nodes_equal(list(P), [1, 3])
    assert_edges_equal(list(P.edges()), [(1, 3)])
    # `Graph.node` was removed in NetworkX 2.4; `Graph.nodes[n]` is the
    # supported way to read a node's attribute dict.
    assert_equal(P.nodes[1]['name'], G.nodes[1]['name'])

    P = bipartite.projected_graph(G, [0, 2])
    assert_nodes_equal(list(P), [0, 2])
    assert_edges_equal(list(P.edges()), [(0, 2)])
    assert_equal(P.nodes[2]['name'], G.nodes[2]['name'])
def test_path_projected_properties_graph(self):
    """Projected nodes keep the attributes they had in the source graph."""
    G = nx.path_graph(4)
    G.add_node(1, name='one')
    G.add_node(2, name='two')

    # Odd side of the path: node 1 carries the 'name' attribute.
    P = bipartite.projected_graph(G, [1, 3])
    assert_nodes_equal(list(P), [1, 3])
    assert_edges_equal(list(P.edges()), [(1, 3)])
    # Use `nodes[...]`: the old `Graph.node` alias no longer exists in
    # NetworkX >= 2.4.
    assert_equal(P.nodes[1]['name'], G.nodes[1]['name'])

    # Even side of the path: node 2 carries the 'name' attribute.
    P = bipartite.projected_graph(G, [0, 2])
    assert_nodes_equal(list(P), [0, 2])
    assert_edges_equal(list(P.edges()), [(0, 2)])
    assert_equal(P.nodes[2]['name'], G.nodes[2]['name'])
def test_project_multigraph(self):
    """'a' and 'b' share neighbours 1 and 2; check all projection kinds."""
    source = nx.Graph()
    source.add_edges_from([('a', 1), ('b', 1), ('a', 2), ('b', 2)])

    # Simple and weighted projections collapse the two shared neighbours
    # into a single a-b edge.
    for project in (bipartite.projected_graph,
                    bipartite.weighted_projected_graph):
        collapsed = project(source, 'ab')
        assert_edges_equal(list(collapsed.edges()), [('a', 'b')])

    expanded = bipartite.projected_graph(source, 'ab', multigraph=True)
    assert_edges_equal(list(expanded.edges()), [('a', 'b'), ('a', 'b')])
def test_path_projected_properties_graph(self):
    """The "name" attribute is carried over to both path projections."""
    path = nx.path_graph(4)
    path.add_node(1, name="one")
    path.add_node(2, name="two")

    # (nodes to keep, expected edges, node whose "name" is compared)
    scenarios = (
        ([1, 3], [(1, 3)], 1),
        ([0, 2], [(0, 2)], 2),
    )
    for kept, expected_edges, named_node in scenarios:
        projection = bipartite.projected_graph(path, kept)
        assert_nodes_equal(list(projection), kept)
        assert_edges_equal(list(projection.edges()), expected_edges)
        assert projection.nodes[named_node]["name"] == path.nodes[named_node]["name"]
def test_star_projected_graph(self):
    """Leaves of a star project to a triangle; the hub projects to one node."""
    star = nx.star_graph(3)
    leaves = [1, 2, 3]
    triangle = [(1, 2), (1, 3), (2, 3)]

    # Every pair of leaves shares the hub, in both projection flavours.
    leaf_projection = bipartite.projected_graph(star, leaves)
    assert_nodes_equal(list(leaf_projection), [1, 2, 3])
    assert_edges_equal(list(leaf_projection.edges()), triangle)

    leaf_projection = bipartite.weighted_projected_graph(star, leaves)
    assert_nodes_equal(list(leaf_projection), [1, 2, 3])
    assert_edges_equal(list(leaf_projection.edges()), triangle)

    # The hub on its own has nobody to be linked to.
    hub_projection = bipartite.projected_graph(star, [0])
    assert_nodes_equal(list(hub_projection), [0])
    assert_edges_equal(list(hub_projection.edges()), [])
def test_project_multigraph(self):
    """Projection onto 'ab' when both nodes are tied to 1 and to 2."""
    ties = [('a', 1), ('b', 1), ('a', 2), ('b', 2)]
    bigraph = nx.Graph()
    bigraph.add_edges_from(ties)

    plain_edges = list(bipartite.projected_graph(bigraph, 'ab').edges())
    assert_edges_equal(plain_edges, [('a', 'b')])

    weighted_edges = list(
        bipartite.weighted_projected_graph(bigraph, 'ab').edges())
    assert_edges_equal(weighted_edges, [('a', 'b')])

    # multigraph=True yields one edge per shared neighbour.
    multi_edges = list(
        bipartite.projected_graph(bigraph, 'ab', multigraph=True).edges())
    assert_edges_equal(multi_edges, [('a', 'b'), ('a', 'b')])
def test_path_projected_properties_graph(self):
    """Node "name" attributes are preserved by both path projections."""
    source = nx.path_graph(4)
    source.add_node(1, name="one")
    source.add_node(2, name="two")

    odd_side = bipartite.projected_graph(source, [1, 3])
    assert_equal(sorted(odd_side.nodes()), [1, 3])
    assert_equal(sorted(odd_side.edges()), [(1, 3)])
    assert_equal(odd_side.node[1]["name"], source.node[1]["name"])

    even_side = bipartite.projected_graph(source, [0, 2])
    assert_equal(sorted(even_side.nodes()), [0, 2])
    assert_equal(sorted(even_side.edges()), [(0, 2)])
    assert_equal(even_side.node[2]["name"], source.node[2]["name"])
def test_star_projected_graph(self):
    """Star graph: leaf projections form a triangle, hub projection is empty."""
    star = nx.star_graph(3)

    # Simple and weighted projections onto the leaves must agree.
    for project in (bipartite.projected_graph,
                    bipartite.weighted_projected_graph):
        onto_leaves = project(star, [1, 2, 3])
        assert_equal(sorted(onto_leaves.nodes()), [1, 2, 3])
        assert_equal(sorted(onto_leaves.edges()), [(1, 2), (1, 3), (2, 3)])

    onto_hub = bipartite.projected_graph(star, [0])
    assert_equal(sorted(onto_hub.nodes()), [0])
    assert_equal(sorted(onto_hub.edges()), [])
def create_projection(B):
    """Add a fixed letter/number edge list to ``B`` and project it three ways.

    ``B`` is modified in place. Returns a 3-tuple: the projection onto the
    letter nodes, the projection onto the number nodes, and the weighted
    projection onto the number nodes. Summary information about the first
    two and the weighted edge data of the third are printed along the way.
    """
    letter_number_edges = [
        ('D', 1), ('H', 1),
        ('B', 2), ('C', 2), ('D', 2), ('E', 2), ('G', 2),
        ('E', 3), ('F', 3), ('H', 3), ('J', 3),
        ('E', 4), ('I', 4), ('J', 4),
    ]
    B.add_edges_from(letter_number_edges)

    letters = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'}
    numbers = {1, 2, 3, 4}

    letter_projection = bipartite.projected_graph(B, letters)
    print(nx.info(letter_projection))

    number_projection = bipartite.projected_graph(B, numbers)
    print(nx.info(number_projection))

    # Same node set as above, but edges carry a weight.
    weighted_number_projection = bipartite.weighted_projected_graph(B, numbers)
    print(weighted_number_projection.edges(data=True))

    return letter_projection, number_projection, weighted_number_projection
def block_cutpoint_tree(G, projected=False, verbose=False):
    """Build a graph linking biconnected components to their cut points.

    Each biconnected component of ``G`` is represented by the JSON encoding
    of its sorted member list ("top" node); each articulation point it
    contains is a "bottom" node, and the two are joined by an edge.
    Components that contain no articulation point contribute nothing.

    Parameters
    ----------
    G : graph-like
        Anything accepted by the ``Graph`` constructor.
    projected : bool
        When true, return the projection onto the component nodes instead
        of the bipartite tree itself.
    verbose : bool
        When true, print intermediate results.

    Returns
    -------
    Graph
        The component/cut-point graph, or its projection onto components.
    """
    input_graph = Graph(G)
    top_nodes = []
    bottom_nodes = []
    articulation_points = set(nx.articulation_points(input_graph))
    # Single-argument print(...) behaves identically on Python 2 and 3,
    # unlike the print statement (Py2 only) or multi-argument print().
    if verbose:
        print("Articulation points: %s" % (articulation_points,))

    for biconnected_component in nx.biconnected_components(input_graph):
        inter = biconnected_component.intersection(articulation_points)
        if verbose:
            print("Inter: %s" % (inter,))
        if not inter:
            continue
        # One (component, cut point) pair per articulation point in the block.
        block_label = json.dumps(sorted(biconnected_component))
        top_nodes.extend([block_label] * len(inter))
        bottom_nodes.extend(inter)

    if verbose:
        print("Top nodes: %s" % (top_nodes,))
        print("Bottom nodes: %s" % (bottom_nodes,))

    # Materialise the pairs so the verbose output shows them on Python 3 too.
    edges = list(zip(top_nodes, bottom_nodes))
    if verbose:
        print("Edges: %s" % (edges,))

    bc_tree = Graph()
    bc_tree.add_edges_from(edges)
    if projected:
        return Graph(bipartite.projected_graph(bc_tree, top_nodes))
    return bc_tree
def read_movies():
    """Read the Movies Pajek file and return the giant component of its
    projection onto nodes 63..102.

    The edge list is parsed by hand from fixed line numbers (107 to 298
    inclusive, 1-based) rather than with ``nx.read_pajek``.
    """
    path = NETWORKS_DIR + "movies/Movies.paj"
    first_edge_line = 107
    last_edge_line = 298

    edges = []
    with open(path, "r") as handle:
        for line_no in range(1, last_edge_line + 1):
            raw = handle.readline()
            if line_no < first_edge_line:
                continue
            # Collapse runs of whitespace, then keep the two endpoint ids.
            tokens = re.sub(r'\s+', ' ', raw).strip().split(" ")
            edges.append(set(int(tok) for tok in tokens[:2]))

    B = nx.Graph()
    B.add_edges_from(edges)
    B.add_node(78)  # node 78 doesn't appear in any edges

    # Project onto composers: two composers are linked if they worked with
    # the same producer.
    composers = list(range(63, 103))
    return get_giant_component(bipartite.projected_graph(B, composers))
def Dulmage_Mendelsohn(g, eqs):
    '''Return the diagonal blocks of g as a list of (equations, variables).

    g is assumed to be a bipartite graph without isolated nodes, and eqs
    one of its two node sets (the equations); both assumptions are asserted.
    The steps are: maximum-cardinality matching, orienting the edges by that
    matching, condensing the projection onto the equations, and walking the
    condensation in topological order.'''
    assert_all_in_graph(g, eqs)
    assert_no_isolates(g)
    assert eqs, 'At least one equation is expected'
    assert is_bipartite_node_set(g, eqs)

    # Maximum matching, listed from the equation side.
    mate = nx.max_weight_matching(g, maxcardinality=True)
    matches = [(node, mate[node]) for node in mate if node in eqs]
    matches.sort()
    log('Matches:')
    for pair in matches:
        log(*pair)

    # Direct the edges of g according to the matching.
    bipart = to_digraph(g, matches)
    plot(bipart)

    # Strongly connected components of the equations.
    eq_sccs = nx.condensation(projected_graph(bipart, eqs))
    plot(eq_sccs)

    # Q (kept from the original author): with a proper implementation,
    # shouldn't the SCCs already be topologically sorted?
    precedence_order = nx.topological_sort(eq_sccs)

    diagonal_blocks = []
    assigned = set()
    for component in precedence_order:
        members = eq_sccs.node[component]['members']
        # Variables adjacent to this block that no earlier block claimed.
        fresh = set()
        for eq in members:
            for var in g.edge[eq]:
                if var not in assigned:
                    fresh.add(var)
        assigned.update(fresh)
        diagonal_blocks.append((members, list(fresh)))
    return diagonal_blocks
def experiment1(db, extra_round):
    """Count shared posts per unlabeled author and write them to a CSV file.

    Parameters
    ----------
    db : object
        Data-access object; ``get_authors_timelines_temp()`` is called on it
        and it is forwarded to the tuple-building helper.
    extra_round : bool
        When true, every author with a non-``None`` count from the first
        round is relabeled ``'bad_actor'``, the counts are recomputed, and
        the round-2 output file name is used.

    The result is written as ``author, count`` rows; nothing is returned.
    """
    output_file_name = 'timeline_analysis_predictions_round1.csv'
    cursor = db.get_authors_timelines_temp()

    print('retrieving authors and posts')
    author_post_tuples, author_type_dict = create_author_post_and_author_type_tuples(db, cursor)

    print('creating bi-partite graph')
    bi_graph = create_bi_graph(author_post_tuples)
    # Only the author side is needed for the projection.
    authors = list(set(author for author, _post in author_post_tuples))
    del author_post_tuples

    print('creating authors graph')
    authors_projection_graph = bipartite.projected_graph(bi_graph, authors)
    del bi_graph

    print('counting shared posts')
    unlabeled_author_shared_posts_dict = count_shared_posts(author_type_dict, authors_projection_graph)

    # Add an extra iteration to the experiment.
    if extra_round:
        output_file_name = 'timeline_analysis_predictions_round2.csv'
        for author, num_shared_posts_with_bad in unlabeled_author_shared_posts_dict.items():
            if num_shared_posts_with_bad is not None:
                author_type_dict[author] = 'bad_actor'
        unlabeled_author_shared_posts_dict = count_shared_posts(author_type_dict, authors_projection_graph)

    print('writing results to file')
    # csv.writer needs a text-mode file on Python 3 ('wb' raises TypeError);
    # newline='' lets the csv module control line endings itself.
    with open(output_file_name, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for key, value in unlabeled_author_shared_posts_dict.items():
            writer.writerow([key, value])
def nearest_neighbors_weighted_projection(TG_graph, gen, tumor_gene_relation_bi_adj_mat):
    """Score tumor-gene links through the projection of ``TG_graph`` onto ``gen``.

    The unweighted projection onto ``gen`` is converted to a sparse adjacency
    matrix and ``np.dot(tumor_gene_relation_bi_adj_mat, A)`` is returned.

    Alternative projections the original author experimented with:
    ``bipartite.weighted_projected_graph`` (nearest neighbours) and
    ``bipartite.overlap_weighted_projected_graph`` (Jaccard similarity).

    NOTE(review): the preferential-attachment graph built below is not used
    for the returned scores; in the original the line swapping it in for
    ``G`` was commented out. It is kept so that switch stays a one-line
    change. Confirm whether it is still wanted.
    """
    G = bipartite.projected_graph(TG_graph, gen)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("calculating the link prediction scores %s" % (G.number_of_edges(),))

    preds = nx.preferential_attachment(G, ebunch=G.edges())
    pred_scores_graph = nx.Graph()
    pred_scores_graph.add_nodes_from(gen)
    cnt = 0
    for u, v, p in preds:
        pred_scores_graph.add_edge(u, v, weight=p)
        # The counter was never advanced before, so it always reported 0.
        cnt += 1
    print(cnt)
    print("sorted !")

    A = nx.to_scipy_sparse_matrix(G)
    scores = np.dot(tumor_gene_relation_bi_adj_mat, A)
    return scores
def create_global_graph(posts_df, fb_group_df, NODE_COLOR, GRAPH_DIRECTORY, DATE):
    """Build the account/URL bipartite graph, project it onto accounts and
    export the projection as GEXF.

    Account nodes come from ``fb_group_df`` (one per row, keyed by the
    integer ``account_id``), URL nodes and account-URL edges come from
    ``posts_df``. The projection is written to
    ``./<GRAPH_DIRECTORY>/global_<DATE>.gexf`` and returned.

    ``NODE_COLOR`` is not used by the current code; the original author's
    commented-out ipySigma variant passed it to a colour-gradient helper.
    """
    bipartite_graph = nx.Graph()

    for _, group in fb_group_df.iterrows():
        # 'main_topic' is the attribute used for the Minivan rendering with
        # three unique colours.
        account_attributes = dict(
            label=group['account_name'],
            nb_fake_news_shared=group['nb_fake_news'],
            nb_followers=group['account_subscriber_count'],
            main_topic=group['main_topic'],
        )
        bipartite_graph.add_node(int(group['account_id']), **account_attributes)

    urls = posts_df["url"].tolist()
    bipartite_graph.add_nodes_from(urls)

    account_url_pairs = posts_df[['account_id', 'url']].itertuples(index=False, name=None)
    bipartite_graph.add_edges_from(list(account_url_pairs))

    account_ids = fb_group_df['account_id'].unique().tolist()
    monopartite_graph = bipartite.projected_graph(bipartite_graph, account_ids)

    gexf_name = "global_{}.gexf".format(DATE)
    monopartite_graph_path = os.path.join(".", GRAPH_DIRECTORY, gexf_name)
    nx.write_gexf(monopartite_graph, monopartite_graph_path, encoding="utf-8")
    return monopartite_graph
def nets(df):
    """Build the author/post two-mode network of ``df`` and both projections.

    ``df`` must have ``'author'`` and ``'post_id'`` columns. Returns the
    bipartite graph, the (unweighted) author projection and the weighted
    post projection, printing node counts and densities along the way.
    Node and edge counts are sanity-checked with assertions.
    """
    edgelist = df[['author', 'post_id']]
    author_ids = set(edgelist['author'])
    post_ids = set(edgelist['post_id'])
    n_authors = len(df['author'].unique())
    n_posts = len(df['post_id'].unique())

    B = nx.Graph()
    B.add_nodes_from(author_ids, bipartite=0)
    B.add_nodes_from(post_ids, bipartite=1)
    B.add_edges_from(list(zip(edgelist['author'], edgelist['post_id'])))

    assert len(B.nodes()) == n_authors + n_posts, 'number of nodes is off'
    assert len(B.edges()) == len(edgelist.drop_duplicates()), 'number of edges is off'
    print(f'the two-mode network has {len(B.nodes())} nodes'
          f' and density of {nx.density(B)}')

    # One-mode projections.
    authors = bipartite.projected_graph(B, author_ids)
    assert len(authors.nodes()) == n_authors, 'number of authors is off'
    print(f'the author network has {len(authors.nodes())} nodes'
          f' and density of {nx.density(authors)}')

    posts = bipartite.weighted_projected_graph(B, post_ids)
    assert len(posts.nodes()) == n_posts, 'number of posts is off'
    print(f'the post network has {len(posts.nodes())} nodes'
          f' and density of {nx.density(posts)}')

    return B, authors, posts
def bi_graph():
    """Print a small bipartite graph and its projection onto 'a', 'b', 'c'.

    Purely a demonstration: everything is printed, nothing is returned.
    """
    B = nx.Graph()
    B.add_edges_from([('a', 1), ('b', 1), ('a', 2), ('c', 2)])
    B.add_edges_from([])

    print(list(B.nodes()))
    print(list(B.edges()))
    print(bipartite.is_bipartite(B))
    # Same edges again, as lists instead of tuples.
    print([list(edge) for edge in list(B.edges())])

    P = bipartite.projected_graph(B, ['a', 'b', 'c'])
    print(list(P.nodes()))
    print(list(P.edges(data=True)))

    endpoint_pairs = []
    for u, v, attrs in list(P.edges(data=True)):
        endpoint_pairs.append([u, v])
        # Report the edge weight when the projection supplies one.
        print(attrs['weight'] if 'weight' in attrs else 'no weight')
    print(endpoint_pairs)
def get_projections(single_bipartite_graph, sites, pollutants, verbose=False):
    """Return the (sites, pollutants) projections of a bipartite graph.

    Each side is first projected as a multigraph and then passed through
    ``multi_to_single_graph_projection``. With ``verbose`` the elapsed time
    is printed.
    """
    started_at = time.time()

    def project_onto(node_set):
        # Multigraph projection first, then collapse it to a single graph.
        multi = bipartite.projected_graph(single_bipartite_graph,
                                          nodes=node_set,
                                          multigraph=True)
        return multi_to_single_graph_projection(multi, verbose)

    sites_graph = project_onto(sites)
    pollutants_graph = project_onto(pollutants)

    if verbose:
        print("Getting projections of bipartite graph:", time.time() - started_at)
    return sites_graph, pollutants_graph
def experiment2(db):
    """Find author-author edges linking manually labeled and bought actors.

    The labeled bad-actor timelines are turned into an author/post bipartite
    graph and projected onto authors. Every projected edge with one
    manually labeled endpoint (spammer, bot, ...) and one bought endpoint
    (crowdturfer, acquired) is recorded, and a per-author count of such
    edges is kept for the manually labeled side.

    Two CSV files are written: ``author, count, author type`` rows and one
    row per interesting edge. Nothing is returned.
    """
    output_file_name = 'timeline_analysis.experiment2.csv'
    output_file_name2 = 'sharedposts.experiment2.csv'

    print('retrieving bad actors timelines')
    cursor = db.get_labeled_bad_actors_timelines_temp()
    author_post_tuples, author_type_dict = create_author_post_and_author_type_tuples(db, cursor)
    # Only the author side is needed for the projection.
    authors = list(set(author for author, _post in author_post_tuples))

    print('creating bi-partite graph')
    bi_graph = create_bi_graph(author_post_tuples)
    del author_post_tuples

    print('creating projection into authors graph')
    authors_projection_graph = bipartite.projected_graph(bi_graph, authors)
    # Copy the edges out: an edge view would keep the whole graph alive and
    # defeat the `del` below.
    edges_list = list(authors_projection_graph.edges())
    del authors_projection_graph
    del authors

    manually_labeled_authors_shared_posts_dict = {}
    bought_sub_types = ['crowdturfer', 'acquired']
    manually_labeled_authors_sub_types = ['spammer', 'bot', 'news_feed', 'company', 'private']
    interesting_edges = []

    # Count how many bought actors share posts with manually labeled ones.
    print('iterating over author-author edges')
    for edge in edges_list:
        source_author_name = edge[0]
        destination_author_name = edge[1]
        source_type = author_type_dict[source_author_name]
        destination_type = author_type_dict[destination_author_name]

        if source_type in manually_labeled_authors_sub_types and destination_type in bought_sub_types:
            labeled_author = source_author_name
        elif destination_type in manually_labeled_authors_sub_types and source_type in bought_sub_types:
            labeled_author = destination_author_name
        else:
            continue

        interesting_edges.append(edge)
        manually_labeled_authors_shared_posts_dict[labeled_author] = (
            manually_labeled_authors_shared_posts_dict.get(labeled_author, 0) + 1
        )

    print('writing results into file')
    # csv.writer needs text-mode files on Python 3 ('wb' raises TypeError);
    # newline='' lets the csv module control line endings itself.
    with open(output_file_name, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for key, value in manually_labeled_authors_shared_posts_dict.items():
            writer.writerow([key, value, author_type_dict[key]])

    with open(output_file_name2, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for item in interesting_edges:
            writer.writerow([item])
def gen_image_network(metadata_file, output_file, malware_projection, resource_projection):
    """Build the malware/image bipartite network and write it plus both
    projections as DOT files.

    Parameters
    ----------
    metadata_file : str
        CSV file with a ``FILE`` column listing candidate sample paths.
    output_file : str
        Destination of the bipartite network.
    malware_projection, resource_projection : str
        Destinations of the projections onto malware nodes and onto the
        remaining (image) nodes respectively.

    Paths that ``pefile`` rejects with ``PEFormatError`` are skipped.
    """
    malware_data = pandas.read_csv(metadata_file, dtype=str, keep_default_na=False, skipinitialspace=True)
    network = networkx.Graph()

    # NOTE(review): the extractor objects are kept referenced for the whole
    # run, as in the original; presumably the extracted image files must
    # outlive the loop for rendering. Confirm before removing this list.
    image_objects = []

    malware_paths = malware_data['FILE']
    for i, malware_path in enumerate(malware_paths, start=1):
        print("\t i: {}, j: {}, so: {}".format(i, malware_data['FILE'].size, i/malware_data['FILE'].size))

        # Parsing is only a validity check: skip anything that is not a PE file.
        try:
            pefile.PE(malware_path)
        except pefile.PEFormatError:
            continue

        images = ExtractImages(malware_path)
        images.work()
        image_objects.append(images)

        # Link the sample to each image extracted from it. Read the sample
        # inside a context manager so the handle is closed promptly.
        with open(malware_path, 'rb') as malware_file:
            filehash = hashlib.sha256(malware_file.read()).hexdigest()
        node_name = malware_path.split("/")[-1] + '_' + str(filehash)
        if node_name not in network:
            network.add_node(node_name, type="malware")

        for path, image_hash in images.images:
            # The 'image' attribute tells GraphViz to render the picture
            # inside the node.
            if image_hash not in network:
                network.add_node(image_hash, image=path, label='', type='image')
            print(node_name, image_hash)
            network.add_edge(node_name, image_hash)

    # Write the bipartite network, then both projections.
    print(network)
    write_dot(network, output_file)

    malware = set(n for n, d in network.nodes(data=True) if d['type'] == 'malware')
    resource = set(network) - malware
    malware_network = bipartite.projected_graph(network, malware)
    resource_network = bipartite.projected_graph(network, resource)
    write_dot(malware_network, malware_projection)
    write_dot(resource_network, resource_projection)
def connected_reconstructions(reconstruction_shots):
    """Return the edges between reconstructions that have a shot in common.

    ``reconstruction_shots`` maps each reconstruction to an iterable of shot
    ids. A bipartite reconstruction/shot graph is built and projected onto
    the reconstructions; the edges of that projection are returned.
    """
    shot_graph = nx.Graph()
    for reconstruction, shot_ids in reconstruction_shots.items():
        shot_graph.add_node(reconstruction, bipartite=0)
        for shot_id in shot_ids:
            shot_graph.add_node(shot_id, bipartite=1)
            shot_graph.add_edge(reconstruction, shot_id)

    projection = bipartite.projected_graph(shot_graph, reconstruction_shots.keys())
    return projection.edges()
def main():
    """Build the user/business graph and report the size of the user projection."""
    G = initialize()
    user, business = node_initialize(G)

    # Keep only the users and businesses that are actually in the graph.
    user = list(set(user) & set(G.nodes()))
    business = list(set(business) & set(G.nodes()))

    G = make_bipartite(G, user, business)
    print(nx.is_bipartite(G))

    # Continue with the first connected component only.
    components = list(nx.connected_component_subgraphs(G))
    G = components[0]
    user, business = bipartite.sets(G)
    print("nodes separated")

    Gu = bipartite.projected_graph(G, user)
    print(Gu.number_of_nodes())
def projected_graph(self):
    """Plot and print the simple and multigraph projections of ``self.B``.

    Both projections are taken onto the first node set reported by
    ``bipartite.sets``. Nothing is returned.
    """
    first_set = bipartite.sets(self.B)[0]

    # Simple projection.
    simple = bipartite.projected_graph(self.B, first_set, multigraph=False)
    self.plot_graph_2(simple, 'projected_gragh')
    print('projected_graph:number of edges:', simple.number_of_edges())
    print(simple.edges())
    print(list(simple.edges(data=True)))
    print('kkkkkkkkkkkkkkkk')
    print(list(simple.nodes()))
    print(simple['Goeff'])

    # Multigraph projection.
    multi = bipartite.projected_graph(self.B, first_set, multigraph=True)
    self.plot_graph_2(multi, 'projected_gragh_multigraph')
    print('MMMMMMMMMMMMMMMMMMMMM')
    print(multi.nodes())
    print(multi.edges())
    print('NNNNNNNNNNNNNNNNNNNNN')
def main():
    """Build the essay/person graph from clean_essays.csv and export both
    projections as GML files."""
    G = nx.Graph()
    with open('clean_essays.csv', 'rb') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            title = row[0]
            # Normalise ' and' and ', ' separators to plain commas first.
            thanked = row[3].replace(' and', ',').replace(', ', ',').split(',')
            people = [name.strip() for name in thanked if name not in (' ', '')]

            G.add_node(title, klass='essay')
            for person in people:
                if person == 'None':
                    continue
                G.add_node(person, klass='person')
                G.add_edge(person, title)

    p_nodes = []
    e_nodes = []
    for node, data in G.nodes_iter(data=True):
        if data['klass'] == 'person':
            p_nodes.append(node)
    for node, data in G.nodes_iter(data=True):
        if data['klass'] == 'essay':
            e_nodes.append(node)

    nx.write_gml(bipartite.projected_graph(G, p_nodes), 'people.gml')
    nx.write_gml(bipartite.projected_graph(G, e_nodes), 'essays.gml')
    print("Done")
def main():
    """Read clean_essays.csv, link thanked people to essays, and write the
    person and essay projections to people.gml and essays.gml."""

    def nodes_of_class(graph, klass):
        # Nodes whose 'klass' attribute equals the requested value.
        return [n for n, d in graph.nodes_iter(data=True) if d['klass'] == klass]

    G = nx.Graph()
    with open('clean_essays.csv', 'rb') as csvfile:
        readerz = csv.reader(csvfile, delimiter=',')
        for row in readerz:
            title = row[0]
            thnx = row[3].replace(' and', ',').replace(', ', ',').split(',')
            people = [x.strip() for x in thnx if not (x == ' ' or x == '')]
            G.add_node(title, klass='essay')
            # 'None' marks an essay without acknowledgements.
            for p in (name for name in people if name != 'None'):
                G.add_node(p, klass='person')
                G.add_edge(p, title)

    p_nodes = nodes_of_class(G, 'person')
    e_nodes = nodes_of_class(G, 'essay')
    BP = bipartite.projected_graph(G, p_nodes)
    BE = bipartite.projected_graph(G, e_nodes)
    nx.write_gml(BP, 'people.gml')
    nx.write_gml(BE, 'essays.gml')
    print("Done")
def resource_allocation_index(TG_graph, gen, tumor_gene_relation_bi_adj_mat):
    """Score tumor-gene links with degree-normalised two-step gene weights.

    ``TG_graph`` is projected onto ``gen``; each column of the projection's
    adjacency matrix is scaled by the reciprocal of its column sum and the
    result is multiplied by the transposed adjacency. The return value is
    ``np.dot`` of the (sparse) tumor-gene relation matrix with that product.
    """
    gene_projection = bipartite.projected_graph(TG_graph, gen)
    adjacency = nx.to_scipy_sparse_matrix(gene_projection)
    column_sums = adjacency.sum(axis=0)

    # Zero column sums produce inf here; silence the divide warning.
    with np.errstate(divide='ignore'):
        inverse_degrees = sparse.csr_matrix(1. / (column_sums))

    two_step = adjacency.multiply(inverse_degrees) * adjacency.T
    relation = sparse.csr_matrix(tumor_gene_relation_bi_adj_mat)
    return np.dot(relation, two_step)
def read_club_membership():
    """Read the club-membership edge list and return its projection onto
    nodes 1..25.

    The first two lines of the file are skipped. In every remaining line the
    second id is shifted by 25 so it cannot collide with the first ids.
    """
    path = NETWORKS_DIR + "brunson_club-membership/out.brunson_club-membership_club-membership"
    with open(path, "r") as handle:
        # Skip the two header lines.
        next(handle)
        next(handle)
        id_pairs = (row.rstrip().split()[:2] for row in handle)
        edges = [(int(pair[0]), int(pair[1]) + 25) for pair in id_pairs]

    B = nx.Graph()
    B.add_edges_from(edges)

    # Project onto people: two people are linked if they belong to the same
    # club/board. (Projecting onto clubs would use range(26, 41) instead.)
    people = list(range(1, 26))
    return bipartite.projected_graph(B, people)
def execute(self, window_start):
    """Build the author projection of the author/post graph and store the
    shared-post connections.

    Author timelines with at least the configured minimum number of posts
    are loaded; when a random-author sample size is configured, only
    authors present in the random-author dictionary are kept.
    ``window_start`` is not used by this method.
    """
    timelines = self._db.get_author_timelines_by_min_num_of_posts(
        self._domain, self._min_number_of_posts_per_author)
    print('retrieving authors and posts')

    if self._num_of_random_authors_for_graph is not None:
        sampled_guid_to_post = self._db.get_random_author_guid_post_id_dictionary()
        # The author guid is the first field of every timeline tuple.
        timelines = [
            timeline for timeline in timelines
            if timeline[0] in sampled_guid_to_post
        ]

    author_post_tuples, author_type_dict = self._create_author_post_and_author_type_tuples(
        timelines)

    print('creating bi-partite graph')
    bi_graph = self._create_bi_graph(author_post_tuples)
    author_guids, _posts = zip(*author_post_tuples)
    del author_post_tuples
    author_guids = list(set(author_guids))

    print('creating authors graph')
    authors_projection_graph = bipartite.projected_graph(
        bi_graph, author_guids, multigraph=True)
    del bi_graph

    print('counting shared posts')
    shared_posts_by_author_pair = self._count_shared_posts(authors_projection_graph)
    self._fill_common_posts_connections(shared_posts_by_author_pair)
tmp_node = line.split("||") try: B.add_node(tmp_node[0],bipartite=0) B.add_node(tmp_node[1],bipartite=1) edgelist.append((tmp_node[0],tmp_node[1])) except Exception,Err: pass # we found 2 range exception error in 1000000 # proccessing element limit 1M if i >= parse_limit: break else: i += 1 file.close() #add edge list B.add_edges_from(edgelist) authors = set(n for n,d in B.nodes(data=True) if d['bipartite'] == 0) G = bipartite.projected_graph(B, authors) get_k_highest_degree(G, 10) export_degree_statistic(G) nx.draw(G) plt.figure(num=None, figsize=(20, 20), dpi=80) plt.savefig("degree_histogram.png") plt.show()
for line in readFile: length = len(line.split(",")) srcIp = line.split(",")[1] destIp = line.split(",")[length -2] keyVal = srcIp+"-"+destIp; if keyVal not in edgeMap: edgeMap[keyVal] = True edgeArr.append((srcIp,destIp)) validNodes.append(srcIp) srcNodes = list(set(srcNodes)- (set(srcNodes) - set(validNodes))) G.add_edges_from(edgeArr) PG = bipartite.projected_graph(G,srcNodes) listVal = nx.connected_components(PG) for el in listVal: length = len(el) if length < 5: continue print(length) # print(nx.number_connected_components(G)) # print(nx.number_connected_components(PG))
DG.add_node(directors.index(row['name']),label=row['name'],name=row['name']) if row['cname'] not in companies: companies.append(row['cname']) DG.add_node(row['ocid'],label=row['cname'],name=row['cname']) DG.add_edge(directors.index(row['name']),row['ocid']) if reduce!=-1: from networkx.algorithms import bipartite officers,companies=bipartite.sets(DG) #print list(officers) #print list(companies) if reduce=="officers": #Collapse the bipartite graph to a graph of journalists connected via a common tag OG= bipartite.projected_graph(DG, officers) elif reduce=='companies': #Collapse the bipartite graph to a set of tags connected via a common journalist OG= bipartite.projected_graph(DG, companies) else: OG=DG else: OG=DG ''' modularity=1 if modularity!=-1: import community partition = community.best_partition(G) size = float(len(set(partition.values()))) pos = nx.spring_layout(G) count = 0. for com in set(partition.values()) :
df = df[(df[CREATE_GRAPH_OF].notnull()) & (df[LINKED_BY].notnull())] print "[+] Removed null values..." # Dedup dd = df.groupby(columns).size().reset_index().rename(columns={0: 'w'}) print "[+] Created deduplicated dataset..." # Creating the bipartite graph G = nx.Graph() G.add_nodes_from( dd[CREATE_GRAPH_OF].unique(), bipartite=0 ) G.add_nodes_from( dd[LINKED_BY].unique(), bipartite=1 ) G.add_edges_from( zip(dd[CREATE_GRAPH_OF], dd[LINKED_BY]) ) print "[+] Created bipartite graph..." # Projecting the main projected graph graph = bipartite.projected_graph(G, dd[CREATE_GRAPH_OF].unique(), multigraph=False) print "[+] Created projected graph..." # Outputting the corresponding data frame d = pd.DataFrame(graph.edges()) d.columns = [CREATE_GRAPH_OF + '__1', CREATE_GRAPH_OF + '__2'] # Recipe outputs print "[+] Writing output dataset..." graph = dataiku.Dataset(output_name) graph.write_with_schema(d)
count += 1 return Sum / count print "is bottom_nodes a bipartite set?",bipartite.is_bipartite_node_set(G, bottom_nodes) print "is top_nodesa bipartite set?",bipartite.is_bipartite_node_set(G, top_nodes) print len(bottom_nodes)," bottom nodes",len(top_nodes)," top nodes" print "Average subreddits moderated per moderator: ",mymean(G.degree_iter(bottom_nodes)) print "Average moderators per subreddit: ",mymean(G.degree_iter(top_nodes)) if export: nx.write_gexf(G,"C:\\Users\\Theseus\\Documents\\moderatorproject\\untouched.gexf") print "gexf exported" pg1 = bipartite.projected_graph(G, bottom_nodes) print "Unweighted moderator to moderator projection made" print "Average unweighted degree: ",mymean(pg1.degree_iter()) if export: nx.write_gexf(pg1,"C:\\Users\\Theseus\\Documents\\moderatorproject\\bottoms.gexf") print "gexf exported" pg2 = bipartite.projected_graph(G, top_nodes) print "Unweighted subreddit to subreddit projection made" print "Average unweighted degree: ",mymean(pg2.degree_iter()) if export: nx.write_gexf(pg2,"C:\\Users\\Theseus\\Documents\\moderatorproject\\tops.gexf") print "gexf exported" wpg1 = bipartite.weighted_projected_graph(G, bottom_nodes) print "Weighted bottom node projection made"
def describe(self, extra=False):
    """
    Provides a summary of graph statistics. Includes basic statistics like the number of nodes of each type,
    the number of edges, and the density. Prints a string that contains each of the items that make up the
    summary. Density is calculated using one of the modes of the original bipartite network graph.
    With `extra`, the transitivity of a one-mode projection and the mean degree and betweenness
    centralities are appended to the summary.

    **Parameters** :

    > *extra* : `bool`

    >> Runs the low efficiency algorithms, which can be resource-intensive on large networks.

    >> Recommended maximum network size for the low efficiency algorithms is around 100 nodes.

    **Returns** : `string`

    > Returns the descriptive string that contains information about the `MultiGraphPlus` object.

    """
    mode1 = self.mode1
    mode2 = self.mode2

    # Compute the two node sets once and reuse them below; they were
    # previously recomputed for every density / centrality call.
    # NOTE(review): nothing here ties node_sets[0] to mode1 and node_sets[1]
    # to mode2 -- confirm the labels used in the `extra` section match.
    node_sets = bipartite.sets(self)
    density = bipartite.density(self, node_sets[0])
    edges = self.number_of_edges()

    # Count the nodes of each type. nodes(data=True) is used instead of the
    # `self.node` mapping, which is no longer available in recent networkx.
    nodes_mode1 = 0
    nodes_mode2 = 0
    for _, attrs in self.nodes(data=True):
        node_type = attrs['type']
        if node_type == mode1:
            nodes_mode1 += 1
        elif node_type == mode2:
            nodes_mode2 += 1

    descriptives_nodes = "This is a bipartite network of types '{}' and '{}'.\n " \
                         "{} nodes are of the type '{}'.\n " \
                         "{} nodes are of the type '{}'.\n".format(str(mode1), str(mode2), str(nodes_mode1),
                                                                   str(mode1), str(nodes_mode2), str(mode2))
    descriptives_edges = "There are {} edges.\n".format(str(edges))
    descriptives_density = "Density: {}.\n".format(str(density))
    descriptives = descriptives_nodes + descriptives_edges + descriptives_density

    if extra:
        # Note: for each mode of the bipartite graph, degree and betweenness centrality are the same.
        # Keeping them both makes it easy to compare them and make sure they are the same.
        degree_mode1 = np.mean(list(bipartite.degree_centrality(self, node_sets[0]).values()))
        degree_mode2 = np.mean(list(bipartite.degree_centrality(self, node_sets[1]).values()))
        betweenness_mode1 = np.mean(list(bipartite.betweenness_centrality(self, node_sets[0]).values()))
        betweenness_mode2 = np.mean(list(bipartite.betweenness_centrality(self, node_sets[1]).values()))

        # Transitivity is measured on a projection of a plain Graph copy of
        # this graph onto the first node set of that copy.
        g = nx.Graph(self)
        projection = bipartite.projected_graph(g, bipartite.sets(g)[0])
        transitivity = nx.transitivity(projection)

        descriptives_transitivity = "Transitivity: {}.\n".format(str(transitivity))
        descriptives_degree_centrality = "Mean Degree Centrality for '{}': {}.\n" \
                                         "Mean Degree Centrality for '{}': {}.\n".format(str(mode1),
                                                                                         str(degree_mode1),
                                                                                         str(mode2),
                                                                                         str(degree_mode2))
        descriptives_btwn_centrality = "Mean Betweenness Centrality for '{}': {}.\n" \
                                       "Mean Betweenness Centrality for '{}': {}.\n".format(str(mode1),
                                                                                            str(betweenness_mode1),
                                                                                            str(mode2),
                                                                                            str(betweenness_mode2))
        descriptives = descriptives + descriptives_transitivity + \
            descriptives_degree_centrality + descriptives_btwn_centrality

    print(descriptives)
    return descriptives
They represent observed attendance at 14 social events by 18 Southern women. The graph is bipartite (clubs, women). """ import matplotlib.pyplot as plt import networkx as nx import networkx.algorithms.bipartite as bipartite G = nx.davis_southern_women_graph() women = G.graph['top'] clubs = G.graph['bottom'] print("Biadjacency tar_matrix") print(bipartite.biadjacency_matrix(G, women, clubs)) # project bipartite graph onto women nodes W = bipartite.projected_graph(G, women) print('') print("#Friends, Member") for w in women: print('%d %s' % (W.degree(w), w)) # project bipartite graph onto women nodes keeping number of co-occurence # the degree computed is weighted and counts the total number of shared contacts W = bipartite.weighted_projected_graph(G, women) print('') print("#Friend meetings, Member") for w in women: print('%d %s' % (W.degree(w, weight='weight'), w)) nx.draw(G) plt.show()
# Drop duplicate entries from nodes_1 while keeping first-seen order.
nodes_1 = list(dict.fromkeys(nodes_1))

print("Build a bipartite graph ...")
# Bipartite graph: nodes_0 entries are tagged bipartite=0 (disease) and
# nodes_1 entries are tagged bipartite=1 (active substance).
G = nx.Graph()
G.add_nodes_from(nodes_0, bipartite=0)
G.add_nodes_from(nodes_1, bipartite=1)

# One edge per row of vdmdata_reduce, linking its first two fields.
for m in vdmdata_reduce.iterrows():
    record = m[1]
    enfermedad, sustancia = record[0], record[1]
    G.add_edge(enfermedad, sustancia)

if type_nx == 'projected' and type_proj in ('icd', 'atc'):
    # 'icd' projects onto nodes_0 (diseases), 'atc' onto nodes_1
    # (active ingredients).
    GP = bipartite.projected_graph(G, nodes_0 if type_proj == 'icd' else nodes_1)
    print('Calculate Global properties for projected graph ' + type_proj)
    print("\n")
else:
    print('Calculate Global properties for bipartite network')
    print("\n")

if type_nx == 'bipartite':
    print("Nodes Number : " + str(G.number_of_nodes()))
    print("\n")
    print("Edges Number : " + str(G.number_of_edges()))
return hostnames for root,dirs,files in os.walk(args.target): for file in files: try: pe = pefile.PE(os.path.join(root, file)) except pefile.PEFormatError: continue f_path = os.path.join(root, file) contents = os.popen("strings '{0}'".format(f_path)).read() hostnames = get_hostnames(contents) if len(hostnames): network.add_node(file,label=file ,color='blue', penwidth=3,bipartite=0) for hostname in hostnames: network.add_node(hostname,label=hostname,color='purple', penwidth=10,bipartite=1) network.add_edge(hostname, file ,penwidth=2) if hostnames: print "Extracted hostname from:", file pprint.pprint(hostname) write_dot(network, args.filename) codes= set(n for n,d in network.nodes(data=True) if d['bipartite']==0) hostname = set(network)-codes codes = bipartite.projected_graph(network, codes) hostname = bipartite.projected_graph(network, hostname) write_dot(codes ,args.malware_pro) write_dot(hostname ,args.hostname_pro)
# Ask whether a candidate node set is one side of the bipartite graph B,
# first for the numeric nodes and then for some of the letter nodes.
X = {1, 2, 3, 4}
bipartite.is_bipartite_node_set(B, X)

X = {"A", "B", "C", "D", "E"}
bipartite.is_bipartite_node_set(B, X)

# The two node sets of B.
bipartite.sets(B)

# Projected graphs: rebuild B from an explicit list of letter-number edges.
B = nx.Graph()
B.add_edges_from(
    [
        ("A", 1), ("B", 1), ("C", 1), ("D", 1), ("H", 1),
        ("B", 2), ("C", 2), ("D", 2), ("E", 2), ("G", 2),
        ("E", 3), ("F", 3), ("H", 3), ("J", 3),
        ("E", 4), ("I", 4), ("J", 4),
    ]
)

# Projection onto the letter nodes.
X = {"A", "B", "C", "D", "E", "F", "G", "H", "I", "J"}
P = bipartite.projected_graph(B, X)
nx.draw(P)

# Projection onto the numeric nodes, drawn with labels.
X = {1, 2, 3, 4}
P = bipartite.projected_graph(B, X)
nx.draw(P, with_labels=1)

# Weighted projection onto the numeric nodes.
X = {1, 2, 3, 4}
P = bipartite.weighted_projected_graph(B, X)
nx.draw(P, with_labels=1)

# Imports used by the network-data generation that follows.
import pandas as pd
import numpy as np
import random
These data were collected by Davis et al. in the 1930s. They represent observed attendance at 14 social events by 18 Southern women. The graph is bipartite (clubs, women). """ import networkx as nx import networkx.algorithms.bipartite as bipartite G = nx.davis_southern_women_graph() women = G.graph['top'] clubs = G.graph['bottom'] print("Biadjacency matrix") print(bipartite.biadjacency_matrix(G,women,clubs)) # project bipartite graph onto women nodes W = bipartite.projected_graph(G, women) print('') print("#Friends, Member") for w in women: print('%d %s' % (W.degree(w),w)) # project bipartite graph onto women nodes keeping number of co-occurence # the degree computed is weighted and counts the total number of shared contacts W = bipartite.weighted_projected_graph(G, women) print('') print("#Friend meetings, Member") for w in women: print('%d %s' % (W.degree(w,weight='weight'),w))
def load_stock(stock):
    """Unpickle and return the object stored in the file PROCESSED_PATH + stock."""
    # NOTE(review): pickle files are normally opened in binary mode ('rb');
    # the text mode is kept to match how the files are read today -- confirm
    # how they were written before changing it.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(PROCESSED_PATH + stock, 'r') as f:
        return pickle.load(f)


def load_user(user):
    """Unpickle and return the object stored in the file USER_PATH + user."""
    # NOTE(review): same text-mode / trusted-input caveats as load_stock.
    with open(USER_PATH + user, 'r') as f:
        return pickle.load(f)


# Load every regular file of each directory, keyed by its file name.
stocks = {f: load_stock(f) for f in listdir(PROCESSED_PATH) if isfile(join(PROCESSED_PATH, f))}
users = {f: load_user(f) for f in listdir(USER_PATH) if isfile(join(USER_PATH, f))}

# Bipartite graphs in networkx are implemented using a normal graph where nodes have an attribute
# value of either 0 or 1 depending on which graph it belongs to
G = nx.Graph()
# Add all stocks
G.add_nodes_from(stocks.keys(), bipartite=0)
# Add all users
G.add_nodes_from(users.keys(), bipartite=1)

for (symbol, stock_tweets) in stocks.iteritems():
    # Find the users that tweeted about this stock
    user_ids = set([tweet.author.id_str for tweet in stock_tweets])
    # Only link authors that were loaded as users. Membership is tested on the
    # dict itself (a hash lookup) rather than on users.keys(), which builds
    # and scans a list for every candidate.
    G.add_edges_from([(symbol, user) for user in user_ids if user in users])

# Create the user network from the bipartite network
stock_nodes = set(n for n, d in G.nodes(data=True) if d['bipartite'] == 0)
user_nodes = set(G) - stock_nodes
U = bipartite.projected_graph(G, user_nodes)
tx.size()

# Distinct values of the SC_l column.
disciplines = df.SC_l.unique()

# Every element found in the non-nan entries of the topics column.
techniques = {
    det
    for de in df.topics
    if de is not nan
    for det in de
}

# <codecell>

df.SC_l

# <codecell>

len(techniques)

# <codecell>

# Projection of tx onto the nodes of g2.
disc = bi.projected_graph(tx, g2.nodes())

# <codecell>

# (node, degree) pairs of the projection, in ascending order of degree.
by_degree = operator.itemgetter(1)
sorted(disc.degree().iteritems(), key=by_degree)

# <markdowncell>

# ## Retrieving further literature if needed from WoS

# <codecell>

## Build a quoted, "or"-separated search string that can be run back against
## WoS - it says it will take up to 5000 terms! Only terms whose count is
## strictly between 90 and 200 are kept.
'"' + '" or "'.join([de for de, val in de_counts_sorted if 90 < val < 200]) + '"'
nx.set_node_attributes(g, 'co', co)

# Store each graph's node degrees on its own nodes under the 'd' attribute.
for subgraph in (g1, g2, g3, g4, g5):
    d = nx.degree(subgraph)
    nx.set_node_attributes(subgraph, 'd', d)

# Projected eigenvector centrality: project g onto the nodes whose id lies
# strictly between 0 and 1000, then store the centrality under 'ec'.
bio_nodes = {n for n in g.nodes() if 0 < n < 1000}
eg = bipartite.projected_graph(g, bio_nodes)
ec_pro = nx.eigenvector_centrality(eg, 1000)
nx.set_node_attributes(eg, 'ec', ec_pro)

# ho=sh.structural_holes(g)
for n in g.nodes():
    if n < 1000 and n != -9:
        # Reset the per-node counters for each partner type.
        n_bio = n_npr = n_gov = n_fin = n_pha = n_oth = 0
def folded_graph(B):
    """Return the projection of B onto the second node set given by bipartite.sets(B)."""
    # Only the second of the two node sets is projected on; the first is unused.
    _, projection_nodes = bipartite.sets(B)
    return bipartite.projected_graph(B, projection_nodes)