def test_directed_projection(self):
    """Project two small directed bipartite graphs onto the nodes 'A' and 'B'."""
    # First graph: node 2 is pointed at by both 'A' and 'B', so only node 1
    # sits on a directed A -> 1 -> B route.
    graph = nx.DiGraph()
    graph.add_edges_from([('A', 1), (1, 'B'), ('A', 2), ('B', 2)])

    plain = nx.projected_graph(graph, 'AB')
    assert_equal(sorted(plain.edges()), [('A', 'B')])
    weighted = nx.weighted_projected_graph(graph, 'AB')
    assert_equal(sorted(weighted.edges()), [('A', 'B')])
    assert_equal(weighted['A']['B']['weight'], 1)
    multi = nx.projected_graph(graph, 'AB', multigraph=True)
    assert_equal(sorted(multi.edges()), [('A', 'B')])

    # Second graph: both node 1 and node 2 sit on a directed A -> B route,
    # so the weight is 2 and the multigraph carries two parallel edges.
    graph = nx.DiGraph()
    graph.add_edges_from([('A', 1), (1, 'B'), ('A', 2), (2, 'B')])

    plain = nx.projected_graph(graph, 'AB')
    assert_equal(sorted(plain.edges()), [('A', 'B')])
    weighted = nx.weighted_projected_graph(graph, 'AB')
    assert_equal(sorted(weighted.edges()), [('A', 'B')])
    assert_equal(weighted['A']['B']['weight'], 2)
    multi = nx.projected_graph(graph, 'AB', multigraph=True)
    assert_equal(sorted(multi.edges()), [('A', 'B'), ('A', 'B')])
def construct_graph(self):
    """
    Builds the graph representations of the model and returns the simple one.

    Three attributes are (re)created on the instance:

    - ``self.bipartite``: functions (``bipartite=0``) and flows
      (``bipartite=1``) joined by the pairs in ``self._fxnflows``
    - ``self.multgraph``: projection onto the functions that keeps one
      parallel edge per shared flow
    - ``self.graph``: projection onto the functions with single edges

    Returns
    -------
    graph : networkx graph
        ``self.graph``; each edge carries one attribute per connecting flow
        and each node carries its function object under ``'obj'``.
    """
    self.bipartite = two_mode = nx.Graph()
    two_mode.add_nodes_from(self.fxns, bipartite=0)
    two_mode.add_nodes_from(self.flows, bipartite=1)
    two_mode.add_edges_from(self._fxnflows)

    self.multgraph = nx.projected_graph(two_mode, self.fxns, multigraph=True)
    self.graph = nx.projected_graph(two_mode, self.fxns)

    # For every function-function edge, collect the flow objects linking the
    # two functions.  The third element of each multigraph edge is used as
    # the lookup key into self.flows.
    edge_flows = {}
    for edge in self.graph.edges:
        parallel_edges = list(self.multgraph.subgraph(edge).edges)
        edge_flows[edge] = {
            parallel[2]: self.flows[parallel[2]] for parallel in parallel_edges
        }

    nx.set_edge_attributes(self.graph, edge_flows)
    nx.set_node_attributes(self.graph, self.fxns, 'obj')
    return self.graph
def test_directed_projection(self):
    """Check plain, weighted and multigraph projections of directed graphs."""
    # Each case: (directed edges, expected A-B weight, expected multigraph edges).
    # The cases differ only in the direction of the edge between 'B' and 2.
    cases = [
        ([('A', 1), (1, 'B'), ('A', 2), ('B', 2)], 1, [('A', 'B')]),
        ([('A', 1), (1, 'B'), ('A', 2), (2, 'B')], 2, [('A', 'B'), ('A', 'B')]),
    ]
    for edge_list, expected_weight, expected_multi in cases:
        G = nx.DiGraph()
        G.add_edges_from(edge_list)

        P = nx.projected_graph(G, 'AB')
        assert_equal(sorted(P.edges()), [('A', 'B')])

        P = nx.weighted_projected_graph(G, 'AB')
        assert_equal(sorted(P.edges()), [('A', 'B')])
        assert_equal(P['A']['B']['weight'], expected_weight)

        P = nx.projected_graph(G, 'AB', multigraph=True)
        assert_equal(sorted(P.edges()), expected_multi)
def test_path_projected_graph(self):
    """Projecting the path 0-1-2-3 onto either alternating pair joins that pair."""
    path = nx.path_graph(4)
    for kept in ([1, 3], [0, 2]):
        projection = nx.projected_graph(path, kept)
        assert_equal(sorted(projection.nodes()), kept)
        assert_equal(sorted(projection.edges()), [tuple(kept)])
def test_path_projected_graph(self):
    """Project the 4-node path graph onto its odd and onto its even nodes."""
    G = nx.path_graph(4)

    odd = nx.projected_graph(G, [1, 3])
    assert_equal(sorted(odd.nodes()), [1, 3])
    assert_equal(sorted(odd.edges()), [(1, 3)])

    even = nx.projected_graph(G, [0, 2])
    assert_equal(sorted(even.nodes()), [0, 2])
    assert_equal(sorted(even.edges()), [(0, 2)])
def test_havel_hakimi_graph(self):
    """Exercise ``havel_hakimi_graph`` on empty, zero, invalid and valid input."""
    # Empty degree sequences give an empty graph.
    G = havel_hakimi_graph([], [])
    assert len(G) == 0

    # All-zero degrees: one node per entry, no edges.
    G = havel_hakimi_graph([0, 0], [0, 0])
    assert len(G) == 4
    assert G.number_of_edges() == 0

    # These two sequences sum to 12 and 10; the call is expected to fail.
    aseq = [3, 3, 3, 3]
    with pytest.raises(nx.NetworkXError):
        havel_hakimi_graph(aseq, [2, 2, 2, 2, 2])

    expected_degrees = [2] * 6 + [3] * 4

    bseq = [2, 2, 2, 2, 2, 2]
    G = havel_hakimi_graph(aseq, bseq)
    assert sorted(d for n, d in G.degree()) == expected_degrees

    # Same sequences with the two sides swapped.
    aseq = [2, 2, 2, 2, 2, 2]
    bseq = [3, 3, 3, 3]
    G = havel_hakimi_graph(aseq, bseq)
    assert G.is_multigraph()
    assert not G.is_directed()
    assert sorted(d for n, d in G.degree()) == expected_degrees

    # Each projection keeps exactly the nodes of the side projected onto.
    GU = nx.projected_graph(nx.Graph(G), range(len(aseq)))
    assert GU.number_of_nodes() == 6
    GD = nx.projected_graph(nx.Graph(G), range(len(aseq), len(aseq) + len(bseq)))
    assert GD.number_of_nodes() == 4

    # NOTE(review): this calls reverse_havel_hakimi_graph inside the
    # havel_hakimi_graph test -- kept as-is; confirm whether that is intended.
    G = reverse_havel_hakimi_graph(aseq, bseq, create_using=nx.Graph)
    assert not G.is_multigraph()
    assert not G.is_directed()

    # Directed containers must be rejected.  (The original repeated the
    # DiGraph check twice verbatim; one run per class is enough.)
    for directed_class in (nx.DiGraph, nx.MultiDiGraph):
        with pytest.raises(nx.NetworkXError):
            havel_hakimi_graph(aseq, bseq, create_using=directed_class)
def test_path_projected_properties_graph(self):
    """Node attributes of the source graph must be present in the projection."""
    # NOTE(review): `.node` is the pre-2.4 NetworkX accessor and is kept on
    # purpose -- confirm the pinned NetworkX version before modernising.
    G = nx.path_graph(4)
    G.add_node(1, name='one')
    G.add_node(2, name='two')

    # (nodes to keep, the kept node that carries a 'name' attribute)
    for kept, named in (([1, 3], 1), ([0, 2], 2)):
        P = nx.projected_graph(G, kept)
        assert_equal(sorted(P.nodes()), kept)
        assert_equal(sorted(P.edges()), [tuple(kept)])
        assert_equal(P.node[named]['name'], G.node[named]['name'])
def test_star_projected_graph(self):
    """Projecting a 3-leaf star onto its leaves gives a triangle; onto the hub, a lone node."""
    star = nx.star_graph(3)
    leaves = [1, 2, 3]
    triangle = [(1, 2), (1, 3), (2, 3)]

    # The plain and the weighted projection must agree on nodes and edges.
    for project in (nx.projected_graph, nx.weighted_projected_graph):
        P = project(star, leaves)
        assert_equal(sorted(P.nodes()), leaves)
        assert_equal(sorted(P.edges()), triangle)

    hub_only = nx.projected_graph(star, [0])
    assert_equal(sorted(hub_only.nodes()), [0])
    assert_equal(sorted(hub_only.edges()), [])
def test_project_multigraph(self):
    """'a' and 'b' share two neighbours: one edge normally, two as a multigraph."""
    G = nx.Graph()
    G.add_edges_from([('a', 1), ('b', 1), ('a', 2), ('b', 2)])

    single = [('a', 'b')]
    assert_equal(sorted(nx.projected_graph(G, 'ab').edges()), single)
    assert_equal(sorted(nx.weighted_projected_graph(G, 'ab').edges()), single)

    multi = nx.projected_graph(G, 'ab', multigraph=True)
    assert_equal(sorted(multi.edges()), single * 2)
def test_project_multigraph(self):
    """Check edge multiplicity of the three projection variants."""
    G = nx.Graph()
    for top in ('a', 'b'):
        pass  # placeholder removed below; edges are added in original order
    G.add_edges_from((top, bottom) for bottom in (1, 2) for top in ('a', 'b'))

    plain = nx.projected_graph(G, 'ab')
    assert_equal(sorted(plain.edges()), [('a', 'b')])

    weighted = nx.weighted_projected_graph(G, 'ab')
    assert_equal(sorted(weighted.edges()), [('a', 'b')])

    # One parallel edge per shared neighbour (1 and 2).
    multi = nx.projected_graph(G, 'ab', multigraph=True)
    assert_equal(sorted(multi.edges()), [('a', 'b'), ('a', 'b')])
def test_star_projected_graph(self):
    """Project the star graph with hub 0 onto its leaves and onto its hub."""
    G = nx.star_graph(3)
    leaf_nodes = [1, 2, 3]
    leaf_pairs = [(1, 2), (1, 3), (2, 3)]

    onto_leaves = nx.projected_graph(G, leaf_nodes)
    assert_equal(sorted(onto_leaves.nodes()), leaf_nodes)
    assert_equal(sorted(onto_leaves.edges()), leaf_pairs)

    weighted_leaves = nx.weighted_projected_graph(G, leaf_nodes)
    assert_equal(sorted(weighted_leaves.nodes()), leaf_nodes)
    assert_equal(sorted(weighted_leaves.edges()), leaf_pairs)

    onto_hub = nx.projected_graph(G, [0])
    assert_equal(sorted(onto_hub.nodes()), [0])
    assert_equal(sorted(onto_hub.edges()), [])
def test_path_projected_properties_graph(self):
    """The projection keeps the 'name' attribute of the nodes it retains."""
    # NOTE(review): `.node` is the pre-2.4 NetworkX accessor and is kept on
    # purpose -- confirm the pinned NetworkX version before modernising.
    source = nx.path_graph(4)
    source.add_node(1, name='one')
    source.add_node(2, name='two')

    odd = nx.projected_graph(source, [1, 3])
    assert_equal(sorted(odd.nodes()), [1, 3])
    assert_equal(sorted(odd.edges()), [(1, 3)])
    assert_equal(odd.node[1]['name'], source.node[1]['name'])

    even = nx.projected_graph(source, [0, 2])
    assert_equal(sorted(even.nodes()), [0, 2])
    assert_equal(sorted(even.edges()), [(0, 2)])
    assert_equal(even.node[2]['name'], source.node[2]['name'])
def test_make_clique_bipartite(self):
    """The clique-bipartite graph projects back to G and onto five clique nodes."""
    G = self.G
    B = nx.make_clique_bipartite(G)

    # Expected node set: -5..-1 followed by 1..11.
    assert sorted(B) == list(range(-5, 0)) + list(range(1, 12))

    # Projecting onto the positive nodes must reproduce the adjacency of G.
    onto_original = nx.projected_graph(B, range(1, 12))
    assert onto_original.adj == G.adj

    # Project onto the negative nodes, then flip their sign.
    onto_cliques = nx.projected_graph(B, range(-5, 0))
    sign_flip = {-v: v for v in range(1, 6)}
    onto_cliques = nx.relabel_nodes(onto_cliques, sign_flip)
    assert sorted(onto_cliques) == [1, 2, 3, 4, 5]
def return_stategraph(self, gtype='normal'):
    """
    Returns a graph representation of the current state of the model.

    Parameters
    ----------
    gtype : str, optional
        Type of graph to return (normal, bipartite, or component).
        The default is 'normal'.

    Returns
    -------
    graph : networkx graph
        Graph representation of the system with the modes and states added
        as attributes.
    """
    # --- structure -------------------------------------------------------
    if gtype == 'normal':
        graph = nx.projected_graph(self.bipartite, self.fxns)
    elif gtype == 'bipartite':
        graph = self.bipartite.copy()
    elif gtype == 'component':
        graph = self.bipartite.copy()
        # Attach every component to its owning function as an extra node.
        for fxnname, fxn in self.fxns.items():
            graph.add_nodes_from(fxn.components, bipartite=1)
            graph.add_edges_from(
                [(fxnname, component) for component in fxn.components])

    edgevals, flowstates = {}, {}
    fxnmodes, fxnstates = {}, {}
    compmodes, compstates, comptypes = {}, {}, {}

    # --- flow values -----------------------------------------------------
    if gtype == 'normal':
        # Flows live on the edges: the third element of each multigraph edge
        # is used as the key into self.flows.
        for edge in graph.edges:
            parallel_edges = list(self.multgraph.subgraph(edge).edges)
            edgevals[edge] = {
                parallel[2]: self.flows[parallel[2]].status()
                for parallel in parallel_edges
            }
        nx.set_edge_attributes(graph, edgevals)
    elif gtype == 'bipartite' or gtype == 'component':
        # Flows are nodes of their own here.
        for flowname, flow in self.flows.items():
            flowstates[flowname] = flow.status()
        nx.set_node_attributes(graph, flowstates, 'states')

    # --- function (and component) values ---------------------------------
    for fxnname, fxn in self.fxns.items():
        fxnstates[fxnname], fxnmodes[fxnname] = fxn.return_states()
        if gtype == 'normal':
            del graph.nodes[fxnname]['bipartite']
        if gtype == 'component':
            # Iterate over a copy: modes found on a component are removed
            # from the function and replaced by the 'Comp_Fault' marker.
            for mode in fxnmodes[fxnname].copy():
                for compname, comp in fxn.components.items():
                    compstates[compname] = {}
                    comptypes[compname] = True
                    if mode in comp.faultmodes:
                        compmodes.setdefault(compname, set()).update([mode])
                        fxnmodes[fxnname].remove(mode)
                        fxnmodes[fxnname].update(['Comp_Fault'])

    nx.set_node_attributes(graph, fxnstates, 'states')
    nx.set_node_attributes(graph, fxnmodes, 'modes')
    if gtype == 'component':
        nx.set_node_attributes(graph, compstates, 'states')
        nx.set_node_attributes(graph, compmodes, 'modes')
        nx.set_node_attributes(graph, comptypes, 'iscomponent')
    return graph
def generate_G_com(self, dist_scale):
    """Build ``self.G_com``, a randomly weighted graph over the community nodes.

    Every pair of community nodes that is connected in the projection of
    ``self.G_outer`` gets an edge whose weight is a random draw divided by
    ``distance ** dist_scale``.  With ``dist_scale == 0`` the distance has no
    effect; larger values favour closer nodes.  Pairs with no path between
    them get no edge, but both nodes are still added.
    """
    projection = nx.projected_graph(B=self.G_outer, nodes=self.community_nodes)
    hops = dict(nx.all_pairs_shortest_path_length(projection))

    self.G_com = nx.Graph()

    for first, second in itertools.combinations(self.community_nodes, 2):
        # Drawn for every pair, connected or not, so the random stream is
        # consumed the same way regardless of the graph's connectivity.
        draw = self.random_state.random()

        try:
            separation = hops[first][second]
        except KeyError:
            # No path between the two nodes: keep them, but without an edge.
            self.G_com.add_node(first)
            self.G_com.add_node(second)
            continue

        self.G_com.add_edge(first, second, weight=draw / separation ** dist_scale)
def k_random_intersection_graph(n, m, k, seed=None):
    """Returns an intersection graph in which every node is assigned a
    randomly chosen attribute set of the same size (k).

    Parameters
    ----------
    n : int
        The number of nodes in the first bipartite set (nodes)
    m : int
        The number of nodes in the second bipartite set (attributes)
    k : int
        Size of attribute set to assign to each node.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    See Also
    --------
    gnp_random_graph, uniform_random_intersection_graph

    References
    ----------
    .. [1] Godehardt, E., and Jaworski, J.
       Two models of random intersection graphs and their applications.
       Electronic Notes in Discrete Mathematics 10 (2001), 129--132.
    """
    # NOTE(review): the body calls ``seed.sample`` directly, so ``seed`` must
    # already be a generator object here -- presumably a decorator outside
    # this view performs that conversion; confirm.
    two_mode = nx.empty_graph(n + m)
    attributes = range(n, n + m)
    for node in range(n):
        chosen = seed.sample(attributes, k)
        two_mode.add_edges_from((node, attribute) for attribute in chosen)
    return nx.projected_graph(two_mode, range(n))
def output_sims(bipartite_mode):
    '''
    Write the non-zero Jaccard similarities between all nodes of one mode to
    the CSV file named in the settings.

    Row format: artist1,artist2,sim (or tag1,tag2,sim)

    :param bipartite_mode: which set of nodes to calculate similarity for:
        ARTIST_MODE or TAG_MODE
    '''
    if bipartite_mode not in [ARTIST_MODE, TAG_MODE]:
        logging.error('invalid value for bipartite mode')
        return

    if bipartite_mode == ARTIST_MODE:
        out_path = artist_sim_filename
    else:
        out_path = tag_sim_filename

    graph = get_artists_tags_graph()
    same_mode = set(node for node, attrs in graph.nodes(data=True)
                    if attrs['bipartite'] == bipartite_mode)
    projection = nx.projected_graph(graph, same_mode)
    # Every edge of the projection is a candidate pair.
    sims = jaccard_sims(graph, bipartite_mode, projection.edges_iter())

    logging.info('calculating similarity for %d unique unorderd pairs'
                 % projection.number_of_edges())

    with open(out_path, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        for counter, (first, second, sim) in enumerate(sims):
            row = [first.encode('utf-8'), second.encode('utf-8'), sim]
            logging.debug(row)
            writer.writerow(row)
            # Progress message every 10000 rows (including row 0).
            if counter % 10000 == 0:
                logging.info(
                    'Wrote non-zero similarity data for pair %d, mode %d'
                    % (counter, bipartite_mode))
def output_sims(bipartite_mode):
    '''
    Dump non-zero Jaccard similarities for one node mode to a CSV file.

    The target file comes from the settings; each row has the format
    artist1,artist2,sim (or tag1,tag2,sim).

    :param bipartite_mode: which set of nodes to calculate similarity for:
        ARTIST_MODE or TAG_MODE
    '''
    valid_modes = [ARTIST_MODE, TAG_MODE]
    if bipartite_mode not in valid_modes:
        logging.error('invalid value for bipartite mode')
        return

    target_file = (artist_sim_filename if bipartite_mode == ARTIST_MODE
                   else tag_sim_filename)

    g = get_artists_tags_graph()
    mode_nodes = set(name for name, data in g.nodes(data=True)
                     if data['bipartite'] == bipartite_mode)
    g_proj = nx.projected_graph(g, mode_nodes)
    pair_sims = jaccard_sims(g, bipartite_mode, g_proj.edges_iter())

    pair_total = g_proj.number_of_edges()
    logging.info('calculating similarity for %d unique unorderd pairs' % pair_total)

    with open(target_file, 'wb') as handle:
        out = csv.writer(handle)
        for counter, (left, right, sim) in enumerate(pair_sims):
            row = [name.encode('utf-8') for name in (left, right)]
            row.append(sim)
            logging.debug(row)
            out.writerow(row)
            if counter % 10000 != 0:
                continue
            logging.info('Wrote non-zero similarity data for pair %d, mode %d'
                         % (counter, bipartite_mode))
def k_random_intersection_graph(n, m, k, seed=None):
    """Return an intersection graph with randomly chosen attribute sets for
    each node that are of equal size (k).

    Parameters
    ----------
    n : int
        The number of nodes in the first bipartite set (nodes)
    m : int
        The number of nodes in the second bipartite set (attributes)
    k : int
        Size of attribute set to assign to each node.
    seed : int, optional
        Seed for random number generator (default=None).  When None, the
        module-level ``random`` generator is used, as before.

    See Also
    --------
    gnp_random_graph, uniform_random_intersection_graph

    References
    ----------
    .. [1] Godehardt, E., and Jaworski, J.
       Two models of random intersection graphs and their applications.
       Electronic Notes in Discrete Mathematics 10 (2001), 129--132.
    """
    # The docstring always documented ``seed`` but the signature did not
    # accept it.  A private generator keeps seeded calls reproducible without
    # touching the global random state.
    rng = random if seed is None else random.Random(seed)

    G = nx.empty_graph(n + m)
    mset = range(n, n + m)
    for v in range(n):
        targets = rng.sample(mset, k)
        G.add_edges_from((v, target) for target in targets)
    return nx.projected_graph(G, range(n))
def uniform_random_intersection_graph(n, m, p, seed=None):
    """Returns a uniform random intersection graph.

    A random bipartite graph between ``n`` nodes and ``m`` attributes is
    generated and then projected onto the nodes ``0 .. n-1``.

    Parameters
    ----------
    n : int
        The number of nodes in the first bipartite set (nodes)
    m : int
        The number of nodes in the second bipartite set (attributes)
    p : float
        Probability of connecting nodes between bipartite sets
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    See Also
    --------
    gnp_random_graph

    References
    ----------
    .. [1] K.B. Singer-Cohen, Random Intersection Graphs, 1995,
       PhD thesis, Johns Hopkins University
    .. [2] Fill, J. A., Scheinerman, E. R., and Singer-Cohen, K. B.,
       Random intersection graphs when m = ω(n):
       An equivalence theorem relating the evolution of the g(n, m, p)
       and g(n, p) models. Random Struct. Algorithms 16, 2 (2000), 156–176.
    """
    two_mode = bipartite.random_graph(n, m, p, seed)
    first_set = range(n)
    return nx.projected_graph(two_mode, first_set)
def uniform_random_intersection_graph(n, m, p, seed=None):
    """Return a uniform random intersection graph.

    Builds a random bipartite graph on ``n`` nodes and ``m`` attributes and
    returns its projection onto the nodes ``0 .. n-1``.

    Parameters
    ----------
    n : int
        The number of nodes in the first bipartite set (nodes)
    m : int
        The number of nodes in the second bipartite set (attributes)
    p : float
        Probability of connecting nodes between bipartite sets
    seed : int, optional
        Seed for random number generator (default=None).

    See Also
    --------
    gnp_random_graph

    References
    ----------
    .. [1] K.B. Singer-Cohen, Random Intersection Graphs, 1995,
       PhD thesis, Johns Hopkins University
    .. [2] Fill, J. A., Scheinerman, E. R., and Singer-Cohen, K. B.,
       Random intersection graphs when m = ω(n):
       An equivalence theorem relating the evolution of the g(n, m, p)
       and g(n, p) models. Random Struct. Algorithms 16, 2 (2000), 156–176.
    """
    attribute_graph = bipartite.random_graph(n, m, p, seed=seed)
    return nx.projected_graph(attribute_graph, range(n))
def nx_graph_projection(bgraph: NetworkXBipartiteGraph,
                        nodes_retained: int) -> NetworkXGraph:
    """Project a bipartite graph onto one of its node collections.

    ``nodes_retained`` indexes ``bgraph.nodes`` to select the nodes that are
    kept.  The weight labels of the input are carried over to the result.
    """
    underlying = bgraph.value
    kept_nodes = bgraph.nodes[nodes_retained]
    projection = nx.projected_graph(underlying, kept_nodes)
    return NetworkXGraph(
        projection,
        node_weight_label=bgraph.node_weight_label,
        edge_weight_label=bgraph.edge_weight_label,
    )
def get_upstream_paths(self, *requested_paths):
    """Return the requested paths and their upstream paths in topological order.

    The requested nodes plus all of their ancestors in ``self._graph`` form a
    subgraph; that subgraph is projected onto the members that are also in
    ``self._paths`` and the projection is topologically sorted.
    """
    members = set(requested_paths).union(
        *(nx.ancestors(self._graph, path) for path in requested_paths))
    path_members = self._paths.intersection(members)

    upstream = nx.subgraph(self._graph, members)
    paths_only = nx.projected_graph(upstream, path_members)
    return nx.topological_sort(paths_only)
def test_project_weighted(self):
    """Check binary, weighted and collaboration projections on Tore Opsahl's example."""
    # http://toreopsahl.com/2009/05/01/projecting-two-mode-networks-onto-weighted-one-mode-networks/
    # Listed as pairs so the edges are inserted in a fixed order.
    memberships = [
        ('A', [1, 2]),
        ('B', [1, 2, 3, 4, 5]),
        ('C', [1]),
        ('D', [3]),
        ('E', [4, 5, 6]),
        ('F', [6]),
    ]
    G = nx.Graph()
    for letter, numbers in memberships:
        G.add_edges_from((letter, number) for number in numbers)

    def reference(weighted_edges):
        answer = nx.Graph()
        answer.add_weighted_edges_from(weighted_edges)
        return answer

    def check_weights(P, Panswer):
        assert_equal(P.edges(), Panswer.edges())
        for u, v in P.edges():
            assert_equal(P[u][v]['weight'], Panswer[u][v]['weight'])

    Panswer = reference([('A', 'B', 2), ('A', 'C', 1), ('B', 'C', 1),
                         ('B', 'D', 1), ('B', 'E', 2), ('E', 'F', 1)])

    # binary projected
    P = nx.projected_graph(G, 'ABCDEF')
    assert_equal(P.edges(), Panswer.edges())

    # weighted projected
    check_weights(nx.weighted_projected_graph(G, 'ABCDEF'), Panswer)

    # collaboration projected
    Panswer = reference([('A', 'B', 1.5), ('A', 'C', 0.5), ('B', 'C', 0.5),
                         ('B', 'D', 1), ('B', 'E', 2), ('E', 'F', 1)])
    check_weights(
        nx.weighted_projected_graph(G, 'ABCDEF', collaboration=True), Panswer)
def test_make_max_clique_graph(self):
    """The maximal clique graph must equal the clique-side projection of
    the bipartite clique graph.
    """
    G = self.G
    bipartite_cliques = nx.make_clique_bipartite(G)

    # The clique nodes are the negative ones, -5 .. -1.
    clique_side = nx.projected_graph(bipartite_cliques, range(-5, 0))

    # Map -1..-5 onto 0..4 before comparing.
    to_zero_based = {-v: v - 1 for v in range(1, 6)}
    clique_side = nx.relabel_nodes(clique_side, to_zero_based)

    max_clique_graph = nx.make_max_clique_graph(G)
    assert clique_side.adj == max_clique_graph.adj
def process_data() -> None:
    """Build the actor-to-actor graph, time two APSP runs and save the results.

    Reads the CSV at ``NETFLIX_CSV_FILE_LOCATION``, projects the largest
    connected component of the actor/title graph onto its actors, runs
    all-pairs shortest paths through SciPy and NetworkX, and writes timings,
    Kevin Bacon distances and visualization data to
    ``OUTPUT_JSON_FILE_LOCATION``.
    """
    raw_df = pd.read_csv(NETFLIX_CSV_FILE_LOCATION)

    print('Preprocessing data.')
    clean_df = preprocess_netflix_df(raw_df)
    edgelist = generate_actor_to_movie_edgelist(clean_df)
    bipartite_graph = nx.from_pandas_edgelist(edgelist, 'cast', 'title')
    bipartite_graph, component_nodes = extract_largest_connected_component_graph(
        bipartite_graph)
    component_actors = component_nodes.intersection(edgelist.cast.unique())
    actor_graph = nx.projected_graph(bipartite_graph, component_actors)
    print(f'The actor-to-actor graph has {len(actor_graph.nodes())} nodes.')
    # No title may survive the projection onto actors.
    assert not set(actor_graph.nodes()).intersection(edgelist.title)

    print('Running APSP via SciPy.')
    scipy_dist_map, scipy_seconds = apsp_via_scipy(actor_graph)
    print(f'APSP via SciPy took {scipy_seconds} seconds.')

    print('Running APSP via NetworkX.')
    nx_dist_map, nx_seconds = apsp_via_nx(actor_graph)
    print(f'APSP via NetworkX took {nx_seconds} seconds.')

    _sanity_check_apsp_results(scipy_dist_map, nx_dist_map)

    graph_data, path_data = generate_path_data_for_visualization(actor_graph)
    bacon_distances = kevin_bacon_distances_from_tensor_map(scipy_dist_map)
    closest_bacon_distance = min(bacon_distances.values())

    print('Saving results.')
    results = {
        'scipyAPSPTime': scipy_seconds,
        'nxAPSPTime': nx_seconds,
        'actorNameToKevinBaconDistance': bacon_distances,
        'minKevinBaconDistance': closest_bacon_distance,
        'kCoreValueForVisualziation': K_CORE_VALUE_FOR_VISUALZIATION,
        'graphData': graph_data,
        'pathLookup': path_data,
    }
    with open(OUTPUT_JSON_FILE_LOCATION, 'w') as out_file:
        json.dump(results, out_file, indent=4)
    print('Done.')
def get_top_n(g, node, n=5):
    '''
    Get the top n most similar nodes for a given node.

    Returns a dict of at most n node:similarity pairs.  If the node is not
    in the graph, an error is logged and None is returned.

    :param g: the artists tags graph
    :param node: the node ('artist',name) or ('tag',name)
    :param n: the maximum number of similar tags to be returned
    '''
    if node not in g:
        logging.error('Node not in graph: %s' % node[1])
        return

    mode = g.node[node]['bipartite']
    same_mode = set(name for name, attrs in g.nodes(data=True)
                    if attrs['bipartite'] == mode)
    projection = nx.projected_graph(g, same_mode)

    # Similarity of `node` to each of its neighbours in the projection.
    sims = {}
    for _, other, sim in jaccard_sims(g, mode, projection.edges_iter(node)):
        sims[other] = sim

    ranked = sorted(sims.iteritems(), key=operator.itemgetter(1), reverse=True)
    return dict(ranked[:min(n, len(sims))])
def test_project_weighted(self):
    """Compare the three projection weightings with Tore Opsahl's worked example."""
    # http://toreopsahl.com/2009/05/01/projecting-two-mode-networks-onto-weighted-one-mode-networks/
    G = nx.Graph()
    G.add_edges_from([
        ('A', 1), ('A', 2),
        ('B', 1), ('B', 2), ('B', 3), ('B', 4), ('B', 5),
        ('C', 1),
        ('D', 3),
        ('E', 4), ('E', 5), ('E', 6),
        ('F', 6),
    ])
    letters = 'ABCDEF'

    # Expected weights when every shared neighbour counts as 1.
    count_answer = nx.Graph()
    count_answer.add_weighted_edges_from([
        ('A', 'B', 2), ('A', 'C', 1), ('B', 'C', 1),
        ('B', 'D', 1), ('B', 'E', 2), ('E', 'F', 1),
    ])

    # binary projected
    binary = nx.projected_graph(G, letters)
    assert_equal(binary.edges(), count_answer.edges())

    # weighted projected
    counted = nx.weighted_projected_graph(G, letters)
    assert_equal(counted.edges(), count_answer.edges())
    for u, v in counted.edges():
        assert_equal(counted[u][v]['weight'], count_answer[u][v]['weight'])

    # collaboration projected
    collab_answer = nx.Graph()
    collab_answer.add_weighted_edges_from([
        ('A', 'B', 1.5), ('A', 'C', 0.5), ('B', 'C', 0.5),
        ('B', 'D', 1), ('B', 'E', 2), ('E', 'F', 1),
    ])
    collab = nx.weighted_projected_graph(G, letters, collaboration=True)
    assert_equal(collab.edges(), collab_answer.edges())
    for u, v in collab.edges():
        assert_equal(collab[u][v]['weight'], collab_answer[u][v]['weight'])
def _storyid(df, proj_col="fake_url"):
    """
    Cluster together similar URLs:

    1. Treat each (fake_url, fact_url) match as an edge of a bipartite graph.
    2. Project that graph onto the given column (default: fake URLs).
    3. Find the connected components of the projection.
    4. Use the running number of a URL's component as its ``story_id``.

    Returns the frame with the added ``story_id`` column.
    """
    matches = networkx.from_pandas_edgelist(df, "fake_url", "fact_url")
    projection = networkx.projected_graph(matches, df[proj_col])

    # URL -> running number of the component that contains it.
    story_of = {
        url: story
        for story, component in enumerate(networkx.connected_components(projection))
        for url in component
    }

    df = df.set_index(proj_col)
    df['story_id'] = pandas.Series(story_of, index=df.index)
    return df.reset_index()
def get_graph(source, sk_list=None, directed=False):
    """Build a weighted skeleton-to-skeleton graph from shared connectors.

    A bipartite graph of skeletons (``bipartite=0``) and their connectors
    (``bipartite=1``) is projected onto the skeletons.  Two skeletons are
    joined by an edge whose ``weight`` is the number of parallel edges the
    multigraph projection holds between them.

    Parameters
    ----------
    source
        Object providing ``skeleton_ids()`` and ``get_neuron(sk_list)``.
    sk_list : iterable, optional
        Skeleton ids to include; defaults to ``source.skeleton_ids()``.
    directed : bool, optional
        Build the bipartite graph as a ``DiGraph``.  The directed variant is
        not fully implemented (see TODO); the result is undirected either way.

    Returns
    -------
    networkx.Graph
        Weighted graph containing only skeletons that share a connector.
    """
    from collections import Counter

    if sk_list is None:
        sk_list = source.skeleton_ids()
    nr_list = source.get_neuron(sk_list)

    if directed:
        G = networkx.DiGraph()
        # TODO Add DiGraph Implementation
    else:
        G = networkx.Graph()

    G.add_nodes_from(sk_list, bipartite=0)
    for nr in nr_list:
        G.add_nodes_from(nr.connectors.keys(), bipartite=1)
        for con in nr.connectors:
            G.add_edge(nr.sid, con, weight=1)

    proj_multi = networkx.projected_graph(G, set(sk_list), multigraph=True)

    # Count parallel edges in one pass.  The previous
    # ``proj_multi.edges().count(...)`` rescanned the edge list for every
    # unique pair and only works where ``edges()`` returns a list.
    multiplicity = Counter(proj_multi.edges())

    G2 = networkx.Graph()
    for (u, v), w in multiplicity.items():
        G2.add_edge(u, v, weight=w)
    return G2
def get_top_n(g, node, n=5):
    '''
    Return the n nodes most similar to the given node.

    The result is a dict of at most n node:similarity pairs.  When the node
    is missing from the graph an error is logged and nothing is returned.

    :param g: the artists tags graph
    :param node: the node ('artist',name) or ('tag',name)
    :param n: the maximum number of similar tags to be returned
    '''
    if node not in g:
        logging.error('Node not in graph: %s' % node[1])
        return

    mode = g.node[node]['bipartite']
    peers = set(candidate for candidate, data in g.nodes(data=True)
                if data['bipartite'] == mode)
    g_proj = nx.projected_graph(g, peers)

    scored_edges = jaccard_sims(g, mode, g_proj.edges_iter(node))
    sims = {neighbour: sim for (_, neighbour, sim) in scored_edges}

    keep = min(n, len(sims))
    by_similarity = operator.itemgetter(1)
    best_first = sorted(sims.iteritems(), key=by_similarity, reverse=True)
    return dict(best_first[:keep])
def general_random_intersection_graph(n, m, p, seed=None):
    """Returns a random intersection graph with independent probabilities
    for connections between node and attribute sets.

    Parameters
    ----------
    n : int
        The number of nodes in the first bipartite set (nodes)
    m : int
        The number of nodes in the second bipartite set (attributes)
    p : list of floats of length m
        Probabilities for connecting nodes to each attribute
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Raises
    ------
    ValueError
        If ``p`` does not have exactly ``m`` entries.

    See Also
    --------
    gnp_random_graph, uniform_random_intersection_graph

    References
    ----------
    .. [1] Nikoletseas, S. E., Raptopoulos, C., and Spirakis, P. G.
       The existence and efficient construction of large independent sets
       in general random intersection graphs. In ICALP (2004), J. Díaz,
       J. Karhumäki, A. Lepistö, and D. Sannella, Eds., vol. 3142
       of Lecture Notes in Computer Science, Springer, pp. 1029–1040.
    """
    if len(p) != m:
        raise ValueError("Probability list p must have m elements.")

    # NOTE(review): ``seed.random()`` is called directly, so ``seed`` must
    # already be a generator object here -- presumably converted by a
    # decorator outside this view; confirm.
    two_mode = nx.empty_graph(n + m)
    attributes = range(n, n + m)
    # One draw per (node, attribute) pair, node-major, attribute-minor.
    two_mode.add_edges_from(
        (node, attribute)
        for node in range(n)
        for attribute, probability in zip(attributes, p)
        if seed.random() < probability
    )
    return nx.projected_graph(two_mode, range(n))
def general_random_intersection_graph(n, m, p, seed=None):
    """Return a random intersection graph with independent probabilities
    for connections between node and attribute sets.

    Parameters
    ----------
    n : int
        The number of nodes in the first bipartite set (nodes)
    m : int
        The number of nodes in the second bipartite set (attributes)
    p : list of floats of length m
        Probabilities for connecting nodes to each attribute
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Raises
    ------
    ValueError
        If ``p`` does not have exactly ``m`` entries.

    See Also
    --------
    gnp_random_graph, uniform_random_intersection_graph

    References
    ----------
    .. [1] Nikoletseas, S. E., Raptopoulos, C., and Spirakis, P. G.
       The existence and efficient construction of large independent sets
       in general random intersection graphs. In ICALP (2004), J. Díaz,
       J. Karhumäki, A. Lepistö, and D. Sannella, Eds., vol. 3142
       of Lecture Notes in Computer Science, Springer, pp. 1029–1040.
    """
    if m != len(p):
        raise ValueError("Probability list p must have m elements.")

    G = nx.empty_graph(n + m)
    # Attribute i is node n + i and is attached with probability p[i].
    attribute_probs = list(zip(range(n, n + m), p))
    for u in range(n):
        G.add_edges_from(
            (u, v) for v, q in attribute_probs if seed.random() < q)
    return nx.projected_graph(G, range(n))
def projected_graph(graph, nodedict, multigraph=False, name=None):
    """Calculates the projected graph with respect to a node list

    The graph is projected onto the nodes it shares with ``nodedict``.
    Requested nodes that are not among the graph's nodes of that node type
    are added afterwards as isolated nodes.

    Parameters:
        :graph (Graph): input graph, has to be generated via kegg_link_graph()
        :nodedict (dict): dict of nodes and nodetypes
        :multigraph (bool): passed through to ``nx.projected_graph``
        :name (str): optional name of the graph; defaults to
            ``"<graph.name>_projected"``

    Returns:
        :projected_graph (Graph): projected graph

    Raises:
        NoProjectedError: if ``graph`` and ``nodedict`` share no node

    .. seealso:: kegg_link_graph()
    """
    requested = set(nodedict.keys())
    common_nodes = set(graph.nodes) & requested
    if not common_nodes:
        raise NoProjectedError(graph)

    # The node type is read from an arbitrary common node.
    nodetype = graph.nodes[next(iter(common_nodes))]["nodetype"]

    missing_nodes = requested - set(get_nodes_by_nodetype(graph, nodetype))

    projection = nx.Graph.copy(
        nx.projected_graph(graph, common_nodes, multigraph))

    for node in missing_nodes:
        projection.add_node(node, nodetype=nodetype, label=node)

    if name is None:
        name = "{}_projected".format(graph.name)
    projection.name = name

    return projection
# Script fragment whose start and end lie outside this view.  The visible code
# records the current srcIp/destIp pair, adds the collected edges to G, projects
# G onto the collected source IPs, and stores the connected components of that
# projection in dayMap[dayVal].
# NOTE(review): nx.connected_component_subgraphs was removed in NetworkX 2.4 -- confirm the pinned version.
nodes[srcIp] = True srcIpMap[srcIp] = destIp if not srcIp in srcIpDestIpCount: srcIpDestIpCount[srcIp] = {} srcIpDestIpCount[srcIp][destIp] = True destIpSet.add(destIp) if (srcIp,destIp) not in edgeArr: edgeArr[(srcIp,destIp)] = True G.add_edges_from(edgeArr) G = nx.projected_graph(G,list(nodes.keys())) connected_comp = nx.connected_component_subgraphs(G) # print(nx.number_connected_components(G)) dayMap[dayVal] = connected_comp print("Done for Day ",dayVal) anomalyGraph = {} # writeFile.write("\n \n \n") # writeFile.write("day,component_size,%destIpSpanned") # writeFile.write("\n") for day in dayMap: components = dayMap[day] comp_no = 1
import la
import numpy as np
import matplotlib.pyplot as plt

# All relative paths below are resolved against this project directory.
os.chdir('/Users/jeff/PycharmProjects/ChordDiagram')
with open("UNTRADEDRelationships_NAICS.csv") as f:
    data = pd.DataFrame(pd.read_csv(f))

# Distinct identifiers found in the two columns of the relation.
peoplelist = data['Person_Id'].unique()
targetlist = data['Business_ID'].unique()
connectionlist = []

# Bipartite graph: people are tagged bipartite=0, businesses bipartite=1.
B = networkx.Graph()
B.add_nodes_from(peoplelist, bipartite=0)
B.add_nodes_from(targetlist, bipartite=1)

# One (person, business) edge per row of the CSV.
newlist = [
    (row['Person_Id'], row['Business_ID'])
    for index, row in data.iterrows()
]
B.add_edges_from(newlist)

# Project onto the businesses, keeping parallel edges (multigraph=True).
B2 = networkx.projected_graph(B, targetlist, multigraph=True)
# NOTE(review): `to_numpy_matrix` is not available in recent NetworkX
# releases -- confirm the pinned version before running.
mymatrix = networkx.to_numpy_matrix(B2, dtype=np.float16)

# The same business labels are used for both axes of the labelled array.
label = [list(targetlist), list(targetlist)]
mylarry = la.larry(mymatrix, label, dtype=float)

# Persist the adjacency matrix as integer-formatted CSV.
np.savetxt("adj_matr2.csv", mymatrix, fmt='%3d', delimiter=",")
# Label positions for the bipartite drawing: labels of nodes whose key is
# below zero are raised by 0.06, all other labels sit on the node itself.
BcGPos = {}
for p in BcG.pos:
    BcGPos[p] = (
        (BcG.pos[p][0], BcG.pos[p][1] + 0.06) if p < 0 else BcG.pos[p]
    )
nx.draw_networkx_labels(BcG, pos=BcGPos, labels=BcG_labels)
plt.axis("off")
plt.axis("tight")

# Second figure: projection of the bipartite graph onto `top_nodes`.
PG = nx.projected_graph(BcG, top_nodes)
posPG = nx.spring_layout(PG, k=0.15, iterations=10)
plt.figure()
plt.title("The graph of cliques of G")
nx.draw(PG, posPG, with_labels=False, node_color="g")

# Shift every label upwards (in place) so the text does not cover its node.
for p in posPG:
    posPG[p][1] += 0.045
# NOTE(review): this call reads `Bcg_labels`, whereas the bipartite drawing
# above reads `BcG_labels` -- confirm that both names are defined.
nx.draw_networkx_labels(PG, posPG, labels=Bcg_labels)
plt.show()

# #### Drawing cliques inside enclosed colored regions
# import igraph as ig
# Edges seen in this data file, and the source IPs that are also present
# in `validIpMap` (used as the node set of the projection).
edgeArr = []
validEdgeMap = {}
for line in dataFile:
    fields = line.split(",")
    length = len(fields)
    # Only records with exactly eight comma-separated fields are used.
    if length != 8:
        continue
    srcIp = fields[1]
    destIp = fields[length - 2]
    edgeArr.append((srcIp, destIp))
    if srcIp in validIpMap and srcIp not in validEdgeMap:
        validEdgeMap[srcIp] = True

G.add_edges_from(edgeArr)

# Project onto the valid source IPs and rank them with PageRank.
PG = nx.projected_graph(G, list(validEdgeMap))
mapVal = nx.pagerank(PG)
print(len(mapVal))

# One "day,ip,rank" line per ranked node.
for el in mapVal:
    writeFile.write("{},{},{}".format(dayVal, el, mapVal[el]))
    writeFile.write("\n")
    # NOTE(review): the original nesting of this flush is not recoverable
    # from the excerpt; flushing per record writes the same file content.
    writeFile.flush()
print("Done with dayVal =", dayVal)

# for el in rankMap:
#     ip = rankMap[el]
#     strVal = ""
#     for dayVal in rankMap[el].keys():
#         strVal += dayVal+"-"+rankMap[el][dayVal]
nx.draw_networkx_edges(BcG, pos=BcG.pos)

# Label positions for the bipartite drawing: labels of nodes whose key is
# below zero are raised by 0.06, all other labels sit on the node itself.
BcGPos = {}
for p in BcG.pos:
    BcGPos[p] = (
        (BcG.pos[p][0], BcG.pos[p][1] + 0.06) if p < 0 else BcG.pos[p]
    )
nx.draw_networkx_labels(BcG, pos=BcGPos, labels=BcG_labels)
plt.axis('off')
plt.axis("tight")

# Second figure: projection of the bipartite graph onto `top_nodes`.
PG = nx.projected_graph(BcG, top_nodes)
posPG = nx.spring_layout(PG, k=0.15, iterations=10)
plt.figure()
plt.title('The graph of cliques of G')
nx.draw(PG, posPG, with_labels=False, node_color='g')

# Shift every label upwards (in place) so the text does not cover its node.
for p in posPG:
    posPG[p][1] += 0.045
# NOTE(review): this call reads `Bcg_labels`, whereas the bipartite drawing
# above reads `BcG_labels` -- confirm that both names are defined.
nx.draw_networkx_labels(PG, posPG, labels=Bcg_labels)
plt.show()

# #### Drawing cliques inside enclosed colored regions
# import igraph as ig
# Rank plot: x is the 1-based rank, y the corresponding value of
# `degreesSortedList`; the x axis is logarithmic.
plt.plot(list(range(1, len(degreesSortedList) + 1)), degreesSortedList)
plt.title('Location popularity')
plt.ylabel('Number of users in a location')
plt.xlabel('Location rank')
plt.xscale('log')
plt.axis([0, 1000000, 0, 3400])
plt.show()


# In[ ]:


# First element of every entry whose second element is at least 150
# (despite the variable name, the threshold used here is 150).
overTen = [entry[0] for entry in degreesSorted if entry[1] >= 150]


# In[ ]:


# Project the location/user network onto the selected nodes.
foldedLoc = nx.projected_graph(locUsrNtwrk, overTen)


# In[ ]:


nx.number_of_nodes(foldedLoc)


# Betweenness centrality

# In[ ]:


bet_cent = nx.betweenness_centrality(foldedLoc, k=5)


# In[ ]:


# Highest centrality first; show the top ten (node, value) pairs.
bet_cent_sorted = sorted(
    bet_cent.items(), key=lambda item: item[1], reverse=True
)
print(bet_cent_sorted[:10])
# Build lists of nodes and edges:
# Every CSV row is turned into (first cell, other cell) pairs, one pair per
# remaining cell of the row; pairs with a missing value are dropped.
df = (
    pd.read_csv('tales-01.txt', header=None)
    .groupby(level=0)
    .apply(lambda x: pd.DataFrame([[x.iloc[0, 0], v] for v in x.iloc[0, 1:]]))
    .reset_index(drop=True)
    .dropna()
    # `rename` (not `rename_axis`) is the call that relabels columns;
    # current pandas rejects a label mapping passed to `rename_axis`.
    .rename(columns={0: 'text', 1: 'word'})
)
edges = df.values.tolist()
nodes_0 = list(set(df['text'].values.tolist()))
nodes_1 = list(set(df['word'].values.tolist()))

# Build a bipartite graph:
B = nx.Graph()
B.add_nodes_from(nodes_0, bipartite=0)  # Add the node attribute "bipartite"
B.add_nodes_from(nodes_1, bipartite=1)
B.add_edges_from(edges)

# Project one side of the graph:
G = nx.projected_graph(B, nodes_1)
nx.draw(G, pos=nx.spring_layout(G), with_labels=True, node_color='#00CCFF')

# Choose your output:
# plt.show()
plt.savefig("graphing.png", dpi=300)
## create a unipartite network of climbers

# create array of counts for each climber
climbers = []
for i in range(len(clean)):
    # NOTE(review): assumes `clean.loc[i]` is a list for labels 0..len-1,
    # so that `+` concatenates -- confirm against how `clean` is built.
    climbers = climbers + clean.loc[i]
# (unique values, occurrence counts) packed into a record array whose
# auto-generated fields are f0 (value) and f1 (count).
arr = np.unique(climbers, return_counts=True)
arr = np.core.records.fromarrays(arr)
# Sort by count, largest first.
counts = np.sort(arr, order='f1')[::-1]
counts[:25]

climbs = list(set(firsts.name.values.tolist()))
FAs = list(set(climbers))

# build a bipartite graph:
B = nx.Graph()
B.add_nodes_from(climbs, bipartite=0)
B.add_nodes_from(FAs, bipartite=1)
B.add_edges_from(edges)

# project one side of the graph:
G = nx.projected_graph(B, FAs)

# plot using matplotlib
# The figure-size keyword of `plt.figure` is `figsize`; `size` is not an
# accepted keyword.
plt.figure(1, figsize=(14, 10))
pos = nx.spring_layout(G)
nx.draw(G, pos=pos, with_labels=False, node_color='#0000FF88', node_size=100)
nodes[srcIp] = True srcIpMap[srcIp] = destIp if not srcIp in srcIpDestIpCount: srcIpDestIpCount[srcIp] = {} srcIpDestIpCount[srcIp][destIp] = True destIpSet.add(destIp) if (srcIp, destIp) not in edgeArr: edgeArr[(srcIp, destIp)] = True G.add_edges_from(edgeArr) G = nx.projected_graph(G, list(nodes.keys())) connected_comp = nx.connected_component_subgraphs(G) # print(nx.number_connected_components(G)) dayMap[dayVal] = connected_comp print("Done for Day ", dayVal) anomalyGraph = {} # writeFile.write("\n \n \n") # writeFile.write("day,component_size,%destIpSpanned") # writeFile.write("\n") for day in dayMap: components = dayMap[day] comp_no = 1 ipCountMap = {}
sorted_val = sorted(ipMap.items(), key=lambda x: float(x[1]), reverse=True) writeFile = open("../dataFiles/dayWiseAnomaly", "w") for dayVal in range(1, 16): edgeArr = [] ipRankArrMap = {} daywiseFile = open("../dataFiles/sipscan-" + str(dayVal)) for line in daywiseFile: length = len(line.split(",")) srcIp = line.split(",")[1] destIp = line.split(",")[length - 2] edgeArr.append((srcIp, destIp)) ipRankArrMap[srcIp] = [] graph = nx.Graph() graph.add_edges_from(edgeArr) graph = nx.projected_graph(graph, list(ipRankArrMap.keys())) pageRank = get_page_rank(graph) for el in pageRank: ipRankArrMap[el].append(pageRank[el]) buggyIps = [] while len(sorted_val) != 0: ip_to_be_removed = sorted_val[0][0] print('Removing ', ip_to_be_removed) if not ip_to_be_removed in ipRankArrMap: print('This is not present here = ', ip_to_be_removed) sorted_val.remove(sorted_val[0]) continue
writeFile = open("../dataFiles/dayWiseAnomaly","w") for dayVal in range(1,16): edgeArr = [] ipRankArrMap = {} daywiseFile = open("../dataFiles/sipscan-"+str(dayVal)) for line in daywiseFile: length = len(line.split(",")) srcIp = line.split(",")[1] destIp = line.split(",")[length -2] edgeArr.append((srcIp,destIp)) ipRankArrMap[srcIp] = [] graph = nx.Graph() graph.add_edges_from(edgeArr) graph = nx.projected_graph(graph,list(ipRankArrMap.keys())) pageRank = get_page_rank(graph) for el in pageRank: ipRankArrMap[el].append(pageRank[el]) buggyIps = [] while len(sorted_val) != 0: ip_to_be_removed = sorted_val[0][0] print('Removing ',ip_to_be_removed) if not ip_to_be_removed in ipRankArrMap: print('This is not present here = ',ip_to_be_removed) sorted_val.remove(sorted_val[0]) continue