def generate_small_world_graph(self):
    max_edges = self.NODE_COUNT * (self.NODE_COUNT - 1) / 2
    if self.EDGE_COUNT > max_edges:
        return complete_graph(self.NODE_COUNT)
    graph = Graph()
    graph.add_nodes_from(range(self.NODE_COUNT))
    edges = performer.edge_indices.flatten()
    probabilities = performer.probabilities.flatten()
    for trial in range(len(edges) - 9):
        edge_index = numpy.random.choice(edges, p=probabilities)
        source, destination = self.edge_nodes(edge_index)
        graph.add_edge(source, destination,
                       length=self.link_length(source, destination),
                       weight=self.edge_weight(source, destination))
        probabilities[edge_index] = 0
        probabilities /= sum(probabilities)
        if max(graph.degree().values()) > self.DEGREE_MAX:
            graph.remove_edge(source, destination)
        if graph.number_of_edges() > self.EDGE_COUNT:
            victim = random.choice(graph.edges())
            graph.remove_edge(victim[0], victim[1])
        if self.constraints_satisfied(graph):
            print 'performer.generate_small_world_graph:',
            print self.BENCHMARK, self.NODE_COUNT, self.EDGE_COUNT, trial
            self.process_graph(graph)
            return graph

def test_algorithms(algorithms, graph: Graph, k):
    print()
    print("Testing graph with {0} nodes and {1} edges, expected result: {2}"
          .format(graph.number_of_nodes(), graph.number_of_edges(), k))
    for algorithm, name in algorithms:
        start_time = time.time()
        args = inspect.getfullargspec(algorithm)[0]
        if len(args) == 2:
            result = len(algorithm(graph))
        else:
            result = len(algorithm(graph, k))
        print("{0}: {1}, time: {2}".format(name, result, time.time() - start_time))
        assert k == result, "Wrong result!"

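# Minimal usage sketch for test_algorithms (not part of the original module): the
# CenterCoverSolver class below is a made-up illustration, and the surrounding module is
# assumed to already import time, inspect and networkx. Note that getfullargspec counts
# `self`, so a bound method taking only `graph` has two args and is called without `k`.
import networkx as nx


class CenterCoverSolver:
    """Toy solver: node 0 is the center of nx.star_graph(n) and by itself covers every edge."""

    def solve(self, graph):
        return {0}


if __name__ == "__main__":
    star = nx.star_graph(5)  # node 0 plus 5 leaves, minimum vertex cover size is 1
    test_algorithms([(CenterCoverSolver().solve, "toy center cover")], star, k=1)
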
__author__ = 'zplin'
import sys
import json
import csv
from os import path
import numpy as np
from networkx import Graph, transitivity, clustering, average_shortest_path_length, connected_component_subgraphs
from networkx.readwrite import json_graph

if __name__ == '__main__':
    with open(sys.argv[1]) as g_file:
        data = json.load(g_file)
        g = Graph(json_graph.node_link_graph(data))
    print('Number of nodes:', g.number_of_nodes())
    print('Average degree:', 2 * g.number_of_edges() / g.number_of_nodes())
    print('Transitivity:', transitivity(g))
    cc = clustering(g)
    print('Average clustering coefficient:', np.mean(list(cc.values())))
    for subgraph in connected_component_subgraphs(g):
        if subgraph.number_of_nodes() > 1:
            print('Average shortest path length for subgraph of', subgraph.number_of_nodes(), ':',
                  average_shortest_path_length(subgraph))
    # Calculate the average clustering coefficient for each degree
    degree_cc = {}
    for node, degree in g.degree_iter():
        if degree not in degree_cc:
            degree_cc[degree] = []
        degree_cc[degree].append(cc[node])
    with open(path.join(path.dirname(sys.argv[1]), 'clustering.csv'), 'w', newline='') as cc_file:
        # write one row per degree with its average clustering coefficient
        writer = csv.writer(cc_file)
        writer.writerow(['degree', 'average_clustering'])
        for degree in sorted(degree_cc):
            writer.writerow([degree, np.mean(degree_cc[degree])])

class Network(HasTraits):
    """ The implementation of the Connectome Networks """

    implements(INetwork)

    # Network ID, from parsed GraphML the graphid
    networkid = ''

    # Network name
    networkname = Str

    # network name as seen in the TreeView
    name = Str

    # Is it a hierarchical network?
    hierarchical = CBool(False)

    # TODO: later, also Hypergraph?!
    # see: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1000385
    hypergraph = CBool(False)

    # Directionality of the Network, {True: 'directed', False: 'undirected'}
    directed = CBool(False)

    # metadata for the network
    metadata = Dict

    # NodeKeys from the parsed GraphML
    # These are Dict of Dict, all having strings
    nodekeys = {}

    # Edgekeys, from parsed GraphML
    edgekeys = {}

    # A NetworkX AttrGraph containing all the information
    graph = Any

    # Surface containers
    surfaces = List(ISurfaceContainer)

    # Surface containers loaded
    surfaces_loaded = List(ISurfaceContainer)

    # Volume data
    volumes = List(IVolume)

    # Track data
    tracks = List(ITrackfile)

    # is this network active, and thus a render manager displayed?
    active = Bool

    # the render manager of this network
    rendermanager = Instance(RenderManager)

    # DatasourceManager Instance of this network
    datasourcemanager = Instance(DatasourceManager)

    # private traits
    ###########

    # parent cfile this network belongs to
    _parentcfile = Any

    # filezip of cfile
    _filezip = DelegatesTo('_parentcfile')

    # edge parameters for visualization
    _edge_para = Instance(EdgeParameters)

    # View
    traits_view = View(
        Item('networkname', style='readonly'),
        Item('hierarchical', style='simple'),
        Item('hypergraph', style='simple'),
        Item('directed', style='simple'),
        Item('active', style='simple'),
        title='A network',
    )

    def __init__(self, name, src=None, directed='0', pickled_graph=None,
                 hierarchical='0', hypergraph='0', graph=None):
        """ Initializes the network and sets the traits.

        Parameters
        ----------
        name : string
            the name of the network
        src : file handle or StringIO object
            the source text of the network to parse
        pickled_graph : NetworkX graph
            reference to a graph object, src should be None
        directed : bool
            Is the network directed?
        hierarchical : bool
            Is the network hierarchical? (default: '0') Not implemented yet.
        hypergraph : bool
            Is the network a hypergraph? (default: '0') Not implemented yet.
        graph : NetworkX graph
            an already constructed graph to set directly, src should be None
        """

        # initialize the traits
        self.networkname = name
        self.directed = int(directed)
        self.hierarchical = int(hierarchical)
        self.hypergraph = int(hypergraph)

        if src is None and not pickled_graph is None:
            self.load_pickled_graphml(pickled_graph)
        else:
            if not src is None:
                # generates NetworkX Graph
                self.graph = self.parse_network_graphml(src)
            elif not graph is None:
                self.graph = graph
            else:
                if self.directed:
                    from networkx import DiGraph
                    self.graph = DiGraph()
                    logger.info("Initialize with empty directed Graph")
                else:
                    from networkx import Graph
                    self.graph = Graph()
                    logger.info("Initialize with empty undirected Graph")

        # initializes the weight key of the graph
        # with the first edgekey
        if len(self.edgekeys) > 0:
            edgk = self.edgekeys.keys()
            if not 'weight' in edgk:
                self.set_weight_key(edgk[0])
        else:
            # try grabbing first edge from the graph
            if self.graph.number_of_edges() > 0:
                it = self.graph.edges_iter(data=True)
                edg = it.next()
                if len(edg[2]) > 0:
                    # if it has a weight key, just leave it
                    edgk = edg[2].keys()
                    if not 'weight' in edgk:
                        self.set_weight_key(edgk[0])
            else:
                pass
                # logger.error('Cannot set weight key for network : ' + self.networkname)

    def _name_default(self):
        return self.networkname

    def _active_default(self):
        return False

    def _active_changed(self, value):
        if value:
            n = self.name
            if ' [Active]' not in n:
                self.name = "%s [Active]" % n

            # XXX: do refactor with threaded loading of surfaces
            # and default spring force layout for graph rendering!
            # see also TraitsUI Demos: Multi thread demo

            # load the surface containers data
            # make a deep copy of the already loaded surface containers
            import copy
            self.surfaces = copy.deepcopy(self.surfaces_loaded)
            for surfcont in self.surfaces:
                surfcont.load_surface_container()

            if self.rendermanager is None:
                self._create_datasourcemanager()
                self._create_renderer()

            # if there are no surfaces, initialize
            # network rendering, but only if dn_positions are given
            if len(self.surfaces) == 0:
                logger.debug('No surfaces found. Try to render graph view with dn_position information.')
                self.rendermanager.datasourcemanager._compute_3DLayout(-1, -1)
                self.rendermanager.visualize_graph()
            else:
                logger.debug('SurfaceContainer found. Try to render 3D View using %s.' % self.surfaces[0].name)
                if len(self.surfaces[0].surfaces) == 0:
                    logger.debug('Rendering not possible because SurfaceContainer contains no surfaces.')
                else:
                    logger.debug('Using first surface for rendering.')
                    self.surfaces[0].surfaces[0]._layout_3DView()

            if not self._parentcfile._workbenchwin is None:
                #from enthought.pyface.timer.api import do_later
                from enthought.pyface.api import GUI
                GUI.invoke_later(self._parentcfile._workbenchwin.status_bar_manager.set, message='')
        else:
            self.name = self.name.replace(' [Active]', '')
            logger.debug('Close RenderManager scenes')
            self.rendermanager.close_scenes()
            logger.debug('All scenes closed.')
            # FIXME: what is happening in the following?
            # e.g. for instances. e.g. reset traits?
            # XXX: this is somehow not correct. do i need to use del
            # or remove/reset traits?
            self.rendermanager = None
            self.datasourcemanager = None
            self.surfaces = []

    def _de_activate(self):
        """ Toggles the internal state of the activation """
        if self.active:
            self.active = False
        else:
            self._parentcfile._workbenchwin.status_bar_manager.message = 'Activating network ...'
            self.active = True

    def _edge_parameters(self):
        """ Dialog to change edge attribute and thresholding """
        if self._edge_para is None:
            self._edge_para = EdgeParameters(self, self.rendermanager.attract.point_scalars_name)
        self._edge_para.configure_traits()

    def _create_renderer(self):
        """ Creates the renderer instance if not yet available
        and opens the scenes in mayavi """
        if self.active:
            if self.rendermanager is None:
                logger.debug('Create a RenderManager instance')
                self.rendermanager = RenderManager(network=self)
            else:
                logger.debug('RenderManager instance already running. This is an error.')

    def _create_datasourcemanager(self):
        """ Creates the datasource manager instance if not yet available """
        if self.active:
            if self.datasourcemanager is None:
                logger.debug('Create a DatasourceManager instance')
                self.datasourcemanager = DatasourceManager(network=self)
            else:
                logger.debug('DatasourceManager instance already running. This is an error.')

    def _render_matrix(self):
        """ Invokes the connectivity matrix viewer """
        # assume the network is activated (i.e. data source generated)
        # we need the edge parameter instance initialized
        if self._edge_para is None:
            self._edge_para = EdgeParameters(self, self.rendermanager.attract.point_scalars_name)
        logger.debug('Invoke Matrix Viewer...')
        self.rendermanager.invoke_matrix_viewer()

    def _trackvis_launch(self):
        """ Generates the scene file and launches TrackVis on the selected nodes """
        import tempfile
        logger.debug('Starting TrackVis ...')

        # extract selected subgraph
        selectionlist = self.get_selectiongraph_list()
        if len(selectionlist) == 0:
            # message
            from enthought.traits.ui.message import message
            message(message='No nodes selected for ROI creation!', title='Infomessage', buttons=['OK'], parent=None)

        tmpgraph = self.graph.subgraph(selectionlist)

        # extract trackfile temporarily
        if len(self.tracks) == 0:
            logger.info('No trackfile found to invoke Trackvis.')
            return
        else:
            # load the first trackfile
            trackfname = self.tracks[0].load_trackfile_to_file()
            # find the first valid segmentation volume in the self.volumes list
            for vol in self.volumes:
                if vol.segmentation:
                    logger.debug('Found a segmentation volume file. Assume labels are corresponding.')
                    volumefname = vol.load_volume_to_file()
                    break

        # generate the scene file in the temporary folder
        tmpscenefile = tempfile.mkstemp(prefix='tmp', suffix='.scene')
        # generate trackfile
        generate_scene_file(scenefname=tmpscenefile[1],
                            trackfname=trackfname,
                            volumefname=volumefname,
                            selectiongraph=tmpgraph)
        # execute trackvis in a thread
        pref = preference_manager.preferences
        action = ThreadedTrackvis(tvpath=pref.get('cviewer.plugins.ui.trackvispath'),
                                  fname=tmpscenefile[1],
                                  trkfname=trackfname,
                                  volfname=volumefname)
        action.start()

    def add_surface_container(self, surfacecontainer):
        """ Add a surface container to the loaded list

        Parameters
        ----------
        surfacecontainer : `ISurfaceContainer` instance
            a surface container object
        """
        surfacecontainer._networkref = self
        self.surfaces_loaded.append(surfacecontainer)

    def add_volume(self, volume):
        """ Adds a volume to the volumes list

        Parameters
        ----------
        volume : `IVolume` instance
            a volume object
        """
        self.volumes.append(volume)

    def add_trackfile(self, trackfile):
        """ Adds a trackfile to the tracks list

        Parameters
        ----------
        trackfile : `ITrackfile` instance
            a trackfile of type ITrackfile
        """
        self.tracks.append(trackfile)

    def unselect_all(self):
        """ Unselects every node in the current network """
        if self.datasourcemanager is None:
            raise Exception('No DatasourceManager. You have to first activate the network and render it.')
        from numpy import array
        # get all the nodes
        graphnodes = self.datasourcemanager._srcobj.relabled_graph.nodes()
        # and unselect all nodes
        self.rendermanager._select_nodes(selection_node_array=array(graphnodes))

    def select_all(self):
        """ Selects all nodes in the current network """
        if self.datasourcemanager is None:
            raise Exception('No DatasourceManager. You have to first activate the network and render it.')
        from numpy import array
        # get all the nodes
        graphnodes = self.datasourcemanager._srcobj.relabled_graph.nodes()
        # and select all nodes
        self.rendermanager._select_nodes(selection_node_array=array(graphnodes), activate=True)

    def set_selectiongraph(self, sellist, activate=False):
        """ Sets the selected nodes in the network to active.

        Parameters
        ----------
        sellist : array_like
            a list of nodeids conforming to the NetworkX node id
        activate : boolean
            set the selectionlist nodes to activated?
        """
        from numpy import array, int16
        graphnodes = self.graph.nodes(data=False)

        if self.rendermanager is None:
            raise Exception('No RenderManager. You have to first activate the network and render it.')

        if len(sellist) == 0:
            self.unselect_all()
            return

        from numpy import array, append
        tmparr = array([])
        for node in sellist:
            # check if it is a valid graph node id
            if node in graphnodes:
                # get the node id as integer
                j = int(node.lstrip('n')) - 1
                # extend empty array with node id
                tmparr = append(tmparr, j)

        self.rendermanager._select_nodes(selection_node_array=array(tmparr, dtype=int16), activate=activate)

    def get_selectiongraph_list(self):
        """ Returns a list of the node ids that were selected in
        the rendered scene.
        """
        if self.datasourcemanager is None:
            raise Exception('No DatasourceManager. You have to first activate the network and render it.')

        import numpy as np

        sel_list = []

        if not self.active:
            return sel_list

        selnodesarray = self.datasourcemanager._srcobj.selected_nodes

        # array with indices where the nodes are selected (==1)
        idx = np.where(selnodesarray == 1)[0]

        for i in idx:
            sel_list.append('n' + str(i + 1))

        return sel_list

    def set_weight_key(self, weight_key=None):
        """ Sets the weight key in the graph representation of the network.

        Parameters
        ----------
        weight_key : Str
            Must be a possible existing edge key
        """
        if not weight_key is None:
            for u, v, d in self.graph.edges(data=True):
                self.graph[u][v]['weight'] = d[weight_key]
            return True
        else:
            return False

    def get_matrix(self, weight_key=None):
        """ Returns the connectivity matrix of the network with the nodes
        ordered according to their id in the GraphML file.

        Parameters
        ----------
        weight_key : Str
            Possible key value of the edges

        Returns
        -------
        matrix : `Numpy.array` instance
            The connectivity matrix
        """
        nr_nodes = len(self.graph.nodes())

        if not weight_key is None:
            # FIXME: sanity check if weight_key exists
            # thanks to Aric Hagberg
            for u, v, d in self.graph.edges(data=True):
                self.graph[u][v]['weight'] = d[weight_key]

        nodes = [(lambda nmod: 'n' + str(nmod))(node) for node in range(1, nr_nodes + 1)]
        from networkx import to_numpy_matrix
        return to_numpy_matrix(self.graph, nodelist=nodes)

    def toggle_surface(self):
        """ Toggle the surface for the selected network nodes """
        if self.rendermanager is None:
            raise Exception('No RenderManager. You have to first activate the network and render it.')
        self.rendermanager._toggle_surface()

    def show_surface(self):
        """ Shows the surface for the selected network nodes """
        if self.rendermanager is None:
            raise Exception('No RenderManager. You have to first activate the network and render it.')
        self.rendermanager._show_surface()

    def load_pickled_graphml(self, graph):
        """ Loads a pickled GraphML file

        Parameters
        ----------
        graph : NetworkX Graph instance
            A graph instance
        """
        # setting the graph
        self.graph = graph

        if self.graph.has_node('n0'):
            if self.graph.node['n0'].has_key('nodekeys'):
                # extracting the node keys from the first node
                self.nodekeys = self.graph.node['n0']['nodekeys']
            # extracting the edge keys from the first edge (without explanation)
            if self.graph.node['n0'].has_key('edgekeys'):
                self.edgekeys = self.graph.node['n0']['edgekeys']
            if self.graph.node['n0'].has_key('graphid'):
                self.networkid = self.graph.node['n0']['graphid']
            # remove node
            self.graph.remove_node('n0')

    def _return_default_edgevalue(self, edgekeys, key):
        """ Looks up if there is a default value defined, otherwise
        returns zero """
        if edgekeys[key].has_key('default'):
            return float(edgekeys[key]['default'])
        else:
            return 0.0

    def parse_network_graphml(self, path):
        """ Read network in GraphML format from a path.

        Parameters
        ----------
        path : string
            path to the GraphML file

        Returns
        -------
        graph : NetworkX `Graph`
        """
        import networkx as nx
        from networkx.utils import _get_fh
        from lxml import etree

        # Return a file handle for given path.
        # Path can be a string or a file handle.
        # Attempt to uncompress/compress files ending in .gz and .bz2.
        fh = _get_fh(path, mode='r')

        tree = etree.parse(fh)
        # get the root node from parsed lxml
        root = tree.getroot()

        # Schema Validation
        # http://codespeak.net/lxml/validation.html#xmlschema

        # define the namespace prefixes
        nsprefix = "{%s}" % root.nsmap[None]
        nsxlink = "{%s}" % root.nsmap['xlink']

        nodekeys = {}
        edgekeys = {}
        defaultDirected = [True]

        # Parse the KEYs
        for child in root.iterchildren():
            if child.tag == (nsprefix + 'key'):
                attribs = child.attrib

                ddkeys = {}
                for mchildren in child:
                    if mchildren.tag == (nsprefix + 'default'):
                        ddkeys['default'] = mchildren.text
                    elif mchildren.tag == (nsprefix + 'desc'):
                        ddkeys['desc'] = mchildren.text

                if child.attrib['for'] == 'node':
                    # Parse all the node keys
                    # Read in the description and the default (if existing)
                    # dict of dicts for nodes: key1: the id; key2: rest: attr.name, attr.type, desc, default
                    nodekeys[attribs['id']] = {'attr.name': attribs['attr.name'],
                                               'attr.type': attribs['attr.type']}
                    # add default/desc keys if existing
                    nodekeys[attribs['id']].update(ddkeys)

                elif child.attrib['for'] == 'edge':
                    # Parse all the edge keys
                    # Read in the description and the default (if existing)
                    # dict of dicts for edges: key1: the id; key2: rest: attr.name, attr.type, desc, default
                    edgekeys[attribs['id']] = {'attr.name': attribs['attr.name'],
                                               'attr.type': attribs['attr.type']}
                    # add default/desc keys if existing
                    edgekeys[attribs['id']].update(ddkeys)

                else:
                    logger.error("The 'for' attribute of key-tag not known, must be either node or edge")

            elif child.tag == (nsprefix + 'graph'):
                # start parsing the graph into networkx data structure
                # create graph depending on (either AttrGraph or AttrDiGraph)
                # directionality: undirected/directed
                # version of networkx:
                # contains self-loops
                # edges have dicts
                # data per graph/node/edge
                for attr, value in child.items():
                    if attr == 'edgedefault' and value == 'undirected':
                        defaultDirected[0] = False
                    elif attr == 'id':
                        graphid = value

                if defaultDirected[0]:
                    G = nx.DiGraph()
                else:
                    G = nx.Graph()

                # add id, nodekeys and edgekeys as traits
                self.networkid = graphid
                self.nodekeys = nodekeys
                self.edgekeys = edgekeys

                # iterate over all nodes and edges
                for children in child.iterchildren():
                    if children.tag == (nsprefix + 'node'):
                        # parse the node
                        for attr, value in children.items():
                            if attr == 'id':
                                # add the node with corresponding id
                                G.add_node(value)
                                # keep node id to store attributes
                                nodeid = value
                            elif attr == (nsxlink + 'href'):
                                # add xlink to node dictionary
                                G.node[nodeid]['xlink'] = value
                            else:
                                # node attribute not known
                                logger.warning('The following node attribute is not known and thus discarded: ' + attr + ':' + value)

                        # parse node data, add to node dict
                        for data in children.iterchildren():
                            # read the keylabel, i.e. the data attribute name
                            keylabel = data.attrib['key']
                            # is the keylabel in the list of allowed keys
                            if nodekeys.has_key(keylabel):
                                if not data.text == '':
                                    # add data to the node's dict
                                    G.node[nodeid][keylabel] = data.text
                                else:
                                    # no data available, check if default value exists
                                    if nodekeys[keylabel].has_key('default'):
                                        # add default data to the node's dict
                                        G.node[nodeid][keylabel] = nodekeys[keylabel]['default']
                                        logger.debug('Added default value ' + keylabel + ':' + nodekeys[keylabel]['default'])
                                    else:
                                        logger.warning('Neither data nor default value defined for ' + keylabel)
                                        # TODO: Work with exceptions!
                            else:
                                logger.warning("Data entry with key " + keylabel + " not defined.")

                    elif children.tag == (nsprefix + 'edge'):
                        # parse the edge
                        # parse its attributes
                        for attr, value in children.items():
                            if attr == 'id':
                                # no usage of edge id
                                # add the edge with corresponding id
                                src = children.attrib['source']
                                tar = children.attrib['target']
                                G.add_edge(src, tar)
                                # keep src and tar id to store attributes
                                srcid = src
                                tarid = tar
                            elif attr == (nsxlink + 'href'):
                                # add xlink to edge dictionary
                                G.edge[srcid][tarid]['xlink'] = value

                        # parse data, and add to the edge dict
                        for data in children.iterchildren():
                            # read the keylabel, i.e. the data attribute name
                            keylabel = data.attrib['key']
                            # is the keylabel in the list of allowed keys
                            if self.edgekeys.has_key(keylabel):
                                if not data.text == '':
                                    # add data to the edge's dict, assume float!!
                                    G.edge[srcid][tarid][keylabel] = float(data.text)
                                else:
                                    # no data available, check if default value exists
                                    G.edge[srcid][tarid][keylabel] = self._return_default_edgevalue(self.edgekeys, keylabel)

                        data_keys = G.edge[srcid][tarid].keys()
                        # check if we missed some edge keys that are available in the header
                        for k, v in self.edgekeys.items():
                            if not k in data_keys:
                                G.edge[srcid][tarid][k] = self._return_default_edgevalue(self.edgekeys, k)

        # return the generated network graph
        return G

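# Hedged usage sketch for the Network class (not from the original source): it assumes the
# Connectome Viewer Traits machinery above is importable, and 'example.graphml' is a
# hypothetical GraphML file whose node ids follow the 'n1', 'n2', ... scheme that
# get_matrix() expects.
if __name__ == '__main__':
    net = Network('example network', src=open('example.graphml'))
    print net.networkid, net.graph.number_of_nodes(), net.graph.number_of_edges()
    matrix = net.get_matrix()  # connectivity matrix ordered by GraphML node id
    print matrix.shape
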
    print('{0}/{1} nodes processed'.format(i, n))
    print('Delete {0} orphaned nodes'.format(len(orphaned)))
    graph.remove_nodes_from(orphaned)

    print('Calculate offset')
    points = [node[1]['pos'] for node in graph.nodes(data=True)]
    min_x = min(points, key=lambda p: p[0])[0]
    min_y = min(points, key=lambda p: p[1])[1]
    for node in graph.nodes_iter():
        pos = (graph.node[node]['pos'][0] - min_x, graph.node[node]['pos'][1] - min_y)
        graph.node[node]['pos'] = pos
    print('Translated data by ({0}, {1})'.format(-min_x, -min_y))

    print('Calculate edge weights')
    n = graph.number_of_edges()
    i = 0
    for edge in graph.edges():
        lat1 = math.radians(graph.node[edge[0]]['lat'])
        lon1 = math.radians(graph.node[edge[0]]['lon'])
        lat2 = math.radians(graph.node[edge[1]]['lat'])
        lon2 = math.radians(graph.node[edge[1]]['lon'])
        graph[edge[0]][edge[1]]['weight'] = distance(lat1, lon1, lat2, lon2)
        i += 1
        print('{0}/{1} edges processed'.format(i, n), end='\r')
    print('{0}/{1} edges processed'.format(i, n))

    print('Write {0}'.format(output_file))
    write_gpickle(graph, output_file)
    stop = timeit.default_timer()

def test_algorithms(algorithms, graph: nx.Graph):
    print()
    print("Testing graph with {0} nodes and {1} edges".format(graph.number_of_nodes(), graph.number_of_edges()))
    results = []
    for algorithm, name in algorithms:
        # make a copy of the graph in case the algorithm mutates it
        graph_copy = graph.copy()
        start_time = time.time()
        result = len(algorithm.get_fbvs(graph_copy))
        print("{0}: {1}, time: {2}".format(name, result, time.time() - start_time))
        results.append(result)
    assert results.count(results[0]) == len(results), "The algorithms' results are not the same!"

def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[], alpha=0.5, method="OTD",
                                   base=math.e, exp_power=2, proc=mp.cpu_count(), chunksize=None,
                                   cache_maxsize=1000000, shortest_path="all_pairs", nbr_topk=1000):
    """Compute Ricci curvature for edges in given edge lists.

    Parameters
    ----------
    G : NetworkX graph
        A given directed or undirected NetworkX graph.
    weight : str
        The edge weight used to compute Ricci curvature. (Default value = "weight")
    edge_list : list of edges
        The list of edges to compute Ricci curvature, set to [] to run for all edges in G. (Default value = [])
    alpha : float
        The parameter for the discrete Ricci curvature, range from 0 ~ 1.
        It means the share of mass to leave on the original node.
        E.g. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr.
        (Default value = 0.5)
    method : {"OTD", "ATD", "Sinkhorn"}
        The optimal transportation distance computation method. (Default value = "OTD")

        Transportation method:
            - "OTD" for Optimal Transportation Distance,
            - "ATD" for Average Transportation Distance,
            - "Sinkhorn" for OTD approximated Sinkhorn distance. (faster)
    base : float
        Base variable for weight distribution. (Default value = `math.e`)
    exp_power : float
        Exponential power for weight distribution. (Default value = 2)
    proc : int
        Number of processors used for multiprocessing. (Default value = `cpu_count()`)
    chunksize : int
        Chunk size for multiprocessing, set None for auto decide. (Default value = `None`)
    cache_maxsize : int
        Max size of the LRU cache for pairwise shortest path computation.
        Set this to `None` for an unlimited cache. (Default value = 1000000)
    shortest_path : {"all_pairs", "pairwise"}
        Method to compute shortest path. (Default value = `all_pairs`)
    nbr_topk : int
        Only take the top k edge weight neighbors for density distribution.
        Smaller k runs faster but the result is less accurate. (Default value = 1000)

    Returns
    -------
    output : dict[(int,int), float]
        A dictionary of edge Ricci curvature. E.g.: {(node1, node2): ricciCurvature}.
    """

    logger.trace("Number of nodes: %d" % G.number_of_nodes())
    logger.trace("Number of edges: %d" % G.number_of_edges())

    if not nx.get_edge_attributes(G, weight):
        logger.info('Edge weight not detected in graph, use "weight" as default edge weight.')
        for (v1, v2) in G.edges():
            G[v1][v2][weight] = 1.0

    # ---set to global variable for multiprocessing used.---
    global _Gk
    global _alpha
    global _weight
    global _method
    global _base
    global _exp_power
    global _proc
    global _cache_maxsize
    global _shortest_path
    global _nbr_topk
    global _apsp
    # -------------------------------------------------------

    _Gk = nk.nxadapter.nx2nk(G, weightAttr=weight)
    _alpha = alpha
    _weight = weight
    _method = method
    _base = base
    _exp_power = exp_power
    _proc = proc
    _cache_maxsize = cache_maxsize
    _shortest_path = shortest_path
    _nbr_topk = nbr_topk

    # Construct nx to nk dictionary
    nx2nk_ndict, nk2nx_ndict = {}, {}
    for idx, n in enumerate(G.nodes()):
        nx2nk_ndict[n] = idx
        nk2nx_ndict[idx] = n

    if _shortest_path == "all_pairs":
        # Construct the all pair shortest path dictionary
        # if not _apsp:
        _apsp = _get_all_pairs_shortest_path()

    if edge_list:
        args = [(nx2nk_ndict[source], nx2nk_ndict[target]) for source, target in edge_list]
    else:
        args = [(nx2nk_ndict[source], nx2nk_ndict[target]) for source, target in G.edges()]

    # Start compute edge Ricci curvature
    t0 = time.time()

    with mp.get_context('fork').Pool(processes=_proc) as pool:
        # WARNING: Now only fork works, spawn will hang.

        # Decide chunksize following method in map_async
        if chunksize is None:
            chunksize, extra = divmod(len(args), proc * 4)
            if extra:
                chunksize += 1

        # Compute Ricci curvature for edges
        result = pool.imap_unordered(_wrap_compute_single_edge, args, chunksize=chunksize)
        pool.close()
        pool.join()

    # Convert edge index from nk back to nx for final output
    output = {}
    for rc in result:
        for k in list(rc.keys()):
            output[(nk2nx_ndict[k[0]], nk2nx_ndict[k[1]])] = rc[k]

    logger.info("%8f secs for Ricci curvature computation." % (time.time() - t0))

    return output

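# Illustrative call (not from the original module): it assumes the module-level imports
# (networkx as nx, networkit as nk, multiprocessing as mp, ...) and the private helpers
# used above are available; the parameter values below are arbitrary for the sketch.
if __name__ == "__main__":
    import networkx as nx

    G = nx.karate_club_graph()
    curvatures = _compute_ricci_curvature_edges(G, alpha=0.5, method="OTD", proc=2)
    for (u, v), kappa in sorted(curvatures.items())[:5]:
        print("edge (%d, %d): Ricci curvature %.4f" % (u, v, kappa))
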
def run(graph: nx.Graph, game, alpha, T, sets=dict()):
    n = graph.number_of_nodes()
    m = graph.number_of_edges()
    players = [Player(id=i) for i in graph.nodes()]
    # TODO vary init
    sus = random.choices([0, 1], k=m)
    svs = random.choices([0, 1], k=m)
    rates = dict()
    for key in sets.keys():
        rates[key] = []
    for p in players:
        p.deg = graph.degree(p.id)
    for _ in range(T):
        for p in players:
            p.g_avg = .0
            p.sig = .0
            p.rho = .0
            p.best_neighbor = -1
        for (u, v), su, sv in zip(graph.edges(), sus, svs):
            pu, pv = game.payoff(su, sv)
            players[u].g_avg += pu
            players[v].g_avg += pv
            players[u].sig += 1 - sv
            players[v].sig += 1 - su
            players[u].rho += 1 - su
            players[v].rho += 1 - sv
        for p in players:
            p.best_g = -INF
            p.best_s = None
            p.g_avg /= p.sz()
            p.sig /= p.sz()
            p.rho /= p.sz()
        for (u, v), su, sv in zip(graph.edges(), sus, svs):
            if players[u].best_g < players[v].g_avg:
                players[u].best_g = players[v].g_avg
                players[u].best_s = sv
                players[u].best_neighbor = v
            if players[v].best_g < players[u].g_avg:
                players[v].best_g = players[u].g_avg
                players[v].best_s = su
                players[v].best_neighbor = u
        for p in players:
            delta = (p.best_g - p.g_avg)
            # TODO b = 1 ensure ????
            p.prob = sigmoid(delta / alpha)
            if p.best_neighbor != -1:
                q = players[p.best_neighbor]
                if p.sig == q.sig:
                    p.star = +100
                else:
                    p.star = (p.rho - q.rho) / (p.sig - q.sig)
            else:
                p.star = -100
        for i, (u, v) in zip(range(m), graph.edges()):
            if random.uniform(0, 1) < players[u].prob:
                sus[i] = players[u].best_s
            if random.uniform(0, 1) < players[v].prob:
                svs[i] = players[v].best_s
        for p in players:
            p.cnt = 0
        for (u, v), su, sv in zip(graph.edges(), sus, svs):
            players[u].cnt += 1 - su
            players[v].cnt += 1 - sv
        store_rates(rates, players, sets)
    return rates

def count_noiseless_subsystems(g: nx.Graph):
    n = g.number_of_nodes()
    m = nx.to_numpy_array(g) if g.number_of_edges() > 0 else np.zeros([n, n])
    return count_noiseless_eigenvectors(m)

class BotSimilarityGrapher(BotRetweetGrapher):
    def __init__(self):
        super().__init__()
        self.similarity_graph = None

    @property
    def retweet_graph(self):
        return self.graph

    def retweet_graph_report(self):
        self.report()

    def perform(self):
        """
        Given:
            bot_ids (list) a unique list of bot ids, which should all be included as nodes
                in the bot retweet graph. The retweet graph will also contain retweeted users,
                which is why we need a separate list. The bot ids will be used as nodes in the
                similarity graph.
            bot_retweet_graph (networkx.DiGraph) a retweet graph generated from the bot list

        Returns: a similarity graph (networkx.Graph), where the similarity is based on the Jaccard index.
            For each pair of bots we calculate the Jaccard index based on the sets of people they retweet.
            If two bots retweet exactly the same users, their Jaccard index is one.
            If they don't retweet anyone in common, their Jaccard index is zero.
        """
        self.retweet_graph_report()

        bot_ids = [row.user_id for row in self.bq_service.fetch_bot_ids(bot_min=self.bot_min)]
        print("FETCHED", fmt_n(len(bot_ids)), "BOT IDS")

        node_pairs = []
        for i, bot_id in enumerate(bot_ids):
            for other_bot_id in bot_ids[i+1:]:
                if self.retweet_graph.has_node(other_bot_id) and self.retweet_graph.has_node(bot_id):
                    node_pairs.append((bot_id, other_bot_id))
        # could maybe just take the combinations between all nodes in the bot graph,
        # because we can assume they were assembled using the same bot ids as the ones here,
        # but the point is to be methodologically sound and it doesn't take that long
        print("NODE PAIRS:", fmt_n(len(node_pairs)))

        results = jaccard_coefficient(self.retweet_graph.to_undirected(), node_pairs)
        #> returns an iterator of 3-tuples in the form (u, v, p)
        #> where (u, v) is a pair of nodes and p is their Jaccard coefficient.
print("JACCARD COEFFICIENT RESULTS:", fmt_n(len(results))) print("CONSTRUCTING SIMILARITY GRAPH...") self.similarity_graph = Graph() edge_count = 0 #positive_results = [r for r in results if r[2] > 0] # this takes a while, maybe let's just stick with the original iterator approach for bot_id, other_bot_id, similarity_score in results: if similarity_score > 0: self.similarity_graph.add_edge(bot_id, other_bot_id, weight=similarity_score) edge_count += 1 self.counter += 1 if self.counter % self.batch_size == 0: print(logstamp(), "|", fmt_n(self.counter), "|", fmt_n(edge_count), "EDGES") # # BOT SIMILARITY GRAPH STORAGE # TODO: refactor into a new storage service to inherit from the base storage service, # and mix that in instead (requires some parent class de-coupling) # @property def local_similarity_graph_filepath(self): return os.path.join(self.local_dirpath, "similarity_graph.gpickle") @property def gcs_similarity_graph_filepath(self): return os.path.join(self.gcs_dirpath, "similarity_graph.gpickle") def write_similarity_graph(self): print("SAVING SIMILARITY GRAPH...") write_gpickle(self.similarity_graph, self.local_similarity_graph_filepath) def upload_similarity_graph(self): print("UPLOADING SIMILARITY GRAPH...") self.upload_file(self.local_similarity_graph_filepath, self.gcs_similarity_graph_filepath) def load_similarity_graph(self): print("LOADING SIMILARITY GRAPH...") if not os.path.isfile(self.local_similarity_graph_filepath): self.download_file(self.gcs_similarity_graph_filepath, self.local_similarity_graph_filepath) return read_gpickle(self.local_similarity_graph_filepath) def save_similarity_graph(self): self.write_similarity_graph() self.upload_similarity_graph() def similarity_graph_report(self): if not self.similarity_graph: self.similarity_graph = self.load_similarity_graph() print("-------------------") print("SIMILARITY GRAPH", type(self.similarity_graph)) print(" NODES:", fmt_n(self.similarity_graph.number_of_nodes())) print(" EDGES:", fmt_n(self.similarity_graph.number_of_edges())) print("-------------------")
def rrhandler(graph: nx.Graph, budget: int):
    """
    Handles application of reduction rules (both count and order). Doesn't take the already
    constructed vc as part of the input, so the input graph must be cleaned with respect to
    the already known vc.
    :param graph: graph to reduce
    :param budget: parameter k
    :return: 4-tuple (flag, new_graph, vc, folded_verts) where flag=False denotes that this is
        a NO-instance (and flag=True that it may still be a YES-instance), new_graph is the
        reduced subgraph, vc is the partial vc constructed while applying reduction rules, and
        folded_verts is the list of folded vertices
    """
    vc = set()
    folded_verts = []
    budget_active = (budget >= 0)
    changed = True
    while changed:
        if graph.number_of_edges() == 0:
            return True, graph, vc, folded_verts
        changed = False

        # apply sage reduction (RR1-4)
        old_verts = graph.number_of_nodes()
        graph, new_vc, new_folded_verts = sage_reduction(graph.copy())
        # noinspection PyChainedComparisons
        if budget_active and len(new_vc) + len(new_folded_verts) > budget:
            return False, graph, set(), []
        if graph.number_of_nodes() < old_verts:
            # some change
            folded_verts.extend(new_folded_verts)
            vc.update(new_vc)
            if budget_active:
                budget -= (len(new_vc) + len(new_folded_verts))
            changed = True
            continue

        # apply lp based crown reduction rule
        graph, new_vc, lpopt = lp_reduction(graph.copy())
        if budget_active and len(new_vc) > budget:
            return False, graph, set(), []
        if graph.number_of_nodes() < old_verts:
            # some change
            print("#" * 80)
            print("lp crown found")
            print("#" * 80)
            vc.update(new_vc)
            if budget_active:
                budget -= len(new_vc)
            changed = True
            continue
        else:
            if budget_active:
                #print("lpopt", lpopt, "budget", budget)
                if lpopt > budget:
                    return False, graph, set(), []
                #print("all half", check_all_half_lp(graph, lpopt))

        # apply degree k reduction rule only if positive budget
        if budget_active:
            graph, new_vc = degree_k(graph.copy(), budget)
            if len(new_vc) > budget:
                return False, graph, set(), []
            if len(new_vc) > 0:
                vc.update(new_vc)
                budget -= len(new_vc)
                changed = True
                continue
    return True, graph, vc, folded_verts

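# Illustrative call on a tiny instance (not part of the original module): sage_reduction,
# lp_reduction and degree_k are assumed to be importable here, as in the code above.
if __name__ == "__main__":
    import networkx as nx

    g = nx.path_graph(5)  # a path on 5 vertices has a vertex cover of size 2
    feasible, reduced, vc, folded = rrhandler(g.copy(), budget=2)
    print(feasible, reduced.number_of_nodes(), vc, folded)
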
def _compute_ricci_flow(G: nx.Graph, weight="weight",
                        iterations=100, step=1, delta=1e-4,
                        surgery=(lambda G, *args, **kwargs: G, 100),
                        **kwargs):
    """
    Compute the given Ricci flow metric of each edge of a given connected NetworkX graph.

    :param iterations: Number of iterations to run the Ricci flow.
    :param step: Step size for the gradient descent process.
    :param delta: The process stops when the difference of Ricci curvature is within delta.
    :param surgery: A tuple of a user-defined surgery function and the number of iterations between surgeries.
    :return: G: A NetworkX graph with weight as the Ricci flow metric.
    """

    if not nx.is_connected(G):
        logger.warning("Not connected graph detected, compute on the largest connected component instead.")
        G = nx.Graph(G.subgraph(max(nx.connected_components(G), key=len)))
    G.remove_edges_from(nx.selfloop_edges(G))

    logger.info("Number of nodes: %d" % G.number_of_nodes())
    logger.info("Number of edges: %d" % G.number_of_edges())

    # Set normalized weight to be the number of edges.
    normalized_weight = float(G.number_of_edges())

    # Start compute edge Ricci flow
    t0 = time.time()

    if nx.get_edge_attributes(G, "original_RC"):
        logger.warning("original_RC detected, continue to refine the ricci flow.")
    else:
        _compute_ricci_curvature(G, weight=weight, **kwargs)
        for (v1, v2) in G.edges():
            G[v1][v2]["original_RC"] = G[v1][v2]["ricciCurvature"]

    # Start the Ricci flow process
    for i in range(iterations):
        for (v1, v2) in G.edges():
            G[v1][v2][weight] -= step * (G[v1][v2]["ricciCurvature"]) * G[v1][v2][weight]

        # Do normalization on all weight to prevent weight expand to infinity
        w = nx.get_edge_attributes(G, weight)
        sumw = sum(w.values())
        for k, v in w.items():
            w[k] = w[k] * (normalized_weight / sumw)
        nx.set_edge_attributes(G, values=w, name=weight)
        logger.info(" === Ricci flow iteration %d === " % i)

        _compute_ricci_curvature(G, weight=weight, **kwargs)

        rc = nx.get_edge_attributes(G, "ricciCurvature")
        diff = max(rc.values()) - min(rc.values())
        logger.info("Ricci curvature difference: %f" % diff)
        logger.info("max:%f, min:%f | maxw:%f, minw:%f" % (
            max(rc.values()), min(rc.values()), max(w.values()), min(w.values())))

        if diff < delta:
            logger.info("Ricci curvature converged, process terminated.")
            break

        # do surgery or any specific evaluation
        surgery_func, do_surgery = surgery
        if i != 0 and i % do_surgery == 0:
            G = surgery_func(G, weight)
            normalized_weight = float(G.number_of_edges())

        for n1, n2 in G.edges():
            logger.debug(n1, n2, G[n1][n2])

    logger.info("\n%8f secs for Ricci flow computation." % (time.time() - t0))
    return G

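# Sketch of running the flow on a small graph (not from the original module): the default
# no-op surgery is used, and _compute_ricci_curvature plus the module imports are assumed
# to be available as above.
if __name__ == "__main__":
    import networkx as nx

    G = nx.karate_club_graph()
    G_flowed = _compute_ricci_flow(G, weight="weight", iterations=20, step=1, delta=1e-4)
    weights = nx.get_edge_attributes(G_flowed, "weight")
    print("min/max edge weight after flow:", min(weights.values()), max(weights.values()))
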
ips = {}

# filter all relays in this consensus to those that
# have a descriptor, are running, and are fast
for relay in consensus.relays:
    if relay in descriptors:
        sd = descriptors[relay]        # server descriptor
        rse = consensus.relays[relay]  # router status entry
        if "Running" in rse.flags and "Fast" in rse.flags:
            if relay not in ips:
                ips[relay] = []
            ips[relay].append(sd.address)

# build edges between every relay that could have been
# selected in a path together
for r1 in ips:
    for r2 in ips:
        if r1 is r2:
            continue
        g.add_edges_from(product(ips[r1], ips[r2]))

nsf_i += 1

# check if we should do a checkpoint and save our progress
if nsf_i == nsf_len or "01-00-00-00" in fname:
    chkpntstart = fname[0:10]
    with open("relaypairs.{0}--{1}.json".format(chkpntstart, chkpntend), 'wb') as f:
        json.dump(g.edges(), f)

print ""
print('Num addresses: {0}'.format(g.number_of_nodes()))
print('Num unique pairs: {0}'.format(g.number_of_edges()))

# write final graph to disk
with open(out_file, 'wb') as f:
    json.dump(g.edges(), f)
##########