def get_colorcore_map(self, g_tree, kcore_base=4):
    '''
    Build the 'node_color' and 'node_size' vertex property maps of 'g_tree'.

    Every vertex of the tree is looked up, through the tree's
    'original_index' vertex property, in the k-core decomposition of
    'self.g'. Vertices whose k-core value is at least 'kcore_base' receive
    the first color and the core size; all others receive the second color
    and keep the default size. Vertex 0 of the tree is finally given the
    source size. Both maps are registered in 'g_tree.vertex_properties' and
    returned as the tuple (node_color, node_size).
    '''
    core_color, other_color = '#C02F1D', '#107896'
    default_size, core_size, source_size = 5, 6, 15

    kcore_of = gt.kcore_decomposition(self.g)

    node_color = g_tree.new_vp('string')
    node_size = g_tree.new_vp('int')
    # Every vertex starts with the default size; core vertices are bumped below.
    node_size.a = default_size

    to_original = g_tree.vp.original_index
    for vertex in g_tree.get_vertices():
        is_core = kcore_of[to_original[vertex]] >= kcore_base
        node_color[vertex] = core_color if is_core else other_color
        if is_core:
            node_size[vertex] = core_size

    g_tree.vertex_properties['node_color'] = node_color
    g_tree.vertex_properties['node_size'] = node_size
    # Set last so that vertex 0 keeps the source size whatever its k-core is.
    node_size[g_tree.vertex(0)] = source_size
    return node_color, node_size
def set_aesthestic(self, g_tree, kcore_base=4, mpl_colormap='viridis'):
    '''
    Color and size the vertices of 'g_tree' according to their k-core value.

    The k-core value of a tree vertex is read from the decomposition of
    'self.g' through the tree's 'original_index' vertex property. Vertices
    below 'kcore_base' get the colormap's color at 0.0 and the normal size;
    the others get the color at (k-core value / largest k-core value) and
    the core size. Vertex 0 of the tree is given the source size. The maps
    are stored in 'g_tree.vertex_properties' under 'node_color' and
    'node_size' and returned as (node_color, node_size).
    '''
    kcore_of = gt.kcore_decomposition(self.g)
    # np.unique returns sorted values, so the last one is the largest k-core.
    top_core = np.unique(kcore_of.a)[-1]
    palette = cm.get_cmap(mpl_colormap, top_core)
    small_size, core_size, source_size = 4, 6, 15

    node_color = g_tree.new_vp('string')
    node_size = g_tree.new_vp('int')
    node_size.a = small_size
    to_original = g_tree.vp.original_index

    for vertex in g_tree.get_vertices():
        core_value = kcore_of[to_original[vertex]]
        below_base = core_value < kcore_base
        position = 0.0 if below_base else (core_value) / (top_core)
        red, green, blue = palette(position)[:3]
        node_color[vertex] = cm.colors.to_hex([red, green, blue])
        node_size[vertex] = small_size if below_base else core_size

    g_tree.vertex_properties['node_color'] = node_color
    g_tree.vertex_properties['node_size'] = node_size
    node_size[g_tree.vertex(0)] = source_size
    return node_color, node_size
def main():
    """Draw the graph that links tests sharing a test bed.

    Reads the ``tests`` and ``testBeds`` tables of
    ``../../data/testDefinitions.sqlite``, creates one vertex per row of
    ``tests`` and one edge for every pair of tests assigned to the same
    test bed, then renders the graph to ``test-testBed.png``.
    """
    from itertools import combinations

    graph = gt.Graph()
    conn = sqlite3.connect('../../data/testDefinitions.sqlite')
    try:
        cursor = conn.cursor()

        cursor.execute('select id, testBed from tests;')
        tests = {row[0]: graph.add_vertex() for row in cursor.fetchall()}

        cursor.execute('select id from testBeds;')
        bed_ids = [row[0] for row in cursor.fetchall()]

        for bed_id in bed_ids:
            # Bind the value instead of formatting it into the SQL text so
            # that ids containing quotes cannot break (or alter) the query.
            # It is bound as text because the former query compared the
            # column against a quoted string literal.
            cursor.execute(
                'select tests.id from tests where testBed = ?',
                (str(bed_id),))
            members = [row[0] for row in cursor.fetchall()]
            for first, second in combinations(members, 2):
                graph.add_edge(tests[first], tests[second])
    finally:
        # Release the database handle even if a query or lookup fails.
        conn.close()

    gt.graph_draw(graph,
                  vertex_font_size=12,
                  output_size=(800, 600),
                  output="test-testBed.png")
def main():
    """Draw the graph that links tests sharing a test bed.

    Reads the ``tests`` and ``testBeds`` tables of
    ``../../data/testDefinitions.sqlite``, creates one vertex per row of
    ``tests`` and one edge for every pair of tests assigned to the same
    test bed, then renders the graph to ``test-testBed.png``.
    """
    from itertools import combinations

    graph = gt.Graph()
    conn = sqlite3.connect('../../data/testDefinitions.sqlite')
    try:
        cursor = conn.cursor()

        cursor.execute('select id, testBed from tests;')
        tests = {row[0]: graph.add_vertex() for row in cursor.fetchall()}

        cursor.execute('select id from testBeds;')
        bed_ids = [row[0] for row in cursor.fetchall()]

        for bed_id in bed_ids:
            # Bind the value instead of formatting it into the SQL text so
            # that ids containing quotes cannot break (or alter) the query.
            # It is bound as text because the former query compared the
            # column against a quoted string literal.
            cursor.execute(
                'select tests.id from tests where testBed = ?',
                (str(bed_id),))
            members = [row[0] for row in cursor.fetchall()]
            for first, second in combinations(members, 2):
                graph.add_edge(tests[first], tests[second])
    finally:
        # Release the database handle even if a query or lookup fails.
        conn.close()

    gt.graph_draw(graph,
                  vertex_font_size=12,
                  output_size=(800, 600),
                  output="test-testBed.png")
def mcore_of_rewired(args_tuple):
    """Rewire a graph and measure its main (largest-k) core.

    Takes a single tuple argument so it can be used with pool-style
    ``map`` calls.

    Args:
        args_tuple: A ``(t, g, model)`` tuple. ``t`` is an opaque tag that
            is logged and returned unchanged, ``g`` is the graph handed to
            ``gt.random_rewire`` (the same object is decomposed afterwards),
            and ``model`` is the rewiring model name.

    Returns:
        tuple: ``(t, k, n)`` where ``k`` is the largest k-core value found
        after rewiring and ``n`` the number of vertices having that value.
    """
    t, g, model = args_tuple
    # Use the integer process id: a trailing comma here used to turn the
    # value into a 1-tuple holding the Process object, which is what ended
    # up in the "Process id=" / "PID=" messages.
    pid = mp.current_process().pid
    rejected = gt.random_rewire(g, model=model)
    print('Process id={}, number of rejected edges={}'.format(pid, rejected))
    logger.info('PID=%s, number of rejected edges=%s', pid, rejected)
    kshell = gt.kcore_decomposition(g)
    # Copy so the Series does not alias the property map's storage.
    s = pd.Series(kshell.a.copy())
    k = s.max()
    n = (s == k).sum()
    logger.info('PID=%s, ts=%s, K=%s, N=%s', pid, t, k, n)
    return (t, k, n)
def _seeds(self):
    """
    Collect one seed per k-core level of the network.

    For every k from 2 up to the largest k-core value found in a copy of
    the network, the result of ``self._components(k, k_cores, network)``
    is kept whenever it is non-empty.

    :return: list of the non-empty results, ordered by increasing k
    """
    graph = self._network.copy()
    core_numbers = gt.kcore_decomposition(graph)
    highest = np.max(core_numbers.a)
    # Lazily evaluated, so _components is still called once per level and
    # in increasing order of k.
    per_level = (
        self._components(level, core_numbers, graph)
        for level in range(2, highest + 1)
    )
    return [found for found in per_level if len(found) > 0]
def extract_ids_kshell(g, shell=1, shell_mode=True):
    '''
    Return the ids (vertex property 'ids') of the nodes in a k-shell or k-core.

    With 'shell_mode' true, only the nodes whose k-core value equals
    'shell' are selected (the shell). Otherwise every node whose k-core
    value is at least 'shell' is selected (the core).
    '''
    core_number = gt.kcore_decomposition(g)
    id_of = g.vp.ids

    if shell_mode:
        def selected(value):
            return value == shell
    else:
        def selected(value):
            return value >= shell

    return [id_of[v] for v in g.get_vertices() if selected(core_number[v])]
def label_core_components(g, core=1):
    '''
    Label the connected components of the 'core'-core of 'g'.

    The graph is temporarily restricted, with a vertex filter, to the
    vertices whose k-core value is at least 'core'; the components of that
    restricted graph are then labelled with gt.label_components.

    Returns a tuple (nodecolor_comp, ncomponents):
      - nodecolor_comp maps a node id (vertex property 'ids') to the label
        of its component; ids that are not in the core yield -1.
      - ncomponents is the length of the histogram returned by
        gt.label_components.

    NOTE: the vertex filter of 'g' is reset to None on exit (also when an
    error occurs), so a filter set by the caller beforehand is not kept.
    '''
    kc = gt.kcore_decomposition(g)
    node_id = g.vp.ids
    in_core = g.new_vertex_property('bool')
    for v in g.get_vertices():
        in_core[v] = bool(kc[v] >= core)

    g.set_vertex_filter(in_core)
    try:
        labels, hist = gt.label_components(g)
        nodecolor_comp = defaultdict(lambda: -1)
        for v in g.get_vertices():
            if in_core[v] == 1:
                nodecolor_comp[node_id[v]] = labels[v]
    finally:
        # Recover the original network even if labelling fails, so the
        # caller's graph is never left filtered.
        g.set_vertex_filter(None)
    return nodecolor_comp, hist.shape[0]
def paint_kcore(path, graph, name):
    """Draw the k-core coloring of the largest component of ``graph``.

    Does nothing when ``path`` is falsy. Otherwise an undirected copy of
    ``graph`` is restricted to its largest component, laid out with
    ``gt.sfdp_layout`` and drawn to ``<path>/<name>-graph-kcore.svg`` with
    each vertex filled and labelled by its k-core value. Progress is
    reported on standard output.
    """
    if not path:
        return

    out = sys.stdout
    out.write('Drawing kcore graph ... ')
    out.flush()

    undirected = gt.Graph(graph, directed=False)
    folder = os.path.abspath(path)
    largest = gt.GraphView(undirected,
                           vfilt=gt.label_largest_component(undirected))
    core_numbers = gt.kcore_decomposition(largest)
    layout = gt.sfdp_layout(largest)
    target = os.path.join(folder, str(name) + '-graph-kcore.svg')
    gt.graph_draw(largest,
                  pos=layout,
                  vertex_fill_color=core_numbers,
                  vertex_text=core_numbers,
                  output=target)

    out.write('Ok!\n')
    out.flush()
def _seeds(self):
    """
    Pick the seed components from the best k-core cut of the network.

    For every k from 2 up to the largest k-core value of a copy of the
    network, ``self._components(k, kcores, network)`` is evaluated and kept
    (keyed by k) when non-empty, while progress is written to standard
    output. ``self._select_best_k`` then chooses among the kept entries.

    :return: the components returned by ``self._select_best_k``
    """
    started = time.time()
    graph = self._network.copy()
    core_numbers = gt.kcore_decomposition(graph)
    highest = np.max(core_numbers.a)

    by_level = {}
    for level in range(2, highest + 1):
        found = self._components(level, core_numbers, graph)
        if len(found) > 0:
            by_level[level] = found
        # '\r' rewrites the same console line for each level.
        sys.stdout.write('\rAnalyzing the {0}-support ... '.format(level))
        sys.stdout.flush()

    sys.stdout.write(' Ok! ({0} s.)\n'.format(time.time() - started))

    best_k, best_components = self._select_best_k(by_level)
    print('Selecting {0}-cut'.format(best_k))
    return best_components
def peel_one(G):
    """Separate into vertices of peel one (and isolated vertices) and
    vertices of peel greater than one.

    Partition Type: Node

    Description: Given graph G, group its vertices as either peel less
    than or equal to 1, or greater than 1, and wrap each non-empty group
    in a PartitionNode.

    NOTE: Input graph G will have its filters cleared, if any
    NOTE: Usage recommended only at beginning of tree exploration
    NOTE: peel one is not a proper edge partition

    Args:
        G (graph_tool.Graph): The graph instance. The vertex and edge
            index ranges are derived from it inside the function; they are
            not parameters.

    Returns:
        A list with one PartitionNode (all vertices fall in the same
        group) or two PartitionNodes (LTE 1 first, then GT 1).

    Raises:
        ValueError: If G is not a graph_tool.Graph instance.
    """
    if not isinstance(G, gt.Graph):
        err_msg = 'G must be a graph_tool.Graph instance'
        raise ValueError(err_msg)
    # Work on the whole graph: any filter the caller had set is dropped.
    G.clear_filters()
    vertex_indices = range(G.num_vertices())
    edge_indices = range(G.num_edges())
    kcore = gt.kcore_decomposition(G)
    # peel one vertex and edge indices
    peel_one_vertex_idx = np.where(kcore.a <= 1)[0]
    vfilt = G.new_vp('bool', vals=False)
    vfilt.a[peel_one_vertex_idx] = True
    G.set_vertex_filter(vfilt)
    # NOTE(review): the edge map is created while the vertex filter is
    # active and then read through `.a`. Presumably the intent is to get
    # only the edges induced by the filtered vertices -- confirm that `.a`
    # (rather than a filtered view) really yields that subset; otherwise
    # this selects every edge of G.
    efilt = G.new_ep('bool', vals=True)
    peel_one_edge_idx = np.where(efilt.a == 1)[0]
    G.clear_filters()
    children = []
    # Cases where all nodes are either at most peel 1 or at least peel 1
    if len(peel_one_vertex_idx) == len(vertex_indices):
        # Every vertex has peel <= 1: a single LTE1 child.
        node = PartitionNode(vertex_indices=peel_one_vertex_idx,
                             edge_indices=peel_one_edge_idx,
                             label='VP_LTE1_{}'.format(0),
                             note='peel values less than or equal to (LTE) 1')
        children.append(node)
        return children
    elif len(peel_one_vertex_idx) == 0:
        # No vertex has peel <= 1: a single GT1 child covering the whole
        # graph (index ranges rather than arrays are passed here).
        node = PartitionNode(vertex_indices=vertex_indices,
                             edge_indices=edge_indices,
                             label='VP_GT1_{}'.format(0),
                             note='peel values greater than (GT) 1')
        children.append(node)
        return children
    # Case where there are mixed peel values
    higher_peel_vertex_idx = np.where(kcore.a > 1)[0]
    vfilt = G.new_vp('bool', vals=False)
    vfilt.a[higher_peel_vertex_idx] = True
    G.set_vertex_filter(vfilt)
    # NOTE(review): same `.a`-under-filter question as for the peel-one
    # edge indices above.
    efilt = G.new_ep('bool', vals=True)
    higher_peel_edge_idx = np.where(efilt.a == 1)[0]
    # Leave G unfiltered for the caller.
    G.clear_filters()
    node = PartitionNode(vertex_indices=peel_one_vertex_idx,
                         edge_indices=peel_one_edge_idx,
                         label='VP_LTE1_{}'.format(0),
                         note='peel values less than or equal to (LTE) 1')
    children.append(node)
    node = PartitionNode(vertex_indices=higher_peel_vertex_idx,
                         edge_indices=higher_peel_edge_idx,
                         label='VP_GT1_{}'.format(1),
                         note='peel values greater than (GT) 1')
    children.append(node)
    return children
def kcore_growing_ba(
        fn1='ba.gml',
        fn2='graph.daily.csv',
        ofn=None,
):
    """Track the main k-core while re-growing a BA model graph edge by edge.

    The edges of the graph stored in ``fn1`` are replayed, in order, into
    an empty graph. Each time the number of vertices reaches the next value
    of the ``vnum`` column of ``fn2``, a snapshot is recorded: the largest
    k-core value, how many vertices have it, their indices, the vertex and
    edge counts, and the size of the largest component. Replay stops after
    the last ``vnum`` value. The snapshots are written to ``ofn``
    (default ``kcore.growing.ba.csv``).
    """
    if ofn is None:
        ofn = 'kcore.growing.ba.csv'

    source_graph = gt.load_graph(fn1)
    checkpoints = pd.read_csv(fn2)['vnum'].tolist()
    edges = pd.DataFrame(source_graph.get_edges().copy(),
                         columns=['source', 'target', 'idx'])
    edges = edges[['source', 'target']]

    relabel = {}  # original vertex index -> index in the re-grown graph
    step = 0      # position of the next checkpoint in `checkpoints`
    main_ks, main_sizes, main_members = [], [], []
    vertex_counts, edge_counts, largest_sizes = [], [], []

    grown = gt.Graph()
    for _, s, t in edges.itertuples():
        # New vertices are numbered consecutively in order of appearance.
        source = relabel.setdefault(s, len(relabel))
        target = relabel.setdefault(t, len(relabel))
        grown.add_edge(source, target, add_missing=True)

        if grown.num_vertices() >= checkpoints[step]:
            core_numbers = pd.Series(gt.kcore_decomposition(grown).a.copy())
            per_k = core_numbers.value_counts().sort_index(ascending=False)
            main_k = per_k.index[0]
            main_size = per_k.iloc[0]
            main_ks.append(main_k)
            main_sizes.append(main_size)
            main_members.append(
                core_numbers.index[core_numbers == main_k].tolist())
            largest = gt.label_largest_component(grown, directed=False)
            vertex_counts.append(grown.num_vertices())
            edge_counts.append(grown.num_edges())
            largest_sizes.append(largest.a.sum())
            logger.info(grown)
            logger.info('gp counter: %s', step)
            logger.info('Main core at vnum=%s: k=%s, num=%s',
                        grown.num_vertices(), main_k, main_size)
            step += 1
            if step >= len(checkpoints):
                # Every checkpoint has been recorded.
                break

    cdf = pd.DataFrame(
        dict(mcore_k=main_ks,
             mcore_s=main_sizes,
             mcore_idx=main_members,
             vnum=vertex_counts,
             enum=edge_counts,
             largest_commponent_vnum=largest_sizes))
    cdf.to_csv(ofn, index=False)
def kcore_growing_shuffle(fn1='retweet.201710.claim.raw.csv',
                          fn2='graph.daily.csv',
                          ofn=None,
                          rewiring=None):
    """The growing of kcore by shuffling the edge list.

    The network built from ``fn1`` (optionally rewired first) has its edge
    list shuffled and replayed into an empty graph. Each time the number of
    edges reaches the next value of the ``enum`` column of ``fn2``, a
    snapshot of the main k-core is recorded.

    Args:
        fn1: Input passed to ``prepare_network_from_raw``.
        fn2: CSV file with an ``enum`` column holding the edge-count
            checkpoints.
        ofn: Output CSV path. Defaults to ``kcore.growing.shuffle.csv``,
            or ``kcore.growing.shuffle.<rewiring>.csv`` when ``rewiring``
            is given.
        rewiring: Optional model name passed to ``gt.random_rewire``.

    The output has one row per checkpoint with the columns ``mcore_k``,
    ``mcore_s``, ``mcore_idx``, ``vnum``, ``enum`` and
    ``largest_commponent_vnum``.
    """
    if ofn is None:
        ofn = 'kcore.growing.shuffle'
        if rewiring:
            ofn += '.' + rewiring
        ofn += '.csv'
    g = prepare_network_from_raw(fn1)
    if rewiring is not None:
        gt.random_rewire(g, model=rewiring)
    evmap = pd.read_csv(fn2)
    enum_list = evmap['enum'].tolist()
    emap = pd.DataFrame(g.get_edges().copy(),
                        columns=['source', 'target', 'idx'])
    emap = emap[['source', 'target']]
    emap = emap.reindex(np.random.permutation(
        emap.index)).reset_index(drop=True)

    v_map = dict()  # original vertex index -> index in the re-grown graph
    v_counter = -1
    gp_counter = 0  # position of the next checkpoint in enum_list
    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []
    g = gt.Graph()
    for _, s, t in emap.itertuples():
        # Stop once every checkpoint has been recorded (or when there is
        # none); indexing enum_list past its end would raise IndexError
        # and lose the results gathered so far.
        if gp_counter >= len(enum_list):
            break
        if s not in v_map:
            v_counter += 1
            v_map[s] = v_counter
        if t not in v_map:
            v_counter += 1
            v_map[t] = v_counter
        source = v_map.get(s)
        target = v_map.get(t)
        g.add_edge(source, target, add_missing=True)
        if g.num_edges() >= enum_list[gp_counter]:
            kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
            # Highest k first: the first entry describes the main core.
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g, directed=False)
            vnum.append(g.num_vertices())
            enum.append(g.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g)
            logger.info('gp counter: %s', gp_counter)
            logger.info('Main core at enum=%s: k=%s, num=%s', g.num_edges(),
                        mk, ms)
            gp_counter += 1
    cdf = pd.DataFrame(
        dict(mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_commponent_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
def kcore_growing_weighted_shuffle(fn1,
                                   fn2='graph.daily.csv',
                                   ofn=None,
                                   freq='D'):
    """The growing of kcore by shuffling the retweet list.

    The rows of ``fn1`` (self-loops removed) are shuffled and replayed;
    each distinct ``(from_raw_id, to_raw_id)`` pair adds one edge to an
    initially empty graph. Each time the number of edges reaches the next
    value of the ``enum`` column of ``fn2``, a snapshot of the main k-core
    is recorded.

    Args:
        fn1: CSV file whose 4th and 5th columns are ``from_raw_id`` and
            ``to_raw_id``.
        fn2: CSV file with an ``enum`` column holding the edge-count
            checkpoints.
        ofn: Output CSV path, ``kcore.growing.weighted-shuffle.csv`` by
            default.
        freq: Not used by this function; kept so existing callers that
            pass it keep working.

    The output has one row per checkpoint with the columns ``mcore_k``,
    ``mcore_s``, ``mcore_idx``, ``vnum``, ``enum`` and
    ``largest_commponent_vnum``.
    """
    if ofn is None:
        ofn = 'kcore.growing.weighted-shuffle.csv'
    # load only necessary columns
    df = pd.read_csv(fn1, usecols=[3, 4])
    # remove self-loop
    df = df.loc[df.from_raw_id != df.to_raw_id]
    df = df.reindex(np.random.permutation(df.index))
    evmap = pd.read_csv(fn2)
    enum_list = evmap['enum'].tolist()

    v_map = dict()  # raw user id -> vertex index
    v_counter = -1
    e_set = set()   # edges already present in the graph
    gp_counter = 0  # position of the next checkpoint in enum_list
    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []
    g = gt.Graph()
    for from_raw_id, to_raw_id in df[['from_raw_id',
                                      'to_raw_id']].itertuples(index=False):
        # Stop once every checkpoint has been recorded (or when there is
        # none) instead of indexing enum_list past its end.
        if gp_counter >= len(enum_list):
            break
        e = (from_raw_id, to_raw_id)
        if e not in e_set:
            if from_raw_id not in v_map:
                v_counter += 1
                v_map[from_raw_id] = v_counter
            if to_raw_id not in v_map:
                v_counter += 1
                v_map[to_raw_id] = v_counter
            source = v_map.get(from_raw_id)
            target = v_map.get(to_raw_id)
            g.add_edge(source, target, add_missing=True)
            e_set.add(e)
        # The checkpoint is decided by the edge count alone; the former
        # extra grouping test relied on names that were never defined in
        # this function and failed with NameError at the first checkpoint.
        if g.num_edges() >= enum_list[gp_counter]:
            kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
            # Highest k first: the first entry describes the main core.
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g, directed=False)
            vnum.append(g.num_vertices())
            enum.append(g.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g)
            logger.info('gp counter: %s', gp_counter)
            logger.info('Main core at enum=%s: k=%s, num=%s', g.num_edges(),
                        mk, ms)
            gp_counter += 1
    cdf = pd.DataFrame(
        dict(mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_commponent_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
def kcore_growing_daily_rewiring(fn,
                                 ofn=None,
                                 freq='D',
                                 model='constrained-configuration'):
    """Track the main k-core of a rewired copy of the growing network.

    The rows of ``fn`` (self-loops removed) are replayed in file order;
    each distinct ``(from_raw_id, to_raw_id)`` pair adds one edge to the
    graph. At the last row of every ``freq`` period of
    ``tweet_created_at``, a copy of the graph is rewired with
    ``gt.random_rewire(model=model, edge_sweep=True)`` and the main k-core
    of that copy is recorded. The records are written to ``ofn``
    (default ``kcore.growing.daily-rewiring.<model>.csv``).
    """
    if ofn is None:
        ofn = 'kcore.growing.daily-rewiring.{}.csv'.format(model)

    # load only necessary columns
    retweets = pd.read_csv(fn,
                           parse_dates=['tweet_created_at'],
                           usecols=[2, 3, 4])
    retweets = retweets.set_index('tweet_created_at')
    # remove self-loop
    retweets = retweets.loc[retweets.from_raw_id != retweets.to_raw_id]
    retweets['row_id'] = np.arange(len(retweets))
    # Flag the last row of each period; periods without rows yield nulls
    # and are discarded.
    last_rows = retweets.row_id.groupby(pd.Grouper(freq=freq)).last()
    last_rows = last_rows.loc[last_rows.notnull()].astype('int')
    retweets['gpf'] = retweets.row_id.isin(last_rows.values)

    relabel = {}  # raw user id -> vertex index
    seen_edges = set()
    timeline = []
    main_ks, main_sizes, main_members = [], [], []
    vertex_counts, edge_counts, largest_sizes = [], [], []

    g = gt.Graph()
    columns = ['from_raw_id', 'to_raw_id', 'gpf']
    for created_at, from_raw_id, to_raw_id, gpf in \
            retweets[columns].itertuples():
        edge = (from_raw_id, to_raw_id)
        if edge not in seen_edges:
            # New vertices are numbered consecutively in order of appearance.
            source = relabel.setdefault(from_raw_id, len(relabel))
            target = relabel.setdefault(to_raw_id, len(relabel))
            g.add_edge(source, target, add_missing=True)
            seen_edges.add(edge)
        if gpf:
            # Rewire a copy so the accumulated graph itself stays intact.
            rewired = g.copy()
            rejected = gt.random_rewire(rewired, model=model, edge_sweep=True)
            logger.info('Number of rejected when rewiring: %s', rejected)
            timeline.append(created_at)
            core_numbers = pd.Series(
                gt.kcore_decomposition(rewired).a.copy())
            per_k = core_numbers.value_counts().sort_index(ascending=False)
            main_k = per_k.index[0]
            main_size = per_k.iloc[0]
            main_ks.append(main_k)
            main_sizes.append(main_size)
            main_members.append(
                core_numbers.index[core_numbers == main_k].tolist())
            largest = gt.label_largest_component(rewired, directed=False)
            vertex_counts.append(rewired.num_vertices())
            edge_counts.append(rewired.num_edges())
            largest_sizes.append(largest.a.sum())
            logger.info(rewired)
            logger.info('Main core at %s: k=%s, num=%s', created_at, main_k,
                        main_size)

    cdf = pd.DataFrame(
        dict(timeline=timeline,
             mcore_k=main_ks,
             mcore_s=main_sizes,
             mcore_idx=main_members,
             vnum=vertex_counts,
             enum=edge_counts,
             largest_commponent_vnum=largest_sizes))
    cdf.to_csv(ofn, index=False)
def set_cores_on_map(self, start_core=5, weight_filter=0, geo_precision=9):
    """Plot the components of the ``start_core``-core on the map.

    The contacts whose two endpoints both belong to the ``start_core``-core
    of ``self.net`` are handed to ``map_utils.coreComponents_on_map``
    together with the per-node component labels and one color per
    component.

    Colors come from ``self.custom_color`` when it is set; otherwise from
    ``self.cmap_list``, which is generated from the ``self.cmap_name``
    colormap (and cached on the instance) the first time it is needed.

    Args:
        start_core: Minimum k-core value of the nodes to keep.
        weight_filter: Forwarded to ``map_utils.coreComponents_on_map``.
        geo_precision: Forwarded to ``map_utils.coreComponents_on_map``.

    Returns:
        tuple: ``(self.map, table)`` where ``table`` is a DataFrame with
        one row per (component, geohash) and the columns ``geohash``,
        ``lat``, ``lon``, ``number_contacts``, ``component_index`` and
        ``color_hex``.
    """
    # Gets the IDs of all nodes inside the core (shell_mode=False).
    kcore_ids = net_utils.extract_ids_kshell(self.net,
                                             shell=start_core,
                                             shell_mode=False)
    nodecolor, ncomponents = net_utils.label_core_components(
        self.net, core=start_core)
    print('Number of components: {}'.format(ncomponents))

    color_list = self.custom_color
    if color_list is None:
        if self.cmap_list is None:
            cmap = cm.get_cmap(self.cmap_name, ncomponents)
            self.cmap_list = [
                cm.colors.to_hex(list(cmap(v)[:3]))
                for v in np.linspace(0, 1, ncomponents)
            ]
        color_list = self.cmap_list

    # using the IDs from the selected core, filter the contacts.
    contacts = self.contact_table
    both_in_core = (contacts['sourceId'].isin(kcore_ids)
                    & contacts['targetId'].isin(kcore_ids))
    eff_table = contacts[both_in_core]

    place_info = map_utils.coreComponents_on_map(
        self.map,
        eff_table,
        self.net_date,
        self.id_to_layer,
        nodecolor,
        color_list,
        weight_filter=weight_filter,
        geo_precision=geo_precision)

    # generate structured csv table
    component_dict = place_info[0]
    table = []
    for comp_index, hits_by_geohash in enumerate(component_dict):
        for geoloc, number_hits in hits_by_geohash.items():
            lat, lon = geohash.decode(geoloc)
            table.append({
                'geohash': geoloc,
                'lat': lat,
                'lon': lon,
                'number_contacts': number_hits,
                'component_index': comp_index,
                # Report the color actually drawn on the map: with custom
                # colors, self.cmap_list may be unset or hold other colors.
                'color_hex': color_list[comp_index]
            })
    table = pd.DataFrame.from_dict(table)
    return self.map, table
def statistics(G):
    """Provides general graph statistics.

    Args:
        G (graph_tool.Graph): The graph instance.

    Returns:
        The string 'No Graph Loaded' when G is falsy. Otherwise a dict
        with the vertex and edge counts, the number of connected
        components, the number of degree-0 vertices, V*log2(V) formatted
        with two decimals, and the non-empty bins and counts of the
        out-degree, component-size and peel (k-core) distributions.
    """
    if not G:
        return 'No Graph Loaded'

    # Indices of the vertices taken into account: those selected by the
    # vertex filter when there is one, every vertex otherwise.
    active_vfilt = G.get_vertex_filter()[0]
    if active_vfilt is None:
        v_idx = np.arange(G.num_vertices())
    else:
        v_idx = np.where(active_vfilt.a == 1)[0]

    # Out-degree histogram, keeping only the non-empty bins.
    deg_counts, deg_bins = gt.vertex_hist(G, 'out', float_count=False)
    nonzero = np.where(deg_counts != 0)[0]
    deg_bins = list(deg_bins[nonzero])
    deg_counts = list(deg_counts[nonzero])

    # Distribution of connected-component sizes.
    comp, cc_hist = gt.label_components(G)
    size_tally = sorted(Counter(cc_hist).items())
    cc_sizes = [size for size, _ in size_tally]
    cc_counts = [count for _, count in size_tally]
    num_cc = len(np.unique(comp.a))

    num_singletons = deg_counts[0] if deg_bins[0] == 0 else 0

    if G.get_vertex_filter()[0] or G.get_edge_filter()[0]:
        # Always correct, but much slower
        peel_partition = kcore_decomposition(G)
        peel_bins = sorted(peel_partition.keys())
        peel_counts = [len(peel_partition[k]) for k in peel_bins]
    else:
        # NOTE:
        # Very fast, but unstable (not always accurate) for graphs with
        # filters
        kcore = gt.kcore_decomposition(G)
        tally = Counter(kcore.a[v_idx])
        peel_bins, peel_counts = [list(pair) for pair in zip(*tally.items())]

    num_vertices = G.num_vertices()
    vlogv = num_vertices * np.log2(num_vertices)

    return {
        'num_vertices': num_vertices,
        'num_edges': G.num_edges(),
        'num_cc': num_cc,
        'num_singletons': num_singletons,
        'vlogv': '{:.2f}'.format(vlogv),
        'deg_bins': deg_bins,
        'deg_counts': deg_counts,
        'cc_sizes': cc_sizes,
        'cc_counts': cc_counts,
        'peel_bins': peel_bins,
        'peel_counts': peel_counts,
    }
def kcore(g):
    """Return the first item of the vertex histogram of g's k-core values.

    The k-core decomposition of ``g`` is passed to
    ``gt_stats.vertex_hist`` as the per-vertex quantity; only element 0 of
    its result is returned.
    """
    core_numbers = gt.kcore_decomposition(g)
    histogram = gt_stats.vertex_hist(g, core_numbers)
    return histogram[0]