import itertools

import numpy as np
from graph_tool import Graph
from graph_tool.topology import label_largest_component, shortest_distance


def simulate_cascade(g, p, source=None, return_tree=False):
    """graph_tool version of simulating a cascade.

    Returns an np.ndarray over vertices with the infection time in the
    cascade; an uninfected node has dist -1.
    """
    gv = sample_graph_by_p(g, p)  # project helper: keep each edge with prob. p

    if source is None:
        # consider the largest cc
        infected_nodes = np.nonzero(label_largest_component(gv).a)[0]
        source = np.random.choice(infected_nodes)

    times = get_infection_time(gv, source)  # project helper

    if return_tree:
        # infected nodes are those actually reached (time != -1); computing
        # them here also covers the case where `source` was passed in
        infected_nodes = np.nonzero(times != -1)[0]

        # get the tree edges
        _, pred_map = shortest_distance(gv, source=source, pred_map=True)
        edges = [(pred_map[i], i) for i in infected_nodes if i != source]

        # create tree
        tree = Graph(directed=True)
        tree.add_vertex(g.num_vertices())
        for u, v in edges:
            tree.add_edge(int(u), int(v))

        # filter the tree down to the vertices that occur in its edges
        vfilt = tree.new_vertex_property('bool')
        vfilt.a = False
        for v in set(itertools.chain(*edges)):
            vfilt[v] = True
        tree.set_vertex_filter(vfilt)

        return source, times, tree
    return source, times
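# Aside: a minimal, self-contained sketch (toy graph assumed) of the
# seed-selection idiom above -- label_largest_component() returns a boolean
# vertex property map, and np.nonzero() on its array view gives the indices
# of the vertices inside the giant component.
import numpy as np
import graph_tool.all as gt

g = gt.random_graph(200, lambda: (2, 2))  # hypothetical toy graph
in_giant = gt.label_largest_component(g)  # bool property, True inside giant CC
candidates = np.nonzero(in_giant.a)[0]    # vertex indices in the giant CC
seed = np.random.choice(candidates)       # uniform random seed vertex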
def __init__(self, graphml=None, graph=None):
    if graphml is not None:
        self.g = gt.load_graph(graphml)
        self.g.set_directed(False)
        print("Create graph from graphml {}".format(graphml))
    elif graph is not None:
        self.g = graph
        # keep only the largest component, removing from the highest index
        # down so fast swap-removals never disturb indices not yet visited
        giant = gt.label_largest_component(self.g)
        origin_size = self.g.num_vertices()
        for v in range(1, origin_size + 1):
            if not giant[origin_size - v]:
                self.g.remove_vertex(origin_size - v, fast=True)
        self.g.set_directed(False)
        print("Create graph from graph.")
    else:
        raise ValueError("No graphml or graph was provided!")

    print("Number of vertices: {}\nNumber of edges: {}"
          .format(self.g.num_vertices(), self.g.num_edges()))
    print("\n-----------------------------------------")

    self.v_residents = self.g.new_vertex_property("object")
    self.e_weights = self.g.new_edge_property("float")
    self.e_filter = self.g.new_edge_property("bool")
    self.walkers = [{} for v in self.g.vertices()]
    self.boost_size = 100
    self.random_boost = [[100, [0 for i in range(self.boost_size)]]
                         for v in self.g.vertices()]
    self.exetime = [0., 0., 0., 0.]
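# Aside: the descending remove_vertex() loop above can be expressed with the
# filter-and-prune idiom instead; a sketch, under the assumption that a pruned
# copy (rather than in-place removal) is acceptable:
import graph_tool.all as gt

g = gt.random_graph(100, lambda: (1, 1))                # hypothetical toy graph
giant = gt.label_largest_component(g)
g = gt.Graph(gt.GraphView(g, vfilt=giant), prune=True)  # copy of the giant CC only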
def __init__(self, graphml=None, graph=None):
    if graphml is not None:
        self.g = gt.load_graph(graphml)
        # self.v_name = self.g.new_edge_property("str")
        print(self.g.list_properties())
        print("Create graph from graphml {}".format(graphml))
    elif graph is not None:
        self.g = graph
        # keep only the largest component (highest index first, see above)
        giant = gt.label_largest_component(self.g)
        origin_size = self.g.num_vertices()
        for v in range(1, origin_size + 1):
            if not giant[origin_size - v]:
                self.g.remove_vertex(origin_size - v, fast=True)
        self.g.set_directed(False)
        print("Create graph from graph.")
    else:
        raise ValueError("No graphml or graph was provided!")

    print("Number of vertices: {}\nNumber of edges: {}"
          .format(self.g.num_vertices(), self.g.num_edges()))
    print("\n-----------------------------------------")

    self.v_infected = self.g.new_vertex_property("bool")
    self.v_reinfected = self.g.new_vertex_property("int")
    self.e_reinfected = self.g.new_edge_property("int")
    self.e_spread_beta = self.g.new_edge_property("double")
    self.e_extinct_beta = self.g.new_edge_property("double")
import sys

import khmer
import screed
import graph_tool.all as gt


def main():
    input_fasta = sys.argv[3]
    K = int(sys.argv[1])
    x = float(sys.argv[2])

    ht = khmer.Nodegraph(K, x, 4)

    sparse_graph = gt.Graph()
    hashes = sparse_graph.new_vertex_property("long long")

    for n, record in enumerate(screed.open(input_fasta)):
        if n % 1000 == 0:
            print('...loaded and tagged {} sequences'.format(n),
                  file=sys.stderr)
        name = record.name
        sequence = record.sequence

        ht.consume_sequence_and_tag_with_labels(sequence, n)
        tags = ht.sweep_tag_neighborhood(sequence, 0)
        for i in range(len(tags) - 1):
            src = tags[i]
            dst = tags[i + 1]

            new = False

            # find_vertex returns a (possibly empty) list of matches
            srcv = gt.find_vertex(sparse_graph, hashes, src)
            if not srcv:
                srcv = sparse_graph.add_vertex()
                hashes[srcv] = src
                new = True
            else:
                srcv = srcv[0]

            dstv = gt.find_vertex(sparse_graph, hashes, dst)
            if not dstv:
                dstv = sparse_graph.add_vertex()
                hashes[dstv] = dst
                new = True
            else:
                dstv = dstv[0]

            # only add the edge if at least one endpoint is new
            if new:
                e = sparse_graph.add_edge(srcv, dstv)

    print('Sparse graph has {} nodes, {} edges'.format(
        sparse_graph.num_vertices(), sparse_graph.num_edges()))
    comp = gt.label_largest_component(sparse_graph, directed=False)
    # pos = gt.radial_tree_layout(sparse_graph, sparse_graph.vertex(0))
    gt.graph_draw(sparse_graph, output_size=(5000, 5000),
                  output=input_fasta + '_sparse.png')
    sparse_graph.set_vertex_filter(comp)
    gt.graph_draw(sparse_graph, output_size=(5000, 5000),
                  output=input_fasta + '_sparse_comp.png')
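# Aside: gt.find_vertex() scans the whole property map on every call, so the
# lookup-or-add pattern above costs O(|V|) per tag. A plain dict index over
# the same "long long" property is the usual fix; a sketch (vertex_for is a
# hypothetical helper, not part of the script above):
import graph_tool.all as gt

g = gt.Graph()
hashes = g.new_vertex_property("long long")
index = {}  # tag hash -> vertex descriptor

def vertex_for(tag):
    v = index.get(tag)
    if v is None:
        v = g.add_vertex()
        hashes[v] = tag
        index[tag] = v
    return v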
# Python 2 variant of the same script (older khmer new_hashbits API).
def main():
    input_fasta = sys.argv[3]
    K = int(sys.argv[1])
    x = float(sys.argv[2])

    ht = khmer.new_hashbits(K, x, 4)

    sparse_graph = gt.Graph()
    hashes = sparse_graph.new_vertex_property("long long")

    for n, record in enumerate(screed.open(input_fasta)):
        if n % 1000 == 0:
            print >>sys.stderr, '...loaded and tagged {} sequences'.format(n)
        name = record.name
        sequence = record.sequence

        ht.consume_sequence_and_tag_with_labels(sequence, n)
        tags = ht.sweep_tag_neighborhood(sequence, 0)
        for i in xrange(len(tags) - 1):
            src = tags[i]
            dst = tags[i + 1]

            new = False

            srcv = gt.find_vertex(sparse_graph, hashes, src)
            if not srcv:
                srcv = sparse_graph.add_vertex()
                hashes[srcv] = src
                new = True
            else:
                srcv = srcv[0]

            dstv = gt.find_vertex(sparse_graph, hashes, dst)
            if not dstv:
                dstv = sparse_graph.add_vertex()
                hashes[dstv] = dst
                new = True
            else:
                dstv = dstv[0]

            if new:
                e = sparse_graph.add_edge(srcv, dstv)

    print 'Sparse graph has {} nodes, {} edges'.format(
        sparse_graph.num_vertices(), sparse_graph.num_edges())
    comp = gt.label_largest_component(sparse_graph, directed=False)
    # pos = gt.radial_tree_layout(sparse_graph, sparse_graph.vertex(0))
    gt.graph_draw(sparse_graph, output_size=(5000, 5000),
                  output=input_fasta + '_sparse.png')
    sparse_graph.set_vertex_filter(comp)
    gt.graph_draw(sparse_graph, output_size=(5000, 5000),
                  output=input_fasta + '_sparse_comp.png')
def main():
    # The description of the data is available at
    # <https://graph-tool.skewed.de/static/doc/collection.html>
    for name in ['karate', 'lesmis', 'football', 'dolphins', 'netscience']:
        g = gt.collection.data[name]
        g = gt.GraphView(g, directed=False)
        if name == 'netscience':
            # Use only the largest component in the netscience data
            l = gt.label_largest_component(g)
            g = gt.Graph(gt.GraphView(g, vfilt=l), prune=True)
        process(name, g)
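# Aside: the netscience special case above is the standard "pruned copy of the
# giant component" idiom. Recent graph-tool releases also ship a one-call
# equivalent (a sketch, assuming your version provides it):
import graph_tool.all as gt

g = gt.collection.data['netscience']
g = gt.extract_largest_component(g, prune=True)  # pruned copy of the giant CC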
def load_graph(path, algorithms, format='graphml', component=False):
    sys.stdout.write('Loading network ...')
    sys.stdout.flush()
    t0 = time.time()

    g = gt.load_graph(path, fmt=format)
    if 'kores' in algorithms:
        # k-core decomposition expects a simple graph
        gt.remove_parallel_edges(g)
        gt.remove_self_loops(g)
    if component:
        largest_component = gt.label_largest_component(g, directed=False)
        g.set_vertex_filter(largest_component)
        g.purge_vertices()

    t = time.time()
    sys.stdout.write('Ok! ({0} s.)\n'.format(t - t0))
    return g
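# Aside: the cleanup above in isolation -- kcore_decomposition() assumes a
# simple graph, so parallel edges and self-loops are stripped first (toy
# graph assumed):
import graph_tool.all as gt

g = gt.random_graph(50, lambda: (2, 2), parallel_edges=True, self_loops=True)
gt.remove_parallel_edges(g)
gt.remove_self_loops(g)
kcore = gt.kcore_decomposition(g)  # int vertex property: core number per vertex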
def save_largest_component():
    global Graph
    l = gt.label_largest_component(Graph)
    print l.a
    remove = []
    for x in xrange(len(l.a)):
        if l.a[x] == 0:
            remove.append(x)
    Graph.remove_vertex(remove)
    # u = gt.GraphView(Graph, vfilt=l)
    gt.remove_parallel_edges(Graph)
    Graph.save(base_path + graph_tool_file)
def paint_kcore(path, graph, name):
    if path:
        sys.stdout.write('Drawing kcore graph ... ')
        sys.stdout.flush()

        network = gt.Graph(graph, directed=False)
        folder = os.path.abspath(path)
        network = gt.GraphView(network,
                               vfilt=gt.label_largest_component(network))
        kcore = gt.kcore_decomposition(network)
        pos = gt.sfdp_layout(network)
        gt.graph_draw(network, pos=pos, vertex_fill_color=kcore,
                      vertex_text=kcore,
                      output=os.path.join(folder,
                                          str(name) + '-graph-kcore.svg'))

        sys.stdout.write('Ok!\n')
        sys.stdout.flush()
def largest_strongly_connected_component(self, graph):
    from graph_tool import Graph
    import graph_tool.all as gt

    largest_connected_component = Graph(directed=True)
    if not self.is_relationship:
        edge_prop_time = largest_connected_component.new_edge_property("int")
        edge_prop_type = largest_connected_component.new_edge_property("string")

    for edge in tqdm(graph.edges(data=True)):
        e = tuple(edge[:2])
        # keep the edge descriptor so the property maps can be indexed by it
        e_desc = largest_connected_component.add_edge(e[0], e[1])
        if not self.is_relationship:
            edge_prop_time[e_desc] = edge[-1]["time"]
            edge_prop_type[e_desc] = edge[-1]["type"]

    # for a directed graph this labels the largest *strongly* connected component
    largest_connected_component_view = gt.label_largest_component(
        largest_connected_component)
    largest_connected_component = gt.GraphView(
        largest_connected_component, vfilt=largest_connected_component_view)

    print("Total nodes {0} in largest strongly connected component.".format(
        largest_connected_component.num_vertices()))
    print("Total edges {0} in largest strongly connected component.".format(
        largest_connected_component.num_edges()))

    with open(self.output, "w+") as output_file:
        for edge in tqdm(largest_connected_component.edges()):
            if not self.is_relationship:
                output_file.write("{0} {1} {2} {3}\n".format(
                    edge.source(), edge.target(),
                    edge_prop_time[edge], edge_prop_type[edge]))
            else:
                output_file.write("{0} {1}\n".format(
                    edge.source(), edge.target()))
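# Aside: for a directed graph, label_largest_component() labels the largest
# *strongly* connected component by default; pass directed=False to get the
# largest weakly connected one instead. A sketch on a toy graph:
import graph_tool.all as gt

g = gt.random_graph(100, lambda: (1, 1))             # directed toy graph
scc = gt.label_largest_component(g)                  # largest SCC
wcc = gt.label_largest_component(g, directed=False)  # largest WCC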
def kcore_growing_daily_rewiring(fn,
                                 ofn=None,
                                 freq='D',
                                 model='constrained-configuration'):
    """The growing of kcore by rewiring daily."""
    if ofn is None:
        ofn = 'kcore.growing.daily-rewiring.{}.csv'.format(model)

    # load only necessary columns
    df = pd.read_csv(fn, parse_dates=['tweet_created_at'], usecols=[2, 3, 4])
    df = df.set_index('tweet_created_at')
    # remove self-loops
    df = df.loc[df.from_raw_id != df.to_raw_id]

    # mark the last row of each period (e.g. each day) as a grouping point
    df['row_id'] = np.arange(len(df))
    df['gpf'] = False
    gpf_rows = df.row_id.groupby(pd.Grouper(freq=freq)).last()
    gpf_rows = gpf_rows.loc[gpf_rows.notnull()].astype('int')
    df.loc[df.row_id.isin(gpf_rows.values), 'gpf'] = True

    v_map = dict()
    e_set = set()
    v_counter = -1
    g = gt.Graph()

    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []
    ts = []

    for created_at, from_raw_id, to_raw_id, gpf in df[[
            'from_raw_id', 'to_raw_id', 'gpf'
    ]].itertuples():
        e = (from_raw_id, to_raw_id)
        if e not in e_set:
            if from_raw_id not in v_map:
                v_counter += 1
                v_map[from_raw_id] = v_counter
            if to_raw_id not in v_map:
                v_counter += 1
                v_map[to_raw_id] = v_counter
            source = v_map.get(from_raw_id)
            target = v_map.get(to_raw_id)
            g.add_edge(source, target, add_missing=True)
            e_set.add(e)
        if gpf:
            g1 = g.copy()
            rejected = gt.random_rewire(g1, model=model, edge_sweep=True)
            logger.info('Number of rejected when rewiring: %s', rejected)
            ts.append(created_at)
            kcore = pd.Series(gt.kcore_decomposition(g1).a.copy())
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g1, directed=False)
            vnum.append(g1.num_vertices())
            enum.append(g1.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g1)
            logger.info('Main core at %s: k=%s, num=%s', created_at, mk, ms)

    cdf = pd.DataFrame(
        dict(timeline=ts,
             mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_component_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
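# Aside: the main-core bookkeeping above boils down to this sketch -- take the
# maximum core number and count the vertices that attain it (toy graph assumed):
import pandas as pd
import graph_tool.all as gt

g = gt.random_graph(300, lambda: (3, 3))
kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
mk = int(kcore.max())          # main-core index k
ms = int((kcore == mk).sum())  # number of vertices in the main core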
# If True, the frames will be dumped to disk as images.
offscreen = "offscreen" if args.offscreen else False
dir = './frames_dynamic-graph_with-comm-2+'
if offscreen and not os.path.exists(dir):
    os.mkdir(dir)

# load the graph
g = gt.load_graph(args.file)

# keep only vertices in infomap communities > 1, then the largest component
filter_comm = g.new_vertex_property("bool")
comm_infomap = np.array(list(g.vp['comm_infomap']))
filter_comm.a = (comm_infomap > 1)
g = gt.GraphView(g, vfilt=filter_comm, directed=False)
g = gt.GraphView(g, vfilt=gt.label_largest_component(g), directed=False)
g = gt.Graph(g, prune=True)

pos = g.vp["pos_sfdp_infomap"]  # layout positions

# find the initial and final date
id, im, iy = map(int, g.gp['initial-date'].split('-'))
fd, fm, fy = map(int, g.gp['final-date'].split('-'))
initial_date = date(iy, im, id)
final_date = date(fy, fm, fd)

# set the possible states of each vertex
future = sns.xkcd_rgb['grey']
present = sns.xkcd_rgb['yellow']
past = sns.xkcd_rgb['brick red']

# Initialize all vertices to the _future_ state
def main():
    import sys
    import os.path
    import glob
    import itertools
    from argparse import ArgumentParser

    parser = ArgumentParser(
        description='Read a graph, and produce a layout with t-SNE.')

    # Input
    parser.add_argument('graphs', nargs='+',
                        help='(List of) input graph(s). Or a folder with graphs.')

    # Output
    parser.add_argument('-o', default='./output',
                        help='Folder to write output to. Default: ./output')
    parser.add_argument('--save_every', type=int,
                        help='Save a jpg snapshot every x epochs.')
    parser.add_argument('--render_video', action='store_true',
                        help='Render a video of the layout evolution. '
                             'Needs ImageMagick and ffmpeg.')
    parser.add_argument('--retain_snaps', action='store_true',
                        help='Retain the snapshots. This argument is ignored '
                             'if no video is rendered.')
    parser.add_argument('--save_layout_data', action='store_true',
                        help='Save all layout coordinates in a .pickle file '
                             'and a .txt file.')
    parser.add_argument('--opacity', type=float, default=0.3,
                        help='Edge opacity.')

    # Manipulations to graph
    parser.add_argument('--strip_graph', action='store_true',
                        help='Retain only the largest connected component '
                             'in the graph.')
    parser.add_argument('--rnd_seed', '-r', type=int, nargs='+', default=[None],
                        help='Seed for random state. (Default: Random seed)')
    parser.add_argument('--pre_sfdp', action='store_true',
                        help='If this flag is given, the vertices will be '
                             'pre-initialized with SFDP.')
    parser.add_argument('--only_sfdp', action='store_true',
                        help='If this flag is given, only SFDP will be done.')
    parser.add_argument('--accept_all_sfdp', action='store_true',
                        help='If this flag is given, no confirmation is asked '
                             'for the SFDP layouts.')
    parser.add_argument('--remove_rnd_edges', nargs='+', type=float, default=[0],
                        help='Mutate the graph by removing random edges. If '
                             'this is used without a random seed, a random '
                             'random seed will be generated. The value given '
                             'to this argument is the fraction of edges that '
                             'will be removed.')

    # Hyperparameters
    parser.add_argument('--n_epochs', '-e', nargs='+', type=int, default=[1000],
                        help='One or more numbers of t-SNE epochs.')
    parser.add_argument('--lr_init', nargs='+', type=float, default=[80],
                        help='One or more initial learning rates.')
    parser.add_argument('--lr_final', nargs='+', type=float, default=[None],
                        help='One or more final learning rates. '
                             'Default: Same as lr_init.')
    parser.add_argument('--lr_switch', nargs='+', type=int, default=[None],
                        help='One or more learning rate switch-points.')
    parser.add_argument('--momentum_init', nargs='+', type=float, default=[0.5],
                        help='One or more initial momenta.')
    parser.add_argument('--momentum_final', nargs='+', type=float, default=[0.5],
                        help='One or more final momenta.')
    parser.add_argument('--momentum_switch', nargs='+', type=int, default=[None],
                        help='One or more momentum switch-points.')

    # Distance metric parameters
    parser.add_argument('--distance_metric', '-d',
                        choices=['shortest_path', 'spdm', 'modified_adjacency', 'mam'],
                        default='spdm',
                        help='The distance metric that is used for the '
                             'pairwise distances.')
    parser.add_argument('-k', nargs='+', type=float, default=[1],
                        help='Exponent for transfer function.')

    # Cost function parameters
    #   Kullback-Leibler
    parser.add_argument('--perplexity', '-p', nargs='+', type=float, default=[80],
                        help='One or more perplexities.')
    parser.add_argument('--l_kl_init', nargs='+', type=float, default=[1],
                        help='One or more KL factors.')
    parser.add_argument('--l_kl_final', nargs='+', type=float, default=[1],
                        help='One or more KL factors.')
    parser.add_argument('--l_kl_switch', nargs='+', type=int, default=[None],
                        help='One or more KL switch-points')
    #   Edge contraction
    parser.add_argument('--l_e_init', nargs='+', type=float, default=[0],
                        help='One or more edge contraction factors.')
    parser.add_argument('--l_e_final', nargs='+', type=float, default=[0],
                        help='One or more edge contraction factors.')
    parser.add_argument('--l_e_switch', nargs='+', type=int, default=[None],
                        help='One or more edge contraction switch-points')
    #   Compression
    parser.add_argument('--l_c_init', nargs='+', type=float, default=[1.2],
                        help='One or more compression factors.')
    parser.add_argument('--l_c_final', nargs='+', type=float, default=[0],
                        help='One or more compression factors.')
    parser.add_argument('--l_c_switch', nargs='+', type=int, default=[None],
                        help='One or more compression switch-points')
    #   Repulsion
    parser.add_argument('--l_r_init', nargs='+', type=float, default=[0],
                        help='One or more repulsion factors.')
    parser.add_argument('--l_r_final', nargs='+', type=float, default=[0.5],
                        help='One or more repulsion factors.')
    parser.add_argument('--l_r_switch', nargs='+', type=int, default=[None],
                        help='One or more repulsion switch-points')
    parser.add_argument('--r_eps', nargs='+', type=float, default=[0.2],
                        help='Additional term in denominator to prevent '
                             'near-singularities.')

    args = parser.parse_args()

    # Retrieve a list of all files in the directory, if args.graphs[0] is a directory.
    if len(args.graphs) == 1 and os.path.isdir(args.graphs[0]):
        args.graphs = glob.glob(args.graphs[0] + '/*')

    # Check graph input
    for g_file in args.graphs:
        if not os.path.isfile(g_file):
            raise FileNotFoundError(g_file + ' is not a file.')

    # Generate a random random seed if none is given.
    if args.rnd_seed == [None]:
        args.rnd_seed = [np.random.randint(1e8)]

    # Always retain snapshots if no video is rendered.
    if not args.render_video:
        args.retain_snaps = True

    # Get names of the graphs (by splitting off path and extension)
    names = [os.path.split(os.path.splitext(file)[0])[1] for file in args.graphs]

    # Determine output folders. One is created in the specified output folder
    # for every graph that is supplied.
    output_folders = [args.o + '/' + name for name in names]

    # Check (and possibly create) output folders
    for folder in [args.o] + output_folders:
        if not os.path.exists(folder):
            os.makedirs(folder)

    # At least everything is fine for now.
    there_were_exceptions = False

    # Loop over all graphs (and their respective output folders)
    for g_file, g_name, output_folder in zip(args.graphs, names, output_folders):
        # Load the graph
        g = graph_io.load_graph(g_file)
        print('[tsnetwork] Loaded graph {0} (|V| = {1}, |E| = {2}) into memory.'
              .format(g_name, g.num_vertices(), g.num_edges()))

        # Add graph name as property in the internal representation
        g.graph_properties['name'] = g.new_graph_property('string', g_name)

        # Usually this loop has just one iteration, with only 0 as the value
        # for rmv_edge_frac (that is, no edges are removed).
        for rmv_edge_frac in args.remove_rnd_edges:
            print('[tsnetwork] Original graph: (|V|, |E|) = ({0}, {1}).'
                  .format(g.num_vertices(), g.num_edges()))

            # Create a temporary copy of the graph that will be manipulated.
            gv = gt.GraphView(g)

            # Remove rmv_edge_frac of the graph's edges from gv.
            gv.clear_filters()
            gv.reindex_edges()
            edge_list = list(gv.edges())
            not_here_ep = gv.new_edge_property('bool', val=True)
            n_remove_edges = int(rmv_edge_frac * gv.num_edges())
            for e in np.random.randint(0, gv.num_edges(), n_remove_edges):
                not_here_ep[edge_list[e]] = False
            gv.set_edge_filter(not_here_ep)

            if n_remove_edges > 0:
                print('[tsnetwork] Removed {2} random edges: (|V|, |E|) = ({0}, {1}).'
                      .format(gv.num_vertices(), gv.num_edges(), n_remove_edges))

            # Filter the graph s.t. only the largest connected component remains.
            if args.strip_graph:
                largest_connected_component = gt.label_largest_component(gv)
                gv.set_vertex_filter(largest_connected_component)
                gv.purge_vertices()
                print('[tsnetwork] Filtered largest component: (|V|, |E|) = ({0}, {1}).'
                      .format(gv.num_vertices(), gv.num_edges()))

            if args.pre_sfdp or args.only_sfdp:
                # Perform an SFDP layout (either as the only layout or as a
                # starting point for t-SNE.)
                Y_init, _ = sfdp_placement(
                    gv, output_folder,
                    ask_for_acceptance=not args.accept_all_sfdp,
                    opacity=args.opacity)
                if args.only_sfdp:
                    continue
            else:
                # Random positions will be generated
                Y_init = None

            # Compute the distance matrix of this graph with the specified metric
            X = distance_matrix.get_distance_matrix(gv, args.distance_metric)

            # Retrieve the adjacency matrix of the graph
            Adj_sparse = gt.adjacency(gv)
            Adj = np.zeros(Adj_sparse.shape, dtype='float32')
            for i, j in zip(*Adj_sparse.nonzero()):
                Adj[i, j] = Adj_sparse[i, j]

            # Make a list of tsnetwork configuration objects. These are objects
            # that represent a configuration for a t-SNE layout.
            tsn_configs = []
            for (perplexity, n_epochs, initial_lr, final_lr, lr_switch,
                 initial_momentum, final_momentum, momentum_switch,
                 initial_l_kl, final_l_kl, l_kl_switch,
                 initial_l_e, final_l_e, l_e_switch,
                 initial_l_c, final_l_c, l_c_switch,
                 initial_l_r, final_l_r, l_r_switch,
                 r_eps, k, rnd_seed) in itertools.product(
                     args.perplexity, args.n_epochs, args.lr_init,
                     args.lr_final, args.lr_switch, args.momentum_init,
                     args.momentum_final, args.momentum_switch,
                     args.l_kl_init, args.l_kl_final, args.l_kl_switch,
                     args.l_e_init, args.l_e_final, args.l_e_switch,
                     args.l_c_init, args.l_c_final, args.l_c_switch,
                     args.l_r_init, args.l_r_final, args.l_r_switch,
                     args.r_eps, args.k, args.rnd_seed):

                # Use 50% for the switching points if no argument is given
                if lr_switch is None:
                    lr_switch = int(n_epochs * 0.5)
                if momentum_switch is None:
                    momentum_switch = int(n_epochs * 0.5)
                if l_kl_switch is None:
                    l_kl_switch = int(n_epochs * 0.5)
                if l_e_switch is None:
                    l_e_switch = int(n_epochs * 0.5)
                if l_c_switch is None:
                    l_c_switch = int(n_epochs * 0.5)
                if l_r_switch is None:
                    l_r_switch = int(n_epochs * 0.5)
                if final_lr is None:
                    final_lr = initial_lr

                cfg = TsnConfig(
                    perplexity=perplexity, n_epochs=n_epochs,
                    initial_lr=initial_lr, final_lr=final_lr,
                    lr_switch=lr_switch,
                    initial_momentum=initial_momentum,
                    final_momentum=final_momentum,
                    momentum_switch=momentum_switch,
                    initial_l_kl=initial_l_kl, final_l_kl=final_l_kl,
                    l_kl_switch=l_kl_switch,
                    initial_l_e=initial_l_e, final_l_e=final_l_e,
                    l_e_switch=l_e_switch,
                    initial_l_c=initial_l_c, final_l_c=final_l_c,
                    l_c_switch=l_c_switch,
                    initial_l_r=initial_l_r, final_l_r=final_l_r,
                    l_r_switch=l_r_switch,
                    r_eps=r_eps, k=k, pre_sfdp=args.pre_sfdp,
                    rmv_edge_frac=rmv_edge_frac, rnd_seed=rnd_seed,
                    distance_matrix=args.distance_metric)

                # Do not add configurations that already have files matching
                # the description, unless the user confirms to overwrite.
                if any([file.startswith(cfg.get_description() + '.')
                        for file in os.listdir(output_folder)]):
                    if not usr_input.confirm('[tsnetwork] ' +
                                             cfg.get_description() +
                                             ' files exist! Overwrite?'):
                        continue

                tsn_configs.append(cfg)

            # Loop over the t-SNE configurations for a single graph
            for cfg in tsn_configs:
                print('[tsnetwork] Processing: ' + cfg.get_description())

                # Path to the directory where the snapshots will go
                # (if --save_every is given).
                snaps_dir = output_folder + '/snaps_' + cfg.get_description()

                # Clean out the existing snaps directory if it exists.
                if args.save_every is not None and os.path.exists(snaps_dir):
                    if usr_input.confirm('[tsnetwork] ' + snaps_dir +
                                         ' exists. Delete contents?'):
                        for file in os.listdir(snaps_dir):
                            file_path = os.path.join(snaps_dir, file)
                            try:
                                if os.path.isfile(file_path):
                                    os.unlink(file_path)
                                elif os.path.isdir(file_path):
                                    shutil.rmtree(file_path)
                            except Exception as e:
                                print(e)
                elif args.save_every is not None and not os.path.exists(snaps_dir):
                    # Make a folder for snaps, if it is necessary and it
                    # doesn't exist yet.
                    os.makedirs(snaps_dir)

                # Apply the transfer function
                X_transfered = X**cfg.k

                # Try to do the t-SNE layout.
                try:
                    Y, costs = thesne.tsne(
                        X_transfered, random_state=cfg.rnd_seed,
                        perplexity=cfg.perplexity, n_epochs=cfg.n_epochs,
                        Y=Y_init,
                        initial_lr=cfg.initial_lr, final_lr=cfg.final_lr,
                        lr_switch=cfg.lr_switch,
                        initial_momentum=cfg.initial_momentum,
                        final_momentum=cfg.final_momentum,
                        momentum_switch=cfg.momentum_switch,
                        initial_l_kl=cfg.initial_l_kl,
                        final_l_kl=cfg.final_l_kl,
                        l_kl_switch=cfg.l_kl_switch,
                        initial_l_e=cfg.initial_l_e,
                        final_l_e=cfg.final_l_e,
                        l_e_switch=cfg.l_e_switch,
                        initial_l_c=cfg.initial_l_c,
                        final_l_c=cfg.final_l_c,
                        l_c_switch=cfg.l_c_switch,
                        initial_l_r=cfg.initial_l_r,
                        final_l_r=cfg.final_l_r,
                        l_r_switch=cfg.l_r_switch,
                        r_eps=cfg.r_eps, Adj=Adj, g=gv,
                        snaps_output_folder=snaps_dir,
                        save_every=args.save_every)
                except (thesne.NaNException, thesne.SigmaTooLowException) as e:
                    there_were_exceptions = True
                    print('[exception] {0}'.format(e))
                    # Also write the exception to a file.
                    with open(output_folder + '/exception_' +
                              cfg.get_description() + '.out', 'w') as f:
                        print('{0}'.format(e), file=f)
                    print('[tsnetwork] Continuing with next TsnConfig.')
                    continue

                # Render an animation of the snapshots
                if args.render_video:
                    animations.save_animation(snaps_dir, cfg.get_description())

                # Remove the directory with snapshots.
                if args.save_every is not None and not args.retain_snaps \
                        and os.path.exists(snaps_dir):
                    print('[tsnetwork] Cleaning up snaps directory.')
                    shutil.rmtree(snaps_dir)

                # Save the data (graph, vertex coordinates)
                if args.save_layout_data:
                    layout_io.save_vna_layout(
                        output_folder + '/layout_' +
                        cfg.get_description() + '.vna', gv, Y)
                    layout_io.save_layout_txt(
                        output_folder + '/layout_edges_' +
                        cfg.get_description() + '.txt', gv, Y)

                # Save the final drawing of the layout
                layout_io.save_drawing(output_folder, gv, Y.T,
                                       cfg.get_description(),
                                       formats=['jpg', 'pdf'],
                                       edge_colors="rgb",
                                       draw_vertices=False,
                                       opacity=args.opacity)

    if there_were_exceptions:
        print('[tsnetwork] Done! However, be wary. There were exceptions.')
    else:
        print('[tsnetwork] Done!')
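# Aside: the --strip_graph branch above uses graph-tool's filter-then-purge
# pattern; in isolation (toy graph assumed):
import graph_tool.all as gt

gv = gt.GraphView(gt.random_graph(100, lambda: (2, 2)))
gv.set_vertex_filter(gt.label_largest_component(gv))
gv.purge_vertices()  # makes the filter permanent and re-indexes the vertices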
# (fragment: inside the loop that builds the word-pair graph)
            ver_names[v2] = w2
        else:
            v2 = pairs_graph.vertex(word_dict[w2])
        if cur_weight == 0:
            continue
        e = pairs_graph.add_edge(v1, v2)
        edge_weights[e] = cur_weight

# adding properties
pairs_graph.vertex_properties["name"] = ver_names
pairs_graph.vertex_properties["id"] = ver_id
pairs_graph.edge_properties["weight"] = edge_weights

print("graph built")
print(str(len(word_dict)))

largest_label = label_largest_component(pairs_graph)

# reading negative and positive parts
pos_file = open('../results/pos' + ftag + '.txt', 'r', encoding="utf-8")
neg_file = open('../results/neg' + ftag + '.txt', 'r', encoding="utf-8")
positive = []
negative = []
neutral = []
for s in pos_file:
    s = s.strip(' \n\uefef')
    if len(s) != 0:
        positive.append(s)
for s in neg_file:
    s = s.strip(' \n\uefef')
    if len(s) != 0:
        negative.append(s)
def kcore_growing_shuffle(fn1='retweet.201710.claim.raw.csv',
                          fn2='graph.daily.csv',
                          ofn=None,
                          rewiring=None):
    """The growing of kcore by shuffling the edge list."""
    if ofn is None:
        ofn = 'kcore.growing.shuffle'
        if rewiring:
            ofn += '.' + rewiring
        ofn += '.csv'

    g = prepare_network_from_raw(fn1)
    if rewiring is not None:
        gt.random_rewire(g, model=rewiring)

    evmap = pd.read_csv(fn2)
    enum_list = evmap['enum'].tolist()

    emap = pd.DataFrame(g.get_edges().copy(),
                        columns=['source', 'target', 'idx'])
    emap = emap[['source', 'target']]
    # shuffle the edge list
    emap = emap.reindex(np.random.permutation(emap.index)).reset_index(drop=True)

    v_map = dict()
    v_counter = -1
    gp_counter = 0

    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []

    g = gt.Graph()
    for i, s, t in emap.itertuples():
        if s not in v_map:
            v_counter += 1
            v_map[s] = v_counter
        if t not in v_map:
            v_counter += 1
            v_map[t] = v_counter
        source = v_map.get(s)
        target = v_map.get(t)
        g.add_edge(source, target, add_missing=True)
        if g.num_edges() >= enum_list[gp_counter]:
            kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g, directed=False)
            vnum.append(g.num_vertices())
            enum.append(g.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g)
            logger.info('gp counter: %s', gp_counter)
            logger.info('Main core at enum=%s: k=%s, num=%s',
                        g.num_edges(), mk, ms)
            gp_counter += 1
            if gp_counter >= len(enum_list):
                break

    cdf = pd.DataFrame(
        dict(mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_component_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
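# Aside: random_rewire() shuffles edges in place and, for the configuration
# models, preserves each vertex's degree; it returns the number of rejected
# edge moves. A sketch on a toy graph:
import graph_tool.all as gt

g = gt.random_graph(200, lambda: (2, 2))
rejected = gt.random_rewire(g, model='configuration')  # degree-preserving shuffle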
def kcore_growing_ba(fn1='ba.gml', fn2='graph.daily.csv', ofn=None):
    """The growing of kcore for a BA model."""
    if ofn is None:
        ofn = 'kcore.growing.ba.csv'

    g = gt.load_graph(fn1)
    evmap = pd.read_csv(fn2)
    vnum_list = evmap['vnum'].tolist()

    emap = pd.DataFrame(g.get_edges().copy(),
                        columns=['source', 'target', 'idx'])
    emap = emap[['source', 'target']]

    v_map = dict()
    v_counter = -1
    gp_counter = 0

    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []

    g = gt.Graph()
    for i, s, t in emap.itertuples():
        if s not in v_map:
            v_counter += 1
            v_map[s] = v_counter
        if t not in v_map:
            v_counter += 1
            v_map[t] = v_counter
        source = v_map.get(s)
        target = v_map.get(t)
        g.add_edge(source, target, add_missing=True)
        if g.num_vertices() >= vnum_list[gp_counter]:
            kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g, directed=False)
            vnum.append(g.num_vertices())
            enum.append(g.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g)
            logger.info('gp counter: %s', gp_counter)
            logger.info('Main core at vnum=%s: k=%s, num=%s',
                        g.num_vertices(), mk, ms)
            gp_counter += 1
            try:
                vnum_list[gp_counter]
            except IndexError:
                break

    cdf = pd.DataFrame(
        dict(mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_component_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
def load_ply_layout(file):
    g = gt.Graph(directed=False)

    with open(file) as f:
        all_lines = f.read().splitlines()

    it = iter(all_lines)
    line = next(it)
    assert (line == 'ply')
    line = next(it)
    assert (line.startswith('format ascii'))

    # Scan to the vertex element
    line = next(it)
    while not line.startswith('element'):
        line = next(it)
    words = line.split(' ')
    assert (words[0] == 'element')
    assert (words[1] == 'vertex')
    assert (words[2].isdigit())
    n_vertices = int(words[2])
    g.add_vertex(n_vertices)
    assert (g.num_vertices() == n_vertices)

    # Read the vertex property declarations
    line = next(it)
    v_props = OrderedDict()
    while line.startswith('property'):
        words = line.split(' ')
        the_type = words[1]
        if the_type == 'list':
            name = words[4]
            v_props[name] = dict()
            v_props[name]['count_type'] = words[2]
            v_props[name]['entry_type'] = words[3]
        else:
            name = words[2]
            v_props[name] = dict()
            v_props[name]['type'] = the_type
        line = next(it)
    print(v_props)

    vps = dict()
    for name in v_props:
        if v_props[name].get('type') == 'float':
            vps[name] = g.new_vp('float')
        else:
            raise NotImplementedError()
    print(vps)
    assert ('x' in vps.keys())
    assert ('y' in vps.keys())
    assert ('z' in vps.keys())

    # Scan to the face element
    while not line.startswith('element'):
        line = next(it)
    words = line.split(' ')
    assert (words[0] == 'element')
    assert (words[1] == 'face')
    assert (words[2].isdigit())
    n_faces = int(words[2])
    print(n_faces)

    # Read the face property declarations
    line = next(it)
    f_props = OrderedDict()
    while line.startswith('property'):
        words = line.split(' ')
        the_type = words[1]
        if the_type == 'list':
            name = words[4]
            f_props[name] = dict()
            f_props[name]['count_type'] = words[2]
            f_props[name]['entry_type'] = words[3]
        else:
            name = words[2]
            f_props[name] = dict()
            f_props[name]['type'] = the_type
        line = next(it)
    print(f_props)

    while not line.startswith('end_header'):
        line = next(it)

    # Read the vertex data
    for i in range(n_vertices):
        line = next(it)
        words = [word for word in line.split(' ') if word != '']
        assert (len(words) == len(v_props.keys()))
        for j, word in enumerate(words):
            name = list(v_props.keys())[j]
            if v_props[name].get('type') == 'float':
                vps[name][i] = float(word)
            else:
                raise NotImplementedError

    # Read the face data and add each face's boundary cycle as edges
    for _ in range(n_faces):
        line = next(it)
        words = [word for word in line.split(' ') if word != '']
        i = 0
        for name in f_props.keys():
            # list properties store no 'type' key, hence the default
            the_type = f_props[name].get('type', 'list')
            if the_type == 'list':
                if f_props[name]['count_type'] == 'uchar':
                    n_items = int(words[i])
                else:
                    raise NotImplementedError
                the_list = [int(word)
                            for word in words[i + 1:i + 1 + n_items]]
                i += 1 + n_items
                if name == 'vertex_indices':
                    for j, idx1 in enumerate(the_list):
                        idx2 = the_list[(j + 1) % len(the_list)]
                        g.add_edge(idx1, idx2)
        assert (i == len(words))

    gt.remove_parallel_edges(g)

    # Drop vertices that are not referenced by any face
    largest_connected_component = gt.label_largest_component(g)
    unreferenced = sum([1 for i in largest_connected_component.a if i == 0])
    if unreferenced > 0:
        g.set_vertex_filter(largest_connected_component)
        g.purge_vertices()
        print('Filtered {0} unreferenced vertices.'.format(unreferenced))

    # Collect the coordinates
    if 'x' in vps.keys() and 'y' in vps.keys():
        if 'z' in vps.keys():
            Y = np.zeros((n_vertices, 3))
            for v in g.vertices():
                Y[v, 0] = vps['x'][v]
                Y[v, 1] = vps['y'][v]
                Y[v, 2] = vps['z'][v]
        else:
            Y = np.zeros((n_vertices, 2))
            for v in g.vertices():
                Y[v, 0] = vps['x'][v]
                Y[v, 1] = vps['y'][v]

    return g, Y
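# Aside: the face loop above turns every polygon into its boundary cycle; the
# wrap-around indexing in isolation (hypothetical quad):
face = [0, 1, 2, 3]
edges = [(v, face[(i + 1) % len(face)]) for i, v in enumerate(face)]
# -> [(0, 1), (1, 2), (2, 3), (3, 0)]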
def kcore_growing_weighted_shuffle(fn1,
                                   fn2='graph.daily.csv',
                                   ofn=None,
                                   freq='D'):
    """The growing of kcore by shuffling the retweet list."""
    if ofn is None:
        ofn = 'kcore.growing.weighted-shuffle.csv'

    # load only necessary columns
    df = pd.read_csv(fn1, usecols=[3, 4])
    # remove self-loops
    df = df.loc[df.from_raw_id != df.to_raw_id]
    # shuffle the retweet list
    df = df.reindex(np.random.permutation(df.index))

    evmap = pd.read_csv(fn2)
    enum_list = evmap['enum'].tolist()

    v_map = dict()
    v_counter = -1
    e_set = set()
    gp_counter = 0

    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []

    g = gt.Graph()
    for from_raw_id, to_raw_id in df[['from_raw_id',
                                      'to_raw_id']].itertuples(index=False):
        e = (from_raw_id, to_raw_id)
        if e not in e_set:
            if from_raw_id not in v_map:
                v_counter += 1
                v_map[from_raw_id] = v_counter
            if to_raw_id not in v_map:
                v_counter += 1
                v_map[to_raw_id] = v_counter
            source = v_map.get(from_raw_id)
            target = v_map.get(to_raw_id)
            g.add_edge(source, target, add_missing=True)
            e_set.add(e)
        if g.num_edges() >= enum_list[gp_counter]:
            kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g, directed=False)
            vnum.append(g.num_vertices())
            enum.append(g.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g)
            logger.info('gp counter: %s', gp_counter)
            logger.info('Main core at enum=%s: k=%s, num=%s',
                        g.num_edges(), mk, ms)
            gp_counter += 1
            if gp_counter >= len(enum_list):
                break

    cdf = pd.DataFrame(
        dict(mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_component_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
# Merging data
from functools import reduce

ctrlity_frame = [df_eigen_centrality, df_harmnic_centrality,
                 df_betweenness_centrality, df_degree_centrality]
ctrlity_merged = reduce(
    lambda left, right: pd.merge(left, right, on=['ctry', 'year'], how='inner'),
    ctrlity_frame).fillna('0')
ctrlity_merged.to_csv(
    "/content/drive/MyDrive/G11-MEA-Diffusion/dataMEA_Ctrlity/ctrlity_output.csv")

"""### visualization"""

# eigenvector centrality
ee, x = gt.eigenvector(gt_2018_univ)
x.a /= (x.a * 10 - 0.7) / 0.04  # follow the formula in the book
gt.graph_draw(gt_2018_univ, vertex_fill_color=x,
              vcmap=matplotlib.cm.gist_earth, vorder=x)

# closeness centrality (on the largest component, where closeness is defined)
gc = gt.GraphView(gt_2018_univ, vfilt=gt.label_largest_component(gt_2018_univ))
c = gt.closeness(gc)
c.a /= c.a / 232
gt.graph_draw(gc, vertex_fill_color=c, vcmap=matplotlib.cm.Oranges, vorder=c)

# betweenness centrality
bv, be = gt.betweenness(gt_2018_univ)
gt.graph_draw(gt_2018_univ, pos=None, vertex_fill_color=bv,
              vcmap=matplotlib.cm.summer)

# degree centrality
deg = gt_2018_univ.degree_property_map("total")
gt.graph_draw(gt_2018_univ, vertex_fill_color=deg, vorder=deg)

# https://colab.research.google.com/github/count0/colab-gt/blob/master/colab-gt.ipynb#scrollTo=6km1lWMF2kAm
!apt-get install
import powerlaw
import sys
import scipy
import math
import numpy as np
import graph_tool.all as gt
import matplotlib.pyplot as plt
import vis

G = gt.collection.data[sys.argv[1]]

# keep only the largest component, removing from the highest index down
giant = gt.label_largest_component(G)
origin_size = G.num_vertices()
print('test\n\n\n\n')
for v in range(1, origin_size + 1):
    if not giant[origin_size - v]:
        G.remove_vertex(origin_size - v, fast=True)
G.set_directed(False)

# data can be a list or numpy array
data = sorted(G.get_out_degrees([v for v in G.vertices()]))
print('test\n\n\n\n')
print(len(data))
print('test\n\n\n\n')

results = powerlaw.Fit(data)
print(results.power_law.alpha)
print(results.power_law.xmin)
R, p = results.distribution_compare('power_law', 'lognormal')

y = []
x = []
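# Aside: the list comprehension above can be avoided -- get_out_degrees()
# accepts a vertex index array directly, such as the one returned by
# get_vertices(). A sketch (the dataset name is illustrative):
import graph_tool.all as gt

G = gt.collection.data['celegansneural']
data = sorted(G.get_out_degrees(G.get_vertices()))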
f_g.edge_properties['cofield'] = e_w

# <codecell>

print f_g.num_edges()
f_g.num_vertices()

# <codecell>

v_comm = gt.community_structure(f_g, 1000, 5)
#v_comm = gt.betweenness(f_g)

# <codecell>

import numpy

u = gt.GraphView(f_g, vfilt=gt.label_largest_component(f_g))
deg = u.degree_property_map('total', weight=f_g.edge_properties['cofield'])
deg.fa = 2 * (numpy.sqrt(deg.fa) * 0.5 + 0.4)
edg = f_g.edge_properties['cofield']
edg.fa = (numpy.sqrt(edg.fa) * 0.6 + 1)
ebet = gt.betweenness(f_g)[1]

# <codecell>

# <codecell>

pos, int = gt.interactive_window(u,
                                 pos=gt.radial_tree_layout(f_g, f_g.vertex(1)),
                                 vertex_size=deg,
                                 vertex_fill_color=v_comm,
                                 vertex_text=f_g.vertex_properties['field'],
import matplotlib.cm
import graph_tool.all as gtool

gr = gtool.collection.data["polblogs"]
gr = gtool.GraphView(gr, vfilt=gtool.label_largest_component(gr))

cness = gtool.closeness(gr)

gtool.graph_draw(gr, pos=gr.vp["pos"], vertex_fill_color=cness,
                 vertex_size=gtool.prop_to_size(cness, mi=5, ma=15),
                 vorder=cness, vcmap=matplotlib.cm.gist_heat,
                 output="political_closeness.pdf")
def load_net(infile, core=False, filter=False):
    '''
    Load a `graphml` file.
    :param infile: The `graphml` file to load.
    :param core: Does the net contain a core vertex property map?
    :param filter: Apply a filter?
    :return: the graph_tool `Graph`, a prefix for output files, and
        (if core is True) the property map for core vertices
    '''
    # Output filename
    # Prefix only, not extension:
    # `split('.')` splits `infile` at the periods and returns a list
    # `[:-1]` grabs everything except the extension
    # `'.'.join` recombines everything with periods
    outfile_pre = '.'.join(infile.split('.')[:-1])

    if path.exists('output/' + outfile_pre + '.out.gt'):
        print('Found pre-processed graph')
        infile = 'output/' + outfile_pre + '.out.gt'

    print('Loading ' + infile)
    net = gt.load_graph(infile)

    # If `core` is true, extract the core set
    if core:
        core_pmap = net.vertex_properties['core']
        core_vertices = [vertex for vertex in net.vertices()
                         if core_pmap[vertex]]

    # Print basic network statistics
    print('Loaded ' + infile)
    print('Vertices: ' + str(net.num_vertices()))
    print('Edges: ' + str(net.num_edges()))
    if core:
        print('Core vertices: ' + str(len(core_vertices)))

    if core and filter:
        # Add a filter
        print('Adding filter')
        # Recent-papers filter for the citation net
        if 'citenet0' in infile:
            year = net.vp['year']
            recent_list = [year[vertex] > 2005 for vertex in net.vertices()]
            recent_pmap = net.new_vertex_property('boolean')
            recent_pmap.a = np.array(recent_list)
            net.set_vertex_filter(recent_pmap)
        # Distance from the core set for the author nets
        else:
            net.set_directed(False)
            extended_set_pmap = core_pmap.copy()
            gt.infect_vertex_property(net, extended_set_pmap, vals=[True])
            gt.infect_vertex_property(net, extended_set_pmap, vals=[True])
            net.set_vertex_filter(extended_set_pmap)
        # Remove everything caught in the filter
        net.purge_vertices()
        # Extract the largest component
        net.set_vertex_filter(gt.label_largest_component(net, directed=False))
        net.purge_vertices()
        # Rebuild the core set
        core_pmap = net.vertex_properties['core']
        core_vertices = [vertex for vertex in net.vertices()
                         if core_pmap[vertex]]

        print('Filtered vertices: ' + str(net.num_vertices()))
        print('Filtered edges: ' + str(net.num_edges()))
        print('Filtered core: ' + str(len(core_vertices)))
    elif filter and not core:
        print('Filter = true with core = false')

    if core:
        return net, outfile_pre, core_pmap, core_vertices
    else:
        return net, outfile_pre
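# Aside: infect_vertex_property() propagates the listed values one hop to
# neighbouring vertices, so the two calls above grow the core set by a 2-hop
# neighbourhood. A sketch (toy graph and seed set assumed):
import graph_tool.all as gt

net = gt.random_graph(100, lambda: 4, directed=False)  # undirected toy graph
seed = net.new_vertex_property('bool')
seed.a[:5] = True                                      # hypothetical core set
gt.infect_vertex_property(net, seed, vals=[True])      # one hop per call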
# (fragment: generate a random test graph for a given degree-sample type)
    helper = lambda: int(random.random() * num_nodes)
elif deg_sample_type == 'exp':
    helper = lambda: np.random.exponential(scale=0.06) * num_nodes
deg_sample = lambda: (helper(), helper())
# print deg_sample()

while True:
    g = gt.random_graph(num_nodes, deg_sampler=deg_sample, directed=directed)
    use_weights = random.random() > 0.5
    if use_weights:
        weights = g.new_vertex_property('float')
        weights.a = np.array(
            [(1. + random.random() * 9) if i else 1.
             for i in (np.random.random(size=g.num_vertices()) > 0.5)])
    else:
        weights = None

    # keep only the largest component and make the filtering permanent
    lcc = gt.label_largest_component(g)
    g.set_vertex_filter(lcc)
    g.purge_vertices()
    g.purge_edges()
    g.clear_filters()

    if g.num_vertices() > 5 and check_aperiodic(g):
        break

print str(str(g.num_vertices()).ljust(5) + ' | ' + str(iteration).ljust(4) +
          ' | ' + deg_sample_type.ljust(5) + ' | ' +
          str(use_weights).ljust(6)).ljust(20),

# eigenvector stat dist
A = gt.adjacency(g)
if weights is not None:
    bias = diags(weights.a, 0)
    A = bias.dot(A)
Q = normalize(A, norm='l1', axis=0, copy=False)
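# Aside: normalize(..., norm='l1', axis=0) rescales every column to sum to 1,
# turning the (optionally bias-weighted) adjacency matrix into a
# column-stochastic transition matrix. The operation in isolation:
import numpy as np
from sklearn.preprocessing import normalize

A = np.array([[0., 1.], [1., 1.]])
Q = normalize(A, norm='l1', axis=0)  # each column of Q now sums to 1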