def main():
    """Parse arguments, call diameter_approximation_motwani, print the result."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = "Compute an approximation of the diameter and output its value"
    parser.add_argument("graph", help="graph file")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="increase verbosity (use multiple times for more verbosity)")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Read graph from file
    G = util.read_graph(args.graph)

    # Compute the 2/3 approximation
    (elapsed_time, diam) = diameter_approximation_motwani(G)

    # Print info
    print("{}, diameter={}, time={}".format(args.graph, diam, elapsed_time))
def convert_graph(graphfile, clusmethod, filt=[]):
    assert clusmethod in ['filteronly', 'mtg', 'mtf', 'nocyc', 'dmst']
    G = util.dct2nx(util.read_graph(graphfile))

    # Step 0: If just filtering, do that
    if clusmethod == 'filteronly':
        G = filter_nodes(G, filt)
        return G

    # Step 1: Find equivalent terms based on method
    if clusmethod in ['mtg', 'mtf']:
        G = entmax2hyp(G)
        G = scc2equiv(G)
    if clusmethod == 'nocyc':
        # Find nodes with same direct hypernyms and hyponyms
        G = samehh2equiv(G)

    # Step 2: Collapse equivalent nodes
    G = util.consolidate_equiv(G)
    # Step 3: Find transitive closure
    G = hypernym_transitive_closure(G)
    # Step 4: Explode equiv nodes again
    G = util.expand_equiv_nodes(G)
    # Step 5: Remove nodes not in filter
    G = filter_nodes(G, filt)
    return G
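# Hypothetical usage sketch for convert_graph above: 'nocyc' merges terms that
# share the same direct hypernyms and hyponyms, takes the hypernym transitive
# closure, and keeps only the filtered terms. The file name and filter terms
# are invented for illustration; util.dct2nx suggests the result is a networkx
# graph, hence number_of_nodes()/number_of_edges().
G = convert_graph('hypernym_graph.json', 'nocyc', filt=['animal', 'dog', 'poodle'])
print(G.number_of_nodes(), G.number_of_edges())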
def main(args):
    '''Pipeline for representational learning for all nodes in a graph.'''
    nx_G = read_graph(args)
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(walks)
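# A minimal argparse sketch supplying the fields main() reads above. The flag
# names and defaults mirror common node2vec settings and are assumptions, not
# this project's actual values.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="Run node2vec.")
    parser.add_argument('--input', default='graph/karate.edgelist', help='input edge list')
    parser.add_argument('--directed', action='store_true', help='treat the graph as directed')
    parser.add_argument('--p', type=float, default=1.0, help='return parameter')
    parser.add_argument('--q', type=float, default=1.0, help='in-out parameter')
    parser.add_argument('--num-walks', type=int, default=10, help='walks per node')
    parser.add_argument('--walk-length', type=int, default=80, help='length of each walk')
    return parser.parse_args()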
def main():
    """Parse arguments and perform the computation."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = ("Compute approximate betweenness centrality of all vertices in a "
                          "graph using the algorithm by Brandes and Pich, and the time to "
                          "compute them, and write them to file")
    parser.add_argument("epsilon", type=util.valid_interval_float, help="accuracy parameter")
    parser.add_argument("delta", type=util.valid_interval_float, help="confidence parameter")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("output", help="output file")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
                        help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
                        help="use pickle reader for input file")
    parser.add_argument("-s", "--samplesize", type=util.positive_int, default=0,
                        help="use specified sample size. Overrides epsilon, delta, and diameter computation")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
                        help="timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
                        help="consider the graph as undirected")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--write", nargs="?", default=False, const="auto",
                        help="write graph (and computed attributes) to file")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    # Compute betweenness
    if args.samplesize:
        (stats, betw) = betweenness_sample_size(G, args.samplesize, args.write, args.timeout)
    else:
        (stats, betw) = betweenness(G, args.epsilon, args.delta, args.write, args.timeout)

    # If specified, write betweenness as vertex attributes, and time as graph
    # attribute back to file
    if args.write:
        logging.info("Writing betweenness as vertex attributes and stats as graph attribute")
        if args.write == "auto":
            filename = os.path.splitext(args.graph)[0] + ("-undir" if args.undirected else "-dir") + ".picklez"
            G.write(filename)
        else:
            G.write(args.write)

    # Write stats and betweenness to output
    util.write_to_output(stats, betw, args.output)
def main():
    """Parse arguments, call the approximation, write it to file."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = ("Compute an approximation of the diameter of a graph and the time "
                          "needed to compute it, and (if specified) write this info as graph attributes")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("-i", "--implementation", choices=["homegrown", "igraph"], default="homegrown",
                        help="use specified implementation of the diameter computation")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
                        help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
                        help="use pickle reader for input file")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
                        help="consider the graph as undirected")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--write", action="store_true", default=False,
                        help="write the approximation of the diameter as the 'approx_diam' graph attribute and the time taken to compute it as the 'approx_diam_time' attribute")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Seed the random number generator
    random.seed()

    # Read graph from file
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    # Compute the diameter
    (elapsed_time, diam) = diameter(G, args.implementation)

    # Print info
    print("{}, diameter={}, time={}".format(args.graph, diam, elapsed_time))

    # If requested, add graph attributes and write graph back to original file
    if args.write:
        logging.info("Writing diameter approximation and time to graph")
        G["approx_diam"] = diam
        G["approx_diam_time"] = elapsed_time
        # We use format auto-detection, which should work given that it worked
        # when we read the file
        G.write(args.graph)
def test_client():
    print('\nTest For Graph Search:\n')
    g = util.read_graph("./data/tinyCG.txt", False)
    source = 0
    bfs = False  # assign search strategy here
    search = GraphSearch(g, source, not bfs)
    print('BFS' if not bfs else 'DFS', 'search strategy')
    for v in range(g.v()):
        print(source, 'to', v, ': ', end='')
        if search.has_path_to(v):
            for i in search.path_to(v):
                if i != v:
                    print(i, '-> ', end='')
                else:
                    print(i)
    print(search.count(), 'vertices connected to the source', search._source)
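# A minimal sketch of the GraphSearch interface the client above exercises.
# The graph interface (g.adj(v)) and the flag semantics (third argument True
# for BFS) are assumptions inferred from the calls in test_client, not the
# project's actual implementation.
from collections import deque


class GraphSearch:
    def __init__(self, g, source, use_bfs):
        self._source = source
        self._edge_to = {source: source}
        # BFS pops from the front of the deque; DFS pops from the back.
        frontier = deque([source])
        while frontier:
            v = frontier.popleft() if use_bfs else frontier.pop()
            for w in g.adj(v):
                if w not in self._edge_to:
                    self._edge_to[w] = v
                    frontier.append(w)

    def has_path_to(self, v):
        return v in self._edge_to

    def path_to(self, v):
        """Yield the vertices on a source->v path, source first."""
        if not self.has_path_to(v):
            return
        path = [v]
        while path[-1] != self._source:
            path.append(self._edge_to[path[-1]])
        yield from reversed(path)

    def count(self):
        # Number of vertices connected to the source, the source included.
        return len(self._edge_to)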
def generate_random_weights(path, pickle, undirected):
    """Write one uniform random weight in [0, 1) per edge of the graph at
    `path` to a companion "<path minus extension>_weights.txt" file."""
    if pickle:
        G = util.read_graph(path)
    else:
        G = converter.convert(path, not undirected, False)
    print(G.ecount())
    txt_dir = path[0:len(path) - 4] + "_weights.txt"
    with open(txt_dir, 'w') as weight_file:
        for _ in range(G.ecount()):
            weight_file.write(repr(random.random()) + '\n')
    return 0
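# Hedged usage sketch: the file produced above holds one float per line, one
# line per edge, which matches the -l/--weightFile option of the sampling
# scripts later in this collection. The input path and the script name in the
# shell command are invented for illustration.
generate_random_weights("data/example_graph.txt", pickle=False, undirected=True)
# Then, from the shell (hypothetical script name):
#   python vc_sample.py 0.02 0.1 data/example_graph.txt out.pickle \
#       -l data/example_graph_weights.txt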
def main():
    graph = read_graph('aero.yml')
    discovered = dfs(graph)
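# A minimal sketch of the dfs() used above, assuming read_graph returns an
# adjacency mapping {node: [neighbours]} parsed from the YAML file; the
# traversal returns the set of discovered nodes. Both assumptions are inferred
# from the call site, not confirmed by the project.
def dfs(graph, start=None):
    start = start if start is not None else next(iter(graph))
    discovered, stack = set(), [start]
    while stack:
        node = stack.pop()
        if node not in discovered:
            discovered.add(node)
            # Push unvisited neighbours; missing nodes default to no neighbours.
            stack.extend(n for n in graph.get(node, []) if n not in discovered)
    return discovered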
def compute_centralities(n, l0, d, prop_mispl, prop_neg, network_no, network_desc,
                         centralities, force=False):
    """This method computes all the implemented centralities for a set of input parameters.

    :param n: number of nodes
    :type n: int
    :param l0: number of modules from which the underlying graph is created
    :type l0: int
    :param d: density
    :type d: float
    :param prop_mispl: proportion of misplaced links
    :type prop_mispl: float
    :param prop_neg: proportion of negative links
    :type prop_neg: float
    :param network_no: network no
    :type network_no: int
    :param network_desc: network description, i.e. whether network is weighted or unweighted
    :type network_desc: str. One of them: SIGNED_UNWEIGHTED, SIGNED_WEIGHTED
    :param centralities: centralities, e.g. consts.CENTR_DEGREE_NEG, consts.CENTR_DEGREE_POS, etc.
    :type centralities: str list
    :param force: recompute and overwrite results even if the result files already exist
    :type force: bool
    """
    network_folder = path.get_input_network_folder_path(n, l0, d, prop_mispl, prop_neg, network_no)
    network_path = os.path.join(network_folder, consts.SIGNED_UNWEIGHTED + ".graphml")

    # we continue only if the corresponding input network exists
    if os.path.exists(network_path):
        g = util.read_graph(network_path, consts.FILE_FORMAT_GRAPHML)

        for centr_name in centralities:
            centr_folder_path = path.get_centrality_folder_path(n, l0, d, prop_mispl,
                                                                prop_neg, network_no, network_desc)
            print("computing centrality: " + centr_name + " in " + centr_folder_path)
            os.makedirs(centr_folder_path, exist_ok=True)

            result_filename = centr_name + ".csv"
            result_filepath = os.path.join(centr_folder_path, result_filename)
            if not os.path.exists(result_filepath) or force:
                result = None
                if centr_name == consts.CENTR_DEGREE_NEG:
                    result = centrality.degree_centrality.NegativeCentrality.undirected(g, False).tolist()
                elif centr_name == consts.CENTR_DEGREE_POS:
                    result = centrality.degree_centrality.PositiveCentrality.undirected(g, False).tolist()
                elif centr_name == consts.CENTR_DEGREE_PN:
                    result = centrality.degree_centrality.PNCentrality.undirected(g, False).tolist()
                elif centr_name == consts.CENTR_EIGEN:
                    result = centrality.eigenvector_centrality.compute_eigenvector_centrality(g)

                # write the centrality values into file (as many lines as there are values)
                result_formatted = [util.format_4digits(e) for e in result]
                df = pd.DataFrame({consts.CENT_COL_NAME: result_formatted})
                df.to_csv(result_filepath, sep=",", quoting=1, index=False)

                # write the mean of the centrality values
                desc = consts.PREFIX_MEAN + centr_name
                result_filepath = os.path.join(centr_folder_path, consts.PREFIX_MEAN + result_filename)
                result_formatted = util.format_4digits(mean(result))
                df = pd.DataFrame({desc: [result_formatted]})
                df.to_csv(result_filepath, sep=",", quoting=1, index=False)

                # write the standard deviation of the centrality values
                desc = consts.PREFIX_STD + centr_name
                result_filepath = os.path.join(centr_folder_path, consts.PREFIX_STD + result_filename)
                result_formatted = util.format_4digits(stdev(result))
                df = pd.DataFrame({desc: [result_formatted]})
                df.to_csv(result_filepath, sep=",", quoting=1, index=False)
'''
Created on Sep 23, 2020

@author: nejat
'''

import util
import consts
import centrality.degree_centrality
import path


if __name__ == '__main__':
    network_path = "../../in/n=20_l0=3_dens=1.0000/propMispl=0.2000/propNeg=0.7000/network=1/signed-unweighted.graphml"
    g = util.read_graph(network_path, consts.FILE_FORMAT_GRAPHML)
    print(g.ecount())  # the number of edges
    print(g.vcount())  # the number of vertices

    result = centrality.degree_centrality.NegativeCentrality.undirected(g, False)
    result_list = result.tolist()
    print(result_list)
    print(len(result_list))
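# A hedged sketch of what util.read_graph plausibly wraps in the snippet
# above. The constant value and the dispatch are assumptions; Graph.Read_GraphML
# is igraph's standard GraphML reader, consistent with the ecount()/vcount()
# calls on the returned object.
import igraph

FILE_FORMAT_GRAPHML = "graphml"  # assumed value of consts.FILE_FORMAT_GRAPHML


def read_graph(network_path, file_format=FILE_FORMAT_GRAPHML):
    """Read a network from file; only the GraphML branch is sketched here."""
    if file_format == FILE_FORMAT_GRAPHML:
        return igraph.Graph.Read_GraphML(network_path)
    raise ValueError("unsupported file format: " + file_format)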
def main():
    """Parse arguments, call betweenness(), write to file."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = ("Compute approximate betweenness centrality of all vertices in a "
                          "graph using sampling and VC-dimension, and the time to compute "
                          "them, and write them to file")
    parser.add_argument("epsilon", type=util.valid_interval_float, help="accuracy parameter")
    parser.add_argument("delta", type=util.valid_interval_float, help="confidence parameter")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("output", help="output file")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-a", "--approximate", action="store_true", default=True,
                       help="use approximate diameter (default)")
    group.add_argument("-d", "--diameter", type=util.positive_int, default=0,
                       help="value to use for the diameter")
    group.add_argument("-e", "--exact", action="store_true", default=False,
                       help="use exact diameter")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
                        help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
                        help="use pickle reader for input file")
    parser.add_argument("-s", "--samplesize", type=util.positive_int, default=0,
                        help="use specified sample size. Overrides epsilon, delta, and diameter computation")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
                        help="timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
                        help="consider the graph as undirected")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--write", nargs="?", default=False, const="auto",
                        help="write graph (and computed attributes) to file")
    parser.add_argument("-l", "--weightFile", default="-",
                        help="random weights within the interval 0 to 1, must have as many entries as the number of edges")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Seed the random number generator
    random.seed()

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    if args.exact:
        args.approximate = False

    # Read the weights
    weights_list = []
    if args.weightFile != "-":
        with open(args.weightFile, 'r') as weight_file:
            for line in weight_file:
                weights_list.append(float(line.strip()))

    # Compute betweenness
    if args.samplesize:
        (stats, betw) = betweenness_sample_size(G, args.samplesize, args.write)
    elif args.diameter > 0:
        (stats, betw) = betweenness(G, args.epsilon, args.delta, weights_list,
                                    args.diameter, args.write)
    else:
        (stats, betw) = betweenness(G, args.epsilon, args.delta, weights_list,
                                    args.approximate, args.write)

    # If specified, write betweenness as vertex attributes, and time as graph
    # attribute back to file
    if args.write:
        logging.info("Writing betweenness as vertex attributes and stats as graph attribute")
        if args.write == "auto":
            filename = os.path.splitext(args.graph)[0] + ("-undir" if args.undirected else "-dir") + ".picklez"
            G.write(filename)
        else:
            G.write(args.write)

    # Write stats and betweenness to output
    util.write_to_output(stats, betw, args.output)
def main():
    """Parse arguments, run experiments, collect results and stats, write to file."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = ("Perform experiment to compute approximate betweenness centrality "
                          "of all vertices in a graph using sampling and VC-dimension")
    parser.add_argument("epsilon", type=util.valid_interval_float, help="accuracy parameter")
    parser.add_argument("delta", type=util.valid_interval_float, help="confidence parameter")
    parser.add_argument("runs", type=util.positive_int, default=20, help="number of runs")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("output", help="output file")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-a", "--approximate", action="store_true", default=True,
                       help="use approximate diameter (default)")
    group.add_argument("-d", "--diameter", type=util.positive_int, default=0,
                       help="value to use for the diameter")
    group.add_argument("-e", "--exact", action="store_true", default=False,
                       help="use exact diameter")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
                        help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
                        help="use pickle reader for input file")
    parser.add_argument("-s", "--samplesize", type=util.positive_int, default=0,
                        help="use specified sample size. Overrides epsilon, delta, and diameter computation")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
                        help="timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
                        help="consider the graph as undirected")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--weightFile", default="-",
                        help="random weights within the interval 0 to 1, must have as many entries as the number of edges")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    if args.exact:
        args.approximate = False

    # Read the weights
    weights_list = []
    if args.weightFile != "-":
        with open(args.weightFile, 'r') as weight_file:
            for line in weight_file:
                weights_list.append(float(line.strip()))

    # Perform experiment multiple times
    results = []
    for i in range(args.runs):
        logging.info("Run #%d", i)
        # Compute betweenness
        if args.samplesize:
            results.append(vc_sample.betweenness_sample_size(G, args.samplesize, False, args.timeout))
        elif args.diameter > 0:
            results.append(vc_sample.betweenness(G, args.epsilon, args.delta, weights_list,
                                                 args.diameter, False, args.timeout))
        else:
            results.append(vc_sample.betweenness(G, args.epsilon, args.delta, weights_list,
                                                 args.approximate, False, args.timeout))

    # Compute aggregate statistics about the experiments
    stats = dict()
    stats["graph"] = os.path.basename(args.graph)
    stats["vertices"] = G.vcount()
    stats["edges"] = G.ecount()
    stats["runs"] = args.runs
    if args.samplesize:
        stats["sample_size"] = args.samplesize
    else:
        stats["delta"] = args.delta
        stats["epsilon"] = args.epsilon
        stats["sample_size"] = results[0][0]["sample_size"]

    stats_names = ["time", "forward_touched_edges", "backward_touched_edges"]
    if not args.samplesize:
        stats_names.append("diameter")
        stats_names.append("diameter_touched_edges")
    for stat_name in stats_names:
        values = sorted([x[0][stat_name] for x in results])
        stats[stat_name + "_max"] = values[-1]
        stats[stat_name + "_min"] = values[0]
        stats[stat_name + "_avg"] = sum(values) / args.runs
        if args.runs > 1:
            stats[stat_name + "_stddev"] = math.sqrt(
                sum([math.pow(value - stats[stat_name + "_avg"], 2) for value in values])
                / (args.runs - 1))
        else:
            stats[stat_name + "_stddev"] = 0.0

    stats["betw_min"] = [0.0] * G.vcount()
    stats["betw_max"] = [0.0] * G.vcount()
    stats["betw_avg"] = [0.0] * G.vcount()
    for i in range(G.vcount()):
        betws = sorted([x[1][i] for x in results])
        stats["betw_min"][i] = betws[0]
        stats["betw_max"][i] = betws[-1]
        stats["betw_avg"][i] = sum(betws) / args.runs

    csvkeys = "graph, runs, epsilon, delta, sample_size"
    csvkeys_names = ["{0}_avg, {0}_min, {0}_stddev, {0}_max".format(stat_name)
                     for stat_name in stats_names]
    csvkeys = ",".join([csvkeys] + csvkeys_names)
    print(csvkeys)
    print(util.dict_to_csv(stats, csvkeys))

    # Write stats and results to output file
    try:
        with open(args.output, "wb") as output:
            logging.info("Writing stats and results to %s", args.output)
            pickle.dump((stats, results), output)
    except OSError as E:
        logging.critical("Cannot write stats and results to %s: %s", args.output, E.strerror)
        sys.exit(2)
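# For reference, the manual aggregation above is the sample standard deviation
# (n - 1 in the denominator). This small self-check against the standard
# library uses invented values, not experiment output.
import math
import statistics

values = [1.0, 2.0, 4.0]
avg = sum(values) / len(values)
manual = math.sqrt(sum(math.pow(v - avg, 2) for v in values) / (len(values) - 1))
assert abs(manual - statistics.stdev(values)) < 1e-12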
def main():
    """Parse arguments, do the comparison, write to output."""
    parser = argparse.ArgumentParser()
    parser.description = "compare estimation of betweenness centralities to exact values"
    parser.add_argument("epsilon", type=util.valid_interval_float, help="accuracy parameter")
    parser.add_argument("delta", type=util.valid_interval_float, help="confidence parameter")
    parser.add_argument("graph", help="graph file")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-a", "--approximate", action="store_true", default=True,
                       help="use approximate diameter when computing approximation of betweenness using VC-dimension (default)")
    group.add_argument("-d", "--diameter", type=util.positive_int, default=0,
                       help="value to use for the diameter")
    group.add_argument("-e", "--exact", action="store_true", default=False,
                       help="use exact diameter when computing approximation of betweenness using VC-dimension")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
                        help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
                        help="use pickle reader for input file")
    parser.add_argument("-r", "--resultfiles", nargs=4,
                        help="use result files rather than recomputing betweenness. Files should be specified as 'exact_res vc_res bp_res gss_res'")
    parser.add_argument("-s", "--samplesize", type=util.positive_int, default=0,
                        help="use specified sample size. Overrides epsilon, delta, and diameter computation")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
                        help="timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
                        help="consider the graph as undirected")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--write", nargs="?", default=False, const="auto",
                        help="write graph (and computed attributes) to file")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Seed the random number generator
    random.seed()

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    if args.exact:
        args.approximate = False

    if not args.resultfiles:
        (exact_stats, exact_betw) = brandes_exact.betweenness(G, args.write, args.timeout)
        if args.samplesize:
            (vc_stats, vc_betw) = vc_sample.betweenness_sample_size(G, args.samplesize, args.write, args.timeout)
            (bp_stats, bp_betw) = brandespich_sample.betweenness_sample_size(G, args.samplesize, args.write, args.timeout)
            (gss_stats, gss_betw) = geisbergerss_sample.betweenness_sample_size(G, args.samplesize, args.write, args.timeout)
        else:
            if args.diameter > 0:
                (vc_stats, vc_betw) = vc_sample.betweenness(G, args.epsilon, args.delta, args.diameter, args.write, args.timeout)
            else:
                (vc_stats, vc_betw) = vc_sample.betweenness(G, args.epsilon, args.delta, args.approximate, args.write, args.timeout)
            (bp_stats, bp_betw) = brandespich_sample.betweenness(G, args.epsilon, args.delta, args.write, args.timeout)
            (gss_stats, gss_betw) = geisbergerss_sample.betweenness(G, args.epsilon, args.delta, args.write, args.timeout)
    else:
        (exact_stats, exact_betw) = util.read_stats_betw(args.resultfiles[0])
        (vc_stats, vc_betw) = util.read_stats_betw(args.resultfiles[1])
        (bp_stats, bp_betw) = util.read_stats_betw(args.resultfiles[2])
        (gss_stats, gss_betw) = util.read_stats_betw(args.resultfiles[3])

    # Compute useful graph statistics (mainly diameter)
    if "diam" not in G.attributes():
        diameter.diameter(G)

    # If specified, write betweenness as vertex attributes, and time and
    # diameter as graph attributes back to file
    if args.write:
        logging.info("Writing betweenness as vertex attributes and stats as graph attribute")
        if args.write == "auto":
            filename = os.path.splitext(args.graph)[0] + ("-undir" if args.undirected else "-dir") + ".picklez"
            G.write(filename)
        else:
            G.write(args.write)

    # Compute error statistics.
    # It is not a problem to sort the errors by value because we only compute
    # aggregates. An estimate is "wrong" when its absolute error exceeds the
    # guarantee epsilon * n * (n - 1) / 2.
    logging.info("Computing error statistics")
    max_err = args.epsilon * G.vcount() * (G.vcount() - 1) / 2

    # VC statistics
    vc_errs = sorted([abs(a - b) for a, b in zip(exact_betw, vc_betw)])
    vc_stats["err_avg"] = sum(vc_errs) / G.vcount()
    vc_stats["err_max"] = vc_errs[-1]
    vc_stats["err_min"] = list(itertools.filterfalse(lambda x: x == 0, vc_errs))[0]
    vc_stats["err_stddev"] = math.sqrt(
        sum([math.pow(err - vc_stats["err_avg"], 2) for err in vc_errs]) / (G.vcount() - 1))
    vc_stats["euc_dist"] = math.sqrt(
        sum([math.pow(a - b, 2) for a, b in zip(exact_betw, vc_betw)]))
    vc_stats["wrong_eps"] = sum(
        1 for a, b in zip(exact_betw, vc_betw) if abs(a - b) > max_err)

    # BP statistics
    bp_errs = sorted([abs(a - b) for a, b in zip(exact_betw, bp_betw)])
    bp_stats["err_avg"] = sum(bp_errs) / G.vcount()
    bp_stats["err_max"] = bp_errs[-1]
    bp_stats["err_min"] = list(itertools.filterfalse(lambda x: x == 0, bp_errs))[0]
    bp_stats["err_stddev"] = math.sqrt(
        sum([math.pow(err - bp_stats["err_avg"], 2) for err in bp_errs]) / (G.vcount() - 1))
    bp_stats["euc_dist"] = math.sqrt(
        sum([math.pow(a - b, 2) for a, b in zip(exact_betw, bp_betw)]))
    bp_stats["wrong_eps"] = sum(
        1 for a, b in zip(exact_betw, bp_betw) if abs(a - b) > max_err)

    # GSS statistics
    gss_errs = sorted([abs(a - b) for a, b in zip(exact_betw, gss_betw)])
    gss_stats["err_avg"] = sum(gss_errs) / G.vcount()
    gss_stats["err_max"] = gss_errs[-1]
    gss_stats["err_min"] = list(itertools.filterfalse(lambda x: x == 0, gss_errs))[0]
    gss_stats["err_stddev"] = math.sqrt(
        sum([math.pow(err - gss_stats["err_avg"], 2) for err in gss_errs]) / (G.vcount() - 1))
    gss_stats["euc_dist"] = math.sqrt(
        sum([math.pow(a - b, 2) for a, b in zip(exact_betw, gss_betw)]))
    gss_stats["wrong_eps"] = sum(
        1 for a, b in zip(exact_betw, gss_betw) if abs(a - b) > max_err)

    # Print statistics to output as CSV
    logging.info("Printing statistics")
    print("graph,nodes,edges,diam,directed,epsilon,delta,sample_size")
    print("{},{},{},{},{},{},{},{}".format(G["filename"], G.vcount(), G.ecount(),
                                           G["diam"], G.is_directed(), args.epsilon,
                                           args.delta, args.samplesize))
    csvkeys = ("epsilon,delta,sample_size,time,wrong_eps,err_avg,err_stddev,"
               "forward_touched_edges,backward_touched_edges,diameter_touched_edges,"
               "euc_dist,diameter,diam_type")
    print("type,", csvkeys)
    print("vc,", util.dict_to_csv(vc_stats, csvkeys))
    print("bp,", util.dict_to_csv(bp_stats, csvkeys))
    print("gss,", util.dict_to_csv(gss_stats, csvkeys))
    print("exact,", util.dict_to_csv(exact_stats, csvkeys))
def compute_stats(n, l0, d, prop_mispl, prop_neg, network_no, network_desc,
                  mystats, force=False):
    """This method computes all the implemented stats for the given signed network.

    :param n: number of nodes
    :type n: int
    :param l0: number of modules from which the underlying graph is created
    :type l0: int
    :param d: density
    :type d: float
    :param prop_mispl: proportion of misplaced links
    :type prop_mispl: float
    :param prop_neg: proportion of negative links
    :type prop_neg: float
    :param network_no: network no
    :type network_no: int
    :param network_desc: network description, i.e. whether network is weighted or unweighted
    :type network_desc: str. One of them: SIGNED_UNWEIGHTED, SIGNED_WEIGHTED
    :param mystats: graph related statistics, e.g. consts.STATS_SIGNED_TRIANGLES, consts.STATS_POS_NEG_RATIO
    :type mystats: str list
    :param force: recompute and overwrite results even if the result files already exist
    :type force: bool
    """
    network_folder = path.get_input_network_folder_path(n, l0, d, prop_mispl, prop_neg, network_no)
    network_path = os.path.join(network_folder, consts.SIGNED_UNWEIGHTED + ".graphml")

    # we continue only if the corresponding input network exists
    if os.path.exists(network_path):
        g = util.read_graph(network_path, consts.FILE_FORMAT_GRAPHML)

        for stat_name in mystats:
            stat_folder_path = path.get_stat_folder_path(n, l0, d, prop_mispl, prop_neg,
                                                         network_no, network_desc)
            print("computing stats: " + stat_name + " in " + stat_folder_path)
            os.makedirs(stat_folder_path, exist_ok=True)

            result_filename = stat_name + ".csv"
            result_filepath = os.path.join(stat_folder_path, result_filename)
            if not os.path.exists(result_filepath) or force:
                result = None
                colnames = None
                if stat_name == consts.STATS_NB_NODES:
                    result = [g.vcount()]
                    colnames = [consts.COL_NAMES[stat_name]]
                elif stat_name == consts.STATS_SIGNED_TRIANGLES:
                    result = stats.str_balance.compute_signed_triangle_ratios(g)
                    result = [util.format_4digits(e) for e in result]
                    colnames = consts.COL_NAMES[stat_name]
                elif stat_name == consts.STATS_LARGEST_EIGENVALUE:
                    result = stats.spectral.retreive_largest_eigenvalue(g)
                    result = [util.format_4digits(result)]
                    colnames = [consts.COL_NAMES[stat_name]]
                elif stat_name == consts.STATS_POS_NEG_RATIO:
                    result = stats.structural.retreive_pos_neg_ratio(g)
                    result = [util.format_4digits(result)]
                    colnames = [consts.COL_NAMES[stat_name]]
                elif stat_name == consts.STATS_POS_PROP:
                    result = stats.structural.retreive_pos_prop(g)
                    result = [util.format_4digits(result)]
                    colnames = [consts.COL_NAMES[stat_name]]
                elif stat_name == consts.STATS_NEG_PROP:
                    result = stats.structural.retreive_neg_prop(g)
                    result = [util.format_4digits(result)]
                    colnames = [consts.COL_NAMES[stat_name]]

                # write the result into a file with its column name(s)
                df = pd.DataFrame(data=result, index=colnames).transpose()  # row vector
                df.to_csv(result_filepath, sep=",", quoting=1, index=False)
            else:
                print("already exists")
from util import read_graph, visualize

actual = read_graph('actual1.graph')
a = read_graph('inferred1.graph')
b = read_graph('actual2.graph')
c = read_graph('inferred2.graph')

visualize('graph.svg', [actual, a, b, c], [], [], 4)
    # (tail of a method, presumably get_embedding, as excerpted)
    for i, embedding in enumerate(embeddings):
        self.embeddings[idx2node[i]] = embedding
    return self.embeddings

    def transform(self):
        self.train()
        self.get_embedding()
        return self.embeddings


if __name__ == '__main__':
    from util import read_graph
    import os

    print(os.getcwd())
    Graph = read_graph('../wiki/Wiki_edgelist.txt')
    line = Line(
        Graph=Graph,
        dimension_size=128,
        per_vertex=100,
        walk_length=10,
        window_size=5,
        work=1,
        negative_ratio=1,
        batch_size=128,
        log_dir='logs/0/',
        epoch=100,
    )
    embeddings = line.transform()

    from evaluate import evaluate_tools
    tool = evaluate_tools(embeddings)
## print "LNG ERROR DISTRIBUTION" ## for lng_e in sorted(n_by_lng_e.keys()): ## print "%d\t%d" % (lng_e, n_by_lng_e[lng_e]) def output_result(filename, ans_loc_by_id, test_id_list): output = open(filename, "w") output.write("Id,Lat,Lon\n") id_list = sorted(test_id_list) for pid in id_list: lat, lng = ans_loc_by_id[pid] output.write("%d,%f,%f\n" % (pid, lat, lng)) output.close() if __name__ == "__main__": loc_by_id, info_by_id = read_loc_by_id("./data/posts-train.txt") graph = util.read_graph("./data/graph.txt") test_info_by_id = util.read_test_set("./data/posts-test-x.txt") info_by_id.update(test_info_by_id) test_id_list = test_info_by_id.keys() import time start = time.time() s = 0.7 k = 50 #### exactly avg_avg ## s = 0 ## k = 40000 df = calculate_df(graph, loc_by_id, info_by_id) paras = make_invidx(graph, loc_by_id, df, info_by_id)
def main():
    """Parse arguments, run experiments, collect results and stats, write to file."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = ("Perform experiment to compute exact betweenness centrality of "
                          "all vertices in a graph using Brandes' algorithm")
    parser.add_argument("runs", type=util.positive_int, default=20, help="number of runs")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("output", help="output file")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
                        help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
                        help="use pickle reader for input file")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
                        help="timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
                        help="consider the graph as undirected")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="increase verbosity (use multiple times for more verbosity)")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    # Perform experiment multiple times
    results = []
    for i in range(args.runs):
        logging.info("Run #%d", i)
        results.append(brandes_exact.betweenness(G, False, args.timeout))

    # Compute aggregate statistics about the experiments
    stats = dict(results[0][0])
    stats["graph"] = os.path.basename(args.graph)
    stats["vertices"] = G.vcount()
    stats["edges"] = G.ecount()
    stats["runs"] = args.runs
    del stats["time"]
    times = sorted([x[0]["time"] for x in results])
    stats["time_max"] = times[-1]
    stats["time_min"] = times[0]
    stats["time_avg"] = sum(times) / args.runs
    if args.runs > 1:
        stats["time_stddev"] = math.sqrt(
            sum([math.pow(time - stats["time_avg"], 2) for time in times]) / (args.runs - 1))
    else:
        stats["time_stddev"] = 0.0

    csvkeys = ("graph, runs, time_avg, time_stddev, time_max, time_min, "
               "forward_touched_edges, backward_touched_edges")
    print(csvkeys)
    print(util.dict_to_csv(stats, csvkeys))

    # Write stats and results to output file
    try:
        with open(args.output, "wb") as output:
            logging.info("Writing stats and results to %s", args.output)
            pickle.dump((stats, results), output)
    except OSError as E:
        logging.critical("Cannot write stats and results to %s: %s", args.output, E.strerror)
        sys.exit(2)
    # (tail of a scoring function, presumably generateKSScore, as excerpted)
    # print(geneScores)
    R = R.sort_values(ascending=False)
    print(R)
    for idx, value in enumerate(R):
        R[idx] = abs(p / 2 - idx + 1)
    return R


# export PYTHONPATH="/Users/csx/GitProject/sciMallNetworkScore:$PYTHONPATH"
if __name__ == "__main__":
    embPath = '/Users/csx/GitProject/sciMallNetworkScore/data/emb/test.emb'
    args = parse_args()
    G = read_graph(args)
    nodes = G.nodes
    pathWays = [set([32, 34, 3])]
    R = generateKSScore(embPath, list(nodes), p=len(nodes), n=10,
                        pathWays=pathWays, geneSets=set(nodes))
    for pathway in pathWays:
        Es = getEnrichStatisc(R, pathway, set(nodes), p=len(nodes))
        print('pathway: 32 34 3 scores: ', Es)
    # print(G.nodes)
    # geneSetsScore()
def collect_features(n, l0, d, prop_mispl, prop_neg, network_no, network_desc,
                     centralities, stats):
    """This method collects all the indicated features, which are centrality measures
    and graph-related statistics (number of nodes, etc.).

    :param n: number of nodes
    :type n: int
    :param l0: number of modules from which the underlying graph is created
    :type l0: int
    :param d: density
    :type d: float
    :param prop_mispl: proportion of misplaced links
    :type prop_mispl: float
    :param prop_neg: proportion of negative links
    :type prop_neg: float
    :param network_no: network no
    :type network_no: int
    :param network_desc: network description, i.e. whether network is weighted or unweighted
    :type network_desc: str. One of them: SIGNED_UNWEIGHTED, SIGNED_WEIGHTED
    :param centralities: centralities, e.g. consts.CENTR_DEGREE_NEG, consts.CENTR_DEGREE_POS, etc.
    :type centralities: str list
    :param stats: graph related statistics, e.g. consts.STATS_SIGNED_TRIANGLES, consts.STATS_POS_NEG_RATIO
    :type stats: str list
    """
    features = pd.DataFrame([])
    network_folder = path.get_input_network_folder_path(n, l0, d, prop_mispl, prop_neg, network_no)
    network_path = os.path.join(network_folder, consts.SIGNED_UNWEIGHTED + ".graphml")

    # we continue only if the corresponding input network exists
    if os.path.exists(network_path):
        g = util.read_graph(network_path, consts.FILE_FORMAT_GRAPHML)

        stats_folder_path = path.get_stat_folder_path(n, l0, d, prop_mispl, prop_neg,
                                                      network_no, network_desc)
        for stat_name in stats:
            result_filepath = os.path.join(stats_folder_path, stat_name + ".csv")
            if os.path.exists(result_filepath):
                df = pd.read_csv(result_filepath, usecols=consts.COL_NAMES[stat_name])
                features = pd.concat([features, df], axis=1)

        cent_folder_path = path.get_centrality_folder_path(n, l0, d, prop_mispl, prop_neg,
                                                           network_no, network_desc)
        for centr_name in centralities:
            # mean of the centrality values
            desc = consts.PREFIX_MEAN + centr_name
            result_filepath = os.path.join(cent_folder_path, desc + ".csv")
            if os.path.exists(result_filepath):
                df = pd.read_csv(result_filepath, usecols=[desc])
                features = pd.concat([features, df], axis=1)

            # standard deviation of the centrality values
            desc = consts.PREFIX_STD + centr_name
            result_filepath = os.path.join(cent_folder_path, desc + ".csv")
            if os.path.exists(result_filepath):
                df = pd.read_csv(result_filepath, usecols=[desc])
                features = pd.concat([features, df], axis=1)

    return features
from util import read_graph, visualize

actual = read_graph('section3_graphs/inferred2.graph')
a = read_graph('section3_graphs/inferred1.graph')

visualize('inferred2.svg', [actual], [], [], 1)