import time
from collections import OrderedDict
from multiprocessing import cpu_count

import numpy as np
from joblib import Parallel, delayed
from networkx import (barabasi_albert_graph, betweenness_centrality,
                      closeness_centrality, degree_centrality,
                      eigenvector_centrality_numpy, from_graph6_bytes,
                      katz_centrality_numpy, pagerank_numpy)

# Helpers used below but defined elsewhere in the project: secondsToMinSec,
# batches, buildDataSetWorker, independentCascade, independentCascadePar,
# weightedCascade, and the module-level `concurrent` flag.


def generateLargeGraphs(M, N):
    startTotal = time.time()
    print("Generating %d graphs of size %d..." % (M, N), end=" ")
    # Generate M scale-free graphs with the Barabasi-Albert model, attaching
    # each new node with num_of_edges edges.
    num_of_edges = min(100, N // 10)
    graphs = [barabasi_albert_graph(N, num_of_edges) for _ in range(M)]
    endTotal = time.time()
    print("Duration: %d minutes and %d seconds" % secondsToMinSec(endTotal - startTotal))
    return graphs
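
# secondsToMinSec is called by every timing block in this section but is not
# defined here. A minimal sketch matching the "%d minutes and %d seconds"
# format string above -- an assumption about the original helper, not a
# verbatim copy:
def secondsToMinSec(seconds):
    # Split an elapsed duration into whole minutes and leftover seconds.
    return divmod(int(seconds), 60)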

def generateSmallGraphs(N):
    startTotal = time.time()
    print("Importing graphs of size %d..." % N, end=" ")
    if not (5 < N < 10):
        raise Exception('The size for small graphs should be between 6 and 9')
    graphs = []
    # Each line of the .g6 file is one graph6-encoded non-isomorphic
    # connected graph on N nodes.
    with open('./graphs/non-isomorphs/graph' + str(N) + 'c.g6') as graphs_file:
        for graph_line in graphs_file:
            graphs.append(from_graph6_bytes(graph_line.rstrip().encode('ascii')))
    endTotal = time.time()
    print("Duration: %d minutes and %d seconds" % secondsToMinSec(endTotal - startTotal))
    return graphs
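
# Example usage (assumes the repo's ./graphs/non-isomorphs/ files are
# present):
#
#   graphs = generateSmallGraphs(7)
#   print(len(graphs))  # 853, the number of connected graphs on 7 nodes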

def getCentralityValuesDict(graphs, centralities):
    startTotal = time.time()
    print("Building centrality dictionaries...", end=" ")
    # result maps centrality name -> graph -> {node: centrality value}.
    result = OrderedDict()
    for c in centralities:
        result[c] = OrderedDict()
    if "betweenness" in centralities:
        for g in graphs:
            # k=len(g.nodes) samples every node, i.e. exact betweenness.
            result["betweenness"][g] = betweenness_centrality(g, k=len(g.nodes))
    if "closeness" in centralities:
        for g in graphs:
            result["closeness"][g] = closeness_centrality(g)
    if "katz" in centralities:
        for g in graphs:
            result["katz"][g] = katz_centrality_numpy(g)
    if "eigenvector" in centralities:
        for g in graphs:
            result["eigenvector"][g] = eigenvector_centrality_numpy(g)
    if "pagerank" in centralities:
        for g in graphs:
            # pagerank_numpy was removed in NetworkX 3.0; this call
            # requires networkx < 3.0.
            result["pagerank"][g] = pagerank_numpy(g)
    if "degree" in centralities:
        for g in graphs:
            result["degree"][g] = degree_centrality(g)  # is normalized
    endTotal = time.time()
    print("Duration: %d minutes and %d seconds" % secondsToMinSec(endTotal - startTotal))
    return result
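
# The nested layout matters for buildDataSet below: a seed node's feature is
# read as result[centrality_name][graph][seed]. An illustrative lookup
# (hypothetical graph, not from the original code):
#
#   g = barabasi_albert_graph(50, 5)
#   cd = getCentralityValuesDict([g], ["degree", "pagerank"])
#   cd["degree"][g][0]  # degree centrality of node 0 in g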

def buildDataSetPar(graphs, centralities_dict, spread_param, iterations):
    X, y = [], []
    startTotal = time.time()
    processors = cpu_count()
    # Split the graphs into roughly one batch per core; max(1, ...) guards
    # against a zero batch size when there are fewer graphs than cores.
    bs = batches(graphs, max(1, len(graphs) // processors))
    print("Building dataset from", len(bs), "batches of", len(bs[0]), "graphs")
    results = Parallel(n_jobs=-1, verbose=0)(delayed(buildDataSetWorker)(
        batch, centralities_dict, spread_param, iterations) for batch in bs)
    # Flatten the per-batch results into one feature matrix and label vector.
    for _X, _y in results:
        X.extend(_X)
        y.extend(_y)
    Xn, yn = np.array(X), np.array(y)
    endTotal = time.time()
    print("Duration: %d minutes and %d seconds" % secondsToMinSec(endTotal - startTotal))
    return Xn, yn
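
# batches() and buildDataSetWorker() are defined elsewhere in the project.
# A minimal sketch of batches() consistent with its use above -- an
# assumption, not the original implementation: chunk a list into consecutive
# slices of the given size.
def batchesSketch(items, batch_size):
    # e.g. batchesSketch([1, 2, 3, 4, 5], 2) -> [[1, 2], [3, 4], [5]]
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]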

def buildDataSet(graphs, centralities_dict, spread_param, iterations):
    X, y = [], []
    startTotal = time.time()
    print("Building data set...", end=" ")
    for graph in graphs:
        for seed in graph.nodes:
            # Feature vector: one centrality value per configured measure.
            temp_centralities = []
            for centr_key in centralities_dict:
                temp_centralities.append(centralities_dict[centr_key][graph][seed])
            X.append(temp_centralities)
            # Label: expected spread of the seed under the chosen cascade model.
            if spread_param is not None:
                # `concurrent` is a module-level flag; only large, long-running
                # simulations are worth the parallelisation overhead.
                if concurrent and iterations >= 1500 and len(graphs[0].nodes) > 50:
                    spread = independentCascadePar(graph, seed, spread_param, iterations)
                else:
                    spread = independentCascade(graph, seed, spread_param, iterations)
            else:
                spread = weightedCascade(graph, seed, iterations)
            y.append(spread)
    Xn, yn = np.array(X), np.array(y)
    endTotal = time.time()
    print("Duration: %d minutes and %d seconds" % secondsToMinSec(endTotal - startTotal))
    return Xn, yn
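
# independentCascade, independentCascadePar, and weightedCascade are the
# project's spread simulators, defined elsewhere. A minimal Monte Carlo
# sketch of an independent-cascade estimator with the same call shape as
# independentCascade(graph, seed, spread_param, iterations) -- an assumption
# about the original, not a drop-in replacement:
import random

def independentCascadeSketch(graph, seed, p, iterations):
    # Average, over `iterations` runs, the number of nodes activated when
    # each newly active node infects each inactive neighbour with
    # probability p.
    total = 0
    for _ in range(iterations):
        active, frontier = {seed}, [seed]
        while frontier:
            node = frontier.pop()
            for neighbour in graph.neighbors(node):
                if neighbour not in active and random.random() < p:
                    active.add(neighbour)
                    frontier.append(neighbour)
        total += len(active)
    return total / iterations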