def modularity(timestamp=None):
    """Compute and report the modularity of the module-level ``graph``.

    The graph is saved as a GraphML file, read back with igraph, passed
    through ``to_undirected()``, partitioned with ``graph_partitions`` and
    finally scored with ``graph_modularity``. Progress and the result are
    written through ``output``.

    Args:
        timestamp: Optional timestamp. When given, the work is done on a
            copy of the graph whose edges and vertices have been purged,
            and the value is forwarded to the label and path helpers.
    """
    label = get_timestamp_sting(timestamp)
    output.important('\nCalculating graph modularity' + label + '...')

    # NOTE(review): the extent of this branch was reconstructed from the
    # paired "Creating..." / "Created..." log messages -- confirm.
    if timestamp is None:
        working_graph = graph
    else:
        output.normal('\nCreating a graph copy and purging filtered edges')
        working_graph = graph.copy()
        working_graph.purge_edges()
        working_graph.purge_vertices()
        output.normal('Created graph copy and purged filtered edges')

    # Hand the graph over to igraph through a GraphML file on disk.
    output.normal('\nSaving graph as file...')
    graphml_file = get_timestamp_path(graph_path, timestamp, '.graphml')
    working_graph.save(graphml_file)
    output.normal('Saved graph to "' + graphml_file + '"')

    output.normal('\nLoading saved graph into igraph...')
    undirected = igraph.Graph.Read_GraphML(graphml_file)
    undirected.to_undirected()
    output.normal('Loaded graph into igraph')

    output.normal(
        '\nGenerating partitions with igraph using Louvain\'s algorithm...')
    communities = graph_partitions(undirected)
    output.normal('Calculated graph partitions')

    output.normal(
        '\nCalculating modularity using igraph and calculated partitions...')
    score = graph_modularity(undirected, communities)
    output.success('Graph modularity' + label + ': ' + str(score))
def degree(timestamp=None):
    """Report degree statistics of the module-level ``graph``.

    Prints the first ten rows of the degree table sorted by in degree, by
    out degree and by degree sum, then writes the table sorted by degree
    sum to a CSV file.

    Args:
        timestamp: Optional timestamp forwarded to the label and path
            helpers.
    """
    label = get_timestamp_sting(timestamp)
    output.important('\nGathering graph degree information' + label + '...')
    degrees = graph_degree(graph)
    output.normal('Gathered degree information.')

    output.normal('Sorting for in degree...')
    by_in = sort_for_in_degree(degrees)
    output.normal('\n10 nodes with the highest in degree' + label + ':')
    output.normal(by_in.head(10))

    output.normal('\nSorting for out degree...')
    by_out = sort_for_out_degree(degrees)
    output.normal('\n10 nodes with the highest out degree' + label + ':')
    output.normal(by_out.head(10))

    output.normal('\nSorting for degree sum...')
    by_sum = sort_for_degree_sum(degrees)
    output.normal('\n10 nodes with the highest degree sum' + label + ':')
    output.normal(by_sum.head(10))

    # Only the table sorted by degree sum is persisted.
    output.normal('\nWriting degree information to file...')
    target = get_timestamp_path(degree_path, timestamp)
    data.dataframe_to_csv(by_sum, target, True)
    output.success('Saved degree information to "' + target + '"')
def mean_shortest_path(timestamp=None):
    """Report the mean shortest path length of the module-level ``graph``.

    The mean is computed by ``shortest_paths_mean`` on the result of
    ``graph_lcc(graph)``, i.e. on the largest connected component rather
    than on the whole graph.

    Args:
        timestamp: Optional timestamp forwarded to the label helper.
    """
    label = get_timestamp_sting(timestamp)
    output.important('\nCalculating shortest paths' + label + '...')
    lcc = graph_lcc(graph)
    mean = shortest_paths_mean(lcc)
    output.normal('Calculated shortest paths')
    # str() is required: concatenating a numeric mean directly onto the
    # message raises TypeError. Every other reporter in this file already
    # converts its result this way.
    output.normal('mean shortest path length' + label + ': ' + str(mean))
def pagerank(timestamp=None):
    """Report and persist the pageranks of the module-level ``graph``.

    Prints the first ten rows of the table returned by ``graph_pagerank``
    and writes the whole table to a CSV file.

    Args:
        timestamp: Optional timestamp forwarded to the label and path
            helpers.
    """
    label = get_timestamp_sting(timestamp)
    output.important('\nCalculating graph pageranks' + label + '...')
    ranks = graph_pagerank(graph)
    output.normal('Calculated pageranks.')

    output.normal('\n10 nodes with the highest pagerank' + label + ':')
    output.normal(ranks.head(10))

    output.normal('\nWriting pagerank results to file...')
    target = get_timestamp_path(pagerank_path, timestamp)
    data.dataframe_to_csv(ranks, target, True)
    output.success('Saved pagerank results to "' + target + '"')
def eigenvector(timestamp=None):
    """Report and persist eigenvector results for the module-level ``graph``.

    ``graph_eigenvector`` yields an eigenvalue and a table; the eigenvalue
    and the first ten rows of the table are printed, and the whole table
    is written to a CSV file.

    Args:
        timestamp: Optional timestamp forwarded to the label and path
            helpers.
    """
    label = get_timestamp_sting(timestamp)
    output.important('\nCalculating graph eigenvectors' + label + '...')
    result = graph_eigenvector(graph)
    largest_eigenvalue, centralities = result
    output.normal('Calculated eigenvectors.')
    output.dim('Largest eigenvalue' + label + ': ' + str(largest_eigenvalue))

    output.normal('\n10 nodes with the highest eigenvector' + label + ':')
    output.normal(centralities.head(10))

    output.normal('\nWriting eigenvector results to file...')
    target = get_timestamp_path(eigenvector_path, timestamp)
    data.dataframe_to_csv(centralities, target, True)
    output.success('Saved eigenvector results to "' + target + '"')
def betweenness_centrality(timestamp=None):
    """Report and persist betweenness centralities of the module-level ``graph``.

    ``graph_betweenness_centrality`` yields one table for nodes and one
    for edges. The first ten rows of each are printed and each table is
    written to its own CSV file.

    Args:
        timestamp: Optional timestamp forwarded to the label and path
            helpers.
    """
    label = get_timestamp_sting(timestamp)
    output.important('\nCalculating betweenness centralities' + label + '...')
    node_table, edge_table = graph_betweenness_centrality(graph)
    output.normal(
        'Calculated betweenness centralities for both edges and nodes.')

    output.normal(
        '\n10 nodes with the highest betweenness centrality' + label + ':')
    output.normal(node_table.head(10))
    output.normal(
        '\n10 edges with the highest betweenness centrality' + label + ':')
    output.normal(edge_table.head(10))

    output.normal('\nWriting betweenness centralities to file..')

    node_target = get_timestamp_path(nodes_betweenness_path, timestamp)
    data.dataframe_to_csv(node_table, node_target, True)
    output.success('Saved nodes betweenness centrality information to "'
                   + node_target + '"')

    edge_target = get_timestamp_path(edges_betweenness_path, timestamp)
    data.dataframe_to_csv(edge_table, edge_target, True)
    output.success('Saved edges betweenness centrality information to "'
                   + edge_target + '"')
def density(timestamp=None):
    """Report the density of the module-level ``graph``.

    Args:
        timestamp: Optional timestamp forwarded to the label helper; it
            appears in the result line only.
    """
    output.important('\nCalculating graph density...')
    label = get_timestamp_sting(timestamp)
    value = graph_density(graph)
    output.dim('Graph density' + label + ': ' + str(value))
def reciprocity(timestamp=None):
    """Report the edge reciprocity of the module-level ``graph``.

    Args:
        timestamp: Optional timestamp forwarded to the label helper.
    """
    label = get_timestamp_sting(timestamp)
    output.important('\nCalculating edge reciprocity' + label + '...')
    value = graph_reciprocity(graph)
    output.success('Edge reciprocity' + label + ': ' + str(value))
def assortativity(timestamp=None):
    """Report the assortativity of the module-level ``graph``.

    The value returned by ``graph_assortativity`` is printed in its
    ``str()`` form.

    Args:
        timestamp: Optional timestamp forwarded to the label helper; it
            appears in the progress line only, not in the result line.
    """
    heading = ('\nCalculating graph assortativity'
               + get_timestamp_sting(timestamp) + '...')
    output.important(heading)
    result = graph_assortativity(graph)
    output.dim('Graph assortativity: ' + str(result))
def diameter(timestamp=None):
    """Report the diameter of the module-level ``graph``.

    Args:
        timestamp: Optional timestamp forwarded to the label helper; it
            appears in the result line only.
    """
    output.important('\nCalculating graph diameter...')
    label = get_timestamp_sting(timestamp)
    value = graph_diameter(graph)
    output.dim('Graph diameter' + label + ': ' + str(value))
def largest_connected_component(timestamp=None):
    """Report the size of the largest connected component of ``graph``.

    Prints the vertex and edge counts of the object returned by
    ``graph_lcc`` for the module-level graph.

    Args:
        timestamp: Optional timestamp forwarded to the label helper; it
            appears in the result line only.
    """
    output.important('\nCalculating largest connected component...')
    component = graph_lcc(graph)
    label = get_timestamp_sting(timestamp)
    vertex_count = str(component.num_vertices())
    edge_count = str(component.num_edges())
    output.dim('Largest connected component' + label + ': ' + vertex_count
               + ' vertices and ' + edge_count + ' edges.')
# Add results folder data.make_folder(results_folder) data.make_folder(graphs_folder) # Import the data file filename = 'tgraph_real_wikiedithyperlinks.txt' # If no file was selected, exit if filename == '': output.error('No data file selected, exiting...') exit() if TEST: # Create a random graph output.important('Creating a random connected graph with 100 nodes') graph = random_graph() else: # Read the graph output.important('Reading graph data from "' + filename + '"...') graph = datafile_to_graph(filename) # Output graph info output.success('\nSuccessfully read graph. Info:') output.dim(str(graph.num_edges()) + " edges") output.dim(str(graph.num_vertices()) + " vertices") # Returns the filename for a file with a given timestamp def get_timestamp_path(path, timestamp=None, postfix='.csv'): if timestamp is None: