def test_difference(self):
    """Check the graph difference against the stored reference output.

    Computes the difference between ``self.graph1`` and ``self.graph2``,
    exports it as a tab-separated adjacency matrix with header and compares
    the MD5 checksum of the exported file with the checksum of the expected
    result file.
    """
    sys.stdout.write("Testing set difference\n")

    # Location of the freshly exported matrix and of the reference file.
    result_path = os.path.join(
        current_dir, 'pyntacletests/test_sets/tmp/result_set.adjm')
    reference_path = os.path.join(
        current_dir,
        'pyntacletests/test_sets/output/set/result_difference.adjm')

    difference_graph = GraphSetOps.difference(
        self.graph1, self.graph2, new_graph_name='result_set')

    PyntacleExporter.AdjacencyMatrix(
        graph=difference_graph, file=result_path, sep='\t', header=True)

    self.assertEqual(
        getmd5(result_path),
        getmd5(reference_path),
        'Wrong checksum for Set, difference case')
def run(self):
    """Run the ``pyntacle set`` command.

    Loads the two input graphs named in ``self.args``, optionally reduces
    each of them to its largest component, performs the requested set
    operation (``union``, ``intersection`` or ``difference``), writes the
    resulting graph in the requested output format, optionally draws the
    input and output graphs, and writes a report.

    The method always terminates the process through ``sys.exit``: status 1
    on invalid input, status 0 on success or when the operation yields
    nothing worth writing.
    """
    cursor = None
    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    def stop_cursor():
        # The cursor only exists when it was not suppressed.
        if cursor is not None:
            cursor.stop()

    if not os.path.exists(self.args.input_file_1) or not os.path.exists(
            self.args.input_file_2):
        sys.stderr.write(
            u"One of the two input files does not exist. Quitting\n")
        sys.exit(1)

    if filecmp.cmp(self.args.input_file_1, self.args.input_file_2,
                   shallow=False):
        sys.stderr.write(u"The two input files are equal. Quitting\n")
        sys.exit(1)

    input_header = not self.args.no_header

    # File names without directory and extension, reused for output names.
    base1 = os.path.splitext(os.path.basename(self.args.input_file_1))[0]
    base2 = os.path.splitext(os.path.basename(self.args.input_file_2))[0]

    sys.stdout.write(import_start)
    input_format = format_dictionary.get(self.args.format, "NA")

    sys.stdout.write(u"Reading first input file\n")
    graph1 = GraphLoad(self.args.input_file_1,
                       file_format=input_format,
                       header=input_header,
                       separator=self.args.input_separator).graph_load()

    sys.stdout.write(u"Reading second input file\n")
    graph2 = GraphLoad(self.args.input_file_2,
                       file_format=input_format,
                       header=input_header,
                       separator=self.args.input_separator).graph_load()

    # init Utils global stuff
    utils1 = GraphUtils(graph=graph1)
    utils2 = GraphUtils(graph=graph2)

    if self.args.output_file is None:
        # Default basename: <input1>_<OPERATION>_<input2>_<date>
        if self.args.which in ("union", "intersection", "difference"):
            self.args.output_file = "_".join(
                [base1, self.args.which.upper(), base2, self.date])

    if self.args.largest_component:
        try:
            graph1 = utils1.get_largest_component()
            # Report the name of the graph that was actually reduced.
            sys.stdout.write(
                u"Taking the largest component of the input graph {0} as you requested ({1} nodes, {2} edges)\n"
                .format(graph1["name"], graph1.vcount(), graph1.ecount()))
            utils1.set_graph(graph1)
        except MultipleSolutionsError:
            sys.stderr.write(
                u"Graph {} has two largest components of the same size. Cannot choose one. either remove one of the components or run 'pyntacle set' without the '--largest-component' option. Quitting\n"
                .format(graph1["name"]))
            sys.exit(1)

        try:
            graph2 = utils2.get_largest_component()
            sys.stdout.write(
                u"Taking the largest component of the input graph {0} as you requested ({1} nodes, {2} edges)\n"
                .format(graph2["name"], graph2.vcount(), graph2.ecount()))
            utils2.set_graph(graph2)
        except MultipleSolutionsError:
            sys.stderr.write(
                u"Graph {} has two largest components of the same size. Cannot choose one. either remove one of the components or run 'pyntacle set' without the '--largest-component' option. Quitting\n"
                .format(graph2["name"]))
            sys.exit(1)

    # Check provided dimensions' format
    if self.args.plot_dim:  # define custom format
        self.args.plot_dim = self.args.plot_dim.split(",")
        for i, dimension in enumerate(self.args.plot_dim):
            try:
                self.args.plot_dim[i] = int(dimension)
            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)

            if self.args.plot_dim[i] <= 0:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)

        plot_size = tuple(self.args.plot_dim)
    else:
        # generate different formats according to graph size
        if graph1.vcount() <= 150 and graph2.vcount() <= 150:
            plot_size = (800, 800)
        else:
            plot_size = (1600, 1600)

    if self.args.format == "sif" or all(
            x is None for x in graph1.es()["sif_interaction"]) or all(
                x is None for x in graph2.es()["sif_interaction"]):
        sys.stdout.write(
            u"WARNING: Interaction stored in SIF files will be removed\n")

    sys.stdout.write(section_end)
    sys.stdout.write(run_start)

    if self.args.which == "union":
        sys.stdout.write(
            u" Performing union between input graph {} and {}\n".format(
                self.args.input_file_1, self.args.input_file_2))

        output_graph = GraphSetOps.union(graph1, graph2,
                                         self.args.output_file)
        if all(len(x) <= 2 for x in output_graph.vs()["parent"]):
            sys.stdout.write(
                u"There were no common nodes when performing Graph union. Will return two disjoint graphs\n"
            )

    elif self.args.which == "intersection":
        sys.stdout.write(
            u"Performing intersection between input graph {} and {}\n".
            format(self.args.input_file_1, self.args.input_file_2))

        output_graph = GraphSetOps.intersection(graph1, graph2,
                                                self.args.output_file)
        if output_graph.ecount() == 0:
            sys.stdout.write(
                u"No intersection was possible for the two input graphs. No output will be generated\n"
            )
            stop_cursor()
            sys.exit(0)

    elif self.args.which == "difference":
        sys.stdout.write(
            "Performing difference between input graph {} and {}\n".
            format(self.args.input_file_1, self.args.input_file_2))

        output_graph = GraphSetOps.difference(graph1, graph2,
                                              self.args.output_file)
        if output_graph.vcount() == graph1.vcount(
        ) and output_graph.ecount() == graph1.ecount():
            # The result equals graph 1, i.e. nothing of graph 2 was
            # removed from graph 1 (second file first, then first file).
            sys.stdout.write(
                u"Nothing of graph {} could be subtracted from graph {}\n".
                format(os.path.basename(self.args.input_file_2),
                       os.path.basename(self.args.input_file_1)))

        if output_graph.vcount() == 0 and output_graph.ecount() == 0:
            sys.stdout.write(
                u"Graph difference was complete, no nodes and edges could be retrieved. No output will be produced. Quitting\n"
            )
            stop_cursor()
            sys.exit(0)

        if output_graph.vcount() <= 1 and output_graph.ecount() < 1:
            sys.stdout.write(
                u"Graph difference returned only node {} and no edge. No output will be produced. Quitting\n"
                .format("".join(output_graph.vs["name"])))
            stop_cursor()
            sys.exit(0)

        if output_graph.vcount() > 1 and output_graph.ecount() == 0:
            sys.stdout.write(
                u"Graph difference returned {} nodes, namely: {} and no edge. No output will be produced. Quitting\n"
                .format(output_graph.vcount(),
                        ",\n".join(output_graph.vs()["name"])))
            stop_cursor()
            sys.exit(0)

    sys.stdout.write(section_end)
    sys.stdout.write(report_start)

    # print a summary of the operation to the command line
    sys.stdout.write(u"Report of set operation: {}\n".format(
        self.args.which))
    sys.stdout.write(section_end)
    sys.stdout.write(u"Input graphs:\n")

    sys.stdout.write(
        u"Graph 1: {0}\nNodes:\t{1}\nEdges:\t{2}\nComponents:\t{3}\n".
        format(graph1["name"][0], graph1.vcount(), graph1.ecount(),
               len(graph1.components())))
    sys.stdout.write(section_end)
    sys.stdout.write(
        u"Graph 2: {0}\nNodes:\t{1}\nEdges:\t{2}\nComponents:\t{3}\n".
        format(graph2["name"][0], graph2.vcount(), graph2.ecount(),
               len(graph2.components())))
    sys.stdout.write(section_end)

    sys.stdout.write(u"Resulting graph:\n")
    sys.stdout.write(
        u"Nodes:\t{0}\nEdges:\t{1}\nComponents:\t{2}\n".format(
            output_graph.vcount(), output_graph.ecount(),
            len(output_graph.components())))
    sys.stdout.write(section_end)

    sys.stdout.write(report_start)
    if not os.path.isdir(self.args.directory):
        sys.stdout.write(
            u"WARNING: Output directory does not exist, will create one at {}\n"
            .format(os.path.abspath(self.args.directory)))
        os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

    out_form = format_dictionary.get(self.args.output_format, "NA")
    output_path = os.path.join(
        self.args.directory, ".".join([self.args.output_file, out_form]))
    sys.stdout.write(u"Basename of output graph: {}\n".format(
        self.args.output_file))
    sys.stdout.write(
        u"Path to generated graph is: {}\n".format(output_path))

    # producing output graph
    if self.args.no_output_header:
        sys.stdout.write(u"Skipping header on output files\n")
        output_header = False
    else:
        output_header = True

    if self.args.output_separator is None:
        sys.stdout.write(
            u"Using '\\t' as default separator for output file\n")
        self.args.output_separator = "\t"

    if os.path.exists(output_path):
        self.logging.warning(
            u"A file named {} already exist, will be overwritten".format(
                output_path))

    # output generated networks
    if out_form == "adjm":
        sys.stdout.write(
            u"Writing resulting graph to an adjacency matrix\n")
        PyntacleExporter.AdjacencyMatrix(output_graph,
                                         output_path,
                                         sep=self.args.output_separator,
                                         header=output_header)

    elif out_form == "egl":
        sys.stdout.write(u"Writing resulting graph to an edge list\n")
        PyntacleExporter.EdgeList(output_graph,
                                  output_path,
                                  sep=self.args.output_separator,
                                  header=output_header)

    elif out_form == "sif":
        sys.stdout.write(
            u"Writing resulting graph to Simple Interaction Format (SIF) file\n"
        )
        PyntacleExporter.Sif(output_graph,
                             output_path,
                             sep=self.args.output_separator,
                             header=output_header)

    elif out_form == "dot":
        sys.stdout.write("Writing resulting graph to a DOT file\n")
        # Ignore ugly RuntimeWarnings while creating a dot
        simplefilter("ignore", RuntimeWarning)
        PyntacleExporter.Dot(output_graph, output_path)

    elif out_form == "graph":
        sys.stdout.write(
            "Writing resulting graph into a binary file (ending in .graph)\n"
        )
        PyntacleExporter.Binary(output_graph, output_path)

    # producing plots
    # Plots are drawn only when both inputs are below the 1000-node limit
    # announced in the message of the ``elif`` branch below.
    within_plot_limit = graph1.vcount() < 1000 and graph2.vcount() < 1000

    if not self.args.no_plot and within_plot_limit:
        # generates plot directory
        plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

        if os.path.isdir(plot_dir):
            self.logging.warning(
                u"A directory named 'pyntacle-plots' already exists.")
        else:
            os.mkdir(plot_dir)

        sys.stdout.write(u"Generating plots in {} format\n".format(
            self.args.plot_format))
        sys.stdout.write(u"Drawing starting graphs\n")

        graph1_plot_path = os.path.join(
            plot_dir,
            ".".join(["_".join([base1, self.date]), self.args.plot_format]))
        graph2_plot_path = os.path.join(
            plot_dir,
            ".".join(["_".join([base2, self.date]), self.args.plot_format]))

        graph1_plotter = PlotGraph(graph=graph1)
        graph2_plotter = PlotGraph(graph=graph2)

        # first create two plots of the input graph
        input_graph_node_size = 25

        pal = sns.color_palette("hls", 10).as_hex()
        framepal = sns.color_palette("hls", 10, desat=0.5).as_hex()

        graph_1_colour = pal[0]
        graph_1_frame = framepal[0]
        graph_2_colour = pal[3]
        graph_2_frame = framepal[3]

        # set input graph node labels
        graph1_plotter.set_node_labels(labels=graph1.vs()["name"])
        graph2_plotter.set_node_labels(labels=graph2.vs()["name"])

        # set input graph node colors
        graph1_plotter.set_node_colors(colors=[graph_1_colour] *
                                       graph1.vcount())
        graph2_plotter.set_node_colors(colors=[graph_2_colour] *
                                       graph2.vcount())

        # set input graphs node sizes
        graph1_plotter.set_node_sizes(sizes=[input_graph_node_size] *
                                      graph1.vcount())
        graph2_plotter.set_node_sizes(sizes=[input_graph_node_size] *
                                      graph2.vcount())

        # one frame colour per vertex of the corresponding graph
        graph_1_frame_colors = [graph_1_frame] * graph1.vcount()
        graph_2_frame_colors = [graph_2_frame] * graph2.vcount()

        # define layouts
        graph1_plotter.set_layouts(self.args.plot_layout)
        graph2_plotter.set_layouts(self.args.plot_layout)

        # plot input graphs
        graph1_plotter.plot_graph(path=graph1_plot_path,
                                  bbox=plot_size,
                                  margin=20,
                                  edge_curved=0.2,
                                  keep_aspect_ratio=True,
                                  vertex_label_size=6,
                                  vertex_frame_color=graph_1_frame_colors)
        graph2_plotter.plot_graph(path=graph2_plot_path,
                                  bbox=plot_size,
                                  margin=20,
                                  edge_curved=0.2,
                                  keep_aspect_ratio=True,
                                  vertex_label_size=6,
                                  vertex_frame_color=graph_2_frame_colors)

        if output_graph.vcount() > 0:
            # plot output graph
            output_plot_path = os.path.join(
                plot_dir, ".".join([
                    "_".join([
                        self.args.which, self.args.output_file, self.date
                    ]), self.args.plot_format
                ]))
            output_graph_plotter = PlotGraph(
                graph=output_graph)  # init plotter class

            sys.stdout.write(u"Drawing resulting graphs\n")
            node_intersection_colour = pal[1]
            node_intersection_frame = framepal[1]
            node_intersection_size = 45

            parent_g1 = graph1["name"][0]
            parent_g2 = graph2["name"][0]

            # Colour every output vertex by the input graph(s) listed in
            # its "parent" attribute.
            intersection_node_color_list = []
            intersection_frame_color_list = []
            for v in output_graph.vs():
                in_g1 = parent_g1 in v["parent"]
                in_g2 = parent_g2 in v["parent"]
                if in_g1 and in_g2:
                    intersection_node_color_list.append(
                        node_intersection_colour)
                    intersection_frame_color_list.append(
                        node_intersection_frame)
                elif in_g1:
                    intersection_node_color_list.append(graph_1_colour)
                    intersection_frame_color_list.append(graph_1_frame)
                elif in_g2:
                    intersection_node_color_list.append(graph_2_colour)
                    intersection_frame_color_list.append(graph_2_frame)

            output_graph_plotter.set_node_colors(
                colors=intersection_node_color_list)
            output_graph_plotter.set_node_sizes(sizes=[
                node_intersection_size if parent_g1 in v["parent"]
                and parent_g2 in v["parent"] else input_graph_node_size
                for v in output_graph.vs()
            ])

            output_graph_plotter.set_node_labels(
                labels=output_graph.vs()["name"])
            output_graph_plotter.set_layouts(self.args.plot_layout)
            output_graph_plotter.plot_graph(
                path=output_plot_path,
                bbox=plot_size,
                margin=20,
                edge_curved=0.2,
                keep_aspect_ratio=True,
                vertex_label_size=6,
                vertex_frame_color=intersection_frame_color_list)
        else:
            sys.stdout.write(
                u"The output graph does not contain vertices. Can't draw graph\n"
            )

    elif not self.args.no_plot:
        sys.stdout.write(
            u"One of the two input graphs exceeds Pyntacle limits for plotting (maximum 1000 nodes). Will not draw graph\n"
        )

    # Report
    reporter1 = PyntacleReporter(graph=graph1)  # init reporter1
    reporter2 = PyntacleReporter(graph=graph2)  # init reporter2
    reporter_final = PyntacleReporter(graph=output_graph)

    set1_attr_dict = OrderedDict()
    set2_attr_dict = OrderedDict()
    setF_attr_dict = OrderedDict()

    if self.args.which == 'intersection':
        # Computed here rather than while plotting, so the report is also
        # available when no plot is produced.
        parent_g1 = graph1["name"][0]
        parent_g2 = graph2["name"][0]
        intersection_set = [
            v["name"] for v in output_graph.vs()
            if parent_g1 in v["parent"] and parent_g2 in v["parent"]
        ]
        setF_attr_dict['\nCommon Nodes'] = 'Node names'
        setF_attr_dict[len(intersection_set)] = ','.join(intersection_set)

    reporter1.create_report(ReportEnum.Set, set1_attr_dict)
    reporter2.create_report(ReportEnum.Set, set2_attr_dict)
    reporter_final.create_report(ReportEnum.Set, setF_attr_dict)

    # Merge the three reports into reporter1, replacing and dropping rows
    # so that each graph gets its own titled section.
    reporter1.report[1] = ['\n--- Graph 1 ---']
    reporter2.report[1] = ['--- Graph 2 ---']
    del (reporter1.report[-1])
    del (reporter2.report[-1])
    del (reporter2.report[0])
    del (reporter_final.report[0])
    for e in reporter_final.report:
        if e[0] == 'Pyntacle Command:':
            e[1] = e[1] + ' ' + self.args.which

    reporter_final.report[0] = ['\n--- Resulting Graph ---']
    reporter1.report.extend(reporter2.report)
    reporter1.report.extend(reporter_final.report)
    reporter1.write_report(report_dir=self.args.directory,
                           format=self.args.report_format)

    stop_cursor()

    sys.stdout.write(section_end)
    sys.stdout.write(u"Pyntacle set completed successfully\n")
    sys.exit(0)
def run(self):
    """Run the ``pyntacle keyplayer`` command.

    Depending on ``self.args.which`` either searches for the best key
    player set of size ``k_size`` (``kp-finder``, greedy or brute-force)
    or computes key player metrics for a user-supplied node set
    (``kp-info``). Results are printed, written to a report, optionally
    stored in the graph and saved as a binary file, and optionally plotted.

    The method always terminates the process through ``sys.exit``: status 1
    on invalid input, status 0 on success.

    Raises:
        Error: if ``self.args`` has no ``which`` attribute (no subcommand).
    """
    if not hasattr(self.args, 'which'):
        raise Error(
            u"usage: pyntacle.py keyplayer {kp-finder, kp-info} [options]'"
        )

    cursor = None
    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    # Both spellings are accepted for the m-reach metric, because the two
    # subcommands historically tested different ones.
    mreach_types = ('mreach', 'm-reach', 'pos', 'all')

    # Every metric selection that computes m-reach needs the distance.
    if self.args.m_reach is None and self.args.type in mreach_types:
        sys.stderr.write(
            u"m-reach distance must be provided for computing m-reach. Quitting\n"
        )
        sys.exit(1)

    # Checking input file
    if self.args.input_file is None:
        sys.stderr.write(
            u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
        )
        sys.exit(1)

    if not os.path.exists(self.args.input_file):
        # Error condition (exit status 1): reported on stderr.
        sys.stderr.write(u"Cannot find {}. Is the path correct?\n".format(
            self.args.input_file))
        sys.exit(1)

    header = not self.args.no_header

    # Load Graph
    sys.stdout.write(import_start)
    sys.stdout.write(u"Importing graph from file\n")
    graph = GraphLoad(self.args.input_file,
                      format_dictionary.get(self.args.format, "NA"),
                      header,
                      separator=self.args.input_separator).graph_load()

    # init graph utils class
    utils = gu(graph=graph)

    if hasattr(self.args, "nodes"):
        self.args.nodes = self.args.nodes.split(",")

        if not utils.nodes_in_graph(self.args.nodes):
            sys.stderr.write(
                "One or more of the specified nodes {} is not present in the graph. Please check your spelling and the presence of empty spaces between node names. Quitting\n"
                .format(self.args.nodes))
            sys.exit(1)

    if self.args.largest_component:
        try:
            graph = utils.get_largest_component()
            sys.stdout.write(
                u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n"
                .format(graph.vcount(), graph.ecount()))
            # reinitialize graph utils class
            utils.set_graph(graph)

        except MultipleSolutionsError:
            sys.stderr.write(
                u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n"
            )
            sys.exit(1)

        # The requested nodes must survive the reduction to one component.
        if hasattr(self.args, 'nodes'):
            if not utils.nodes_in_graph(self.args.nodes):
                sys.stderr.write(
                    "One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n"
                )
                sys.exit(1)

    if hasattr(self.args, "k_size") and self.args.k_size >= graph.vcount():
        sys.stderr.write(
            "The 'k' argument ({}) must be strictly less than the graph size({}). Quitting\n"
            .format(self.args.k_size, graph.vcount()))
        sys.exit(1)

    if 'implementation' in graph.attributes():
        implementation = graph['implementation']
    else:
        implementation = CmodeEnum.igraph

    # remember whether the output directory has to be created later
    createdir = not os.path.isdir(self.args.directory)

    # control plot dimensions
    if self.args.plot_dim:  # define custom format
        self.args.plot_dim = self.args.plot_dim.split(",")

        for i, dimension in enumerate(self.args.plot_dim):
            try:
                self.args.plot_dim[i] = int(dimension)
                if self.args.plot_dim[i] <= 0:
                    raise ValueError

            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of positive integers (e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)

        plot_size = tuple(self.args.plot_dim)

    else:
        plot_size = (800, 600)

        if graph.vcount() > 150:
            plot_size = (1600, 1600)

    # initialize reporter and result containers for later usage
    r = PyntacleReporter(graph=graph)
    initial_results = {}
    results = OrderedDict()

    sys.stdout.write(section_end)
    sys.stdout.write(run_start)

    if self.args.which == 'kp-finder':
        # Greedy optimization
        if self.args.implementation == "greedy":
            report_type = ReportEnum.KP_greedy
            kp_runner = gow(graph=graph)

            sys.stdout.write(
                u"Using greedy optimization algorithm for searching optimal key player set for the requested key player metrics\n"
            )
            sys.stdout.write("\n")

            if self.args.type in (['F', 'neg', 'all']):
                sys.stdout.write(
                    u"KP-NEG: Finding optimal set of nodes of size {0} that maximizes F\n"
                    .format(self.args.k_size))

                initial_results[KpnegEnum.F.name] = kpp.F(graph)
                kp_runner.run_fragmentation(self.args.k_size,
                                            KpnegEnum.F,
                                            seed=self.args.seed,
                                            cmode=implementation)
                sys.stdout.write("\n")

            if self.args.type in (['dF', 'neg', 'all']):
                sys.stdout.write(
                    u"KP-NEG: Finding optimal set of nodes of size {0} that maximizes dF\n"
                    .format(self.args.k_size))

                initial_results[KpnegEnum.dF.name] = kpp.dF(
                    graph, cmode=implementation)
                kp_runner.run_fragmentation(
                    self.args.k_size,
                    KpnegEnum.dF,
                    max_distance=self.args.max_distance,
                    seed=self.args.seed,
                    cmode=implementation)
                sys.stdout.write("\n")

            if self.args.type in (['dR', 'pos', 'all']):
                sys.stdout.write(
                    u"KP-POS: Finding optimal set of nodes of size {0} that maximizes dR\n"
                    .format(self.args.k_size))
                kp_runner.run_reachability(
                    self.args.k_size,
                    KpposEnum.dR,
                    max_distance=self.args.max_distance,
                    seed=self.args.seed,
                    cmode=implementation)
                sys.stdout.write("\n")

            if self.args.type in mreach_types:
                sys.stdout.write(
                    u"KP-POS: Finding optimal set of nodes of size {0} that maximizes the m-reach at distance {1}\n"
                    .format(self.args.k_size, self.args.m_reach))
                kp_runner.run_reachability(
                    self.args.k_size,
                    KpposEnum.mreach,
                    m=self.args.m_reach,
                    max_distance=self.args.max_distance,
                    seed=self.args.seed,
                    cmode=implementation)
                sys.stdout.write("\n")

        elif self.args.implementation == "brute-force":
            report_type = ReportEnum.KP_bruteforce
            kp_runner = bfw(graph=graph)

            sys.stdout.write(
                u"Using brute-force search algorithm to find the best key player set(s)\n"
            )
            sys.stdout.write(sep_line)

            if self.args.type in (['F', 'neg', 'all']):
                sys.stdout.write(
                    u"KP-NEG: Finding best set (or sets) of nodes of size {0} that holds the maximum F\n"
                    .format(self.args.k_size))

                initial_results[KpnegEnum.F.name] = kpp.F(graph)
                kp_runner.run_fragmentation(self.args.k_size,
                                            KpnegEnum.F,
                                            threads=self.args.threads)
                sys.stdout.write("\n")

            if self.args.type in (['dF', 'neg', 'all']):
                sys.stdout.write(
                    u"KP-NEG: Finding best set(s) of nodes of size {0} that holds the maximum dF\n"
                    .format(self.args.k_size))

                initial_results[KpnegEnum.dF.name] = kpp.dF(
                    graph, cmode=CmodeEnum.igraph)
                kp_runner.run_fragmentation(
                    self.args.k_size,
                    KpnegEnum.dF,
                    max_distance=self.args.max_distance,
                    cmode=CmodeEnum.igraph,
                    threads=self.args.threads)
                sys.stdout.write("\n")

            if self.args.type in (['dR', 'pos', 'all']):
                sys.stdout.write(
                    u"KP-POS: Finding best set(s) of nodes of size {0} that hold the maximum dR\n"
                    .format(self.args.k_size))
                kp_runner.run_reachability(
                    self.args.k_size,
                    KpposEnum.dR,
                    max_distance=self.args.max_distance,
                    cmode=CmodeEnum.igraph,
                    threads=self.args.threads)
                sys.stdout.write(sep_line)

            if self.args.type in mreach_types:
                sys.stdout.write(
                    u"KP-POS: Finding the best set(s) of nodes of size {0} that maximizes the m-reach at distance {1}\n"
                    .format(self.args.k_size, self.args.m_reach))

                kp_runner.run_reachability(
                    self.args.k_size,
                    KpposEnum.mreach,
                    m=self.args.m_reach,
                    max_distance=self.args.max_distance,
                    cmode=CmodeEnum.igraph,
                    threads=self.args.threads)
                sys.stdout.write("\n")

        # get report results
        results.update(kp_runner.get_results())
        sys.stdout.write(section_end)
        sys.stdout.write(summary_start)
        sys.stdout.write(
            u"Node set size for key player search: {}\n".format(
                str(self.args.k_size)))
        sys.stdout.write("\n")

        brute_force = self.args.implementation == 'brute-force'
        for kp in results.keys():
            if len(results[kp][0]) > 1 and brute_force:
                plurals = ['s', 'are']
            else:
                plurals = ['', 'is']

            # the case in which there's no solution
            no_solution = results[kp][0][0] is None
            if no_solution:
                results[kp][0] = ["None"]

            if brute_force and not no_solution:
                # brute-force returns a list of equally good node sets
                list_of_results = "\n".join(
                    ['(' + ', '.join(x) + ')' for x in results[kp][0]])
            else:
                # a single set; this also renders the "None" placeholder
                # as "(None)" instead of joining its characters
                list_of_results = "(" + ", ".join(results[kp][0]) + ")"

            if kp == KpnegEnum.F.name or kp == KpnegEnum.dF.name:
                # joining initial results with final ones
                results[kp].append(initial_results[kp])

                sys.stdout.write(
                    u"Best key player set{0} of size {1} for negative key player index {2} {3}:\n{4}\nFinal {2} value: {5}\nStarting graph {2} was {6}\n"
                    .format(plurals[0], self.args.k_size, kp, plurals[1],
                            list_of_results, results[kp][1],
                            results[kp][2]))
                sys.stdout.write("\n")

            elif kp == KpposEnum.dR.name:
                sys.stdout.write(
                    u"Best key player set{0} of size {1} for positive key player index {2} {3}:\n{4}\nFinal {2} value: {5}\n"
                    .format(plurals[0], self.args.k_size, kp, plurals[1],
                            list_of_results, results[kp][1]))
                sys.stdout.write("\n")

            elif kp == KpposEnum.mreach.name:
                # keep the m-reach distance together with the result
                results[kp].append(self.args.m_reach)
                node_perc_reached = ((self.args.k_size + results[kp][1]) /
                                     graph.vcount()) * 100
                if node_perc_reached == 100:
                    node_perc_reached = int(node_perc_reached)
                else:
                    node_perc_reached = round(node_perc_reached, 2)
                sys.stdout.write(
                    u'Key player set{0} of size {1} for positive key player index m-reach, using at best '
                    '{3} steps {4}:\n{5}\nwith value {6} on {8} (number of nodes reached on total number of nodes)\nThe total percentage of nodes, which '
                    'includes the kp-set, is {7}%\n'.format(
                        plurals[0], self.args.k_size, kp,
                        self.args.m_reach, plurals[1], list_of_results,
                        results[kp][1], node_perc_reached, graph.vcount()))
                sys.stdout.write("\n")

        sys.stdout.write(section_end)

    # kpinfo: compute kpmetrics for a set of predetermined nodes
    elif self.args.which == 'kp-info':
        report_type = ReportEnum.KP_info
        initial_results = OrderedDict()
        kp_runner = kpw(graph=graph, nodes=self.args.nodes)
        results = OrderedDict()

        sys.stdout.write(u"Input node set: ({})\n".format(', '.join(
            self.args.nodes)))
        sys.stdout.write("\n")

        if self.args.type in (['F', 'neg', 'all']):
            initial_results[KpnegEnum.F.name] = kpp.F(graph)
            kp_runner.run_fragmentation(KpnegEnum.F)
            sys.stdout.write("\n")

        if self.args.type in (['dF', 'neg', 'all']):
            initial_results[KpnegEnum.dF.name] = kpp.dF(
                graph,
                cmode=implementation,
                max_distance=self.args.max_distance)
            kp_runner.run_fragmentation(
                KpnegEnum.dF,
                max_distance=self.args.max_distance,
                cmode=implementation)
            sys.stdout.write("\n")

        if self.args.type in (['dR', 'pos', 'all']):
            kp_runner.run_reachability(KpposEnum.dR,
                                       max_distance=self.args.max_distance,
                                       cmode=implementation)
            sys.stdout.write("\n")

        if self.args.type in mreach_types:
            kp_runner.run_reachability(KpposEnum.mreach,
                                       m=self.args.m_reach,
                                       max_distance=self.args.max_distance,
                                       cmode=implementation)
            sys.stdout.write("\n")

        sys.stdout.write(section_end)
        results.update(kp_runner.get_results())
        sys.stdout.write(summary_start)

        for metric in results.keys():
            if metric == KpnegEnum.F.name or metric == KpnegEnum.dF.name:
                results[metric].append(initial_results[metric])
                sys.stdout.write(
                    u"Removing node set \n({2})\ngives a {0} value of {3}\nStarting graph {0}: {1}\n"
                    .format(metric, results[metric][2],
                            ', '.join(self.args.nodes),
                            results[metric][1]))
                sys.stdout.write("\n")

            elif metric == KpposEnum.mreach.name:
                results[metric].append(self.args.m_reach)
                perc_node_reached = round(
                    (results[metric][1] + len(self.args.nodes)) /
                    graph.vcount() * 100, 3)
                sys.stdout.write(
                    u"The m-reach of node set:\n({0})\nis {1} on {4} (number of nodes reached on total number of "
                    u"nodes)\nThis means it can reach the {2}% of remaining nodes in the graph nodes in at most {3} steps\n"
                    .format(', '.join(results[metric][0]),
                            results[metric][1], perc_node_reached,
                            self.args.m_reach, graph.vcount()))
                sys.stdout.write("\n")

            else:  # dR case
                sys.stdout.write(
                    "The {0} value for node set:\n({1})\nis {2}\n".format(
                        metric, ', '.join(results[metric][0]),
                        results[metric][1]))
                sys.stdout.write("\n")

        sys.stdout.write(section_end)

    sys.stdout.write(report_start)
    sys.stdout.write("Writing Results\n")

    # check output directory
    if createdir:
        sys.stdout.write(
            u"WARNING: output directory does not exist, {} will be created\n"
            .format(os.path.abspath(self.args.directory)))
        os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

    # reporting and plotting part
    sys.stdout.write(u"Producing report in {} format\n".format(
        self.args.report_format))

    r.create_report(report_type=report_type, report=results)
    r.write_report(report_dir=self.args.directory,
                   format=self.args.report_format)

    if self.args.save_binary:
        # add kp information to the graph before saving it
        sys.stdout.write(
            u"Saving graph to a binary file (ending in .graph)\n")

        # ``results`` is only read here: it is neither renamed nor resized,
        # so the plotting step below sees the same keys whether or not the
        # binary file is requested.
        for key in list(results.keys()):
            values = results[key]
            attr_prefix = key

            if key == KpposEnum.mreach.name:
                # the last element is the m-reach distance: move it into
                # the attribute name and keep it out of the stored value
                attr_prefix = "_".join(
                    [KpposEnum.mreach.name, str(values[-1])])
                values = values[:-1]

            if self.args.which == "kp-finder":
                if self.args.implementation == "brute-force":
                    suffix = "bruteforce"
                    attr_key = tuple(
                        tuple(sorted(tuple(x))) for x in values[0])
                else:
                    suffix = "greedy"
                    attr_key = tuple(sorted(tuple(values[0])))
            else:
                suffix = "info"
                attr_key = tuple(sorted(tuple(values[0])))

            attr_name = "_".join([attr_prefix, suffix])
            attr_val = values[1]

            if attr_name in graph.attributes():
                if not isinstance(graph[attr_name], dict):
                    sys.stdout.write(
                        "WARNING: attribute {} does not point to a dictionary, will overwrite\n"
                        .format(attr_name))
                    AddAttributes.add_graph_attribute(
                        graph, attr_name, {attr_key: attr_val})
                else:
                    if attr_key in graph[attr_name]:
                        # name the attribute being overwritten
                        sys.stdout.write(
                            "WARNING: {} already present in the {} graph attribute, will overwrite\n"
                            .format(attr_key, attr_name))
                    graph[attr_name].update({attr_key: attr_val})
            else:
                AddAttributes.add_graph_attribute(graph, attr_name,
                                                  {attr_key: attr_val})

        binary_prefix = "_".join([
            os.path.splitext(os.path.basename(self.args.input_file))[0],
            self.args.which, self.date
        ])
        binary_path = os.path.join(self.args.directory,
                                   binary_prefix + ".graph")
        PyntacleExporter.Binary(graph, binary_path)

    # generate and output plot
    if not self.args.no_plot and graph.vcount() < 1000:

        sys.stdout.write(u"Generating network plots in {} format\n".format(
            self.args.plot_format))
        plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

        if not os.path.isdir(plot_dir):
            os.mkdir(plot_dir)

        plot_graph = PlotGraph(graph=graph)
        plot_format = self.args.plot_format
        plot_graph.set_node_labels(
            labels=graph.vs()["name"])  # assign node labels to graph

        pal = sns.color_palette("Accent", 8).as_hex()
        framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()

        other_nodes_colour = pal[2]
        other_frame_colour = framepal[2]

        other_nodes_size = 25
        other_edge_width = 1

        for metric in results:
            if self.args.which == 'kp-finder' and self.args.implementation == "brute-force":
                # flatten the list of solutions into one list of nodes
                results[metric][0] = list(
                    set(list(chain(*results[metric][0]))))

            # palette position used to highlight the nodes of each metric
            if metric == "F":
                palette_index = 0
            elif metric == "dF":
                palette_index = 1
            elif metric == KpposEnum.mreach.name:
                palette_index = 4
            else:  # dR
                palette_index = 3

            key_nodes = results[metric][0]
            key_colour = pal[palette_index]
            key_frame = framepal[palette_index]

            # create a list of node colors
            node_colors = [
                key_colour if x["name"] in key_nodes else
                other_nodes_colour for x in graph.vs()
            ]
            node_frames = [
                key_frame if x["name"] in key_nodes else
                other_frame_colour for x in graph.vs()
            ]
            plot_graph.set_node_colors(colors=node_colors)

            node_sizes = [
                35 if x["name"] in key_nodes else other_nodes_size
                for x in graph.vs()
            ]
            plot_graph.set_node_sizes(sizes=node_sizes)

            # add edge widths
            if metric != KpposEnum.mreach.name:
                edge_widths = [
                    5 if any(y in key_nodes
                             for y in x["adjacent_nodes"]) else
                    other_edge_width for x in graph.es()
                ]

            else:
                if self.args.m_reach > 5:
                    edge_widths = [
                        5 if any(y in key_nodes
                                 for y in x["adjacent_nodes"]) else
                        other_edge_width for x in graph.es()
                    ]
                    sys.stdout.write(
                        u"WARNING: you chose a very high value of m-reach, the edge width "
                        "may be too big, hence it may not be represented correctly\n"
                    )
                else:
                    mreach_nodes = key_nodes
                    # get node indices of corresponding kpset
                    indices = utils.get_node_indices(mreach_nodes)

                    # define a starting list of values
                    edge_widths = [other_edge_width] * graph.ecount()

                    # widest edges next to the kp-set, then 2 less at
                    # every further step away from it
                    mreach_width = (self.args.m_reach * 2) + 2
                    memory_indices = indices
                    step_before = indices

                    for i in range(1, self.args.m_reach + 1):
                        neighbours = Graph.neighborhood(graph,
                                                        vertices=indices)
                        # flat out list of indices
                        indices = list(chain(*neighbours))

                        # nodes reached for the first time at this step
                        remaining_indices = list(
                            set(indices) - set(memory_indices))

                        mreach_edge_ids = []
                        for elem in step_before:
                            for el in remaining_indices:
                                if Graph.are_connected(graph, elem, el):
                                    mreach_edge_ids.append(
                                        graph.get_eid(elem, el))

                        for edge in mreach_edge_ids:
                            edge_widths[edge] = mreach_width

                        mreach_width = mreach_width - 2
                        memory_indices = memory_indices + remaining_indices
                        step_before = remaining_indices

            plot_graph.set_edge_widths(edge_widths)
            plot_graph.set_layouts(self.args.plot_layout)

            # <subcommand>_<graph name>_<metric>_<date>.<format>
            plot_path = os.path.join(
                plot_dir, "_".join([
                    self.args.which, graph["name"][0], metric, self.date
                ]) + "." + plot_format)
            if os.path.exists(plot_path):
                sys.stdout.write(
                    u"WARNING: a plot with the name ({}) already exists, overwriting it\n"
                    .format(os.path.basename(plot_path)))

            plot_graph.plot_graph(path=plot_path,
                                  bbox=plot_size,
                                  margin=20,
                                  edge_curved=0.2,
                                  keep_aspect_ratio=True,
                                  vertex_label_size=6,
                                  vertex_frame_color=node_frames)

    elif graph.vcount() >= 1000:
        sys.stdout.write(
            u"The graph has too many nodes ({}, we plot nodes with a maximum of 1000 nodes). It will not be drawn\n"
            .format(graph.vcount()))

    if cursor is not None:
        cursor.stop()

    sys.stdout.write(section_end)
    sys.stdout.write(u"Pyntacle keyplayer completed successfully\n")
    sys.exit(0)
def run(self):
    """Find communities in the input graph, then export, report and plot them.

    Everything is driven by ``self.args``:

    * the graph is loaded from ``input_file`` (optionally reduced to its
      largest component);
    * ``which`` selects the algorithm ("fastgreedy", "infomap",
      "leading-eigenvector" or "community-walktrap");
    * ``min_nodes`` / ``max_nodes`` / ``min_components`` /
      ``max_components`` optionally filter the resulting modules;
    * every surviving module is written to ``directory`` in
      ``output_format``, a report is produced, the labelled input graph is
      optionally stored as a binary file (``save_binary``) and plots are
      drawn unless ``no_plot`` is set.

    The method never returns normally: it terminates the process through
    ``sys.exit`` (status 0 on success or when the filters discard every
    module, status 1 on invalid input).
    """
    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    if self.args.input_file is None:
        sys.stderr.write(
            u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
        )
        sys.exit(1)

    if not os.path.exists(self.args.input_file):
        # Newline added so the shell prompt does not end up glued to the message.
        sys.stderr.write(u"Cannot find {}. Is the path correct?\n".format(
            self.args.input_file))
        sys.exit(1)

    input_header = not self.args.no_header
    input_format = format_dictionary.get(self.args.format, "NA")

    sys.stdout.write(import_start)
    sys.stdout.write(u"Importing graph from file\n")
    graph = GraphLoad(self.args.input_file,
                      file_format=input_format,
                      header=input_header,
                      separator=self.args.input_separator).graph_load()

    # init Utils global stuff
    utils = GraphUtils(graph=graph)

    if self.args.largest_component:
        try:
            graph = utils.get_largest_component()
            sys.stdout.write(
                u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n"
                .format(graph.vcount(), graph.ecount()))
            utils.set_graph(graph)
        except MultipleSolutionsError:
            sys.stderr.write(
                u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n"
            )
            sys.exit(1)

    # define plot sizes
    if self.args.plot_dim:
        # custom format: a comma-separated list of strictly positive integers
        self.args.plot_dim = self.args.plot_dim.split(",")
        for i, raw_dim in enumerate(self.args.plot_dim):
            try:
                self.args.plot_dim[i] = int(raw_dim)
                if self.args.plot_dim[i] <= 0:
                    raise ValueError
            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)
        plot_size = tuple(self.args.plot_dim)
    else:
        # generate different formats according to graph size
        plot_size = (800, 800) if graph.vcount() <= 150 else (1600, 1600)

    # initialize module finder method
    communities = CommunityFinder(graph=graph)
    # initialize Reporter
    results = OrderedDict()

    if self.args.which == "fastgreedy":
        if self.args.weights is not None:
            # import edge attributes
            if not os.path.exists(self.args.weights):
                sys.stderr.write(
                    u"Attribute file {} does not exist. Quitting\n".format(
                        self.args.weights))
                sys.exit(1)
            else:
                ImportAttributes.import_edge_attributes(
                    graph,
                    self.args.weights,
                    sep=separator_detect(self.args.weights),
                    mode=self.args.weights_format)
                # edges without an explicit weight default to 1.0
                weights = [
                    float(x) if x is not None else 1.0
                    for x in graph.es()["weights"]
                ]
        else:
            weights = None

        if self.args.clusters is not None:
            try:
                self.args.clusters = int(self.args.clusters)
            except (ValueError, TypeError):
                sys.stderr.write(
                    u"argument of '--clusters' must be an integer. Quitting\n"
                )
                sys.exit(1)

        sys.stdout.write(section_end)
        sys.stdout.write(run_start)
        sys.stdout.write(
            u"Finding communities using the fastgreedy algorithm\n")
        communities.fastgreedy(weights=weights, n=self.args.clusters)
        mods = communities.get_modules
        algorithm = "fastgreedy"

    elif self.args.which == "infomap":
        sys.stdout.write(section_end)
        sys.stdout.write(run_start)
        sys.stdout.write(
            u"Finding communities using the infomap (naive) algorithm\n")
        communities.infomap()
        mods = communities.get_modules
        algorithm = "infomap"

    elif self.args.which == "leading-eigenvector":
        sys.stdout.write(section_end)
        sys.stdout.write(run_start)
        sys.stdout.write(
            u"Finding communities using the leading-eigenvector algorithm\n"
        )
        communities.leading_eigenvector()
        mods = communities.get_modules
        algorithm = "leading-eigenvector"

    elif self.args.which == "community-walktrap":
        try:
            self.args.steps = int(self.args.steps)
        except (ValueError, TypeError):
            sys.stderr.write(
                u"Argument of '--steps' must be an integer. Quitting\n")
            sys.exit(1)

        if self.args.weights is not None:
            # import edge attributes
            if not os.path.exists(self.args.weights):
                sys.stderr.write(
                    u"Weights file {} does not exist. Quitting\n".format(
                        self.args.weights))
                sys.exit(1)
            else:
                ImportAttributes.import_edge_attributes(
                    graph,
                    self.args.weights,
                    sep=separator_detect(self.args.weights),
                    mode=self.args.weights_format)
                # edges without an explicit weight default to 1.0
                weights = [
                    float(x) if x is not None else 1.0
                    for x in graph.es()["weights"]
                ]
        else:
            weights = None

        if self.args.clusters is not None:
            try:
                self.args.clusters = int(self.args.clusters)
            except (ValueError, TypeError):
                sys.stderr.write(
                    u"Argument of '--clusters' must be an integer. Quitting\n"
                )
                sys.exit(1)

        sys.stdout.write(section_end)
        sys.stdout.write(run_start)
        sys.stdout.write(
            u"Finding communities using the walktrap algorithm and a walker of {} steps\n"
            .format(self.args.steps))
        communities.community_walktrap(weights=weights,
                                       n=self.args.clusters,
                                       steps=self.args.steps)
        mods = communities.get_modules
        algorithm = "community-walktrap"

    if not mods:
        sys.stderr.write(u"No communities found. Quitting.\n")
        sys.exit(1)

    # one tab-separated summary row per community
    mods_report = [
        "\t".join(
            str(x) for x in
            [i, elem.vcount(), elem.ecount(), len(elem.components())]) + "\n"
        for i, elem in enumerate(mods)
    ]

    sys.stdout.write(section_end)
    sys.stdout.write(summary_start)
    sys.stdout.write(
        u"Pyntacle - Community finding report:\nAlgorithm:{0}\nTotal number of communities found:"
        "\t{1}\nIndex\tNodes\tEdges \tComponents\n{2}".format(
            algorithm, len(mods), "".join(mods_report)))

    # initialize Moduleutils class
    mod_utils = ModuleUtils(modules=mods)

    filter_names = ("min_nodes", "max_nodes", "max_components",
                    "min_components")
    if any(getattr(self.args, name) is not None for name in filter_names):
        init_mods = len(mods)

        # Each filter, when given, must be convertible to an integer; the
        # command-line flag is the attribute name with dashes.
        for name in filter_names:
            raw_value = getattr(self.args, name)
            if raw_value is None:
                continue
            try:
                setattr(self.args, name, int(raw_value))
            except (ValueError, TypeError):
                sys.stderr.write(
                    u"Argument of '--{}' must be an integer. Quitting\n".
                    format(name.replace("_", "-")))
                sys.exit(1)

        mod_utils.filter_subgraphs(min_nodes=self.args.min_nodes,
                                   max_nodes=self.args.max_nodes,
                                   min_components=self.args.min_components,
                                   max_components=self.args.max_components)

        if len(mod_utils.modules) > 0:
            sys.stdout.write(
                u"Filtered out {0} communities. Keeping {1} communities\n".
                format((init_mods - len(mod_utils.modules)),
                       len(mod_utils.modules)))
        else:
            sys.stdout.write(
                u"No community could be kept using the current filters. Quitting\n"
            )
            sys.exit(0)
    else:
        sys.stdout.write(
            u"No filters specified. All modules will be kept\n")

    sys.stdout.write(section_end)

    mod_utils.label_modules_in_graph(graph=graph)
    final_mods = mod_utils.get_modules()

    for elem in final_mods:
        results[elem["module"]] = [
            elem.vcount(),
            elem.ecount(),
            len(elem.components())
        ]

    sys.stdout.write(report_start)

    # producing output graph
    if self.args.no_output_header:
        sys.stdout.write(
            u"Skipping header writing on output graph community files\n")
        output_header = False
    else:
        output_header = True

    if not os.path.isdir(self.args.directory):
        sys.stdout.write(
            u"WARNING: output directory does not exists {} will be created\n"
            .format(os.path.abspath(self.args.directory)))
        os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

    # the output format falls back to the input format when not specified
    if self.args.output_format is None:
        self.args.output_format = self.args.format
    out_form = format_dictionary.get(self.args.output_format, "NA")

    if self.args.output_file is None:
        # insert random name generator
        self.args.output_file = "_".join(
            ["pyntacle", graph["name"][0], algorithm])
        sys.stdout.write(
            u"Basename of the output modules will be {} (default)\n".
            format(self.args.output_file))

    output_basename = os.path.join(self.args.directory,
                                   self.args.output_file)

    # output generated networks
    sys.stdout.write(
        "Writing resulting communities to the specified network file format\n"
    )

    for elem in final_mods:
        output_path = ".".join([
            "_".join([output_basename,
                      str(elem["module"]), self.date]), out_form
        ])
        try:
            if out_form == "adjm":
                PyntacleExporter.AdjacencyMatrix(
                    elem,
                    output_path,
                    sep=self.args.output_separator,
                    header=output_header)
            elif out_form == "egl":
                PyntacleExporter.EdgeList(elem,
                                          output_path,
                                          sep=self.args.output_separator,
                                          header=output_header)
            elif out_form == "sif":
                PyntacleExporter.Sif(elem,
                                     output_path,
                                     sep=self.args.output_separator,
                                     header=output_header)
            elif out_form == "dot":
                # Ignore ugly RuntimeWarnings while creating a dot
                simplefilter("ignore", RuntimeWarning)
                PyntacleExporter.Dot(elem, output_path)
            elif out_form == "bin":
                PyntacleExporter.Binary(elem, output_path)
        except UnsupportedGraphError:
            sys.stdout.write(
                "Module {0} was skipped because it is too small ({1} nodes, {2} edges), use the `--save-binary` flag to retrieve it\n"
                .format(elem["module"], elem.vcount(), elem.ecount()))

    # reporting and plotting part
    sys.stdout.write(u"Producing report in {} format\n".format(
        self.args.report_format))

    r = PyntacleReporter(graph=graph)
    report_type = ReportEnum.Communities
    results["algorithm"] = algorithm

    r.create_report(report_type=report_type, report=results)
    r.write_report(report_dir=self.args.directory,
                   format=self.args.report_format)

    # save the original graph into a binary file
    if self.args.save_binary:
        binary_name = ".".join([
            "_".join([
                os.path.splitext(os.path.basename(
                    self.args.input_file))[0], "communities"
            ]), "graph"
        ])
        binary_path = os.path.join(self.args.directory, binary_name)
        sys.stdout.write(
            u"Storing the input graph with module labels into a binary file in the results directory\n"
        )
        # The announcement above used to be the only effect of the flag:
        # actually write the labelled graph.
        PyntacleExporter.Binary(graph, binary_path)

    if not self.args.no_plot:
        plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

        if os.path.isdir(plot_dir):
            self.logging.info(
                u"A directory named \"pyntacle-plots\" already exists.")
        else:
            os.mkdir(plot_dir)

        # one fill colour and one (desaturated) border colour per module
        avail_colors_fill = sns.color_palette(
            "Spectral", n_colors=len(final_mods)).as_hex()
        avail_colors_borders = sns.color_palette("Spectral",
                                                 n_colors=len(final_mods),
                                                 desat=0.5).as_hex()

        if graph.vcount() < 1000:
            sys.stdout.write(u"Plotting graph in {} format\n".format(
                self.args.plot_format))

            main_plot_path = os.path.join(
                plot_dir, ".".join([
                    "_".join([
                        self.args.which,
                        os.path.splitext(
                            os.path.basename(self.args.input_file))[0],
                        "communities", self.date
                    ]), self.args.plot_format
                ]))

            # initialize general graph Drawer
            sys.stdout.write(
                u"Drawing original graph, highlighting communities\n")

            if len(final_mods) > 20:
                sys.stdout.write(
                    u"WARNING:The number of modules found ({}) is very high. The plot of the input graph will have nuanced colors\n"
                    .format(len(final_mods)))

            graph_plotter = PlotGraph(graph=graph)
            graph_plotter.set_node_labels(labels=graph.vs()["name"])
            graph_plotter.set_node_sizes([30] * graph.vcount())

            # define different colors for each module; nodes that belong
            # to no module are drawn in grey
            not_in_module_colors = "#A9A9A9"
            col_list = []
            bord_list = []
            for elem in graph.vs():
                module = elem["module"]
                if module is not None:
                    col_list.append(avail_colors_fill[module])
                    bord_list.append(avail_colors_borders[module])
                else:
                    col_list.append(not_in_module_colors)
                    bord_list.append(not_in_module_colors)

            graph_plotter.set_node_colors(col_list)
            graph_plotter.set_layouts(self.args.plot_layout)
            graph_plotter.plot_graph(path=main_plot_path,
                                     bbox=plot_size,
                                     margin=20,
                                     edge_curved=0.2,
                                     keep_aspect_ratio=True,
                                     vertex_label_size=6,
                                     vertex_frame_color=bord_list)
        else:
            sys.stdout.write(
                u"Input graph is above Pyntacle plotting limit ({} nodes found, only graphs with at best 1000 nodes). Input graph will not be plotted\n"
                .format(graph.vcount()))

        sys.stdout.write("Drawing each module separately\n")

        for i, comm in enumerate(final_mods):
            if comm.vcount() <= 1000:
                plotter = PlotGraph(graph=comm)
                plotter.set_node_labels(labels=comm.vs()["name"])
                plotter.set_node_colors([avail_colors_fill[i]] *
                                        comm.vcount())
                plotter.set_node_sizes([30] * comm.vcount())

                comm_plot_path = os.path.join(
                    plot_dir, ".".join([
                        "_".join([
                            self.args.output_file,
                            str(comm["module"]), self.date
                        ]), self.args.plot_format
                    ]))

                plotter.set_layouts(self.args.plot_layout)
                plotter.plot_graph(
                    path=comm_plot_path,
                    bbox=plot_size,
                    margin=20,
                    edge_curved=0.2,
                    keep_aspect_ratio=True,
                    vertex_label_size=6,
                    vertex_frame_color=[avail_colors_borders[i]] *
                    comm.vcount())
            else:
                sys.stdout.write(
                    u"Module {0} is above Pyntacle plotting limit ({1} nodes found, communities with at best 1000 nodes are plotted). Plotting of this module will be skipped\n"
                    .format(i, comm.vcount()))

    if not self.args.suppress_cursor:
        cursor.stop()

    sys.stdout.write(section_end)
    sys.stdout.write(u"Pyntacle communities completed successfully\n")
    sys.exit(0)
def run(self):
    """Generate a synthetic graph, write it to disk and optionally plot it.

    ``self.args.which`` selects the topology ("random", "scale-free",
    "tree" or "small-world"); the remaining attributes of ``self.args``
    supply the generator parameters, the output location/format and the
    plotting options. Parameters left unset are drawn at random within the
    ranges hard-coded below.

    The process is terminated with status 1 on any invalid parameter, and
    with status 0 at the end of a successful run when
    ``self.args.repeat == 1``; otherwise the method returns ``None``.
    """
    invalid_nodes_msg = u"Number of nodes must be a positive integer. Quitting\n"

    def resolve_node_count():
        # Shared by the random / scale-free / tree branches: pick a random
        # size when none was given, otherwise require an integer.
        if self.args.nodes is None:
            self.args.nodes = random.randint(100, 500)
        else:
            try:
                self.args.nodes = int(self.args.nodes)
            except ValueError:
                sys.stderr.write(invalid_nodes_msg)
                sys.exit(1)

    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    sys.stdout.write(run_start)

    if self.args.which == "random":
        resolve_node_count()

        if not self.args.probability and self.args.edges:
            # fixed number of edges
            try:
                self.args.edges = int(self.args.edges)
                # This banner used to be a bare string expression and was
                # never printed.
                sys.stdout.write(
                    u"Generating graph with random topology\nParameters:\nNumber of nodes: {0}\nNumber of edges: {1}\n"
                    .format(self.args.nodes, self.args.edges))
                graph = PyntacleGenerator.Random(
                    [self.args.nodes, self.args.edges],
                    name="Random",
                    seed=self.args.seed)
            except (ValueError, TypeError, IllegalGraphSizeError):
                sys.stderr.write(
                    u"Number of nodes must be a positive integer greater than 2 and number of edges must be a positive integer greater than zero. Quitting\n"
                )
                sys.exit(1)
        else:
            # wiring probability (defaults to 0.5)
            if not self.args.probability:
                self.args.probability = 0.5
            else:
                try:
                    self.args.probability = float(self.args.probability)
                    if self.args.probability > 1.0 or self.args.probability < 0.0:
                        raise ValueError
                except ValueError:
                    sys.stderr.write(
                        u"Probability must be a float between 0 and 1. Quitting\n"
                    )
                    sys.exit(1)
            try:
                sys.stdout.write(
                    u"Generating graph with random topology\nParameters:\nNumber of nodes: {0}\nProbability of wiring: {1}\n"
                    .format(self.args.nodes, self.args.probability))
                graph = PyntacleGenerator.Random(
                    [self.args.nodes, self.args.probability],
                    name="Random",
                    seed=self.args.seed)
            except (ValueError, TypeError, IllegalGraphSizeError):
                sys.stderr.write(
                    u"Number of nodes must be a positive integer greater than 2 and a probability must be a float between 0 and 1. Quitting\n"
                )
                sys.exit(1)

    elif self.args.which == "scale-free":
        resolve_node_count()

        if self.args.avg_edges is None:
            self.args.avg_edges = random.randint(10, 100)
        else:
            try:
                # parse the user-supplied value (previously the node count
                # was converted here by mistake)
                self.args.avg_edges = int(self.args.avg_edges)
            except ValueError:
                sys.stderr.write(
                    u"Number of outgoing edges must be a positive integer. Quitting\n"
                )
                sys.exit(1)

        try:
            sys.stdout.write(
                u"Generating graph with scale-free topology\nParameters:\nNumber of Nodes: {0}\nNumber of Outgoing edges: {1}\n"
                .format(self.args.nodes, self.args.avg_edges))
            graph = PyntacleGenerator.ScaleFree(
                [self.args.nodes, self.args.avg_edges],
                name="ScaleFree",
                seed=self.args.seed)
        except (ValueError, TypeError, IllegalGraphSizeError):
            sys.stderr.write(
                u"Number of nodes and number of outgoing edges must be positive integers. Quitting\n"
            )
            sys.exit(1)

    elif self.args.which == "tree":
        resolve_node_count()

        if self.args.children is None:
            self.args.children = random.randint(2, 10)
        else:
            try:
                # parse the user-supplied value (previously the node count
                # was converted here by mistake)
                self.args.children = int(self.args.children)
            except ValueError:
                sys.stderr.write(
                    u"Number of children must be a positive integer. Quitting\n"
                )
                sys.exit(1)

        try:
            sys.stdout.write(
                u"Generating Graph with tree topology\nParameters:\nNumber of nodes: {0}\nChildren per node: {1}\n"
                .format(self.args.nodes, self.args.children))
            graph = PyntacleGenerator.Tree(
                [self.args.nodes, self.args.children],
                name="Tree",
                seed=self.args.seed)
        except (ValueError, TypeError, IllegalGraphSizeError):
            sys.stderr.write(
                u"Number of nodes and number of children must be positive integers. Quitting\n"
            )
            sys.exit(1)

    elif self.args.which == "small-world":
        # This does not happen anymore, as default is 2.
        if not self.args.lattice_size:
            self.args.lattice_size = random.randint(2, 5)

        if not self.args.nei:
            self.args.nei = random.randint(1, 5)

        if isinstance(self.args.lattice, str):
            try:
                self.args.lattice = int(self.args.lattice)
                self.args.lattice_size = int(self.args.lattice_size)
                self.args.nei = int(self.args.nei)
                self.args.probability = float(self.args.probability)

                # `0 < p > 1.0` only rejected p > 1; negative values must
                # be rejected too.
                if self.args.probability < 0.0 or self.args.probability > 1.0:
                    raise ValueError
                if self.args.lattice_size <= 1:
                    raise ValueError
                if self.args.nei < 1:
                    raise ValueError
                if self.args.lattice <= 1:
                    raise ValueError
            except ValueError:
                sys.stderr.write(
                    u"One of the parameters you specified is not the proper type or it is out of boundaries. Quitting\n"
                )
                sys.exit(1)

        try:
            sys.stdout.write(
                u"Generating Graph with small-world topology\nParameters:\nInitial lattice dimensions: {0}\nLattice size: {1}\nNei (number of edges that connect each graph): {2}\nRewiring probability: {3}\n"
                .format(self.args.lattice, self.args.lattice_size,
                        self.args.nei, self.args.probability))
            graph = PyntacleGenerator.SmallWorld([
                self.args.lattice, self.args.lattice_size, self.args.nei,
                self.args.probability
            ],
                                                 name="SmallWorld",
                                                 seed=self.args.seed)
        except (TypeError, ValueError):
            sys.stderr.write(
                u"The parameters you chose were invalid. Please check your command line. Quitting\n"
            )
            # Without this exit the code below would fail on an unbound
            # `graph`.
            sys.exit(1)

    if graph.vcount() < 2 and graph.ecount() < 1:
        sys.stdout.write(
            "Generated Graph is too small ({} nodes, {} edges). Rerun this command and tune your parameters. Quitting\n"
            .format(graph.vcount(), graph.ecount()))
        sys.exit(1)

    sys.stdout.write(section_end)
    sys.stdout.write(report_start)

    if not os.path.isdir(self.args.directory):
        sys.stdout.write(
            u"WARNING: output directory does not exist {} will be created\n"
            .format(os.path.abspath(self.args.directory)))
        os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

    if self.args.output_file is None:
        self.args.output_file = graph["name"][0]

    out_form = format_dictionary.get(self.args.output_format, "NA")

    if self.args.no_output_header:
        sys.stdout.write(
            u"Skipping header on output graph file, as requested\n")
        output_header = False
    else:
        output_header = True

    if out_form == "NA":
        sys.stderr.write(
            u"Output extension specified is not supported. Quitting\n")
        sys.exit(1)

    output_path = os.path.join(self.args.directory,
                               ".".join([self.args.output_file, out_form]))
    sys.stdout.write(u"Path to graph : {}\n".format(output_path))

    if self.args.output_separator is None:
        sys.stdout.write(
            u"Using '\\t' as default separator for output file\n")
        self.args.output_separator = "\t"

    # output generated networks
    if out_form == "adjm":
        sys.stdout.write(
            u"Writing generated graph to an adjacency matrix\n")
        PyntacleExporter.AdjacencyMatrix(graph,
                                         output_path,
                                         sep=self.args.output_separator,
                                         header=output_header)
    elif out_form == "egl":
        sys.stdout.write(u"Writing generated graph to an edge list\n")
        PyntacleExporter.EdgeList(graph,
                                  output_path,
                                  sep=self.args.output_separator,
                                  header=output_header)
    elif out_form == "sif":
        sys.stdout.write(
            u"Writing generated graph to a Simple Interaction Format (SIF) file\n"
        )
        PyntacleExporter.Sif(graph,
                             output_path,
                             sep=self.args.output_separator,
                             header=output_header)
    elif out_form == "dot":
        sys.stdout.write(u"Writing generated graph to a DOT file\n")
        # Ignore ugly RuntimeWarnings while creating a dot
        simplefilter("ignore", RuntimeWarning)
        PyntacleExporter.Dot(graph, output_path)
    elif out_form == "graph":
        sys.stdout.write(
            u"Writing generated graph to a binary file (ending in .graph)\n"
        )
        PyntacleExporter.Binary(graph, output_path)

    # Check provided dimensions' format
    if self.args.plot_dim:
        # custom format: a comma-separated list of strictly positive integers
        self.args.plot_dim = self.args.plot_dim.split(",")
        for i, raw_dim in enumerate(self.args.plot_dim):
            try:
                self.args.plot_dim[i] = int(raw_dim)
                if self.args.plot_dim[i] <= 0:
                    raise ValueError
            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)
        plot_size = tuple(self.args.plot_dim)
    else:
        # generate different formats according to graph size
        plot_size = (800, 800) if graph.vcount() <= 150 else (1600, 1600)

    if not self.args.no_plot and graph.vcount() < 1000:
        sys.stdout.write(u"Drawing generated graph\n")

        # generates plot directory
        plot_dir = os.path.join(self.args.directory, "pyntacle-plots")
        if not os.path.isdir(plot_dir):
            os.mkdir(plot_dir)

        plot_path = os.path.join(
            plot_dir,
            ".".join([self.args.output_file, self.args.plot_format]))

        pal = sns.color_palette("Spectral", 10).as_hex()
        pal2 = sns.color_palette("RdYlGn", 10).as_hex()
        framepal = sns.color_palette("Spectral", 10, desat=0.5).as_hex()
        framepal2 = sns.color_palette("RdYlGn", 10, desat=0.5).as_hex()

        other_nodes_size = 18

        plot_graph = PlotGraph(graph=graph)

        # Per topology: the names that denote its default layout, the
        # layout name actually passed for that default, and the node /
        # frame colours. Any other topology falls back to the circle entry.
        topology_styles = {
            "random": (("random", ), "random", pal[-3], framepal[-3]),
            "scale-free": (("fr", "fruchterman_reingold"), "fr", pal[3],
                           framepal[3]),
            "tree": (("rt", "reingold_tilford"), "reingold_tilford",
                     pal2[-2], framepal2[-2]),
        }
        default_names, default_layout, other_nodes_colour, frame_vertex_colour = \
            topology_styles.get(self.args.which,
                                (("circle", ), "circle", pal[0], framepal[0]))

        if self.args.plot_layout in default_names:
            plot_graph.set_layouts(layout=default_layout)
        else:
            plot_graph.set_layouts(self.args.plot_layout)

        node_colors = [other_nodes_colour] * graph.vcount()
        plot_graph.set_node_colors(colors=node_colors)
        plot_graph.set_node_labels(
            labels=graph.vs()["name"])  # assign node labels to graph

        node_sizes = [other_nodes_size] * graph.vcount()
        plot_graph.set_node_sizes(sizes=node_sizes)

        frame_vertex_colour = [frame_vertex_colour] * graph.vcount()

        sys.stdout.write(
            u"Drawing graph in {} format at path: {}\n".format(
                self.args.plot_format, plot_path))

        plot_graph.plot_graph(path=plot_path,
                              bbox=plot_size,
                              margin=20,
                              edge_curved=0.2,
                              keep_aspect_ratio=True,
                              vertex_label_size=6,
                              vertex_frame_color=frame_vertex_colour)

    elif not self.args.no_plot and graph.vcount() >= 1000:
        self.logging.warning(
            u"Graph is above Pyntacle plotting capability ({} nodes, we plot graph with at best 1000 nodes). Graph plotting will be skipped."
            .format(graph.vcount()))

    if not self.args.suppress_cursor:
        cursor.stop()

    sys.stdout.write(section_end)
    sys.stdout.write(u"Pyntacle generate completed successfully\n")

    # Only exit when a single graph was requested; for other values of
    # `repeat` control returns to the caller.
    if self.args.repeat == 1:
        sys.exit(0)
def run(self): if not self.args.suppress_cursor: cursor = CursorAnimation() cursor.daemon = True cursor.start() if self.args.input_file is None: sys.stderr.write( u"Please specify an input file using the `-i/--input-file` option. Quitting\n") sys.exit(1) if not os.path.exists(self.args.input_file): sys.stdout.write(u"Cannot find {}. Is the path correct?\n".format(self.args.input_file)) sys.exit(1) #verify that group distance is set if group closeness is specified distancedict = {"min": GroupDistanceEnum.minimum, "max":GroupDistanceEnum.maximum, "mean": GroupDistanceEnum.mean} if self.args.type in ["all", "closeness"]: if self.args.group_distance not in distancedict.keys(): sys.stdout.write("'--group-distance/-D parameter must be one of the followings: {}'. Quitting\n".format(",".join(distancedict.keys()))) sys.exit(1) else: group_distance = distancedict[self.args.group_distance] # Parsing optional node list if hasattr(self.args, 'nodes'): self.args.nodes = self.args.nodes.split(',') # self.args.nodes = [str.lower(x) for x in self.args.nodes] if not hasattr(self.args, 'which'): raise Error(u"usage: pyntacle.py groupcentrality {gr-finder, gr-info} [options]'") if self.args.no_header: header = False else: header = True sys.stdout.write(import_start) sys.stdout.write(u"Importing graph from file\n") graph = GraphLoad(self.args.input_file, format_dictionary.get(self.args.format, "NA"), header, separator=self.args.input_separator).graph_load() # init graph utils class utils = gu(graph=graph) if hasattr(self.args, 'nodes'): if not utils.nodes_in_graph(self.args.nodes): sys.stderr.write("One or more of the specified nodes is not present in the graph. Please check your spelling and the presence of empty spaces in between node names. 
Quitting\n") sys.exit(1) if self.args.largest_component: try: graph = utils.get_largest_component() sys.stdout.write( u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n".format( graph.vcount(), graph.ecount())) #reinitialize graph utils class utils.set_graph(graph) except MultipleSolutionsError: sys.stderr.write( u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n") sys.exit(1) #check that the nodes are in the largest component if hasattr(self.args, 'nodes'): if not utils.nodes_in_graph(self.args.nodes): sys.stderr.write("One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n") sys.exit(1) if hasattr(self.args, "k_size") and self.args.k_size >= graph.vcount(): sys.stderr.write("The 'k' argument ({}) must be strictly less than the graph size({}). Quitting\n".format(self.args.k_size, graph.vcount())) sys.exit(1) if 'implementation' in graph.attributes(): implementation = graph['implementation'] else: implementation = CmodeEnum.igraph # check that output directory is properly set createdir = False if not os.path.isdir(self.args.directory): createdir = True # control plot dimensions if self.args.plot_dim: # define custom format self.args.plot_dim = self.args.plot_dim.split(",") for i in range(0, len(self.args.plot_dim)): try: self.args.plot_dim[i] = int(self.args.plot_dim[i]) if self.args.plot_dim[i] <= 0: raise ValueError except ValueError: sys.stderr.write( u"Format specified must be a comma-separated list of positive integers (e.g. 1920,1080). 
Quitting\n") sys.exit(1) plot_size = tuple(self.args.plot_dim) else: plot_size = (800, 600) if graph.vcount() > 150: plot_size = (1600, 1600) # initialize reporter for later usage and plot dimension for later usage r = PyntacleReporter(graph=graph) results = OrderedDict() sys.stdout.write(section_end) sys.stdout.write(run_start) if self.args.which == "gr-finder": # Greedy optimization if self.args.implementation == "greedy": if self.args.seed: random.seed(self.args.seed) report_type = ReportEnum.GR_greedy go_runner = gow(graph=graph) sys.stdout.write(u"Using greedy optimization algorithm for searching optimal set of nodes using group centrality metrics\n") sys.stdout.write(sep_line) if self.args.type in (["all", "degree"]): sys.stdout.write( u"Finding a set of nodes of size {0} that optimizes group degree\n".format( self.args.k_size)) go_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_degree, seed=self.args.seed, cmode=implementation) sys.stdout.write(sep_line) if self.args.type in (["all", "betweenness"]): sys.stdout.write( u"Finding a set of nodes of size {0} that optimizes group betweenness\n".format( self.args.k_size)) go_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_betweenness, seed=self.args.seed, cmode=implementation) sys.stdout.write(sep_line) if self.args.type in (["all", "closeness"]): sys.stdout.write( u"Finding a set of nodes of size {0} that optimizes group closeness using the {1} distance from the node set\n".format( self.args.k_size, group_distance.name)) go_runner.run_groupcentrality(k = self.args.k_size,gr_type=GroupCentralityEnum.group_closeness, seed=self.args.seed, cmode=implementation ,distance=group_distance) sys.stdout.write(sep_line) sys.stdout.write(sep_line) results.update(go_runner.get_results()) #bruteforce implementation elif self.args.implementation == "brute-force": if self.args.threads > 1: plural = "s" else: plural = "" report_type = ReportEnum.GR_bruteforce 
bf_runner = bfw(graph=graph) sys.stdout.write(u"Using brute-force search algorithm to find the best set(s) that optimize group centrality metrics\n") sys.stdout.write(sep_line) if self.args.type in (["all", "degree"]): sys.stdout.write( u"Finding the best set(s) of nodes of size {0} that maximizes group degree using {1} thread{2}\n".format( self.args.k_size, self.args.threads, plural)) bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_degree, cmode=implementation, threads=self.args.threads) sys.stdout.write(sep_line) if self.args.type in (["all", "betweenness"]): sys.stdout.write( u"Finding the best set(s) of nodes of size {0} that maximizes group betweenness using {1} thread{2}\n".format( self.args.k_size, self.args.threads, plural)) bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_betweenness, cmode=implementation, threads=self.args.threads) sys.stdout.write(sep_line) if self.args.type in (["all", "closeness"]): sys.stdout.write( u"Finding the best set(s) of nodes of size {0} that maximizes group closeness using the {1} distance from the node set and {2} thread{3}\n".format( self.args.k_size, group_distance, self.args.threads, plural)) bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_closeness, cmode=implementation, threads=self.args.threads, distance=group_distance) sys.stdout.write(sep_line) results.update(bf_runner.get_results()) #shell output report part sys.stdout.write(section_end) sys.stdout.write(summary_start) sys.stdout.write(u"Node set size for group centrality search: {}\n".format(str(self.args.k_size))) sys.stdout.write(sep_line) for kk in results.keys(): if len(results[kk][0]) > 1 and self.args.implementation == 'brute-force': plurals = ['s', 'are'] else: plurals = ['', 'is'] if results[kk][0][0] is None: # the case in which there's no solution results[kk][0] = ["None"] if self.args.implementation == 'brute-force': list_of_results = "\n".join(['(' 
+ ', '.join(x) + ')' for x in results[kk][0]]) else: list_of_results = "(" + ", ".join(results[kk][0]) + ")" sys.stdout.write( u'Best node set{0} of size {1} for {5} centrality {2}:\n{3}\nwith value {4}\n'.format( plurals[0], self.args.k_size, plurals[1], list_of_results, results[kk][1], " ".join(kk.split("_")[:2]))) if kk.startswith(GroupCentralityEnum.group_closeness.name): sys.stdout.write("The {} distance was considered for computing closeness\n".format(group_distance.name)) sys.stdout.write("\n") sys.stdout.write(section_end) elif self.args.which == "gr-info": report_type = ReportEnum.GR_info sys.stdout.write("Input node set: ({})\n".format(', '.join(self.args.nodes))) sys.stdout.write(sep_line) grinfo_runner = ipw(graph=graph, nodes=self.args.nodes) if self.args.type in (["degree", "all"]): grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_degree, cmode=implementation) if self.args.type in (["betweenness", "all"]): grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_betweenness, cmode=implementation) if self.args.type in (["closeness", "all"]): grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_closeness, cmode=implementation, gr_distance=group_distance) results.update(grinfo_runner.get_results()) sys.stdout.write(summary_start) for metric in results.keys(): if metric == GroupCentralityEnum.group_degree.name: sys.stdout.write("The group degree value for the input node set:\n({0})\nis {1}\n".format(', '.join(results[metric][0]), results[metric][1])) sys.stdout.write("\n") if metric == GroupCentralityEnum.group_betweenness.name: sys.stdout.write( "The group betweenness value for the input node set:\n({0})\nis {1}\n".format(', '.join(results[metric][0]), results[metric][1])) sys.stdout.write("\n") if metric.startswith(GroupCentralityEnum.group_closeness.name): sys.stdout.write( "The group closeness value for the input node set:\n({0})\nis {1}.\nThe {2} distance was considered between the set and the rest of the 
graph\n".format(', '.join(results[metric][0]), results[metric][1], group_distance.name)) sys.stdout.write("\n") sys.stdout.write(section_end) #output part##### sys.stdout.write(report_start) sys.stdout.write("Writing Results\n") if createdir: sys.stdout.write(u"WARNING: output directory does not exist, {} will be created".format( os.path.abspath(self.args.directory))) os.makedirs(os.path.abspath(self.args.directory), exist_ok=True) if self.args.save_binary: # reproduce octopus behaviour by adding kp information to the graph before saving it sys.stdout.write(u"Saving graph to a binary file (ending in .graph)\n") for key in results.keys(): if self.args.which == "gr-finder": if self.args.implementation == "brute-force": suffix = "bruteforce" attr_key = tuple(tuple(sorted(tuple(x))) for x in results[key][0]) else: suffix = "greedy" attr_key = tuple(sorted(tuple(results[key][0]))) else: suffix = "info" attr_key = tuple(sorted(tuple(results[key][0]))) attr_name = "_".join([key, suffix]) attr_val = results[key][1] if attr_name in graph.attributes(): if not isinstance(graph[attr_name], dict): sys.stdout.write("WARNING: attribute {} does not point to a dictionary, will overwrite".format(attr_name)) AddAttributes.add_graph_attribute(graph, attr_name, {attr_key: attr_val}) else: if attr_key in graph[attr_name]: sys.stdout.write("WARNING {} already present in the {} graph attribute, will overwrite\n".format(attr_key, attr_val)) graph[attr_name].update({attr_key: attr_val}) else: AddAttributes.add_graph_attribute(graph, attr_name, {attr_key: attr_val}) binary_prefix = "_".join([os.path.splitext(os.path.basename(self.args.input_file))[0], self.args.which, self.date]) binary_path = os.path.join(self.args.directory, binary_prefix + ".graph") PyntacleExporter.Binary(graph, binary_path) sys.stdout.write(u"Producing report in {} format\n".format(self.args.report_format)) r.create_report(report_type=report_type, report=results) r.write_report(report_dir=self.args.directory, 
format=self.args.report_format) if not self.args.no_plot and graph.vcount() < 1000: sys.stdout.write(u"Generating network plots in {} format\n".format(self.args.plot_format)) plot_dir = os.path.join(self.args.directory, "pyntacle-plots") if not os.path.isdir(plot_dir): os.mkdir(plot_dir) plot_graph = PlotGraph(graph=graph) plot_format = self.args.plot_format plot_graph.set_node_labels(labels=graph.vs()["name"]) # assign node labels to graph pal = sns.color_palette("Accent", 8).as_hex() framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex() other_nodes_colour = pal[2] other_frame_colour = framepal[2] other_nodes_size = 25 # other_nodes_shape = "circle" other_edge_width = 1 for metric in results: if self.args.which == 'gr-finder' and self.args.implementation == "brute-force": results[metric][0] = list(set(list(chain(*results[metric][0])))) if metric.startswith(GroupCentralityEnum.group_closeness.name): cl_nodes_colour = pal[5] cl_frames_colour = framepal[5] # create a list of node colors node_colors = [cl_nodes_colour if x["name"] in results[metric][0] else other_nodes_colour for x in graph.vs()] node_frames = [cl_frames_colour if x["name"] in results[metric][0] else other_frame_colour for x in graph.vs()] plot_graph.set_node_colors(colors=node_colors) elif metric == GroupCentralityEnum.group_degree: dg_nodes_colour = pal[4] dg_frames_colour = framepal[4] # create a list of node colors node_colors = [dg_nodes_colour if x["name"] in results[metric][0] else other_nodes_colour for x in graph.vs()] node_frames = [dg_frames_colour if x["name"] in results[metric][0] else other_frame_colour for x in graph.vs()] plot_graph.set_node_colors(colors=node_colors) else: #group betweenness bt_nodes_colour = pal[6] bt_frames_colour = framepal[6] # create a list of node colors node_colors = [bt_nodes_colour if x["name"] in results[metric][0] else other_nodes_colour for x in graph.vs()] node_frames = [bt_frames_colour if x["name"] in results[metric][0] else other_frame_colour 
for x in graph.vs()] plot_graph.set_node_colors(colors=node_colors) node_sizes = [35 if x["name"] in results[metric][0] else other_nodes_size for x in graph.vs()] plot_graph.set_node_sizes(sizes=node_sizes) edge_widths = [5 if any(y in results[metric][0] for y in x["adjacent_nodes"]) else other_edge_width for x in graph.es()] plot_graph.set_edge_widths(edge_widths) plot_graph.set_layouts(self.args.plot_layout) plot_path = os.path.join(plot_dir, "_".join( [self.args.which, graph["name"][0], metric, self.date]) + "." + plot_format) if os.path.exists(plot_path): sys.stdout.write( u"WARNING: a plot with the name ({}) already exists, overwriting it\n".format( os.path.basename(plot_path))) plot_graph.plot_graph(path=plot_path, bbox=plot_size, margin=20, edge_curved=0.2, keep_aspect_ratio=True, vertex_label_size=6, vertex_frame_color=node_frames) elif graph.vcount() >= 1000: sys.stdout.write(u"The graph has too many nodes ({}, we plot nodes with a maximum of 1000 nodes). It will not be drawn\n".format(graph.vcount())) if not self.args.suppress_cursor: cursor.stop() sys.stdout.write(section_end) sys.stdout.write(u"Pyntacle groupcentrality completed successfully\n") sys.exit(0)
def run(self):
    """Convert the input network file into the requested output format.

    Reads its settings from ``self.args`` (input/output file, formats,
    separators, header flags, output directory). Edge list <-> SIF
    conversions are done directly on the files; every other conversion loads
    the graph and re-exports it with ``PyntacleExporter``.

    The method always terminates the process through ``sys.exit``: status 0
    on success, status 1 when the input file is missing, the output format is
    not supported, or input and output formats coincide.
    """
    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    header = not self.args.no_header
    output_header = not self.args.no_output_header

    if self.args.input_file is None:
        sys.stderr.write(
            u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
        )
        sys.exit(1)

    if not os.path.exists(self.args.input_file):
        sys.stderr.write(u"Cannot find {}. Is the path correct?\n".format(
            self.args.input_file))
        sys.exit(1)

    if self.args.input_separator is None:
        separator = separator_detect(self.args.input_file)
    else:
        separator = self.args.input_separator

    sys.stdout.write(run_start)

    # Resolve the default output name BEFORE it is used in any message:
    # os.path.basename(None) raises TypeError.
    if self.args.output_file is None:
        self.args.output_file = os.path.splitext(
            os.path.basename(self.args.input_file))[0]
        sys.stdout.write(
            u"Output file name will be the basename of the input file ({})\n"
            .format(self.args.output_file))

    sys.stdout.write(
        u"Converting input file {0} to requested output file: {1}\n".format(
            os.path.basename(self.args.input_file),
            os.path.basename(self.args.output_file)))

    if self.args.output_separator is None:
        sys.stdout.write(
            u"Using the field separator used in the input network file in the converted output file, if the desired output format requires field separator\n"
        )
        self.args.output_separator = separator

    # Validate the requested format before touching the filesystem, so an
    # invalid request does not leave an empty output directory behind.
    out_form = format_dictionary.get(self.args.output_format, "NA")
    if out_form == "NA":
        sys.stderr.write(
            u"Output extension specified is not supported, see '--help' for more info. Quitting\n"
        )
        sys.exit(1)

    if not os.path.isdir(self.args.directory):
        sys.stdout.write(
            u"WARNING: output directory does not exist, will create one at {}\n"
            .format(os.path.abspath(self.args.directory)))
        os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

    output_path = os.path.join(self.args.directory,
                               ".".join([self.args.output_file, out_form]))
    input_format = format_dictionary.get(self.args.format, "NA")
    init_graph = GraphLoad(input_file=self.args.input_file,
                           file_format=input_format,
                           header=header,
                           separator=self.args.input_separator)
    input_basename = os.path.basename(self.args.input_file)

    # Special cases: edge list <-> SIF are converted file-to-file, without
    # building a graph object.
    if input_format == "egl" and out_form == "sif":
        sys.stdout.write(
            u"Converting edge list to Simple Interaction Format (SIF)\nFull path to the output file:\n{}\n"
            .format(output_path))
        PyntacleConverter.edgelistToSif(
            file=self.args.input_file,
            sep=separator,
            output_sep=self.args.output_separator,
            header=output_header,
            output_file=output_path)

    elif input_format == "sif" and out_form == "egl":
        sys.stdout.write(
            u"Converting Simple Interaction Format (SIF) to edge list\nFull path to the output file:\n{}\n"
            .format(output_path))
        PyntacleConverter.sifToEdgelist(
            file=self.args.input_file,
            sep=separator,
            output_sep=self.args.output_separator,
            header=output_header,
            output_file=output_path)

    else:
        graph = init_graph.graph_load()
        in_form = init_graph.get_format()

        if in_form == out_form:
            sys.stderr.write(
                u"The output format specified is the same as the input format. Quitting\n"
            )
            sys.exit(1)

        if out_form == "adjm":
            sys.stdout.write(
                u"Converting input graph file {0} to adjacency matrix at full path:\n{1}\n"
                .format(input_basename, output_path))
            PyntacleExporter.AdjacencyMatrix(
                graph,
                output_path,
                sep=self.args.output_separator,
                header=output_header)

        elif out_form == "egl":
            sys.stdout.write(
                u"Converting input graph file {0} to edge list at full path:\n{1}\n"
                .format(input_basename, output_path))
            PyntacleExporter.EdgeList(graph,
                                      output_path,
                                      sep=self.args.output_separator,
                                      header=output_header)

        elif out_form == "sif":
            sys.stdout.write(
                u"Converting input graph file {0} to Simple Interaction Format (SIF) file at full path:\n{1}\n"
                .format(input_basename, output_path))
            PyntacleExporter.Sif(graph,
                                 output_path,
                                 sep=self.args.output_separator,
                                 header=output_header)

        elif out_form == "dot":
            # Ignore RuntimeWarnings raised while converting to dot
            simplefilter("ignore", RuntimeWarning)
            sys.stdout.write(
                u"Converting input graph file {0} to DOT file using igraph utilities at full path:\n{1}\n(output separator will be ignored)\n"
                .format(input_basename, output_path))
            PyntacleExporter.Dot(graph, output_path)

        elif out_form == "graph":
            sys.stdout.write(
                u"Converting input graph file {0} to a binary file (ending in .graph) at full path:\n{1}\n(output separator will be ignored)\n"
                .format(input_basename, output_path))
            PyntacleExporter.Binary(graph, output_path)

    if not self.args.suppress_cursor:
        cursor.stop()

    sys.stdout.write(section_end)
    sys.stdout.write(u"Pyntacle convert completed successfully. Ending\n")
    sys.exit(0)
def run(self):
    """Compute local or global topological metrics for the input graph.

    Reads its settings from ``self.args``. ``self.args.which`` selects the
    mode:

    * ``"local"``: node-level metrics, for the nodes in ``self.args.nodes``
      or for every node when none are given;
    * ``"global"``: graph-level metrics, optionally repeated on a copy of the
      graph from which the nodes listed in ``self.args.no_nodes`` are removed.

    A report is written to ``self.args.directory``; plots are produced for
    graphs with fewer than 1000 nodes unless ``self.args.no_plot`` is set,
    and the annotated graph is saved as a binary file when
    ``self.args.save_binary`` is set.

    The method always terminates the process through ``sys.exit``: status 0
    on success, status 1 on invalid input.

    :raises Error: if ``self.args`` has no ``which`` attribute.
    """
    if not self.args.suppress_cursor:
        cursor = CursorAnimation()
        cursor.daemon = True
        cursor.start()

    header = not self.args.no_header

    if not hasattr(self.args, 'which'):
        raise Error(
            u"usage: pyntacle.py metrics {global, local} [options]")

    # Checking input file
    if self.args.input_file is None:
        sys.stderr.write(
            u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
        )
        sys.exit(1)

    if not os.path.exists(self.args.input_file):
        self.logging.error(u"Cannot find {}. Is the path correct?".format(
            self.args.input_file))
        sys.exit(1)

    if hasattr(self.args, "damping_factor"):
        if self.args.damping_factor is not None:
            if self.args.damping_factor < 0.0 or self.args.damping_factor > 1.0:
                sys.stderr.write(
                    u"Damping factor must be between 0 and 1. Quitting\n")
                sys.exit(1)

    self.logging.debug(u'Running Pyntacle metrics, with arguments ')
    self.logging.debug(self.args)

    # Load Graph
    sys.stdout.write(import_start)
    sys.stdout.write(u"Importing graph from file\n")
    graph = GraphLoad(self.args.input_file,
                      format_dictionary.get(self.args.format, "NA"),
                      header,
                      separator=self.args.input_separator).graph_load()

    # init Utils global stuff
    utils = gu(graph=graph)

    if hasattr(self.args, "nodes"):
        if self.args.nodes is not None:
            # From here on self.args.nodes is a list of node names.
            self.args.nodes = self.args.nodes.split(",")
            if not utils.nodes_in_graph(self.args.nodes):
                sys.stderr.write(
                    "One or more of the specified nodes is not present in the graph. Please check your spelling and the presence of empty spaces in between node names. Quitting\n"
                )
                sys.exit(1)

    if self.args.largest_component:
        try:
            graph = utils.get_largest_component()
            sys.stdout.write(
                u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n"
                .format(graph.vcount(), graph.ecount()))
            # reinitialize graph utils class
            utils.set_graph(graph)

        except MultipleSolutionsError:
            sys.stderr.write(
                u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n"
            )
            sys.exit(1)

        # The rest of this function guards 'nodes' with hasattr, so read it
        # defensively here as well instead of assuming the attribute exists.
        if getattr(self.args, "nodes", None) is not None:
            if not utils.nodes_in_graph(self.args.nodes):
                sys.stderr.write(
                    "One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n"
                )
                sys.exit(1)

    # Decide implementation
    if 'implementation' in graph.attributes():
        implementation = graph['implementation']
    else:
        implementation = CmodeEnum.igraph

    if hasattr(self.args, "nodes"):
        if self.args.weights is not None:
            sys.stdout.write(u"Adding edge weights from file {}\n".format(
                self.args.weights))
            if not os.path.exists(self.args.weights):
                sys.stderr.write(
                    u"Weights file {} does not exist. Is the path correct?\n"
                    .format(self.args.weights))
                sys.exit(1)

            ImportAttributes.import_edge_attributes(
                graph,
                self.args.weights,
                sep=separator_detect(self.args.weights),
                mode=self.args.weights_format)
            try:
                # edges without a weight default to 1.0
                weights = [
                    float(x) if x is not None else 1.0
                    for x in graph.es()["weights"]
                ]
            except KeyError:
                sys.stderr.write(
                    u"The attribute file does not contain a column named 'weights'."
                    "Quitting\n")
                sys.exit(1)
        else:
            weights = None

    # Check provided dimensions' format.
    # The test is made on self.args: testing the attribute on the value of
    # plot_dim itself is always false and silently drops custom dimensions.
    if getattr(self.args, "plot_dim", None):
        plot_dim_error = u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
        requested_dims = self.args.plot_dim.split(",")

        if len(requested_dims) != 2:
            sys.stderr.write(plot_dim_error)
            sys.exit(1)

        try:
            requested_dims = [int(dim) for dim in requested_dims]
        except ValueError:
            sys.stderr.write(plot_dim_error)
            sys.exit(1)

        if any(dim <= 0 for dim in requested_dims):
            sys.stderr.write(plot_dim_error)
            sys.exit(1)

        self.args.plot_dim = requested_dims
        plot_size = tuple(requested_dims)

    else:
        # generate different formats according to graph size
        if graph.vcount() <= 150:
            plot_size = (800, 800)
        else:
            plot_size = (1600, 1600)

    def _global_metrics(target_graph):
        # Global metrics for target_graph, in the order used by the report.
        return OrderedDict([
            (GlobalAttributeEnum.average_shortest_path_length.name,
             ShortestPath.average_global_shortest_path_length(graph=target_graph)),
            (GlobalAttributeEnum.median_shortest_path_length.name,
             ShortestPath.median_global_shortest_path_length(graph=target_graph)),
            (GlobalAttributeEnum.diameter.name,
             GlobalTopology.diameter(graph=target_graph)),
            (GlobalAttributeEnum.components.name,
             GlobalTopology.components(graph=target_graph)),
            (GlobalAttributeEnum.radius.name,
             GlobalTopology.radius(graph=target_graph)),
            (GlobalAttributeEnum.density.name,
             GlobalTopology.density(graph=target_graph)),
            (GlobalAttributeEnum.pi.name,
             GlobalTopology.pi(graph=target_graph)),
            (GlobalAttributeEnum.average_clustering_coefficient.name,
             GlobalTopology.average_clustering_coefficient(graph=target_graph)),
            (GlobalAttributeEnum.weighted_clustering_coefficient.name,
             GlobalTopology.weighted_clustering_coefficient(graph=target_graph)),
            (GlobalAttributeEnum.average_degree.name,
             GlobalTopology.average_degree(graph=target_graph)),
            (GlobalAttributeEnum.average_closeness.name,
             GlobalTopology.average_closeness(graph=target_graph)),
            (GlobalAttributeEnum.average_eccentricity.name,
             GlobalTopology.average_eccentricity(graph=target_graph)),
            (GlobalAttributeEnum.average_radiality.name,
             GlobalTopology.average_radiality(graph=target_graph,
                                              cmode=implementation)),
            (GlobalAttributeEnum.average_radiality_reach.name,
             GlobalTopology.average_radiality_reach(graph=target_graph,
                                                    cmode=implementation)),
            (GlobalAttributeEnum.completeness_naive.name,
             Sparseness.completeness_naive(graph=target_graph)),
            (GlobalAttributeEnum.completeness.name,
             Sparseness.completeness(graph=target_graph)),
            (GlobalAttributeEnum.compactness.name,
             Sparseness.compactness(graph=target_graph)),
        ])

    sys.stdout.write(section_end)  # end report
    sys.stdout.write(run_start)  # start run

    if self.args.which == "local":

        reporter = PyntacleReporter(graph=graph)  # init reporter

        if self.args.nodes is not None:
            sys.stdout.write(
                u"Computing local metrics for nodes {}\n".format(', '.join(
                    self.args.nodes)))
            nodes_list = self.args.nodes
        else:
            sys.stdout.write(
                u"Computing local metrics for all nodes in the graph\n")
            nodes_list = None

        local_attributes_dict = OrderedDict([
            (LocalAttributeEnum.degree.name,
             LocalTopology.degree(graph=graph, nodes=nodes_list)),
            (LocalAttributeEnum.clustering_coefficient.name,
             LocalTopology.clustering_coefficient(graph=graph,
                                                  nodes=nodes_list)),
            (LocalAttributeEnum.betweenness.name,
             LocalTopology.betweenness(graph=graph, nodes=nodes_list)),
            (LocalAttributeEnum.closeness.name,
             LocalTopology.closeness(graph=graph, nodes=nodes_list)),
            (LocalAttributeEnum.radiality.name,
             LocalTopology.radiality(graph=graph,
                                     nodes=nodes_list,
                                     cmode=implementation)),
            (LocalAttributeEnum.radiality_reach.name,
             LocalTopology.radiality_reach(graph=graph,
                                           nodes=nodes_list,
                                           cmode=implementation)),
            (LocalAttributeEnum.eccentricity.name,
             LocalTopology.eccentricity(graph=graph, nodes=nodes_list)),
            (LocalAttributeEnum.eigenvector_centrality.name,
             LocalTopology.eigenvector_centrality(graph=graph,
                                                  nodes=nodes_list)),
            (LocalAttributeEnum.pagerank.name,
             LocalTopology.pagerank(graph=graph,
                                    nodes=nodes_list,
                                    weights=weights,
                                    damping=self.args.damping_factor)),
        ])

        if self.args.nodes:
            local_attributes_dict["nodes"] = self.args.nodes

        sys.stdout.write("Local metrics computed\n")
        sys.stdout.write(section_end)
        sys.stdout.write(report_start)

        # check output directory
        if not os.path.isdir(self.args.directory):
            sys.stdout.write(
                u"WARNING: Output directory does not exist; {} will be created\n"
                .format(os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        sys.stdout.write(u"Producing report in {} format\n".format(
            self.args.report_format))
        reporter.create_report(ReportEnum.Local, local_attributes_dict)
        reporter.write_report(report_dir=self.args.directory,
                              format=self.args.report_format)

        if not self.args.no_plot and graph.vcount() < 1000:
            sys.stdout.write(u"Generating plots in {} format\n".format(
                self.args.plot_format))

            # generates plot directory
            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")
            if not os.path.isdir(plot_dir):
                os.makedirs(plot_dir, exist_ok=True)

            plot_graph = PlotGraph(graph=graph)
            plot_graph.set_node_labels(
                labels=graph.vs()["name"])  # assign node labels to graph

            pal = sns.color_palette("Accent", 8).as_hex()
            framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()

            other_nodes_colour = pal[2]
            other_frame_colour = framepal[2]
            other_nodes_size = 25

            if self.args.nodes:
                # make the selected nodes bigger and of a different colour,
                # so they stand out in the plot
                sys.stdout.write(
                    u"Highlighting nodes ({}) in plot\n".format(
                        ', '.join(nodes_list)))
                selected_nodes_colour = pal[0]
                selected_nodes_frames = framepal[0]

                node_colors = [
                    selected_nodes_colour
                    if x["name"] in nodes_list else other_nodes_colour
                    for x in graph.vs()
                ]
                node_frames = [
                    selected_nodes_frames
                    if x["name"] in nodes_list else other_frame_colour
                    for x in graph.vs()
                ]
                node_sizes = [
                    45 if x["name"] in nodes_list else other_nodes_size
                    for x in graph.vs()
                ]
            else:
                node_colors = [other_nodes_colour] * graph.vcount()
                node_frames = [other_frame_colour] * graph.vcount()
                node_sizes = [other_nodes_size] * graph.vcount()

            plot_graph.set_node_colors(colors=node_colors)
            plot_graph.set_node_sizes(sizes=node_sizes)

            # define layout
            plot_graph.set_layouts(self.args.plot_layout)

            plot_path = os.path.join(
                plot_dir, ".".join([
                    "_".join([graph["name"][0], self.date]),
                    self.args.plot_format
                ]))
            plot_graph.plot_graph(path=plot_path,
                                  bbox=plot_size,
                                  margin=20,
                                  edge_curved=0.2,
                                  keep_aspect_ratio=True,
                                  vertex_label_size=6,
                                  vertex_frame_color=node_frames)

        elif not self.args.no_plot and graph.vcount() >= 1000:
            sys.stdout.write(
                u"The graph has too many nodes ({}). It will not be drawn\n"
                .format(graph.vcount()))

    elif self.args.which == "global":

        sys.stdout.write(u"Computing global metrics\n")
        global_attributes_dict = _global_metrics(graph)

        sys.stdout.write(u"Global metrics computed\n")
        sys.stdout.write(section_end)
        sys.stdout.write(report_start)

        # check output directory, as done for the local metrics
        if not os.path.isdir(self.args.directory):
            sys.stdout.write(
                u"WARNING: Output directory does not exist; {} will be created\n"
                .format(os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        sys.stdout.write(
            u"Producing global metrics report for the input graph\n")

        reporter = PyntacleReporter(graph=graph)  # init reporter
        reporter.create_report(ReportEnum.Global, global_attributes_dict)
        reporter.write_report(report_dir=self.args.directory,
                              format=self.args.report_format)

        if self.args.no_nodes:
            # create an additional report for the graph minus the selected nodes
            sys.stdout.write(
                u"Removing nodes:\n\t{}\nfrom input graph and computing Global Metrics\n"
                .format(self.args.no_nodes))

            # nodes_list is reused below to highlight the removed nodes in
            # the plot of the full graph
            nodes_list = [
                x.replace(" ", "") for x in self.args.no_nodes.split(",")
            ]
            index_list = utils.get_node_indices(nodes=nodes_list)

            # delete vertices
            graph_nonodes = graph.copy()
            graph_nonodes.delete_vertices(index_list)  # remove target nodes

            global_attributes_dict_nonodes = OrderedDict([
                ('Removed nodes', ','.join(nodes_list))
            ])
            global_attributes_dict_nonodes.update(
                _global_metrics(graph_nonodes))

            sys.stdout.write(
                u"Producing global metrics report for the input graph after node removal\n"
            )
            # NOTE(review): if graph.copy() shares the "name" list with the
            # original graph, this in-place edit also renames `graph` --
            # confirm against the igraph copy semantics.
            graph_nonodes["name"][0] += '_without_nodes'
            reporter = PyntacleReporter(
                graph=graph_nonodes)  # init reporter
            reporter.create_report(ReportEnum.Global,
                                   global_attributes_dict_nonodes)
            reporter.write_report(report_dir=self.args.directory,
                                  format=self.args.report_format)

        if not self.args.no_plot and graph.vcount() < 1000:

            if self.args.no_nodes:
                sys.stdout.write(
                    u"Generating plots of both the input network and the resulting network without nodes {} in {} format\n"
                    .format(self.args.no_nodes, self.args.plot_format))
            else:
                sys.stdout.write(
                    u"Generating network plot in {} format\n".format(
                        self.args.plot_format))

            # generates plot directory
            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")
            if not os.path.isdir(plot_dir):
                os.makedirs(plot_dir, exist_ok=True)

            other_nodes_size = 25
            pal = sns.color_palette("Accent", 8).as_hex()
            framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()
            other_nodes_colour = pal[2]
            other_frame_colour = framepal[2]

            no_nodes_size = 35
            no_nodes_colour = pal[4]
            no_nodes_frames = framepal[4]

            if self.args.no_nodes:
                plot_path = os.path.join(
                    plot_dir, ".".join([
                        "_".join([
                            "metric", self.args.which,
                            re.sub('_nodes_removed', '', graph["name"][0]),
                            "global_metrics_plot", self.date
                        ]), self.args.plot_format
                    ]))
                node_colors = [
                    no_nodes_colour
                    if x["name"] in nodes_list else other_nodes_colour
                    for x in graph.vs()
                ]
                node_frames = [
                    no_nodes_frames
                    if x["name"] in nodes_list else other_frame_colour
                    for x in graph.vs()
                ]
                node_sizes = [
                    no_nodes_size
                    if x["name"] in nodes_list else other_nodes_size
                    for x in graph.vs()
                ]
            else:
                node_colors = [other_nodes_colour] * graph.vcount()
                node_frames = [other_frame_colour] * graph.vcount()
                node_sizes = [other_nodes_size] * graph.vcount()
                plot_path = os.path.join(
                    plot_dir, ".".join([
                        "_".join([
                            "Metric", self.args.which, graph["name"][0],
                            self.date
                        ]), self.args.plot_format
                    ]))

            plot_graph = PlotGraph(graph=graph)
            plot_graph.set_node_labels(
                labels=graph.vs()["name"])  # assign node labels to graph
            plot_graph.set_node_colors(colors=node_colors)
            plot_graph.set_node_sizes(sizes=node_sizes)

            # define layout
            plot_graph.set_layouts(self.args.plot_layout)
            plot_graph.plot_graph(path=plot_path,
                                  bbox=plot_size,
                                  margin=20,
                                  edge_curved=0.2,
                                  keep_aspect_ratio=True,
                                  vertex_label_size=6,
                                  vertex_frame_color=node_frames)

            if self.args.no_nodes:
                # second plot: the graph after node removal, uniform style
                plot_graph = PlotGraph(graph=graph_nonodes)
                plot_graph.set_node_labels(
                    labels=graph_nonodes.vs()["name"])  # assign node labels to graph

                node_colors = [other_nodes_colour] * graph_nonodes.vcount()
                node_frames = [other_frame_colour] * graph_nonodes.vcount()
                node_sizes = [other_nodes_size] * graph_nonodes.vcount()

                plot_graph.set_node_colors(colors=node_colors)
                plot_graph.set_node_sizes(sizes=node_sizes)

                # define layout
                plot_graph.set_layouts(self.args.plot_layout)

                plot_path = os.path.join(
                    plot_dir, ".".join([
                        "_".join(
                            [graph["name"][0], "_no_nodes", self.date]),
                        self.args.plot_format
                    ]))
                plot_graph.plot_graph(path=plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=node_frames)

        elif not self.args.no_plot and graph.vcount() >= 1000:
            sys.stdout.write(
                u"The graph has too many nodes ({}). It will not be drawn\n"
                .format(graph.vcount()))

    if self.args.save_binary:
        sys.stdout.write(
            u"Saving graph to a binary file (ending in .graph)\n")
        basename_graph = os.path.splitext(
            os.path.basename(self.args.input_file))[0]
        binary_path = os.path.join(self.args.directory,
                                   basename_graph + ".graph")

        if self.args.which == 'local':
            if self.args.nodes:
                # self.args.nodes was already split into a list above;
                # splitting it again would raise AttributeError.
                nodes_list = self.args.nodes
            else:
                nodes_list = graph.vs["name"]
            for key in local_attributes_dict:
                AddAttributes.add_node_attributes(
                    graph, key, local_attributes_dict[key], nodes_list)
            PyntacleExporter.Binary(graph, binary_path)

        elif self.args.which == 'global':
            shortest_path_keys = ("average_shortest_path_length",
                                  "median_shortest_path_length")

            for key in global_attributes_dict:
                if key in shortest_path_keys:
                    newkey = re.sub("_shortest_path_length",
                                    "_global_shortest_path_length", key)
                else:
                    newkey = key
                AddAttributes.add_graph_attributes(
                    graph, newkey, global_attributes_dict[key])
            PyntacleExporter.Binary(graph, binary_path)

            if self.args.no_nodes:
                binary_path_nonodes = os.path.join(
                    self.args.directory,
                    basename_graph + "_no_nodes" + ".graph")
                sys.stdout.write(
                    u"Saving a binary of the input graph without the requested nodes at path: {}\n"
                    .format(os.path.basename(binary_path_nonodes)))
                for key in global_attributes_dict_nonodes:
                    if key in shortest_path_keys:
                        newkey = re.sub("_shortest_path_length",
                                        "_global_shortest_path_length", key)
                    else:
                        newkey = key
                    AddAttributes.add_graph_attributes(
                        graph_nonodes, newkey,
                        global_attributes_dict_nonodes[key])

                PyntacleExporter.Binary(graph_nonodes, binary_path_nonodes)

    if not self.args.suppress_cursor:
        cursor.stop()

    sys.stdout.write(section_end)
    sys.stdout.write(u"Pyntacle metrics completed successfully\n")
    sys.exit(0)
def write_json_report(self, report_dir=None, report_dict=None, suffix=None):
    """
    Create a JSON version of the report, appending data to an already
    existing report when one is found in the target directory.

    The report is stored as a JavaScript assignment (``var reportData = ...``)
    in ``report.js``; the graph is exported alongside it in ``graph.js`` and
    the HTML/CSS templates are (re)written as ``index.html``/``index.css``.

    :param report_dir: directory in which the plots/report folder is created
    :param report_dict: results to add; its layout depends on ``self.report_type``
    :param suffix: suffix of the ``pyntacle-plots_<suffix>`` folder name; when
        omitted the folder is named ``pyntacle-plots``
    :return: None
    """
    # The default suffix (None) cannot be concatenated to a string.
    if suffix is None:
        plots_dir_name = 'pyntacle-plots'
    else:
        plots_dir_name = 'pyntacle-plots_' + suffix
    plots_path = os.path.join(report_dir, plots_dir_name)

    if not os.path.exists(plots_path):
        os.makedirs(plots_path)

    json_report = os.path.join(plots_path, 'report.js')
    json_graph = os.path.join(plots_path, 'graph.js')
    index_path = os.path.join(plots_path, 'index.html')
    index_css_path = os.path.join(plots_path, 'index.css')

    if os.path.exists(json_report):
        with open(json_report, 'r') as f:
            content = f.read()
        # Drop the "var reportData = " prefix. Only the FIRST ' = ' is a
        # separator: the JSON payload itself may legitimately contain ' = '.
        payload = content.partition(' = ')[2]
        json_data = json.loads(payload) if payload.strip() else {}
    else:
        json_data = {}

    # Key-player and group-centrality searches share the same layout:
    # section -> report type -> run date -> metric -> [node set(s), values...]
    grouped_sections = (
        ("Key-player", ReportEnum.KP_greedy, ReportEnum.KP_bruteforce),
        ("Group-centrality", ReportEnum.GR_greedy, ReportEnum.GR_bruteforce),
    )
    for section, greedy_type, bruteforce_type in grouped_sections:
        is_greedy = self.report_type == greedy_type
        is_bruteforce = self.report_type == bruteforce_type
        if not (is_greedy or is_bruteforce):
            continue

        type_name = str(self.report_type).split('.')[1]
        run_results = json_data.setdefault(section, {}).setdefault(
            type_name, {}).setdefault(self.dat, {})

        for k in report_dict:
            if is_greedy:
                # single solution: comma-separated node names
                run_results[k] = [','.join(report_dict[k][0])]
            else:
                # multiple solutions: ';' between solutions, ',' within one
                run_results[k] = [
                    ';'.join(','.join(sol) for sol in report_dict[k][0])
                ]

            # Adding numerical values of solutions
            run_results[k].extend(report_dict[k][1:])

    if self.report_type == ReportEnum.Communities:
        run_results = json_data.setdefault("Communities", {}).setdefault(
            report_dict["algorithm"], {}).setdefault(self.dat, {})

        for i, _ in enumerate(report_dict["communities"]):
            # JSON object keys are strings: use str(i) so that entries
            # reloaded from an existing report are replaced, not duplicated.
            run_results[str(i)] = [report_dict["communities"][i][1]]

    if self.report_type == ReportEnum.Set:
        run_results = json_data.setdefault("Set", {}).setdefault(
            report_dict["algorithm"], {}).setdefault(self.dat, {})

        for k in report_dict:
            if k == 'algorithm':
                continue
            run_results[k] = [
                report_dict[k]['nodes'],
                ';'.join(['-'.join(e) for e in report_dict[k]['edges']])
            ]

    # Adding minimal graph info
    info = json_data.setdefault("Info", {})
    info['graph name'] = self.graph["name"][0]
    info['nodes'] = len(self.graph.vs())
    info['edges'] = len(self.graph.es())
    info['components'] = len(self.graph.components())

    # Adding global metrics to the basic info, if available
    if self.report_type == ReportEnum.Global:
        for metric_name in report_dict:
            info[metric_name] = report_dict[metric_name]

    # exporting results in json format
    with open(json_report, 'w') as f:
        f.write("var reportData = ")
        json.dump(json_data, f, ensure_ascii=False)

    # exporting graph in json format
    PyntacleExporter.JSON(self.graph, json_graph, prefix="var graphData = ")

    # write the html and css templates
    with open(index_path, 'w') as f:
        f.write(html_template)
    with open(index_css_path, 'w') as f:
        f.write(css_template)