Example #1
0
 def test_difference(self):
     """Check the adjacency matrix exported for a graph difference.

     Computes the difference between ``self.graph1`` and ``self.graph2``,
     exports it as a tab-separated adjacency matrix with a header, and
     compares the MD5 of the written file with the MD5 of the stored
     reference file.
     """
     sys.stdout.write("Testing set difference\n")

     # File written by this test and the reference file it must match.
     produced_path = os.path.join(current_dir, 'pyntacletests/test_sets/tmp/result_set.adjm')
     reference_path = os.path.join(current_dir, 'pyntacletests/test_sets/output/set/result_difference.adjm')

     difference_graph = GraphSetOps.difference(self.graph1, self.graph2, new_graph_name='result_set')
     PyntacleExporter.AdjacencyMatrix(graph=difference_graph, file=produced_path, sep='\t', header=True)

     produced_md5 = getmd5(produced_path)
     reference_md5 = getmd5(reference_path)
     self.assertEqual(produced_md5, reference_md5, 'Wrong checksum for Set, difference case')
Example #2
0
    def run(self):
        """Run a set operation (union, intersection or difference) on two graphs.

        Steps, in order:

        1. Validate that both input files exist and are not identical.
        2. Load both graphs with ``GraphLoad`` and, if requested, reduce
           each one to its largest component.
        3. Apply the operation named by ``self.args.which`` through
           ``GraphSetOps``.
        4. Print a summary, export the resulting graph with
           ``PyntacleExporter``, optionally draw the input and output
           graphs with ``PlotGraph`` and write a report with
           ``PyntacleReporter``.

        The method never returns normally: it always terminates through
        ``sys.exit``, with status 1 on validation errors and status 0
        otherwise (including the "no output will be produced" cases).
        """

        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()

        if not os.path.exists(self.args.input_file_1) or not os.path.exists(
                self.args.input_file_2):
            sys.stderr.write(
                u"One of the two input files does not exist. Quitting\n")
            sys.exit(1)

        # shallow=False forces a content comparison instead of os.stat only
        if filecmp.cmp(self.args.input_file_1,
                       self.args.input_file_2,
                       shallow=False):
            sys.stderr.write(u"The two input files are equal. Quitting\n")
            sys.exit(1)

        input_header = not self.args.no_header

        sys.stdout.write(import_start)
        input_format = format_dictionary.get(self.args.format, "NA")
        sys.stdout.write(u"Reading first input file\n")
        graph1 = GraphLoad(self.args.input_file_1,
                           file_format=input_format,
                           header=input_header,
                           separator=self.args.input_separator).graph_load()

        sys.stdout.write(u"Reading second input file\n")
        graph2 = GraphLoad(self.args.input_file_2,
                           file_format=input_format,
                           header=input_header,
                           separator=self.args.input_separator).graph_load()

        # init Utils global stuff
        utils1 = GraphUtils(graph=graph1)
        utils2 = GraphUtils(graph=graph2)

        # Default output basename: <input1>_<OPERATION>_<input2>_<date>
        if self.args.output_file is None and self.args.which in (
                "union", "intersection", "difference"):
            self.args.output_file = "_".join([
                os.path.splitext(os.path.basename(self.args.input_file_1))[0],
                self.args.which.upper(),
                os.path.splitext(os.path.basename(self.args.input_file_2))[0],
                self.date,
            ])

        if self.args.largest_component:
            try:
                graph1 = utils1.get_largest_component()
                # Report the name of the first graph here (the original
                # code printed the name of the second graph by mistake).
                sys.stdout.write(
                    u"Taking the largest component of the input graph {0} as you requested ({1} nodes, {2} edges)\n"
                    .format(graph1["name"], graph1.vcount(), graph1.ecount()))
                utils1.set_graph(graph1)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"Graph {} has two largest components of the same size. Cannot choose one. either remove one of the components or run 'pyntacle set' without the '--largest-component' option. Quitting\n"
                    .format(graph1["name"]))
                sys.exit(1)

            try:
                graph2 = utils2.get_largest_component()
                sys.stdout.write(
                    u"Taking the largest component of the input graph {0} as you requested ({1} nodes, {2} edges)\n"
                    .format(graph2["name"], graph2.vcount(), graph2.ecount()))
                utils2.set_graph(graph2)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"Graph {} has two largest components of the same size. Cannot choose one. either remove one of the components or run 'pyntacle set' without the '--largest-component' option. Quitting\n"
                    .format(graph2["name"]))
                sys.exit(1)

        # Check provided dimensions' format
        if self.args.plot_dim:  # define custom format
            self.args.plot_dim = self.args.plot_dim.split(",")

            for i in range(0, len(self.args.plot_dim)):
                try:
                    self.args.plot_dim[i] = int(self.args.plot_dim[i])

                except ValueError:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

                if self.args.plot_dim[i] <= 0:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            # generate different formats according to graph size
            if graph1.vcount() <= 150 and graph2.vcount() <= 150:
                plot_size = (800, 800)

            else:
                plot_size = (1600, 1600)

        if self.args.format == "sif" or all(
                x is None for x in graph1.es()["sif_interaction"]) or all(
                    x is None for x in graph2.es()["sif_interaction"]):
            sys.stdout.write(
                u"WARNING: Interaction stored in SIF files will be removed\n")

        sys.stdout.write(section_end)
        sys.stdout.write(run_start)
        if self.args.which == "union":
            sys.stdout.write(
                u" Performing union between input graph {} and {}\n".format(
                    self.args.input_file_1, self.args.input_file_2))

            output_graph = GraphSetOps.union(graph1, graph2,
                                             self.args.output_file)
            if all(len(x) <= 2 for x in output_graph.vs()["parent"]):
                sys.stdout.write(
                    u"There were no common nodes when performing Graph union. Will return two disjoint graphs\n"
                )

        elif self.args.which == "intersection":
            sys.stdout.write(
                u"Performing intersection between input graph {} and {}\n".
                format(self.args.input_file_1, self.args.input_file_2))

            output_graph = GraphSetOps.intersection(graph1, graph2,
                                                    self.args.output_file)
            if output_graph.ecount() == 0:
                sys.stdout.write(
                    u"No intersection was possible for the two input graphs. No output will be generated\n"
                )
                if not self.args.suppress_cursor:
                    cursor.stop()
                sys.exit(0)

        elif self.args.which == "difference":
            sys.stdout.write(
                "Performing difference between input graph {} and  {}\n".
                format(self.args.input_file_1, self.args.input_file_2))

            output_graph = GraphSetOps.difference(graph1, graph2,
                                                  self.args.output_file)
            if output_graph.vcount() == graph1.vcount(
            ) and output_graph.ecount() == graph1.ecount():
                # NOTE(review): the result is compared against graph1, so the
                # two file names in this message look swapped -- confirm the
                # operand order of GraphSetOps.difference before changing it.
                sys.stdout.write(
                    u"Nothing of graph {} could be subtracted from graph {}\n".
                    format(os.path.basename(self.args.input_file_1),
                           os.path.basename(self.args.input_file_2)))

            if output_graph.vcount() == 0 and output_graph.ecount() == 0:
                sys.stdout.write(
                    u"Graph difference was complete, no nodes and edges could be retrieved. No output will be produced. Quitting\n"
                )
                sys.exit(0)

            if output_graph.vcount() <= 1 and output_graph.ecount() < 1:
                sys.stdout.write(
                    u"Graph difference returned only node {} and no edge. No output will be produced. Quitting\n"
                    .format("".join(output_graph.vs["name"])))
                sys.exit(0)

            if output_graph.vcount() > 1 and output_graph.ecount() == 0:
                sys.stdout.write(
                    u"Graph difference returned {} nodes, namely: {} and no edge. No output will be produced. Quitting\n"
                    .format(output_graph.vcount(),
                            ",\n".join(output_graph.vs()["name"])))
                sys.exit(0)

        sys.stdout.write(section_end)
        sys.stdout.write(report_start)
        # print pyntacle_commands_utils to command line
        sys.stdout.write(u"Report of set operation: {}\n".format(
            self.args.which))
        sys.stdout.write(section_end)
        sys.stdout.write(u"Input graphs:\n")

        sys.stdout.write(
            u"Graph 1: {0}\nNodes:\t{1}\nEdges:\t{2}\nComponents:\t{3}\n".
            format(graph1["name"][0], graph1.vcount(), graph1.ecount(),
                   len(graph1.components())))
        sys.stdout.write(section_end)
        sys.stdout.write(
            u"Graph 2: {0}\nNodes:\t{1}\nEdges:\t{2}\nComponents:\t{3}\n".
            format(graph2["name"][0], graph2.vcount(), graph2.ecount(),
                   len(graph2.components())))

        sys.stdout.write(section_end)
        sys.stdout.write(u"Resulting graph:\n")
        sys.stdout.write(
            u"Nodes:\t{0}\nEdges:\t{1}\nComponents:\t{2}\n".format(
                output_graph.vcount(), output_graph.ecount(),
                len(output_graph.components())))

        sys.stdout.write(section_end)
        sys.stdout.write(report_start)
        if not os.path.isdir(self.args.directory):
            sys.stdout.write(
                u"WARNING: Output directory does not exist, will create one at {}\n"
                .format(os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        out_form = format_dictionary.get(self.args.output_format, "NA")
        output_path = os.path.join(self.args.directory,
                                   ".".join([self.args.output_file, out_form]))

        sys.stdout.write(u"Basename of output graph: {}\n".format(
            self.args.output_file))
        sys.stdout.write(
            u"Path to generated graph is: {}\n".format(output_path))

        # producing output graph
        if self.args.no_output_header:
            sys.stdout.write(u"Skipping header on output files\n")
            output_header = False

        else:
            output_header = True

        if self.args.output_separator is None:
            sys.stdout.write(
                u"Using '\\t' as default separator for output file\n")
            self.args.output_separator = "\t"

        if os.path.exists(output_path):
            self.logging.warning(
                u"A file named {} already exist, will be overwritten".format(
                    output_path))

        # output generated networks
        if out_form == "adjm":
            sys.stdout.write(
                u"Writing resulting graph to an adjacency matrix\n")
            PyntacleExporter.AdjacencyMatrix(output_graph,
                                             output_path,
                                             sep=self.args.output_separator,
                                             header=output_header)

        elif out_form == "egl":
            sys.stdout.write(u"Writing resulting graph to an edge list\n")
            PyntacleExporter.EdgeList(output_graph,
                                      output_path,
                                      sep=self.args.output_separator,
                                      header=output_header)

        elif out_form == "sif":
            sys.stdout.write(
                u"Writing resulting graph to Simple Interaction Format (SIF) file\n"
            )
            PyntacleExporter.Sif(output_graph,
                                 output_path,
                                 sep=self.args.output_separator,
                                 header=output_header)

        elif out_form == "dot":
            sys.stdout.write("Writing resulting graph to a DOT file\n")

            # Ignore ugly RuntimeWarnings while creating a dot
            simplefilter("ignore", RuntimeWarning)
            PyntacleExporter.Dot(output_graph, output_path)

        elif out_form == "graph":
            sys.stdout.write(
                "Writing resulting graph into a  binary file (ending in .graph)\n"
            )
            PyntacleExporter.Binary(output_graph, output_path)

        # producing plots
        if not self.args.no_plot:
            # generates plot directory
            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

            if os.path.isdir(plot_dir):
                self.logging.warning(
                    u"A directory named 'pyntacle-plots' already exists.")

            else:
                os.mkdir(plot_dir)
            sys.stdout.write(u"Generating plots in {} format\n".format(
                self.args.plot_format))
            sys.stdout.write(u"Drawing starting graphs\n")

            graph1_plot_path = os.path.join(
                plot_dir, ".".join([
                    "_".join([
                        os.path.splitext(
                            os.path.basename(self.args.input_file_1))[0],
                        self.date
                    ]), self.args.plot_format
                ]))
            graph2_plot_path = os.path.join(
                plot_dir, ".".join([
                    "_".join([
                        os.path.splitext(
                            os.path.basename(self.args.input_file_2))[0],
                        self.date
                    ]), self.args.plot_format
                ]))

            graph1_plotter = PlotGraph(graph=graph1)
            graph2_plotter = PlotGraph(graph=graph2)

            # first create two plots of the input graph
            input_graph_node_size = 25

            pal = sns.color_palette("hls", 10).as_hex()
            framepal = sns.color_palette("hls", 10, desat=0.5).as_hex()

            graph_1_colour = pal[0]
            graph_1_frame = framepal[0]
            graph_2_colour = pal[3]
            graph_2_frame = framepal[3]

            # set input graph node labels
            graph1_plotter.set_node_labels(labels=graph1.vs()["name"])
            graph2_plotter.set_node_labels(labels=graph2.vs()["name"])

            # set input graph node colors
            graph1_plotter.set_node_colors(colors=[graph_1_colour] *
                                           graph1.vcount())
            graph2_plotter.set_node_colors(colors=[graph_2_colour] *
                                           graph2.vcount())

            # set input graphs node sizes
            graph1_plotter.set_node_sizes(sizes=[input_graph_node_size] *
                                          graph1.vcount())
            graph2_plotter.set_node_sizes(sizes=[input_graph_node_size] *
                                          graph2.vcount())

            # set input graph vertex frame colors; each list must be as long
            # as its own graph (the second one used graph1's size before)
            graph_1_frame_colors = [graph_1_frame] * graph1.vcount()
            graph_2_frame_colors = [graph_2_frame] * graph2.vcount()

            # define layouts
            graph1_plotter.set_layouts(self.args.plot_layout)
            graph2_plotter.set_layouts(self.args.plot_layout)

            # plot input graphs
            graph1_plotter.plot_graph(path=graph1_plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=graph_1_frame_colors)
            graph2_plotter.plot_graph(path=graph2_plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=graph_2_frame_colors)

            if output_graph.vcount() > 0:

                # plot output graph
                output_plot_path = os.path.join(
                    plot_dir, ".".join([
                        "_".join([
                            self.args.which, self.args.output_file, self.date
                        ]), self.args.plot_format
                    ]))
                output_graph_plotter = PlotGraph(
                    graph=output_graph)  # init plotter class

                # for the merge part
                sys.stdout.write(u"Drawing resulting graphs\n")
                node_intersection_colour = pal[1]
                node_intersection_frame = framepal[1]

                node_intersection_size = 45

                intersection_node_color_list = []
                intersection_frame_color_list = []

                # Names of the two input graphs, as stored in each output
                # vertex's "parent" attribute (loop-invariant, so hoisted).
                parent_g1 = graph1["name"][0]
                parent_g2 = graph2["name"][0]

                for v in output_graph.vs():
                    in_g1 = parent_g1 in v["parent"]
                    in_g2 = parent_g2 in v["parent"]

                    if in_g1 and in_g2:
                        intersection_node_color_list.append(
                            node_intersection_colour)
                        intersection_frame_color_list.append(
                            node_intersection_frame)

                    elif in_g1:
                        intersection_node_color_list.append(graph_1_colour)
                        intersection_frame_color_list.append(graph_1_frame)

                    elif in_g2:
                        intersection_node_color_list.append(graph_2_colour)
                        intersection_frame_color_list.append(graph_2_frame)

                output_graph_plotter.set_node_colors(
                    colors=intersection_node_color_list)
                output_graph_plotter.set_node_sizes(sizes=[
                    node_intersection_size if parent_g1 in v["parent"]
                    and parent_g2 in v["parent"] else input_graph_node_size
                    for v in output_graph.vs()
                ])

                output_graph_plotter.set_node_labels(
                    labels=output_graph.vs()["name"])
                output_graph_plotter.set_layouts(self.args.plot_layout)
                output_graph_plotter.plot_graph(
                    path=output_plot_path,
                    bbox=plot_size,
                    margin=20,
                    edge_curved=0.2,
                    keep_aspect_ratio=True,
                    vertex_label_size=6,
                    vertex_frame_color=intersection_frame_color_list)

            else:
                sys.stdout.write(
                    u"The output graph does not contain vertices. Can't draw graph\n"
                )

        # NOTE(review): this branch can never run, because it repeats the
        # `not self.args.no_plot` test of the `if` above. The message suggests
        # the `if` was meant to also require both graphs to have fewer than
        # 1000 nodes -- confirm the intended limit before changing it.
        elif not self.args.no_plot and (graph1.vcount() >= 1000
                                        or graph2.vcount() >= 1000):
            sys.stdout.write(
                u"One of the two input graphs exceeds Pyntacle limits for plotting (maximum 1000 nodes). Will not draw graph\n"
            )

        # Report
        reporter1 = PyntacleReporter(graph=graph1)  # init reporter1
        reporter2 = PyntacleReporter(graph=graph2)  # init reporter2
        reporter_final = PyntacleReporter(graph=output_graph)

        set1_attr_dict = OrderedDict()
        set2_attr_dict = OrderedDict()
        setF_attr_dict = OrderedDict()

        if self.args.which == 'intersection':
            # Collect the common nodes here rather than in the plotting
            # branch, so the report also works when plotting is disabled
            # (previously that combination raised a NameError).
            parent_g1 = graph1["name"][0]
            parent_g2 = graph2["name"][0]
            intersection_set = [
                v["name"] for v in output_graph.vs()
                if parent_g1 in v["parent"] and parent_g2 in v["parent"]
            ]

            setF_attr_dict['\nCommon Nodes'] = 'Node names'
            setF_attr_dict[len(intersection_set)] = ','.join(intersection_set)
        reporter1.create_report(ReportEnum.Set, set1_attr_dict)
        reporter2.create_report(ReportEnum.Set, set2_attr_dict)
        reporter_final.create_report(ReportEnum.Set, setF_attr_dict)

        # Merge the three reports into reporter1: relabel the per-graph
        # header rows and drop the rows that would otherwise be repeated.
        reporter1.report[1] = ['\n--- Graph 1 ---']
        reporter2.report[1] = ['--- Graph 2 ---']
        del (reporter1.report[-1])
        del (reporter2.report[-1])
        del (reporter2.report[0])
        del (reporter_final.report[0])
        for e in reporter_final.report:
            if e[0] == 'Pyntacle Command:':
                e[1] = e[1] + ' ' + self.args.which

        reporter_final.report[0] = ['\n--- Resulting Graph ---']
        reporter1.report.extend(reporter2.report)
        reporter1.report.extend(reporter_final.report)
        reporter1.write_report(report_dir=self.args.directory,
                               format=self.args.report_format)

        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)

        sys.stdout.write(u"Pyntacle set completed successfully\n")
        sys.exit(0)
Example #3
0
    def run(self):
        if not hasattr(self.args, 'which'):
            raise Error(
                u"usage: pyntacle.py keyplayer {kp-finder, kp-info} [options]'"
            )

        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()

        if self.args.m_reach == None and self.args.type in ["pos", "all"]:
            sys.stderr.write(
                u"m-reach distance must be provided for computing m-reach. Quitting\n"
            )
            sys.exit(1)

        # Checking input file
        if self.args.input_file is None:
            sys.stderr.write(
                u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
            )
            sys.exit(1)

        if not os.path.exists(self.args.input_file):
            sys.stdout.write(u"Cannot find {}. Is the path correct?\n".format(
                self.args.input_file))
            sys.exit(1)

        if self.args.no_header:
            header = False
        else:
            header = True

        # Load Graph
        sys.stdout.write(import_start)
        sys.stdout.write(u"Importing graph from file\n")
        graph = GraphLoad(self.args.input_file,
                          format_dictionary.get(self.args.format, "NA"),
                          header,
                          separator=self.args.input_separator).graph_load()
        # init graph utils class

        utils = gu(graph=graph)

        if hasattr(self.args, "nodes"):
            self.args.nodes = self.args.nodes.split(",")

            if not utils.nodes_in_graph(self.args.nodes):
                sys.stderr.write(
                    "One or more of the specified nodes {} is not present in the graph. Please check your spelling and the presence of empty spaces between node names. Quitting\n"
                    .format(self.args.nodes))
                sys.exit(1)

        if self.args.largest_component:
            try:
                graph = utils.get_largest_component()
                sys.stdout.write(
                    u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n"
                    .format(graph.vcount(), graph.ecount()))
                # reinitialize graph utils class
                utils.set_graph(graph)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n"
                )
                sys.exit(1)

            if hasattr(self.args, 'nodes'):
                if not utils.nodes_in_graph(self.args.nodes):
                    sys.stderr.write(
                        "One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n"
                    )
                    sys.exit(1)

        if hasattr(self.args, "k_size") and self.args.k_size >= graph.vcount():
            sys.stderr.write(
                "The 'k' argument ({}) must be strictly less than the graph size({}). Quitting\n"
                .format(self.args.k_size, graph.vcount()))
            sys.exit(1)

        if 'implementation' in graph.attributes():
            implementation = graph['implementation']
        else:
            implementation = CmodeEnum.igraph

        # check that output directory is properly set
        createdir = False
        if not os.path.isdir(self.args.directory):
            createdir = True

        # control plot dimensions
        if self.args.plot_dim:  # define custom format
            self.args.plot_dim = self.args.plot_dim.split(",")

            for i in range(0, len(self.args.plot_dim)):
                try:
                    self.args.plot_dim[i] = int(self.args.plot_dim[i])

                    if self.args.plot_dim[i] <= 0:
                        raise ValueError

                except ValueError:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of positive integers (e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            plot_size = (800, 600)

            if graph.vcount() > 150:
                plot_size = (1600, 1600)

        #initialize reporter for later usage and plot dimension for later usage
        r = PyntacleReporter(graph=graph)
        initial_results = {}
        results = OrderedDict()

        sys.stdout.write(section_end)
        sys.stdout.write(run_start)

        if self.args.which == 'kp-finder':

            # Greedy optimization
            if self.args.implementation == "greedy":
                report_type = ReportEnum.KP_greedy
                kp_runner = gow(graph=graph)

                sys.stdout.write(
                    u"Using greedy optimization algorithm for searching optimal key player set for the requested key player metrics\n"
                )
                sys.stdout.write("\n")

                if self.args.type in (['F', 'neg', 'all']):
                    sys.stdout.write(
                        u"KP-NEG: Finding optimal set of nodes of size {0} that maximizes F\n"
                        .format(self.args.k_size))

                    initial_results[KpnegEnum.F.name] = kpp.F(graph)
                    kp_runner.run_fragmentation(self.args.k_size,
                                                KpnegEnum.F,
                                                seed=self.args.seed,
                                                cmode=implementation)
                    sys.stdout.write("\n")

                if self.args.type in (['dF', 'neg', 'all']):
                    sys.stdout.write(
                        u"KP-NEG: Finding optimal set of nodes of size {0} that maximizes dF\n"
                        .format(self.args.k_size))

                    initial_results[KpnegEnum.dF.name] = kpp.dF(
                        graph, cmode=implementation)
                    kp_runner.run_fragmentation(
                        self.args.k_size,
                        KpnegEnum.dF,
                        max_distance=self.args.max_distance,
                        seed=self.args.seed,
                        cmode=implementation)
                    sys.stdout.write("\n")

                if self.args.type in (['dR', 'pos', 'all']):
                    sys.stdout.write(
                        u"KP-POS: Finding optimal set of nodes of size {0} that maximizes dR\n"
                        .format(self.args.k_size))
                    kp_runner.run_reachability(
                        self.args.k_size,
                        KpposEnum.dR,
                        max_distance=self.args.max_distance,
                        seed=self.args.seed,
                        cmode=implementation)
                    sys.stdout.write("\n")

                if self.args.type in (['mreach', 'pos', 'all']):
                    sys.stdout.write(
                        u"KP-POS: Finding optimal set of nodes of size {0} that maximizes the m-reach at distance {1}\n"
                        .format(self.args.k_size, self.args.m_reach))
                    kp_runner.run_reachability(
                        self.args.k_size,
                        KpposEnum.mreach,
                        m=self.args.m_reach,
                        max_distance=self.args.max_distance,
                        seed=self.args.seed,
                        cmode=implementation)
                    sys.stdout.write("\n")

            elif self.args.implementation == "brute-force":
                report_type = ReportEnum.KP_bruteforce
                kp_runner = bfw(graph=graph)
                sys.stdout.write(
                    u"Using brute-force search algorithm to find the best key player set(s)\n"
                )
                sys.stdout.write(sep_line)

                if self.args.type in (['F', 'neg', 'all']):

                    sys.stdout.write(
                        u"KP-NEG: Finding best set (or sets) of nodes of size {0} that holds the maximum F\n"
                        .format(self.args.k_size))

                    initial_results[KpnegEnum.F.name] = kpp.F(graph)
                    kp_runner.run_fragmentation(self.args.k_size,
                                                KpnegEnum.F,
                                                threads=self.args.threads)
                    sys.stdout.write("\n")

                if self.args.type in (['dF', 'neg', 'all']):
                    sys.stdout.write(
                        u"KP-NEG: Finding best set(s) of nodes of size {0} that holds the maximum dF\n"
                        .format(self.args.k_size))

                    initial_results[KpnegEnum.dF.name] = kpp.dF(
                        graph, cmode=CmodeEnum.igraph)
                    kp_runner.run_fragmentation(
                        self.args.k_size,
                        KpnegEnum.dF,
                        max_distance=self.args.max_distance,
                        cmode=CmodeEnum.igraph,
                        threads=self.args.threads)

                    sys.stdout.write("\n")

                if self.args.type in (['dR', 'pos', 'all']):
                    sys.stdout.write(
                        u"KP-POS: Finding best set(s) of nodes of size {0} that hold the maximum dR\n"
                        .format(self.args.k_size))
                    kp_runner.run_reachability(
                        self.args.k_size,
                        KpposEnum.dR,
                        max_distance=self.args.max_distance,
                        cmode=CmodeEnum.igraph,
                        threads=self.args.threads)

                    sys.stdout.write(sep_line)

                if self.args.type in (['mreach', 'pos', 'all']):
                    sys.stdout.write(
                        u"KP-POS: Finding the best set(s) of nodes of size {0} that maximizes the m-reach at distance {1}\n"
                        .format(self.args.k_size, self.args.m_reach))

                    kp_runner.run_reachability(
                        self.args.k_size,
                        KpposEnum.mreach,
                        m=self.args.m_reach,
                        max_distance=self.args.max_distance,
                        cmode=CmodeEnum.igraph,
                        threads=self.args.threads)

                    sys.stdout.write("\n")

            #get report results
            results.update(kp_runner.get_results())
            sys.stdout.write(section_end)
            sys.stdout.write(summary_start)
            sys.stdout.write(
                u"Node set size for key player search: {}\n".format(
                    str(self.args.k_size)))

            sys.stdout.write("\n")
            for kp in results.keys():

                if len(results[kp]
                       [0]) > 1 and self.args.implementation == 'brute-force':
                    plurals = ['s', 'are']
                else:
                    plurals = ['', 'is']

                if results[kp][0][
                        0] is None:  # the case in which there's no solution
                    results[kp][0] = ["None"]

                if self.args.implementation == 'brute-force':
                    list_of_results = "\n".join(
                        ['(' + ', '.join(x) + ')' for x in results[kp][0]])
                else:
                    list_of_results = "(" + ", ".join(results[kp][0]) + ")"

                if kp == KpnegEnum.F.name or kp == KpnegEnum.dF.name:
                    # joining initial results with final ones
                    results[kp].append(initial_results[kp])

                    sys.stdout.write(
                        u"Best key player set{0} of size {1} for negative key player index {2} {3}:\n{4}\nFinal {2} value: {5}\nStarting graph {2} was {6}\n"
                        .format(plurals[0], self.args.k_size, kp, plurals[1],
                                list_of_results, results[kp][1],
                                results[kp][2]))
                    sys.stdout.write("\n")

                elif kp == KpposEnum.dR.name:
                    sys.stdout.write(
                        u"Best key player set{0} of size {1} for positive key player index {2} {3}:\n{4}\nFinal {2} value: {5}\n"
                        .format(plurals[0], self.args.k_size, kp, plurals[1],
                                list_of_results, results[kp][1]))
                    sys.stdout.write("\n")

                elif kp == KpposEnum.mreach.name:
                    results[kp].append(self.args.m_reach)
                    node_perc_reached = ((self.args.k_size + results[kp][1]) /
                                         graph.vcount()) * 100
                    if node_perc_reached == 100:
                        node_perc_reached = int(node_perc_reached)
                    else:
                        node_perc_reached = round(node_perc_reached, 2)
                    sys.stdout.write(
                        u'Key player set{0} of size {1} for positive key player index m-reach, using at best '
                        '{3} steps {4}:\n{5}\nwith value {6} on {8} (number of nodes reached on total number of nodes)\nThe total percentage of nodes, which '
                        'includes the kp-set, is {7}%\n'.format(
                            plurals[0], self.args.k_size, kp,
                            self.args.m_reach, plurals[1], list_of_results,
                            results[kp][1], node_perc_reached, graph.vcount()))
                    sys.stdout.write("\n")
            sys.stdout.write(section_end)

        # kpinfo: compute kpmetrics for a set of predetermined nodes
        elif self.args.which == 'kp-info':
            report_type = ReportEnum.KP_info
            initial_results = OrderedDict()
            kp_runner = kpw(graph=graph, nodes=self.args.nodes)
            results = OrderedDict()

            sys.stdout.write(u"Input node set: ({})\n".format(', '.join(
                self.args.nodes)))
            sys.stdout.write("\n")

            if self.args.type in (['F', 'neg', 'all']):
                initial_results[KpnegEnum.F.name] = kpp.F(graph)
                kp_runner.run_fragmentation(KpnegEnum.F)
                sys.stdout.write("\n")
            if self.args.type in (['dF', 'neg', 'all']):
                initial_results[KpnegEnum.dF.name] = kpp.dF(
                    graph,
                    cmode=implementation,
                    max_distance=self.args.max_distance)
                kp_runner.run_fragmentation(
                    KpnegEnum.dF,
                    max_distance=self.args.max_distance,
                    cmode=implementation)
                sys.stdout.write("\n")

            if self.args.type in (['dR', 'pos', 'all']):
                kp_runner.run_reachability(KpposEnum.dR,
                                           max_distance=self.args.max_distance,
                                           cmode=implementation)
                sys.stdout.write("\n")

            if self.args.type in (['m-reach', 'pos', 'all']):
                kp_runner.run_reachability(KpposEnum.mreach,
                                           m=self.args.m_reach,
                                           max_distance=self.args.max_distance,
                                           cmode=implementation)
                sys.stdout.write("\n")

            sys.stdout.write(section_end)
            results.update(kp_runner.get_results())
            sys.stdout.write(summary_start)
            for metric in results.keys():

                if metric == KpnegEnum.F.name or metric == KpnegEnum.dF.name:
                    results[metric].append(initial_results[metric])
                    sys.stdout.write(
                        u"Removing node set \n({2})\ngives a {0} value of {3}\nStarting graph {0}: {1}\n"
                        .format(metric, results[metric][2],
                                ', '.join(self.args.nodes),
                                results[metric][1]))
                    sys.stdout.write("\n")

                elif metric == KpposEnum.mreach.name:
                    results[metric].append(self.args.m_reach)
                    perc_node_reached = round(
                        (results[metric][1] + len(self.args.nodes)) /
                        graph.vcount() * 100, 3)
                    sys.stdout.write(
                        u"The m-reach of node set:\n({0})\nis {1} on {4} (number of nodes reached on total number of "
                        u"nodes)\nThis means it can reach the {2}% of remaining nodes in the graph nodes in at most {3} steps\n"
                        .format(', '.join(results[metric][0]),
                                results[metric][1], perc_node_reached,
                                self.args.m_reach, graph.vcount()))
                    sys.stdout.write("\n")

                else:  #dR case
                    sys.stdout.write(
                        "The {0} value for node set:\n({1})\nis {2}\n".format(
                            metric, ', '.join(results[metric][0]),
                            results[metric][1]))
                    sys.stdout.write("\n")
            sys.stdout.write(section_end)

        sys.stdout.write(report_start)
        sys.stdout.write("Writing Results\n")
        # check output directory
        if createdir:
            sys.stdout.write(
                u"WARNING: output directory does not exist, {} will be created\n"
                .format(os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        # reporting and plotting part
        sys.stdout.write(u"Producing report in {} format\n".format(
            self.args.report_format))

        r.create_report(report_type=report_type, report=results)

        r.write_report(report_dir=self.args.directory,
                       format=self.args.report_format)

        if self.args.save_binary:
            # reproduce octopus behaviour by adding kp information to the graph before saving it
            sys.stdout.write(
                u"Saving graph to a binary file (ending in .graph)\n")

            for key in results.keys():
                if key == KpposEnum.mreach.name:  #replace the mreach distance
                    new_mreach = "_".join([
                        KpposEnum.mreach.name,
                        str(results[KpposEnum.mreach.name][-1])
                    ])
                    #create new key
                    results[new_mreach] = results[
                        KpposEnum.mreach.
                        name][:
                              -1]  #remove the mreach distance before adding it to the binary file
                    del results[KpposEnum.mreach.name]
                    key = new_mreach

                if self.args.which == "kp-finder":
                    if self.args.implementation == "brute-force":
                        suffix = "bruteforce"
                        attr_key = tuple(
                            tuple(sorted(tuple(x))) for x in results[key][0])

                    else:
                        suffix = "greedy"
                        attr_key = tuple(sorted(tuple(results[key][0])))

                else:
                    suffix = "info"
                    attr_key = tuple(sorted(tuple(results[key][0])))

                attr_name = "_".join([key, suffix])
                attr_val = results[key][1]

                if attr_name in graph.attributes():
                    if not isinstance(graph[attr_name], dict):
                        sys.stdout.write(
                            "WARNING: attribute {} does not point to a dictionary, will overwrite\n"
                            .format(attr_name))
                        AddAttributes.add_graph_attribute(
                            graph, attr_name, {attr_key: attr_val})
                    else:
                        if attr_key in graph[attr_name]:
                            sys.stdout.write(
                                "WARNING: {} already present in the {} graph attribute, will overwrite\n"
                                .format(attr_key, attr_val))
                        graph[attr_name].update({attr_key: attr_val})
                else:
                    AddAttributes.add_graph_attribute(graph, attr_name,
                                                      {attr_key: attr_val})

            binary_prefix = "_".join([
                os.path.splitext(os.path.basename(self.args.input_file))[0],
                self.args.which, self.date
            ])
            binary_path = os.path.join(self.args.directory,
                                       binary_prefix + ".graph")
            PyntacleExporter.Binary(graph, binary_path)

        # generate and output plot
        if not self.args.no_plot and graph.vcount() < 1000:

            sys.stdout.write(u"Generating network plots in {} format\n".format(
                self.args.plot_format))
            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

            if not os.path.isdir(plot_dir):
                os.mkdir(plot_dir)

            plot_graph = PlotGraph(graph=graph)

            plot_format = self.args.plot_format
            plot_graph.set_node_labels(
                labels=graph.vs()["name"])  # assign node labels to graph
            pal = sns.color_palette("Accent", 8).as_hex()
            framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()

            other_nodes_colour = pal[2]
            other_frame_colour = framepal[2]

            other_nodes_size = 25
            # other_nodes_shape = "circle"
            other_edge_width = 1

            for metric in results:
                if self.args.which == 'kp-finder' and self.args.implementation == "brute-force":
                    results[metric][0] = list(
                        set(list(chain(*results[metric][0]))))

                if metric == "F":

                    f_nodes_colour = pal[0]
                    f_frames_colour = framepal[0]
                    # create a list of node colors
                    node_colors = [
                        f_nodes_colour if x["name"] in results[metric][0] else
                        other_nodes_colour for x in graph.vs()
                    ]
                    node_frames = [
                        f_frames_colour if x["name"] in results[metric][0] else
                        other_frame_colour for x in graph.vs()
                    ]

                    plot_graph.set_node_colors(colors=node_colors)

                    # node_shapes = ["square" if x["name"] in results[metric][1] else other_nodes_shape for x in graph.vs()]
                    # plot_graph.set_node_shapes(shapes=node_shapes)

                elif metric == "dF":
                    df_nodes_colour = pal[1]
                    df_frames_colour = framepal[1]

                    # create a list of node colors
                    node_colors = [
                        df_nodes_colour if x["name"] in results[metric][0] else
                        other_nodes_colour for x in graph.vs()
                    ]
                    node_frames = [
                        df_frames_colour if x["name"] in results[metric][0]
                        else other_frame_colour for x in graph.vs()
                    ]

                    plot_graph.set_node_colors(colors=node_colors)

                    # node_shapes = ["rectangle" if x["name"] in results[metric][1] else other_nodes_shape for x in graph.vs()]
                    # plot_graph.set_node_shapes(shapes=node_shapes)

                elif metric == "m-reach":
                    mreach_nodes_colour = pal[4]
                    mreach_frames_colour = framepal[4]
                    # create a list of node colors
                    node_colors = [
                        mreach_nodes_colour if x["name"] in results[metric][0]
                        else other_nodes_colour for x in graph.vs()
                    ]
                    node_frames = [
                        mreach_frames_colour if x["name"] in results[metric][0]
                        else other_frame_colour for x in graph.vs()
                    ]

                    plot_graph.set_node_colors(colors=node_colors)

                    # node_shapes = ["triangle-up" if x["name"] in results[metric][1] else other_nodes_shape for x in graph.vs()]
                    # plot_graph.set_node_shapes(shapes=node_shapes)

                else:  #dR
                    dr_nodes_colour = pal[3]
                    dr_frames_colour = framepal[3]

                    # create a list of node colors
                    node_colors = [
                        dr_nodes_colour if x["name"] in results[metric][0] else
                        other_nodes_colour for x in graph.vs()
                    ]
                    node_frames = [
                        dr_frames_colour if x["name"] in results[metric][0]
                        else other_frame_colour for x in graph.vs()
                    ]

                    plot_graph.set_node_colors(colors=node_colors)

                    # node_shapes = ["triangle-down" if x["name"] in results[metric][1] else other_nodes_shape for x in
                    #                graph.vs()]
                    #
                    # plot_graph.set_node_shapes(shapes=node_shapes)

                node_sizes = [
                    35 if x["name"] in results[metric][0] else other_nodes_size
                    for x in graph.vs()
                ]

                plot_graph.set_node_sizes(sizes=node_sizes)
                # print (other_edge_width)

                #     print (edge.source(), edge.target())
                # add recursive edge widths
                if metric != "mreach":

                    edge_widths = [
                        5 if any(
                            y in results[metric][0]
                            for y in x["adjacent_nodes"]) else other_edge_width
                        for x in graph.es()
                    ]

                else:
                    if self.args.m_reach > 5:
                        edge_widths = [
                            5 if any(y in results[metric][0]
                                     for y in x["adjacent_nodes"]) else
                            other_edge_width for x in graph.es()
                        ]
                        sys.stdout.write(
                            u"WARNING: you chose a very high value of m-reach, the edge width "
                            "may be too big, hence it may not be represented correctly\n"
                        )
                    else:
                        mreach_nodes = results[metric][0]
                        # get node indices of corresponding kpset
                        indices = utils.get_node_indices(mreach_nodes)

                        edge_widths = [
                            other_edge_width
                        ] * graph.ecount()  # define a starting list of values

                        mreach_width = (
                            self.args.m_reach * 2
                        ) + 2  # maxium and minimum boundaries for edge width
                        # print(mreach_width)

                        memory_indices = indices
                        step_before = indices

                        for i in range(1, self.args.m_reach + 1):
                            # print(mreach_width)
                            neighbours = Graph.neighborhood(graph,
                                                            vertices=indices)
                            # print(neighbours)

                            indices = list(
                                chain(*neighbours))  # flat out list of indices
                            # print(indices)
                            remaining_indices = list(
                                set(indices) - set(memory_indices))

                            # print(remaining_indices)
                            # print(step_before)

                            mreach_edge_ids = []

                            for elem in step_before:
                                for el in remaining_indices:
                                    if Graph.are_connected(graph, elem, el):
                                        mreach_edge_ids.append(
                                            graph.get_eid(elem, el))

                            # print (mreach_edge_ids)
                            for edge in mreach_edge_ids:
                                edge_widths[edge] = mreach_width

                            # finally
                            mreach_width = mreach_width - 2
                            memory_indices = memory_indices + remaining_indices
                            step_before = remaining_indices

                        # sys.exit()

                plot_graph.set_edge_widths(edge_widths)

                plot_graph.set_layouts(self.args.plot_layout)

                plot_path = os.path.join(
                    plot_dir,
                    "_".join([self.args.which, ["name"][0], metric, self.date
                              ]) + "." + plot_format)
                if os.path.exists(plot_path):
                    sys.stdout.write(
                        u"WARNING: a plot with the name ({}) already exists, overwriting it\n"
                        .format(os.path.basename(plot_path)))

                plot_graph.plot_graph(path=plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=node_frames)

        elif graph.vcount() >= 1000:
            sys.stdout.write(
                u"The graph has too many nodes ({}, we plot nodes with a maximum of 1000 nodes). It will not be drawn\n"
                .format(graph.vcount()))

        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle keyplayer completed successfully\n")

        sys.exit(0)
Example #4
0
    def run(self):

        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()

        if self.args.input_file is None:
            sys.stderr.write(
                u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
            )
            sys.exit(1)

        if not os.path.exists(self.args.input_file):
            sys.stderr.write(u"Cannot find {}. Is the path correct?".format(
                self.args.input_file))
            sys.exit(1)

        input_header = True
        if self.args.no_header:
            input_header = False

        input_format = format_dictionary.get(self.args.format, "NA")

        sys.stdout.write(import_start)
        sys.stdout.write(u"Importing graph from file\n")
        graph = GraphLoad(self.args.input_file,
                          file_format=input_format,
                          header=input_header,
                          separator=self.args.input_separator).graph_load()

        # init Utils global stuff
        utils = GraphUtils(graph=graph)

        if self.args.largest_component:
            try:
                graph = utils.get_largest_component()
                sys.stdout.write(
                    u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n"
                    .format(graph.vcount(), graph.ecount()))
                utils.set_graph(graph)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n"
                )
                sys.exit(1)

        # define plot sizes
        if self.args.plot_dim:  # define custom format
            self.args.plot_dim = self.args.plot_dim.split(",")

            for i in range(0, len(self.args.plot_dim)):
                try:
                    self.args.plot_dim[i] = int(self.args.plot_dim[i])

                except ValueError:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

                if self.args.plot_dim[i] <= 0:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            # generate different formats according to graph size
            if graph.vcount() <= 150:
                plot_size = (800, 800)

            else:
                plot_size = (1600, 1600)

        # initialize module finder method
        communities = CommunityFinder(graph=graph)
        # initialize Reporter
        results = OrderedDict()

        if self.args.which == "fastgreedy":
            if self.args.weights is not None:
                # import edge attributes
                if not os.path.exists(self.args.weights):
                    sys.stderr.write(
                        u"Attribute file {} does not exist. Quitting\n".format(
                            self.args.weights))
                    sys.exit(1)

                else:
                    ImportAttributes.import_edge_attributes(
                        graph,
                        self.args.weights,
                        sep=separator_detect(self.args.weights),
                        mode=self.args.weights_format)
                    weights = [
                        float(x) if x is not None else 1.0
                        for x in graph.es()["weights"]
                    ]

            else:
                weights = None

            if self.args.clusters is not None:
                try:
                    self.args.clusters = int(self.args.clusters)

                except:
                    sys.stderr.write(
                        u"argument of '--clusters' must be an integer. Quitting\n"
                    )
                    sys.exit(1)

            sys.stdout.write(section_end)
            sys.stdout.write(run_start)
            sys.stdout.write(
                u"Finding communities using the fastgreedy algorithm\n")
            communities.fastgreedy(weights=weights, n=self.args.clusters)
            mods = communities.get_modules
            algorithm = "fastgreedy"

        elif self.args.which == "infomap":
            sys.stdout.write(section_end)
            sys.stdout.write(run_start)
            sys.stdout.write(
                u"Finding communities using the infomap (naive) algorithm\n")
            communities.infomap()
            mods = communities.get_modules
            algorithm = "infomap"

        elif self.args.which == "leading-eigenvector":
            sys.stdout.write(section_end)
            sys.stdout.write(run_start)
            sys.stdout.write(
                u"Finding communities using the leading-eigenvector algorithm\n"
            )
            communities.leading_eigenvector()
            mods = communities.get_modules
            algorithm = "leading-eigenvector"

        elif self.args.which == "community-walktrap":
            try:
                self.args.steps = int(self.args.steps)

            except:
                sys.stderr.write(
                    u"Argument of '--steps' must be an integer. Quitting\n")
                sys.exit(1)

            if self.args.weights is not None:
                # import edge attributes
                if not os.path.exists(self.args.weights):
                    sys.stderr.write(
                        u"Weights file {} does not exist. Quitting\n".format(
                            self.args.weights))
                    sys.exit(1)

                else:
                    ImportAttributes.import_edge_attributes(
                        graph,
                        self.args.weights,
                        sep=separator_detect(self.args.weights),
                        mode=self.args.weights_format)
                    weights = [
                        float(x) if x != None else 1.0
                        for x in graph.es()["weights"]
                    ]

            else:
                weights = None

            if self.args.clusters is not None:
                try:
                    self.args.clusters = int(self.args.clusters)

                except:
                    sys.stderr.write(
                        u"Argument of '--clusters' must be an integer. Quitting\n"
                    )
                    sys.exit(1)

            sys.stdout.write(section_end)
            sys.stdout.write(run_start)
            sys.stdout.write(
                u"Finding communities using the walktrap algorithm and a walker of {} steps\n"
                .format(self.args.steps))
            communities.community_walktrap(weights=weights,
                                           n=self.args.clusters,
                                           steps=self.args.steps)
            mods = communities.get_modules
            algorithm = "community-walktrap"

        mods_report = []
        if not mods:
            sys.stderr.write(u"No communities found. Quitting.")
            sys.exit(1)
        for i, elem in enumerate(mods):
            mods_report.append("\t".join([
                str(x) for x in
                [i, elem.vcount(),
                 elem.ecount(),
                 len(elem.components())]
            ]) + "\n")

        sys.stdout.write(section_end)
        sys.stdout.write(summary_start)
        sys.stdout.write(
            u"Pyntacle - Community finding report:\nAlgorithm:{0}\nTotal number of communities found:"
            "\t{1}\nIndex\tNodes\tEdges \tComponents\n{2}".format(
                algorithm, len(mods), "".join(mods_report)))

        # initialize Moduleutils class
        mod_utils = ModuleUtils(modules=mods)

        if not all(x is None for x in [
                self.args.min_nodes, self.args.max_nodes,
                self.args.min_components, self.args.max_components
        ]):
            init_mods = len(mods)

            if self.args.min_nodes is not None:
                try:
                    self.args.min_nodes = int(self.args.min_nodes)

                except:
                    sys.stderr.write(
                        u"Argument of '--min-nodes' must be an integer. Quitting\n"
                    )
                    sys.exit(1)

            if self.args.max_nodes is not None:
                try:
                    self.args.max_nodes = int(self.args.max_nodes)

                except:
                    sys.stderr.write(
                        u"Argument of '--max-nodes' must be an integer. Quitting\n"
                    )
                    sys.exit(1)

            if self.args.max_components is not None:
                try:
                    self.args.max_components = int(self.args.max_components)
                except:

                    sys.stderr.write(
                        u"Argument of '--max-components' must be an integer. Quitting\n"
                    )
                    sys.exit(1)

            if self.args.min_components is not None:
                try:
                    self.args.min_components = int(self.args.min_components)

                except:
                    sys.stderr.write(
                        u"Argument of '--min-components' must be an integer. Quitting\n"
                    )
                    sys.exit(1)

            mod_utils.filter_subgraphs(min_nodes=self.args.min_nodes,
                                       max_nodes=self.args.max_nodes,
                                       min_components=self.args.min_components,
                                       max_components=self.args.max_components)
            if len(mod_utils.modules) > 0:
                sys.stdout.write(
                    u"Filtered out {0} communities. Keeping {1} communities\n".
                    format((init_mods - len(mod_utils.modules)),
                           len(mod_utils.modules)))
            else:
                sys.stdout.write(
                    u"No community could be kept using the current filters. Quitting\n"
                )
                sys.exit(0)

        else:
            sys.stdout.write(
                u"No filters specified. All modules will be kept\n")
        sys.stdout.write(section_end)

        mod_utils.label_modules_in_graph(graph=graph)
        final_mods = mod_utils.get_modules()

        for elem in final_mods:
            results[elem["module"]] = [
                elem.vcount(),
                elem.ecount(),
                len(elem.components())
            ]

        sys.stdout.write(report_start)
        # producing output graph
        if self.args.no_output_header:
            sys.stdout.write(
                u"Skipping header writing on output graph community files\n")
            output_header = False

        else:
            output_header = True

        if not os.path.isdir(self.args.directory):
            sys.stdout.write(
                u"WARNING: output directory does not exists {} will be created\n"
                .format(os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        # dictionary that stores the extension of the output file
        if self.args.output_format is None:
            self.args.output_format = self.args.format

        out_form = format_dictionary.get(self.args.output_format, "NA")

        if self.args.output_file is None:
            # insert random name generator
            self.args.output_file = "_".join(
                ["pyntacle", graph["name"][0], algorithm])
            sys.stdout.write(
                u"Basename of the output modules will be {} (default)\n".
                format(self.args.output_file))

        output_basename = os.path.join(self.args.directory,
                                       self.args.output_file)
        # output generated networks

        sys.stdout.write(
            "Writing resulting communities to the specified network file format\n"
        )

        for elem in final_mods:
            output_path = ".".join([
                "_".join([output_basename,
                          str(elem["module"]), self.date]), out_form
            ])
            try:
                if out_form == "adjm":
                    PyntacleExporter.AdjacencyMatrix(
                        elem,
                        output_path,
                        sep=self.args.output_separator,
                        header=output_header)
                elif out_form == "egl":
                    PyntacleExporter.EdgeList(elem,
                                              output_path,
                                              sep=self.args.output_separator,
                                              header=output_header)

                elif out_form == "sif":
                    PyntacleExporter.Sif(elem,
                                         output_path,
                                         sep=self.args.output_separator,
                                         header=output_header)

                elif out_form == "dot":
                    # Ignore ugly RuntimeWarnings while creating a dot
                    simplefilter("ignore", RuntimeWarning)
                    PyntacleExporter.Dot(elem, output_path)

                elif out_form == "bin":
                    PyntacleExporter.Binary(elem, output_path)

            except UnsupportedGraphError:
                sys.stdout.write(
                    "Module {0} was skipped because it is too small ({1} nodes, {2} edges), use the `--save-binary` flag to retrieve it\n"
                    .format(elem["module"], elem.vcount(), elem.ecount()))

        # reporting and plotting part
        sys.stdout.write(u"Producing report in {} format\n".format(
            self.args.report_format))
        r = PyntacleReporter(graph=graph)
        report_type = ReportEnum.Communities
        results["algorithm"] = algorithm

        r.create_report(report_type=report_type, report=results)
        r.write_report(report_dir=self.args.directory,
                       format=self.args.report_format)

        # save the original graph into a binary file
        if self.args.save_binary:
            binary_name = ".".join([
                "_".join([
                    os.path.splitext(os.path.basename(
                        self.args.input_file))[0], "communities"
                ]), "graph"
            ])
            binary_path = os.path.join(self.args.directory, binary_name)
            sys.stdout.write(
                u"Storing the input graph with module labels into a binary file in the results directory\n"
                .format(binary_path))

        if not self.args.no_plot:

            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

            if os.path.isdir(plot_dir):
                self.logging.info(
                    u"A directory named \"pyntacle-plots\" already exists.")

            else:
                os.mkdir(plot_dir)

            avail_colors_fill = sns.color_palette(
                "Spectral", n_colors=len(
                    final_mods)).as_hex()  # available colors for node fill
            avail_colors_borders = sns.color_palette("Spectral",
                                                     n_colors=len(final_mods),
                                                     desat=0.5).as_hex()

            if graph.vcount() < 1000:

                sys.stdout.write(u"Plotting graph in {} format\n".format(
                    self.args.plot_format))

                main_plot_path = os.path.join(
                    plot_dir, ".".join([
                        "_".join([
                            self.args.which,
                            os.path.splitext(
                                os.path.basename(self.args.input_file))[0],
                            "communities", self.date
                        ]), self.args.plot_format
                    ]))

                # initialize general graph Drawer
                sys.stdout.write(
                    u"Drawing original graph, highlighting communities\n")

                if len(final_mods) > 20:
                    sys.stdout.write(
                        u"WARNING:The number of modules found ({}) is very high. The plot of the input graph will have nuanced colors\n"
                        .format(len(final_mods)))

                graph_plotter = PlotGraph(graph=graph)
                graph_plotter.set_node_labels(labels=graph.vs()["name"])
                graph_plotter.set_node_sizes([30] * graph.vcount())

                # define different colors for each module
                not_in_module_colors = "#A9A9A9"
                col_list = []
                bord_list = []
                for elem in graph.vs():
                    module = elem["module"]
                    if module is not None:
                        col_list.append(avail_colors_fill[module])
                        bord_list.append(avail_colors_borders[module])

                    else:
                        col_list.append(not_in_module_colors)
                        bord_list.append(not_in_module_colors)

                graph_plotter.set_node_colors(col_list)
                graph_plotter.set_layouts(self.args.plot_layout)
                graph_plotter.plot_graph(path=main_plot_path,
                                         bbox=plot_size,
                                         margin=20,
                                         edge_curved=0.2,
                                         keep_aspect_ratio=True,
                                         vertex_label_size=6,
                                         vertex_frame_color=bord_list)
            else:
                sys.stdout.write(
                    u"Input graph is above Pyntacle plotting limit ({} nodes found, only graphs with at best 1000 nodes). Input graph will not be plotted\n"
                    .format(graph.vcount()))

            sys.stdout.write("Drawing each module separately\n")

            for i, comm in enumerate(final_mods):
                if comm.vcount() <= 1000:
                    plotter = PlotGraph(graph=comm)
                    plotter.set_node_labels(labels=comm.vs()["name"])

                    plotter.set_node_colors([avail_colors_fill[i]] *
                                            comm.vcount())

                    plotter.set_node_sizes([30] * comm.vcount())

                    comm_plot_path = os.path.join(
                        plot_dir, ".".join([
                            "_".join([
                                self.args.output_file,
                                str(comm["module"]), self.date
                            ]), self.args.plot_format
                        ]))

                    plotter.set_layouts(self.args.plot_layout)
                    plotter.plot_graph(
                        path=comm_plot_path,
                        bbox=plot_size,
                        margin=20,
                        edge_curved=0.2,
                        keep_aspect_ratio=True,
                        vertex_label_size=6,
                        vertex_frame_color=[avail_colors_borders[i]] *
                        comm.vcount())

                else:
                    sys.stdout.write(
                        u"Module {0} is above Pyntacle plotting limit ({1} nodes found, communities with at best 1000 nodes are plotted). Plotting of this module will be skipped\n"
                        .format(i, comm.vcount()))
        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle communities completed successfully\n")
        sys.exit(0)
Example #5
0
    def run(self):
        """Generate a graph with the requested topology, export it and plot it.

        The topology is selected by ``self.args.which`` ("random",
        "scale-free", "tree" or "small-world"). Parameters the user did not
        supply are drawn at random; supplied ones are converted to numbers and
        validated. The resulting graph is written to ``self.args.directory``
        in the format named by ``self.args.output_format`` and, unless
        ``self.args.no_plot`` is set or the graph has 1000 nodes or more,
        drawn into a "pyntacle-plots" sub-directory.

        Side effects: several attributes of ``self.args`` are overwritten
        with their parsed/defaulted values (``nodes``, ``edges``,
        ``probability``, ``output_file``, ``output_separator``, ...).

        Exits through ``sys.exit(1)`` on any invalid parameter or unsupported
        output format, and through ``sys.exit(0)`` on success when
        ``self.args.repeat == 1``; otherwise returns ``None``.
        """
        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()

        sys.stdout.write(run_start)

        # "random", "scale-free" and "tree" share the same handling of the
        # node count: pick one at random when missing, otherwise parse it.
        if self.args.which in ("random", "scale-free", "tree"):
            if self.args.nodes is None:
                self.args.nodes = random.randint(100, 500)
            else:
                try:
                    self.args.nodes = int(self.args.nodes)

                except ValueError:
                    sys.stderr.write(
                        u"Number of nodes must be a positive integer. Quitting\n"
                    )
                    sys.exit(1)

        if self.args.which == "random":

            # An explicit edge count is honoured only when no wiring
            # probability was given.
            if not self.args.probability and self.args.edges:

                try:
                    self.args.edges = int(self.args.edges)
                    # Fixed: this message used to be formatted but never
                    # written out.
                    sys.stdout.write(
                        u"Generating graph with random topology\nParameters:\nNumber of nodes: {0}\nNumber of edges: {1}\n"
                        .format(self.args.nodes, self.args.edges))

                    graph = PyntacleGenerator.Random(
                        [self.args.nodes, self.args.edges],
                        name="Random",
                        seed=self.args.seed)

                except (ValueError, TypeError, IllegalGraphSizeError):
                    sys.stderr.write(
                        u"Number of nodes must be a positive integer greater than 2 and number of edges must be a positive integer greater than zero. Quitting\n"
                    )
                    sys.exit(1)

            else:
                if not self.args.probability:
                    self.args.probability = 0.5

                else:
                    try:
                        self.args.probability = float(self.args.probability)
                        if self.args.probability > 1.0 or self.args.probability < 0.0:
                            raise ValueError

                    except ValueError:
                        sys.stderr.write(
                            u"Probability must be a float between 0 and 1. Quitting\n"
                        )
                        sys.exit(1)

                try:
                    # Fixed: the "u" prefix was inside the literal, so the
                    # message started with "uGenerating".
                    sys.stdout.write(
                        u"Generating graph with random topology\nParameters:\nNumber of nodes: {0}\nProbability of wiring: {1}\n"
                        .format(self.args.nodes, self.args.probability))
                    graph = PyntacleGenerator.Random(
                        [self.args.nodes, self.args.probability],
                        name="Random",
                        seed=self.args.seed)

                except (ValueError, TypeError, IllegalGraphSizeError):
                    sys.stderr.write(
                        u"Number of nodes must be a positive integer greater than 2 and a probability must be a float between 0 and 1. Quitting\n"
                    )
                    sys.exit(1)

        elif self.args.which == "scale-free":
            if self.args.avg_edges is None:
                self.args.avg_edges = random.randint(10, 100)

            else:
                try:
                    # Fixed: previously parsed self.args.nodes, silently
                    # discarding the user-supplied value.
                    self.args.avg_edges = int(self.args.avg_edges)

                except ValueError:
                    sys.stderr.write(
                        u"Number of outgoing edges must be a positive integer. Quitting\n"
                    )
                    sys.exit(1)

            try:
                sys.stdout.write(
                    u"Generating graph with scale-free topology\nParameters:\nNumber of Nodes: {0}\nNumber of Outgoing edges: {1}\n"
                    .format(self.args.nodes, self.args.avg_edges))
                graph = PyntacleGenerator.ScaleFree(
                    [self.args.nodes, self.args.avg_edges],
                    name="ScaleFree",
                    seed=self.args.seed)

            except (ValueError, TypeError, IllegalGraphSizeError):
                sys.stderr.write(
                    u"Number of nodes and number of outgoing edges must be positive integers. Quitting\n"
                )
                sys.exit(1)

        elif self.args.which == "tree":
            if self.args.children is None:
                self.args.children = random.randint(2, 10)

            else:
                try:
                    # Fixed: previously parsed self.args.nodes, silently
                    # discarding the user-supplied value.
                    self.args.children = int(self.args.children)

                except ValueError:
                    sys.stderr.write(
                        u"Number of children must be a positive integer. Quitting\n"
                    )
                    sys.exit(1)

            try:
                sys.stdout.write(
                    u"Generating Graph with tree topology\nParameters:\nNumber of nodes: {0}\nChildren per node: {1}\n"
                    .format(self.args.nodes, self.args.children))
                graph = PyntacleGenerator.Tree(
                    [self.args.nodes, self.args.children],
                    name="Tree",
                    seed=self.args.seed)

            except (ValueError, TypeError, IllegalGraphSizeError):
                sys.stderr.write(
                    u"Number of nodes and number of children must be positive integers. Quitting\n"
                )
                sys.exit(1)

        elif self.args.which == "small-world":

            # This does not happen anymore, as default is 2.
            if not self.args.lattice_size:
                self.args.lattice_size = random.randint(2, 5)

            if not self.args.nei:
                self.args.nei = random.randint(1, 5)

            # Values arriving as strings from the command line are converted
            # and range-checked here.
            if isinstance(self.args.lattice, str):
                try:
                    self.args.lattice = int(self.args.lattice)
                    self.args.lattice_size = int(self.args.lattice_size)
                    self.args.nei = int(self.args.nei)
                    self.args.probability = float(self.args.probability)

                    # Fixed: "0 < p > 1.0" only rejected p > 1 and let
                    # negative probabilities through.
                    if not 0.0 <= self.args.probability <= 1.0:
                        raise ValueError

                    if self.args.lattice_size <= 1:
                        raise ValueError

                    if self.args.nei < 1:
                        raise ValueError

                    if self.args.lattice <= 1:
                        raise ValueError

                # TypeError covers a parameter that is still None at this
                # point (int(None) / float(None)).
                except (ValueError, TypeError):
                    sys.stderr.write(
                        u"One of the parameters you specified is not the proper type or it is out of boundaries. Quitting\n"
                    )
                    sys.exit(1)

            try:
                sys.stdout.write(
                    u"Generating Graph with small-world topology\nParameters:\nInitial lattice dimensions: {0}\nLattice size: {1}\nNei (number of edges that connect each graph): {2}\nRewiring probability: {3}\n"
                    .format(self.args.lattice, self.args.lattice_size,
                            self.args.nei, self.args.probability))
                graph = PyntacleGenerator.SmallWorld(
                    [
                        self.args.lattice, self.args.lattice_size,
                        self.args.nei, self.args.probability
                    ],
                    name="SmallWorld",
                    seed=self.args.seed)

            except (TypeError, ValueError, IllegalGraphSizeError):
                sys.stderr.write(
                    u"The parameters you chose were invalid. Please check your command line. Quitting\n"
                )
                # Fixed: without this exit the code fell through and crashed
                # with a NameError on the undefined `graph`.
                sys.exit(1)

        # Fixed: the check used "and", which let edgeless graphs through, and
        # the message printed the edge count twice.
        if graph.vcount() < 2 or graph.ecount() < 1:
            sys.stdout.write(
                "Generated Graph is too small ({} nodes, {} edges). Rerun this command and tune your parameters. Quitting\n"
                .format(graph.vcount(), graph.ecount()))
            sys.exit(1)

        sys.stdout.write(section_end)

        sys.stdout.write(report_start)

        if not os.path.isdir(self.args.directory):
            sys.stdout.write(
                u"WARNING: output directory does not exist {} will be created\n"
                .format(os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        # Default the output basename to the name stored on the graph.
        if self.args.output_file is None:
            self.args.output_file = graph["name"][0]

        out_form = format_dictionary.get(self.args.output_format, "NA")
        if self.args.no_output_header:
            sys.stdout.write(
                u"Skipping header on output graph file, as requested\n")
            output_header = False

        else:
            output_header = True

        if out_form == "NA":
            sys.stderr.write(
                u"Output extension specified is not supported. Quitting\n")
            sys.exit(1)

        output_path = os.path.join(self.args.directory,
                                   ".".join([self.args.output_file, out_form]))
        sys.stdout.write(u"Path to graph : {}\n".format(output_path))

        if self.args.output_separator is None:
            sys.stdout.write(
                u"Using '\\t' as default separator for output file\n")
            self.args.output_separator = "\t"

        # output generated networks
        if out_form == "adjm":
            sys.stdout.write(
                u"Writing generated graph to an adjacency matrix\n")
            PyntacleExporter.AdjacencyMatrix(graph,
                                             output_path,
                                             sep=self.args.output_separator,
                                             header=output_header)

        elif out_form == "egl":
            sys.stdout.write(u"Writing generated graph to an edge list\n")
            PyntacleExporter.EdgeList(graph,
                                      output_path,
                                      sep=self.args.output_separator,
                                      header=output_header)

        elif out_form == "sif":
            sys.stdout.write(
                u"Writing generated graph to a Simple Interaction Format (SIF) file\n"
            )
            PyntacleExporter.Sif(graph,
                                 output_path,
                                 sep=self.args.output_separator,
                                 header=output_header)

        elif out_form == "dot":
            sys.stdout.write(u"Writing generated graph to a DOT file\n")

            # Ignore ugly RuntimeWarnings while creating a dot
            simplefilter("ignore", RuntimeWarning)
            PyntacleExporter.Dot(graph, output_path)

        elif out_form == "graph":
            sys.stdout.write(
                u"Writing generated graph to a binary file (ending in .graph)\n"
            )
            PyntacleExporter.Binary(graph, output_path)

        # Check provided dimensions' format
        if self.args.plot_dim:  # define custom format
            try:
                self.args.plot_dim = [
                    int(dim) for dim in self.args.plot_dim.split(",")
                ]
                # Non-positive sizes are rejected with the same message as
                # non-numeric ones.
                if any(dim <= 0 for dim in self.args.plot_dim):
                    raise ValueError

            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            # generate different formats according to graph size
            if graph.vcount() <= 150:
                plot_size = (800, 800)

            else:
                plot_size = (1600, 1600)

        if not self.args.no_plot and graph.vcount() < 1000:
            sys.stdout.write(u"Drawing generated graph\n")
            # generates plot directory
            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

            if not os.path.isdir(plot_dir):
                os.mkdir(plot_dir)

            plot_path = os.path.join(
                plot_dir,
                ".".join([self.args.output_file, self.args.plot_format]))

            # Fill palettes and their desaturated counterparts, the latter
            # used for the vertex frames.
            pal = sns.color_palette("Spectral", 10).as_hex()
            pal2 = sns.color_palette("RdYlGn", 10).as_hex()
            framepal = sns.color_palette("Spectral", 10, desat=0.5).as_hex()
            framepal2 = sns.color_palette("RdYlGn", 10, desat=0.5).as_hex()

            other_nodes_size = 18

            plot_graph = PlotGraph(graph=graph)

            # define layout and colours according to the topology of the graph
            if self.args.which == "random":
                if self.args.plot_layout != "random":
                    plot_graph.set_layouts(self.args.plot_layout)
                else:
                    plot_graph.set_layouts(layout="random")
                other_nodes_colour = pal[-3]
                frame_vertex_colour = framepal[-3]

            elif self.args.which == "scale-free":
                if self.args.plot_layout != "fr" and self.args.plot_layout != "fruchterman_reingold":
                    plot_graph.set_layouts(self.args.plot_layout)
                else:
                    plot_graph.set_layouts(layout="fr")
                other_nodes_colour = pal[3]
                frame_vertex_colour = framepal[3]

            elif self.args.which == "tree":
                if self.args.plot_layout != "rt" and self.args.plot_layout != "reingold_tilford":
                    plot_graph.set_layouts(self.args.plot_layout)
                else:
                    plot_graph.set_layouts(layout="reingold_tilford")
                other_nodes_colour = pal2[-2]
                frame_vertex_colour = framepal2[-2]

            else:
                if self.args.plot_layout != "circle":
                    plot_graph.set_layouts(self.args.plot_layout)
                else:
                    plot_graph.set_layouts(layout="circle")
                other_nodes_colour = pal[0]
                frame_vertex_colour = framepal[0]

            # Every node gets the same colour, size and frame colour.
            node_colors = [other_nodes_colour] * graph.vcount()
            plot_graph.set_node_colors(colors=node_colors)
            plot_graph.set_node_labels(
                labels=graph.vs()["name"])  # assign node labels to graph
            node_sizes = [other_nodes_size] * graph.vcount()
            plot_graph.set_node_sizes(sizes=node_sizes)
            frame_vertex_colour = [frame_vertex_colour] * graph.vcount()

            sys.stdout.write(
                u"Drawing graph in {} format at path: {}\n".format(
                    self.args.plot_format, plot_path))

            plot_graph.plot_graph(path=plot_path,
                                  bbox=plot_size,
                                  margin=20,
                                  edge_curved=0.2,
                                  keep_aspect_ratio=True,
                                  vertex_label_size=6,
                                  vertex_frame_color=frame_vertex_colour)

        elif not self.args.no_plot and graph.vcount() >= 1000:
            self.logging.warning(
                u"Graph is above Pyntacle plotting capability ({} nodes, we plot graph with at best 1000 nodes). Graph plotting will be skipped."
                .format(graph.vcount()))

        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle generate completed successfully\n")
        # Exit only for a single run; with repeat != 1 control returns to the
        # caller.
        if self.args.repeat == 1:
            sys.exit(0)
Example #6
0
    def run(self):
        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()

        if self.args.input_file is None:
            sys.stderr.write(
                u"Please specify an input file using the `-i/--input-file` option. Quitting\n")
            sys.exit(1)

        if not os.path.exists(self.args.input_file):
            sys.stdout.write(u"Cannot find {}. Is the path correct?\n".format(self.args.input_file))
            sys.exit(1)

        #verify that group distance is set if group closeness is specified
        distancedict = {"min": GroupDistanceEnum.minimum, "max":GroupDistanceEnum.maximum, "mean": GroupDistanceEnum.mean}
        if self.args.type in ["all", "closeness"]:
            if self.args.group_distance not in distancedict.keys():
                sys.stdout.write("'--group-distance/-D parameter must be one of the followings: {}'. Quitting\n".format(",".join(distancedict.keys())))
                sys.exit(1)
            else:
                group_distance = distancedict[self.args.group_distance]

        # Parsing optional node list
        if hasattr(self.args, 'nodes'):
            self.args.nodes = self.args.nodes.split(',')
            # self.args.nodes = [str.lower(x) for x in self.args.nodes]

        if not hasattr(self.args, 'which'):
            raise Error(u"usage: pyntacle.py groupcentrality {gr-finder, gr-info} [options]'")

        if self.args.no_header:
            header = False
        else:
            header = True

        sys.stdout.write(import_start)
        sys.stdout.write(u"Importing graph from file\n")
        graph = GraphLoad(self.args.input_file, format_dictionary.get(self.args.format, "NA"), header,
                          separator=self.args.input_separator).graph_load()

        # init graph utils class
        utils = gu(graph=graph)

        if hasattr(self.args, 'nodes'):

            if not utils.nodes_in_graph(self.args.nodes):
                sys.stderr.write("One or more of the specified nodes is not present in the graph. Please check your spelling and the presence of empty spaces in between node names. Quitting\n")
                sys.exit(1)

        if self.args.largest_component:
            try:
                graph = utils.get_largest_component()
                sys.stdout.write(
                    u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n".format(
                        graph.vcount(), graph.ecount()))
                #reinitialize graph utils class
                utils.set_graph(graph)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n")
                sys.exit(1)

            #check that the nodes are in the largest component
            if hasattr(self.args, 'nodes'):

                if not utils.nodes_in_graph(self.args.nodes):
                    sys.stderr.write("One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n")
                    sys.exit(1)

        if hasattr(self.args, "k_size") and self.args.k_size >= graph.vcount():
            sys.stderr.write("The 'k' argument ({}) must be strictly less than the graph size({}). Quitting\n".format(self.args.k_size, graph.vcount()))
            sys.exit(1)

        if 'implementation' in graph.attributes():
            implementation = graph['implementation']
        else:
            implementation = CmodeEnum.igraph

        # check that output directory is properly set
        createdir = False
        if not os.path.isdir(self.args.directory):
            createdir = True

        # control plot dimensions
        if self.args.plot_dim:  # define custom format
            self.args.plot_dim = self.args.plot_dim.split(",")

            for i in range(0, len(self.args.plot_dim)):
                try:
                    self.args.plot_dim[i] = int(self.args.plot_dim[i])

                    if self.args.plot_dim[i] <= 0:
                        raise ValueError

                except ValueError:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of positive integers (e.g. 1920,1080). Quitting\n")
                    sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            plot_size = (800, 600)

            if graph.vcount() > 150:
                plot_size = (1600, 1600)

        # initialize reporter for later usage and plot dimension for later usage
        r = PyntacleReporter(graph=graph)
        results = OrderedDict()

        sys.stdout.write(section_end)
        sys.stdout.write(run_start)

        if self.args.which == "gr-finder":

            # Greedy optimization
            if self.args.implementation == "greedy":
                if self.args.seed:
                    random.seed(self.args.seed)

                report_type = ReportEnum.GR_greedy
                go_runner = gow(graph=graph)
                sys.stdout.write(u"Using greedy optimization algorithm for searching optimal set of nodes using group centrality metrics\n")
                sys.stdout.write(sep_line)

                if self.args.type in (["all", "degree"]):
                    sys.stdout.write(
                        u"Finding a set of nodes of size {0} that optimizes group degree\n".format(
                            self.args.k_size))

                    go_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_degree,
                                                  seed=self.args.seed,
                                                  cmode=implementation)
                    sys.stdout.write(sep_line)


                if self.args.type in (["all", "betweenness"]):
                    sys.stdout.write(
                        u"Finding a set of nodes of size {0} that optimizes group betweenness\n".format(
                            self.args.k_size))

                    go_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_betweenness,
                                                  seed=self.args.seed,
                                                  cmode=implementation)
                    sys.stdout.write(sep_line)

                if self.args.type in (["all", "closeness"]):
                    sys.stdout.write(
                        u"Finding a set of nodes of size {0} that optimizes group closeness using the {1} distance from the node set\n".format(
                            self.args.k_size, group_distance.name))


                    go_runner.run_groupcentrality(k = self.args.k_size,gr_type=GroupCentralityEnum.group_closeness, seed=self.args.seed, cmode=implementation ,distance=group_distance)
                    sys.stdout.write(sep_line)

                sys.stdout.write(sep_line)
                results.update(go_runner.get_results())

            #bruteforce implementation
            elif self.args.implementation == "brute-force":

                if self.args.threads > 1:
                    plural = "s"
                else:
                    plural = ""

                report_type = ReportEnum.GR_bruteforce
                bf_runner = bfw(graph=graph)
                sys.stdout.write(u"Using brute-force search algorithm to find the best set(s) that optimize group centrality metrics\n")
                sys.stdout.write(sep_line)

                if self.args.type in (["all", "degree"]):
                    sys.stdout.write(
                        u"Finding the best set(s) of nodes of size {0} that maximizes group degree using {1} thread{2}\n".format(
                            self.args.k_size, self.args.threads, plural))
                    bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_degree,
                                                  cmode=implementation, threads=self.args.threads)

                    sys.stdout.write(sep_line)

                if self.args.type in (["all", "betweenness"]):
                    sys.stdout.write(
                        u"Finding the best set(s) of nodes of size {0} that maximizes group betweenness using {1} thread{2}\n".format(
                            self.args.k_size,  self.args.threads, plural))
                    bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_betweenness,
                                                  cmode=implementation, threads=self.args.threads)
                    sys.stdout.write(sep_line)

                if self.args.type in (["all", "closeness"]):
                    sys.stdout.write(
                        u"Finding the best set(s) of nodes of size {0} that maximizes group closeness using the {1} distance from the node set and {2} thread{3}\n".format(
                            self.args.k_size,  group_distance, self.args.threads, plural))
                    bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_closeness, cmode=implementation, threads=self.args.threads, distance=group_distance)
                    sys.stdout.write(sep_line)

                results.update(bf_runner.get_results())

            #shell output report part
            sys.stdout.write(section_end)
            sys.stdout.write(summary_start)
            sys.stdout.write(u"Node set size for group centrality search: {}\n".format(str(self.args.k_size)))
            sys.stdout.write(sep_line)

            for kk in results.keys():

                if len(results[kk][0]) > 1 and self.args.implementation == 'brute-force':
                    plurals = ['s', 'are']
                else:
                    plurals = ['', 'is']

                if results[kk][0][0] is None:  # the case in which there's no solution
                    results[kk][0] = ["None"]

                if self.args.implementation == 'brute-force':
                    list_of_results = "\n".join(['(' + ', '.join(x) + ')' for x in results[kk][0]])


                else:
                    list_of_results = "(" + ", ".join(results[kk][0]) + ")"

                sys.stdout.write(
                    u'Best node set{0} of size {1} for {5} centrality {2}:\n{3}\nwith value {4}\n'.format(
                        plurals[0], self.args.k_size, plurals[1], list_of_results, results[kk][1], " ".join(kk.split("_")[:2])))

                if kk.startswith(GroupCentralityEnum.group_closeness.name):
                    sys.stdout.write("The {} distance was considered for computing closeness\n".format(group_distance.name))

                sys.stdout.write("\n")

            sys.stdout.write(section_end)

        elif self.args.which == "gr-info":
            report_type = ReportEnum.GR_info
            sys.stdout.write("Input node set: ({})\n".format(', '.join(self.args.nodes)))
            sys.stdout.write(sep_line)

            grinfo_runner = ipw(graph=graph, nodes=self.args.nodes)

            if self.args.type in (["degree", "all"]):
                grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_degree, cmode=implementation)

            if self.args.type in (["betweenness", "all"]):
                grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_betweenness, cmode=implementation)

            if self.args.type in (["closeness", "all"]):
                grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_closeness, cmode=implementation, gr_distance=group_distance)

            results.update(grinfo_runner.get_results())

            sys.stdout.write(summary_start)

            for metric in results.keys():

                if metric == GroupCentralityEnum.group_degree.name:
                    sys.stdout.write("The group degree value for the input node set:\n({0})\nis {1}\n".format(', '.join(results[metric][0]),
                                                                 results[metric][1]))
                    sys.stdout.write("\n")

                if metric == GroupCentralityEnum.group_betweenness.name:
                    sys.stdout.write(
                        "The group betweenness value for the input node set:\n({0})\nis {1}\n".format(', '.join(results[metric][0]),
                                                                                        results[metric][1]))
                    sys.stdout.write("\n")

                if metric.startswith(GroupCentralityEnum.group_closeness.name):
                    sys.stdout.write(
                        "The group closeness value for the input node set:\n({0})\nis {1}.\nThe {2} distance was considered between the set and the rest of the graph\n".format(', '.join(results[metric][0]),
                                                                                      results[metric][1], group_distance.name))
                    sys.stdout.write("\n")

            sys.stdout.write(section_end)

        #output part#####
        sys.stdout.write(report_start)
        sys.stdout.write("Writing Results\n")

        if createdir:
            sys.stdout.write(u"WARNING: output directory does not exist, {} will be created".format(
                os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        if self.args.save_binary:
            # reproduce octopus behaviour by adding kp information to the graph before saving it
            sys.stdout.write(u"Saving graph to a binary file (ending in .graph)\n")

            for key in results.keys():
                if self.args.which == "gr-finder":
                    if self.args.implementation == "brute-force":
                        suffix = "bruteforce"
                        attr_key = tuple(tuple(sorted(tuple(x))) for x in results[key][0])

                    else:
                        suffix = "greedy"
                        attr_key = tuple(sorted(tuple(results[key][0])))

                else:
                    suffix = "info"
                    attr_key = tuple(sorted(tuple(results[key][0])))

                attr_name = "_".join([key, suffix])
                attr_val = results[key][1]

                if attr_name in graph.attributes():
                    if not isinstance(graph[attr_name], dict):
                        sys.stdout.write("WARNING: attribute {} does not point to a dictionary, will overwrite".format(attr_name))
                        AddAttributes.add_graph_attribute(graph, attr_name, {attr_key: attr_val})
                    else:
                        if attr_key in graph[attr_name]:
                            sys.stdout.write("WARNING {} already present in the {} graph attribute, will overwrite\n".format(attr_key, attr_val))
                        graph[attr_name].update({attr_key: attr_val})
                else:
                    AddAttributes.add_graph_attribute(graph, attr_name, {attr_key: attr_val})

            binary_prefix = "_".join([os.path.splitext(os.path.basename(self.args.input_file))[0], self.args.which, self.date])
            binary_path = os.path.join(self.args.directory, binary_prefix + ".graph")
            PyntacleExporter.Binary(graph, binary_path)

        sys.stdout.write(u"Producing report in {} format\n".format(self.args.report_format))

        r.create_report(report_type=report_type, report=results)
        r.write_report(report_dir=self.args.directory, format=self.args.report_format)

        if not self.args.no_plot and graph.vcount() < 1000:

            sys.stdout.write(u"Generating network plots in {} format\n".format(self.args.plot_format))
            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

            if not os.path.isdir(plot_dir):
                os.mkdir(plot_dir)

            plot_graph = PlotGraph(graph=graph)
            plot_format = self.args.plot_format
            plot_graph.set_node_labels(labels=graph.vs()["name"])  # assign node labels to graph
            pal = sns.color_palette("Accent", 8).as_hex()
            framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()

            other_nodes_colour = pal[2]
            other_frame_colour = framepal[2]

            other_nodes_size = 25
            # other_nodes_shape = "circle"
            other_edge_width = 1

            for metric in results:
                if self.args.which == 'gr-finder' and self.args.implementation == "brute-force":
                    results[metric][0] = list(set(list(chain(*results[metric][0]))))

                if metric.startswith(GroupCentralityEnum.group_closeness.name):
                    cl_nodes_colour = pal[5]
                    cl_frames_colour = framepal[5]
                    # create a list of node colors
                    node_colors = [cl_nodes_colour if x["name"] in results[metric][0] else other_nodes_colour
                                   for x in graph.vs()]
                    node_frames = [cl_frames_colour if x["name"] in results[metric][0] else other_frame_colour
                                   for x in
                                   graph.vs()]

                    plot_graph.set_node_colors(colors=node_colors)

                elif metric == GroupCentralityEnum.group_degree:
                    dg_nodes_colour = pal[4]
                    dg_frames_colour = framepal[4]

                    # create a list of node colors
                    node_colors = [dg_nodes_colour if x["name"] in results[metric][0] else other_nodes_colour
                                    for x in
                                    graph.vs()]
                    node_frames = [dg_frames_colour if x["name"] in results[metric][0] else other_frame_colour
                                   for x in
                                   graph.vs()]

                    plot_graph.set_node_colors(colors=node_colors)

                else: #group betweenness
                    bt_nodes_colour = pal[6]
                    bt_frames_colour = framepal[6]

                    # create a list of node colors
                    node_colors = [bt_nodes_colour if x["name"] in results[metric][0] else other_nodes_colour
                                    for x in
                                    graph.vs()]
                    node_frames = [bt_frames_colour if x["name"] in results[metric][0] else other_frame_colour
                                   for x in
                                   graph.vs()]

                    plot_graph.set_node_colors(colors=node_colors)

                node_sizes = [35 if x["name"] in results[metric][0] else other_nodes_size for x in graph.vs()]
                plot_graph.set_node_sizes(sizes=node_sizes)

                edge_widths = [5 if any(y in results[metric][0] for y in x["adjacent_nodes"]) else other_edge_width for
                               x in graph.es()]

                plot_graph.set_edge_widths(edge_widths)
                plot_graph.set_layouts(self.args.plot_layout)

                plot_path = os.path.join(plot_dir, "_".join(
                    [self.args.which, graph["name"][0], metric, self.date]) + "." + plot_format)
                if os.path.exists(plot_path):
                    sys.stdout.write(
                        u"WARNING: a plot with the name ({}) already exists, overwriting it\n".format(
                            os.path.basename(plot_path)))

                plot_graph.plot_graph(path=plot_path, bbox=plot_size, margin=20, edge_curved=0.2,
                                      keep_aspect_ratio=True, vertex_label_size=6, vertex_frame_color=node_frames)
        elif graph.vcount() >= 1000:
            sys.stdout.write(u"The graph has too many nodes ({}, we plot nodes with a maximum of 1000 nodes). It will not be drawn\n".format(graph.vcount()))

        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle groupcentrality completed successfully\n")
        sys.exit(0)
Example #7
0
    def run(self):
        """Convert the input network file into the requested output format.

        Reads its configuration from ``self.args``: validates the input file
        and the requested output format, resolves the input/output separators
        and the output file name, creates the output directory when it is
        missing, and finally writes the converted file.

        Edge list <-> SIF conversions are delegated to ``PyntacleConverter``
        and work directly on the input file; every other conversion loads the
        graph through ``GraphLoad`` and writes it with the ``PyntacleExporter``
        method matching the output format.

        The method never returns normally: it ends the process through
        ``sys.exit`` with status 1 when the arguments are invalid (missing or
        non-existent input file, unsupported output format, identical input
        and output formats) and with status 0 once the conversion is done.
        """
        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()

        header = not self.args.no_header
        output_header = not self.args.no_output_header

        if self.args.input_file is None:
            sys.stderr.write(
                u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
            )
            sys.exit(1)

        if not os.path.exists(self.args.input_file):
            sys.stderr.write(u"Cannot find {}. Is the path correct?\n".format(
                self.args.input_file))
            sys.exit(1)

        if self.args.input_separator is None:
            separator = separator_detect(self.args.input_file)
        else:
            separator = self.args.input_separator

        sys.stdout.write(run_start)

        input_basename = os.path.basename(self.args.input_file)
        # format requested on the command line; looked up once and reused below
        declared_in_form = format_dictionary.get(self.args.format, "NA")
        out_form = format_dictionary.get(self.args.output_format, "NA")

        # Reject unsupported formats before touching the filesystem, so that no
        # output directory is created for a conversion that cannot happen.
        if out_form == "NA":
            sys.stderr.write(
                u"Output extension specified is not supported, see  '--help' for more info. Quitting\n"
            )
            sys.exit(1)

        # The default output name has to be resolved *before* it is reported:
        # os.path.basename() raises TypeError when it is handed None.
        if self.args.output_file is None:
            self.args.output_file = os.path.splitext(input_basename)[0]
            sys.stdout.write(
                u"Output file name will be the basename of the input file ({})\n"
                .format(self.args.output_file))

        sys.stdout.write(
            u"Converting  input file {0} to requested output file: {1}\n".
            format(input_basename,
                   os.path.basename(self.args.output_file)))

        if self.args.output_separator is None:
            sys.stdout.write(
                u"Using the field separator used in the input network file in the converted output file, if the desired output format requires field separator\n"
            )
            self.args.output_separator = separator

        if not os.path.isdir(self.args.directory):
            sys.stdout.write(
                u"WARNING: output directory does not exist, will create one at {}\n"
                .format(os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        output_path = os.path.join(self.args.directory,
                                   ".".join([self.args.output_file, out_form]))
        init_graph = GraphLoad(input_file=self.args.input_file,
                               file_format=declared_in_form,
                               header=header,
                               separator=self.args.input_separator)

        # special cases:
        # 1: convert an edge list to a sif file
        if declared_in_form == "egl" and out_form == "sif":
            sys.stdout.write(
                u"Converting edge list to Simple Interaction Format (SIF)\nFull path to the output file:\n{}\n"
                .format(output_path))
            PyntacleConverter.edgelistToSif(
                file=self.args.input_file,
                sep=separator,
                output_sep=self.args.output_separator,
                header=output_header,
                output_file=output_path)

        # 2: convert a sif to an edge list file
        elif declared_in_form == "sif" and out_form == "egl":
            sys.stdout.write(
                u"Converting Simple Interaction Format (SIF) to edge list\nFull path to the output file:\n{}\n"
                .format(output_path))
            PyntacleConverter.sifToEdgelist(
                file=self.args.input_file,
                sep=separator,
                output_sep=self.args.output_separator,
                header=output_header,
                output_file=output_path)

        # general case: load the graph, then export it in the requested format
        else:
            graph = init_graph.graph_load()
            in_form = init_graph.get_format()

            if in_form == out_form:
                sys.stderr.write(
                    u"The output format specified is the same as the input format. Quitting\n"
                )
                sys.exit(1)

            if out_form == "adjm":
                sys.stdout.write(
                    u"Converting input graph file {0} to adjacency matrix at full path:\n{1}\n"
                    .format(input_basename, output_path))
                PyntacleExporter.AdjacencyMatrix(
                    graph,
                    output_path,
                    sep=self.args.output_separator,
                    header=output_header)

            elif out_form == "egl":
                sys.stdout.write(
                    u"Converting input graph file {0} to edge list at full path:\n{1}\n"
                    .format(input_basename, output_path))
                PyntacleExporter.EdgeList(graph,
                                          output_path,
                                          sep=self.args.output_separator,
                                          header=output_header)

            elif out_form == "sif":
                sys.stdout.write(
                    u"Converting input graph file {0} to Simple Interaction Format (SIF) file at full path:\n{1}\n"
                    .format(input_basename, output_path))
                PyntacleExporter.Sif(graph,
                                     output_path,
                                     sep=self.args.output_separator,
                                     header=output_header)

            elif out_form == "dot":
                # Ignore ugly RuntimeWarnings while converting to dot
                simplefilter("ignore", RuntimeWarning)

                sys.stdout.write(
                    u"Converting input graph file {0} to DOT file using igraph utilities at full path:\n{1}\n(output separator will be ignored)\n"
                    .format(input_basename, output_path))
                PyntacleExporter.Dot(graph, output_path)

            elif out_form == "graph":
                sys.stdout.write(
                    u"Converting input graph file {0} to a binary file (ending in .graph) at full path:\n{1}\n(output separator will be ignored)\n"
                    .format(input_basename, output_path))
                PyntacleExporter.Binary(graph, output_path)

        # Shared epilogue for every successful conversion, including the two
        # edge list <-> SIF shortcuts: stop the cursor animation, close the
        # report section and exit with a success status.
        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle convert completed successfully. Ending\n")
        sys.exit(0)
Example #8
0
    def run(self):
        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()
        if self.args.no_header:
            header = False
        else:
            header = True

        if not hasattr(self.args, 'which'):
            raise Error(
                u"usage: pyntacle.py metrics {global, local} [options]")

        # Checking input file
        if self.args.input_file is None:
            sys.stderr.write(
                u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
            )
            sys.exit(1)

        if not os.path.exists(self.args.input_file):
            self.logging.error(u"Cannot find {}. Is the path correct?".format(
                self.args.input_file))
            sys.exit(1)

        if hasattr(self.args, "damping_factor"):
            if self.args.damping_factor is not None:
                if self.args.damping_factor < 0.0 or self.args.damping_factor > 1.0:
                    sys.stderr.write(
                        u"Damping factor must be between 0 and 1. Quitting\n")
                    sys.exit(1)

        self.logging.debug(u'Running Pyntacle metrics, with arguments ')
        self.logging.debug(self.args)

        # Load Graph
        sys.stdout.write(import_start)
        sys.stdout.write(u"Importing graph from file\n")
        graph = GraphLoad(self.args.input_file,
                          format_dictionary.get(self.args.format, "NA"),
                          header,
                          separator=self.args.input_separator).graph_load()
        # init Utils global stuff
        utils = gu(graph=graph)

        if hasattr(self.args, "nodes"):
            if self.args.nodes is not None:

                self.args.nodes = self.args.nodes.split(",")

                if not utils.nodes_in_graph(self.args.nodes):
                    sys.stderr.write(
                        "One or more of the specified nodes is not present in the graph. Please check your spelling and the presence of empty spaces in between node names. Quitting\n"
                    )
                    sys.exit(1)

        if self.args.largest_component:
            try:
                graph = utils.get_largest_component()
                sys.stdout.write(
                    u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n"
                    .format(graph.vcount(), graph.ecount()))
                # reinitialize graph utils class
                utils.set_graph(graph)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n"
                )
                sys.exit(1)

            if self.args.nodes is not None:
                if not utils.nodes_in_graph(self.args.nodes):
                    sys.stderr.write(
                        "One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n"
                    )
                    sys.exit(1)

        # Decide implementation
        if 'implementation' in graph.attributes():
            implementation = graph['implementation']
        else:
            implementation = CmodeEnum.igraph

        if hasattr(self.args, "nodes"):
            if self.args.weights is not None:
                sys.stdout.write(u"Adding edge weights from file {}\n".format(
                    self.args.weights))
                if not os.path.exists(self.args.weights):
                    sys.stderr.write(
                        u"Weights file {} does not exist. Is the path correct?\n"
                        .format(self.args.weights))
                    sys.exit(1)

                ImportAttributes.import_edge_attributes(
                    graph,
                    self.args.weights,
                    sep=separator_detect(self.args.weights),
                    mode=self.args.weights_format)
                try:
                    weights = [
                        float(x) if x != None else 1.0
                        for x in graph.es()["weights"]
                    ]

                except KeyError:
                    sys.stderr.write(
                        u"The attribute file does not contain a column named 'weights'."
                        "Quitting\n")
                    sys.exit(1)
            else:
                weights = None

        # Check provided dimensions' format
        if hasattr(self.args.plot_dim, "plot_dim"):
            # define custom format
            self.args.plot_dim = self.args.plot_dim.split(",")

            if len(self.args.plot_dim) != 2:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )

            for i in range(0, len(self.args.plot_dim)):
                try:
                    self.args.plot_dim[i] = int(self.args.plot_dim[i])

                except ValueError:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

                if self.args.plot_dim[i] <= 0:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            # generate different formats according to graph size
            if graph.vcount() <= 150:
                plot_size = (800, 800)

            else:
                plot_size = (1600, 1600)

        sys.stdout.write(section_end)  #end report
        sys.stdout.write(run_start)  #start run

        if self.args.which == "local":

            reporter = PyntacleReporter(graph=graph)  #init reporter

            if self.args.nodes is not None:
                sys.stdout.write(
                    u"Computing local metrics for nodes {}\n".format(', '.join(
                        self.args.nodes)))
                nodes_list = self.args.nodes

            else:
                sys.stdout.write(
                    u"Computing local metrics for all nodes in the graph\n")
                nodes_list = None

            local_attributes_dict = OrderedDict({
                LocalAttributeEnum.degree.name:
                LocalTopology.degree(graph=graph, nodes=nodes_list),
                LocalAttributeEnum.clustering_coefficient.name:
                LocalTopology.clustering_coefficient(graph=graph,
                                                     nodes=nodes_list),
                LocalAttributeEnum.betweenness.name:
                LocalTopology.betweenness(graph=graph, nodes=nodes_list),
                LocalAttributeEnum.closeness.name:
                LocalTopology.closeness(graph=graph, nodes=nodes_list),
                LocalAttributeEnum.radiality.name:
                LocalTopology.radiality(graph=graph,
                                        nodes=nodes_list,
                                        cmode=implementation),
                LocalAttributeEnum.radiality_reach.name:
                LocalTopology.radiality_reach(graph=graph,
                                              nodes=nodes_list,
                                              cmode=implementation),
                LocalAttributeEnum.eccentricity.name:
                LocalTopology.eccentricity(graph=graph, nodes=nodes_list),
                LocalAttributeEnum.eigenvector_centrality.name:
                LocalTopology.eigenvector_centrality(graph=graph,
                                                     nodes=nodes_list),
                LocalAttributeEnum.pagerank.name:
                LocalTopology.pagerank(graph=graph,
                                       nodes=nodes_list,
                                       weights=weights,
                                       damping=self.args.damping_factor)
            })

            if self.args.nodes:
                local_attributes_dict["nodes"] = self.args.nodes

            sys.stdout.write("Local metrics computed\n")

            sys.stdout.write(section_end)
            sys.stdout.write(report_start)
            # check output directory
            if not os.path.isdir(self.args.directory):
                sys.stdout.write(
                    u"WARNING: Output directory does not exist; {} will be created\n"
                    .format(os.path.abspath(self.args.directory)))
                os.makedirs(os.path.abspath(self.args.directory),
                            exist_ok=True)

            sys.stdout.write(u"Producing report in {} format\n".format(
                self.args.report_format))

            reporter.create_report(ReportEnum.Local, local_attributes_dict)
            reporter.write_report(report_dir=self.args.directory,
                                  format=self.args.report_format)

            if not self.args.no_plot and graph.vcount() < 1000:

                sys.stdout.write(u"Generating plots in {} format\n".format(
                    self.args.plot_format))

                # generates plot directory
                plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

                if not os.path.isdir(plot_dir):
                    os.makedirs(plot_dir, exist_ok=True)

                plot_graph = PlotGraph(graph=graph)
                plot_graph.set_node_labels(
                    labels=graph.vs()["name"])  # assign node labels to graph

                pal = sns.color_palette("Accent", 8).as_hex()
                framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()

                other_nodes_colour = pal[2]
                other_frame_colour = framepal[2]
                other_nodes_size = 25

                if self.args.nodes:  # make node selected of a different colour and bigger than the other ones, so they can be visualized
                    sys.stdout.write(
                        u"Highlighting nodes ({}) in plot\n".format(
                            ', '.join(nodes_list)))
                    selected_nodes_colour = pal[0]
                    selected_nodes_frames = framepal[0]

                    node_colors = [
                        selected_nodes_colour
                        if x["name"] in nodes_list else other_nodes_colour
                        for x in graph.vs()
                    ]
                    node_frames = [
                        selected_nodes_frames
                        if x["name"] in nodes_list else other_frame_colour
                        for x in graph.vs()
                    ]

                    #print(node_colors)

                    plot_graph.set_node_colors(colors=node_colors)

                    node_sizes = [
                        45 if x["name"] in nodes_list else other_nodes_size
                        for x in graph.vs()
                    ]
                    plot_graph.set_node_sizes(sizes=node_sizes)

                else:
                    # sys.stdout.write("Plotting network\n".format(nodes_list))
                    node_colors = [other_nodes_colour] * graph.vcount()
                    node_frames = [other_frame_colour] * graph.vcount()
                    plot_graph.set_node_colors(colors=node_colors)

                    node_sizes = [other_nodes_size] * graph.vcount()
                    plot_graph.set_node_sizes(sizes=node_sizes)

                # define layout
                plot_graph.set_layouts(self.args.plot_layout)

                plot_path = os.path.join(
                    plot_dir, ".".join([
                        "_".join([graph["name"][0], self.date]),
                        self.args.plot_format
                    ]))
                plot_graph.plot_graph(path=plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=node_frames)

            elif not self.args.no_plot and graph.vcount() >= 1000:
                sys.stdout.write(
                    u"The graph has too many nodes ({}). It will not be drawn\n"
                    .format(graph.vcount()))

        elif self.args.which == "global":

            sys.stdout.write(u"Computing global metrics\n")
            global_attributes_dict = OrderedDict({
                GlobalAttributeEnum.average_shortest_path_length.name:
                ShortestPath.average_global_shortest_path_length(graph=graph),
                GlobalAttributeEnum.median_shortest_path_length.name:
                ShortestPath.median_global_shortest_path_length(graph=graph),
                GlobalAttributeEnum.diameter.name:
                GlobalTopology.diameter(graph=graph),
                GlobalAttributeEnum.components.name:
                GlobalTopology.components(graph=graph),
                GlobalAttributeEnum.radius.name:
                GlobalTopology.radius(graph=graph),
                GlobalAttributeEnum.density.name:
                GlobalTopology.density(graph=graph),
                GlobalAttributeEnum.pi.name:
                GlobalTopology.pi(graph=graph),
                GlobalAttributeEnum.average_clustering_coefficient.name:
                GlobalTopology.average_clustering_coefficient(graph=graph),
                GlobalAttributeEnum.weighted_clustering_coefficient.name:
                GlobalTopology.weighted_clustering_coefficient(graph=graph),
                GlobalAttributeEnum.average_degree.name:
                GlobalTopology.average_degree(graph=graph),
                GlobalAttributeEnum.average_closeness.name:
                GlobalTopology.average_closeness(graph=graph),
                GlobalAttributeEnum.average_eccentricity.name:
                GlobalTopology.average_eccentricity(graph=graph),
                GlobalAttributeEnum.average_radiality.name:
                GlobalTopology.average_radiality(graph=graph,
                                                 cmode=implementation),
                GlobalAttributeEnum.average_radiality_reach.name:
                GlobalTopology.average_radiality_reach(graph=graph,
                                                       cmode=implementation),
                GlobalAttributeEnum.completeness_naive.name:
                Sparseness.completeness_naive(graph=graph),
                GlobalAttributeEnum.completeness.name:
                Sparseness.completeness(graph=graph),
                GlobalAttributeEnum.compactness.name:
                Sparseness.compactness(graph=graph)
            })

            sys.stdout.write(u"Global metrics computed\n")
            sys.stdout.write(section_end)
            sys.stdout.write(report_start)
            sys.stdout.write(
                u"Producing global metrics report for the input graph\n")

            reporter = PyntacleReporter(graph=graph)  # init reporter
            reporter.create_report(ReportEnum.Global, global_attributes_dict)
            reporter.write_report(report_dir=self.args.directory,
                                  format=self.args.report_format)

            if self.args.no_nodes:  # create an additional report for the graph minus the selected nodes

                sys.stdout.write(
                    u"Removing nodes:\n\t{}\nfrom input graph and computing Global Metrics\n"
                    .format(self.args.no_nodes))
                nodes_list = self.args.no_nodes.split(",")

                # this will be useful when producing the two global topology plots, one for the global graph and the other one fo all nodes
                nodes_list = [x.replace(" ", "") for x in nodes_list]
                index_list = utils.get_node_indices(nodes=nodes_list)

                # delete vertices
                graph_nonodes = graph.copy()
                graph_nonodes.delete_vertices(index_list)  #remove target nodes

                global_attributes_dict_nonodes = OrderedDict({
                    'Removed nodes':
                    ','.join(nodes_list),
                    GlobalAttributeEnum.average_shortest_path_length.name:
                    ShortestPath.average_global_shortest_path_length(
                        graph=graph_nonodes),
                    GlobalAttributeEnum.median_shortest_path_length.name:
                    ShortestPath.median_global_shortest_path_length(
                        graph=graph_nonodes),
                    GlobalAttributeEnum.diameter.name:
                    GlobalTopology.diameter(graph=graph_nonodes),
                    GlobalAttributeEnum.components.name:
                    GlobalTopology.components(graph=graph_nonodes),
                    GlobalAttributeEnum.radius.name:
                    GlobalTopology.radius(graph=graph_nonodes),
                    GlobalAttributeEnum.density.name:
                    GlobalTopology.density(graph=graph_nonodes),
                    GlobalAttributeEnum.pi.name:
                    GlobalTopology.pi(graph=graph_nonodes),
                    GlobalAttributeEnum.average_clustering_coefficient.name:
                    GlobalTopology.average_clustering_coefficient(
                        graph=graph_nonodes),
                    GlobalAttributeEnum.weighted_clustering_coefficient.name:
                    GlobalTopology.weighted_clustering_coefficient(
                        graph=graph_nonodes),
                    GlobalAttributeEnum.average_degree.name:
                    GlobalTopology.average_degree(graph=graph_nonodes),
                    GlobalAttributeEnum.average_closeness.name:
                    GlobalTopology.average_closeness(graph=graph_nonodes),
                    GlobalAttributeEnum.average_eccentricity.name:
                    GlobalTopology.average_eccentricity(graph=graph_nonodes),
                    GlobalAttributeEnum.average_radiality.name:
                    GlobalTopology.average_radiality(graph=graph_nonodes,
                                                     cmode=implementation),
                    GlobalAttributeEnum.average_radiality_reach.name:
                    GlobalTopology.average_radiality_reach(
                        graph=graph_nonodes, cmode=implementation),
                    GlobalAttributeEnum.completeness_naive.name:
                    Sparseness.completeness_naive(graph=graph_nonodes),
                    GlobalAttributeEnum.completeness.name:
                    Sparseness.completeness(graph=graph_nonodes),
                    GlobalAttributeEnum.compactness.name:
                    Sparseness.compactness(graph=graph_nonodes),
                })

                sys.stdout.write(
                    u"Producing global metrics report for the input graph after node removal\n"
                )
                graph_nonodes["name"][0] += '_without_nodes'
                reporter = PyntacleReporter(
                    graph=graph_nonodes)  # init reporter
                reporter.create_report(ReportEnum.Global,
                                       global_attributes_dict_nonodes)
                reporter.write_report(report_dir=self.args.directory,
                                      format=self.args.report_format)

            if not self.args.no_plot and graph.vcount() < 1000:

                if self.args.no_nodes:
                    sys.stdout.write(
                        u"Generating plots of both the input network and the resulting network without nodes {} in {} format\n"
                        .format(self.args.no_nodes, self.args.plot_format))

                else:
                    sys.stdout.write(
                        u"Generating network plot in {} format\n".format(
                            self.args.plot_format))

                # generates plot directory
                plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

                if not os.path.isdir(plot_dir):
                    os.mkdir(plot_dir)

                other_nodes_size = 25
                pal = sns.color_palette("Accent", 8).as_hex()
                framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()
                other_nodes_colour = pal[2]
                other_frame_colour = framepal[2]
                no_nodes_size = 35
                no_nodes_colour = pal[4]
                no_nodes_frames = framepal[4]

                if self.args.no_nodes:
                    plot_path = os.path.join(
                        plot_dir, ".".join([
                            "_".join([
                                "metric", self.args.which,
                                re.sub('_nodes_removed', '', graph["name"][0]),
                                "global_metrics_plot", self.date
                            ]), self.args.plot_format
                        ]))
                    node_colors = [
                        no_nodes_colour
                        if x["name"] in nodes_list else other_nodes_colour
                        for x in graph.vs()
                    ]
                    node_frames = [
                        no_nodes_frames
                        if x["name"] in nodes_list else other_frame_colour
                        for x in graph.vs()
                    ]

                    node_sizes = [
                        no_nodes_size
                        if x["name"] in nodes_list else other_nodes_size
                        for x in graph.vs()
                    ]

                else:
                    node_colors = [other_nodes_colour] * graph.vcount()
                    node_frames = [other_frame_colour] * graph.vcount()
                    node_sizes = [other_nodes_size] * graph.vcount()
                    plot_path = os.path.join(
                        plot_dir, ".".join([
                            "_".join([
                                "Metric", self.args.which, graph["name"][0],
                                self.date
                            ]), self.args.plot_format
                        ]))

                plot_graph = PlotGraph(graph=graph)
                plot_graph.set_node_labels(
                    labels=graph.vs()["name"])  # assign node labels to graph

                plot_graph.set_node_colors(colors=node_colors)
                plot_graph.set_node_sizes(sizes=node_sizes)

                plot_graph.set_node_colors(colors=node_colors)
                plot_graph.set_node_sizes(sizes=node_sizes)

                # define layout
                plot_graph.set_layouts(self.args.plot_layout)

                plot_graph.plot_graph(path=plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=node_frames)

                if self.args.no_nodes:

                    plot_graph = PlotGraph(graph=graph_nonodes)
                    plot_graph.set_node_labels(
                        labels=graph_nonodes.vs()
                        ["name"])  # assign node labels to graph

                    # print(graph_copy.vs()["name"])
                    node_colors = [other_nodes_colour] * graph_nonodes.vcount()
                    node_frames = [other_frame_colour] * graph_nonodes.vcount()
                    node_sizes = [other_nodes_size] * graph_nonodes.vcount()

                    plot_graph.set_node_colors(colors=node_colors)
                    plot_graph.set_node_sizes(sizes=node_sizes)

                    # define layout
                    plot_graph.set_layouts(self.args.plot_layout)

                    plot_path = os.path.join(
                        plot_dir, ".".join([
                            "_".join(
                                [graph["name"][0], "_no_nodes", self.date]),
                            self.args.plot_format
                        ]))

                    plot_graph.plot_graph(path=plot_path,
                                          bbox=plot_size,
                                          margin=20,
                                          edge_curved=0.2,
                                          keep_aspect_ratio=True,
                                          vertex_label_size=6,
                                          vertex_frame_color=node_frames)

            elif not self.args.no_plot and graph.vcount() >= 1000:
                sys.stdout.write(
                    u"The graph has too many nodes ({}). It will not be drawn\n"
                    .format(graph.vcount()))

        if self.args.save_binary:
            sys.stdout.write(
                u"Saving graph to a binary file (ending in .graph)\n")
            basename_graph = os.path.splitext(
                os.path.basename(self.args.input_file))[0]
            binary_path = os.path.join(self.args.directory,
                                       basename_graph + ".graph")
            # elif self.args.no_nodes:
            # nodes_list = graph_nonodes.vs()
            if self.args.which == 'local':
                if self.args.nodes:
                    nodes_list = self.args.nodes.split(",")
                else:
                    nodes_list = graph.vs["name"]
                for key in local_attributes_dict:
                    AddAttributes.add_node_attributes(
                        graph, key, local_attributes_dict[key], nodes_list)
                PyntacleExporter.Binary(graph, binary_path)

            elif self.args.which == 'global':

                for key in global_attributes_dict:
                    if (key == "average_shortest_path_length"
                            or key == 'median_shortest_path_length'):
                        newkey = re.sub("_shortest_path_length",
                                        "_global_shortest_path_length", key)
                    else:
                        newkey = key
                    AddAttributes.add_graph_attributes(
                        graph, newkey, global_attributes_dict[key])
                PyntacleExporter.Binary(graph, binary_path)

                if self.args.no_nodes:
                    binary_path_nonodes = os.path.join(
                        self.args.directory,
                        basename_graph + "_no_nodes" + ".graph")
                    sys.stdout.write(
                        u"Saving a binary of the input graph without the requested nodes at path: {}\n"
                        .format(os.path.basename(binary_path_nonodes)))
                    for key in global_attributes_dict_nonodes:
                        if (key == "average_shortest_path_length"
                                or key == 'median_shortest_path_length'):
                            newkey = re.sub("_shortest_path_length",
                                            "_global_shortest_path_length",
                                            key)
                        else:
                            newkey = key
                        AddAttributes.add_graph_attributes(
                            graph_nonodes, newkey,
                            global_attributes_dict_nonodes[key])

                    PyntacleExporter.Binary(graph_nonodes, binary_path_nonodes)

        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle metrics completed successfully\n")
        sys.exit(0)
Example #9
0
    def write_json_report(self,
                          report_dir=None,
                          report_dict=None,
                          suffix=None):
        """
        Create a JSON version of the report, possibly appending data to already existing results.

        Everything is written inside ``<report_dir>/pyntacle-plots_<suffix>`` (created if missing):

        * ``report.js``: ``var reportData = <json>``; if the file already exists its content is loaded
          first and the current results are merged into it
        * ``graph.js``: the graph exported through ``PyntacleExporter.JSON`` with the ``var graphData = `` prefix
        * ``index.html`` and ``index.css``: written from ``html_template`` and ``css_template``

        The method reads ``self.report_type`` (a ``ReportEnum`` member that selects how ``report_dict`` is
        interpreted), ``self.dat`` (used as the key that groups the results of the current run; presumably a
        timestamp, to be confirmed against the class constructor) and ``self.graph``.

        :param report_dir: directory in which the ``pyntacle-plots_<suffix>`` folder is created. Although it defaults to ``None``, a path is required (it is passed to ``os.path.join``)
        :param report_dict: the results to store. Its expected layout depends on ``self.report_type``: for key-player and group-centrality reports each value is a sequence whose first item is the solution (a list of node names for greedy searches, a list of such lists for brute-force searches) followed by the associated values; for communities and set reports it must hold an ``"algorithm"`` key; for global reports every item is copied into the ``"Info"`` section
        :param suffix: string appended to the ``pyntacle-plots_`` folder name. Although it defaults to ``None``, a string is required (it is concatenated to the folder name)
        :return: None
        """

        plots_path = os.path.join(report_dir, 'pyntacle-plots_' + suffix)
        os.makedirs(plots_path, exist_ok=True)

        json_report = os.path.join(plots_path, 'report.js')
        json_graph = os.path.join(plots_path, 'graph.js')
        index_path = os.path.join(plots_path, 'index.html')
        index_css_path = os.path.join(plots_path, 'index.css')

        if os.path.exists(json_report):
            # the report is stored on a single line as "var reportData = {...}"
            with open(json_report, 'r') as f:
                first_line = f.readline()
            # split only on the first " = ": the JSON payload itself may contain that sequence
            json_data = json.loads(first_line.split(' = ', 1)[1])
        else:
            json_data = {}

        # key-player and group-centrality reports share the same layout and differ only in the section name
        section = None
        if self.report_type in (ReportEnum.KP_bruteforce,
                                ReportEnum.KP_greedy):
            section = "Key-player"
        elif self.report_type in (ReportEnum.GR_bruteforce,
                                  ReportEnum.GR_greedy):
            section = "Group-centrality"

        if section is not None:
            # e.g. "ReportEnum.KP_greedy" -> "KP_greedy"
            type_name = str(self.report_type).split('.')[1]
            is_bruteforce = self.report_type in (ReportEnum.KP_bruteforce,
                                                 ReportEnum.GR_bruteforce)
            run_results = json_data.setdefault(section, {}).setdefault(
                type_name, {}).setdefault(self.dat, {})

            for k in report_dict:
                solution = report_dict[k][0]
                if is_bruteforce:
                    # brute-force searches may return several equivalent solutions
                    nodes = ';'.join(','.join(sol) for sol in solution)
                else:
                    nodes = ','.join(solution)

                entry = [nodes]
                # Adding numerical values of solutions
                entry.extend(report_dict[k][1:])
                run_results[k] = entry

        if self.report_type == ReportEnum.Communities:
            communities = report_dict["communities"]
            run_results = json_data.setdefault("Communities", {}).setdefault(
                report_dict["algorithm"], {}).setdefault(self.dat, {})
            for i, _ in enumerate(communities):
                # JSON object keys are always strings: using str(i) keeps the keys consistent with
                # those of a reloaded report and prevents duplicated "0"/0 entries on rewrite
                run_results[str(i)] = [communities[i][1]]

        if self.report_type == ReportEnum.Set:
            run_results = json_data.setdefault("Set", {}).setdefault(
                report_dict["algorithm"], {}).setdefault(self.dat, {})
            for k in report_dict:
                if k == 'algorithm':
                    continue
                run_results[k] = [
                    report_dict[k]['nodes'],
                    ';'.join('-'.join(e) for e in report_dict[k]['edges'])
                ]

        # Adding minimal graph info
        info = json_data.setdefault("Info", {})
        info['graph name'] = self.graph["name"][0]
        info['nodes'] = len(self.graph.vs())
        info['edges'] = len(self.graph.es())
        info['components'] = len(self.graph.components())

        # Adding global metrics to the basic info, if available
        if self.report_type == ReportEnum.Global:
            for key in report_dict:
                info[key] = report_dict[key]

        # exporting results in json format
        with open(json_report, 'w') as f:
            f.write("var reportData = ")
            json.dump(json_data, f, ensure_ascii=False)

        # exporting graph in json format
        PyntacleExporter.JSON(self.graph,
                              json_graph,
                              prefix="var graphData = ")

        # writing the html page and its stylesheet
        with open(index_path, 'w') as f:
            f.write(html_template)
        with open(index_css_path, 'w') as f:
            f.write(css_template)