Ejemplo n.º 1
0
    def Binary(file: str) -> Graph:
        r"""
        Loads a binary file  (a :py:class:`pickle` object) that stores a :py:class:`igraph.Graph` object and makes it
        ready to be used for Pyntacle.

        We refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#bin>`_
        within the Pyntacle official page for more details regarding  the specifics of :py:class:`igraph.Graph`
        binary objects that can be serialized by Pyntacle.

        .. warning:: the file is deserialized with :py:mod:`pickle`, which can execute arbitrary code while loading. Only import binaries that come from a trusted source.

        :param str file: the location of the binary file

        :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

        :raise WrongArgumentError: if ``file`` is not recognized as a binary file
        :raise IOError: if the binary does not contain a :py:class:`igraph.Graph` object
        :raise IllegalGraphSizeError: if the graph has less than 2 nodes or no edges
        """

        if not is_binary_file(file):
            raise WrongArgumentError(u"file is not a binary")

        # NOTE(security): pickle is unsafe on untrusted input; kept because it is
        # the on-disk format, but the handle is now closed deterministically.
        with open(file, "rb") as handle:
            graph = pickle.load(handle)

        if not isinstance(graph, Graph):
            raise IOError(u"binary is not a graph object")

        # Either condition alone violates "at least 2 nodes linked by one edge",
        # hence `or` (the previous `and` let edgeless graphs through).
        if graph.ecount() < 1 or graph.vcount() < 2:
            raise IllegalGraphSizeError(
                u"Graph must contain at least 2 nodes linked by one edge")

        utils = gu(graph=graph)
        # the graph is named after the file, stripped of directory and extension
        utils.graph_initializer(
            graph_name=os.path.splitext(os.path.basename(file))[0])

        if graph.is_directed():
            sys.stdout.write(u"Converting graph to undirect\n")
            graph.to_undirected()

        utils.check_graph()
        graph = utils.get_graph()
        sys.stdout.write(u"Binary from {} imported\n".format(
            os.path.basename(file)))
        return graph
Ejemplo n.º 2
0
    def run(self):
        """
        Run the ``keyplayer`` command (``kp-finder`` or ``kp-info``) end to end.

        The method validates the command line arguments stored in ``self.args``,
        imports the input graph, computes the requested key player metrics,
        prints a summary to stdout, writes the report and, optionally, saves the
        graph to a binary file and draws one plot per computed metric.

        The process is always terminated through :py:func:`sys.exit`: exit code
        ``1`` on any validation error, exit code ``0`` on success.

        :raise Error: if ``self.args`` has no ``which`` attribute (no subcommand was selected)
        """
        if not hasattr(self.args, 'which'):
            raise Error(
                u"usage: pyntacle.py keyplayer {kp-finder, kp-info} [options]'"
            )

        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()

        # m-reach cannot be computed without a distance, whether it is requested
        # explicitly or implicitly through "pos"/"all"
        if self.args.m_reach is None and self.args.type in [
                "pos", "all", "mreach", "m-reach"
        ]:
            sys.stderr.write(
                u"m-reach distance must be provided for computing m-reach. Quitting\n"
            )
            sys.exit(1)

        # Checking input file
        if self.args.input_file is None:
            sys.stderr.write(
                u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
            )
            sys.exit(1)

        if not os.path.exists(self.args.input_file):
            # fatal message: goes to stderr like every other error in this method
            sys.stderr.write(u"Cannot find {}. Is the path correct?\n".format(
                self.args.input_file))
            sys.exit(1)

        header = not self.args.no_header

        # Load Graph
        sys.stdout.write(import_start)
        sys.stdout.write(u"Importing graph from file\n")
        graph = GraphLoad(self.args.input_file,
                          format_dictionary.get(self.args.format, "NA"),
                          header,
                          separator=self.args.input_separator).graph_load()

        # init graph utils class
        utils = gu(graph=graph)

        if hasattr(self.args, "nodes"):
            self.args.nodes = self.args.nodes.split(",")

            if not utils.nodes_in_graph(self.args.nodes):
                sys.stderr.write(
                    "One or more of the specified nodes {} is not present in the graph. Please check your spelling and the presence of empty spaces between node names. Quitting\n"
                    .format(self.args.nodes))
                sys.exit(1)

        if self.args.largest_component:
            try:
                graph = utils.get_largest_component()
                sys.stdout.write(
                    u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n"
                    .format(graph.vcount(), graph.ecount()))
                # reinitialize graph utils class
                utils.set_graph(graph)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n"
                )
                sys.exit(1)

            # the requested nodes may have been dropped with the smaller components
            if hasattr(self.args, 'nodes'):
                if not utils.nodes_in_graph(self.args.nodes):
                    sys.stderr.write(
                        "One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n"
                    )
                    sys.exit(1)

        if hasattr(self.args, "k_size") and self.args.k_size >= graph.vcount():
            sys.stderr.write(
                "The 'k' argument ({}) must be strictly less than the graph size({}). Quitting\n"
                .format(self.args.k_size, graph.vcount()))
            sys.exit(1)

        if 'implementation' in graph.attributes():
            implementation = graph['implementation']
        else:
            implementation = CmodeEnum.igraph

        # the output directory is created later, right before writing results
        createdir = not os.path.isdir(self.args.directory)

        # control plot dimensions
        if self.args.plot_dim:  # define custom format
            try:
                self.args.plot_dim = [
                    int(dim) for dim in self.args.plot_dim.split(",")
                ]
                if any(dim <= 0 for dim in self.args.plot_dim):
                    raise ValueError

            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of positive integers (e.g. 1920,1080). Quitting\n"
                )
                sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            plot_size = (800, 600)

            if graph.vcount() > 150:
                plot_size = (1600, 1600)

        # initialize reporter and result containers for later usage
        r = PyntacleReporter(graph=graph)
        initial_results = {}
        results = OrderedDict()

        sys.stdout.write(section_end)
        sys.stdout.write(run_start)

        if self.args.which == 'kp-finder':

            # Greedy optimization
            if self.args.implementation == "greedy":
                report_type = ReportEnum.KP_greedy
                kp_runner = gow(graph=graph)

                sys.stdout.write(
                    u"Using greedy optimization algorithm for searching optimal key player set for the requested key player metrics\n"
                )
                sys.stdout.write("\n")

                if self.args.type in ('F', 'neg', 'all'):
                    sys.stdout.write(
                        u"KP-NEG: Finding optimal set of nodes of size {0} that maximizes F\n"
                        .format(self.args.k_size))

                    initial_results[KpnegEnum.F.name] = kpp.F(graph)
                    kp_runner.run_fragmentation(self.args.k_size,
                                                KpnegEnum.F,
                                                seed=self.args.seed,
                                                cmode=implementation)
                    sys.stdout.write("\n")

                if self.args.type in ('dF', 'neg', 'all'):
                    sys.stdout.write(
                        u"KP-NEG: Finding optimal set of nodes of size {0} that maximizes dF\n"
                        .format(self.args.k_size))

                    initial_results[KpnegEnum.dF.name] = kpp.dF(
                        graph, cmode=implementation)
                    kp_runner.run_fragmentation(
                        self.args.k_size,
                        KpnegEnum.dF,
                        max_distance=self.args.max_distance,
                        seed=self.args.seed,
                        cmode=implementation)
                    sys.stdout.write("\n")

                if self.args.type in ('dR', 'pos', 'all'):
                    sys.stdout.write(
                        u"KP-POS: Finding optimal set of nodes of size {0} that maximizes dR\n"
                        .format(self.args.k_size))
                    kp_runner.run_reachability(
                        self.args.k_size,
                        KpposEnum.dR,
                        max_distance=self.args.max_distance,
                        seed=self.args.seed,
                        cmode=implementation)
                    sys.stdout.write("\n")

                if self.args.type in ('mreach', 'pos', 'all'):
                    sys.stdout.write(
                        u"KP-POS: Finding optimal set of nodes of size {0} that maximizes the m-reach at distance {1}\n"
                        .format(self.args.k_size, self.args.m_reach))
                    kp_runner.run_reachability(
                        self.args.k_size,
                        KpposEnum.mreach,
                        m=self.args.m_reach,
                        max_distance=self.args.max_distance,
                        seed=self.args.seed,
                        cmode=implementation)
                    sys.stdout.write("\n")

            elif self.args.implementation == "brute-force":
                report_type = ReportEnum.KP_bruteforce
                kp_runner = bfw(graph=graph)
                sys.stdout.write(
                    u"Using brute-force search algorithm to find the best key player set(s)\n"
                )
                sys.stdout.write(sep_line)

                if self.args.type in ('F', 'neg', 'all'):

                    sys.stdout.write(
                        u"KP-NEG: Finding best set (or sets) of nodes of size {0} that holds the maximum F\n"
                        .format(self.args.k_size))

                    initial_results[KpnegEnum.F.name] = kpp.F(graph)
                    kp_runner.run_fragmentation(self.args.k_size,
                                                KpnegEnum.F,
                                                threads=self.args.threads)
                    sys.stdout.write("\n")

                if self.args.type in ('dF', 'neg', 'all'):
                    sys.stdout.write(
                        u"KP-NEG: Finding best set(s) of nodes of size {0} that holds the maximum dF\n"
                        .format(self.args.k_size))

                    initial_results[KpnegEnum.dF.name] = kpp.dF(
                        graph, cmode=CmodeEnum.igraph)
                    kp_runner.run_fragmentation(
                        self.args.k_size,
                        KpnegEnum.dF,
                        max_distance=self.args.max_distance,
                        cmode=CmodeEnum.igraph,
                        threads=self.args.threads)

                    sys.stdout.write("\n")

                if self.args.type in ('dR', 'pos', 'all'):
                    sys.stdout.write(
                        u"KP-POS: Finding best set(s) of nodes of size {0} that hold the maximum dR\n"
                        .format(self.args.k_size))
                    kp_runner.run_reachability(
                        self.args.k_size,
                        KpposEnum.dR,
                        max_distance=self.args.max_distance,
                        cmode=CmodeEnum.igraph,
                        threads=self.args.threads)

                    sys.stdout.write(sep_line)

                if self.args.type in ('mreach', 'pos', 'all'):
                    sys.stdout.write(
                        u"KP-POS: Finding the best set(s) of nodes of size {0} that maximizes the m-reach at distance {1}\n"
                        .format(self.args.k_size, self.args.m_reach))

                    kp_runner.run_reachability(
                        self.args.k_size,
                        KpposEnum.mreach,
                        m=self.args.m_reach,
                        max_distance=self.args.max_distance,
                        cmode=CmodeEnum.igraph,
                        threads=self.args.threads)

                    sys.stdout.write("\n")

            # get report results
            results.update(kp_runner.get_results())
            sys.stdout.write(section_end)
            sys.stdout.write(summary_start)
            sys.stdout.write(
                u"Node set size for key player search: {}\n".format(
                    str(self.args.k_size)))

            sys.stdout.write("\n")
            for kp in results.keys():

                if len(results[kp]
                       [0]) > 1 and self.args.implementation == 'brute-force':
                    plurals = ['s', 'are']
                else:
                    plurals = ['', 'is']

                # the case in which there's no solution
                # NOTE(review): in brute-force mode the placeholder below is a flat
                # list, so the per-set join further down iterates the characters of
                # "None" -- confirm the shape expected by the reporter before changing
                if results[kp][0][0] is None:
                    results[kp][0] = ["None"]

                if self.args.implementation == 'brute-force':
                    list_of_results = "\n".join(
                        ['(' + ', '.join(x) + ')' for x in results[kp][0]])
                else:
                    list_of_results = "(" + ", ".join(results[kp][0]) + ")"

                if kp == KpnegEnum.F.name or kp == KpnegEnum.dF.name:
                    # joining initial results with final ones
                    results[kp].append(initial_results[kp])

                    sys.stdout.write(
                        u"Best key player set{0} of size {1} for negative key player index {2} {3}:\n{4}\nFinal {2} value: {5}\nStarting graph {2} was {6}\n"
                        .format(plurals[0], self.args.k_size, kp, plurals[1],
                                list_of_results, results[kp][1],
                                results[kp][2]))
                    sys.stdout.write("\n")

                elif kp == KpposEnum.dR.name:
                    sys.stdout.write(
                        u"Best key player set{0} of size {1} for positive key player index {2} {3}:\n{4}\nFinal {2} value: {5}\n"
                        .format(plurals[0], self.args.k_size, kp, plurals[1],
                                list_of_results, results[kp][1]))
                    sys.stdout.write("\n")

                elif kp == KpposEnum.mreach.name:
                    # the m-reach distance is stored as the last element of the entry
                    results[kp].append(self.args.m_reach)
                    node_perc_reached = ((self.args.k_size + results[kp][1]) /
                                         graph.vcount()) * 100
                    if node_perc_reached == 100:
                        node_perc_reached = int(node_perc_reached)
                    else:
                        node_perc_reached = round(node_perc_reached, 2)
                    sys.stdout.write(
                        u'Key player set{0} of size {1} for positive key player index m-reach, using at best '
                        '{3} steps {4}:\n{5}\nwith value {6} on {8} (number of nodes reached on total number of nodes)\nThe total percentage of nodes, which '
                        'includes the kp-set, is {7}%\n'.format(
                            plurals[0], self.args.k_size, kp,
                            self.args.m_reach, plurals[1], list_of_results,
                            results[kp][1], node_perc_reached, graph.vcount()))
                    sys.stdout.write("\n")
            sys.stdout.write(section_end)

        # kpinfo: compute kpmetrics for a set of predetermined nodes
        elif self.args.which == 'kp-info':
            report_type = ReportEnum.KP_info
            initial_results = OrderedDict()
            kp_runner = kpw(graph=graph, nodes=self.args.nodes)
            results = OrderedDict()

            sys.stdout.write(u"Input node set: ({})\n".format(', '.join(
                self.args.nodes)))
            sys.stdout.write("\n")

            if self.args.type in ('F', 'neg', 'all'):
                initial_results[KpnegEnum.F.name] = kpp.F(graph)
                kp_runner.run_fragmentation(KpnegEnum.F)
                sys.stdout.write("\n")
            if self.args.type in ('dF', 'neg', 'all'):
                initial_results[KpnegEnum.dF.name] = kpp.dF(
                    graph,
                    cmode=implementation,
                    max_distance=self.args.max_distance)
                kp_runner.run_fragmentation(
                    KpnegEnum.dF,
                    max_distance=self.args.max_distance,
                    cmode=implementation)
                sys.stdout.write("\n")

            if self.args.type in ('dR', 'pos', 'all'):
                kp_runner.run_reachability(KpposEnum.dR,
                                           max_distance=self.args.max_distance,
                                           cmode=implementation)
                sys.stdout.write("\n")

            # kp-finder spells this type "mreach"; both spellings are accepted here
            # so that the two subcommands agree
            if self.args.type in ('mreach', 'm-reach', 'pos', 'all'):
                kp_runner.run_reachability(KpposEnum.mreach,
                                           m=self.args.m_reach,
                                           max_distance=self.args.max_distance,
                                           cmode=implementation)
                sys.stdout.write("\n")

            sys.stdout.write(section_end)
            results.update(kp_runner.get_results())
            sys.stdout.write(summary_start)
            for metric in results.keys():

                if metric == KpnegEnum.F.name or metric == KpnegEnum.dF.name:
                    results[metric].append(initial_results[metric])
                    sys.stdout.write(
                        u"Removing node set \n({2})\ngives a {0} value of {3}\nStarting graph {0}: {1}\n"
                        .format(metric, results[metric][2],
                                ', '.join(self.args.nodes),
                                results[metric][1]))
                    sys.stdout.write("\n")

                elif metric == KpposEnum.mreach.name:
                    # the m-reach distance is stored as the last element of the entry
                    results[metric].append(self.args.m_reach)
                    perc_node_reached = round(
                        (results[metric][1] + len(self.args.nodes)) /
                        graph.vcount() * 100, 3)
                    sys.stdout.write(
                        u"The m-reach of node set:\n({0})\nis {1} on {4} (number of nodes reached on total number of "
                        u"nodes)\nThis means it can reach the {2}% of remaining nodes in the graph nodes in at most {3} steps\n"
                        .format(', '.join(results[metric][0]),
                                results[metric][1], perc_node_reached,
                                self.args.m_reach, graph.vcount()))
                    sys.stdout.write("\n")

                else:  # dR case
                    sys.stdout.write(
                        "The {0} value for node set:\n({1})\nis {2}\n".format(
                            metric, ', '.join(results[metric][0]),
                            results[metric][1]))
                    sys.stdout.write("\n")
            sys.stdout.write(section_end)

        sys.stdout.write(report_start)
        sys.stdout.write("Writing Results\n")
        # check output directory
        if createdir:
            sys.stdout.write(
                u"WARNING: output directory does not exist, {} will be created\n"
                .format(os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        # reporting and plotting part
        sys.stdout.write(u"Producing report in {} format\n".format(
            self.args.report_format))

        r.create_report(report_type=report_type, report=results)

        r.write_report(report_dir=self.args.directory,
                       format=self.args.report_format)

        if self.args.save_binary:
            # reproduce octopus behaviour by adding kp information to the graph before saving it
            sys.stdout.write(
                u"Saving graph to a binary file (ending in .graph)\n")

            # `results` is deliberately left untouched here: renaming keys while
            # iterating an OrderedDict is unsafe, and the plotting step below
            # still looks the metrics up by their original names
            for key in results:
                values = results[key]
                attr_base = key

                if key == KpposEnum.mreach.name:
                    # encode the m-reach distance (last element of the entry) in
                    # the attribute name and drop it from the stored values
                    attr_base = "_".join([key, str(values[-1])])
                    values = values[:-1]

                if self.args.which == "kp-finder":
                    if self.args.implementation == "brute-force":
                        suffix = "bruteforce"
                        attr_key = tuple(
                            tuple(sorted(tuple(x))) for x in values[0])

                    else:
                        suffix = "greedy"
                        attr_key = tuple(sorted(tuple(values[0])))

                else:
                    suffix = "info"
                    attr_key = tuple(sorted(tuple(values[0])))

                attr_name = "_".join([attr_base, suffix])
                attr_val = values[1]

                if attr_name in graph.attributes():
                    if not isinstance(graph[attr_name], dict):
                        sys.stdout.write(
                            "WARNING: attribute {} does not point to a dictionary, will overwrite\n"
                            .format(attr_name))
                        AddAttributes.add_graph_attribute(
                            graph, attr_name, {attr_key: attr_val})
                    else:
                        if attr_key in graph[attr_name]:
                            # report the attribute name (not the metric value) in the warning
                            sys.stdout.write(
                                "WARNING: {} already present in the {} graph attribute, will overwrite\n"
                                .format(attr_key, attr_name))
                        graph[attr_name].update({attr_key: attr_val})
                else:
                    AddAttributes.add_graph_attribute(graph, attr_name,
                                                      {attr_key: attr_val})

            binary_prefix = "_".join([
                os.path.splitext(os.path.basename(self.args.input_file))[0],
                self.args.which, self.date
            ])
            binary_path = os.path.join(self.args.directory,
                                       binary_prefix + ".graph")
            PyntacleExporter.Binary(graph, binary_path)

        # generate and output plot
        if not self.args.no_plot and graph.vcount() < 1000:

            sys.stdout.write(u"Generating network plots in {} format\n".format(
                self.args.plot_format))
            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

            if not os.path.isdir(plot_dir):
                os.mkdir(plot_dir)

            plot_graph = PlotGraph(graph=graph)

            plot_format = self.args.plot_format
            plot_graph.set_node_labels(
                labels=graph.vs()["name"])  # assign node labels to graph
            pal = sns.color_palette("Accent", 8).as_hex()
            framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()

            other_nodes_colour = pal[2]
            other_frame_colour = framepal[2]

            other_nodes_size = 25
            other_edge_width = 1

            # results are keyed by the enum member name, so the same name is used
            # for every m-reach test in the plotting code
            mreach_key = KpposEnum.mreach.name
            # palette slot of each metric; any other metric (dR) uses slot 3
            palette_slot = {"F": 0, "dF": 1, mreach_key: 4}

            for metric in results:
                if self.args.which == 'kp-finder' and self.args.implementation == "brute-force":
                    # merge all the equally good sets into a single list of nodes
                    results[metric][0] = list(
                        set(list(chain(*results[metric][0]))))

                key_nodes = results[metric][0]
                slot = palette_slot.get(metric, 3)
                kp_nodes_colour = pal[slot]
                kp_frames_colour = framepal[slot]

                # key player nodes get the metric colour, a darker frame and a bigger size
                node_colors = [
                    kp_nodes_colour
                    if x["name"] in key_nodes else other_nodes_colour
                    for x in graph.vs()
                ]
                node_frames = [
                    kp_frames_colour
                    if x["name"] in key_nodes else other_frame_colour
                    for x in graph.vs()
                ]
                plot_graph.set_node_colors(colors=node_colors)

                node_sizes = [
                    35 if x["name"] in key_nodes else other_nodes_size
                    for x in graph.vs()
                ]
                plot_graph.set_node_sizes(sizes=node_sizes)

                if metric != mreach_key or self.args.m_reach > 5:
                    # thicken every edge that touches a key player node
                    edge_widths = [
                        5 if any(y in key_nodes
                                 for y in x["adjacent_nodes"]) else
                        other_edge_width for x in graph.es()
                    ]
                    if metric == mreach_key:
                        sys.stdout.write(
                            u"WARNING: you chose a very high value of m-reach, the edge width "
                            "may be too big, hence it may not be represented correctly\n"
                        )

                else:
                    # m-reach with a small distance: edges get thinner at each
                    # step away from the key player set
                    indices = utils.get_node_indices(key_nodes)

                    # define a starting list of values
                    edge_widths = [other_edge_width] * graph.ecount()

                    # width of the edges leaving the kp-set; decreases by 2 per step
                    mreach_width = (self.args.m_reach * 2) + 2

                    memory_indices = indices
                    step_before = indices

                    for _ in range(1, self.args.m_reach + 1):
                        neighbours = Graph.neighborhood(graph,
                                                        vertices=indices)

                        # flat out list of indices
                        indices = list(chain(*neighbours))
                        # nodes reached for the first time at this step
                        remaining_indices = list(
                            set(indices) - set(memory_indices))

                        mreach_edge_ids = []

                        for elem in step_before:
                            for el in remaining_indices:
                                if Graph.are_connected(graph, elem, el):
                                    mreach_edge_ids.append(
                                        graph.get_eid(elem, el))

                        for edge in mreach_edge_ids:
                            edge_widths[edge] = mreach_width

                        mreach_width = mreach_width - 2
                        memory_indices = memory_indices + remaining_indices
                        step_before = remaining_indices

                plot_graph.set_edge_widths(edge_widths)

                plot_graph.set_layouts(self.args.plot_layout)

                # NOTE(review): `["name"][0]` is just the literal "name"; this
                # looks like it was meant to be the graph name -- confirm before changing
                plot_path = os.path.join(
                    plot_dir,
                    "_".join([self.args.which, ["name"][0], metric, self.date
                              ]) + "." + plot_format)
                if os.path.exists(plot_path):
                    sys.stdout.write(
                        u"WARNING: a plot with the name ({}) already exists, overwriting it\n"
                        .format(os.path.basename(plot_path)))

                plot_graph.plot_graph(path=plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=node_frames)

        elif graph.vcount() >= 1000:
            sys.stdout.write(
                u"The graph has too many nodes ({}, we plot nodes with a maximum of 1000 nodes). It will not be drawn\n"
                .format(graph.vcount()))

        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle keyplayer completed successfully\n")

        sys.exit(0)
Ejemplo n.º 3
0
    def Sif(file: str, sep: str or None = None, header: bool = True) -> Graph:
        r"""
        Imports a Simple Interaction File (SIF), a relaxed network file format used by several visualization and analysis tools such as `Cytoscape <https://cytoscape.org/>`_

        For more specifications on the nature of the SIF we refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#sif>`_
        on Pyntacle website and to the `Cytoscape documentation <http://wiki.cytoscape.org/Cytoscape_User_Manual/Network_Formats>`_

        Each non-empty line is interpreted according to its number of fields:

        * 1 field: the node is added as an isolate
        * 3 or more fields: ``source interaction target [target ...]``; one undirected edge is created between the source and each target
        * 2 fields: the line is rejected as malformed

        An edge listed in both directions (``A x B`` and ``B x A``) is stored once, and all the distinct interaction types found for that pair are collected.

        .. note:: SIF is a flexible file format, in which the column order is generally not important. Pyntacle limits this flexibility by always reporting the source node in the 1st column, the interaction type in the 2nd column and the target node in the 3rd column.

        .. note:: We assume that the SIF does not contain any vertex attribute. To import vertex attributes, please use the :class:`~pyntacle.io_stream.import_attributes.ImportAttributes`

        .. note:: The interaction types are stored (sorted lexicographically) in the edge attribute ``sif_interaction``; the header of the interaction column (if present) is stored in the graph attribute ``sif_interaction_name``.

        :param str file: the path to the target SIF
        :param None,str sep: The field separator inside the network file. If :py:class:`None` (default) lines are split on any run of whitespace. Otherwise, you can place the string representing the column separator.
        :param bool header: Whether the header is present or not (default is ``True``). If present, the name of the interaction (2nd column) will be stored in the graph attribute ``sif_interaction_name``; otherwise that attribute is set to :py:class:`None`

        :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

        :raise ImproperlyFormattedFileError: if a line holds exactly two fields. The reported number is the 1-based line number within the file
        """

        graph = Graph()

        nodeslist = []
        # undirected node pair -> interaction types seen for that pair,
        # in order of first appearance in the file
        edgeslist = OrderedDict()

        with open(file, "r") as f:
            if header:
                graph["sif_interaction_name"] = f.readline().strip().split(
                    sep)[1]
            else:
                graph["sif_interaction_name"] = None

            # number lines as they appear in the file (the header, when
            # present, is line 1) so that error messages point to the
            # actual offending line
            first_data_line = 2 if header else 1

            for lineno, line in enumerate(f, start=first_data_line):
                line = line.strip()
                if not line:
                    # blank line: must be tested before splitting, because
                    # "".split(sep) returns [''] for an explicit separator
                    # and would otherwise create a node with an empty name
                    continue

                elem = line.split(sep)

                if len(elem) == 1:  # add the single node as isolate
                    nodeslist.append(elem[0])
                    continue

                if len(elem) == 2:
                    raise ImproperlyFormattedFileError(
                        "line {} is malformed".format(lineno))

                # 3 or more fields: source, interaction, one or more targets
                source = elem[0]
                interaction = elem[1]
                nodeslist.append(source)

                for target in elem[2:]:
                    nodeslist.append(target)

                    # reuse the key already stored for this pair, whichever
                    # orientation it was first met with
                    key = (source, target)
                    if key not in edgeslist and (target, source) in edgeslist:
                        key = (target, source)

                    interactions = edgeslist.setdefault(key, [])
                    if interaction not in interactions:
                        interactions.append(interaction)

        # vertices are added once each, in lexicographic order
        graph.add_vertices(sorted(set(nodeslist)))
        graph.add_edges(edgeslist.keys())
        # sort interactions lexicographically
        graph.es()["sif_interaction"] = [
            sorted(x) for x in edgeslist.values()
        ]

        # initialize graph
        util = gu(graph=graph)
        util.graph_initializer(
            graph_name=os.path.splitext(os.path.basename(file))[0])
        graph = util.get_graph()

        sys.stdout.write(u"SIF from {} imported\n".format(file))

        return graph
Ejemplo n.º 4
0
    def Dot(file: str, **kwargs):
        r"""
        Import a DOT file into a :py:class:`igraph.Graph` object.

        Dot is a network file format designed for network visualization
        by `GraphViz <https://www.graphviz.org/>`_ and other tools to trustfully reproduce network properties graphically.

        The main documentation on Dot can be found `here <https://www.graphviz.org/doc/info/lang.html>`_

        We refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#dot>`_
        within the Pyntacle official page for more details regarding the specifics of Dot Files.

        The file is parsed as: an optional first line starting with ``/*`` (dropped), the ``graph [name] {`` opening,
        optional graph attribute blocks, node blocks, edge blocks and the closing ``}``. The graph name is taken, in order
        of priority, from a ``name`` graph attribute, from the name following the ``graph`` keyword, or from the file name.

        .. warning:: the attributes of the DOT file object may not be imported correctly. for this reason, we recommend to import these attributes by means of the :class:`~pyntacle.io_stream.import_attributes.ImportAttributes` module

        :param str file: the path to the target DOT file
        :param kwargs: optional arguments to specify additional keywords that are present in the imported DOT format (not read by this method's body)

        :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_
        """
        # initialize empty graph
        graph = Graph()
        graph.vs()["name"] = None
        graph.es()["sif_interaction"] = None

        # read the whole file at once; the context manager guarantees the
        # handle is closed even if parsing fails later
        with open(file) as dotfile:
            dotdata = dotfile.read()

        # a leading comment is only recognised on the first line, which is
        # dropped entirely before parsing
        if dotdata.startswith('/*'):
            dotdata = dotdata.split("\n", 1)[1]

        # Parsing dot file
        graph_beginning = 'graph' + Optional(
            Word(alphanums))('initial_name') + Word('{')

        graph_element = 'graph'
        graph_ATTR = Word(alphanums + '_"-') + Suppress('=') + Word(alphanums +
                                                                    '"[]\'_?-')
        graph_indented_block = nestedExpr('[', '];', content=Group(graph_ATTR))
        graph_elementBlock = graph_element + Optional(graph_indented_block)

        node_element = Word(alphanums)
        node_ATTR = Word(alphanums + '_"-') + Suppress('=') + Word(alphanums +
                                                                   '"_?-')
        node_elementBlock = node_element + nestedExpr(
            '[', '];', content=Group(node_ATTR))

        edgeformat = Word(alphanums) | Suppress('{') + Group(
            delimitedList(Word(alphanums), delim=White())) + Suppress('}')
        edge_element = Group(
            Word(alphanums) + OneOrMore(
                Optional(Suppress('->')) + Optional(Suppress('--')) +
                edgeformat))
        edge_ATTR = Word(alphanums + '-_"') + Suppress('=') + Word(alphanums +
                                                                   '"-_?')
        edge_indented_block = nestedExpr('[', ']', content=Group(edge_ATTR))
        edge_elementBlock = Group(edge_element +
                                  Optional(edge_indented_block) +
                                  Suppress(';'))

        graph_end = '}'

        header_parser = graph_beginning + ZeroOrMore(
            graph_elementBlock.setResultsName("graph_attrs_block", listAllMatches=False)) + \
                        ZeroOrMore(node_elementBlock)("node_attrs_block") + \
                        ZeroOrMore(edge_elementBlock)("edge_attrs_block") + graph_end

        tokens = header_parser.parseString(dotdata)
        # Converting lists to dictionaries
        graph_attrs_dict = dot_attrlist_to_dict(tokens.graph_attrs_block)
        node_attrs_dict = dot_attrlist_to_dict(tokens.node_attrs_block)
        edge_attrs_dict = dot_edgeattrlist_to_dict(tokens.edge_attrs_block)

        if tokens.initial_name:
            graphname = tokens.initial_name
        else:
            graphname = os.path.splitext(os.path.basename(file))[0]

        # strips the brackets/quotes surrounding an attribute value; written
        # as a raw string so the backslashes reach the regex engine without
        # relying on invalid string escape sequences, and compiled once
        # instead of once per attribute
        attr_cleaner = re.compile(
            r"[\[\'\"]*([\w\.\-\ \:\+\(\)\{\}\=]*)[\]\'\"]*")

        for a in graph_attrs_dict:
            for k in graph_attrs_dict[a]:
                clean_attr_name = attr_cleaner.search(
                    graph_attrs_dict[a][k]).group(1)
                if k == "name":
                    # a "name" graph attribute overrides any other graph name
                    graphname = clean_attr_name
                else:
                    AddAttributes.add_graph_attribute(graph, k,
                                                      clean_attr_name)

        # maps the node identifiers used in the DOT file to the vertex names
        ids_to_names = {}
        for a in node_attrs_dict:
            for k in node_attrs_dict[a]:
                if node_attrs_dict[a][k] not in graph.vs(
                )["name"] and k == 'name':
                    nodename = node_attrs_dict[a][k]
                    graph.add_vertex(name=nodename)
                    ids_to_names[a] = nodename
                if k != 'name':
                    # NOTE(review): this relies on the "name" key having been
                    # visited first for the current node; otherwise `nodename`
                    # still refers to the previous node (or is undefined for
                    # the first one) - confirm the key order produced by
                    # dot_attrlist_to_dict
                    AddAttributes.add_node_attribute(graph, k,
                                                     [node_attrs_dict[a][k]],
                                                     [nodename])

        for a in edge_attrs_dict:
            if graph.are_connected(ids_to_names[a[0]], ids_to_names[a[1]]):
                sys.stdout.write(
                    u"An edge already exists between node %s and node %s,"
                    "skipping this edge (we recommend to check again your file\n"
                    % (a[0], a[1]))
            else:
                graph.add_edge(source=ids_to_names[a[0]],
                               target=ids_to_names[a[1]])

        if Graph.is_directed(graph):
            sys.stdout.write(u"Converting graph to undirect\n")
            graph.to_undirected()

        util = gu(graph=graph)
        util.graph_initializer(graph_name=graphname)
        graph = util.get_graph()

        # edge attributes are attached only after the graph is initialized
        for a in edge_attrs_dict:
            for k in edge_attrs_dict[a]:
                AddAttributes.add_edge_attribute(
                    graph, k, [edge_attrs_dict[a][k]],
                    [(ids_to_names[a[0]], ids_to_names[a[1]])])

        sys.stdout.write(u"DOT from {} imported\n".format(
            os.path.basename(file)))
        return graph
Ejemplo n.º 5
0
    def AdjacencyMatrix(file: str,
                        sep: str or None = None,
                        header: bool = True) -> Graph:
        r"""
        Imports an adjacency matrix file to a :py:class:`igraph.Graph` object ready to be used by Pyntacle.

        For more information on adjacency matrices we refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#adjm>`_
        on Pyntacle website.

        .. note:: We support unweighted undirected Adjacency Matrices, so only zeroes and ones are allowed in the input file.

        .. note:: If an header is present, it **must** contain unique names (two nodes can't have the same ID). if not, an error will be raised. The names of the node will be assigned to the vertex ``name`` attribute. If the header is not present, the node "name" attribute will be the corresponding sequential index assigned by igraph.

        .. note:: When the header is present, rows and columns are sorted alphabetically before the graph is built. Only the upper triangle of the matrix is used to create the edges.

        :param str file: the path to the file storing the adjacency matrix
        :param None,str sep: The field separator inside the network file. If :py:class:`None` (default) the separator check is skipped and the value is handed to :py:func:`pandas.read_csv` as is. Otherwise, you can place the string representing the column separator.
        :param bool header: Whether the header is present or not (default is ``True``)

        :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

        :raise WrongArgumentError: if ``sep`` is specified and not found in the first line of the adjacency matrix
        :raise ValueError: if the matrix is not squared
        """

        if not AdjmUtils(file=file, header=header, sep=sep).is_squared():
            raise ValueError(u"Matrix is not squared")

        # only the first line is needed to validate the separator, so the
        # handle is released before pandas re-opens the file
        with open(file, "r") as adjmatrix:
            first_line = adjmatrix.readline().strip()

        # `None in "text"` raises TypeError, so the membership test is only
        # meaningful for an explicit separator
        if sep is not None and sep not in first_line:
            raise WrongArgumentError(
                u'The specified separator "{}" is not present in the adjacency matrix file'
                .format(sep))

        if header:
            # the first row holds the column names, the first column the
            # row names
            matrix = pd.read_csv(filepath_or_buffer=file,
                                 sep=sep,
                                 index_col=0)
            # force index to string, in case is not identified properly
            matrix.index = matrix.index.map(str)
            # sort columns, then rows, alphabetically
            matrix = matrix.reindex(sorted(matrix.columns), axis=1)
            matrix = matrix.reindex(sorted(matrix.index), axis=0)
            node_names = matrix.columns.values.tolist()

        else:
            matrix = pd.read_csv(filepath_or_buffer=file,
                                 sep=sep,
                                 header=None)
            # no labels available: use the positional row index as name
            node_names = list(map(str, matrix.index))

        graph = Graph.Adjacency(matrix.values.tolist(), mode="UPPER")
        util = gu(graph=graph)
        util.graph_initializer(graph_name=os.path.splitext(
            os.path.basename(file))[0],
                               node_names=node_names)
        graph = util.get_graph()

        sys.stdout.write(u"Adjacency matrix from {} imported\n".format(
            os.path.basename(file)))
        return graph
Ejemplo n.º 6
0
    def EdgeList(file: str, sep: str or None = None, header: bool = False):
        r"""
        Takes an edge list and turns it into a :py:class:`igraph.Graph` object that stores the input edge list.

        An edge list is a text file that represents all the edges in a graph with a scheme, such as:

        +-------+-------+
        | nodeA | nodeB |
        | nodeB | nodeA |
        +-------+-------+

        We accept undirected edge list, so the node pairs must be repeated twice, so the reciprocal of any edge must be
        present in the edge list file.

        For more specifications on the nature of edge lists we refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#egl>`_
        on Pyntacle website.

        .. note:: only the first two columns of the edge list are read, any additional column will be skipped. The first two columns will be assumed to represent an edge by default.

        :param str file: a valid path to the edge list File
        :param None,str sep: The field separator inside the network file, handed to :py:func:`pandas.read_csv`. Default is :py:class:`None`. Otherwise, you can place the string representing the column separator.
        :param bool header: Whether a first line with column name (header) is present or not (default is ``False``)

        :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

        :raise ValueError: if the edge list is direct or contains multiple edges
        """
        eglutils = EglUtils(file=file, header=header, sep=sep)

        if eglutils.is_direct():
            raise ValueError(
                u"Edgelist is not ready to be parsed by Pyntacle, it's direct. Use the `edgelist_utils` module in `tools` to make it undirect"
            )

        elif eglutils.is_multigraph():
            raise ValueError(
                u"Edgelist contains multiple edges. It is not ready to be parsed by Pyntacle, Use the `edgelist_utils` module in `tools` to turn it into a simple graph."
            )

        graph = Graph()  #initialize an empty graph that will be filled

        # read only the first two columns, as documented; any further column
        # is ignored instead of breaking the import
        adj = pd.read_csv(file,
                          sep=sep,
                          header=0 if header else None,
                          dtype=str,
                          usecols=[0, 1])
        adj.columns = [0, 1]
        adj = adj.dropna(how="all")  #remove all empty lines

        # Each undirected edge appears twice (A-B and B-A): put every pair in
        # a canonical order and keep its first occurrence only. This is done
        # on plain Python objects rather than by sorting `adj.values` in
        # place, which only works when pandas hands back a writable view.
        edgs = []
        seen = set()
        for pair in adj.values.tolist():
            edge = tuple(sorted(pair))
            if edge not in seen:
                seen.add(edge)
                edgs.append(edge)

        # vertices are added once each, in lexicographic order
        nodeslist = sorted({str(node) for edge in edgs for node in edge})
        graph.add_vertices(nodeslist)
        graph.add_edges(edgs)

        #initialize the graph by calling the graph_initializer() method
        util = gu(graph=graph)
        util.graph_initializer(
            graph_name=os.path.splitext(os.path.basename(file))[0])
        graph = util.get_graph()

        sys.stdout.write(u"Edge list from {} imported\n".format(
            os.path.basename(file)))
        return graph
Ejemplo n.º 7
0
    def run(self):
        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()
        if self.args.no_header:
            header = False
        else:
            header = True

        if not hasattr(self.args, 'which'):
            raise Error(
                u"usage: pyntacle.py metrics {global, local} [options]")

        # Checking input file
        if self.args.input_file is None:
            sys.stderr.write(
                u"Please specify an input file using the `-i/--input-file` option. Quitting\n"
            )
            sys.exit(1)

        if not os.path.exists(self.args.input_file):
            self.logging.error(u"Cannot find {}. Is the path correct?".format(
                self.args.input_file))
            sys.exit(1)

        if hasattr(self.args, "damping_factor"):
            if self.args.damping_factor is not None:
                if self.args.damping_factor < 0.0 or self.args.damping_factor > 1.0:
                    sys.stderr.write(
                        u"Damping factor must be between 0 and 1. Quitting\n")
                    sys.exit(1)

        self.logging.debug(u'Running Pyntacle metrics, with arguments ')
        self.logging.debug(self.args)

        # Load Graph
        sys.stdout.write(import_start)
        sys.stdout.write(u"Importing graph from file\n")
        graph = GraphLoad(self.args.input_file,
                          format_dictionary.get(self.args.format, "NA"),
                          header,
                          separator=self.args.input_separator).graph_load()
        # init Utils global stuff
        utils = gu(graph=graph)

        if hasattr(self.args, "nodes"):
            if self.args.nodes is not None:

                self.args.nodes = self.args.nodes.split(",")

                if not utils.nodes_in_graph(self.args.nodes):
                    sys.stderr.write(
                        "One or more of the specified nodes is not present in the graph. Please check your spelling and the presence of empty spaces in between node names. Quitting\n"
                    )
                    sys.exit(1)

        if self.args.largest_component:
            try:
                graph = utils.get_largest_component()
                sys.stdout.write(
                    u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n"
                    .format(graph.vcount(), graph.ecount()))
                # reinitialize graph utils class
                utils.set_graph(graph)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n"
                )
                sys.exit(1)

            if self.args.nodes is not None:
                if not utils.nodes_in_graph(self.args.nodes):
                    sys.stderr.write(
                        "One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n"
                    )
                    sys.exit(1)

        # Decide implementation
        if 'implementation' in graph.attributes():
            implementation = graph['implementation']
        else:
            implementation = CmodeEnum.igraph

        if hasattr(self.args, "nodes"):
            if self.args.weights is not None:
                sys.stdout.write(u"Adding edge weights from file {}\n".format(
                    self.args.weights))
                if not os.path.exists(self.args.weights):
                    sys.stderr.write(
                        u"Weights file {} does not exist. Is the path correct?\n"
                        .format(self.args.weights))
                    sys.exit(1)

                ImportAttributes.import_edge_attributes(
                    graph,
                    self.args.weights,
                    sep=separator_detect(self.args.weights),
                    mode=self.args.weights_format)
                try:
                    weights = [
                        float(x) if x != None else 1.0
                        for x in graph.es()["weights"]
                    ]

                except KeyError:
                    sys.stderr.write(
                        u"The attribute file does not contain a column named 'weights'."
                        "Quitting\n")
                    sys.exit(1)
            else:
                weights = None

        # Check provided dimensions' format
        if hasattr(self.args.plot_dim, "plot_dim"):
            # define custom format
            self.args.plot_dim = self.args.plot_dim.split(",")

            if len(self.args.plot_dim) != 2:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                )

            for i in range(0, len(self.args.plot_dim)):
                try:
                    self.args.plot_dim[i] = int(self.args.plot_dim[i])

                except ValueError:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

                if self.args.plot_dim[i] <= 0:
                    sys.stderr.write(
                        u"Format specified must be a comma-separated list of values(e.g. 1920,1080). Quitting\n"
                    )
                    sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            # generate different formats according to graph size
            if graph.vcount() <= 150:
                plot_size = (800, 800)

            else:
                plot_size = (1600, 1600)

        sys.stdout.write(section_end)  #end report
        sys.stdout.write(run_start)  #start run

        if self.args.which == "local":

            reporter = PyntacleReporter(graph=graph)  #init reporter

            if self.args.nodes is not None:
                sys.stdout.write(
                    u"Computing local metrics for nodes {}\n".format(', '.join(
                        self.args.nodes)))
                nodes_list = self.args.nodes

            else:
                sys.stdout.write(
                    u"Computing local metrics for all nodes in the graph\n")
                nodes_list = None

            local_attributes_dict = OrderedDict({
                LocalAttributeEnum.degree.name:
                LocalTopology.degree(graph=graph, nodes=nodes_list),
                LocalAttributeEnum.clustering_coefficient.name:
                LocalTopology.clustering_coefficient(graph=graph,
                                                     nodes=nodes_list),
                LocalAttributeEnum.betweenness.name:
                LocalTopology.betweenness(graph=graph, nodes=nodes_list),
                LocalAttributeEnum.closeness.name:
                LocalTopology.closeness(graph=graph, nodes=nodes_list),
                LocalAttributeEnum.radiality.name:
                LocalTopology.radiality(graph=graph,
                                        nodes=nodes_list,
                                        cmode=implementation),
                LocalAttributeEnum.radiality_reach.name:
                LocalTopology.radiality_reach(graph=graph,
                                              nodes=nodes_list,
                                              cmode=implementation),
                LocalAttributeEnum.eccentricity.name:
                LocalTopology.eccentricity(graph=graph, nodes=nodes_list),
                LocalAttributeEnum.eigenvector_centrality.name:
                LocalTopology.eigenvector_centrality(graph=graph,
                                                     nodes=nodes_list),
                LocalAttributeEnum.pagerank.name:
                LocalTopology.pagerank(graph=graph,
                                       nodes=nodes_list,
                                       weights=weights,
                                       damping=self.args.damping_factor)
            })

            if self.args.nodes:
                local_attributes_dict["nodes"] = self.args.nodes

            sys.stdout.write("Local metrics computed\n")

            sys.stdout.write(section_end)
            sys.stdout.write(report_start)
            # check output directory
            if not os.path.isdir(self.args.directory):
                sys.stdout.write(
                    u"WARNING: Output directory does not exist; {} will be created\n"
                    .format(os.path.abspath(self.args.directory)))
                os.makedirs(os.path.abspath(self.args.directory),
                            exist_ok=True)

            sys.stdout.write(u"Producing report in {} format\n".format(
                self.args.report_format))

            reporter.create_report(ReportEnum.Local, local_attributes_dict)
            reporter.write_report(report_dir=self.args.directory,
                                  format=self.args.report_format)

            if not self.args.no_plot and graph.vcount() < 1000:

                sys.stdout.write(u"Generating plots in {} format\n".format(
                    self.args.plot_format))

                # generates plot directory
                plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

                if not os.path.isdir(plot_dir):
                    os.makedirs(plot_dir, exist_ok=True)

                plot_graph = PlotGraph(graph=graph)
                plot_graph.set_node_labels(
                    labels=graph.vs()["name"])  # assign node labels to graph

                pal = sns.color_palette("Accent", 8).as_hex()
                framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()

                other_nodes_colour = pal[2]
                other_frame_colour = framepal[2]
                other_nodes_size = 25

                if self.args.nodes:  # make node selected of a different colour and bigger than the other ones, so they can be visualized
                    sys.stdout.write(
                        u"Highlighting nodes ({}) in plot\n".format(
                            ', '.join(nodes_list)))
                    selected_nodes_colour = pal[0]
                    selected_nodes_frames = framepal[0]

                    node_colors = [
                        selected_nodes_colour
                        if x["name"] in nodes_list else other_nodes_colour
                        for x in graph.vs()
                    ]
                    node_frames = [
                        selected_nodes_frames
                        if x["name"] in nodes_list else other_frame_colour
                        for x in graph.vs()
                    ]

                    #print(node_colors)

                    plot_graph.set_node_colors(colors=node_colors)

                    node_sizes = [
                        45 if x["name"] in nodes_list else other_nodes_size
                        for x in graph.vs()
                    ]
                    plot_graph.set_node_sizes(sizes=node_sizes)

                else:
                    # sys.stdout.write("Plotting network\n".format(nodes_list))
                    node_colors = [other_nodes_colour] * graph.vcount()
                    node_frames = [other_frame_colour] * graph.vcount()
                    plot_graph.set_node_colors(colors=node_colors)

                    node_sizes = [other_nodes_size] * graph.vcount()
                    plot_graph.set_node_sizes(sizes=node_sizes)

                # define layout
                plot_graph.set_layouts(self.args.plot_layout)

                plot_path = os.path.join(
                    plot_dir, ".".join([
                        "_".join([graph["name"][0], self.date]),
                        self.args.plot_format
                    ]))
                plot_graph.plot_graph(path=plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=node_frames)

            elif not self.args.no_plot and graph.vcount() >= 1000:
                sys.stdout.write(
                    u"The graph has too many nodes ({}). It will not be drawn\n"
                    .format(graph.vcount()))

        elif self.args.which == "global":

            sys.stdout.write(u"Computing global metrics\n")
            global_attributes_dict = OrderedDict({
                GlobalAttributeEnum.average_shortest_path_length.name:
                ShortestPath.average_global_shortest_path_length(graph=graph),
                GlobalAttributeEnum.median_shortest_path_length.name:
                ShortestPath.median_global_shortest_path_length(graph=graph),
                GlobalAttributeEnum.diameter.name:
                GlobalTopology.diameter(graph=graph),
                GlobalAttributeEnum.components.name:
                GlobalTopology.components(graph=graph),
                GlobalAttributeEnum.radius.name:
                GlobalTopology.radius(graph=graph),
                GlobalAttributeEnum.density.name:
                GlobalTopology.density(graph=graph),
                GlobalAttributeEnum.pi.name:
                GlobalTopology.pi(graph=graph),
                GlobalAttributeEnum.average_clustering_coefficient.name:
                GlobalTopology.average_clustering_coefficient(graph=graph),
                GlobalAttributeEnum.weighted_clustering_coefficient.name:
                GlobalTopology.weighted_clustering_coefficient(graph=graph),
                GlobalAttributeEnum.average_degree.name:
                GlobalTopology.average_degree(graph=graph),
                GlobalAttributeEnum.average_closeness.name:
                GlobalTopology.average_closeness(graph=graph),
                GlobalAttributeEnum.average_eccentricity.name:
                GlobalTopology.average_eccentricity(graph=graph),
                GlobalAttributeEnum.average_radiality.name:
                GlobalTopology.average_radiality(graph=graph,
                                                 cmode=implementation),
                GlobalAttributeEnum.average_radiality_reach.name:
                GlobalTopology.average_radiality_reach(graph=graph,
                                                       cmode=implementation),
                GlobalAttributeEnum.completeness_naive.name:
                Sparseness.completeness_naive(graph=graph),
                GlobalAttributeEnum.completeness.name:
                Sparseness.completeness(graph=graph),
                GlobalAttributeEnum.compactness.name:
                Sparseness.compactness(graph=graph)
            })

            sys.stdout.write(u"Global metrics computed\n")
            sys.stdout.write(section_end)
            sys.stdout.write(report_start)
            sys.stdout.write(
                u"Producing global metrics report for the input graph\n")

            reporter = PyntacleReporter(graph=graph)  # init reporter
            reporter.create_report(ReportEnum.Global, global_attributes_dict)
            reporter.write_report(report_dir=self.args.directory,
                                  format=self.args.report_format)

            if self.args.no_nodes:  # create an additional report for the graph minus the selected nodes

                sys.stdout.write(
                    u"Removing nodes:\n\t{}\nfrom input graph and computing Global Metrics\n"
                    .format(self.args.no_nodes))
                nodes_list = self.args.no_nodes.split(",")

                # this will be useful when producing the two global topology plots, one for the global graph and the other one fo all nodes
                nodes_list = [x.replace(" ", "") for x in nodes_list]
                index_list = utils.get_node_indices(nodes=nodes_list)

                # delete vertices
                graph_nonodes = graph.copy()
                graph_nonodes.delete_vertices(index_list)  #remove target nodes

                global_attributes_dict_nonodes = OrderedDict({
                    'Removed nodes':
                    ','.join(nodes_list),
                    GlobalAttributeEnum.average_shortest_path_length.name:
                    ShortestPath.average_global_shortest_path_length(
                        graph=graph_nonodes),
                    GlobalAttributeEnum.median_shortest_path_length.name:
                    ShortestPath.median_global_shortest_path_length(
                        graph=graph_nonodes),
                    GlobalAttributeEnum.diameter.name:
                    GlobalTopology.diameter(graph=graph_nonodes),
                    GlobalAttributeEnum.components.name:
                    GlobalTopology.components(graph=graph_nonodes),
                    GlobalAttributeEnum.radius.name:
                    GlobalTopology.radius(graph=graph_nonodes),
                    GlobalAttributeEnum.density.name:
                    GlobalTopology.density(graph=graph_nonodes),
                    GlobalAttributeEnum.pi.name:
                    GlobalTopology.pi(graph=graph_nonodes),
                    GlobalAttributeEnum.average_clustering_coefficient.name:
                    GlobalTopology.average_clustering_coefficient(
                        graph=graph_nonodes),
                    GlobalAttributeEnum.weighted_clustering_coefficient.name:
                    GlobalTopology.weighted_clustering_coefficient(
                        graph=graph_nonodes),
                    GlobalAttributeEnum.average_degree.name:
                    GlobalTopology.average_degree(graph=graph_nonodes),
                    GlobalAttributeEnum.average_closeness.name:
                    GlobalTopology.average_closeness(graph=graph_nonodes),
                    GlobalAttributeEnum.average_eccentricity.name:
                    GlobalTopology.average_eccentricity(graph=graph_nonodes),
                    GlobalAttributeEnum.average_radiality.name:
                    GlobalTopology.average_radiality(graph=graph_nonodes,
                                                     cmode=implementation),
                    GlobalAttributeEnum.average_radiality_reach.name:
                    GlobalTopology.average_radiality_reach(
                        graph=graph_nonodes, cmode=implementation),
                    GlobalAttributeEnum.completeness_naive.name:
                    Sparseness.completeness_naive(graph=graph_nonodes),
                    GlobalAttributeEnum.completeness.name:
                    Sparseness.completeness(graph=graph_nonodes),
                    GlobalAttributeEnum.compactness.name:
                    Sparseness.compactness(graph=graph_nonodes),
                })

                sys.stdout.write(
                    u"Producing global metrics report for the input graph after node removal\n"
                )
                graph_nonodes["name"][0] += '_without_nodes'
                reporter = PyntacleReporter(
                    graph=graph_nonodes)  # init reporter
                reporter.create_report(ReportEnum.Global,
                                       global_attributes_dict_nonodes)
                reporter.write_report(report_dir=self.args.directory,
                                      format=self.args.report_format)

            if not self.args.no_plot and graph.vcount() < 1000:

                if self.args.no_nodes:
                    sys.stdout.write(
                        u"Generating plots of both the input network and the resulting network without nodes {} in {} format\n"
                        .format(self.args.no_nodes, self.args.plot_format))

                else:
                    sys.stdout.write(
                        u"Generating network plot in {} format\n".format(
                            self.args.plot_format))

                # generates plot directory
                plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

                if not os.path.isdir(plot_dir):
                    os.mkdir(plot_dir)

                other_nodes_size = 25
                pal = sns.color_palette("Accent", 8).as_hex()
                framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()
                other_nodes_colour = pal[2]
                other_frame_colour = framepal[2]
                no_nodes_size = 35
                no_nodes_colour = pal[4]
                no_nodes_frames = framepal[4]

                if self.args.no_nodes:
                    plot_path = os.path.join(
                        plot_dir, ".".join([
                            "_".join([
                                "metric", self.args.which,
                                re.sub('_nodes_removed', '', graph["name"][0]),
                                "global_metrics_plot", self.date
                            ]), self.args.plot_format
                        ]))
                    node_colors = [
                        no_nodes_colour
                        if x["name"] in nodes_list else other_nodes_colour
                        for x in graph.vs()
                    ]
                    node_frames = [
                        no_nodes_frames
                        if x["name"] in nodes_list else other_frame_colour
                        for x in graph.vs()
                    ]

                    node_sizes = [
                        no_nodes_size
                        if x["name"] in nodes_list else other_nodes_size
                        for x in graph.vs()
                    ]

                else:
                    node_colors = [other_nodes_colour] * graph.vcount()
                    node_frames = [other_frame_colour] * graph.vcount()
                    node_sizes = [other_nodes_size] * graph.vcount()
                    plot_path = os.path.join(
                        plot_dir, ".".join([
                            "_".join([
                                "Metric", self.args.which, graph["name"][0],
                                self.date
                            ]), self.args.plot_format
                        ]))

                plot_graph = PlotGraph(graph=graph)
                plot_graph.set_node_labels(
                    labels=graph.vs()["name"])  # assign node labels to graph

                plot_graph.set_node_colors(colors=node_colors)
                plot_graph.set_node_sizes(sizes=node_sizes)

                plot_graph.set_node_colors(colors=node_colors)
                plot_graph.set_node_sizes(sizes=node_sizes)

                # define layout
                plot_graph.set_layouts(self.args.plot_layout)

                plot_graph.plot_graph(path=plot_path,
                                      bbox=plot_size,
                                      margin=20,
                                      edge_curved=0.2,
                                      keep_aspect_ratio=True,
                                      vertex_label_size=6,
                                      vertex_frame_color=node_frames)

                if self.args.no_nodes:

                    plot_graph = PlotGraph(graph=graph_nonodes)
                    plot_graph.set_node_labels(
                        labels=graph_nonodes.vs()
                        ["name"])  # assign node labels to graph

                    # print(graph_copy.vs()["name"])
                    node_colors = [other_nodes_colour] * graph_nonodes.vcount()
                    node_frames = [other_frame_colour] * graph_nonodes.vcount()
                    node_sizes = [other_nodes_size] * graph_nonodes.vcount()

                    plot_graph.set_node_colors(colors=node_colors)
                    plot_graph.set_node_sizes(sizes=node_sizes)

                    # define layout
                    plot_graph.set_layouts(self.args.plot_layout)

                    plot_path = os.path.join(
                        plot_dir, ".".join([
                            "_".join(
                                [graph["name"][0], "_no_nodes", self.date]),
                            self.args.plot_format
                        ]))

                    plot_graph.plot_graph(path=plot_path,
                                          bbox=plot_size,
                                          margin=20,
                                          edge_curved=0.2,
                                          keep_aspect_ratio=True,
                                          vertex_label_size=6,
                                          vertex_frame_color=node_frames)

            elif not self.args.no_plot and graph.vcount() >= 1000:
                sys.stdout.write(
                    u"The graph has too many nodes ({}). It will not be drawn\n"
                    .format(graph.vcount()))

        if self.args.save_binary:
            sys.stdout.write(
                u"Saving graph to a binary file (ending in .graph)\n")
            basename_graph = os.path.splitext(
                os.path.basename(self.args.input_file))[0]
            binary_path = os.path.join(self.args.directory,
                                       basename_graph + ".graph")
            # elif self.args.no_nodes:
            # nodes_list = graph_nonodes.vs()
            if self.args.which == 'local':
                if self.args.nodes:
                    nodes_list = self.args.nodes.split(",")
                else:
                    nodes_list = graph.vs["name"]
                for key in local_attributes_dict:
                    AddAttributes.add_node_attributes(
                        graph, key, local_attributes_dict[key], nodes_list)
                PyntacleExporter.Binary(graph, binary_path)

            elif self.args.which == 'global':

                for key in global_attributes_dict:
                    if (key == "average_shortest_path_length"
                            or key == 'median_shortest_path_length'):
                        newkey = re.sub("_shortest_path_length",
                                        "_global_shortest_path_length", key)
                    else:
                        newkey = key
                    AddAttributes.add_graph_attributes(
                        graph, newkey, global_attributes_dict[key])
                PyntacleExporter.Binary(graph, binary_path)

                if self.args.no_nodes:
                    binary_path_nonodes = os.path.join(
                        self.args.directory,
                        basename_graph + "_no_nodes" + ".graph")
                    sys.stdout.write(
                        u"Saving a binary of the input graph without the requested nodes at path: {}\n"
                        .format(os.path.basename(binary_path_nonodes)))
                    for key in global_attributes_dict_nonodes:
                        if (key == "average_shortest_path_length"
                                or key == 'median_shortest_path_length'):
                            newkey = re.sub("_shortest_path_length",
                                            "_global_shortest_path_length",
                                            key)
                        else:
                            newkey = key
                        AddAttributes.add_graph_attributes(
                            graph_nonodes, newkey,
                            global_attributes_dict_nonodes[key])

                    PyntacleExporter.Binary(graph_nonodes, binary_path_nonodes)

        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle metrics completed successfully\n")
        sys.exit(0)
Ejemplo n.º 8
0
    def run(self):
        """
        Run the ``groupcentrality`` command (``gr-finder`` or ``gr-info``).

        The method validates the parsed command-line arguments stored in ``self.args``, imports the input graph,
        computes the requested group centrality metrics (degree, betweenness and/or closeness), prints a summary
        to stdout and finally writes the report, the optional binary graph and the optional network plots into
        ``self.args.directory``.

        Invalid input (missing file, unknown group distance, nodes not in the graph, ``k`` too large, malformed
        plot dimensions) is reported on stderr and terminates the process through ``sys.exit(1)``.

        :raise Error: if the parsed arguments do not carry the ``which`` subcommand attribute
        """
        if not self.args.suppress_cursor:
            cursor = CursorAnimation()
            cursor.daemon = True
            cursor.start()

        if self.args.input_file is None:
            sys.stderr.write(
                u"Please specify an input file using the `-i/--input-file` option. Quitting\n")
            sys.exit(1)

        if not os.path.exists(self.args.input_file):
            # fatal messages go to stderr, like the other errors raised by this method
            sys.stderr.write(u"Cannot find {}. Is the path correct?\n".format(self.args.input_file))
            sys.exit(1)

        # verify that group distance is set if group closeness is specified
        distancedict = {"min": GroupDistanceEnum.minimum, "max": GroupDistanceEnum.maximum, "mean": GroupDistanceEnum.mean}
        if self.args.type in ("all", "closeness"):
            if self.args.group_distance not in distancedict:
                sys.stderr.write("'--group-distance/-D parameter must be one of the followings: {}'. Quitting\n".format(",".join(distancedict)))
                sys.exit(1)
            # ``group_distance`` is only defined (and only used below) when closeness is requested
            group_distance = distancedict[self.args.group_distance]

        # Parsing optional node list
        if hasattr(self.args, 'nodes'):
            self.args.nodes = self.args.nodes.split(',')

        if not hasattr(self.args, 'which'):
            raise Error(u"usage: pyntacle.py groupcentrality {gr-finder, gr-info} [options]'")

        header = not self.args.no_header

        sys.stdout.write(import_start)
        sys.stdout.write(u"Importing graph from file\n")
        graph = GraphLoad(self.args.input_file, format_dictionary.get(self.args.format, "NA"), header,
                          separator=self.args.input_separator).graph_load()

        # init graph utils class
        utils = gu(graph=graph)

        if hasattr(self.args, 'nodes'):
            if not utils.nodes_in_graph(self.args.nodes):
                sys.stderr.write("One or more of the specified nodes is not present in the graph. Please check your spelling and the presence of empty spaces in between node names. Quitting\n")
                sys.exit(1)

        if self.args.largest_component:
            try:
                graph = utils.get_largest_component()
                sys.stdout.write(
                    u"Taking the largest component of the input graph as you requested ({} nodes, {} edges)\n".format(
                        graph.vcount(), graph.ecount()))
                # reinitialize graph utils class
                utils.set_graph(graph)

            except MultipleSolutionsError:
                sys.stderr.write(
                    u"The graph has two largest components of the same size. Cannot choose one. Please parse your file or remove the '--largest-component' option. Quitting\n")
                sys.exit(1)

            # check that the nodes are in the largest component
            if hasattr(self.args, 'nodes'):
                if not utils.nodes_in_graph(self.args.nodes):
                    sys.stderr.write("One or more of the specified nodes is not present in the largest graph component. Select a different set or remove this option. Quitting\n")
                    sys.exit(1)

        if hasattr(self.args, "k_size") and self.args.k_size >= graph.vcount():
            sys.stderr.write("The 'k' argument ({}) must be strictly less than the graph size({}). Quitting\n".format(self.args.k_size, graph.vcount()))
            sys.exit(1)

        if 'implementation' in graph.attributes():
            implementation = graph['implementation']
        else:
            implementation = CmodeEnum.igraph

        # remember whether the output directory must be created; creation is deferred to the output section
        createdir = not os.path.isdir(self.args.directory)

        # control plot dimensions
        if self.args.plot_dim:  # define custom format
            try:
                self.args.plot_dim = [int(dim) for dim in self.args.plot_dim.split(",")]

                if any(dim <= 0 for dim in self.args.plot_dim):
                    raise ValueError

            except ValueError:
                sys.stderr.write(
                    u"Format specified must be a comma-separated list of positive integers (e.g. 1920,1080). Quitting\n")
                sys.exit(1)

            plot_size = tuple(self.args.plot_dim)

        else:
            # default canvas, enlarged for graphs with more than 150 nodes
            plot_size = (1600, 1600) if graph.vcount() > 150 else (800, 600)

        # initialize reporter for later usage and plot dimension for later usage
        r = PyntacleReporter(graph=graph)
        results = OrderedDict()

        sys.stdout.write(section_end)
        sys.stdout.write(run_start)

        if self.args.which == "gr-finder":

            # Greedy optimization
            if self.args.implementation == "greedy":
                if self.args.seed:
                    random.seed(self.args.seed)

                report_type = ReportEnum.GR_greedy
                go_runner = gow(graph=graph)
                sys.stdout.write(u"Using greedy optimization algorithm for searching optimal set of nodes using group centrality metrics\n")
                sys.stdout.write(sep_line)

                if self.args.type in ("all", "degree"):
                    sys.stdout.write(
                        u"Finding a set of nodes of size {0} that optimizes group degree\n".format(
                            self.args.k_size))

                    go_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_degree,
                                                  seed=self.args.seed,
                                                  cmode=implementation)
                    sys.stdout.write(sep_line)

                if self.args.type in ("all", "betweenness"):
                    sys.stdout.write(
                        u"Finding a set of nodes of size {0} that optimizes group betweenness\n".format(
                            self.args.k_size))

                    go_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_betweenness,
                                                  seed=self.args.seed,
                                                  cmode=implementation)
                    sys.stdout.write(sep_line)

                if self.args.type in ("all", "closeness"):
                    sys.stdout.write(
                        u"Finding a set of nodes of size {0} that optimizes group closeness using the {1} distance from the node set\n".format(
                            self.args.k_size, group_distance.name))

                    go_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_closeness,
                                                  seed=self.args.seed, cmode=implementation,
                                                  distance=group_distance)
                    sys.stdout.write(sep_line)

                sys.stdout.write(sep_line)
                results.update(go_runner.get_results())

            # bruteforce implementation
            elif self.args.implementation == "brute-force":

                plural = "s" if self.args.threads > 1 else ""

                report_type = ReportEnum.GR_bruteforce
                bf_runner = bfw(graph=graph)
                sys.stdout.write(u"Using brute-force search algorithm to find the best set(s) that optimize group centrality metrics\n")
                sys.stdout.write(sep_line)

                if self.args.type in ("all", "degree"):
                    sys.stdout.write(
                        u"Finding the best set(s) of nodes of size {0} that maximizes group degree using {1} thread{2}\n".format(
                            self.args.k_size, self.args.threads, plural))
                    bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_degree,
                                                  cmode=implementation, threads=self.args.threads)

                    sys.stdout.write(sep_line)

                if self.args.type in ("all", "betweenness"):
                    sys.stdout.write(
                        u"Finding the best set(s) of nodes of size {0} that maximizes group betweenness using {1} thread{2}\n".format(
                            self.args.k_size, self.args.threads, plural))
                    bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_betweenness,
                                                  cmode=implementation, threads=self.args.threads)
                    sys.stdout.write(sep_line)

                if self.args.type in ("all", "closeness"):
                    # print the distance name (not the enum member), as the greedy branch and the summary do
                    sys.stdout.write(
                        u"Finding the best set(s) of nodes of size {0} that maximizes group closeness using the {1} distance from the node set and {2} thread{3}\n".format(
                            self.args.k_size, group_distance.name, self.args.threads, plural))
                    bf_runner.run_groupcentrality(k=self.args.k_size, gr_type=GroupCentralityEnum.group_closeness,
                                                  cmode=implementation, threads=self.args.threads,
                                                  distance=group_distance)
                    sys.stdout.write(sep_line)

                results.update(bf_runner.get_results())

            # shell output report part
            sys.stdout.write(section_end)
            sys.stdout.write(summary_start)
            sys.stdout.write(u"Node set size for group centrality search: {}\n".format(str(self.args.k_size)))
            sys.stdout.write(sep_line)

            for kk in results:

                if len(results[kk][0]) > 1 and self.args.implementation == 'brute-force':
                    plurals = ['s', 'are']
                else:
                    plurals = ['', 'is']

                if results[kk][0][0] is None:  # the case in which there's no solution
                    # NOTE(review): for brute-force the join below iterates each element of this list, so the
                    # placeholder is printed character by character - confirm the intended output
                    results[kk][0] = ["None"]

                if self.args.implementation == 'brute-force':
                    list_of_results = "\n".join(['(' + ', '.join(x) + ')' for x in results[kk][0]])

                else:
                    list_of_results = "(" + ", ".join(results[kk][0]) + ")"

                sys.stdout.write(
                    u'Best node set{0} of size {1} for {5} centrality {2}:\n{3}\nwith value {4}\n'.format(
                        plurals[0], self.args.k_size, plurals[1], list_of_results, results[kk][1], " ".join(kk.split("_")[:2])))

                if kk.startswith(GroupCentralityEnum.group_closeness.name):
                    sys.stdout.write("The {} distance was considered for computing closeness\n".format(group_distance.name))

                sys.stdout.write("\n")

            sys.stdout.write(section_end)

        elif self.args.which == "gr-info":
            report_type = ReportEnum.GR_info
            sys.stdout.write("Input node set: ({})\n".format(', '.join(self.args.nodes)))
            sys.stdout.write(sep_line)

            grinfo_runner = ipw(graph=graph, nodes=self.args.nodes)

            if self.args.type in ("degree", "all"):
                grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_degree, cmode=implementation)

            if self.args.type in ("betweenness", "all"):
                grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_betweenness, cmode=implementation)

            if self.args.type in ("closeness", "all"):
                grinfo_runner.run_groupcentrality(gr_type=GroupCentralityEnum.group_closeness, cmode=implementation, gr_distance=group_distance)

            results.update(grinfo_runner.get_results())

            sys.stdout.write(summary_start)

            for metric in results:

                if metric == GroupCentralityEnum.group_degree.name:
                    sys.stdout.write("The group degree value for the input node set:\n({0})\nis {1}\n".format(
                        ', '.join(results[metric][0]), results[metric][1]))
                    sys.stdout.write("\n")

                if metric == GroupCentralityEnum.group_betweenness.name:
                    sys.stdout.write(
                        "The group betweenness value for the input node set:\n({0})\nis {1}\n".format(
                            ', '.join(results[metric][0]), results[metric][1]))
                    sys.stdout.write("\n")

                if metric.startswith(GroupCentralityEnum.group_closeness.name):
                    sys.stdout.write(
                        "The group closeness value for the input node set:\n({0})\nis {1}.\nThe {2} distance was considered between the set and the rest of the graph\n".format(
                            ', '.join(results[metric][0]), results[metric][1], group_distance.name))
                    sys.stdout.write("\n")

            sys.stdout.write(section_end)

        # output part#####
        sys.stdout.write(report_start)
        sys.stdout.write("Writing Results\n")

        if createdir:
            # trailing newline added so the warning does not run into the next message
            sys.stdout.write(u"WARNING: output directory does not exist, {} will be created\n".format(
                os.path.abspath(self.args.directory)))
            os.makedirs(os.path.abspath(self.args.directory), exist_ok=True)

        # brute-force results hold several node sets per metric, the other modes hold a single node set
        is_bruteforce = self.args.which == "gr-finder" and self.args.implementation == "brute-force"

        if self.args.save_binary:
            # reproduce octopus behaviour by adding kp information to the graph before saving it
            sys.stdout.write(u"Saving graph to a binary file (ending in .graph)\n")

            if self.args.which == "gr-finder":
                suffix = "bruteforce" if is_bruteforce else "greedy"
            else:
                suffix = "info"

            for key in results:
                # node sets are sorted and frozen into tuples so they can be used as dictionary keys
                if is_bruteforce:
                    attr_key = tuple(tuple(sorted(x)) for x in results[key][0])
                else:
                    attr_key = tuple(sorted(results[key][0]))

                attr_name = "_".join([key, suffix])
                attr_val = results[key][1]

                if attr_name in graph.attributes():
                    if not isinstance(graph[attr_name], dict):
                        sys.stdout.write("WARNING: attribute {} does not point to a dictionary, will overwrite\n".format(attr_name))
                        AddAttributes.add_graph_attribute(graph, attr_name, {attr_key: attr_val})
                    else:
                        if attr_key in graph[attr_name]:
                            # report the attribute name (the original mistakenly printed the metric value)
                            sys.stdout.write("WARNING {} already present in the {} graph attribute, will overwrite\n".format(attr_key, attr_name))
                        graph[attr_name].update({attr_key: attr_val})
                else:
                    AddAttributes.add_graph_attribute(graph, attr_name, {attr_key: attr_val})

            binary_prefix = "_".join([os.path.splitext(os.path.basename(self.args.input_file))[0], self.args.which, self.date])
            binary_path = os.path.join(self.args.directory, binary_prefix + ".graph")
            PyntacleExporter.Binary(graph, binary_path)

        sys.stdout.write(u"Producing report in {} format\n".format(self.args.report_format))

        r.create_report(report_type=report_type, report=results)
        r.write_report(report_dir=self.args.directory, format=self.args.report_format)

        if not self.args.no_plot and graph.vcount() < 1000:

            sys.stdout.write(u"Generating network plots in {} format\n".format(self.args.plot_format))
            plot_dir = os.path.join(self.args.directory, "pyntacle-plots")

            if not os.path.isdir(plot_dir):
                os.mkdir(plot_dir)

            plot_graph = PlotGraph(graph=graph)
            plot_format = self.args.plot_format
            plot_graph.set_node_labels(labels=graph.vs()["name"])  # assign node labels to graph
            pal = sns.color_palette("Accent", 8).as_hex()
            framepal = sns.color_palette("Accent", 8, desat=0.5).as_hex()

            other_nodes_colour = pal[2]
            other_frame_colour = framepal[2]

            other_nodes_size = 25
            other_edge_width = 1

            for metric in results:
                if is_bruteforce:
                    # flatten all the best sets into a single list of unique node names
                    results[metric][0] = list(set(chain.from_iterable(results[metric][0])))

                selected_nodes = results[metric][0]

                # palette slot used to highlight the node set of each metric; result keys are strings,
                # so they are compared with the enum member *name*
                if metric.startswith(GroupCentralityEnum.group_closeness.name):
                    palette_index = 5
                elif metric == GroupCentralityEnum.group_degree.name:
                    palette_index = 4
                else:  # group betweenness
                    palette_index = 6

                set_nodes_colour = pal[palette_index]
                set_frames_colour = framepal[palette_index]

                # create a list of node colors
                node_colors = [set_nodes_colour if x["name"] in selected_nodes else other_nodes_colour
                               for x in graph.vs()]
                node_frames = [set_frames_colour if x["name"] in selected_nodes else other_frame_colour
                               for x in graph.vs()]

                plot_graph.set_node_colors(colors=node_colors)

                node_sizes = [35 if x["name"] in selected_nodes else other_nodes_size for x in graph.vs()]
                plot_graph.set_node_sizes(sizes=node_sizes)

                # thicken the edges that touch at least one node of the set
                edge_widths = [5 if any(y in selected_nodes for y in x["adjacent_nodes"]) else other_edge_width
                               for x in graph.es()]

                plot_graph.set_edge_widths(edge_widths)
                plot_graph.set_layouts(self.args.plot_layout)

                plot_path = os.path.join(plot_dir, "_".join(
                    [self.args.which, graph["name"][0], metric, self.date]) + "." + plot_format)
                if os.path.exists(plot_path):
                    sys.stdout.write(
                        u"WARNING: a plot with the name ({}) already exists, overwriting it\n".format(
                            os.path.basename(plot_path)))

                plot_graph.plot_graph(path=plot_path, bbox=plot_size, margin=20, edge_curved=0.2,
                                      keep_aspect_ratio=True, vertex_label_size=6, vertex_frame_color=node_frames)
        elif not self.args.no_plot and graph.vcount() >= 1000:
            # only warn when a plot was actually requested
            sys.stdout.write(u"The graph has too many nodes ({}, we plot nodes with a maximum of 1000 nodes). It will not be drawn\n".format(graph.vcount()))

        if not self.args.suppress_cursor:
            cursor.stop()

        sys.stdout.write(section_end)
        sys.stdout.write(u"Pyntacle groupcentrality completed successfully\n")
        sys.exit(0)
Ejemplo n.º 9
0
    def group_centrality(
        graph,
        k: int,
        metric: GroupCentralityEnum,
        seed: int or None = None,
        distance_type: GroupDistanceEnum = GroupDistanceEnum.minimum,
        cmode=CmodeEnum.igraph,
    ) -> (list, float):
        r"""
        Greedy search for a group of ``k`` nodes that scores well for the selected group centrality metric.

        A starting group is obtained by sorting the node names, shuffling them with :py:mod:`random` and taking the
        first ``k``. The indices of that group are then handed to the class optimization loop together with a scoring
        function bound to the requested metric. A summary of the result is written to standard output.

        | Supported metrics (see :class:`~pyntacle.tools.enums.GroupCentralityEnum`):

            * **group degree**: scored by :func:`~pyntacle.algorithms.local_topology.LocalTopology.group_degree`
            * **group closeness**: scored by :func:`~pyntacle.algorithms.local_topology.LocalTopology.group_closeness`
            * **group betweenness**: scored by :func:`~pyntacle.algorithms.local_topology.LocalTopology.group_betweenness`; the shortest path counts of the whole graph are computed once up front and reused

        :param igraph.Graph graph: a :py:class:`igraph.Graph` object whose vertices carry a ``name`` attribute
        :param int k: the size of the group of nodes to be found
        :param GroupCentralityEnum metric: the group centrality metric to optimize
        :param int,None seed: not read by this body. NOTE(review): presumably consumed by a decorator outside this block to seed :py:mod:`random` - confirm
        :param GroupDistanceEnum distance_type: forwarded to the group closeness scorer as ``distance``; it is ignored by the other metrics
        :param cmode: the shortest path implementation forwarded to the closeness and betweenness scorers. See :class:`~pyntacle.tools.enums.CmodeEnum`

        :return tuple: a tuple storing in ``[0]`` the sorted list of node ``name`` attributes of the selected group and in ``[1]`` its score rounded to 5 decimal places

        :raise KeyError: when ``metric`` is not one of the three supported :class:`~pyntacle.tools.enums.GroupCentralityEnum` values
        """

        # Collect the keyword arguments of the scoring function; every metric needs the graph.
        scorer_kwargs = {"graph": graph}
        if metric == GroupCentralityEnum.group_degree:
            scorer = LocalTopology.group_degree
        elif metric == GroupCentralityEnum.group_betweenness:
            # Graph-wide shortest path counts are computed once, not per candidate group.
            scorer_kwargs["np_counts"] = sp.get_shortestpath_count(
                graph, nodes=None, cmode=cmode)
            scorer_kwargs["cmode"] = cmode
            scorer = LocalTopology.group_betweenness
        elif metric == GroupCentralityEnum.group_closeness:
            scorer_kwargs["cmode"] = cmode
            scorer_kwargs["distance"] = distance_type
            scorer = LocalTopology.group_closeness
        else:
            raise KeyError(
                u"The argument 'metric' is not valid. It must be one of the following: {}"
                .format(list(GroupCentralityEnum)))
        type_func = partial(scorer, **scorer_kwargs)

        # Sorting before shuffling makes the starting group depend only on the random state.
        shuffled_names = sorted(graph.vs()["name"])
        random.shuffle(shuffled_names)
        S = gu(graph=graph).get_node_indices(shuffled_names[:k])
        S.sort()

        best_indices, group_score = GreedyOptimization.__optimization_loop(
            graph, S, type_func)

        final = sorted(graph.vs(best_indices)["name"])

        # The distance function is only meaningful (and only reported) for group closeness.
        metric_label = metric.name.replace("_", " ")
        if metric == GroupCentralityEnum.group_closeness:
            metric_label = metric_label + " - Distance function = " + distance_type.name

        group_label = "{" + str(final).replace("'", "")[1:-1] + "}"
        sys.stdout.write(
            u"Optimal group: {}\n Group size = {}\n Metric = {}\n Score = {}\n"
            .format(group_label, k, metric_label, group_score))

        return final, round(group_score, 5)
Ejemplo n.º 10
0
    def reachability(graph,
                     k: int,
                     metric: KpposEnum,
                     seed=None,
                     max_distance: int = None,
                     m=None,
                     cmode=CmodeEnum.igraph) -> (list, float):
        r"""
        It searches for the best *key player* (*kp*) set of a predefined size :math:`k`, also defined as positive key
        players (*kp-pos*) using reachability indices, described in Pyntacle
        `introductory guide <http://pyntacle.css-mendel.it/resources/kp_guide/kp_guide.html>`_

        A starting set is obtained by sorting the node names, shuffling them with :py:mod:`random` and taking the
        first ``k``. The set is then handed to the class optimization loop together with the scoring function of the
        requested metric, and a summary of the result is written to standard output.

        | Available reachability indices:

            * **m-reach**: min = 0 (unreachable); max = :math:`N - k` (graph is totally reached)
            * **dR**: min = 0 (the *k* set is disconnected from the rest of the graph); max = 1 (full reachability of the set)

        :param igraph.Graph graph: a :py:class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param int k: the size of the kp-set
        :param KpposEnum metric: one of the available :class:`~pyntacle.tools.enums.KpposEnum`
        :param int,None seed: not read by this body. NOTE(review): presumably consumed by a decorator outside this block to seed :py:mod:`random` - confirm
        :param int,None max_distance: optional, a maximum shortest path length after which two nodes will be considered disconnected. When given it must be an integer between 1 and the number of nodes of the graph. Default is :py:class:`~None` (no maximum distance is set)
        :param int m: the number of steps of the m-reach algorithm. Required (positive integer) when ``metric`` is the m-reach; ignored otherwise
        :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. For any value other than ``CmodeEnum.igraph`` the shortest paths of the whole graph are computed once and passed to the scoring function as ``sp_matrix``

        :return tuple: a tuple storing in ``[0]`` the sorted list of node ``name`` attributes of the selected *kp-set* and in ``[1]`` its *kp-pos* score rounded to 5 decimal places

        :raise KeyError: when ``metric`` is neither the m-reach nor the dR :class:`~pyntacle.tools.enums.KpposEnum`
        :raise ValueError: if ``max_distance`` is not :py:class:`None` and is not an integer between 1 and the number of nodes
        :raise WrongArgumentError: if the m-reach is requested and ``m`` is :py:class:`None`
        :raise TypeError: if the m-reach is requested and ``m`` is not a positive integer
        """

        if metric != KpposEnum.mreach and metric != KpposEnum.dR:
            raise KeyError(
                u"The parameter 'metric' is not valid. It must be one of the following: {}"
                .format(list(KpposEnum)))

        # The previous guard chained every test with ``and``; it therefore let invalid integers (zero, negative,
        # larger than the graph) through and crashed with a TypeError on non-numeric values. Reject anything that
        # is not an integer in [1, N]. The message text is kept unchanged for backward compatibility.
        if max_distance is not None and (
                not isinstance(max_distance, int) or max_distance < 1
                or max_distance > graph.vcount()):
            raise ValueError(
                u"'max_distance' must be an integer greater than one and lesser than the total number of nodes"
            )

        is_mreach = metric == KpposEnum.mreach
        if is_mreach:
            if m is None:
                raise WrongArgumentError(
                    "The 'm' argument is required for computing m-reach")
            if not isinstance(m, int) or m <= 0:
                raise TypeError(
                    u"The 'm' argument must be a positive integer value")

        # Sorting before shuffling makes the starting set depend only on the random state.
        node_names = sorted(graph.vs()["name"])
        random.shuffle(node_names)
        S_names = node_names[:k]
        S = gu(graph=graph).get_node_indices(S_names)
        S.sort()

        # Keyword arguments shared by both scoring functions.
        # NOTE(review): ``nodes`` is bound to the starting set here, as in the original code; the optimization
        # loop presumably overrides it for every candidate set - confirm.
        scorer_kwargs = {
            "nodes": S_names,
            "max_distance": max_distance,
            "cmode": cmode,
        }
        if is_mreach:
            scorer = kp.mreach
            scorer_kwargs["m"] = m
        else:
            scorer = kp.dR

        if cmode != CmodeEnum.igraph:
            # Compute the shortest paths of the whole graph once, so they are not recomputed per candidate.
            scorer_kwargs["sp_matrix"] = sp.get_shortestpaths(graph=graph,
                                                              cmode=cmode,
                                                              nodes=None)

        type_func = partial(scorer, **scorer_kwargs)

        final, reachability_score = GreedyOptimization.__optimization_loop(
            graph, S, type_func)
        final = graph.vs(final)["name"]
        final.sort()

        sys.stdout.write(
            u"Optimal group: {}\n Group size = {}\n Metric = {}\n Score = {}\n"
            .format("{" + str(final).replace("'", "")[1:-1] + "}", k,
                    metric.name.replace("_", " "), reachability_score))

        return final, round(reachability_score, 5)
Ejemplo n.º 11
0
    def fragmentation(graph,
                      k: int,
                      metric: KpnegEnum,
                      seed: int or None = None,
                      max_distance: int or None = None,
                      cmode=CmodeEnum.igraph) -> (list, float):
        r"""
        It searches for the best *key player* (*kp*) set of a predefined size :math:`k` according to a negative
        *key player* (*kp-neg*) fragmentation metric. For a quick view of key player indices, we recommend reading our
        `introductory guide <http://pyntacle.css-mendel.it/resources/kp_guide/kp_guide.html>`_

        A starting set is obtained by sorting the node names, shuffling them with :py:mod:`random` and taking the
        first ``k``. When the set leaves a single node in the graph, it is returned as is with a score of 1;
        otherwise it is handed to the class optimization loop together with the scoring function of the requested
        metric. A summary of the result is written to standard output.

        | Available kp-neg metrics:

            * **F**: min = 0 (the network is complete); max = 1 (all nodes are isolates)
            * **dF**: min = 0 (the network is complete); max = 1 (all nodes are isolates)

        .. warning:: fragmentation-based searches may require long times on very large graphs (:math:`N > 1000`)

        :param igraph.Graph graph: a :py:class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param int k: the size of the kp-set
        :param KpnegEnum metric: one of the available :class:`~pyntacle.tools.enums.KpnegEnum`
        :param int,None seed: not read by this body. NOTE(review): presumably consumed by a decorator outside this block to seed :py:mod:`random` - confirm
        :param int,None max_distance: optional, a maximum shortest path length after which two nodes will be considered disconnected. When given it must be an integer between 1 and the number of nodes of the graph. It is validated for both metrics but only forwarded to the dF scorer. Default is :py:class:`~None` (no maximum distance is set)
        :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths, forwarded to the dF scorer only. See :class:`~pyntacle.tools.enums.CmodeEnum`

        :return tuple: a tuple storing in ``[0]`` a list containing the node ``name`` attribute of the selected *kp-set* and in ``[1]`` its *kp-neg* score (rounded to 5 decimal places, or the integer 1 when the set leaves only one node)

        :raise KeyError: when ``metric`` is neither the F nor the dF :class:`~pyntacle.tools.enums.KpnegEnum`
        :raise ValueError: if ``max_distance`` is not :py:class:`None` and is not an integer between 1 and the number of nodes
        """

        if metric != KpnegEnum.F and metric != KpnegEnum.dF:
            raise KeyError(
                u"The parameter 'metric' is not valid. It must be one of the following: {}"
                .format(list(KpnegEnum)))

        # The previous guard chained every test with ``and``; it therefore let invalid integers (zero, negative,
        # larger than the graph) through and crashed with a TypeError on non-numeric values. Reject anything that
        # is not an integer in [1, N]. The message text is kept unchanged for backward compatibility.
        if max_distance is not None and (
                not isinstance(max_distance, int) or max_distance < 1
                or max_distance > graph.vcount()):
            raise ValueError(
                u"'max_distance' must be an integer greater than one and lesser than the total number of nodes"
            )

        # TODO CHECK SPEED ON NODE NAME SORTING (STRING SORT) #######
        # Sorting before shuffling makes the starting set depend only on the random state.
        node_names = sorted(graph.vs()["name"])
        random.shuffle(node_names)
        S_names = node_names[:k]

        S = gu(graph=graph).get_node_indices(S_names)
        S.sort()

        # A size that leaves only one node left; a g-k < 1 is dealt by the decorator.
        if graph.vcount() - k == 1:
            final = graph.vs(S)["name"]
            sys.stdout.write(
                u"A node set of size {} leaves only one node, returning the maximum {} score (1) and a random node set {}. \n"
                .format(k, metric.name, final))
            return final, 1

        if metric == KpnegEnum.F:
            # Wrapped in a partial with no bound arguments so both metrics hand the same kind of object to the loop.
            type_func = partial(kp.F)
        else:
            type_func = partial(kp.dF,
                                max_distance=max_distance,
                                cmode=cmode)

        final, fragmentation_score = GreedyOptimization.__optimization_loop(
            graph, S, type_func)

        final = graph.vs(final)["name"]
        final.sort()

        sys.stdout.write(
            u"Optimal group: {}\n Group size = {}\n Metric = {}\n Score = {}\n"
            .format("{" + str(final).replace("'", "")[1:-1] + "}", k,
                    metric.name.replace("_", " "), fragmentation_score))

        return final, round(fragmentation_score, 5)
Ejemplo n.º 12
0
    def dR(graph: igraph.Graph,
           nodes: list,
           max_distance: int or None = None,
           cmode: CmodeEnum = CmodeEnum.igraph,
           sp_matrix: np.ndarray or None = None) -> float:
        r"""
        Calculates the *dR* (*distance-weighted reach*) (described by the
        equation 14 in `The original article on key players <https://doi.org/10.1007/s10588-006-7084-x>`_), a positive
        key player (*kp-pos*) measure. The distance-weighted reach can be defined as the sum of the reciprocals of
        distances from the kp-set :math:`k` to all nodes, where the distance from the set to a node is defined as
        the minimum distance (minimum shortest path distance). The sum is divided by the number of nodes of the
        graph and rounded to 5 decimal places.

            * **dR** = 1 => Maximal reachability. The set :math:`k` is directly tied to the rest of the graph
            * **dR** = 0 => No reachability. The set :math:`k` is completely disconnected to the graph

        :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param list nodes: a list of strings that matches the node ``name`` attribute of the selected nodes.
        :param int,None max_distance: The maximum shortest path length over which two nodes are considered unreachable. Default is :py:class:`None` (distances are preserved). Any falsy value (including 0) is treated as "no maximum distance"
        :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. Default is ``CmodeEnum.igraph``
        :param None, np.ndarray sp_matrix: A :math:`NxN` (:math:`N` being the size of the graph) :py:class:`numpy.ndarray` storing the distances between nodes. :warning: Disconnected nodes **must** be represented as a distance greater than :math:`N`. It is only used when ``cmode`` is not ``CmodeEnum.igraph``; in that case the rows of the selected nodes are taken from the matrix instead of recomputing the shortest paths. Default is :py:class:`None`.

        :return float: the distance-weighted reach measure of the graph

        :raise TypeError: when ``max_distance`` is given and is not an integer
        :raise ValueError: when ``max_distance`` is given and is lesser than 1, or when ``sp_matrix`` is used and is not a :py:class:`numpy.ndarray`
        :raise WrongArgumentError: when ``sp_matrix`` is used and its first dimension differs from the number of nodes
        """

        if max_distance:
            if not isinstance(max_distance, int):
                raise TypeError(
                    u"'max_distance' must be an integer value greater than one"
                )
            if max_distance < 1:
                raise ValueError(
                    u"'max_distance' must be an integer greater than one")

        # The computation used to live in the ``else`` branch of the validation above, so any valid
        # ``max_distance`` made the function return None. It now always runs.
        index_list = gu(graph=graph).get_node_indices(nodes=nodes)

        if cmode == CmodeEnum.igraph:
            shortest_path_lengths = sp.shortest_path_length_igraph(
                graph=graph, nodes=nodes)
        elif sp_matrix is None:
            shortest_path_lengths = sp.get_shortestpaths(graph=graph,
                                                         nodes=nodes,
                                                         cmode=cmode)
        elif not isinstance(sp_matrix, np.ndarray):
            raise ValueError(u"'sp_matrix' must be a numpy.ndarray instance")
        elif sp_matrix.shape[0] != graph.vcount():
            raise WrongArgumentError(
                u"The dimension of 'sp matrix' is different from the total number of nodes"
            )
        else:
            # Keep only the rows of the nodes in the set.
            shortest_path_lengths = sp_matrix[index_list, :]

        if max_distance:
            # Cap the distances of the selected rows (not the whole ``sp_matrix``, which may be None and would
            # also bring in the zero self-distances of the non-selected nodes).
            # NOTE(review): applied regardless of ``cmode``, as mreach does; confirm that
            # ``shortest_path_length_igraph`` returns a structure this helper accepts.
            shortest_path_lengths = ShortestPathModifier.set_max_distances_nparray(
                shortest_path_lengths, max_distance)

        # Every node outside the set contributes the reciprocal of its distance to the closest set member.
        vminusk = set(graph.vs.indices) - set(index_list)
        dr_num = 0
        for j in vminusk:
            dr_num += 1 / min(spl[j] for spl in shortest_path_lengths)

        return round(dr_num / float(graph.vcount()), 5)
Ejemplo n.º 13
0
    def mreach(graph: igraph.Graph,
               nodes: list or str or None,
               m: int,
               max_distance: int or None = None,
               cmode: CmodeEnum = CmodeEnum.igraph,
               sp_matrix: np.ndarray = None) -> int:
        r"""
        Calculate the *m-reach* , a positive *key player* measure  (*kp-pos*) described by the
        equation 12 in `The original article on key players <https://doi.org/10.1007/s10588-006-7084-x>`_.
        The m-reach returns the number of nodes outside the set :math:`k` that are reached by at least one node of the
        set in :math:`m` steps or less.

        :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param list nodes: a list of strings that matches the node ``name`` attribute of the selected nodes.
        :param int m: an integer between 1 and the number of nodes of the graph representing the maximum m-reach distance.
        :param int,None max_distance: The maximum shortest path length over which two nodes are considered unreachable. Default is :py:class:`None` (distances are preserved). Any falsy value (including 0) is treated as "no maximum distance"
        :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. Default is ``CmodeEnum.igraph``
        :param None, np.ndarray sp_matrix: A :math:`NxN` (:math:`N` being the size of the graph) :py:class:`numpy.ndarray` storing the distances between nodes. :warning: Disconnected nodes **must** be represented as a distance greater than :math:`N`. It is only used when ``cmode`` is not ``CmodeEnum.igraph``; in that case the rows of the selected nodes are taken from the matrix instead of recomputing the shortest paths. Default is :py:class:`None`.

        :return int: An integer representing the number of nodes reached by the input node(s) in *m* steps or less

        :raise TypeError: when ``m`` is not a :py:class:`int`, or when ``max_distance`` is given and is not an integer
        :raise ValueError: if ``m`` is lesser than 1 or greater than the size of the graph, if ``max_distance`` is given and is lesser than 1, or when ``sp_matrix`` is used and is not a :py:class:`numpy.ndarray`
        :raise WrongArgumentError: when ``sp_matrix`` is used and its first dimension differs from the number of nodes
        """
        if not isinstance(m, int):
            raise TypeError(u"'m' must be an integer")

        if m < 1 or m > graph.vcount():
            raise ValueError(
                u"'m' must be greater than zero and less or equal than the total number of vertices"
            )

        if max_distance:
            if not isinstance(max_distance, int):
                raise TypeError(
                    u"'max_distance' must be an integer value greater than one"
                )
            if max_distance < 1:
                raise ValueError(
                    u"'max_distance' must be an integer value greater than one"
                )

        # The shortest paths used to be computed in the ``else`` branch of the validation above, so any valid
        # ``max_distance`` ended in an UnboundLocalError further down. They are now always computed.
        index_list = gu(graph=graph).get_node_indices(nodes=nodes)

        if cmode == CmodeEnum.igraph:
            shortest_path_lengths = sp.shortest_path_length_igraph(
                graph, nodes=nodes)
        elif sp_matrix is None:
            # ``is None`` rather than truthiness: the truth value of a multi-element ndarray is ambiguous and
            # raises a ValueError.
            shortest_path_lengths = sp.get_shortestpaths(graph=graph,
                                                         cmode=cmode,
                                                         nodes=nodes)
        elif not isinstance(sp_matrix, np.ndarray):
            raise ValueError(u"'sp_matrix' must be a numpy.ndarray instance")
        elif sp_matrix.shape[0] != graph.vcount():
            raise WrongArgumentError(
                u"The dimension of 'sp matrix' is different from the total "
                "number of nodes")
        else:
            # Keep only the rows of the nodes in the set.
            shortest_path_lengths = sp_matrix[index_list, :]

        if max_distance:
            shortest_path_lengths = ShortestPathModifier.set_max_distances_nparray(
                shortest_path_lengths, max_distance)

        # Count every node outside the set that lies within ``m`` steps of at least one set member.
        vminusk = set(graph.vs.indices) - set(index_list)
        reached = 0
        for j in vminusk:
            if any(spl[j] <= m for spl in shortest_path_lengths):
                reached += 1

        return reached