예제 #1
0
    def check_index_list(self, indices: list):
        r"""
        Check that an index list (a list of integers representing the numeric indices of the graph vertices)
        is present in the :py:class:`~igraph.Graph` object stored in ``self.graph``. The indices must be
        non-negative integers ranging from :math:`0` to :math:`N-1`, where :math:`N` is the size of the graph.

        :param list indices: a non-empty list of non-negative integers
        :raise ValueError: if ``indices`` is not a list, or if any of its elements is not a non-negative integer
        :raise WrongArgumentError: if ``indices`` is empty or if any of its elements does not exist in the graph.
        """

        if not isinstance(indices, list):
            raise ValueError(u"index list is not a list")

        if len(indices) == 0:
            raise WrongArgumentError(u"List is empty")

        for ind in indices:
            if not isinstance(ind, int) or ind < 0:
                raise ValueError("indices must be positive integers")

        # Every requested index must be a vertex of the graph. A set difference
        # catches the case where only some of the indices are out of range
        # (a proper-superset test would let those through).
        missing = set(indices) - set(self.graph.vs.indices)
        if missing:
            raise WrongArgumentError(
                u"The input node index '{}' does not exist in the graph".
                format(sorted(missing)))

        return None
예제 #2
0
    def add_node_attribute(graph: Graph, attr_name: str, attr_list: list, nodes: list):
        r"""
        Add attributes at the vertex level of a :py:class:`~igraph.Graph` object. These attributes must be stored in a
        :py:class:`list` whose elements must be sorted by ``nodes`` (a list of strings storing the vertex ``name``
        attribute).

        .. warning:: if the vertex attribute name (``attr_name``) is already initialized, it will be overwritten by this method.

        :param igraph.Graph graph: a :class:`igraph.Graph` object.
        :param str attr_name: The name of the attribute that will be added to the :py:class:`~igraph.Graph`
        :param list attr_list: a list of objects, sorted by the ``nodes`` parameter. Each object will be added singularly to the corresponding node
        :param list nodes: the vertex ``name`` attribute corresponding to the vertices to which attributes will be added. A single string is accepted and is wrapped into a one-element list.
        :raise TypeError: if ``graph`` is not a :py:class:`~igraph.Graph` or ``attr_name`` is not a string
        :raise AssertionError: if ``attr_list`` and ``nodes`` have different lengths
        :raise ValueError: if a node name matches more than one vertex
        :raise WrongArgumentError: If all the attributes pointed to non-existing nodes.
        """
        # The previous form, ``not isinstance(...) is not Graph``, always
        # evaluated to False, so a wrong ``graph`` type was never reported.
        if not isinstance(graph, Graph):
            raise TypeError(u"graph argument is not a igraph.Graph")

        if not isinstance(attr_name, str):
            raise TypeError(u"Attribute name is not a string")

        if isinstance(nodes, str):
            sys.stdout.write(u"Converting string nodes to list of nodes\n")
            nodes = [nodes]

        # Kept as an assert so callers that expect AssertionError keep working.
        assert len(attr_list) == len(nodes), u"In add_node_attribute, length of attributes list cannot be " \
                                             "different from length of list of nodes."

        count = 0  # number of node names processed
        err_count = 0  # number of node names with no match in the graph
        for n, a in zip(nodes, attr_list):
            select = graph.vs.select(name=n)
            count += 1
            if len(select) == 0:
                # newline added so consecutive messages do not run together
                sys.stdout.write(u"Node %s not found in graph\n" % n)
                err_count += 1

            elif len(select) == 1:
                select[0][attr_name] = a

            else:
                sys.stdout.write(u"Node %s has multiple name hits, please check your attribute file\n" % n)
                raise ValueError(u"Multiple node hits")

        # Also true when ``nodes`` is empty (0 == 0), as in the original code.
        if err_count == count:
            raise WrongArgumentError(u"All the attributes pointed to non-existing nodes.")
        else:
            sys.stdout.write(u"Node attribute {} added\n".format(attr_name))
예제 #3
0
    def subtract_count_dist_matrix(count_all: np.ndarray,
                                   count_nogroup: np.ndarray) -> np.ndarray:
        # Returns a copy of ``count_all`` in which, for every upper-triangle
        # cell (row <= col), the value is replaced by
        # ``count_all - count_nogroup`` whenever the mirrored (col, row) cells
        # of the two matrices are equal. Both inputs must be square and share
        # the same side length.
        same_side = count_all.shape[0] == count_all.shape[1] == count_nogroup.shape[
            0] == count_nogroup.shape[1]
        if not same_side:
            raise WrongArgumentError(
                u"Parameter error",
                "The function parameters do not have the same shape")

        side = count_all.shape[0]
        diff = np.copy(count_all)

        for row in prange(side):
            for col in prange(row, side):
                # the test reads the mirrored cell, the update writes the upper one
                if count_all[col, row] == count_nogroup[col, row]:
                    diff[row, col] = count_all[row, col] - count_nogroup[row, col]

        return diff
예제 #4
0
    def add_edge_attribute(graph: Graph, attr_name: str, attr_list: list, edges: list):
        r"""
        Add edge attributes to the input :py:class:`igraph.Graph` object under the attribute name specified in ``attr_name``.
        The attributes must be stored in a list, passed to ``attr_list`` and sorted according to the target edge list,
        specified in ``edges``.

        Each edge is looked up through the ``adjacent_nodes`` edge attribute; if no match is found, the
        reversed pair ``(e[1], e[0])`` is tried before the edge is reported as missing.

        .. warning:: if the edge attribute name (``attr_name``) is already initialized, it will be overwritten by this method.

        :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param str attr_name: string. The name of the attribute being added to the edges of the Graph.
        :param list attr_list: a list, sorted like ``edges``, storing the values that will be added to each target edge.
        :param list edges: edges to which attributes will be applied, each one expressed as the pair stored in the ``adjacent_nodes`` edge attribute.
        :raise TypeError: if ``graph`` is not a :py:class:`~igraph.Graph` or ``attr_name`` is not a string
        :raise AssertionError: if ``attr_list`` and ``edges`` have different lengths
        :raise ValueError: if one of the edges IDs points to more than one edge (edge names must be univocal)
        :raise WrongArgumentError: if none of the ``edges`` points to an existing edge
        """

        # The previous form, ``not isinstance(...) is not Graph``, always
        # evaluated to False, so a wrong ``graph`` type was never reported.
        if not isinstance(graph, Graph):
            raise TypeError(u"graph argument is not a igraph.Graph")

        if not isinstance(attr_name, str):
            raise TypeError("Attribute name is not a string")

        # Kept as an assert so callers that expect AssertionError keep working.
        assert len(attr_list) == len(edges), u"in add_edge_attribute, length of attributes list cannot be " \
                                             "different from length of list of nodes."
        count = 0  # number of edges processed
        err_count = 0  # number of edges with no match in the graph
        for e, a in zip(edges, attr_list):
            select = graph.es.select(adjacent_nodes=e)
            if len(select) == 0:
                # the graph is handled as undirected: retry with the endpoints swapped
                select = graph.es.select(adjacent_nodes=(e[1], e[0]))
            count += 1
            if len(select) == 0:
                sys.stdout.write(u"Edge %s not found in graph\n" % str(e))
                err_count += 1

            elif len(select) == 1:
                select[0][attr_name] = a

            else:
                # the edge is now actually interpolated into the message
                raise ValueError(
                    u"Edge %s has multiple name hits, edge `adjacent_nodes` must be univocal" % str(e))

        # Also true when ``edges`` is empty (0 == 0), as in the original code.
        if err_count == count:
            raise WrongArgumentError("All the attributes pointed to non-existing edges.")
        else:
            sys.stdout.write("Edge attribute {} added\n".format(attr_name))
예제 #5
0
    def Binary(file: str) -> Graph:
        r"""
        Loads a binary file (a :py:class:`pickle` object) that stores a :py:class:`igraph.Graph` object and makes it
        ready to be used for Pyntacle.

        We refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#bin>`_
        within the Pyntacle official page for more details regarding the specifics of :py:class:`igraph.Graph`
        binary objects that can be serialized by Pyntacle.

        .. warning:: the file is deserialized with :py:mod:`pickle`, which can execute arbitrary code. Only load binaries coming from a trusted source.

        :param str file: the location of the binary file

        :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

        :raise WrongArgumentError: if ``file`` is not recognized as a binary file
        :raise IOError: if the binary does not contain a :py:class:`igraph.Graph` object
        :raise IllegalGraphSizeError: if the graph has less than two nodes or no edges
        """

        if not is_binary_file(file):
            raise WrongArgumentError(u"file is not a binary")

        # NOTE(security): pickle.load runs arbitrary code embedded in the file;
        # this is kept because the binary format *is* a pickle, see the warning above.
        # The context manager guarantees the handle is closed even if unpickling fails.
        with open(file, "rb") as handle:
            graph = pickle.load(handle)

        if not isinstance(graph, Graph):
            raise IOError(u"binary is not a graph object")

        # "at least 2 nodes linked by one edge" is violated as soon as EITHER
        # condition fails, hence ``or`` (``and`` accepted edgeless graphs).
        if graph.ecount() < 1 or graph.vcount() < 2:
            raise IllegalGraphSizeError(
                u"Graph must contain at least 2 nodes linked by one edge")

        utils = gu(graph=graph)
        utils.graph_initializer(
            graph_name=os.path.splitext(os.path.basename(file))[0])

        if graph.is_directed():
            sys.stdout.write(u"Converting graph to undirect\n")
            graph.to_undirected()

        utils.check_graph()
        graph = utils.get_graph()
        sys.stdout.write(u"Binary from {} imported\n".format(
            os.path.basename(file)))
        return graph
예제 #6
0
def crunch_groupcentrality_combinations(graph: Graph, node_names_list: list,
                                        np_counts: np.ndarray,
                                        np_paths: np.ndarray,
                                        gc_enum: GroupCentralityEnum,
                                        distance_type: GroupDistanceEnum,
                                        cmode: CmodeEnum) -> dict:
    r"""
    Compute the group centrality selected by ``gc_enum`` for every group of node names in ``node_names_list``.

    The shortest-path matrix (``np_paths``, group closeness) and the shortest-path count matrix (``np_counts``,
    group betweenness) are computed on first use when the passed value is :py:class:`None` or empty, and then
    reused for the remaining groups.

    :param igraph.Graph graph: the graph on which the scores are computed
    :param list node_names_list: an iterable of node-name groups; each group becomes a key (as a tuple) of the result
    :param np.ndarray np_counts: precomputed shortest-path counts, or :py:class:`None`/empty to compute them here
    :param np.ndarray np_paths: precomputed shortest-path lengths, or :py:class:`None`/empty to compute them here
    :param GroupCentralityEnum gc_enum: the group centrality to compute
    :param GroupDistanceEnum distance_type: forwarded as ``distance`` to the group closeness computation
    :param CmodeEnum cmode: the implementation used for shortest-path computations

    :return dict: a mapping ``tuple(node_names) -> score``

    :raise WrongArgumentError: if ``gc_enum`` is not one of the handled group centralities
    """
    results = {}

    for group in node_names_list:
        if gc_enum == GroupCentralityEnum.group_degree:
            group_score = LocalTopology.group_degree(graph, nodes=group)

        elif gc_enum == GroupCentralityEnum.group_closeness:
            paths_unavailable = np_paths is None or np_paths.size == 0
            if paths_unavailable:
                # computed once, then reused by the following iterations
                np_paths = sp.get_shortestpaths(graph, nodes=None, cmode=cmode)
            group_score = LocalTopology.group_closeness(
                graph, group, distance=distance_type, np_paths=np_paths)

        elif gc_enum == GroupCentralityEnum.group_betweenness:
            counts_unavailable = np_counts is None or np_counts.size == 0
            if counts_unavailable:
                # computed once, then reused by the following iterations
                np_counts = sp.get_shortestpath_count(
                    graph, nodes=None, cmode=cmode)
            group_score = LocalTopology.group_betweenness(
                graph, group, cmode=cmode, np_counts=np_counts)

        else:
            message = "{} function not yet implemented.".format(gc_enum.name)
            raise WrongArgumentError(message)

        results[tuple(group)] = group_score

    return results
예제 #7
0
    def AdjacencyMatrix(file: str,
                        sep: str or None = None,
                        header: bool = True) -> Graph:
        r"""
        Imports an adjacency matrix file to a :py:class:`igraph.Graph` object ready to be used by Pyntacle.

        For more information on adjacency matrices we refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#adjm>`_
        on Pyntacle website.

        .. note:: We support unweighted undirected Adjacency Matrices, so only zeroes and ones are allowed in the input file.

        .. note:: If a header is present, it **must** contain unique names (two nodes can't have the same ID). If not, an error will be raised. The names of the nodes will be assigned to the vertex ``name`` attribute. If the header is not present, the node "name" attribute will be the corresponding sequential index assigned by igraph.

        :param str file: the path to the file storing the adjacency matrix
        :param None,str sep: The field separator inside the network file. If :py:class:`None` (default) it will be guessed. Otherwise, you can place the string representing the column separator.
        :param bool header: Whether the header is present or not (default is ``True``)

        :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

        :raise WrongArgumentError: if ``sep`` is given and is not found in the first line of the adjacency matrix
        :raise ValueError: if the matrix is not squared
        """

        # NOTE(review): ``sep`` is forwarded unchanged, so AdjmUtils is assumed
        # to cope with ``sep=None`` on its own -- confirm.
        if not AdjmUtils(file=file, header=header, sep=sep).is_squared():
            raise ValueError(u"Matrix is not squared")

        read_options = {"sep": sep}
        if sep is None:
            # Documented default: let pandas sniff the delimiter. Only the
            # python engine can do that; naming it avoids the fallback warning.
            read_options["engine"] = "python"
        else:
            # Only the first line is needed for the check, so the handle is
            # released before pandas re-reads the file. An empty file yields ''
            # here and is reported as a missing separator.
            with open(file, "r") as adjmatrix:
                first_line = adjmatrix.readline().strip()

            if sep not in first_line:
                raise WrongArgumentError(
                    u'The specified separator "{}" is not present in the adjacency matrix file'
                    .format(sep))

        if header:
            # the first column holds the row names, the first row the column names
            f = pd.read_csv(filepath_or_buffer=file, index_col=0, **read_options)
            # force index to string, in case it is not identified properly
            f.index = f.index.map(str)
            # sort both axes alphabetically so rows and columns are aligned
            f = f.reindex(sorted(f.columns), axis=1)
            f = f.reindex(sorted(f.index), axis=0)
            node_names = f.columns.values.tolist()

        else:
            f = pd.read_csv(filepath_or_buffer=file, header=None, **read_options)
            node_names = list(map(str, f.index))

        graph = Graph.Adjacency(f.values.tolist(), mode="UPPER")
        util = gu(graph=graph)
        util.graph_initializer(
            graph_name=os.path.splitext(os.path.basename(file))[0],
            node_names=node_names)
        graph = util.get_graph()

        sys.stdout.write(u"Adjacency matrix from {} imported\n".format(
            os.path.basename(file)))
        return graph
예제 #8
0
    def graph_initializer(self,
                          graph_name: str,
                          node_names: list or None = None):
        r"""
        Turn the :py:class:`igraph.Graph` stored in ``self.graph`` into a network compliant with the
        Pyntacle `Minimum requirements <http://pyntacle.css-mendel.it/requirements.html>`_.

        In order, the method: makes the graph undirected; adds the graph ``name`` attribute, the vertex ``name``
        and ``parent`` attributes and the edge ``adjacent_nodes`` attribute when they are missing; initializes the
        ``sif_interaction_name`` (graph) and ``sif_interaction`` (edge) attributes to :py:class:`None` when they
        are missing; prunes the isolates; stores the chosen shortest-path implementation in the graph
        ``implementation`` attribute and finally runs ``check_graph()``.

        .. warning:: This method will prune the graph of any node isolates, as they are not accepted by Pyntacle.

        :param str graph_name: the network name, used for the graph ``name`` attribute when the graph has none. It must not contain illegal characters (see the Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_).
        :param list, None node_names: optional, a list of strings as long as the number of vertices. It is used, index by index, as vertex ``name`` attribute when the vertices have none. When :py:class:`None` (default) the vertex indices (as strings) are used instead.
        :raise ValueError: if ``graph_name`` is not a string or contains illegal characters, or if ``node_names`` does not have one item per vertex.
        :raise WrongArgumentError: if ``node_names`` is not a list of strings.
        """

        if not isinstance(graph_name, str):
            raise ValueError("'graph_name' must be a string")

        try:
            attribute_name_checker(graph_name)
        except ValueError:
            raise ValueError("'graph_name' contains illegal characters\n")

        # make sure the graph is undirected before anything else is added
        self.graph.to_undirected()

        # graph name
        if "name" not in self.graph.attributes():
            self.logger.info(u"Adding file name to graph name")
            AddAttributes.add_graph_name(self.graph, graph_name)

        # vertex names: the provided ones if any, the vertex indices otherwise
        if "name" not in self.graph.vs.attributes():
            if node_names is not None:
                is_list_of_str = isinstance(node_names, list) and all(
                    isinstance(item, str) for item in node_names)
                if not is_list_of_str:
                    raise WrongArgumentError(
                        u"`node_names` argument must be a list of strings")

                if len(node_names) != self.graph.vcount():
                    raise ValueError(
                        u"`node_names` argument must be of the same length of vertices"
                    )

                self.logger.info(
                    u"Adding node names to graph using the provided node names"
                )
                self.graph.vs["name"] = node_names

            else:
                self.logger.info(
                    u"Adding node names to graph corresponding to their indices"
                )
                self.graph.vs()["name"] = [
                    str(vertex.index) for vertex in self.graph.vs()
                ]

        # reserved vertex attribute 'parent'
        if "parent" not in self.graph.vs().attributes():
            self.logger.info(
                u"Adding reserved attribute 'parent' to the vertices")
            AddAttributes.add_parent_name(self.graph)

        # edge attribute 'adjacent_nodes' (names of the two endpoints)
        if "adjacent_nodes" not in self.graph.es().attributes():
            self.logger.info(
                u"Adding source and target names as 'adjacent_nodes' attribute to edges"
            )
            AddAttributes.add_edge_names(self.graph)

        # placeholders used by the sif file conversion
        if "sif_interaction_name" not in self.graph.attributes():
            self.graph["sif_interaction_name"] = None

        if "sif_interaction" not in self.graph.es().attributes():
            self.graph.es()["sif_interaction"] = None

        # remove any isolate and store them into the `isolates` graph attribute
        self.prune_isolates()

        # Pick the shortest-path implementation: igraph by default, the
        # multi-core one only for graphs with more than 100 nodes that are not
        # both sparse (density < 0.5) and small (<= 500 nodes), and only when
        # at least two CPUs are available.
        # TODO: prefer CmodeEnum.gpu here (when cuda is available) once the
        # GPU module is released.
        sp_implementation = CmodeEnum.igraph
        n_nodes = self.graph.vcount()

        if n_nodes > 100:
            density = (2 * (self.graph.ecount())) / (n_nodes * (n_nodes - 1))
            sparse_and_small = density < 0.5 and n_nodes <= 500
            if not sparse_and_small and n_cpus >= 2:
                sp_implementation = CmodeEnum.cpu

        self.graph["implementation"] = sp_implementation

        # check that everything is in order
        self.check_graph()
예제 #9
0
    def write_report(self,
                     report_dir=None,
                     format="tsv",
                     choices=report_format) -> str:
        r"""
        Write the report previously built by one of the *report* functions (stored in ``self.report``) to a file.

        The file is named *Report_GRAPHNAME_COMMAND_DATE.EXT*, where:

        * **GRAPHNAME** is the first value stored in the ``graph["name"]`` attribute (omitted when the report type is ``Set``),
        * **COMMAND** is the name of the report type (``self.report_type.name``),
        * **DATE** is the value stored in ``self.dat`` and
        * **EXT** is the extension associated to ``format`` in ``choices``.

        Before writing, every cell of ``self.report`` is cast to string and the underscores in the first row are
        replaced with spaces; ``self.report`` keeps this normalized form afterwards.

        :param str,None report_dir: the directory that will store the report. It is created if it does not exist. If :py:class:`None` (default) the current directory is used.
        :param str format: the output format, one of the keys of ``choices``. Default is ``tsv``.
        :param dict choices: a mapping between the supported formats and the corresponding file extension.

        :return str: the path to the report file

        :raise EnvironmentError: if the report was not created beforehand
        :raise WrongArgumentError: if ``format`` is not among the supported ``choices``
        """

        if not self.report:
            raise EnvironmentError(
                u"A report must be created first using the 'create_report()' function"
            )

        # cast every element of the list of lists to string, just in case
        self.report = [list(map(str, x)) for x in self.report]
        # replace all the underscores with spaces in the first row
        self.report[0] = [x.replace("_", " ") for x in self.report[0]]

        if format not in choices:
            raise WrongArgumentError(
                u"file format {} is not supported".format(format))

        if report_dir is None:
            self.logger.info(
                u"Directory not specified. Using current directory")
            report_dir = os.getcwd()

        elif not os.path.isdir(report_dir):
            self.logger.warning(
                u"Specified directory does not exists, creating it")
            os.makedirs(report_dir, exist_ok=True)

        # normalize in every case, including a freshly created directory
        report_dir = os.path.abspath(report_dir)

        if len(self.graph["name"]) > 1:
            self.logger.warning(
                u"Using the first 'name' attribute of graph name since more than one is specified"
            )

        graphname = self.graph["name"][0]

        extension = choices[format]

        if self.report_type.name == 'Set':
            name_parts = ["Report", self.report_type.name, self.dat]
        else:
            name_parts = ["Report", graphname, self.report_type.name, self.dat]
        report_path = os.path.join(report_dir,
                                   "_".join(name_parts) + "." + extension)

        if extension != "xlsx":
            # the csv module requires newline="" to avoid spurious blank
            # lines on platforms that translate line endings
            with open(report_path, "w",
                      newline="" if extension == "csv" else None) as out:

                if extension == "tsv":
                    self.logger.info(
                        u"Writing Pyntacle report to a tab-separated file (tsv)"
                    )
                    # Build each line without touching self.report: appending
                    # "\n" to the rows left a trailing tab on every line and
                    # corrupted the report on a second call.
                    out.writelines("\t".join(x) + "\n" for x in self.report)

                elif extension == "csv":
                    self.logger.info(
                        u"Writing Pyntacle report to a comma-separated value file (csv)"
                    )
                    writer = csv.writer(out)
                    writer.writerows(self.report)

        else:
            self.logger.info(
                u"Writing Pyntacle report to a an excel file (xlsx)")
            workbook = xlsxwriter.Workbook(report_path,
                                           {'constant_memory': True})
            try:
                workbook.use_zip64()
                # local name distinct from the ``format`` parameter
                cell_format = workbook.add_format()

                worksheet = workbook.add_worksheet("Pyntacle Report")

                for row, elem in enumerate(self.report):
                    for col, p in enumerate(elem):
                        worksheet.write(row, col, p, cell_format)
            finally:
                # always finalize the workbook, even if a write fails
                workbook.close()

        return report_path
예제 #10
0
    def reachability(graph,
                     k: int,
                     metric: KpposEnum,
                     seed=None,
                     max_distance: int = None,
                     m=None,
                     cmode=CmodeEnum.igraph) -> (list, float):
        r"""
        It searches for the best *key player* (*kp*) set of a predefined size :math:`k`, also defined as positive key
        players (*kp-pos*) using reachability indices, described in Pyntacle
        `introductory guide <http://pyntacle.css-mendel.it/resources/kp_guide/kp_guide.html>`_
        The optimal kp set will be the one that has the highest reachability, i.e. no switching of nodes from the set
        :math:`k` to the rest of the nodes in the graph :math:`N-k` can improve the selected reachability score.

        | Available reachability indices:

            * **m-reach**: min = 0 (unreachable); max = :math:`N - k` (graph is totally reached)
            * **dR**: min = 0 (the *k* set is disconnected from the rest of the graph); max = 1 (full reachability of the set)

        :param igraph.Graph graph: a :py:class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param int k: the size of the kp-set. Must be a positive integer.
        :param KpposEnum metric: one of the available :class:`~pyntacle.tools.enums.KpposEnum`
        :param int,None seed: optional, a positive integer that can be used to replicate the greedy optimization run. If :py:class:`~None` (default), the greedy optimization may return different results at each run.
        :param int,None max_distance: optional, define a maximum shortest path after which two nodes will be considered disconnected. Default is  :py:class:`~None` (no maximum distance is set)
        :param int m: The number of steps of the m-reach algorithm. Required if the requested metric is the :func:`~tools.enums.KPPosEnum.mreach`
        :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. Default is the igraph brute-force shortest path search.

        :return tuple: a tuple storing in ``[0]`` a list containing the node ``name`` attribute of the optimal *kp-set* and in ``[1]``  the optimal *kp-pos* value for the selected metric

        :raise KeyError: when an invalid :class:`~pyntacle.tools.enums.KpposEnum` is given
        :raise TypeError: if ``m`` is not a positive integer when the m-reach metric is requested
        :raise ValueError: if ``max_distance`` is neither :py:class:`None` nor an integer between 1 and the total number of nodes
        :raise WrongArgumentError: if the m-reach metric is requested and ``m`` is :py:class:`None`
        """
        # NOTE(review): ``k`` and ``seed`` are not validated (and ``seed`` is not
        # read) in this body; presumably a decorator outside this block takes
        # care of them -- confirm.

        if metric != KpposEnum.mreach and metric != KpposEnum.dR:
            raise KeyError(
                u"The parameter 'metric' is not valid. It must be one of the following: {}"
                .format(list(KpposEnum)))

        # Reject anything that is not None or an integer in [1, N]. The
        # previous condition and-ed the validity tests together and therefore
        # let invalid values through.
        if max_distance is not None and (
                not isinstance(max_distance, int) or max_distance < 1
                or max_distance > graph.vcount()):
            raise ValueError(
                u"'max_distance' must be an integer greater than one and lesser than the total number of nodes"
            )

        if metric == KpposEnum.mreach:
            if m is None:
                raise WrongArgumentError(
                    "The 'm' argument is required for computing m-reach")
            if not isinstance(m, int) or m <= 0:
                raise TypeError(
                    u"The 'm' argument must be a positive integer value")

        # random starting kp-set: sort first so the shuffle starts from a
        # deterministic order, then take the first k names
        node_names = graph.vs()["name"]
        node_names.sort()
        random.shuffle(node_names)
        S_names = node_names[:k]
        S = gu(graph=graph).get_node_indices(S_names)
        S.sort()

        # keyword arguments shared by both metrics
        metric_kwargs = {
            "nodes": S_names,
            "max_distance": max_distance,
            "cmode": cmode,
        }
        if metric == KpposEnum.mreach:
            metric_function = kp.mreach
            metric_kwargs["m"] = m
        else:
            metric_function = kp.dR

        if cmode != CmodeEnum.igraph:
            # compute the whole shortest path matrix once and reuse it at
            # every step of the optimization
            metric_kwargs["sp_matrix"] = sp.get_shortestpaths(graph=graph,
                                                              cmode=cmode,
                                                              nodes=None)

        type_func = partial(metric_function, **metric_kwargs)

        final, reachability_score = GreedyOptimization.__optimization_loop(
            graph, S, type_func)
        final = graph.vs(final)["name"]
        final.sort()

        sys.stdout.write(
            u"Optimal group: {}\n Group size = {}\n Metric = {}\n Score = {}\n"
            .format("{" + str(final).replace("'", "")[1:-1] + "}", k,
                    metric.name.replace("_", " "), reachability_score))

        return final, round(reachability_score, 5)
예제 #11
0
    def dR(graph: igraph.Graph,
           nodes: list,
           max_distance: int or None = None,
           cmode: CmodeEnum = CmodeEnum.igraph,
           sp_matrix: np.ndarray or None = None) -> float:
        r"""
        Calculates the *dR* (*distance-weighted reach*) (described by the
        equation 14 in `The original article on key players <https://doi.org/10.1007/s10588-006-7084-x>`_), a positive
        key player (*kp-pos*) measure. The distance-weighted reach can be defined as the sum of the reciprocals of
        distances from the kp-set :math:`k` to all nodes, where the distance from the set to a node is defined as
        the minimum distance (minimum shortest path distance).  dR ranges from 0 to 1, where:

            * **dR** = 1 => Maximal reachability. The set :math:`k` is directly tied to the rest of the graph
            * **dR** = 0 => No reachability. The set :math:`k` is completely disconnected to the graph

        :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param list nodes: a list of strings that matches the node ``name`` attribute of the selected nodes.
        :param int,None max_distance: The maximum shortest path length over which two nodes are considered unreachable. Default is :py:class:`None` (distances are preserved). A value of ``0`` is treated like :py:class:`None`.
        :param CmodeEnum cmode: the implementation used to compute the shortest paths. Default is the igraph one.
        :param None, np.ndarray sp_matrix:  A :math:`NxN` (:math:`N` being the size of the graph) :py:class:`numpy.ndarray` storing integers representing the distances between nodes. :warning: Disconnected nodes **must** be represented as a distance greater than :math:`N`. It is used only when ``cmode`` is not the igraph implementation; in that case the shortest paths are read from the matrix instead of being computed. Default is :py:class:`None`.

        :return float: the distance-weighted reach measure of the graph, rounded to 5 decimals

        :raise TypeError: when ``max_distance`` is not an integer
        :raise ValueError: when ``max_distance`` is lower than 1 or when ``sp_matrix`` is neither :py:class:`None` nor a :py:class:`numpy.ndarray`
        :raise WrongArgumentError: when the first dimension of ``sp_matrix`` differs from the number of nodes
        """

        if max_distance:
            if not isinstance(max_distance, int):
                raise TypeError(
                    u"'max_distance' must be an integer value greater than one"
                )
            elif max_distance < 1:
                raise ValueError(
                    u"'max_distance' must be an integer greater than one")

        # The computation below used to live in the ``else`` branch of the
        # validation above, so any call with ``max_distance`` returned None.
        index_list = gu(graph=graph).get_node_indices(nodes=nodes)

        if cmode == CmodeEnum.igraph:
            shortest_path_lengths = sp.shortest_path_length_igraph(
                graph=graph, nodes=nodes)
        elif sp_matrix is None:
            shortest_path_lengths = sp.get_shortestpaths(graph=graph,
                                                         nodes=nodes,
                                                         cmode=cmode)
        elif not isinstance(sp_matrix, np.ndarray):
            raise ValueError(u"'sp_matrix' must be a numpy.ndarray instance")
        elif sp_matrix.shape[0] != graph.vcount():
            raise WrongArgumentError(
                u"The dimension of 'sp matrix' is different from the total number of nodes"
            )
        else:
            # keep only the rows of the nodes in the kp-set
            shortest_path_lengths = sp_matrix[index_list, :]

        if max_distance:
            # Cap the distances actually used below (not ``sp_matrix``, which
            # may be None and is not restricted to the kp-set rows).
            # NOTE(review): np.asarray guards against a non-ndarray result from
            # the igraph path -- confirm what shortest_path_length_igraph returns.
            shortest_path_lengths = ShortestPathModifier.set_max_distances_nparray(
                np.asarray(shortest_path_lengths), max_distance)

        dr_num = 0
        # nodes outside the kp-set
        vminusk = set(graph.vs.indices) - set(index_list)
        for j in vminusk:
            # distance from the set to j = minimum distance over the set members
            dKj = min(spl[j] for spl in shortest_path_lengths)
            dr_num += 1 / dKj

        dr = round(dr_num / float(graph.vcount()), 5)
        return dr
예제 #12
0
    def mreach(graph: igraph.Graph,
               nodes: list or str or None,
               m: int,
               max_distance: int or None = None,
               cmode: CmodeEnum = CmodeEnum.igraph,
               sp_matrix: np.ndarray = None) -> int:
        r"""
        Calculate the *m-reach*, a positive *key player* measure (*kp-pos*) described by
        equation 12 in `The original article on key players <https://doi.org/10.1007/s10588-006-7084-x>`_.
        The m-reach is the number of nodes outside the set :math:`k` that lie at a shortest path distance of
        :math:`m` or less from at least one node of :math:`k`.

        :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param list nodes: a list of strings that matches the node ``name`` attribute of the selected nodes.
        :param int m: an integer between 1 and the size of the graph (inclusive) representing the maximum m-reach distance.
        :param int,None max_distance: The maximum shortest path length over which two nodes are considered unreachable. Default is :py:class:`None` (distances are preserved)
        :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. Default is the igraph shortest path search.
        :param None, np.ndarray sp_matrix: A :py:class:`numpy.ndarray` whose first dimension equals the size of the graph, storing the distances between nodes. The rows matching ``nodes`` are used in place of a fresh shortest path search. It is only consulted when ``cmode`` is **not** ``CmodeEnum.igraph``; with the default ``cmode`` it is ignored. Default is :py:class:`None`.

        :return int: the number of nodes (outside ``nodes``) reached by the input node(s) in *m* steps or less

        :raise TypeError: when ``m`` is not a :py:class:`int`, or when ``max_distance`` is given and is not a :py:class:`int`
        :raise ValueError: when ``m`` is lesser than 1 or greater than the size of the graph, when ``max_distance`` is lesser than 1, or when ``sp_matrix`` is neither :py:class:`None` nor a :py:class:`numpy.ndarray`
        :raise WrongArgumentError: when the first dimension of ``sp_matrix`` differs from the number of nodes in the graph

        .. note:: ``nodes`` is resolved through ``get_node_indices``; any error raised for unknown or malformed node names comes from that helper.
        """
        if not isinstance(m, int):
            raise TypeError(u"'m' must be an integer")
        if m < 1 or m > graph.vcount():
            raise ValueError(
                u"'m' must be greater than zero and less or equal than the total number of vertices"
            )

        # A falsy max_distance (None or 0) means "no distance cap" and skips validation.
        if max_distance:
            if not isinstance(max_distance, int):
                raise TypeError(
                    u"'max_distance' must be an integer value greater than one"
                )
            if max_distance < 1:
                raise ValueError(
                    u"'max_distance' must be an integer value greater than one"
                )

        # The node indices and the distances are needed whether or not a
        # distance cap was requested, so they are computed unconditionally.
        index_list = gu(graph=graph).get_node_indices(nodes=nodes)

        if cmode == CmodeEnum.igraph:
            shortest_path_lengths = sp.shortest_path_length_igraph(
                graph, nodes=nodes)
        elif sp_matrix is None:
            # Identity test: truth-testing a multi-element ndarray raises.
            shortest_path_lengths = sp.get_shortestpaths(graph=graph,
                                                         cmode=cmode,
                                                         nodes=nodes)
        elif not isinstance(sp_matrix, np.ndarray):
            raise ValueError(u"'sp_matrix' must be a numpy.ndarray instance")
        elif sp_matrix.shape[0] != graph.vcount():
            raise WrongArgumentError(
                u"The dimension of 'sp matrix' is different from the total "
                "number of nodes")
        else:
            # Keep only the rows of the precomputed matrix for the input nodes.
            shortest_path_lengths = sp_matrix[index_list, :]

        if max_distance:
            shortest_path_lengths = ShortestPathModifier.set_max_distances_nparray(
                shortest_path_lengths, max_distance)

        # Count every node outside the set that at least one set member
        # reaches within m steps (each such node is counted once).
        vminusk = set(graph.vs.indices) - set(index_list)
        return sum(
            1 for j in vminusk
            if any(spl[j] <= m for spl in shortest_path_lengths))
예제 #13
0
    def reachability(graph,
                     k,
                     metric: KpposEnum,
                     max_distance=None,
                     m=None,
                     cmode=CmodeEnum.igraph,
                     parallel=False,
                     ncores=None) -> (list, float):
        r"""
        Brute-force search of the *key player* (*kp*) set, or sets, of size ``k`` that best reaches all other nodes in the graph.
        Every combination of ``k`` node names is generated and scored through ``crunch_reachability_combinations``;
        the combinations achieving the highest score are returned.
        Available metrics that are used to score reachability are:

            * **m-reach**: min = 0 (unreachable); max = size(graph) - kpp_size (total reachability)
            * **dR**: min = 0 (unreachable); max = 1 (total reachability)

        :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
        :param int k: the size of the kp-set to be found
        :param KpposEnum metric: any available option of the enumerators :class:`~pyntacle.tools.enums.KpposEnum`
        :param int,None max_distance: optional, an integer between 1 and the size of the graph defining a maximum shortest path after which two nodes will be considered disconnected. Default is :py:class:`None` (no maximum distance is set)
        :param int m: The number of steps of the m-reach algorithm (a positive integer). Required if ``metric`` is ``KpposEnum.mreach``
        :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. Default is the igraph shortest path search.
        :param bool parallel: whether to distribute the evaluation of the combinations over several processes.
        :param int ncores: Positive integer specifying the number of worker processes used when ``parallel`` is set. If :py:class:`None` (default) it is set to the number of available cores minus one, and never less than one.

        :return tuple: a tuple of two elements: a list of all kp-sets (each one a sorted list of node names) achieving the maximum reachability score, and that score rounded to 5 decimal places.

        :raises ValueError: if ``max_distance`` is not :py:class:`None` and is not an integer between 1 and the size of the graph, or if no kp-set could be scored (e.g. no combination of size ``k`` exists, or every worker failed)
        :raises TypeError: if ``parallel`` is set and ``ncores`` is not a positive integer, or if ``metric`` is ``KpposEnum.mreach`` and ``m`` is not a positive integer
        :raises WrongArgumentError: if ``metric`` is ``KpposEnum.mreach`` and ``m`` is :py:class:`None`

        .. note:: in parallel mode an exception raised by a worker is printed and the results of that chunk are discarded; the search continues with the remaining chunks.
        """

        # Maps each candidate kp-set (tuple of node names) to its score.
        kpset_score_pairs = {}
        node_names = graph.vs["name"]

        if max_distance is not None and (not isinstance(max_distance, int)
                                         or max_distance < 1
                                         or max_distance > graph.vcount()):
            raise ValueError(
                u"'max_distance' must be an integer value between 1 and the total number of nodes"
            )

        if parallel:
            if ncores is None:
                # Leave one core free, but never drop to zero workers
                # (cpu_count() is 1 on single-core hosts).
                ncores = max(1, mp.cpu_count() - 1)
            elif not isinstance(ncores, int) or ncores < 1:
                raise TypeError(u"'ncores' must be a positive integer value")

        if metric == KpposEnum.mreach:
            if m is None:
                raise WrongArgumentError(
                    u"The parameter 'm' must be specified")
            # Reject non-integers as well as non-positive integers.
            if not isinstance(m, int) or m <= 0:
                raise TypeError(
                    u"The parameter 'm' must be a positive integer value")

        # Generate all combinations of size k
        allS = list(itertools.combinations(node_names, k))
        sys.stdout.write(u"Evaluating {} possible solutions\n".format(
            len(allS)))

        if parallel:
            sys.stdout.write(
                u"Brute-force search of the best kp-set of size {} using {} cores\n"
                .format(k, ncores))

            # Split the combinations into at most `ncores` contiguous chunks,
            # dropping the empty ones so no worker is spawned for nothing.
            chunklen = math.ceil(len(allS) / ncores)
            chunks = [
                chunk for chunk in (allS[i * chunklen:(i + 1) * chunklen]
                                    for i in range(ncores)) if chunk
            ]

            with ProcessPoolExecutor(max_workers=ncores) as executor:
                future_dict = {
                    executor.submit(crunch_reachability_combinations, graph,
                                    chunk, metric, max_distance, m, cmode):
                    chunk
                    for chunk in chunks
                }
                for future in as_completed(future_dict):
                    chunk = future_dict[future]
                    try:
                        partial_result = future.result()
                    except Exception as exc:
                        # Best effort: report the failed chunk and keep going.
                        print('%r generated an exception: %s' % (chunk, exc))
                    else:
                        kpset_score_pairs.update(partial_result)
        else:
            sys.stdout.write(
                u"Brute-force search of the best kp-set of size {}\n".format(
                    k))

            kpset_score_pairs.update(
                crunch_reachability_combinations(graph=graph,
                                                 node_names_list=allS,
                                                 kp_type=metric,
                                                 max_distance=max_distance,
                                                 m=m,
                                                 cmode=cmode))

        if not kpset_score_pairs:
            # max() on an empty sequence would raise an opaque ValueError.
            raise ValueError(
                u"No kp-set of size {} could be evaluated".format(k))

        _group_score = max(kpset_score_pairs.values())
        final = [
            sorted(kpset) for kpset, score in kpset_score_pairs.items()
            if score == _group_score
        ]
        _group_score = round(_group_score, 5)

        plural = "s" if len(final) > 1 else ""
        sys.stdout.write(
            u"Best group{}: {}\n Group{} size = {}\n Metric = {}{}\n Score = {}\n"
            .format(plural,
                    "{" + str(final).replace("'", "")[1:-1] + "}",
                    plural, k,
                    metric.name.replace("_", " "),
                    ", m=" + str(m) if metric == KpposEnum.mreach else "",
                    _group_score))
        return final, _group_score