def check_index_list(self, indices: list):
    r"""
    Check that a list of vertex indices only refers to vertices of the graph stored in ``self.graph``.

    Every element must be a non-negative integer and must be one of the values in ``self.graph.vs.indices``.

    :param list indices: a non-empty list of non-negative integers

    :raise ValueError: if ``indices`` is not a list, or if any of its elements is not a non-negative integer
    :raise WrongArgumentError: if ``indices`` is empty or if any of its elements is not a vertex index of the graph
    """
    if not isinstance(indices, list):
        raise ValueError(u"index list is not a list")

    if len(indices) == 0:
        raise WrongArgumentError(u"List is empty")

    for ind in indices:
        if not isinstance(ind, int) or ind < 0:
            raise ValueError("indices must be positive integers")

    # A strict-superset comparison (``set(indices) > set(graph indices)``) only
    # triggers when *all* graph indices are listed together with extra ones, so a
    # single unknown index would slip through. A set difference catches any of them.
    missing = sorted(set(indices) - set(self.graph.vs.indices))
    if missing:
        raise WrongArgumentError(
            u"The input node index '{}' does not exist in the graph".format(missing))

    return None
def add_node_attribute(graph: Graph, attr_name: str, attr_list: list, nodes: list):
    r"""
    Add an attribute to the vertices of a :py:class:`~igraph.Graph` object.

    The values in ``attr_list`` are paired positionally with the vertex names in ``nodes``: each vertex is looked
    up through its ``name`` attribute and receives the value found at the same position in ``attr_list``.
    Names that match no vertex are reported on standard output and skipped.

    .. warning:: if the vertex attribute name (``attr_name``) is already initialized, it will be overwritten by this method.

    :param igraph.Graph graph: a :class:`igraph.Graph` object.
    :param str attr_name: the name of the attribute that will be added to the vertices
    :param list attr_list: a list of objects, sorted like the ``nodes`` parameter. Each object is assigned to the corresponding node
    :param list nodes: the vertex ``name`` attribute of the vertices to which attributes will be added. A single string is accepted and treated as a one-element list

    :raise TypeError: if ``graph`` is not a :py:class:`~igraph.Graph` or ``attr_name`` is not a string
    :raise AssertionError: if ``attr_list`` and ``nodes`` have different lengths
    :raise ValueError: if a name in ``nodes`` matches more than one vertex
    :raise WrongArgumentError: if none of the names in ``nodes`` matches a vertex
    """
    # The previous test, ``not isinstance(graph, Graph) is not Graph``, compared a
    # bool with the Graph class by identity and therefore could never be true.
    if not isinstance(graph, Graph):
        raise TypeError(u"graph argument is not a igraph.Graph")

    if not isinstance(attr_name, str):
        raise TypeError(u"Attribute name is not a string")

    if isinstance(nodes, str):
        sys.stdout.write(u"Converting string nodes to list of nodes\n")
        nodes = [nodes]

    assert len(attr_list) == len(nodes), u"In add_node_attribute, length of attributes list cannot be " \
                                         "different from length of list of nodes."

    count = 0
    err_count = 0
    for n, a in zip(nodes, attr_list):
        select = graph.vs.select(name=n)
        count += 1
        if len(select) == 0:
            sys.stdout.write(u"Node %s not found in graph\n" % n)
            err_count += 1
        elif len(select) == 1:
            select[0][attr_name] = a
        else:
            sys.stdout.write(u"Node %s has multiple name hits, please check your attribute file\n" % n)
            raise ValueError(u"Multiple node hits")

    # Also true when ``nodes`` is empty (0 == 0): nothing could be assigned.
    if err_count == count:
        raise WrongArgumentError(u"All the attributes pointed to non-existing nodes.")
    else:
        sys.stdout.write(u"Node attribute {} added\n".format(attr_name))
def subtract_count_dist_matrix(count_all: np.ndarray, count_nogroup: np.ndarray) -> np.ndarray:
    r"""
    Subtract two equally sized square count matrices on their upper triangle.

    The result starts as a copy of ``count_all``. For every pair ``i <= j``, the cell ``[i, j]`` is replaced by
    ``count_all[i, j] - count_nogroup[i, j]`` only when the mirrored cells ``[j, i]`` of the two inputs are
    equal; every other cell keeps the value it has in ``count_all``.

    :param np.ndarray count_all: a square matrix
    :param np.ndarray count_nogroup: a square matrix with the same side length as ``count_all``

    :return np.ndarray: a new array with the same shape as ``count_all``

    :raise WrongArgumentError: if the two matrices are not square or do not share the same side length
    """
    same_square_shape = (count_all.shape[0] == count_all.shape[1] ==
                         count_nogroup.shape[0] == count_nogroup.shape[1])
    if not same_square_shape:
        raise WrongArgumentError(
            u"Parameter error",
            "The function parameters do not have the same shape")

    side = count_all.shape[0]
    difference = np.copy(count_all)
    for row in prange(side):
        for col in prange(row, side):
            # the lower-triangle cells decide whether the upper-triangle cell is updated
            if count_all[col, row] != count_nogroup[col, row]:
                continue
            difference[row, col] = count_all[row, col] - count_nogroup[row, col]
    return difference
def add_edge_attribute(graph: Graph, attr_name: str, attr_list: list, edges: list):
    r"""
    Add edge attributes to the input :py:class:`igraph.Graph` object under the attribute name specified in
    ``attr_name``.

    The values in ``attr_list`` are paired positionally with the entries of ``edges``. Each entry is matched
    against the ``adjacent_nodes`` edge attribute, first as given and then, if nothing matches, with its first
    two elements swapped. Entries that match no edge are reported on standard output and skipped.

    .. warning:: if the edge attribute name (``attr_name``) is already initialized, it will be overwritten by this method.

    :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
    :param str attr_name: the name of the attribute being added to the edges of the graph
    :param list attr_list: a list, sorted like ``edges``, storing the values that will be added to each target edge
    :param list edges: the edges (as ``adjacent_nodes`` values) to which attributes will be applied

    :raise TypeError: if ``graph`` is not a :py:class:`~igraph.Graph` or ``attr_name`` is not a string
    :raise AssertionError: if ``attr_list`` and ``edges`` have different lengths
    :raise ValueError: if one of the entries in ``edges`` points to more than one edge (edge names must be univocal)
    :raise WrongArgumentError: if none of the entries in ``edges`` points to an existing edge
    """
    # The previous test, ``not isinstance(graph, Graph) is not Graph``, compared a
    # bool with the Graph class by identity and therefore could never be true.
    if not isinstance(graph, Graph):
        raise TypeError(u"graph argument is not a igraph.Graph")

    if not isinstance(attr_name, str):
        raise TypeError("Attribute name is not a string")

    assert len(attr_list) == len(edges), u"in add_edge_attribute, length of attributes list cannot be " \
                                         "different from length of list of edges."

    count = 0
    err_count = 0
    for e, a in zip(edges, attr_list):
        select = graph.es.select(adjacent_nodes=e)
        if len(select) == 0:
            # retry with the endpoints in the opposite order
            select = graph.es.select(adjacent_nodes=(e[1], e[0]))
        count += 1

        if len(select) == 0:
            sys.stdout.write(u"Edge %s not found in graph\n" % str(e))
            err_count += 1
        elif len(select) == 1:
            select[0][attr_name] = a
        else:
            # the edge identifier is now actually interpolated into the message
            raise ValueError(
                u"Edge %s has multiple name hits, edge `adjacent_nodes` must be univocal" % str(e))

    # Also true when ``edges`` is empty (0 == 0): nothing could be assigned.
    if err_count == count:
        raise WrongArgumentError("All the attributes pointed to non-existing edges.")
    else:
        sys.stdout.write("Edge attribute {} added\n".format(attr_name))
def Binary(file: str) -> Graph:
    r"""
    Loads a binary file (a :py:class:`pickle` object) that stores a :py:class:`igraph.Graph` object and makes it
    ready to be used for Pyntacle.

    We refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#bin>`_
    within the Pyntacle official page for more details regarding the specifics of :py:class:`igraph.Graph`
    binary objects that can be serialized by Pyntacle.

    The loaded graph is initialized using the file name (without extension) as graph name, converted to
    undirected if it is directed, checked and returned.

    .. warning:: the file is read with :py:func:`pickle.load`, which can execute arbitrary code. Only load binaries coming from a trusted source.

    :param str file: the location of the binary file

    :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

    :raise WrongArgumentError: if ``file`` is not recognized as a binary file
    :raise IOError: if the binary does not contain a :py:class:`igraph.Graph` object
    :raise IllegalGraphSizeError: if the graph has no edges or fewer than two nodes
    """
    if not is_binary_file(file):
        raise WrongArgumentError(u"file is not a binary")

    # NOTE(security): unpickling runs code embedded in the file; callers must not
    # pass binaries obtained from untrusted sources.
    with open(file, "rb") as handle:  # close the descriptor even if unpickling fails
        graph = pickle.load(handle)

    if not isinstance(graph, Graph):
        raise IOError(u"binary is not a graph object")

    # Either condition alone violates "at least 2 nodes linked by one edge"; with
    # ``and`` an edgeless graph made of several nodes was accepted.
    if graph.ecount() < 1 or graph.vcount() < 2:
        raise IllegalGraphSizeError(
            u"Graph must contain at least 2 nodes linked by one edge")

    utils = gu(graph=graph)
    utils.graph_initializer(
        graph_name=os.path.splitext(os.path.basename(file))[0])

    if graph.is_directed():
        sys.stdout.write(u"Converting graph to undirect\n")
        graph.to_undirected()

    utils.check_graph()
    graph = utils.get_graph()
    sys.stdout.write(u"Binary from {} imported\n".format(
        os.path.basename(file)))
    return graph
def crunch_groupcentrality_combinations(graph: Graph, node_names_list: list,
                                        np_counts: np.ndarray,
                                        np_paths: np.ndarray,
                                        gc_enum: GroupCentralityEnum,
                                        distance_type: GroupDistanceEnum,
                                        cmode: CmodeEnum) -> dict:
    r"""
    Score every node group in ``node_names_list`` with the requested group centrality metric.

    For group closeness and group betweenness the shortest path matrix (``np_paths``) or the shortest path
    count matrix (``np_counts``) is computed on first use when it is :py:class:`None` or empty, and then
    reused for the remaining groups.

    :param igraph.Graph graph: the graph on which the metric is computed
    :param list node_names_list: an iterable of node name groups; each group is scored independently
    :param np.ndarray np_counts: shortest path counts used by group betweenness, or :py:class:`None`
    :param np.ndarray np_paths: shortest path lengths used by group closeness, or :py:class:`None`
    :param GroupCentralityEnum gc_enum: the group centrality metric to compute
    :param GroupDistanceEnum distance_type: the ``distance`` argument forwarded to group closeness
    :param CmodeEnum cmode: the implementation used to compute missing matrices and group betweenness

    :return dict: a dictionary mapping each group (as a tuple) to its score

    :raise WrongArgumentError: if ``gc_enum`` is not one of the handled metrics (only raised when there is at least one group to score)
    """
    scores = {}

    for group in node_names_list:
        if gc_enum == GroupCentralityEnum.group_degree:
            value = LocalTopology.group_degree(graph, nodes=group)

        elif gc_enum == GroupCentralityEnum.group_closeness:
            paths_missing = np_paths is None or np_paths.size == 0
            if paths_missing:
                np_paths = sp.get_shortestpaths(graph, nodes=None, cmode=cmode)
            value = LocalTopology.group_closeness(graph,
                                                  group,
                                                  distance=distance_type,
                                                  np_paths=np_paths)

        elif gc_enum == GroupCentralityEnum.group_betweenness:
            counts_missing = np_counts is None or np_counts.size == 0
            if counts_missing:
                np_counts = sp.get_shortestpath_count(graph, nodes=None, cmode=cmode)
            value = LocalTopology.group_betweenness(graph,
                                                    group,
                                                    cmode=cmode,
                                                    np_counts=np_counts)

        else:
            raise WrongArgumentError("{} function not yet implemented.".format(
                gc_enum.name))

        scores[tuple(group)] = value

    return scores
def AdjacencyMatrix(file: str, sep: str or None = None, header: bool = True) -> Graph:
    r"""
    Imports an adjacency matrix file to a :py:class:`igraph.Graph` object ready to be used by Pyntacle.

    For more information on adjacency matrices we refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#adjm>`_
    on Pyntacle website.

    .. note:: We support unweighted undirected Adjacency Matrices, so only zeroes and ones are allowed in the input file.

    .. note:: If a header is present, it **must** contain unique names (two nodes can't have the same ID). If not, an error will be raised. The names of the node will be assigned to the vertex ``name`` attribute. If the header is not present, the node "name" attribute will be the corresponding sequential index assigned by igraph.

    :param str file: the path to the file storing the adjacency matrix
    :param None,str sep: the field separator inside the network file. If :py:class:`None` (default) the separator is not checked here and its detection is left to the parser. Otherwise, the string representing the column separator
    :param bool header: whether the header is present or not (default is ``True``)

    :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

    :raise WrongArgumentError: if ``sep`` is given and is not found in the first line of the adjacency matrix
    :raise ValueError: if the matrix is not squared
    """
    if not AdjmUtils(file=file, header=header, sep=sep).is_squared():
        raise ValueError(u"Matrix is not squared")

    # ``None in "text"`` raises TypeError, so the presence test only makes sense
    # for an explicit separator.
    if sep is not None:
        with open(file, "r") as adjmatrix:
            # readline() returns '' on an empty file instead of failing; only the
            # line terminator is stripped so a leading/trailing separator survives
            first_line = adjmatrix.readline().rstrip("\r\n")
        if sep not in first_line:
            raise WrongArgumentError(
                u'The specified separator "{}" is not present in the adjacency matrix file'
                .format(sep))

    if header:
        # use pandas to parse the matrix, using the first column as row labels
        f = pd.read_csv(filepath_or_buffer=file, sep=sep, index_col=0)
        # force index to string, in case it is not identified properly
        f.index = f.index.map(str)
        f = f.reindex(sorted(f.columns), axis=1)  # sort columns alphabetically
        f = f.reindex(sorted(f.index), axis=0)  # sort indices alphabetically
        node_names = f.columns.values.tolist()
    else:
        f = pd.read_csv(filepath_or_buffer=file, sep=sep, header=None)
        node_names = list(map(str, f.index))

    graph = Graph.Adjacency(f.values.tolist(), mode="UPPER")
    util = gu(graph=graph)
    util.graph_initializer(
        graph_name=os.path.splitext(os.path.basename(file))[0],
        node_names=node_names)
    graph = util.get_graph()

    sys.stdout.write(u"Adjacency matrix from {} imported\n".format(
        os.path.basename(file)))
    return graph
def graph_initializer(self, graph_name: str, node_names: list or None = None):
    r"""
    Transform the :py:class:`igraph.Graph` stored in ``self.graph`` into a network that is compliant to the
    Pyntacle `Minimum requirements <http://pyntacle.css-mendel.it/requirements.html>`_.

    The graph is made undirected and every reserved attribute that is still missing is added: the graph
    ``name``, the vertex ``name`` and ``parent``, the edge ``adjacent_nodes``, and the ``sif_interaction_name``
    / ``sif_interaction`` placeholders. Isolates are then pruned, the ``implementation`` graph attribute is
    chosen from the graph size and density, and the graph is checked.

    .. warning:: This method will prune the graph of any node isolates, as they are not accepted by Pyntacle.

    :param str graph_name: the network name (stored in the graph ``name`` attribute when the graph has none). It must be accepted by ``attribute_name_checker``
    :param list,None node_names: optional, a list of strings matching the total number of vertices of the graph. Each item becomes the vertex ``name`` attribute, index by index. Only used when the vertices have no ``name`` attribute yet. Defaults to :py:class:`None` (the vertex indices, as strings, are used)

    :raise ValueError: if ``graph_name`` is not a string or is rejected by ``attribute_name_checker``, or if ``node_names`` does not have as many items as the graph has vertices
    :raise WrongArgumentError: if ``node_names`` is not a list of strings
    """
    if not isinstance(graph_name, str):
        raise ValueError("'graph_name' must be a string")

    try:
        attribute_name_checker(graph_name)
    except ValueError:
        raise ValueError("'graph_name' contains illegal characters\n")

    # make sure the graph is undirected before any attribute is added
    self.graph.to_undirected()

    # graph name
    if "name" not in self.graph.attributes():
        self.logger.info(u"Adding file name to graph name")
        AddAttributes.add_graph_name(self.graph, graph_name)

    # vertex names
    if "name" not in self.graph.vs.attributes():
        if node_names is None:
            self.logger.info(
                u"Adding node names to graph corresponding to their indices")
            self.graph.vs()["name"] = [
                str(vertex.index) for vertex in self.graph.vs()
            ]
        else:
            is_list_of_strings = isinstance(node_names, list) and all(
                isinstance(item, str) for item in node_names)
            if not is_list_of_strings:
                raise WrongArgumentError(
                    u"`node_names` argument must be a list of strings")

            if len(node_names) != self.graph.vcount():
                raise ValueError(
                    u"`node_names` argument must be of the same length of vertices"
                )

            self.logger.info(
                u"Adding node names to graph using the provided node names")
            self.graph.vs["name"] = node_names

    # reserved 'parent' vertex attribute
    if "parent" not in self.graph.vs().attributes():
        self.logger.info(
            u"Adding reserved attribute 'parent' to the vertices")
        AddAttributes.add_parent_name(self.graph)

    # names of the two endpoints of every edge
    if "adjacent_nodes" not in self.graph.es().attributes():
        self.logger.info(
            u"Adding source and target names as 'adjacent_nodes' attribute to edges"
        )
        AddAttributes.add_edge_names(self.graph)

    # placeholders used for sif file conversion
    if "sif_interaction_name" not in self.graph.attributes():
        self.graph["sif_interaction_name"] = None

    if "sif_interaction" not in self.graph.es().attributes():
        self.graph.es()["sif_interaction"] = None

    # presumably stores the removed isolates in a graph attribute — verify in prune_isolates
    self.prune_isolates()

    # Choose the implementation for the functions that require one: igraph is the
    # default; the cpu implementation is only picked for graphs with more than 100
    # nodes that are dense (density >= 0.5) or large (> 500 nodes), and only when
    # at least two cpus are available.
    chosen_implementation = CmodeEnum.igraph
    n_nodes = self.graph.vcount()
    if n_nodes > 100:
        density = (2 * (self.graph.ecount())) / (n_nodes * (n_nodes - 1))
        sparse_and_small = density < 0.5 and n_nodes <= 500
        if not sparse_and_small and n_cpus >= 2:
            chosen_implementation = CmodeEnum.cpu

    self.graph["implementation"] = chosen_implementation
    self.check_graph()  # check that everything is in order
def write_report(self, report_dir=None, format="tsv", choices=report_format) -> str:
    r"""
    Create a file containing the information stored in ``self.report`` by any of the *report* functions.

    The file is named *Report_GRAPHNAME_COMMAND_DATE.EXT* (the graph name is omitted for reports of type
    ``Set``), where:

    * **GRAPHNAME** is the first value stored in the ``graph["name"]`` attribute,
    * **COMMAND** is the name of ``self.report_type``,
    * **DATE** is the value of ``self.dat`` and
    * **EXT** is the extension that ``choices`` associates to ``format``.

    Before writing, every cell of ``self.report`` is cast to string and the underscores in the header row are
    replaced with spaces.

    :param str,None report_dir: the directory that will store the report. It is created if it does not exist. If :py:class:`None` (default) the current directory is used
    :param str format: the report format, one of the keys of ``choices`` (default is ``tsv``)
    :param dict choices: a mapping between the accepted formats and the corresponding file extension

    :return str: the path of the report file

    :raise EnvironmentError: if no report has been created yet
    :raise WrongArgumentError: if ``format`` is not one of the keys of ``choices``
    """
    if not self.report:
        raise EnvironmentError(
            u"A report must be created first using the 'create_report()' function"
        )

    # cast every element of the list of lists to string, just in case
    self.report = [list(map(str, x)) for x in self.report]
    # replace all the underscores with spaces in the header
    self.report[0] = [x.replace("_", " ") for x in self.report[0]]

    if format not in choices.keys():
        raise WrongArgumentError(
            u"file format {} is not supported".format(format))

    if report_dir is None:
        self.logger.info(
            u"Directory not specified. Using current directory")
        report_dir = os.path.abspath(os.getcwd())
    else:
        if not os.path.isdir(report_dir):
            self.logger.warning(
                u"Specified directory does not exists, creating it")
            os.makedirs(report_dir, exist_ok=True)
        report_dir = os.path.abspath(report_dir)

    if len(self.graph["name"]) > 1:
        self.logger.warning(
            u"Using the first 'name' attribute of graph name since more than one is specified"
        )
    graphname = self.graph["name"][0]

    extension = choices[format]

    if self.report_type.name == 'Set':
        name_parts = ["Report", self.report_type.name, self.dat]
    else:
        name_parts = ["Report", graphname, self.report_type.name, self.dat]
    report_path = os.path.join(report_dir,
                               "_".join(name_parts) + "." + extension)

    if extension == "xlsx":
        self.logger.info(
            u"Writing Pyntacle report to a an excel file (xlsx)")
        workbook = xlsxwriter.Workbook(report_path,
                                       {'constant_memory': True})
        workbook.use_zip64()
        # named so that it does not rebind the ``format`` parameter
        cell_format = workbook.add_format()
        worksheet = workbook.add_worksheet("Pyntacle Report")

        for row, elem in enumerate(self.report):
            for col, p in enumerate(elem):
                worksheet.write(row, col, p, cell_format)

        workbook.close()

    elif extension == "csv":
        self.logger.info(
            u"Writing Pyntacle report to a comma-separated value file (csv)"
        )
        # the csv module handles line endings itself and requires newline=""
        with open(report_path, "w", newline="") as out:
            writer = csv.writer(out)
            writer.writerows(self.report)

    else:
        with open(report_path, "w") as out:
            if extension == "tsv":
                self.logger.info(
                    u"Writing Pyntacle report to a tab-separated file (tsv)"
                )
                # Terminate each line at write time: appending "\n" to the rows
                # themselves altered self.report and left a trailing tab.
                out.writelines("\t".join(row) + "\n" for row in self.report)

    return report_path
def reachability(graph, k: int, metric: KpposEnum, seed=None, max_distance: int = None, m=None, cmode=CmodeEnum.igraph) -> (list, float):
    r"""
    It searches for the best *key player* (*kp*) set of a predefined size :math:`k`, also defined as positive
    key players (*kp-pos*) using reachability indices, described in Pyntacle `introductory guide <http://pyntacle.css-mendel.it/resources/kp_guide/kp_guide.html>`_

    The optimal kp set will be the one that has the highest reachability, i.e. no switching of nodes from the
    set :math:`k` to the rest of the nodes in the graph :math:`N-k` can improve the selected reachability score.

    | Available reachability indices:

    * **m-reach**: min = 0 (unreachable); max = :math:`N - k` (graph is totally reached)
    * **dR**: min = 0 (the *k* set is disconnected from the rest of the graph); max = 1 (full reachability of the set)

    The starting set is made of the first :math:`k` node names after a random shuffle of the sorted node names.

    :param igraph.Graph graph: a :py:class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
    :param int k: the size of the kp-set. Must be a positive integer.
    :param KpposEnum metric: one of the available :class:`~pyntacle.tools.enums.KpposEnum`
    :param int,None seed: optional, a positive integer that can be used to replicate the greedy optimization run. It is not read by this function body (see the note below)
    :param int,None max_distance: optional, define a maximum shortest path after which two nodes will be considered disconnected. Default is :py:class:`~None` (no maximum distance is set)
    :param int m: the number of steps of the m-reach algorithm. Required if the requested metric is :func:`~tools.enums.KPPosEnum.mreach`
    :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. Default is the igraph brute-force shortest path search. For any other implementation the shortest paths of the whole graph are computed once and handed to the metric

    :return tuple: a tuple storing in ``[0]`` a sorted list containing the node ``name`` attribute of the optimal *kp-set* and in ``[1]`` the optimal *kp-pos* value for the selected metric, rounded to five decimals

    :raise KeyError: when ``metric`` is neither m-reach nor dR
    :raise ValueError: if ``max_distance`` is not :py:class:`None` or an integer between 1 and the total number of nodes
    :raise WrongArgumentError: if the m-reach is requested and ``m`` is :py:class:`None`
    :raise TypeError: if the m-reach is requested and ``m`` is not a positive integer
    """
    # NOTE(review): ``k`` and ``seed`` are neither validated nor used below; they are
    # presumably handled by wrappers applied outside this block — confirm that the
    # seed actually reaches the random generator before relying on reproducibility.
    if metric != KpposEnum.mreach and metric != KpposEnum.dR:
        raise KeyError(
            u"The parameter 'metric' is not valid. It must be one of the following: {}"
            .format(list(KpposEnum)))

    # The conditions used to be chained with ``and``, which never rejected an
    # integer (and raised TypeError on the comparison for most non-integers).
    # The lower bound mirrors the one enforced by the dR and m-reach metrics.
    if max_distance is not None and (not isinstance(max_distance, int)
                                     or max_distance < 1
                                     or max_distance > graph.vcount()):
        raise ValueError(
            u"'max_distance' must be an integer greater than one and lesser than the total number of nodes"
        )

    if metric == KpposEnum.mreach:
        if m is None:
            raise WrongArgumentError(
                "The 'm' argument is required for computing m-reach")
        if not isinstance(m, int) or m <= 0:
            raise TypeError(
                u"The 'm' argument must be a positive integer value")

    # random starting set: shuffle the (sorted) names and take the first k
    node_names = graph.vs()["name"]
    node_names.sort()
    random.shuffle(node_names)
    S_names = node_names[:k]
    S = gu(graph=graph).get_node_indices(S_names)
    S.sort()

    # assemble the scoring function once instead of repeating it per metric/cmode
    metric_kwargs = {
        "nodes": S_names,
        "max_distance": max_distance,
        "cmode": cmode
    }
    if metric == KpposEnum.mreach:
        metric_function = kp.mreach
        metric_kwargs["m"] = m
    else:
        metric_function = kp.dR

    if cmode != CmodeEnum.igraph:
        # compute all the shortest paths once and share them across iterations
        metric_kwargs["sp_matrix"] = sp.get_shortestpaths(graph=graph,
                                                          cmode=cmode,
                                                          nodes=None)

    type_func = partial(metric_function, **metric_kwargs)

    final, reachability_score = GreedyOptimization.__optimization_loop(
        graph, S, type_func)

    final = graph.vs(final)["name"]
    final.sort()
    sys.stdout.write(
        u"Optimal group: {}\n Group size = {}\n Metric = {}\n Score = {}\n"
        .format("{" + str(final).replace("'", "")[1:-1] + "}", k,
                metric.name.replace("_", " "), reachability_score))

    return final, round(reachability_score, 5)
def dR(graph: igraph.Graph, nodes: list, max_distance: int or None = None, cmode: CmodeEnum = CmodeEnum.igraph, sp_matrix: np.ndarray or None = None) -> float:
    r"""
    Calculates the *dR* (*distance-weighted reach*) (described by the equation 14 in
    `The original article on key players <https://doi.org/10.1007/s10588-006-7084-x>`_), a positive key player
    (*kp-pos*) measure.

    The distance-weighted reach can be defined as the sum of the reciprocals of distances from the kp-set
    :math:`k` to all nodes, where the distance from the set to a node is defined as the minimum distance
    (minimum shortest path distance). Here the sum runs over the nodes outside the set and is divided by the
    total number of nodes.

    dR ranges from 0 to 1, where:

    * **dR** = 1 => Maximal reachability. The set :math:`k` is directly tied to the rest of the graph
    * **dR** = 0 => No reachability. The set :math:`k` is completely disconnected to the graph

    :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
    :param list nodes: a list of strings that matches the node ``name`` attribute of the selected nodes.
    :param int,None max_distance: the maximum shortest path length over which two nodes are considered unreachable. Default is :py:class:`None` (distances are preserved)
    :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. Default is the igraph implementation
    :param None, np.ndarray sp_matrix: a :math:`NxN` (:math:`N` being the size of the graph) :py:class:`numpy.ndarray` storing integers representing the distances between nodes. :warning: Disconnected nodes **must** be represented as a distance greater than :math:`N`. It is only consulted when ``cmode`` is not the igraph implementation; in that case the rows of the selected nodes are taken from it instead of computing the shortest paths. Default is :py:class:`None`.

    :return float: the distance-weighted reach measure of the graph, rounded to five decimals

    :raise TypeError: if ``max_distance`` is not an integer
    :raise ValueError: if ``max_distance`` is lesser than one or if a provided ``sp_matrix`` is not a :py:class:`numpy.ndarray`
    :raise WrongArgumentError: if the first dimension of ``sp_matrix`` differs from the number of nodes
    """
    # Validation is a plain guard: the computation used to hang off the ``else`` of
    # this check, so it was skipped for one of the two ``max_distance`` cases.
    if max_distance:
        if not isinstance(max_distance, int):
            raise TypeError(
                u"'max_distance' must be an integer value greater than one")
        elif max_distance < 1:
            raise ValueError(
                u"'max_distance' must be an integer greater than one")

    index_list = gu(graph=graph).get_node_indices(nodes=nodes)

    if cmode == CmodeEnum.igraph:
        shortest_path_lengths = sp.shortest_path_length_igraph(graph=graph,
                                                               nodes=nodes)
    elif sp_matrix is None:
        shortest_path_lengths = sp.get_shortestpaths(graph=graph,
                                                     nodes=nodes,
                                                     cmode=cmode)
    else:
        if not isinstance(sp_matrix, np.ndarray):
            raise ValueError(
                u"'sp_matrix' must be a numpy.ndarray instance")
        elif sp_matrix.shape[0] != graph.vcount():
            raise WrongArgumentError(
                u"The dimension of 'sp matrix' is different from the total number of nodes"
            )
        # keep only the rows of the nodes in the set
        shortest_path_lengths = sp_matrix[index_list, :]

    if max_distance:
        # Cap the distances that were just obtained. ``sp_matrix`` was passed here
        # before, which is None unless supplied and otherwise holds every node
        # rather than the rows of the set.
        shortest_path_lengths = ShortestPathModifier.set_max_distances_nparray(
            shortest_path_lengths, max_distance)

    dr_num = 0
    vminusk = set(graph.vs.indices) - set(index_list)
    for j in vminusk:
        # distance between the set and node j: the closest member of the set
        dKj = min(spl[j] for spl in shortest_path_lengths)
        dr_num += 1 / dKj

    dr = round(dr_num / float(graph.vcount()), 5)
    return dr
def mreach(graph: igraph.Graph, nodes: list or str or None, m: int, max_distance: int or None = None, cmode: CmodeEnum = CmodeEnum.igraph, sp_matrix: np.ndarray = None) -> int:
    r"""
    Calculate the *m-reach*, a positive *key player* measure (*kp-pos*) described by the equation 12 in
    `The original article on key players <https://doi.org/10.1007/s10588-006-7084-x>`_.

    The m-reach returns the number of nodes outside the set :math:`k` that are at most :math:`m` steps away
    from at least one node of the set.

    :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
    :param list nodes: a list of strings that matches the node ``name`` attribute of the selected nodes.
    :param int m: an integer (greater than zero and not greater than the number of nodes) representing the maximum m-reach distance.
    :param int,None max_distance: the maximum shortest path length over which two nodes are considered unreachable. Default is :py:class:`None` (distances are preserved)
    :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. Default is the igraph brute-force shortest path search.
    :param None, np.ndarray sp_matrix: a :math:`NxN` (:math:`N` being the size of the graph) :py:class:`numpy.ndarray` storing integers representing the distances between nodes. :warning: Disconnected nodes **must** be represented as a distance greater than :math:`N`. It is only consulted when ``cmode`` is not the igraph implementation; in that case the rows of the selected nodes are taken from it instead of computing the shortest paths. Default is :py:class:`None`.

    :return int: an integer representing the number of nodes reached by the input node(s) in *m* steps or less

    :raise TypeError: when ``m`` or ``max_distance`` is not a :py:class:`int`
    :raise ValueError: if ``m`` is lesser than 1 or greater than the size of the graph, if ``max_distance`` is lesser than one or if a provided ``sp_matrix`` is not a :py:class:`numpy.ndarray`
    :raise WrongArgumentError: if the first dimension of ``sp_matrix`` differs from the number of nodes
    """
    if not isinstance(m, int):
        raise TypeError(u"'m' must be an integer")
    elif m < 1 or m > graph.vcount():
        raise ValueError(
            u"'m' must be greater than zero and less or equal than the total number of vertices"
        )

    # Validation is a plain guard: the computation used to hang off the ``else`` of
    # this check, so it was skipped for one of the two ``max_distance`` cases.
    if max_distance:
        if not isinstance(max_distance, int):
            raise TypeError(
                u"'max_distance' must be an integer value greater than one")
        if max_distance < 1:
            raise ValueError(
                u"'max_distance' must be an integer value greater than one")

    index_list = gu(graph=graph).get_node_indices(nodes=nodes)

    if cmode == CmodeEnum.igraph:
        shortest_path_lengths = sp.shortest_path_length_igraph(graph,
                                                               nodes=nodes)
    elif sp_matrix is None:
        # ``not sp_matrix`` cannot be used: the truth value of an array with more
        # than one element is ambiguous and raises ValueError.
        shortest_path_lengths = sp.get_shortestpaths(graph=graph,
                                                     cmode=cmode,
                                                     nodes=nodes)
    else:
        if not isinstance(sp_matrix, np.ndarray):
            raise ValueError(
                u"'sp_matrix' must be a numpy.ndarray instance")
        elif sp_matrix.shape[0] != graph.vcount():
            raise WrongArgumentError(
                u"The dimension of 'sp matrix' is different from the total "
                "number of nodes")
        # keep only the rows of the nodes in the set
        shortest_path_lengths = sp_matrix[index_list, :]

    if max_distance:
        shortest_path_lengths = ShortestPathModifier.set_max_distances_nparray(
            shortest_path_lengths, max_distance)

    # a node outside the set counts once if any member of the set reaches it
    vminusk = set(graph.vs.indices) - set(index_list)
    reached = sum(1 for j in vminusk
                  if any(spl[j] <= m for spl in shortest_path_lengths))
    return reached
def reachability(graph, k, metric: KpposEnum, max_distance=None, m=None, cmode=CmodeEnum.igraph, parallel=False, ncores=None) -> (list, float):
    r"""
    It searches and finds the *key player* (*kp*) set, or sets, of a predefined size that best reaches all
    other nodes in the graph.

    It generates all the possible kp-sets of size ``k`` and calculates their reachability scores. The best
    kp-set will be the one that best reaches all other nodes of the graph.

    Available metrics that are used to score reachability are:

    * **m-reach**: min = 0 (unreachable); max = size(graph) - kpp_size (total reachability)
    * **dR**: min = 0 (unreachable); max = 1 (total reachability)

    :param igraph.Graph graph: a :class:`igraph.Graph` object. The graph must satisfy a series of requirements, described in the `Minimum requirements specifications <http://pyntacle.css-mendel.it/requirements.html>`_ section of the Pyntacle official page.
    :param int k: the size of the kp-set to be found
    :param KpposEnum metric: any available option of the enumerators :class:`~pyntacle.tools.enums.KpposEnum`
    :param int,None max_distance: optional, define a maximum shortest path after which two nodes will be considered disconnected. Default is :py:class:`~None` (no maximum distance is set)
    :param int m: the number of steps of the m-reach algorithm. Required if the requested metric is :func:`~tools.enums.KPPosEnum.mreach`
    :param cmodeEnum cmode: the implementation that will be used to compute the shortest paths. See :class:`~pyntacle.tools.enums.CmodeEnum`. Default is the igraph brute-force shortest path search.
    :param bool parallel: whether to use multicore processors to run the algorithm iterations in parallel.
    :param int ncores: positive integer specifying the number of cores used to perform parallel computation. If :py:class:`None` (default) the number of available cores minus one is used (at least one).

    :return tuple: a tuple of two elements containing in the first position a list of all kp-sets (each one a sorted list of node names) with maximum reachability score, and the maximum achieved score, rounded to five decimals, in the second element.

    :raise ValueError: if ``max_distance`` is not :py:class:`None` or an integer between 1 and the total number of nodes, or if no kp-set could be scored
    :raise TypeError: if ``parallel`` is set and ``ncores`` is not a positive integer, or if the m-reach is requested and ``m`` is not a positive integer
    :raise WrongArgumentError: if the m-reach is requested and ``m`` is :py:class:`None`
    """
    # maps each kp-set (a tuple of node names) to its score
    kpset_score_pairs = {}

    node_names = graph.vs["name"]

    # 1 is a legal cap: it is what the message states and what the metrics accept
    if max_distance is not None and (not isinstance(max_distance, int)
                                     or max_distance < 1
                                     or max_distance > graph.vcount()):
        raise ValueError(
            u"'max_distance' must be an integer value between 1 and the total number of nodes"
        )

    if parallel and ncores is None:
        # keep at least one worker on single-core hosts
        ncores = max(1, mp.cpu_count() - 1)
    elif parallel and (not isinstance(ncores, int) or ncores < 1):
        raise TypeError(u"'ncores' must be a positive integer value")

    if metric == KpposEnum.mreach and m is None:
        raise WrongArgumentError(u"The parameter 'm' must be specified")
    # reject non-integers as well as non-positive integers
    if metric == KpposEnum.mreach and (not isinstance(m, int) or m <= 0):
        raise TypeError(
            u"The parameter 'm' must be a positive integer value")

    # Generate all combinations of size k
    allS = list(itertools.combinations(node_names, k))
    sys.stdout.write(u"Evaluating {} possible solutions\n".format(
        len(allS)))

    if parallel:
        sys.stdout.write(
            u"Brute-force search of the best kp-set of size {} using {} cores\n"
            .format(k, ncores))

        # Split the combinations in at most ``ncores`` non-empty chunks
        chunklen = max(1, math.ceil(len(allS) / ncores))
        chunks = [
            allS[start:start + chunklen]
            for start in range(0, len(allS), chunklen)
        ]

        with ProcessPoolExecutor(max_workers=ncores) as executor:
            future_dict = {
                executor.submit(crunch_reachability_combinations, graph,
                                chunk, metric, max_distance, m, cmode): chunk
                for chunk in chunks
            }

            for future in as_completed(future_dict):
                chunk = future_dict[future]
                try:
                    partial_result = future.result()
                except Exception as exc:
                    # best effort: a failing chunk is reported and skipped
                    print('%r generated an exception: %s' % (chunk, exc))
                else:
                    kpset_score_pairs.update(partial_result)

    else:
        sys.stdout.write(
            u"Brute-force search of the best kp-set of size {}\n".format(
                k))
        partial_result = crunch_reachability_combinations(
            graph=graph,
            node_names_list=allS,
            kp_type=metric,
            max_distance=max_distance,
            m=m,
            cmode=cmode)
        kpset_score_pairs.update(partial_result)

    if not kpset_score_pairs:
        # same exception type that max() raises on an empty sequence, with a
        # message that tells what happened
        raise ValueError(
            u"No kp-set of size {} could be evaluated".format(k))

    _group_score = max(kpset_score_pairs.values())
    final = [
        sorted(kpset) for kpset, score in kpset_score_pairs.items()
        if score == _group_score
    ]
    _group_score = round(_group_score, 5)

    plural = "s" if len(final) > 1 else ""
    sys.stdout.write(
        u"Best group{}: {}\n Group{} size = {}\n Metric = {}{}\n Score = {}\n"
        .format(plural, "{" + str(final).replace("'", "")[1:-1] + "}",
                plural, k, metric.name.replace("_", " "),
                ", m=" + str(m) if metric == KpposEnum.mreach else "",
                _group_score))

    return final, _group_score