Ejemplo n.º 1
0
def network_to_networkx(network):
    """Export a pathpy Network as a networkx graph.

    Parameters
    ----------
    network: Network
        The network to convert. Its ``directed`` flag selects the networkx
        graph class; its ``nodes`` and ``edges`` mappings supply identifiers
        together with their attribute dictionaries.

    Returns
    -------
    networkx DiGraph or Graph
        A ``DiGraph`` if ``network.directed`` is truthy, otherwise a ``Graph``.

    Raises
    ------
    PathpyError
        If networkx cannot be imported.
    """
    try:
        import networkx as nx
    except ImportError:
        raise PathpyError("To export a network to networkx it must be installed")

    # node attributes that are not copied, since networkx handles them differently
    skipped = {"degree", "inweight", "outweight", "indegree", "outdegree"}

    graph = nx.DiGraph() if network.directed else nx.Graph()

    for name, attributes in network.nodes.items():
        kept = {}
        for key, value in attributes.items():
            if key not in skipped:
                kept[key] = value
        graph.add_node(name, **kept)

    # each edge key is unpacked into the positional arguments of add_edge
    for endpoints, attributes in network.edges.items():
        graph.add_edge(*endpoints, **attributes)

    return graph
Ejemplo n.º 2
0
def network_from_networkx(graph):
    """Load a networkx graph into a pathpy Network instance.

    Node identifiers and edge endpoints are converted to strings. The
    attribute dictionaries of nodes and edges are passed on as keyword
    arguments to ``Network.add_node`` and ``Network.add_edge``.

    Parameters
    ----------
    graph: networkx Graph or DiGraph
        The graph to convert.

    Returns
    -------
    Network
        Directed if ``graph`` is a ``DiGraph``, undirected otherwise.

    Raises
    ------
    PathpyError
        If networkx cannot be imported.
    PathpyNotImplemented
        If ``graph`` is neither a ``DiGraph`` nor a ``Graph``.
    """
    try:
        import networkx as nx
    except ImportError:
        raise PathpyError("To load a network from networkx it must be installed")

    if isinstance(graph, nx.DiGraph):
        directed = True
    elif isinstance(graph, nx.Graph):
        directed = False
    else:
        raise PathpyNotImplemented("At the moment only DiGraph and Graph are supported.")

    net = Network(directed=directed)

    # Look attributes up through the ``nodes`` view: the legacy ``Graph.node``
    # alias was removed in networkx 2.4 and raises AttributeError there.
    for node_id in graph.nodes:
        net.add_node(str(node_id), **graph.nodes[node_id])

    for edge in graph.edges:
        net.add_edge(str(edge[0]), str(edge[1]), **graph.edges[edge])

    return net
Ejemplo n.º 3
0
    def add_path(self, path, frequency=1, expand_subpaths=True, separator=','):
        """Register one observed path, optionally counting all of its subpaths.

        Parameters
        ----------
        path: tuple, list, str
            The path to add. Either a list or tuple of objects that can be
            turned into strings, e.g. ('a', 'b', 'c') or (1, 3, 5), or a single
            string ngram such as "a,b,c" whose nodes are split on `separator`.
        frequency: int, tuple
            Either an integer frequency, or a tuple (x, y) giving the frequency
            of this path as subpath (first component) and as longest path
            (second component). An integer x is converted to (0, x).
            Default value is 1.
        expand_subpaths: bool
            Whether or not to add subpath counts for this path. Default value
            is True.
        separator: str
            The string that separates nodes in a string ngram. Default is ','.

        Returns
        -------
        None

        Raises
        ------
        PathpyError
            If a string element of the path contains ``self.separator``.
        """
        assert isinstance(path, (tuple, list, str)), 'Path must be tuple or ngram string.'

        # a string ngram is split into its node names first
        if isinstance(path, str):
            path = tuple(path.split(separator))

        assert path, 'Path must contain at least one element'

        if any(isinstance(node, str) and self.separator in node for node in path):
            raise PathpyError('Node name contains separator character. '
                              'Choose different separator.')

        # all nodes are stored under their string representation
        nodes = tuple(str(node) for node in path)
        length = len(path) - 1

        if isinstance(frequency, int):
            frequency = (0, frequency)
        self.paths[length][nodes] += frequency

        if not expand_subpaths:
            return

        # subpath lengths k are capped by max_subpath_length and stay below
        # the length of the path itself
        limit = min(self.max_subpath_length + 1, length)
        for k in range(limit):
            # every window of k + 1 consecutive nodes is a subpath of length k
            for start in range(len(nodes) - k):
                window = nodes[start:start + k + 1]
                # the longest-path frequency is credited to the subpath count
                self.paths[k][window][0] += frequency[1]
Ejemplo n.º 4
0
    def add_layers(self, max_order):
        """Add higher-order layers up to the given maximum order.

        Parameters
        ----------
        max_order: int
            up to which order to add higher order layers, if not above the current
            maximum the operation will have no effect and the model will remain
            unchanged.

        Raises
        ------
        PathpyError
            If ``max_order`` is negative.
        """
        from pathpy import ENABLE_MULTICORE_SUPPORT

        # Test for None explicitly: an existing maximum order of 0 is falsy, so
        # a plain truthiness check would treat a model that already has
        # layer 0 as empty and add that layer a second time.
        current_max_order = -1 if self.max_order is None else self.max_order
        if max_order < 0:
            raise PathpyError("max_order must be a positive integer not %d" %
                              max_order)

        # all requested layers are already present
        if max_order <= current_max_order:
            return

        orders_to_add = list(range(current_max_order + 1, max_order + 1))
        # the parallel code path is only used when more than one layer is missing
        if len(orders_to_add) > 1 and ENABLE_MULTICORE_SUPPORT:
            self.__add_layers_parallel(orders_to_add)
        else:
            self.__add_layers_sequential(orders_to_add)
Ejemplo n.º 5
0
    def test_network_hypothesis(self, paths, method='AIC'):
        """
        Tests whether the assumption that paths are constrained
        to the (first-order) network topology is justified.
        Roughly speaking, this test yields true if the gain in
        explanatory power that is due to the network topology
        justifies the additional model complexity.

        The decision will be made based on a comparison between the zero-
        and the first-order layer of the model. Different from the multi-order
        model selection method implemented in estimate_order and likelihoodRatioTest,
        here we do *not* consider nested models, so we cannot use a likelihood ratio
        test. We instead use the AIC, AICc or BIC.

        Parameters
        ----------
        paths: Paths
            The path statistics the layer likelihoods are evaluated on. Paths of
            length zero are omitted from the test.
        method: str
            The information criterion to use, one of 'AIC', 'AICc' or 'BIC'.
            Default is 'AIC'.

        Returns
        -------
        tuple
            ``(ic0 > ic1, ic0, ic1)`` where ``ic0`` and ``ic1`` are the values of
            the chosen criterion for the zero- and the first-order model. The
            first component is True if the network hypothesis is not rejected.

        Raises
        ------
        AssertionError
            If `method` is not supported, or if 'AICc' is requested and the
            number of observations is too small for its correction term.
        PathpyError
            If the observation counts of the two models do not match.
        """
        from pathpy.utils.exceptions import PathpyError
        assert method in ['AIC', 'BIC', 'AICc'], \
            'Expected method AIC, AICc or BIC "%s" given.' % method

        # count number of omitted paths with length zero
        p_sum = sum(paths.paths[0][p][1] for p in paths.paths[0])
        if p_sum > 0:
            msg = 'Omitting {} zero-length paths ' \
                  'for test of network assumption'.format(p_sum)
            Log.add(msg, Severity.INFO)

        # log-likelihood and observation count of zero-order model
        likelihood_0, n_0 = self.layer_likelihood(paths,
                                                  l=0,
                                                  consider_longer_paths=True,
                                                  log=True,
                                                  min_path_length=1)

        # log-likelihood and observation count of first-order model
        likelihood_1, n_1 = self.layer_likelihood(paths,
                                                  l=1,
                                                  consider_longer_paths=True,
                                                  log=True,
                                                  min_path_length=1)

        # By definition, the number of observations for both models should be the total
        # weighted degree of the first-order network
        if n_0 != n_1:
            raise PathpyError('Observation count for 0-order ({n0}) and '
                              '1-st order model ({n1}) do not match'.format(
                                  n0=n_0, n1=n_1))

        # degrees of freedom = |V|-1
        dof0 = self.layers[0].degrees_of_freedom(assumption='ngrams')

        # degrees of freedom based on network assumption
        dof1 = self.layers[1].degrees_of_freedom(assumption='paths')

        # the first-order model is charged with the parameters of both layers
        dof10 = dof0 + dof1

        Log.add('Log-Likelihood (k=0) = ' + str(likelihood_0), Severity.INFO)
        Log.add('Degrees of freedom (k=0) = ' + str(dof0), Severity.INFO)

        Log.add('Log-Likelihood (k=1) = ' + str(likelihood_1), Severity.INFO)
        Log.add('Degrees of freedom (k=1) = ' + str(dof10),
                Severity.INFO)

        if method == 'AIC':
            ic0 = 2 * dof0 - 2 * likelihood_0
            ic1 = 2 * dof10 - 2 * likelihood_1
        elif method == 'AICc':
            # The small-sample correction divides by (n - dof - 2). Both
            # denominators must be strictly positive, otherwise the correction
            # is undefined (division by zero) or negative.
            denominator_0 = n_0 - dof0 - 2
            denominator_1 = n_1 - dof10 - 2
            assert denominator_0 > 0 and denominator_1 > 0, \
                'Error: number of samples too small for model complexity'
            ic0 = 2 * dof0 - 2 * likelihood_0 + \
                (2 * (dof0 + 1) * (dof0 + 2)) / denominator_0
            ic1 = 2 * dof10 - 2 * likelihood_1 + \
                (2 * (dof10 + 1) * (dof10 + 2)) / denominator_1
        elif method == 'BIC':
            ic0 = np.log(n_0) * dof0 - 2 * likelihood_0
            ic1 = np.log(n_1) * dof10 - 2 * likelihood_1
        else:
            # defensive: only reachable if the method assertion above is
            # disabled (e.g. when running with -O)
            raise PathpyError("Method check has not filtered out illegal "
                              "method %s " % method)

        # if the AIC/AICc/BIC of the zero-order model is larger than that of the
        # first-order model, we do not reject the network hypothesis
        return ic0 > ic1, ic0, ic1