def test_largest_connected_component():
    """Run the largest-connected-component search on a small network.

    The undirected network holds the chain a-b-c plus the separate edge
    x-y. The call is only exercised here; its result is not asserted.
    """
    net = Network(directed=False)
    for source, target in (('a', 'b'), ('b', 'c'), ('x', 'y')):
        net.add_edge(source, target)
    pp.algorithms.components.largest_connected_component(net)
def test_find_connected_components():
    """Run the connected-components search on a small network.

    The undirected network holds the chain a-b-c plus the separate edge
    x-y. The call is only exercised here; its result is not asserted.
    """
    net = Network(directed=False)
    for source, target in (('a', 'b'), ('b', 'c'), ('x', 'y')):
        net.add_edge(source, target)
    pp.algorithms.components.find_connected_components(net)
Example #3
0
def test_call_edges():
    """Check edge lookup by uid and by node pair."""

    # single edge: reachable through its uid and through its end points
    simple = Network()
    simple.add_edge('a', 'b', uid='a-b')

    assert isinstance(simple.edges['a-b'], Edge)
    assert simple.edges['a-b'].uid == 'a-b'
    assert simple.edges['a', 'b'].uid == 'a-b'

    # multi-edge network: the node pair resolves to all parallel edges
    multi = Network(multiedges=True)
    for extra in ({}, {}, {'uid': 'a-b'}):
        multi.add_edge('a', 'b', **extra)

    assert multi.number_of_edges() == 3
    assert len(multi.edges['a', 'b']) == 3

    # directed network: a->b and b->a count as two edges
    directed = Network()
    for source, target in (('a', 'b'), ('b', 'a')):
        directed.add_edge(source, target)

    assert directed.number_of_edges() == 2

    # undirected network: adding an edge must simply succeed
    undirected = Network(directed=False)
    undirected.add_edge('a', 'b')
Example #4
0
def test_network_plot():
    """Plot a two-node network to an HTML file.

    Nodes and the edge carry plot attributes; the node colour of 'a' is
    additionally passed to ``plot`` as a keyword argument.
    """
    network = Network()
    network.add_node('a', color='red')
    network.add_node('b', size=40)
    network.add_edge('a', 'b', uid='a-b', color='blue')

    node_colors = {'a': 'green'}
    network.plot(filename='simple_plot.html', node_color=node_colors)
def test_properties():
    """Check that an attribute set on an edge can be read back."""
    network = Network(directed=False)
    network.add_edge('a', 'b', uid='a-b')

    # write the attribute through the edge view of the network ...
    network.edges['a-b']['color'] = 'red'

    # ... and read it back the same way
    stored_color = network.edges['a-b']['color']
    assert stored_color == 'red'
Example #6
0
def test_remove_edge():
    """Test to remove an edge from the network.

    Covers removal by edge object, by edge uid and by node pair (with and
    without an explicit uid in a multi-edge network) and checks that the
    ``successors``, ``outgoing`` and ``incident_edges`` views follow along.
    """

    net = Network()
    a = Node('a')
    b = Node('b')
    c = Node('c')
    e = Edge(a, b, uid='e')
    f = Edge(b, a, uid='f')
    g = Edge(b, c, uid='g')
    net.add_edge(e)
    net.add_edge(f)

    # g was never added; the assertions below expect the network unchanged
    net.remove_edge(g)

    assert net.number_of_edges() == 2
    assert isinstance(net.edges['e'], Edge)
    assert g not in net.edges
    assert net.edges['a', 'b'] in net.edges

    assert net.successors['a'] == {b}
    assert net.outgoing['a'] == {e}
    assert net.incident_edges['a'] == {e, f}

    # remove by edge object
    net.remove_edge(e)

    assert net.number_of_edges() == 1
    assert net.successors['a'] == set()
    assert net.outgoing['a'] == set()
    assert net.incident_edges['a'] == {f}

    # remove by edge uid
    net.remove_edge('f')

    assert net.number_of_edges() == 0
    assert net.incident_edges['a'] == set()

    a = Node('a')
    b = Node('b')
    e = Edge(a, b, uid='e')
    f = Edge(a, b, uid='f')
    g = Edge(a, b, uid='g')

    net = Network(multiedges=True)
    net.add_edges(e, f, g)

    assert net.number_of_edges() == 3
    # Check every edge individually: `e and f and g in x` would only test
    # the membership of g (and the truthiness of e and f).
    assert all(edge in net.edges['a', 'b'] for edge in (e, f, g))

    # remove one of the parallel edges by node pair plus uid
    net.remove_edge('a', 'b', uid='g')
    assert net.number_of_edges() == 2
    assert g not in net.edges['a', 'b']

    # remove all remaining parallel edges by node pair
    net.remove_edge('a', 'b')
    assert net.number_of_edges() == 0
    assert len(net.edges['a', 'b']) == 0
Example #7
0
def test_network_undirected():
    """Check that a-b and b-a address the same edge in an undirected network.

    The second ``add_edge`` call is expected to update the attributes of the
    existing edge instead of creating a new one.
    """
    network = Network(directed=False)
    network.add_edge('a', 'b', timestamp=1, color='red', size=4)
    network.add_edge('b', 'a', timestamp=3, color='blue', frequency=30)

    # still a single edge
    assert network.number_of_edges() == 1

    # later values win, untouched attributes survive, either orientation works
    forward = network.edges['a', 'b']
    assert forward['color'] == 'blue'
    backward = network.edges['b', 'a']
    assert backward['size'] == 4
    forward = network.edges['a', 'b']
    assert forward['timestamp'] == 3
def test_sub_networks():
    """Test to remove a network with the ``-`` operator.

    Subtracting must return a new network and leave both operands
    untouched; subtracting a disjoint network must change nothing.
    """
    net_1 = Network()
    net_2 = Network()
    net_1.add_edge('a', 'b', uid='a-b')
    net_2.add_edge('c', 'd', uid='c-d')
    net_1 += net_2
    net_2.add_edge('d', 'e', uid='d-e')

    net_3 = net_1 - net_2

    assert net_3.number_of_nodes() == 2
    assert net_3.number_of_edges() == 1
    # Test both nodes explicitly: `'a' and 'b' in x` only checks 'b'.
    assert 'a' in net_3.nodes and 'b' in net_3.nodes
    assert 'a-b' in net_3.edges
    # the operands themselves are unchanged
    assert net_1.number_of_nodes() == 4
    assert net_1.number_of_edges() == 2
    assert net_2.number_of_nodes() == 3
    assert net_2.number_of_edges() == 2

    # subtracting a network without common nodes or edges
    net_4 = Network()
    net_4.add_edge('x', 'y', uid='x-y')

    net_5 = net_3 - net_4

    assert net_5.number_of_nodes() == 2
    assert net_5.number_of_edges() == 1
    assert 'a' in net_5.nodes and 'b' in net_5.nodes
    assert 'a-b' in net_5.edges
def test_network_properties():
    """Check ``successors`` and ``incoming`` before and after an edge removal."""
    network = Network()
    for source, target in (('a', 'b'), ('b', 'c'), ('c', 'a')):
        network.add_edge(source, target, uid=source + '-' + target)

    # the closing edge c->a links c to a
    expected_successors = {network.nodes['a']}
    assert network.successors['c'] == expected_successors
    expected_incoming = {network.edges['c-a']}
    assert network.incoming['a'] == expected_incoming

    network.remove_edge('c-a')

    # without c->a both views are empty
    assert network.successors['c'] == set()
    assert network.incoming['a'] == set()
Example #10
0
def test_get_edge():
    """Check membership tests on ``net.edges`` for several key forms."""
    # undirected: both orientations of an existing edge are contained
    undirected = Network(directed=False)
    undirected.add_edge('a', 'b')
    for key, expected in ((('a', 'b'), True),
                          (('b', 'a'), True),
                          (('a', 'c'), False)):
        assert (key in undirected.edges) is expected

    # directed: node objects, the edge object and mixed uid/object pairs work,
    # the reversed pair does not
    a = Node('a')
    b = Node('b')
    e = Edge(a, b)
    directed = Network(directed=True)
    directed.add_edge(e)
    for key, expected in (((a, b), True),
                          (e, True),
                          (('a', b), True),
                          ((b, a), False)):
        assert (key in directed.edges) is expected
Example #11
0
def test_remove_node():
    """Check that removing a node also removes the edges attached to it."""

    network = Network(directed=True)
    edge_list = (
        ('a', 'b'),
        ('a', 'c'),
        ('b', 'd'),
        ('b', 'e'),
        ('d', 'b'),
        ('d', 'e'),
        ('e', 'd'),
    )
    for source, target in edge_list:
        network.add_edge(source, target)

    # five nodes, seven edges
    assert network.shape == (5, 7)

    # four of the seven edges touch 'b'
    network.remove_node('b')
    assert network.shape == (4, 3)
Example #12
0
def test_isub_networks():
    """Test to remove a network in place with the ``-=`` operator.

    The left operand loses everything it shares with the right operand,
    while the right operand stays untouched.
    """
    net_1 = Network()
    net_2 = Network()
    net_1.add_edge('a', 'b', uid='a-b')
    net_2.add_edge('c', 'd', uid='c-d')
    net_1 += net_2
    net_2.add_edge('d', 'e', uid='d-e')

    net_1 -= net_2

    assert net_1.number_of_nodes() == 2
    assert net_1.number_of_edges() == 1
    # Test both nodes explicitly: `'a' and 'b' in x` only checks 'b'.
    assert 'a' in net_1.nodes and 'b' in net_1.nodes
    assert 'a-b' in net_1.edges
    assert net_2.number_of_nodes() == 3
    assert net_2.number_of_edges() == 2
Example #13
0
def test_iadd_networks():
    """Test to add networks together with the ``+=`` operator.

    Scenarios, in order: disjoint networks, networks sharing a node object,
    networks with equal node uids but different node objects, networks with
    different / equal edge uids between the same nodes, and adding the sum
    of two networks.
    """
    # disjoint networks: the left operand grows, the right one is unchanged
    net_1 = Network()
    net_1.add_edges(('a', 'b'), ('b', 'c'))

    net_2 = Network()
    net_2.add_edges(('x', 'y'), ('y', 'z'))

    net_1 += net_2

    assert net_1.number_of_nodes() == 6
    assert net_1.number_of_edges() == 4
    assert net_2.number_of_nodes() == 3
    assert net_2.number_of_edges() == 2

    # test same node objects: node b is shared by both networks
    a = Node('a')
    b = Node('b')
    c = Node('c')

    net_1 = Network()
    net_2 = Network()
    net_1.add_edge(a, b)
    net_2.add_edge(b, c)

    net_1 += net_2
    assert net_1.number_of_nodes() == 3
    assert net_1.number_of_edges() == 2
    assert net_2.number_of_nodes() == 2
    assert net_2.number_of_edges() == 1

    # nodes with same uids but different objects: net_2 gets its own node
    # for the uid 'b', so merging is expected to fail
    net_1 = Network()
    net_2 = Network()
    net_1.add_edge(a, b)
    net_2.add_edge('b', c)

    with pytest.raises(Exception):
        net_1 += net_2

    # different edge uids between the same node objects: expected to fail
    a = Node('a')
    b = Node('b')
    c = Node('c')

    net_1 = Network()
    net_2 = Network()
    net_1.add_edge(a, b, uid='e1')
    net_2.add_edge(a, b, uid='e2')

    with pytest.raises(Exception):
        net_1 += net_2
    # NOTE(review): the assertions below are disabled; they describe a merge
    # that succeeds, whereas the test currently expects an exception.
    # assert net_1.number_of_edges() == 2
    # assert net_1.number_of_nodes() == 2
    # assert 'e1' in net_1.edges and 'e2' in net_1.edges

    # edges with same uids but different objects: expected to fail
    net_1 = Network()
    net_2 = Network()
    net_1.add_edge(a, b, uid='e1')
    net_2.add_edge(a, b, uid='e1')

    with pytest.raises(Exception):
        net_1 += net_2

    # add multiple networks: the right-hand side is itself a sum
    net_1 = Network()
    net_2 = Network()
    net_3 = Network()
    net_1.add_edge('a', 'b')
    net_2.add_edge('c', 'd')
    net_3.add_edge('e', 'f')
    net_1 += net_2 + net_3

    assert net_1.number_of_edges() == 3
    assert net_1.number_of_nodes() == 6
Example #14
0
def test_add_edge():
    """Test the edge assignment.

    Walks through the ways an edge can be handed to ``Network.add_edge``
    (edge objects with and without uid, node objects, node uids, a tuple of
    uids) and the combinations that are expected to be rejected.
    """

    a = Node('a')
    b = Node('b')
    c = Node('c')

    # add edges with no uids
    e = Edge(a, b)
    f = Edge(b, c)
    g = Edge(a, b)

    net = Network()
    net.add_edge(e)
    net.add_edge(f)
    # g connects the same nodes as e; a second a->b edge is rejected here
    with pytest.raises(Exception):
        net.add_edge(g)

    assert len(net.edges) == 2
    assert len(net.nodes) == 3

    # node a already entered the network through edge e
    with pytest.raises(Exception):
        net.add_node(a)

    # the very same edge object cannot be added twice
    with pytest.raises(Exception):
        net.add_edge(e)

    # add edges with uids
    e = Edge(a, b, uid='a-b')
    f = Edge(b, c, uid='b-c')
    g = Edge(a, b, uid='a-b')
    h = Edge(a, b, uid='ab')

    net = Network()
    net.add_edge(e)
    net.add_edge(f)

    # h has a fresh uid but duplicates the a->b connection
    with pytest.raises(Exception):
        net.add_edge(h)

    assert len(net.edges) == 2
    assert len(net.nodes) == 3

    # g duplicates both the uid and the connection of e
    with pytest.raises(Exception):
        net.add_edge(g)

    with pytest.raises(Exception):
        net.add_edge(e)

    # add edges and nodes
    net = Network()
    net.add_edge(e)

    # add new node with same uid
    with pytest.raises(Exception):
        net.add_node('a')

    # add same node
    with pytest.raises(Exception):
        net.add_node(a)

    # add node and edge with the node
    a1 = Node('a')
    a2 = Node('a')
    b = Node('b')
    e1 = Edge(a2, b)
    net = Network()
    net.add_node(a1)

    # e1 starts at a2, a different object with the uid already used by a1
    with pytest.raises(Exception):
        net.add_edge(e1)

    # an edge built on the node object stored in the network is accepted
    e2 = Edge(net.nodes['a'], b)
    net.add_edge(e2)

    # net add edge via string and nodes
    net = Network()
    net.add_node('a')
    net.add_node('b')
    net.add_edge('a', 'b')

    assert len(net.nodes) == 2
    assert len(net.edges) == 1

    # without multiedges a second a->b edge is rejected
    with pytest.raises(Exception):
        net.add_edge('a', 'b')

    # with multiedges enabled parallel edges are allowed
    net = Network(multiedges=True)
    net.add_node('a')
    net.add_node('b')
    net.add_edge('a', 'b')

    assert len(net.nodes) == 2
    assert len(net.edges) == 1

    net.add_edge('a', 'b')

    assert len(net.nodes) == 2
    assert len(net.edges) == 2

    c = Node('c')

    # mixing a uid for a known node with a new node object
    net.add_edge('b', c)

    assert len(net.nodes) == 3
    assert len(net.edges) == 3

    a = Node('a')

    # a new node object whose uid is already taken in the network
    with pytest.raises(Exception):
        net.add_edge(a, 'b')

    with pytest.raises(Exception):
        net.add_edge(None)

    # edge given by uids with an explicit edge uid and an attribute
    net = Network()
    net.add_edge('a', 'b', uid='a-b', length=10)

    assert net.number_of_nodes() == 2
    assert net.number_of_edges() == 1
    assert isinstance(net.edges['a-b'], Edge)
    assert net.edges['a-b'].uid == 'a-b'
    assert net.edges['a-b']['length'] == 10
    assert net.nodes['a'].uid == 'a'
    assert net.nodes['b'].uid == 'b'

    b = net.nodes['b']
    c = Node('c')
    # note: the uid 'c-d' is given to an edge that runs from b to c
    net.add_edge(b, c, uid='c-d', length=5)

    assert net.number_of_edges() == 2

    net.add_edge('c', 'd', uid='c-2-d')

    assert net.number_of_edges() == 3
    assert net.edges['c-2-d'].v.uid == 'c'

    net.add_edge('a', 'd', uid='a-d')
    assert net.edges['a-d'].uid == 'a-d'

    # attributes passed to add_edge end up on an existing edge object
    ab = Edge(Node('a'), Node('b'), uid='a-b')
    net = Network()
    net.add_edge(ab, color='blue')

    assert net.edges['a-b']['color'] == 'blue'

    # an edge may reference one known and one not yet known node uid
    net = Network()
    net.add_node("A")
    net.add_edge("A", "B")

    assert net.number_of_edges() == 1
    assert net.number_of_nodes() == 2

    # edges may be passed as a single tuple of node uids
    net = Network()
    edges = [("A", "B"), ("B", "C")]
    for edge in edges:
        net.add_edge(edge)

    assert net.number_of_edges() == 2
    assert net.number_of_nodes() == 3
Example #15
0
class MOGen:
    """A generative mulit-order model for variable-length paths in networks."""
    def __init__(self, paths, max_order=1, model_selection=True):
        """Set up the model state from a collection of observed paths.

        Parameters
        ----------
        paths
            Path collection. Iterating it yields path objects exposing
            ``nodes``; ``paths[p]['frequency']`` is the count of path ``p``
            and ``paths.edges`` provides the edges of the first-order network.
        max_order : int
            Stored as ``self.max_order``.
        model_selection : bool
            Stored as ``self.model_selection``.
        """
        # observed paths, keyed by their sequence of node uids
        self.paths = {}
        for path in paths:
            uids = tuple(node.uid for node in path.nodes)
            self.paths[uids] = paths[path]['frequency']

        # network made of the edges of the observed paths
        self.network = Network()
        for edge in paths.edges:
            self.network.add_edge(edge)

        self.max_order = max_order
        self.model_selection = model_selection

        # results of the selected model; filled in by fit()
        self.optimal_maximum_order = None
        for attribute in ('A', 'T', 'log_L', 'dof', 'AIC'):
            setattr(self, attribute, None)

        # per-order results; a missing order yields a fresh empty dict
        self.models = collections.defaultdict(lambda: {})
        self.log_L_offset = None

    def update_max_order(self, max_order):
        """Set the highest order taken into account by the model selection.

        Only the stored value changes; the model has to be fitted again
        before the new maximum order has any effect.
        """
        setattr(self, 'max_order', max_order)

    def update_model_selection(self, model_selection):
        """Switch model selection on or off.

        With a true value all models up to ``max_order`` are considered,
        otherwise only the model of order ``max_order``. Only the stored
        flag changes; the model has to be fitted again for it to take effect.
        """
        setattr(self, 'model_selection', model_selection)

    def _get_log_likelihood_offset(self, paths):
        """Computes the log likelihood offset.

        The offset is ``log(N!) - sum_i log(n_i!)`` where ``n_i`` are the
        frequencies stored in ``self.paths`` and ``N`` is their sum, i.e. the
        logarithm of the multinomial coefficient of the observed counts.

        Parameters
        ----------
        paths
            Unused; kept so that existing callers keep working. The
            frequencies are always read from ``self.paths``.

        Returns
        -------
        float
            The log likelihood offset.
        """
        def log_factorial(n):
            """Return log(n!)."""
            # lgamma(n + 1) equals log(n!) for every n >= 0. Unlike a
            # truncated Stirling formula it is accurate for large n too,
            # and it never has to build the huge integer n!.
            return math.lgamma(n + 1)

        frequencies = self.paths.values()
        return log_factorial(sum(frequencies)) - sum(
            map(log_factorial, frequencies))

    def _chunks(self, iterable, n):
        """Yield at most ``n`` consecutive, roughly equal-sized chunks.

        Parameters
        ----------
        iterable : list or dict
            Collection to split. Lists are split into lists, dicts into
            dicts holding the corresponding key/value pairs.
        n : int
            Requested number of chunks; capped at ``len(iterable)``.

        Yields
        ------
        list or dict
            The chunks, in the iteration order of ``iterable``. Nothing is
            yielded for an empty collection or a non-positive ``n``.

        Raises
        ------
        TypeError
            If ``iterable`` is neither a list nor a dict.
        """
        if isinstance(iterable, list):
            build = list
        elif isinstance(iterable, dict):
            def build(keys):
                return {key: iterable[key] for key in keys}
        else:
            # an explicit error instead of an assert, which -O would strip
            raise TypeError('can only chunk a list or a dict, got {}'.format(
                type(iterable).__name__))

        length = len(iterable)
        n = min(n, length)
        if n <= 0:
            # nothing to split; also avoids dividing by zero below
            return

        for i in range(n):
            # integer arithmetic keeps the boundaries exact, so the last
            # chunk always ends at the last element
            start = i * length // n
            stop = (i + 1) * length // n
            yield build(itertools.islice(iterable, start, stop))

    def _count_transitions(args):
        counter = collections.Counter()

        for path, frequency in args['paths'].items():
            mask = toeplitz(min(len(path), args['order'])*[1] + \
                            (len(path)-args['order'])*[0], \
                            1*[1] + (len(path)-1)*[0])
            multi_order_path = tuple(
                map(lambda x: tuple(x[x != None]), np.where(mask, path, None)))
            multi_order_path = (
                ('*', ), ) + multi_order_path + (multi_order_path[-1] +
                                                 ('+', ), )

            for s, t in zip(multi_order_path, multi_order_path[1:]):
                counter[(s, t)] += frequency

        return counter

    def _get_multi_order_transitions(
            self,
            order,
            no_of_processes=multiprocessing.cpu_count(),
            verbose=True):
        """Count all multi-order transitions of ``self.paths`` in parallel.

        The paths are split into chunks (limited by the configured
        ``paths_per_chunk`` and by ``no_of_processes``); each chunk is handed
        to a worker pool through ``unwrap_self_count_transitions`` and the
        partial counters are summed up.

        Parameters
        ----------
        order : int
            Order passed on to the workers.
        no_of_processes : int
            Size of the worker pool.
        verbose : bool
            Show a progress bar if true.

        Returns
        -------
        collections.Counter
            Accumulated transition counts.
        """
        chunks_wanted = int(
            np.ceil(len(self.paths) / config['MOGen']['paths_per_chunk']))
        n_chunks = min(chunks_wanted, no_of_processes)

        jobs = []
        for path_chunk in self._chunks(self.paths, n_chunks):
            jobs.append({'paths': path_chunk, 'order': order})

        label = 'order:{1:>3}; T     ({0} prcs)'.format(no_of_processes, order)

        totals = collections.Counter()
        with multiprocessing.Pool(no_of_processes) as pool, \
                tqdm(total=len(jobs), desc=label,
                     disable=not verbose) as progress:
            partial_counts = pool.imap_unordered(
                unwrap_self_count_transitions, jobs, chunksize=1)
            for partial in partial_counts:
                totals += partial
                progress.update(1)

        return totals

    def _get_multi_order_adjacency_matrix(
            self,
            order,
            no_of_processes=multiprocessing.cpu_count(),
            verbose=True):
        """Build the adjacency matrix of the multi-order model.

        The transition counts of the given order become the entries of a
        sparse matrix; every node occurring in a transition gets one index.

        Parameters
        ----------
        order : int
            Order for which the transitions are counted.
        no_of_processes : int
            Passed on to the transition counting.
        verbose : bool
            Passed on to the transition counting.

        Returns
        -------
        MultiOrderMatrix
            The count matrix together with the node -> index mapping.
        """
        transitions = self._get_multi_order_transitions(
            order, no_of_processes=no_of_processes, verbose=verbose)

        # every node that appears as source or target of a transition
        distinct_nodes = {node for pair in transitions.keys() for node in pair}
        # NOTE(review): the first sort key compares against '#', while the
        # end marker used when counting transitions is '+' - confirm intent.
        ordered_nodes = sorted(
            distinct_nodes, key=lambda x: (x[-1] == '#', len(x), x[-1]))
        node_id_dict = {node: idx for idx, node in enumerate(ordered_nodes)}

        rows, cols, counts = [], [], []
        for (source, target), count in transitions.items():
            rows.append(node_id_dict[source])
            cols.append(node_id_dict[target])
            counts.append(count)

        size = len(node_id_dict)
        A = dok_matrix((size, size))
        A[rows, cols] = counts

        return MultiOrderMatrix(A, node_id_dict)

    def _get_multi_order_transition_matrix(
            self,
            order,
            no_of_processes=multiprocessing.cpu_count(),
            A=None,
            verbose=True):
        """Return the row-normalised version of a multi-order adjacency matrix.

        Parameters
        ----------
        order : int
            Order of the model; only used when ``A`` has to be computed.
        no_of_processes : int
            Number of worker processes used when ``A`` has to be computed.
        A : MultiOrderMatrix, optional
            Adjacency matrix to normalise. If ``None`` it is computed with
            ``_get_multi_order_adjacency_matrix``.
        verbose : bool
            Passed on when ``A`` has to be computed.

        Returns
        -------
        MultiOrderMatrix
            A shallow copy of ``A`` whose ``matrix`` has been L1-normalised
            along axis 1; ``A`` itself keeps its matrix.
        """
        # Compare against None explicitly: the truth value of a matrix
        # wrapper is not a reliable "was it passed?" test.
        if A is None:
            # honour the caller's process limit instead of always using
            # every available CPU
            A = self._get_multi_order_adjacency_matrix(
                order,
                no_of_processes=no_of_processes,
                verbose=verbose)

        T = copy(A)
        # rebinding the attribute on the copy leaves A.matrix untouched
        T.matrix = normalize(T.matrix, norm='l1', axis=1)
        return T

    def _get_log_likelihood_path(args):
        log_L = 0
        for path, frequency in args['paths'].items():
            mask = toeplitz(min(len(path), args['order'])*[1] + \
                            (len(path)-args['order'])*[0], \
                            1*[1] + (len(path)-1)*[0])
            multi_order_path = tuple(
                map(lambda x: tuple(x[x != None]), np.where(mask, path, None)))
            multi_order_path = (
                ('*', ), ) + multi_order_path + (multi_order_path[-1] +
                                                 ('+', ), )

            for s, t in zip(multi_order_path, multi_order_path[1:]):
                if s in args['node_id_dict'] and t in args['node_id_dict']:
                    log_L += np.log(
                        args['T'][args['node_id_dict'][s],
                                  args['node_id_dict'][t]]) * frequency
                else:  # the transition is not in the model and therefore has probability 0
                    log_L += -np.inf
        return log_L

    def _compute_log_likelihood(self,
                                order,
                                T,
                                no_of_processes=multiprocessing.cpu_count(),
                                verbose=True):
        """Compute the log-likelihood of ``self.paths`` under ``T`` in parallel.

        The paths are split into chunks (limited by the configured
        ``paths_per_chunk`` and by ``no_of_processes``); each chunk is
        evaluated in a worker pool through
        ``unwrap_self_get_log_likelihood_path`` and the partial results are
        added up.

        Parameters
        ----------
        order : int
            Order passed on to the workers.
        T
            Object providing ``matrix`` and ``node_id_dict``.
        no_of_processes : int
            Size of the worker pool.
        verbose : bool
            Show a progress bar if true.

        Returns
        -------
        The summed log-likelihood (``0`` if there was nothing to evaluate).
        """
        chunks_wanted = int(
            np.ceil(len(self.paths) / config['MOGen']['paths_per_chunk']))
        n_chunks = min(chunks_wanted, no_of_processes)

        jobs = []
        for path_chunk in self._chunks(self.paths, n_chunks):
            jobs.append({
                'paths': path_chunk,
                'order': order,
                'T': T.matrix,
                'node_id_dict': T.node_id_dict,
            })

        label = 'order:{1:>3}; log_L ({0} prcs)'.format(no_of_processes, order)

        total = 0
        with multiprocessing.Pool(no_of_processes) as pool, \
                tqdm(total=len(jobs), desc=label,
                     disable=not verbose) as progress:
            partial_results = pool.imap_unordered(
                unwrap_self_get_log_likelihood_path, jobs, chunksize=1)
            for partial in partial_results:
                total += partial
                progress.update(1)

        return total

    def _compute_degrees_of_freedom(self, order):
        """Compute the degrees of freedom of the model up to ``order``.

        Starts from ``number of rows - 1`` plus the sum of the unweighted
        adjacency matrix of ``self.network`` and, for every further order,
        adds the sum of the running product ``P *= A``. What ``*=`` does
        (matrix or element-wise product) depends on the type returned by
        ``adjacency_matrix``.

        Returns
        -------
        int
            The accumulated total, truncated to an integer.
        """
        # unweighted adjacency matrix of the first-order network
        adjacency = self.network.adjacency_matrix(weight=None)

        power = adjacency.copy()
        total = adjacency.shape[0] - 1 + power.sum()
        for _ in range(order - 1):
            power *= adjacency
            total += power.sum()
        return int(total)

    def _compute_AIC(self,
                     order,
                     T,
                     no_of_processes=multiprocessing.cpu_count(),
                     verbose=True):
        """Compute AIC, log-likelihood and degrees of freedom for one order.

        The log-likelihood is the value returned by
        ``_compute_log_likelihood`` plus ``self.log_L_offset``; the AIC is
        ``2 * dof - 2 * log_L``.

        Returns
        -------
        tuple
            ``(AIC, log_L, dof)``.
        """
        path_log_likelihood = self._compute_log_likelihood(
            order, T, no_of_processes=no_of_processes, verbose=verbose)
        log_L = path_log_likelihood + self.log_L_offset

        dof = self._compute_degrees_of_freedom(order)

        return 2 * dof - 2 * log_L, log_L, dof

    def _compute_order(self,
                       order,
                       no_of_processes=multiprocessing.cpu_count(),
                       verbose=True):
        """Compute and store the model of a single order.

        Builds the adjacency matrix, derives the transition matrix from it,
        evaluates AIC, log-likelihood and degrees of freedom, and stores all
        five results in ``self.models[order]`` under the keys ``'A'``,
        ``'T'``, ``'log_L'``, ``'dof'`` and ``'AIC'``.
        """
        shared = {'no_of_processes': no_of_processes, 'verbose': verbose}

        A = self._get_multi_order_adjacency_matrix(order, **shared)
        T = self._get_multi_order_transition_matrix(order, A=A, **shared)
        AIC, log_L, dof = self._compute_AIC(order, T, **shared)

        self.models[order].update(A=A, T=T, log_L=log_L, dof=dof, AIC=AIC)

    def summary(self, print_summary=True):
        """Returns a summary of the multi-order model.

        The summary is a table with one row per order (all orders up to
        ``max_order`` if model selection is on, otherwise only ``max_order``)
        listing the number of nodes, the sum of the adjacency matrix, the
        log-likelihood, the degrees of freedom and the AIC. The row of the
        selected order is printed in bold.

        Parameters
        ----------
        print_summary : bool
            If true the summary is printed and ``None`` is returned,
            otherwise the summary is returned as a string.

        Returns
        -------
        str or None
            The table, or ``'Model has not been fit'`` if a required order
            has not been computed yet (only when ``print_summary`` is false).
        """

        # TODO: Find better solution for printing
        # TODO: Move to util
        line_length = 54
        row = {}
        row['==='] = '=' * line_length
        row['s|ss|sss'] = '{:^3s} | {:^9s} {:^9s} | {:^9s} {:^6s} {:>9s}'
        row['d|dd|fdf'] = '{:^3d} | {:^9d} {:^9d} | {:^9.2f} {:^6d} {:>9.2f}'
        row['d|dd|fdf (highlight)'] = '\033[1m{:^3d}\033[0m | \033[1m{:^9d} {:^9d}\033[0m |' + \
                                      ' \033[1m{:^9.2f} {:^6d} {:>9.2f}\033[0m'

        # title and column header
        summary = [
            row['==='], 'MOGen model',
            '- Model Selection '.ljust(line_length, '-'),
            row['s|ss|sss'].format('K', 'nodes', 'edges', 'log L', 'dof',
                                   'AIC'),
        ]

        if self.model_selection:
            orders = list(range(1, self.max_order + 1))
        else:
            orders = [self.max_order]

        for order in orders:
            # Use .get(): indexing the defaultdict would insert an empty
            # entry for a missing order, which a later fit() could mistake
            # for an already computed model.
            model = self.models.get(order, {})
            try:
                values = (
                    len(model['A'].node_id_dict),
                    int(np.sum(np.sum(model['A'].matrix))),
                    model['log_L'],
                    model['dof'],
                    model['AIC'],
                )
            except KeyError:
                if print_summary:
                    print('Model has not been fit')
                    return None
                return 'Model has not been fit'

            if order == self.optimal_maximum_order:
                template = row['d|dd|fdf (highlight)']
            else:
                template = row['d|dd|fdf']
            summary.append(template.format(order, *values))

        # add double line
        summary.append('=' * line_length)

        if print_summary:
            for line in summary:
                print(line.rstrip())
            return None
        return '\n'.join(summary)

    def __str__(self):
        """Return the model summary as text instead of printing it."""
        text = self.summary(print_summary=False)
        return text

    def __repr__(self):
        """Return the model summary as text; same output as ``str()``."""
        text = self.summary(print_summary=False)
        return text

    def fit(self, no_of_processes=multiprocessing.cpu_count(), verbose=True):
        """Estimate the optimal MOGen from all models up to max_order.

        Computes every required order that has no stored result yet, selects
        the order with the lowest AIC (the smallest order wins a tie) and
        copies its results to ``self.A``, ``self.T``, ``self.log_L``,
        ``self.dof`` and ``self.AIC``.

        Parameters
        ----------
        no_of_processes : int
            Number of worker processes used for the computations.
        verbose : bool
            If true, progress bars are shown and the selected order plus the
            summary are printed.

        Returns
        -------
        MOGen
            The fitted model itself.
        """

        LOG.debug('start estimate optimal order')
        started = datetime.datetime.now()

        # log likelihood offset (computed once and reused)
        if self.log_L_offset is None:
            self.log_L_offset = self._get_log_likelihood_offset(self)

        # orders required by the current settings
        if self.model_selection:
            req_orders = set(range(1, self.max_order + 1))
        else:
            req_orders = {self.max_order}

        # Only orders with a stored AIC count as computed. self.models is a
        # defaultdict, so merely reading self.models[order] elsewhere leaves
        # an empty entry behind that must not be mistaken for a result.
        cur_orders = {
            order for order, model in self.models.items() if 'AIC' in model
        }

        # compute orders not yet computed, lowest order first
        for order in sorted(req_orders - cur_orders):
            self._compute_order(order,
                                no_of_processes=no_of_processes,
                                verbose=verbose)

        # lowest AIC wins; ties are resolved towards the smaller order
        self.optimal_maximum_order = min(
            req_orders, key=lambda order: (self.models[order]['AIC'], order))
        if verbose:
            print(
                'Selected optimal maximum order K={} from candidates.'.format(
                    self.optimal_maximum_order))
            self.summary()

        best = self.models[self.optimal_maximum_order]
        self.A = best['A']
        self.T = best['T']
        self.log_L = best['log_L']
        self.dof = best['dof']
        self.AIC = best['AIC']

        finished = datetime.datetime.now()
        LOG.debug('end estimate optiomal order:' +
                  ' {} seconds'.format((finished - started).total_seconds()))

        return self

    def plot(self):
        """Plot AIC, negative log-likelihood and degrees of freedom per order.

        Draws three panels next to each other on logarithmic y axes; the
        values of the selected order are marked with a large translucent
        dot. All plotted orders must already be present in ``self.models``.
        """
        if self.model_selection:
            orders = list(range(1, self.max_order + 1))
        else:
            orders = [self.max_order]

        assert all(order in self.models for order in orders)

        def collect(key):
            # per-order values of one quantity, in plotting order
            return collections.OrderedDict(
                (order, self.models[order][key]) for order in orders)

        AIC = collect('AIC')
        log_L = collect('log_L')
        dof = collect('dof')

        line_style = dict(color='#218380', marker='o', linestyle='dashed',
                          linewidth=2, markersize=9)
        marker_style = dict(color='#218380', marker='o', markersize=20,
                            alpha=.3)

        def draw_panel(position, label, selected_value, xs, ys):
            # one panel: marker for the selected order, then the curve
            plt.subplot(1, 3, position)
            plt.plot(self.optimal_maximum_order, selected_value,
                     **marker_style)
            plt.plot(xs, ys, **line_style)
            plt.xlabel('max order')
            plt.ylabel(label)
            plt.yscale('log')

        plt.figure(figsize=[21, 6])
        draw_panel(1, 'AIC', self.AIC, AIC.keys(), AIC.values())
        draw_panel(2, '-log(L)', -self.log_L, log_L.keys(),
                   [-x for x in log_L.values()])
        draw_panel(3, 'dof', self.dof, dof.keys(), dof.values())
        plt.show()

    def _generate_path_chunk(args):
        generated_paths_hon_chunk = collections.Counter()

        for i in range(args['no_of_paths']):
            generated_path = (args['start_node'], )
            while generated_path[-1][-1] != '+':
                c = np.random.choice(list(args['id_node_dict'].keys()),
                                     p=np.ravel(
                                         args['mat'][args['node_id_dict'][
                                             generated_path[-1]]].todense()))
                generated_path += ((args['id_node_dict'][c]), )

            generated_paths_hon_chunk[generated_path] += 1

        return generated_paths_hon_chunk

    def predict(self,
                no_of_paths,
                max_order=None,
                seed=None,
                start_node=('*', ),
                no_of_processes=multiprocessing.cpu_count(),
                paths_per_process=1000):
        """Generate random paths from a fitted model.

        Parameters
        ----------
        no_of_paths : int
            Total number of paths to generate.
        max_order : int, optional
            If truthy, the transition matrix stored for that order in
            ``self.models`` is used; otherwise the selected one (``self.T``).
        seed : int, optional
            Seed handed to ``np.random.seed`` in the calling process before
            the generation starts; ``None`` seeds from fresh entropy.
            NOTE(review): whether worker processes see that state depends on
            the multiprocessing start method - confirm before relying on
            reproducible output.
        start_node : tuple
            Node of the model at which every generated path starts.
        no_of_processes : int
            Size of the worker pool.
        paths_per_process : int
            Approximate number of paths generated per work item.

        Returns
        -------
        dict
            Maps each generated first-order path (tuple of node labels) to
            the number of times it was generated.
        """

        # honour the seed argument
        np.random.seed(seed)

        if max_order:
            assert max_order in self.models
            model = self.models[max_order]['T']
        else:
            model = self.T
        mat = model.matrix
        node_id_dict = model.node_id_dict
        id_node_dict = {v: k for k, v in node_id_dict.items()}

        assert start_node in node_id_dict.keys()

        # Split the request into work items of roughly paths_per_process
        # paths; each step takes an equal share of what is still missing.
        splits = []
        for i in range(max(1, int(np.floor(no_of_paths / paths_per_process))),
                       0, -1):
            splits.append(round((no_of_paths - sum(splits)) / i))

        args = [{
            'no_of_paths': split,
            'start_node': start_node,
            'id_node_dict': id_node_dict,
            'node_id_dict': node_id_dict,
            'mat': mat
        } for split in splits]

        generated_paths_hon = collections.Counter()
        with multiprocessing.Pool(no_of_processes) as p:
            with tqdm(total=len(args)) as pbar:
                for generated_paths_hon_chunk in p.imap_unordered(
                        unwrap_self_generate_paths_chunk, args, chunksize=1):
                    generated_paths_hon += generated_paths_hon_chunk
                    pbar.update(1)

        # Project the multi-order paths to first order: keep the last
        # element of every inner node and drop the start and end entries.
        generated_paths = {}

        for k, v in generated_paths_hon.items():
            if start_node == ('*', ):
                generated_paths[tuple(x[-1] for x in k[1:-1])] = v
            else:
                # a custom start node contributes its own elements first
                generated_paths[k[0] + tuple(x[-1] for x in k[1:-1])] = v

        return generated_paths

    def pagerank(self, max_order=None):
        """Compute a score per node label from the leading eigenvector.

        The transition matrix (with the zero order integrated) is transposed
        and its eigenvector for the eigenvalue of largest magnitude is taken.
        The absolute real parts are normalised to sum to one and then added
        up per last element of each multi-order node.

        Parameters
        ----------
        max_order : int, optional
            If truthy, the transition matrix stored for that order in
            ``self.models`` is used; otherwise the selected one (``self.T``).

        Returns
        -------
        A data frame with one row per node label and the single column
        ``'score'``, sorted by descending score (``pd`` is presumably
        pandas - confirm against the file's imports).
        """
        if max_order:
            T = self.models[max_order]['T'].integrate_zero_order()
        else:
            T = self.T.integrate_zero_order()

        # k=1, which="LM": only the eigenpair of largest magnitude is needed
        # (sla is presumably scipy.sparse.linalg - confirm)
        _, v = sla.eigs(T.matrix.T, k=1, which="LM")
        # drop imaginary parts and signs, then normalise to a total of one
        v = list(map(np.abs, np.real(v)))
        v = v / sum(v)
        # accumulate the scores of all multi-order nodes ending in the same
        # label
        c = collections.defaultdict(lambda: 0)
        for node in T.node_id_dict.keys():
            pr = v[T.node_id_dict[node]]
            if pr.imag == 0:
                c[node[-1]] += pr.real
            else:
                # v was made real above, so this branch is not expected to run
                assert True == False
        pagerank = pd.DataFrame([v for k, v in c.items()],
                                index=c.keys(),
                                columns=['score']).sort_values('score',
                                                               ascending=False)
        return pagerank

    def mean_first_passage_time(self, max_order=None, recurrence=False):
        """Compute a matrix of mean first passage times between nodes.

        For every target node the target's row of the transition matrix
        (with the zero order integrated) is set to zero,
        ``inv(I - T_target) - I`` is computed and its row sums are written
        to the target's column of the result. The result is finally reduced
        to first order.

        Parameters
        ----------
        max_order : int, optional
            If truthy, the transition matrix stored for that order in
            ``self.models`` is used; otherwise the selected one (``self.T``).
        recurrence : bool
            If true, the reciprocal pagerank scores are added to the
            diagonal of the result.

        Returns
        -------
        MultiOrderMatrix
            The first-order matrix returned by ``to_first_order()``.
        """
        source = self.models[max_order]['T'] if max_order else self.T
        T = source.integrate_zero_order()

        size = len(T.node_id_dict)
        M = MultiOrderMatrix(np.zeros(shape=(size, size)), T.node_id_dict)
        M.matrix = M.matrix.tolil()

        for target in T.node_id_dict.keys():
            T_target = T.matrix.tolil()

            # cut all transitions leaving the target
            T_target[T.node_id_dict[target], :] = 0

            identity = np.eye(T_target.shape[0])
            res = np.linalg.inv(identity - T_target) - identity
            res = MultiOrderMatrix(res, T.node_id_dict)

            # row sums of res form the column of the target
            ones = np.ones(shape=(res.matrix.shape[0], 1))
            M.matrix[:, [res.node_id_dict[target]]] = res.matrix @ ones

        M = M.to_first_order()

        if recurrence:
            pr = self.pagerank(max_order=max_order)
            return_times = [1 / pr.loc[node[-1], 'score'] for node in T.nodes]
            M.matrix += np.diag(return_times)

        return M

    def fundamental_matrix(self, max_order=None):
        """Compute ``inv(I - T)`` for the transition matrix without zero order.

        Parameters
        ----------
        max_order : int, optional
            If truthy, the transition matrix stored for that order in
            ``self.models`` is used; otherwise the selected one (``self.T``).

        Returns
        -------
        MultiOrderMatrix
            The inverse together with the node -> index mapping of ``T``.
        """
        source = self.models[max_order]['T'] if max_order else self.T
        T = source.remove_zero_order()

        identity = np.identity(T.matrix.shape[0])
        N = np.linalg.inv(identity - T.matrix)
        return MultiOrderMatrix(N, T.node_id_dict)

    def transient_matrix(self, max_order=None):
        """Compute ``(N - I) @ inv(diag(N))`` from the fundamental matrix ``N``.

        Parameters
        ----------
        max_order : int, optional
            Passed on to ``fundamental_matrix``.

        Returns
        -------
        MultiOrderMatrix
            The result together with the node -> index mapping of ``N``.
        """
        N = self.fundamental_matrix(max_order=max_order)

        identity = np.identity(N.matrix.shape[0])
        # diagonal matrix holding only the diagonal entries of N
        diagonal = np.diag(np.diag(N.matrix.todense()))
        H = (N.matrix - identity) @ np.linalg.inv(diagonal)

        return MultiOrderMatrix(H, N.node_id_dict)