def _(self, data: PathCollection, order: Optional[int] = None,
      subpaths: bool = True) -> None:
    """Fill the network with higher-order nodes and edges taken from ``data``.

    For every path in ``data`` the method asks the path for its subpaths with
    minimum and maximum length ``self.order - 1``. Each returned item is
    added as a node (if missing) and each pair of consecutive items is added
    as an edge (if missing). Node and edge counters are increased by the
    count stored for the path in ``data.counter``.

    Args:
        data: Collection of paths. ``data.items()`` must yield
            ``(uid, path)`` pairs and ``data.counter[uid]`` the count of
            the corresponding path.
        order: If not ``None``, it is stored in ``self._order`` before any
            path is processed. If ``None``, the current order is kept.
        subpaths: If true and the order is greater than 1, every relation
            returned by ``self.possible_relations(data, self.order - 1)``
            that is not yet a node is added as a node with ``count=0``.

    Returns:
        None. The network (nodes, edges, ``_observed`` and ``_subpaths``
        counters) is modified in place.
    """
    # update the order only when the caller supplied one
    if order is not None:
        self._order = order

    # iterate over all paths
    for uid, path in tqdm(data.items(), desc='convert paths to hon'):

        # number of times this path occurs in the data
        count = data.counter[uid]

        # generate subpaths of order-1 for higher-order nodes
        nodes = path.subpaths(min_length=self.order - 1,
                              max_length=self.order - 1,
                              include_self=True, paths=False)

        # add higher-order nodes to the network
        for node in nodes:
            if node not in self.nodes:
                self.add_node(*node, uid='-'.join(node), count=0)

            self.nodes.counter[self.nodes[node].uid] += count

        # do not create edges if order is 0
        nodes = nodes if self.order > 0 else []

        # generate higher-order edges from consecutive higher-order nodes
        for source_key, target_key in zip(nodes[:-1], nodes[1:]):
            source, target = self.nodes[source_key], self.nodes[target_key]

            # check if edge exists, otherwise add a new edge
            if (source, target) not in self.edges:
                self.add_edge(source, target, count=0)

            # get edge and update counters
            edge = self.edges[source, target]
            self.edges.counter[edge.uid] += count

            # Compare against self.order rather than the ``order`` argument:
            # the argument is None whenever the caller keeps the current
            # order, which made this test always false and sent every
            # contribution to ``_subpaths``.
            # NOTE(review): assumes self.order reflects self._order - confirm.
            if self.order == len(path):
                self._observed[edge.first_order_relations] += count
            else:
                self._subpaths[edge.first_order_relations] += count

    # calculate frequencies for a zero-order network
    if self.order == 0:
        total = sum(self.nodes.counter.values())
        # only values of existing keys are replaced, so the mapping does
        # not change size while it is being iterated
        for key, value in self.nodes.counter.items():
            self.nodes.counter[key] = value / total

    # create all possible higher-order nodes
    if subpaths and self.order > 1:
        for node in self.possible_relations(data, self.order - 1):
            if node not in self.nodes:
                self.add_node(*node, count=0)
def likelihood(self, data: PathCollection, log: bool = False) -> float:
    """Return the (log-)likelihood of the paths in ``data`` under this network.

    For every path, the logarithms of the transition-matrix entries of its
    consecutive higher-order node pairs are summed, multiplied by the path's
    count in ``data.counter`` and added to a running total.

    Args:
        data: Observed paths; ``data.items()`` yields ``(uid, path)`` pairs
            and ``data.counter[uid]`` the count of each path.
        log: If true, return the accumulated log value; otherwise return
            ``np.exp`` of it.

    Returns:
        The accumulated log-likelihood if ``log`` is true, else its
        exponential.
    """
    # some information for debugging
    LOG.debug('I\'m a likelihood of a HigherOrderNetwork')

    # lookup used to turn node uids into matrix indices
    position = self.nodes.index

    # matrix requested transposed, so it is addressed as [target, source]
    transitions = transition_matrix(self, count=True, transposed=True)

    total = 0

    for uid, path in data.items():
        # how often this path was observed
        weight = data.counter[uid]

        # log-likelihood of a single traversal of this path
        path_total = 0

        # subpaths of order-1 serve as higher-order nodes
        hon_nodes = path.subpaths(min_length=self.order - 1,
                                  max_length=self.order - 1,
                                  include_self=True, paths=False)

        for source, target in zip(hon_nodes[:-1], hon_nodes[1:]):
            row = position[self.nodes[target].uid]
            col = position[self.nodes[source].uid]
            path_total += np.log(transitions[row, col])

        total += path_total * weight

    if log:
        return total
    return np.exp(total)