def score(self):
    """Return the log score of this order, log P(D | <), computing it once.

    Following Friedman & Koller:

        P(D | <) = prod_i sum_U score(X_i, U | D)

    where i ranges over the variables of the order and U over the parent
    sets produced by ``self.parent_sets(i)``.  The family scores returned by
    the family cache are treated as logs, so the inner sum is evaluated with
    ``logsumexp`` for numerical stability and the product becomes a sum.

    With a uniform prior over orders, P(< | D) = Z^{-1} P(D | <) where
    Z = sum_< P(D | <).  Z does not discriminate between orders (for fixed
    D), so it is not computed here.

    Returns:
        The cached value of ``self._score`` if it is not ``None``; otherwise
        the freshly computed log score, which is also stored in
        ``self._score``.
    """
    if self._score is not None:
        return self._score

    family_score = self._family_cache.family_score

    # Accumulate in a local and only publish to the cache once the whole sum
    # is known; otherwise an exception part-way through would leave a
    # partial total in self._score that later calls would return as final.
    total = 0
    # range (not the Python-2-only xrange) keeps this working on Python 3.
    for position in range(self.order_size):
        child = self.order[position]
        child_scores = [
            family_score(child, parents)
            for parents in self.parent_sets(position)
        ]
        total += logsumexp(child_scores)

    self._score = total
    return total
def conditional_probability(self, child, family_evidence):
    """Return the log-sum-exp, over the child's parent sets, of two log terms.

    For every parent set U yielded by ``self.parent_sets`` at the child's
    position in ``self.order``, the term is

        family_score(child, U) + self.parents_score(child, U)

    i.e. log P(x_k | w_k) = log sum_U P(Pa(child) = U) score(X, U | evidence).

    Args:
        child: variable whose position is looked up in ``self.order``.
        family_evidence: accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        ``logsumexp`` of the per-parent-set terms.
    """
    position = self.order.index(child)
    family_score = self._family_cache.family_score
    terms = [
        family_score(child, candidate) + self.parents_score(child, candidate)
        for candidate in self.parent_sets(position)
    ]
    return logsumexp(terms)
def score(self):
    """Compute (once) and return log P(D | <) for this order.

    Friedman & Koller give

        P(D | <) = prod_i sum_U score(X_i, U | D)

    with i over the variables of the order and U over the parent sets
    yielded by ``self.parent_sets(i)``.  Family scores from the cache are
    handled as logs, so each inner sum is a ``logsumexp`` and the outer
    product is a plain sum.

    Since the prior over orders is uniform, P(< | D) is proportional to
    P(D | <); the normalising constant Z = sum_< P(D | <) is the same for
    every order (for fixed D) and is deliberately not computed.

    Returns:
        ``self._score`` when it is already set (not ``None``); otherwise the
        newly computed log score, which is stored in ``self._score`` first.
    """
    if self._score is not None:
        return self._score

    score_config = self._family_cache.family_score

    # Build the full sum before touching self._score: writing partial totals
    # into the cache attribute would make a failed computation look like a
    # finished one on the next call.  ``range`` replaces the Python-2-only
    # ``xrange`` so the method also runs on Python 3.
    log_score = sum(
        logsumexp(
            [score_config(self.order[i], parents)
             for parents in self.parent_sets(i)]
        )
        for i in range(self.order_size)
    )

    self._score = log_score
    return log_score
def parents_contains_score(self, parents, child):
    """Return the log probability that ``parents`` are all parents of ``child``.

    log P(parents subseteq Pa(child) | <, D)
        = log sum_{U : parents subseteq U} score(X, U | D)
          - log sum_U score(X, U | D)

    where U ranges over the parent sets yielded by ``self.parent_sets`` at
    the child's position in ``self.order``.

    Args:
        parents: iterable of candidate parents; converted to a frozenset.
        child: variable whose parent sets are enumerated.

    Returns:
        ``negative_infinity`` when ``parents`` is not a subset of the
        family cache's potential parents for ``child``, or when no
        enumerated parent set contains ``parents``; otherwise the
        difference of the two ``logsumexp`` terms above.
    """
    required = frozenset(parents)
    cache = self._family_cache

    # Sets are only partially ordered: not (a <= b) is not the same as
    # a > b, so the negated subset test is spelled out explicitly.
    if not (required <= cache.potential_parents(child)):
        return negative_infinity

    every_score = []
    containing_scores = []
    for candidate in self.parent_sets(self.order.index(child)):
        value = cache.family_score(child, candidate)
        every_score.append(value)
        if required <= frozenset(candidate):
            containing_scores.append(value)

    if containing_scores:
        return logsumexp(containing_scores) - logsumexp(every_score)
    return negative_infinity
def parents_score(self, child, parents):
    """Return the log probability that ``child`` has exactly ``parents``.

    log P(Pa(child) = parents | <, D)
        = log score(X, parents | D) - log sum_U score(X, U | D)

    where U ranges over the parent sets yielded by ``self.parent_sets`` at
    the child's position in ``self.order``.

    Args:
        child: variable whose parent sets are enumerated.
        parents: iterable of parents; converted to a frozenset.

    Returns:
        ``negative_infinity`` when ``parents`` is not a subset of the
        family cache's potential parents for ``child``; otherwise the
        family score of ``parents`` minus the ``logsumexp`` of the family
        scores of every enumerated parent set.
    """
    parents = frozenset(parents)

    # Subset test, matching parents_contains_score: potential_parents(child)
    # is used there as a set of variables, so asking whether the frozenset
    # itself is an *element* of it (``not in``) would reject every parent
    # set.  For sets, not (a <= b) is not the same as a > b.
    if not (parents <= self._family_cache.potential_parents(child)):
        return negative_infinity

    family_score = self._family_cache.family_score
    numerator = family_score(child, parents)
    denominator = logsumexp([
        family_score(child, family_parents)
        for family_parents in self.parent_sets(self.order.index(child))
    ])
    return numerator - denominator
def parents_score(self, child, parents):
    """Log posterior probability of one exact parent set for ``child``.

    log P(Pa(child) = parents | <, D)
        = log score(X, parents | D) - log sum_U score(X, U | D)

    The sum runs over the parent sets that ``self.parent_sets`` yields for
    the position of ``child`` in ``self.order``.

    Args:
        child: variable being scored.
        parents: iterable of parents; a frozenset is built from it.

    Returns:
        ``negative_infinity`` if ``parents`` is not contained in the family
        cache's potential parents of ``child``; otherwise the family score
        of ``parents`` less the ``logsumexp`` over all enumerated parent
        sets.
    """
    parents = frozenset(parents)
    cache = self._family_cache

    # The sibling parents_contains_score compares against
    # potential_parents(child) with a subset test, i.e. as a set of
    # variables.  A membership test (``parents not in ...``) would look for
    # the frozenset as a single element and so could never succeed there.
    if not (parents <= cache.potential_parents(child)):
        return negative_infinity

    own_score = cache.family_score(child, parents)
    all_scores = [
        cache.family_score(child, family_parents)
        for family_parents in self.parent_sets(self.order.index(child))
    ]
    return own_score - logsumexp(all_scores)