def compute_single_likelihood_MPI(self, input_args):
    """Compute the log-likelihood of the human responses to a single datum.

    All inputs arrive packed in one tuple so the method can be handed to a
    map-style dispatcher (presumably an MPI map, going by the name -- confirm
    against the caller).

    Args:
        input_args (tuple): `(d_index, d, P)`, where `d_index` is the key of
            this datum in `self.L` and `self.R`, `d` is the datum itself (it
            must provide `get_queries()`), and `P` is added to
            `self.L[d_index]` to give one unnormalized log score per
            hypothesis.

    Returns:
        float: Binomial log-likelihood of the observed yes/no counts, summed
            over every query returned by `d.get_queries()`.

    """
    d_index, d, P = input_args

    posteriors = self.L[d_index] + P
    Z = logsumexp(posteriors)
    w = np.exp(posteriors - Z)              # normalized weight of each hypothesis
    r_i = np.transpose(self.R[d_index])     # transposed so a query index selects a row
    w_times_R = w * r_i

    likelihood = 0.0

    # Compute likelihood of producing the same output (yes/no) as the data.
    for _q, r, m in d.get_queries():
        # Row `m` holds the responses to query `m`, weighted by `w`.
        query_col = w_times_R[m, :]
        exp_p = query_col.sum()
        p = log(exp_p)

        # NOTE: with really small grammars sometimes we get p > 0.
        if p >= 0:
            print('P ERROR!')

        yes, no = r
        k = yes             # num. yes responses
        n = yes + no        # num. trials
        bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))     # log binomial coefficient
        l1mp = log1mexp(p)
        likelihood += bc + (k*p) + (n-k)*l1mp                   # likelihood we got human output

    # The accumulated value used to be discarded; hand it back to the caller.
    return likelihood
def compute_likelihood(self, data, update_post=True, **kwargs):
    """Estimate the likelihood of the data by averaging over `self.hypotheses`.

    For every datum each hypothesis is weighted by its normalized posterior
    score, and the probability of a "yes" on an output is the total weight of
    the hypotheses whose evaluated result contains that output.

    Args:
        data(list): List of FunctionData objects.
        update_post(bool): When true, store the result in `self.likelihood`
            and call `self.update_posterior()` before returning.

    Returns:
        float: Binomial log-likelihood of the observed yes/no counts, summed
            over all outputs of all data.

    """
    self.update()
    hypos = self.hypotheses

    total = 0.0
    for datum in data:
        log_scores = [sum(h.compute_posterior(datum.input)) for h in hypos]
        log_norm = logsumexp(log_scores)
        log_weights = [score - log_norm for score in log_scores]

        for out, counts in datum.output.items():
            # Hypotheses that do not contain `out` contribute zero mass.
            masked = []
            for h, lw in zip(hypos, log_weights):
                masked.append(lw if out in h() else -Infinity)
            p = logsumexp(masked)
            if p >= 0:
                # Keep p strictly negative so log1mexp stays defined.
                p = -1e-10

            k = counts[0]           # num. yes responses
            n = k + counts[1]       # num. trials
            log_binom = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))
            total += log_binom + (k*p) + (n-k)*log1mexp(p)

    if update_post:
        self.likelihood = total
        self.update_posterior()
    return total
def likelihood_optimized(self, data, update_post=True):
    """Compute the likelihood of the human data using the cached L and R tables.

    Data are keyed by their position in `data`. Missing entries of `self.L`
    and `self.R` are filled in on demand through `self.init_L` and
    `self.init_R`.

    Args:
        data(list): Data to score; each item must expose an `output` mapping
            from an output to a `(yes, no)` pair of response counts.
        update_post(bool): When true, store the result in `self.likelihood`
            and call `self.update_posterior()` before returning.

    Returns:
        float: Binomial log-likelihood of the observed yes/no counts, summed
            over all outputs of all data.

    """
    # These two depend on the current value of this GrammarHypothesis.
    rule_probs = self.normalized_value()        # vector of rule probabilities
    prior = np.dot(self.C, rule_probs)          # prior for each hypothesis

    total = 0.0
    for idx, datum in enumerate(data):
        # Initialize unfilled values for L[data] & R[data].
        if idx not in self.L:
            self.init_L(datum, idx)
        if idx not in self.R:
            self.init_R(datum, idx)

        scores = self.L[idx] + prior
        log_w = scores - lse_numba(scores)      # log weight of each hypothesis
        responses = self.R[idx]

        # Compute likelihood of producing the same output (yes/no) as the data.
        for col, (out, (yes, no)) in enumerate(datum.output.items()):
            # Column `col` of the boolean matrix, weighted by `log_w`.
            p = calc_prob(log_w, responses[:, col])

            # NOTE: with really small grammars sometimes we get p > 0.
            if p >= 0:
                print('P ERROR!')

            trials = yes + no
            log_binom = gammaln(trials+1) - (gammaln(yes+1) + gammaln(trials-yes+1))
            total += log_binom + (yes*p) + (trials-yes)*log1mexp_numba(p)

    if update_post:
        self.likelihood = total
        self.update_posterior()
    return total
def compute_likelihood(self, data, **kwargs):
    """Estimate the likelihood of the data by averaging over `self.hypotheses`.

    For every datum each hypothesis is weighted by its normalized score (the
    sum of the first two components returned by `compute_posterior`). The
    probability of a "yes" on output `o` is the total weight of the hypotheses
    `h` for which `o.Y` is in `h(o.word, o.context, set([o.Y]))`.

    Args:
        data(list): Data to score; each item must expose `input` and an
            `output` mapping from an output object (with `Y`, `word` and
            `context` attributes) to a pair of yes/no response counts.

    Returns:
        float: Binomial log-likelihood of the observed yes/no counts, summed
            over all outputs of all data.

    """
    self.update()
    hypotheses = self.hypotheses

    likelihood = 0.0
    for d in data:
        posteriors = []
        for h in hypotheses:
            # Evaluate each hypothesis once and reuse both components, rather
            # than calling compute_posterior twice per hypothesis.
            scores = h.compute_posterior(d.input)
            posteriors.append(scores[0] + scores[1])

        zo = logsumexp(posteriors)
        weights = [(post - zo) for post in posteriors]

        for o in d.output.keys():
            # Probability of yes on output `o` is the summed weight of the
            # hypotheses that contain `o`.
            p = logsumexp(
                [w if o.Y in h(o.word, o.context, set([o.Y])) else -Infinity
                 for h, w in zip(hypotheses, weights)])
            # Keep p strictly negative so log1mexp stays defined.
            p = -1e-10 if p >= 0 else p

            k = d.output[o][0]          # num. yes responses
            n = k + d.output[o][1]      # num. trials
            bc = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))     # log binomial coefficient
            likelihood += bc + (k * p) + (n - k) * log1mexp(p)              # likelihood we got human output

    return likelihood
def compute_likelihood(self, data, update_post=True, **kwargs):
    """Compute the likelihood of the human data using the cached L and R tables.

    Data items themselves are used as keys into `self.L` and `self.R`.
    Missing entries are filled in through `self.init_L` and `self.init_R`
    before any scoring happens.

    Args:
        data(list): Data to score; each item must be usable as a key of
            `self.L` / `self.R` and expose an `output` mapping from an output
            to a pair of yes/no response counts.
        update_post(bool): When true, store the result in `self.likelihood`
            and call `self.update_posterior()` before returning.

    Returns:
        float: Binomial log-likelihood of the observed yes/no counts, summed
            over all outputs of all data.

    """
    # Initialize unfilled values for L[data] & R[data].
    for d in data:
        if d not in self.L:
            self.init_L(d)
        if d not in self.R:
            self.init_R(d)

    # The following must be computed for this specific GrammarHypothesis.
    x = self.normalize_value_vector()       # vector of rule probabilities
    P = np.dot(self.C, x)                   # prior for each hypothesis

    likelihood = 0.0
    for d in data:
        posteriors = self.L[d] + P
        Z = logsumexp(posteriors)
        w = posteriors - Z                  # log weight of each hypothesis
        # The exponentiated weights are the same for every output of `d`,
        # so compute them once per datum instead of once per output.
        exp_w = np.exp(w)
        responses = self.R[d]

        # Compute likelihood of producing the same output (yes/no) as the data.
        for m, o in enumerate(d.output.keys()):
            # Column `m` of the boolean matrix, weighted by the hypothesis
            # weights -- TODO could this be logsumexp?
            p = log((exp_w * responses[:, m]).sum())
            if p >= 0:
                print('P ERROR!')

            k = d.output[o][0]          # num. yes responses
            n = k + d.output[o][1]      # num. trials
            bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))     # log binomial coefficient
            likelihood += bc + (k*p) + (n-k)*log1mexp(p)            # likelihood we got human output

    if update_post:
        self.likelihood = likelihood
        self.update_posterior()
    return likelihood