def compute_single_likelihood_MPI(self, input_args):
        """Unpack the (d_index, d, P) tuple sent to an MPI worker and compute the likelihood of the human responses for datum `d`."""
        d_index, d, P = input_args
        posteriors = self.L[d_index] + P
        Z = logsumexp(posteriors)
        w = np.exp(posteriors - Z)              # weights for each hypothesis
        r_i = np.transpose(self.R[d_index])
        w_times_R = w * r_i

        likelihood = 0.0

        # Compute likelihood of producing same output (yes/no) as data
        for q, r, m in d.get_queries():
            # col `m` of boolean matrix `R[d_index]`, weighted by `w`
            query_col = w_times_R[m, :]
            exp_p = query_col.sum()
            p = log(exp_p)
            ## p = log((np.exp(w) * self.R[d_index][:, m]).sum())

            # NOTE: with really small grammars we sometimes get p > 0
            if p >= 0:
                print('P ERROR!')

            yes, no = r
            k = yes             # num. yes responses
            n = yes + no        # num. trials
            bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))     # log binomial coefficient
            l1mp = log1mexp(p)
            likelihood += bc + (k*p) + (n-k)*l1mp                   # binomial log-likelihood of the human responses

        return likelihood
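
These snippets assume `numpy as np`, `gammaln` (scipy.special), `logsumexp`, and `log` are imported elsewhere. `log1mexp` computes log(1 - exp(a)) for a < 0, which underflows if done naively; it is never defined here, but a minimal numerically stable sketch (the helper actually imported may differ) looks like:

import numpy as np

def log1mexp(a):
    """Stable log(1 - exp(a)) for a < 0, via the standard log1mexp recipe."""
    if a > -np.log(2):
        return np.log(-np.expm1(a))    # exp(a) near 1: expm1 preserves precision
    return np.log1p(-np.exp(a))        # exp(a) near 0: log1p preserves precision
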
Example #2
    def compute_likelihood(self, data, update_post=True, **kwargs):
        """Use bayesian model averaging with `self.hypotheses` to estimate likelihood of generating the data.

        This is taken as a weighted sum over all hypotheses, sum_h { p(h | X) } .

        Args:
            data(list): List of FunctionData objects.
            update_post(bool): If True, store the result in `self.likelihood` and call `self.update_posterior()`.

        Returns:
            float: Likelihood summed over all outputs, summed over all hypotheses & weighted for each
            hypothesis by posterior score p(h|X).

        """
        self.update()
        hypotheses = self.hypotheses
        likelihood = 0.0

        for d in data:
            posteriors = [sum(h.compute_posterior(d.input)) for h in hypotheses]
            Z = logsumexp(posteriors)
            weights = [(post-Z) for post in posteriors]

            for o in d.output.keys():
                # probability of a yes on output `o` is the summed weight of hypotheses that generate `o`
                p = logsumexp([w if o in h() else -Infinity for h, w in zip(hypotheses, weights)])
                p = -1e-10 if p >= 0 else p     # clamp: rounding can push p to 0, which breaks log1mexp
                k = d.output[o][0]         # num. yes responses
                n = k + d.output[o][1]     # num. trials
                bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))     # log binomial coefficient
                likelihood += bc + (k*p) + (n-k)*log1mexp(p)            # binomial log-likelihood of the human responses

        if update_post:
            self.likelihood = likelihood
            self.update_posterior()
        return likelihood
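
The per-output term `bc + k*p + (n-k)*log1mexp(p)` is just the binomial log-pmf, where p is the log-probability of a yes response. A quick sanity check against scipy, with made-up counts:

import numpy as np
from scipy.special import gammaln
from scipy.stats import binom

p = np.log(0.3)                     # log P(yes) under the model (hypothetical)
k, n = 4, 10                        # 4 yes responses out of 10 trials (hypothetical)
bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))
term = bc + k*p + (n-k)*np.log1p(-np.exp(p))
assert np.isclose(term, binom.logpmf(k, n, np.exp(p)))
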
Example #3
    def likelihood_optimized(self, data, update_post=True):
        """
        Compute the likelihood of producing the human data, given H (self.hypotheses) and x (self.value).

        """
        # The following must be computed for this specific GrammarHypothesis
        # ------------------------------------------------------------------
        x = self.normalized_value()         # vector of rule probabilities
        P = np.dot(self.C, x)               # prior for each hypothesis
        likelihood = 0.0

        for d_key, d in enumerate(data):
            # Initialize unfilled values for L[data] & R[data]
            if d_key not in self.L:
                self.init_L(d, d_key)
            if d_key not in self.R:
                self.init_R(d, d_key)

            posteriors = self.L[d_key] + P
            Z = lse_numba(posteriors)
            w = posteriors - Z              # log-weights for each hypothesis

            # Compute likelihood of producing same output (yes/no) as data
            for m, o in enumerate(d.output.keys()):
                # col `m` of boolean matrix `R[d_key]`, weighted by `w`
                p = calc_prob(w, self.R[d_key][:, m])
                # p = log((np.exp(w) * self.R[d_key][:, m]).sum())

                # NOTE: with really small grammars we sometimes get p > 0
                if p >= 0:
                    print('P ERROR!')

                yes, no = d.output[o]
                k = yes
                n = yes + no
                bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))     # log binomial coefficient
                likelihood += bc + (k*p) + (n-k)*log1mexp_numba(p)      # binomial log-likelihood of the human responses

        if update_post:
            self.likelihood = likelihood
            self.update_posterior()
        return likelihood
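
`lse_numba`, `calc_prob`, and `log1mexp_numba` are JIT-compiled helpers whose bodies are not shown in this snippet. Minimal sketches of what they plausibly compute, inferred from the calls above (names and details are assumptions):

import numpy as np
from numba import njit

@njit
def lse_numba(a):
    # log-sum-exp with the usual max shift for numerical stability
    m = np.max(a)
    return m + np.log(np.sum(np.exp(a - m)))

@njit
def calc_prob(w, col):
    # log of the total weight on rows where the boolean column is True,
    # i.e. log((np.exp(w) * col).sum()) -- cf. the commented-out line above
    total = 0.0
    for i in range(w.shape[0]):
        if col[i]:
            total += np.exp(w[i])
    return np.log(total)

@njit
def log1mexp_numba(a):
    # stable log(1 - exp(a)) for a < 0
    if a > -np.log(2):
        return np.log(-np.expm1(a))
    return np.log1p(-np.exp(a))
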
Example #4
    def compute_likelihood(self, data, **kwargs):
        self.update()
        hypotheses = self.hypotheses
        likelihood = 0.0

        for d in data:
            posteriors = [sum(h.compute_posterior(d.input)) for h in hypotheses]
            Z = logsumexp(posteriors)
            weights = [(post - Z) for post in posteriors]

            for o in d.output.keys():
                # probability of a yes on output `o` is the summed weight of hypotheses that generate `o.Y`
                p = logsumexp(
                    [w if o.Y in h(o.word, o.context, set([o.Y])) else -Infinity for h, w in zip(hypotheses, weights)])
                p = -1e-10 if p >= 0 else p     # clamp: rounding can push p to 0, which breaks log1mexp
                k = d.output[o][0]  # num. yes responses
                n = k + d.output[o][1]  # num. trials
                bc = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))  # log binomial coefficient
                likelihood += bc + (k * p) + (n - k) * log1mexp(p)  # binomial log-likelihood of the human responses

        return likelihood
Example #5
    def compute_likelihood(self, data, update_post=True, **kwargs):
        """
        Compute the likelihood of producing the human data, given H (self.hypotheses) and x (self.value).

        """
        # Initialize unfilled values for L[data] & R[data]
        for d in data:
            if d not in self.L:
                self.init_L(d)
            if d not in self.R:
                self.init_R(d)

        # The following must be computed for this specific GrammarHypothesis
        # ------------------------------------------------------------------
        x = self.normalize_value_vector()   # vector of rule probabilities
        P = np.dot(self.C, x)               # prior for each hypothesis
        likelihood = 0.0

        for d in data:
            posteriors = self.L[d] + P
            Z = logsumexp(posteriors)
            w = posteriors - Z              # log-weights for each hypothesis

            # Compute likelihood of producing same output (yes/no) as data
            for m, o in enumerate(d.output.keys()):
                # col `m` of boolean matrix `R[d]`, weighted by `w`  -- TODO could this be logsumexp?
                p = log((np.exp(w) * self.R[d][:, m]).sum())
                if p >= 0:      # with really small grammars, rounding can push p to 0 or above
                    print('P ERROR!')
                k = d.output[o][0]          # num. yes responses
                n = k + d.output[o][1]      # num. trials
                bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))     # log binomial coefficient
                likelihood += bc + (k*p) + (n-k)*log1mexp(p)            # binomial log-likelihood of the human responses

        if update_post:
            self.likelihood = likelihood
            self.update_posterior()
        return likelihood
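
Except for the word-learning variant (Example #4), these snippets all expect the same response format: for each FunctionData object `d`, `d.output` maps an output to a `(num_yes, num_no)` count pair. A hypothetical datum (the constructor follows LOTlib's FunctionData, mentioned in the docstring above; the import path and numbers are made up):

from LOTlib.DataAndObjects import FunctionData   # import path assumed

# 12 of 20 participants said yes to output 8; 3 of 20 said yes to output 9
d = FunctionData(input=[2, 4, 6], output={8: (12, 8), 9: (3, 17)})
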