Ejemplo n.º 1
0
    def compute_likelihood(self, data, update_post=True, **kwargs):
        """Estimate the log likelihood of `data` by Bayesian model averaging over `self.hypotheses`.

        For each datum, every hypothesis is scored with `sum(h.compute_posterior(datum.input))` and
        the scores are normalized in log space. The log probability of a "yes" for an output item is
        the log-sum-exp of the normalized scores of the hypotheses whose evaluated set `h()` contains
        that item. The observed (yes, no) counts are then scored with a binomial log likelihood.

        Args:
            data(list): List of FunctionData objects; each `output` maps an item to a
                (num. yes, num. no) pair.
            update_post(bool): If true, store the result in `self.likelihood` and call
                `self.update_posterior()`.
            **kwargs: Accepted but not used by this method.

        Returns:
            float: Log likelihood summed over all outputs of all data.

        """
        self.update()
        models = self.hypotheses
        total = 0.0

        for datum in data:
            scores = [sum(h.compute_posterior(datum.input)) for h in models]
            normalizer = logsumexp(scores)
            log_weights = [score - normalizer for score in scores]

            for item in datum.output.keys():
                # log p(yes on `item`): pool the weights of the hypotheses that contain it
                pooled = []
                for h, lw in zip(models, log_weights):
                    pooled.append(lw if item in h() else -Infinity)
                log_p = logsumexp(pooled)
                if log_p >= 0:
                    # keep log_p strictly negative so log1mexp below stays defined
                    log_p = -1e-10

                yes = datum.output[item][0]              # num. yes responses
                trials = yes + datum.output[item][1]     # num. trials
                log_choose = gammaln(trials+1) - (gammaln(yes+1) + gammaln(trials-yes+1))
                total += log_choose + (yes*log_p) + (trials-yes)*log1mexp(log_p)

        if update_post:
            self.likelihood = total
            self.update_posterior()
        return total
Ejemplo n.º 2
0
def compute_kl(current_dict, next_dict):
    """Return KL(current || next) for two dictionaries of unnormalized log scores.

    Each dictionary maps a hypothesis to a log score. Both are normalized with their own
    log-sum-exp before the divergence sum_h p(h) * (log p(h) - log q(h)) is accumulated.

    Args:
        current_dict (dict): hypothesis -> log score for the distribution p.
        next_dict (dict): hypothesis -> log score for the distribution q. Must contain every
            key of `current_dict` that has non-zero probability under p.

    Returns:
        float: The divergence; 0.0 when `current_dict` has no entry with non-zero mass.

    Raises:
        KeyError: If a key with non-zero mass in `current_dict` is missing from `next_dict`.
    """
    # .values()/.items() exist on both Python 2 and Python 3 dicts; .iteritems() is Python 2 only.
    current_Z = logsumexp(list(current_dict.values()))
    next_Z = logsumexp(list(next_dict.values()))

    kl = 0.0
    for h, v in current_dict.items():
        p = np.exp(v - current_Z)
        if p == 0:
            # A zero-mass term contributes nothing; skipping also avoids 0 * inf = nan.
            continue
        kl += p * (v - next_dict[h] + next_Z - current_Z)

    return kl
Ejemplo n.º 3
0
def compute_kl(current_dict, next_dict):
    """Return KL(current || next) for two dictionaries of unnormalized log scores.

    Each dictionary maps a hypothesis to a log score. Both are normalized with their own
    log-sum-exp before the divergence sum_h p(h) * (log p(h) - log q(h)) is accumulated.

    Args:
        current_dict (dict): hypothesis -> log score for the distribution p.
        next_dict (dict): hypothesis -> log score for the distribution q. Must contain every
            key of `current_dict` that has non-zero probability under p.

    Returns:
        float: The divergence; 0.0 when `current_dict` has no entry with non-zero mass.

    Raises:
        KeyError: If a key with non-zero mass in `current_dict` is missing from `next_dict`.
    """
    # .values()/.items() exist on both Python 2 and Python 3 dicts; .iteritems() is Python 2 only.
    current_Z = logsumexp(list(current_dict.values()))
    next_Z = logsumexp(list(next_dict.values()))

    kl = 0.0
    for h, v in current_dict.items():
        p = np.exp(v - current_Z)
        if p == 0:
            # A zero-mass term contributes nothing; skipping also avoids 0 * inf = nan.
            continue
        kl += p * (v - next_dict[h] + next_Z - current_Z)

    return kl
Ejemplo n.º 4
0
    def in_concept_avg(self, domain):
        """
        p(y in C | `self.hypotheses`) for every y in `domain`.

        For each hypothesis h whose concept C_h = h() contains y, the normalized posterior
        weight w_h = exp(h.posterior_score - Z) is accumulated, where Z is the log-sum-exp
        of the posterior scores of all hypotheses.

        ==> This is the weighted bayesian model averaging described in (Murphy, 2007)

        Args:
            domain: Iterable of items y to evaluate.

        Returns:
            dict: Maps each y to its accumulated weight (the int 0 when no concept contains y).

        """
        self.update()
        hypotheses = self.hypotheses

        # Neither the normalizer nor the per-hypothesis weights depend on y, so they are
        # computed once here instead of once per domain item.
        Z = logsumexp([h.posterior_score for h in hypotheses])
        weights = [exp(h.posterior_score - Z) for h in hypotheses]

        probs_in_c = {}
        for y in domain:
            prob_in_c = 0
            for h, w in zip(hypotheses, weights):
                # h() is still evaluated per item, exactly as before
                if y in h():
                    prob_in_c += w
            probs_in_c[y] = prob_in_c

        return probs_in_c
Ejemplo n.º 5
0
    def compute_single_likelihood(self, datum, llcounts, distance_factor=100.0):
        """Pseudo-log-likelihood of the observed strings in `datum.output` given `llcounts`.

        Every observed string is compared against every key of `llcounts`: the key's log relative
        frequency is penalized by `distance_factor` times `distance(key, observed)`, and the
        penalized terms are pooled with log-sum-exp. This counts close strings as being close.

        Args:
            datum: Object whose `output` is a dict mapping observed strings to counts.
            llcounts (dict): Maps generated strings to how often they were generated.
            distance_factor (float): Penalty per unit of distance.

        Returns:
            Sum over observed strings of count * pooled score (the int 0 for an empty output).
        """
        assert isinstance(datum.output, dict), "Data supplied must be a dict (function outputs to counts)"

        total_count = sum(llcounts.values())  # normalizing constant

        def pooled_score(observed):
            """Log-sum-exp over all generated strings of (log relative frequency - distance penalty)."""
            terms = []
            for generated in llcounts.keys():
                terms.append(log(llcounts[generated])-log(total_count) - distance_factor*distance(generated, observed))
            return logsumexp(terms)

        return sum([datum.output[observed]*pooled_score(observed) for observed in datum.output.keys()])
Ejemplo n.º 6
0
    def compute_proposal_probability(self, grammar, t1, t2, resampleProbability=lambdaOne, recurse=True):
        """Log probability of proposing `t2` from `t1` by regenerating one subtree.

        NOTE: Summing over trees is not strictly necessary, since an auxiliary variable argument
        would do. But this fits nicely with the other proposers and is not much slower.

        Args:
            grammar: Grammar used to score the regenerated subtree.
            t1: Tree the proposal starts from.
            t2: Proposed tree.
            resampleProbability: Per-node weight function passed to `t1.sampling_log_probability`.
            recurse (bool): If true, also count the ancestors of the differing node as possible
                regeneration points.

        Returns:
            Log-sum-exp over every regeneration point considered.
        """
        node1, node2 = least_common_difference(t1, t2)

        def path_lp(picked, regenerated):
            """log p(choose `picked` in t1) + log p(grammar generates `regenerated`)."""
            lp_pick = t1.sampling_log_probability(picked, resampleProbability=resampleProbability)
            with BVRuleContextManager(grammar, regenerated.parent, recurse_up=True):
                lp_generate = grammar.log_probability(regenerated)
            return lp_pick + lp_generate

        if node1 is None:
            # no difference found: any node in the tree could have been regenerated
            lps = [path_lp(node, node) for node in t1]
        else:
            # we have a specific path up the tree
            lps = []
            while node1:
                lps.append(path_lp(node1, node2))
                if not recurse:
                    break
                node1 = node1.parent
                node2 = node2.parent

        return logsumexp(lps)
Ejemplo n.º 7
0
    def compute_weights(self):
        """
        Build one (score, hole_penalty) tuple per child, used to decide which child to use next.
        Returning a tuple lets these weights get sorted by each successive element.

        The children's mean log posteriors are exponentiated and re-normalized among the
        children. Children with no steps yet get an infinite score, and ties among them are
        broken by a penalty for unfilled holes.
        """

        # What get_xbar() returns is really the mean log posterior; unvisited children get +inf.
        log_means = []
        for child in self.children:
            log_means.append(child.get_xbar() if child.nsteps > 0 else Infinity)

        normalizer = logsumexp(log_means)  # renormalize, for converting to logprob

        # -inf and +inf are preserved because they mean something special:
        # -inf means we should never ever visit; +inf means we can't not visit
        scaled = []
        for x in log_means:
            if abs(x) < Infinity:
                scaled.append(exp(x - normalizer))
            else:
                scaled.append(x)

        total_steps = sum([child.nsteps for child in self.children])

        weights = []
        for value, child in zip(scaled, self.children):
            penalty = 0.0  # the adjustment
            if value == Infinity:  # so break the ties.
                # This must prevent us from wandering off to infinity, so each nonterminal hole costs something.
                for fn in child.value.value:
                    for arg in fn.argStrings():
                        if self.grammar.is_nonterminal(arg):
                            # -1 is for those weird nonterminals that need bv introduced
                            penalty += self.hole_penalty.get(arg, -1.0)

            if child.nsteps > 0:
                score = value + self.C * sqrt(2.0 * log(total_steps)/float(child.nsteps+1))
            else:
                score = Infinity
            weights.append((score, penalty))

        return weights
Ejemplo n.º 8
0
def probe_MHsampler(h,
                    language,
                    options,
                    name,
                    size=64,
                    data=None,
                    init_size=None,
                    iters_per_stage=None,
                    sampler=None,
                    ret_sampler=False):
    get_data = language.sample_data_as_FuncData
    evaluation_data = get_data(size, max_length=options.FINITE)

    if data is None:
        if init_size is None:
            data = evaluation_data
        else:
            data = get_data(n=size, max_length=init_size)

    if sampler is None:
        sampler = MHSampler(h, data)
    else:
        sampler.data = data

    best_hypotheses = TopN(N=options.TOP_COUNT)

    iter = 0

    for h in sampler:
        if iter == options.STEPS: break
        if iter % 100 == 0: print '---->', iter

        best_hypotheses.add(h)

        if iter % options.PROBE == 0:

            for h in best_hypotheses:
                h.compute_posterior(evaluation_data)
            Z = logsumexp([h.posterior_score for h in best_hypotheses])

            pr_data = get_data(1024, max_length=options.FINITE)
            weighted_score = 0
            for h in best_hypotheses:
                precision, recall = language.estimate_precision_and_recall(
                    h, pr_data)
                if precision + recall != 0:
                    f_score = precision * recall / (precision + recall)
                    weighted_score += np.exp(h.posterior_score - Z) * f_score
            weighted_score *= 2

            to_file([[iter, Z, weighted_score]], name)

        if init_size is not None and iter % iters_per_stage == 0:
            init_size += 2
            sampler.data = get_data(n=size, max_length=init_size)

        iter += 1

    if ret_sampler:
        return sampler
Ejemplo n.º 9
0
 def compute_proposal_probability(self,grammar, t1, t2, resampleProbability=lambdaOne, recurse=True):
     """Log probability of proposing `t2` from `t1` by copying one subtree over another.

     Each term is log p(choose a source node) + log p(choose the target node), pooled with
     log-sum-exp over every (source, target) explanation considered.

     Args:
         grammar: Grammar passed to `nodes_equal_except_parents`.
         t1: Tree the proposal starts from.
         t2: Proposed tree.
         resampleProbability: Per-node weight function used when sampling nodes of `t1`.
         recurse (bool): If true, also count the ancestors of the differing node.

     Returns:
         Log-sum-exp over all explanations considered.
     """
     chosen_node1 , chosen_node2 = least_common_difference(t1,t2)

     lps = []
     if chosen_node1 is None: # any node in the tree could have been copied
         for node in t1:
             could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar,x,node) * resampleProbability(x)
             lp_of_choosing_source = (nicelog(t1.sample_node_normalizer(could_be_source) - could_be_source(node)) - nicelog(t1.sample_node_normalizer(resampleProbability)))
             # The target in this branch is the node being considered. chosen_node1 is None here,
             # so passing it (as before) scored None instead of the node.
             lp_of_choosing_target = t1.sampling_log_probability(node,resampleProbability=resampleProbability)
             lps.append(lp_of_choosing_source + lp_of_choosing_target)
     else: # we have a specific path up the tree
         while chosen_node1:
             could_be_source = lambda x: 1.0 * nodes_equal_except_parents(grammar,x,chosen_node2) * resampleProbability(x)

             lp_of_choosing_source = nicelog(t1.sample_node_normalizer(could_be_source)) - nicelog(t1.sample_node_normalizer(resampleProbability))
             lp_of_choosing_target = t1.sampling_log_probability(chosen_node1,resampleProbability=resampleProbability)
             lps.append(lp_of_choosing_source + lp_of_choosing_target)

             if recurse:
                 chosen_node1 = chosen_node1.parent
                 chosen_node2 = chosen_node2.parent
             else:
                 chosen_node1 = None

     return logsumexp(lps)
Ejemplo n.º 10
0
    def compute_single_likelihood(self, datum):
        """Log likelihood of the observed string counts in `datum.output` under this hypothesis.

        The hypothesis is evaluated on `datum.input` to get a dict of string -> log probability.
        An observed string that was generated is scored with its log probability. Otherwise it is
        scored with the log-sum-exp, over generated strings, of log probability plus
        `edit_likelihood`. If nothing was generated, it is scored against the empty string.

        Args:
            datum: Object with `input` (argument tuple), `output` (dict of string -> count) and
                `alpha` (passed to `edit_likelihood`).

        Returns:
            float: Sum over observed strings of count * score.
        """
        assert isinstance(datum.output, dict)

        predicted = self(*datum.input)  # output dictionary, output->probabilities
        assert isinstance(predicted, dict)

        total = 0.0
        for observed, count in datum.output.items():
            if observed in predicted:
                score = predicted[observed]
            elif predicted:
                # probability of the observed string under the editing model, summed over
                # every generated string it could have been edited from
                score = logsumexp([
                    lp + edit_likelihood(candidate,
                                         observed,
                                         alphabet_size=self.alphabet_size,
                                         alpha=datum.alpha)
                    for candidate, lp in predicted.items()
                ])
            else:
                # nothing was generated: edit from the empty string
                score = edit_likelihood(
                    '', observed, alphabet_size=self.alphabet_size, alpha=datum.alpha)
            total += count * score

            # An alternative "mixing {a,b}* noise" model was sketched here previously:
            # lp = log(1.0-datum.alpha) - log(self.alphabet_size+1)*(len(k)+1)
            # if k in hp: lp = logplusexp(lp, log(datum.alpha) + hp[k])
            # s += dc*lp
        return total
    def compute_single_likelihood_MPI(self, input_args):
        """Accumulate the binomial log likelihood of the responses to one datum's queries.

        Args:
            input_args: A 3-tuple `(d_index, d, P)`. `d_index` indexes `self.L` and `self.R`,
                `d` provides `get_queries()`, and `P` is added elementwise to `self.L[d_index]`
                (presumably per-hypothesis log priors added to log likelihoods -- TODO confirm).

        NOTE(review): no `return` statement is visible in this block, so as shown the method
        returns None and `likelihood` is discarded. The definition may be truncated -- confirm
        against the full source.
        """
        d_index, d, P = input_args
        posteriors = self.L[d_index] + P
        Z = logsumexp(posteriors)
        w = np.exp(posteriors - Z)              # normalized weights for each hypothesis
        r_i = np.transpose(self.R[d_index])
        w_times_R = w * r_i

        likelihood = 0.0

        # Compute likelihood of producing same output (yes/no) as data
        for q, r, m in d.get_queries():
            # row `m` of the transposed matrix `R[d_index]`, weighted by `w`
            query_col = w_times_R[m, :]
            exp_p = query_col.sum()
            p = log(exp_p)                      # log probability of a "yes" for this query
            ## p = log((np.exp(w) * self.R[d_index][:, m]).sum())

            # NOTE: with really small grammars sometimes we get p > 0
            # (this is only reported; p is not clamped before log1mexp below)
            if p >= 0:
                print 'P ERROR!'

            yes, no = r
            k = yes             # num. yes responses
            n = yes + no        # num. trials
            bc = gammaln(n+1) - (gammaln(k+1) + gammaln(n-k+1))     # log binomial coefficient
            l1mp = log1mexp(p)                                      # log probability of a "no"
            likelihood += bc + (k*p) + (n-k)*l1mp                   # likelihood we got human output
Ejemplo n.º 12
0
    def runTest(self):
        """Check that MHSampler samples from the prior of a finite grammar.

        A hypothesis with constant likelihood 0.0 is sampled, observed counts per hypothesis
        are compared with expected counts derived from the grammar's log probabilities, both
        are plotted to 'finite-sampler-test.pdf', and a chi-squared test is asserted.
        """
        NSAMPLES = 10000

        from LOTlib.DefaultGrammars import finiteTestGrammar as grammar

        from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis

        class MyH(LOTHypothesis):
            """Hypothesis whose posterior equals its prior (the likelihood is always 0.0)."""
            @attrmem('likelihood')
            def compute_likelihood(self, *args, **kwargs):
                return 0.0

            @attrmem('prior')
            def compute_prior(self):
                return grammar.log_probability(self.value)

        print "# Taking MHSampler for a test run"
        cnt = Counter()
        h0 = MyH(grammar=grammar)
        for h in break_ctrlc(
                MHSampler(h0, [], steps=NSAMPLES,
                          skip=10)):  # huh the skip here seems to be important
            cnt[h] += 1
        trees = list(cnt.keys())
        print "# Done taking MHSampler for a test run"

        ## TODO: When the MCMC methods get cleaned up for how many samples they return, we will assert that we got the right number here
        # assert sum(cnt.values()) == NSAMPLES # Just make sure we aren't using a sampler that returns fewer samples! I'm looking at you, ParallelTempering

        # NOTE(review): Z is computed but never used below; expc is built from the unnormalized
        # exp(log_probability), so it only sums to sum(obsc) if the sampled trees carry all of
        # the grammar's probability mass -- confirm whether `- Z` was intended.
        Z = logsumexp([grammar.log_probability(t.value) for t in trees
                       ])  # renormalize to the trees in self.trees
        obsc = [cnt[t] for t in trees]
        expc = [
            exp(grammar.log_probability(t.value)) * sum(obsc) for t in trees
        ]

        # And plot here, sorted by decreasing expected count
        expc, obsc, trees = zip(*sorted(zip(expc, obsc, trees), reverse=True))
        import matplotlib.pyplot as plt
        plt.subplot(111)
        # Log here spaces things out at the high end, where we can see it!
        plt.scatter(log(range(len(trees))), expc, color="red", alpha=1.)
        plt.scatter(log(range(len(trees))),
                    obsc,
                    color="blue",
                    marker="x",
                    alpha=1.)
        plt.savefig('finite-sampler-test.pdf')
        plt.clf()

        # Do chi squared test
        csq, pv = chisquare(obsc, expc)
        self.assertAlmostEqual(sum(obsc), sum(expc))

        # And examine: print observed count, expected count and hypothesis
        for t, c, s in zip(trees, obsc, expc):
            print c, s, t
        print(csq, pv), sum(obsc)

        self.assertGreater(pv, 0.01, msg="Sampler failed chi squared!")
Ejemplo n.º 13
0
    def in_concept_avg(self, domain):
        """
        p(y in C | `self.hypotheses`) for every y in `domain`.

        For each hypothesis h whose concept C_h = h() contains y, the normalized posterior
        weight w_h = exp(h.posterior_score - Z) is accumulated, where Z is the log-sum-exp
        of the posterior scores of all hypotheses.

        ==> This is the weighted bayesian model averaging described in (Murphy, 2007)

        Args:
            domain: Iterable of items y to evaluate.

        Returns:
            dict: Maps each y to its accumulated weight (the int 0 when no concept contains y).

        """
        self.update()
        hypotheses = self.hypotheses

        # Neither the normalizer nor the per-hypothesis weights depend on y, so they are
        # computed once here instead of once per domain item.
        Z = logsumexp([h.posterior_score for h in hypotheses])
        weights = [exp(h.posterior_score - Z) for h in hypotheses]

        probs_in_c = {}
        for y in domain:
            prob_in_c = 0
            for h, w in zip(hypotheses, weights):
                # h() is still evaluated per item, exactly as before
                if y in h():
                    prob_in_c += w
            probs_in_c[y] = prob_in_c

        return probs_in_c
Ejemplo n.º 14
0
    def plot_sampler(self, opath, sampler):
        """
        Plot the sampler, for cases with many zeros where chisquared won't work well
        """
        cnt = Counter()
        for h in lot_iter(sampler):
            cnt[h.value] += 1

        Z = logsumexp([t.log_probability() for t in self.trees]) # renormalize to the trees in self.trees
        obsc = [cnt[t] for t in self.trees]
        expc = [exp(t.log_probability()-Z)*sum(obsc) for t in self.trees]

        for t, c, s in zip(self.trees, obsc, expc):
            print c, "\t", s, "\t", t


        expc, obsc, trees = zip(*sorted(zip(expc, obsc, self.trees), reverse=True))

        import matplotlib.pyplot as plt
        from numpy import log
        plt.subplot(111)
        # Log here spaces things out at the high end, where we can see it!
        plt.scatter(log(range(len(trees))), expc, color="red", alpha=1.)
        plt.scatter(log(range(len(trees))), obsc, color="blue", marker="x", alpha=1.)
        plt.savefig(opath)
        plt.clf()
Ejemplo n.º 15
0
def probe(best_hypotheses, evaluation_data, pr_data, estimate_precision_and_recall):
    """Score a set of hypotheses by posterior-weighted F1.

    Every hypothesis is first re-scored on `evaluation_data`. Each one with a non-zero
    precision + recall then contributes its normalized posterior mass times
    precision * recall / (precision + recall).

    Args:
        best_hypotheses: Re-iterable collection of hypotheses providing `compute_posterior`
            and `posterior_score`.
        evaluation_data: Passed to each hypothesis's `compute_posterior`.
        pr_data: Passed to `estimate_precision_and_recall`.
        estimate_precision_and_recall: Callable `(hypothesis, pr_data) -> (precision, recall)`.

    Returns:
        tuple: `(Z, score_sum, best, s, rec)`. `Z` is the log normalizer. `score_sum` is the
        posterior-weighted F1. `best` is twice the largest single contribution and `s` is the
        string of the hypothesis that produced it (None if there was none). `rec` lists
        `[mass, F1]` for hypotheses with mass above 1e-2, sorted by decreasing mass.
    """
    for hyp in best_hypotheses:
        hyp.compute_posterior(evaluation_data)
    Z = logsumexp([hyp.posterior_score for hyp in best_hypotheses])

    total = 0
    top_contribution = 0
    top_repr = None
    records = []

    for hyp in best_hypotheses:
        precision, recall = estimate_precision_and_recall(hyp, pr_data)
        denom = precision + recall
        if denom == 0:
            # F-score is undefined; this hypothesis contributes nothing
            continue

        mass = np.exp(hyp.posterior_score - Z)
        contribution = mass * (precision * recall / denom)
        if contribution > top_contribution:
            top_contribution = contribution
            top_repr = str(hyp)
        total += contribution

        if mass > 1e-2:
            records.append([mass, 2 * precision * recall / denom])

    total *= 2  # the factor 2 of F1 = 2PR/(P+R) is applied once here
    records.sort(key=lambda record: record[0], reverse=True)
    return Z, total, top_contribution*2, top_repr, records
Ejemplo n.º 16
0
def test_hypo_stat():
    """
    objective: test how does those high prob hypotheses look like

    run: mpiexec -n 12
    """

    seq = load(open('seq_' + str(rank) + ''))

    cnt = 0
    for e in seq:
        Z = logsumexp([p for h, p in e.iteritems()])
        e_list = [[h, p] for h, p in e.iteritems()]
        e_list.sort(key=lambda x: x[1], reverse=True)
        f = open('hypo_stat_' + str(rank) + suffix, 'a')

        print >> f, '=' * 40
        for iii in xrange(4):
            print >> f, 'rank: %i' % rank, 'prob', np.exp(e_list[iii][1] - Z)
            print >> f, Counter([e_list[iii][0]() for _ in xrange(512)])
            print >> f, str(e_list[iii][0])

        print cnt, 'done'
        cnt += 1
        f.close()
Ejemplo n.º 17
0
def test_lis_disp(names):
    ll = [load(open(name)) for name in names]
    for li in ll:
        print '='*50
        Z = logsumexp([h[0] for h in li])
        for i in xrange(3):
                print 'p ', np.exp(li[i][0] -Z), 'x_f-score ', li[i][1], 'axb_f-score', li[i][2]
                print li[i][4]
Ejemplo n.º 18
0
def run(*args):
    """Evaluate the module-level `hypotheses` on freshly generated data of one size.

    `args[0]` is the amount of data to generate. All hypotheses are re-scored on it, the 25
    best are written word by word to the "-hypotheses" output file, and per-word,
    posterior-weighted agreement statistics are written to the "-stats" output file.

    Returns:
        int: Always 0.
    """
    data_size = args[0]

    # Posterior-weighted tallies per word. "representation": how often the hypothesis always
    # agrees with the target; "response": how often its responses agree.
    p_representation = defaultdict(int)
    p_representation_presup = defaultdict(int)
    p_representation_literal = defaultdict(int)
    p_response = defaultdict(int)
    p_response_presup = defaultdict(int)
    p_response_literal = defaultdict(int)

    data = generate_data(data_size)

    # recompute the posteriors on the new data
    for hyp in hypotheses:
        hyp.compute_posterior(data)

    # normalizer for the posterior
    Z = logsumexp([hyp.posterior_score for hyp in hypotheses])

    # output the top hypotheses
    top = FiniteBestSet(max=True, N=25)
    for hyp in hypotheses:
        top.push(hyp, hyp.posterior_score)
    for idx, hyp in enumerate(top.get_all(sorted=True)):
        for word in hyp.all_words():
            fprintn(8, data_size, idx, word, hyp.posterior_score, q(hyp.value[word]),
                    f=options.OUT_PATH+"-hypotheses."+str(get_rank())+".txt")

    # compute the probability of being correct
    for hyp in hypotheses:
        hyp_name = str(hyp)
        for word in words:
            mass = exp(hyp.posterior_score - Z)
            key = word + ":" + hyp_name

            # if we always agree with the target, then we count as the right representation
            p_representation[word] += mass * (agree_pct[key] == 1.)
            p_representation_presup[word] += mass * (agree_pct_presup[key] == 1.)
            p_representation_literal[word] += mass * (agree_pct_literal[key] == 1.)

            # and just how often does the hypothesis agree?
            p_response[word] += mass * agree_pct[key]
            p_response_presup[word] += mass * agree_pct_presup[key]
            p_response_literal[word] += mass * agree_pct_literal[key]

    for word in words:
        fprintn(10, str(get_rank()), q(word), data_size,
                p_representation[word], p_representation_presup[word], p_representation_literal[word],
                p_response[word], p_response_presup[word], p_response_literal[word],
                f=options.OUT_PATH+"-stats."+str(get_rank())+".txt")

    return 0
Ejemplo n.º 19
0
def most_prob(suffix):
    topn = load(open('out/SimpleEnglish/hypotheses__' + suffix))
    Z = logsumexp([h.posterior_score for h in topn])
    h_set = [h for h in topn]; h_set.sort(key=lambda x: x.posterior_score, reverse=True)
    
    for i in xrange(10):
        print h_set[i]
        print 'prob`: ', np.exp(h_set[i].posterior_score - Z)
        print Counter([h_set[i]() for _ in xrange(512)])
Ejemplo n.º 20
0
def prob_correct(data_size, hypotheses, agree_pct, agree_pct_presup,
                 agree_pct_literal):
    p_representation = defaultdict(
        int)  # how often do you get the right representation
    p_response = defaultdict(int)  # how often do you get the right response?
    p_representation_literal = defaultdict(
        int)  # how often do you get the right representation
    p_response_literal = defaultdict(
        int)  # how often do you get the right response?
    p_representation_presup = defaultdict(
        int)  # how often do you get the right representation
    p_response_presup = defaultdict(
        int)  # how often do you get the right response?

    weight = 1. / EVAL_NUM
    for _ in range(EVAL_NUM):
        data = generate_data(data_size)
        # recompute posterior
        print 'Compute posterior for ', str(data_size)
        [x.compute_posterior(data) for x in hypotheses]
        # normalize the posterior in fs
        Z = logsumexp([x.posterior_score for x in hypotheses])

        words = hypotheses.best().all_words()
        # and compute the probability of being correct
        for h in hypotheses:
            hstr = str(h)
            for w in words:
                p = np.exp(h.posterior_score - Z)
                key = w + ":" + hstr
                p_representation[w] += weight * p * (agree_pct[key] == 1.)
                p_representation_presup[w] += weight * p * (
                    agree_pct_presup[key] == 1.
                )  # if we always agree with the target, then we count as the right rep.
                p_representation_literal[w] += weight * p * (
                    agree_pct_literal[key] == 1.)

                # and just how often does the hypothesis agree?
                p_response[w] += weight * p * agree_pct[key]
                p_response_presup[w] += weight * p * agree_pct_presup[key]
                p_response_literal[w] += weight * p * agree_pct_literal[key]

    filename = 'results/correctness_' + GRAMMAR_TYPE + '_' + str(
        SAMPLE_SIZE) + '.txt'
    f = open(filename, 'a')
    for w in words:
        col = [
            w,
            str(data_size),
            str(p_representation[w]),
            str(p_representation_presup[w]),
            str(p_representation_literal[w]),
            str(p_response[w]),
            str(p_response_presup[w]),
            str(p_response_literal[w])
        ]
        f.write(','.join(col) + '\n')
Ejemplo n.º 21
0
def test_lis_disp(names):
    ll = [load(open(name)) for name in names]
    for li in ll:
        print '=' * 50
        Z = logsumexp([h[0] for h in li])
        for i in xrange(3):
            print 'p ', np.exp(
                li[i][0] - Z), 'x_f-score ', li[i][1], 'axb_f-score', li[i][2]
            print li[i][4]
Ejemplo n.º 22
0
def most_prob(suffix):
    topn = load(open('out/SimpleEnglish/hypotheses__' + suffix))
    Z = logsumexp([h.posterior_score for h in topn])
    h_set = [h for h in topn]
    h_set.sort(key=lambda x: x.posterior_score, reverse=True)

    for i in xrange(10):
        print h_set[i]
        print 'prob`: ', np.exp(h_set[i].posterior_score - Z)
        print Counter([h_set[i]() for _ in xrange(512)])
Ejemplo n.º 23
0
    def compute_single_likelihood(self, datum):
        """Pseudo-log-likelihood of the observed strings in `datum.output`.

        String counts are obtained from `self.make_ll_counts(datum.input)`. Every observed
        string is compared against every counted string: the counted string's log relative
        frequency is penalized by 100.0 times `distance(counted, observed)`, and the terms are
        pooled with log-sum-exp, so close strings count as being close.

        Args:
            datum: Object with `input` and with `output`, a dict of observed string -> count.

        Returns:
            float: Sum over observed strings of count * pooled score.
        """
        assert isinstance(datum.output, dict), "Data supplied must be a dict (function outputs to counts)"

        counts = self.make_ll_counts(datum.input)
        total_count = sum(counts.values())

        def pooled_score(observed):
            """Log-sum-exp over counted strings of (log relative frequency - distance penalty)."""
            return logsumexp([log(counts[s])-log(total_count) - 100.0 * distance(s, observed) for s in counts.keys()])

        # start from 0.0 so an empty output still yields a float
        return sum([datum.output[observed] * pooled_score(observed) for observed in datum.output.keys()], 0.0)
Ejemplo n.º 24
0
    def compute_likelihood(self, data, **kwargs):
        """Estimate the log likelihood of `data` by Bayesian model averaging over `self.hypotheses`.

        For each datum, every hypothesis is scored with the sum of the first two elements
        returned by `compute_posterior(d.input)`, and the scores are normalized in log space.
        The log probability of a "yes" for an output `o` pools the weights of the hypotheses h
        for which `o.Y in h(o.word, o.context, set([o.Y]))`. The observed (yes, no) counts are
        scored with a binomial log likelihood.

        Args:
            data(list): Data whose `output` maps each query `o` to a (num. yes, num. no) pair.
            **kwargs: Accepted but not used by this method.

        Returns:
            float: Log likelihood summed over all outputs of all data.
        """
        self.update()
        hypotheses = self.hypotheses
        likelihood = 0.0

        for d in data:
            # Score each hypothesis once per datum. compute_posterior used to be called twice
            # per hypothesis just to read elements 0 and 1 of the same result.
            posteriors = []
            for h in hypotheses:
                post = h.compute_posterior(d.input)
                posteriors.append(post[0] + post[1])
            zo = logsumexp(posteriors)
            weights = [(post - zo) for post in posteriors]

            for o in d.output.keys():
                # probability for yes on output `o` is sum of posteriors for hypos that contain `o`
                p = logsumexp(
                    [w if o.Y in h(o.word, o.context, set([o.Y])) else -Infinity for h, w in zip(hypotheses, weights)])
                # keep p strictly negative so log1mexp below stays defined
                p = -1e-10 if p >= 0 else p
                k = d.output[o][0]  # num. yes responses
                n = k + d.output[o][1]  # num. trials
                bc = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))  # log binomial coefficient
                likelihood += bc + (k * p) + (n - k) * log1mexp(p)  # likelihood we got human output

        return likelihood
Ejemplo n.º 25
0
 def compute_proposal_probability(self,grammar, t1, t2, resampleProbability=lambdaOne, **kwargs):
     """
         Log probability of proposing t2 from t1 under the mixture of `self.proposers`:
         each proposer's own log proposal probability plus the log of its entry in
         `self.proposer_weights`, pooled with log-sum-exp over all proposers.
     """
     def weighted_lp(idx, proposer):
         """log(weight of proposer idx) + that proposer's log probability of t1 -> t2."""
         lp = proposer.compute_proposal_probability(grammar,t1,t2,
                                                    resampleProbability=resampleProbability,
                                                    **kwargs)
         return nicelog(self.proposer_weights[idx]) + lp

     return logsumexp([weighted_lp(idx, proposer) for idx, proposer in enumerate(self.proposers)])
Ejemplo n.º 26
0
    def runTest(self):
        """Check that MHSampler samples from the prior of a finite grammar.

        A hypothesis with constant likelihood 0.0 is sampled, observed counts per hypothesis
        are compared with expected counts derived from the grammar's log probabilities, both
        are plotted to 'finite-sampler-test.pdf', and a chi-squared test is asserted.
        """
        NSAMPLES = 10000

        from LOTlib.DefaultGrammars import finiteTestGrammar as grammar

        from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
        class MyH(LOTHypothesis):
            """Hypothesis whose posterior equals its prior (the likelihood is always 0.0)."""

            @attrmem('likelihood')
            def compute_likelihood(self, *args, **kwargs):
                return 0.0

            @attrmem('prior')
            def compute_prior(self):
                return grammar.log_probability(self.value)

        print "# Taking MHSampler for a test run"
        cnt = Counter()
        h0 = MyH(grammar=grammar)
        for h in break_ctrlc(MHSampler(h0, [], steps=NSAMPLES, skip=10)): # huh the skip here seems to be important
            cnt[h] += 1
        trees = list(cnt.keys())
        print "# Done taking MHSampler for a test run"

        ## TODO: When the MCMC methods get cleaned up for how many samples they return, we will assert that we got the right number here
        # assert sum(cnt.values()) == NSAMPLES # Just make sure we aren't using a sampler that returns fewer samples! I'm looking at you, ParallelTempering

        # NOTE(review): Z is computed but never used below; expc is built from the unnormalized
        # exp(log_probability), so it only sums to sum(obsc) if the sampled trees carry all of
        # the grammar's probability mass -- confirm whether `- Z` was intended.
        Z = logsumexp([grammar.log_probability(t.value) for t in trees]) # renormalize to the trees in self.trees
        obsc = [cnt[t] for t in trees]
        expc = [exp( grammar.log_probability(t.value))*sum(obsc) for t in trees]

        # And plot here, sorted by decreasing expected count
        expc, obsc, trees = zip(*sorted(zip(expc, obsc, trees), reverse=True))
        import matplotlib.pyplot as plt
        plt.subplot(111)
        # Log here spaces things out at the high end, where we can see it!
        plt.scatter(log(range(len(trees))), expc, color="red", alpha=1.)
        plt.scatter(log(range(len(trees))), obsc, color="blue", marker="x", alpha=1.)
        plt.savefig('finite-sampler-test.pdf')
        plt.clf()

        # Do chi squared test
        csq, pv = chisquare(obsc, expc)
        self.assertAlmostEqual(sum(obsc), sum(expc))

        # And examine: print observed count, expected count and hypothesis
        for t, c, s in zip(trees, obsc, expc):
            print c, s, t
        print (csq, pv), sum(obsc)

        self.assertGreater(pv, 0.01, msg="Sampler failed chi squared!")
Ejemplo n.º 27
0
def probe_MHsampler(h, language, options, name, size=64, data=None, init_size=None, iters_per_stage=None, sampler=None, ret_sampler=False):
    get_data = language.sample_data_as_FuncData
    evaluation_data = get_data(size, max_length=options.FINITE)

    if data is None:
        if init_size is None:
            data = evaluation_data
        else:
            data = get_data(n=size, max_length=init_size)

    if sampler is None:
        sampler = MHSampler(h, data)
    else:
        sampler.data = data

    best_hypotheses = TopN(N=options.TOP_COUNT)

    iter = 0

    for h in sampler:
        if iter == options.STEPS: break
        if iter % 100 == 0: print '---->', iter

        best_hypotheses.add(h)

        if iter % options.PROBE == 0:

            for h in best_hypotheses:
                h.compute_posterior(evaluation_data)
            Z = logsumexp([h.posterior_score for h in best_hypotheses])

            pr_data = get_data(1024, max_length=options.FINITE)
            weighted_score = 0
            for h in best_hypotheses:
                precision, recall = language.estimate_precision_and_recall(h, pr_data)
                if precision + recall != 0:
                    f_score = precision * recall / (precision + recall)
                    weighted_score += np.exp(h.posterior_score - Z) * f_score
            weighted_score *= 2

            to_file([[iter, Z, weighted_score]], name)

        if init_size is not None and iter % iters_per_stage == 0:
            init_size += 2
            sampler.data = get_data(n=size, max_length=init_size)

        iter += 1

    if ret_sampler:
        return sampler
Ejemplo n.º 28
0
def evaluate_sampler(my_sampler, print_every=1000, out_aggregate=sys.stdout, trace=False, pthreshold=0.999, prefix=""):
    """
            Print the stats for a single sampler run

            *my_sampler* -- a generator of samples
            print_every -- display the output every this many steps
            out_hypothesis -- where we put hypothesis stats
            out_aggregate  -- where we put aggregate stats

            trace -- print every sample
            prefix -- display before lines
    """
    visited_at = defaultdict(list)

    startt = time()
    for n, s in break_ctrlc(enumerate(my_sampler)): # each sample should have an .posterior_score defined
        if trace: print "#", n, s

        visited_at[s].append(n)

        if (n%print_every)==0 and n>0:
            post =  sorted([x.posterior_score for x in visited_at.keys()], reverse=True) # the unnormalized posteriors of everything found
            ll   =  sorted([x.likelihood for x in visited_at.keys()], reverse=True)
            Z = logsumexp(post) # just compute total probability mass found -- the main measure

            # determine how many you need to get pthreshold of the posterior mass
            J=0
            while J < len(post):
                if logsumexp(post[J:]) < Z + log(1.0-pthreshold):
                    break
                J += 1

            out_aggregate.write('\t'.join(map(str, [prefix, n, r3(time()-startt), r5(Z), r5(post[0]), J, len(post)] )) + '\n')
            out_aggregate.flush()

    return
def run(data, TOP=100, STEPS=1000):
    """Sample hypotheses with MH and return the best ones with normalized posteriors.

    Args:
        data: data handed to ``MHSampler``.
        TOP: capacity of the ``TopN`` collection of retained hypotheses.
        STEPS: number of sampler steps.

    Returns:
        list of ``(hypothesis, probability)`` pairs in the reverse of the order
        produced by ``tn.get_all(sorted=True)``. Probabilities are normalized
        over the retained hypotheses only, not over everything sampled.
    """
    h0 = MyHypothesis()
    tn = TopN(N=TOP)

    # run the sampler, keeping only the best hypotheses
    for h in MHSampler(h0, data, steps=STEPS, acceptance_temperature=1.0, likelihood_temperature=1.0):
        tn.add(h)

    z = logsumexp([h.posterior_score for h in tn])  # log normalizer over the retained set
    sort_post_probs = [(h, exp(h.posterior_score - z)) for h in tn.get_all(sorted=True)][::-1]
    return sort_post_probs
Ejemplo n.º 30
0
    def compute_single_likelihood(self, datum):
        """Log pseudo-likelihood of one datum, giving partial credit to near misses.

        The hypothesis is called on ``datum.input`` and must return a dict that
        maps generated strings to scores (used here as log probabilities). For
        every observed string ``k`` with count ``dc`` in ``datum.output`` the
        contribution is ``dc * logsumexp(score(r) - scale * prefix_distance(r, k))``
        taken over all generated strings ``r``.

        ``scale`` is the instance attribute ``distance`` if it is set on the
        instance, otherwise 1.0.

        Raises:
            ValueError: re-raised (after printing a diagnostic) when the
                computation hits a math domain error.
        """
        distance_scale = self.__dict__.get('distance', 1.0)

        assert isinstance(datum.output, dict)

        hp = self(*datum.input)  # output dictionary, output->probabilities
        assert isinstance(hp, dict)
        try:
            # now we have to add up every string that we could get
            return sum(dc * logsumexp([rlp - distance_scale*prefix_distance(r, k) for r, rlp in hp.items()])
                       for k, dc in datum.output.items())

        except ValueError:
            print "*** Math domain error", hp, str(self)
            # bare raise keeps the original traceback (``raise e`` would reset it in Python 2)
            raise
Ejemplo n.º 31
0
    def compute_single_likelihood(self, datum):
        """Score one datum against this hypothesis with a prefix-distance penalty.

        ``self(*datum.input)`` must return a dict of generated string -> score
        (treated as a log probability). Each observed string ``k`` in
        ``datum.output`` contributes its count times the logsumexp, over all
        generated strings ``r``, of ``score(r) - scale * prefix_distance(r, k)``.
        ``scale`` comes from the instance attribute ``distance`` when present in
        the instance dict and defaults to 1.0.

        Raises:
            ValueError: propagated unchanged after a diagnostic line is printed.
        """
        distance_scale = self.__dict__.get('distance', 1.0)

        assert isinstance(datum.output, dict)

        hp = self(*datum.input)  # output dictionary, output->probabilities
        assert isinstance(hp, dict)
        try:
            # now we have to add up every string that we could get
            return sum(dc * logsumexp([rlp - distance_scale*prefix_distance(r, k) for r, rlp in hp.items()])
                       for k, dc in datum.output.items())

        except ValueError:
            print "*** Math domain error", hp, str(self)
            # re-raise without rebinding so the traceback still points at the failing expression
            raise
Ejemplo n.º 32
0
    def compute_single_likelihood(self,
                                  datum,
                                  llcounts,
                                  distance_factor=100.0):
        """Pseudo-likelihood of ``datum`` given sampled output counts.

        ``llcounts`` maps generated strings to how often they were produced;
        ``datum.output`` maps observed strings to their counts. Each observed
        string is scored by the logsumexp, over all generated strings ``r``, of
        ``log(count(r) / total) - distance_factor * distance(r, observed)``, so
        close strings count as being close. The result is the count-weighted sum.
        """
        assert isinstance(
            datum.output,
            dict), "Data supplied must be a dict (function outputs to counts)"

        lo = sum(llcounts.values())  # normalizing constant

        total = 0
        for observed in datum.output.keys():
            terms = [
                log(llcounts[generated]) - log(lo) -
                distance_factor * distance(generated, observed)
                for generated in llcounts.keys()
            ]
            total += datum.output[observed] * logsumexp(terms)
        return total
Ejemplo n.º 33
0
    def compute_weights(self):
        """
        Compute one sortable weight per child, used to decide which child to use next.

        Returns a list with one tuple ``(score, v)`` per entry of ``self.children``.
        Returning a tuple lets these weights get sorted by each successive element.

        ``score`` is Infinity for a child with ``nsteps == 0``; otherwise it is the child's
        exponentiated, re-normalized mean log posterior plus an exploration bonus
        ``self.C * sqrt(2*log(N) / (nsteps + 1))``, where N is the total ``nsteps`` over all children.

        ``v`` is the tie-breaking adjustment: 0.0 unless the child's normalized value is Infinity,
        in which case it is the sum of ``self.hole_penalty`` entries (default -1.0) over the
        nonterminal arguments found in ``c.value.value``.
        """

        # Here what we call x_bar is really the mean log posterior. So we convert it out of that.
        # Children that have never been stepped get +Infinity.
        es = [
            c.get_xbar() if c.nsteps > 0 else Infinity for c in self.children
        ]

        # NOTE(review): if any entry of es is +Infinity, Z is presumably +Infinity as well, which would
        # make exp(x - Z) zero for every finite entry -- confirm against logsumexp's behaviour.
        Z = logsumexp(es)  ## renormalize, for converting to logprob

        # We need to preserve -inf here as well as +inf since these mean something special
        # -inf means we should never ever visit; +inf means we can't not visit
        es = [exp(x - Z) if abs(x) < Infinity else x for x in es]

        # total number of steps taken over all children
        N = sum([c.nsteps for c in self.children])

        # the weights we return
        weights = [None] * len(self.children)

        for i, c in enumerate(self.children):

            v = 0.0  # the adjustment
            if es[i] == Infinity:  # so break the ties.
                # This must prevent us from wandering off to infinity. To do that, we impose a penalty for each nonterminal
                for fn in c.value.value:
                    for a in fn.argStrings():
                        if self.grammar.is_nonterminal(a):
                            v += self.hole_penalty.get(
                                a, -1.0
                            )  # pay this much for this hole. -1 is for those weird nonterminals that need bv introduced

            # The conditional expression covers the whole first tuple element:
            # (es[i] + bonus) if c.nsteps > 0 else Infinity. log(N) is therefore only
            # evaluated when this child has nsteps > 0, which implies N > 0.
            weights[i] = (es[i] +
                          self.C * sqrt(2.0 * log(N) / float(c.nsteps + 1))
                          if c.nsteps > 0 else Infinity, v)

        return weights
Ejemplo n.º 34
0
 def compute_proposal_probability(self,
                                  grammar,
                                  t1,
                                  t2,
                                  resampleProbability=lambdaOne,
                                  **kwargs):
     """
         Log probability of proposing t2 from t1 under the mixture of proposers.

         Each proposer's own proposal log probability is offset by
         nicelog(weight) of its entry in self.proposer_weights, and the
         results are combined with logsumexp.
     """
     weighted_lps = []
     for idx, proposer in enumerate(self.proposers):
         proposer_lp = proposer.compute_proposal_probability(
             grammar, t1, t2, resampleProbability=resampleProbability, **kwargs)
         log_weight = nicelog(self.proposer_weights[idx])
         weighted_lps.append(log_weight + proposer_lp)
     return logsumexp(weighted_lps)
Ejemplo n.º 35
0
    def compute_likelihood(self, data, update_post=True, **kwargs):
        """
        Compute the likelihood of producing human data, given:  H (self.hypotheses)  &  x (self.value)

        For every datum the per-hypothesis scores ``self.L[d_key] + P`` are normalized into
        weights, the model's log probability of a "yes" for each output is the log of the
        weighted sum of the matching column of ``self.R[d_key]``, and the binomial log
        likelihood of the observed (yes, no) counts is accumulated.

        Args:
            data: iterable of data whose ``output`` maps each output to a (num. yes, num. no) pair.
            update_post: when true, store the result in ``self.likelihood`` and call
                ``self.update_posterior()``.

        Returns:
            float: the summed log likelihood.
        """
        # The following must be computed for this specific GrammarHypothesis
        # ------------------------------------------------------------------
        x = self.normalized_value()  # vector of rule probabilites
        P = np.dot(self.C, x)  # prior for each hypothesis
        likelihood = 0.0

        for d_key, d in enumerate(data):
            # Initialize unfilled values for L[data] & R[data]
            if d_key not in self.L:
                self.init_L(d, d_key)
            if d_key not in self.R:
                self.init_R(d, d_key)

            posteriors = self.L[d_key] + P
            Z = logsumexp(posteriors)
            # normalized weight of each hypothesis; exponentiated once per datum rather than once per output
            weights = np.exp(posteriors - Z)
            R = self.R[d_key]

            # Compute likelihood of producing same output (yes/no) as data
            for m, o in enumerate(d.output.keys()):
                # col `m` of boolean matrix `R[i]` weighted by `w`
                p = log((weights * R[:, m]).sum())

                # NOTE: with really small grammars sometimes we get p > 0
                if p >= 0:
                    print "P ERROR!"
                    # log1mexp(p) is only defined for p < 0; nudge p just below zero so the
                    # likelihood stays finite instead of being poisoned by nan/-inf
                    p = -1e-10

                k = d.output[o][0]  # num. yes responses
                n = k + d.output[o][1]  # num. trials
                bc = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))  # binomial coefficient
                likelihood += bc + (k * p) + (n - k) * log1mexp(p)  # likelihood we got human output

        if update_post:
            self.likelihood = likelihood
            self.update_posterior()
        return likelihood
Ejemplo n.º 36
0
    def compute_proposal_probability(self,
                                     grammar,
                                     t1,
                                     t2,
                                     resampleProbability=lambdaOne,
                                     recurse=True):
        """
        Log probability of proposing t2 from t1, summed (via logsumexp) over the
        source/target node choices considered below.

        grammar -- passed to nodes_equal_except_parents when comparing nodes
        t1, t2 -- the tree before and after the proposal
        resampleProbability -- per-node weight used when sampling nodes
        recurse -- if True, also consider every ancestor pair of the differing nodes;
                   if False, only the differing pair itself
        """
        # the pair of nodes where the trees differ; chosen_node1 is None when no difference was found
        chosen_node1, chosen_node2 = least_common_difference(t1, t2)

        lps = []
        if chosen_node1 is None:  # any node in the tree could have been copied
            for node in t1:
                # weight of x as a copy source: nonzero only if x matches `node` (parents ignored)
                could_be_source = lambda x: 1.0 * nodes_equal_except_parents(
                    grammar, x, node) * resampleProbability(x)
                # the node's own weight is subtracted so it does not count as its own source
                lp_of_choosing_source = (
                    nicelog(
                        t1.sample_node_normalizer(could_be_source) -
                        could_be_source(node)) -
                    nicelog(t1.sample_node_normalizer(resampleProbability)))
                # NOTE(review): chosen_node1 is None in this branch, so the target passed here is
                # None for every iteration -- possibly `node` was intended; confirm.
                lp_of_choosing_target = t1.sampling_log_probability(
                    chosen_node1, resampleProbability=resampleProbability)
                lps += [lp_of_choosing_source + lp_of_choosing_target]
        else:  # we have a specific path up the tree
            while chosen_node1:
                # weight of x as a copy source: nonzero only if x matches the node found in t2
                could_be_source = lambda x: 1.0 * nodes_equal_except_parents(
                    grammar, x, chosen_node2) * resampleProbability(x)

                lp_of_choosing_source = nicelog(
                    t1.sample_node_normalizer(could_be_source)) - nicelog(
                        t1.sample_node_normalizer(resampleProbability))
                lp_of_choosing_target = t1.sampling_log_probability(
                    chosen_node1, resampleProbability=resampleProbability)
                lps += [lp_of_choosing_source + lp_of_choosing_target]

                if recurse:
                    # walk both nodes up one level; the loop ends once chosen_node1 is falsy
                    chosen_node1 = chosen_node1.parent
                    chosen_node2 = chosen_node2.parent
                else:
                    chosen_node1 = None

        return logsumexp(lps)
Ejemplo n.º 37
0
    def compute_single_likelihood(self, datum):
        """Log likelihood of one datum under an edit-noise model.

        ``self(*datum.input)`` must return a dict of generated string -> score
        (added directly to the total, i.e. used as a log probability). For each
        observed string in ``datum.output`` (string -> count):

        * if the hypothesis generated it, its score is used as is;
        * otherwise, if anything was generated, the logsumexp over all generated
          strings of ``score + edit_likelihood(generated, observed)`` is used;
        * if nothing was generated, ``edit_likelihood('', observed)`` is used.

        Each term is multiplied by the observed count.
        """
        assert isinstance(datum.output, dict)

        hp = self(*datum.input)  # output dictionary, output->probabilities
        assert isinstance(hp, dict)

        total = 0.0
        for target, count in datum.output.items():
            if target in hp:
                total += count * hp[target]
                continue

            if hp:
                # probability of the observed string under the editing model, over all sources
                edited = [
                    lp + edit_likelihood(source, target, alphabet_size=self.alphabet_size, alpha=datum.alpha)
                    for source, lp in hp.items()
                ]
                total += count * logsumexp(edited)
            else:
                total += count * edit_likelihood('', target, alphabet_size=self.alphabet_size, alpha=datum.alpha)

        return total
Ejemplo n.º 38
0
    def weighted_sample(self, n, strings, probs):
        """Draw ``n`` strings with replacement according to log weights.

        Args:
            n: number of samples to draw.
            strings: candidate items, parallel to ``probs``.
            probs: unnormalized log weights; they are normalized with logsumexp.

        Returns:
            list of ``n`` items from ``strings`` (empty when ``probs`` is empty).
        """
        length = len(probs)
        prob_sum = logsumexp(probs)
        cumu_prob = np.zeros(length, dtype=np.float64)

        # cumulative distribution over the candidates
        mass = 0
        for i in xrange(length):
            mass += np.exp(probs[i] - prob_sum)
            cumu_prob[i] = mass

        output = []
        if length == 0:
            return output

        # Rounding can leave cumu_prob[-1] slightly below 1.0. A draw above it used to
        # match nothing and was silently dropped, returning fewer than n samples.
        # Clamp such draws to the last candidate that still adds mass.
        last = int(np.searchsorted(cumu_prob, cumu_prob[-1], side='left'))

        for _ in xrange(n):
            rand = np.random.rand()
            # first index with rand < cumu_prob[index], found by binary search
            idx = int(np.searchsorted(cumu_prob, rand, side='right'))
            output.append(strings[min(idx, last)])

        return output
Ejemplo n.º 39
0
def test_hypo_stat():
    """
    objective: test how does those high prob hypotheses look like

    run: mpiexec -n 12
    """

    seq = load(open('seq_'+str(rank)+''))

    cnt = 0
    for e in seq:
        Z = logsumexp([p for h, p in e.iteritems()])
        e_list = [[h, p] for h, p in e.iteritems()]; e_list.sort(key=lambda x:x[1], reverse=True)
        f = open('hypo_stat_'+str(rank)+suffix, 'a')

        print >> f, '='*40
        for iii in xrange(4):
            print >> f, 'rank: %i' % rank, 'prob', np.exp(e_list[iii][1] - Z)
            print >> f, Counter([e_list[iii][0]() for _ in xrange(512)])
            print >> f, str(e_list[iii][0])

        print cnt, 'done'; cnt += 1
        f.close()
Ejemplo n.º 40
0
    def csv_compare_model_human(self, data, filename):
        """
        Save csv stuff for making the regression plot.

        Format is list of input/outputs, with human & model probabilities for each.

        Note
        ----
        This is specific to NumberGameHypothesis (because of 'o in h()')

        """
        import math
        import csv

        self.update()
        for h in self.hypotheses:
            h.compute_prior()
            h.update_posterior()

        with open(filename, "a") as f:
            writer = csv.writer(f)
            hypotheses = self.hypotheses
            writer.writerow(["input", "output", "human p", "model p"])
            i = 0

            for d in data:
                posteriors = [sum(h.compute_posterior(d.input)) for h in hypotheses]
                Z = logsumexp(posteriors)
                weights = [(post - Z) for post in posteriors]
                print i, "\t|\t", d.input
                i += 1

                for o in d.output.keys():
                    # Probability for yes on output `o` is sum of posteriors for hypos that contain `o`
                    p_human = float(d.output[o][0]) / float(d.output[o][0] + d.output[o][1])
                    p_model = sum([math.exp(w) if o in h() else 0 for h, w in zip(hypotheses, weights)])
                    writer.writerow([d.input, o, p_human, p_model])
Ejemplo n.º 41
0
    def evaluate_sampler(self, sampler):

        cnt = Counter()
        for h in lot_iter(sampler):
            cnt[h.value] += 1

        ## TODO: When the MCMC methods get cleaned up for how many samples they return, we will assert that we got the right number here
        # assert sum(cnt.values()) == NSAMPLES # Just make sure we aren't using a sampler that returns fewer samples! I'm looking at you, ParallelTempering

        Z = logsumexp([t.log_probability() for t in self.trees]) # renormalize to the trees in self.trees
        obsc = [cnt[t] for t in self.trees]
        expc = [exp(t.log_probability()-Z)*sum(obsc) for t in self.trees]
        csq, pv = chisquare(obsc, expc)
        assert abs(sum(obsc) - sum(expc)) < 0.01

        # assert min(expc) > 5 # or else chisq sux

        for t, c, s in zip(self.trees, obsc, expc):
            print c, s, t
        print (csq, pv), sum(obsc)

        self.assertGreater(pv, PVALUE, msg="Sampler failed chi squared!")

        return csq, pv
Ejemplo n.º 42
0
    def compute_likelihood(self, data):

        ll = 0.0
        for cl in self.concept2hypotheses.keys(): # for each concept and list

            if cl not in concept2data:
                print "# Warning, %s not in concept2data."%cl
                continue

            d = concept2data[cl]

            for si in xrange(len(d)): # for each prefix of the data

                hypotheses = self.concept2hypotheses[cl]
                assert len(hypotheses) > 0

                # update the posteriors for this amount of data
                for h in hypotheses:
                    h.compute_posterior(d[:si]) # up to but not including this set

                # get their normalizer
                Z = logsumexp([h.posterior_score for h in hypotheses])

                nxtd = d[si]
                pred = [0.0] * len(nxtd.input) # how we respond to each
                # compute the predictive
                for h in hypotheses:
                    p = exp(h.posterior_score-Z)
                    for i, ri in enumerate(h.evaluate_on_set(nxtd.input)):
                        pred[i] += p*((ri==True)*h.alpha + (1.0-h.alpha)*h.baserate)
                for ri, pi in enumerate(pred):
                    key = tuple([cl, si, ri])
                    # assert key in human_yes and key in human_no, "No key " + key # Does not have to be there because there can be zero counts
                    # print pi
                    ll += human_yes[key]*log(pi) + human_no[key]*log(1.-pi)
        return ll
Ejemplo n.º 43
0
def evaluate_sampler(my_sampler, print_every=1000, out_hypotheses=sys.stdout, out_aggregate=sys.stdout, trace=False, prefix=""):
    """
            Print the stats for a single sampler run

            *my_sampler* -- a generator of samples
            print_every -- display the output every this many steps
            out_hypothesis -- where we put hypothesis stats
            out_aggregate  -- where we put aggregate stats

            trace -- print every sample
            prefix -- display before lines
    """
    visited_at = defaultdict(list)

    startt = time()
    for n, s in lot_iter(enumerate(my_sampler)): # each sample should have an .posterior_score defined
        if trace: print "#", n, s

        visited_at[s].append(n)

        if (n%print_every)==0 and n>0:
            post =  sorted([x.posterior_score for x in visited_at.keys()], reverse=True) # the unnormalized posteriors of everything found
            ll   =  sorted([x.likelihood for x in visited_at.keys()], reverse=True)
            Z = logsumexp(post) # just compute total probability mass found -- the main measure

            out_aggregate.write('\t'.join(map(str, [prefix, n, r3(time()-startt), r5(Z), len(post)]+mydisplay(post))) + '\n')

    # Now once we're done, output the hypothesis stats
    for k,v in visited_at.items():

        mean_diff = "NA"
        if len(v) > 1: mean_diff = mean(diff(v))

        out_hypotheses.write('\t'.join(map(str, [prefix, k.posterior_score, k.prior, k.likelihood, len(v), min(v), max(v), mean_diff, sum(diff(v)==0) ])) +'\n') # number of rejects from this

    return 0.0
Ejemplo n.º 44
0
def run(*args):
    """
    Score the global `hypotheses` on freshly generated data and write summary files.

    args[0] is the amount of data passed to generate_data. Two files are appended to via
    fprintn, both named from options.OUT_PATH and get_rank():
      * "-hypotheses.<rank>.txt": one line per word of each of the 25 best hypotheses
      * "-stats.<rank>.txt": one line per word with six posterior-weighted agreement numbers

    Reads the module-level names hypotheses, words, agree_pct, agree_pct_presup,
    agree_pct_literal and options. Returns 0.
    """
    #print "# Running data"

    global hypotheses

    data_size = args[0]

    # Per-word accumulators; each is a posterior-weighted sum over all hypotheses.
    p_representation = defaultdict(
        int)  # how often do you get the right representation
    p_response = defaultdict(int)  # how often do you get the right response?
    p_representation_literal = defaultdict(
        int)  # how often do you get the right representation
    p_response_literal = defaultdict(
        int)  # how often do you get the right response?
    p_representation_presup = defaultdict(
        int)  # how often do you get the right representation
    p_response_presup = defaultdict(
        int)  # how often do you get the right response?

    #print "# Generating data"
    data = generate_data(data_size)

    # recompute these
    #print "# Computing posterior"
    #[ x.unclear_functions() for x in hypotheses ]
    # (list comprehension used only for its side effect of updating each hypothesis)
    [x.compute_posterior(data) for x in hypotheses]

    # normalize the posterior in fs
    #print "# Computing normalizer"
    Z = logsumexp([x.posterior_score for x in hypotheses])

    # and output the top hypotheses
    qq = FiniteBestSet(max=True, N=25)
    for h in hypotheses:
        qq.push(h, h.posterior_score)  # get the tops
    for i, h in enumerate(qq.get_all(sorted=True)):
        for w in h.all_words():
            # NOTE(review): the leading 8 is presumably a column/verbosity argument of fprintn -- confirm
            fprintn(8,
                    data_size,
                    i,
                    w,
                    h.posterior_score,
                    q(h.value[w]),
                    f=options.OUT_PATH + "-hypotheses." + str(get_rank()) +
                    ".txt")

    # and compute the probability of being correct
    #print "# Computing correct probability"
    for h in hypotheses:
        hstr = str(h)
        #print data_size, len(data), exp(h.posterior_score), correct[ str(h)+":"+w ]
        for w in words:
            # normalized posterior of h (does not depend on w)
            p = exp(h.posterior_score - Z)
            # the agree_pct* tables are keyed by "<word>:<hypothesis string>"
            key = w + ":" + hstr

            p_representation[w] += p * (agree_pct[key] == 1.)
            p_representation_presup[w] += p * (
                agree_pct_presup[key] == 1.
            )  # if we always agree with the target, then we count as the right rep.
            p_representation_literal[w] += p * (agree_pct_literal[key] == 1.)

            # and just how often does the hypothesis agree?
            p_response[w] += p * agree_pct[key]
            p_response_presup[w] += p * agree_pct_presup[key]
            p_response_literal[w] += p * agree_pct_literal[key]

    #print "# Outputting"

    for w in words:
        fprintn(10,
                str(get_rank()),
                q(w),
                data_size,
                p_representation[w],
                p_representation_presup[w],
                p_representation_literal[w],
                p_response[w],
                p_response_presup[w],
                p_response_literal[w],
                f=options.OUT_PATH + "-stats." + str(get_rank()) + ".txt")

    return 0
Ejemplo n.º 45
0
def parse_cube(lang_name, finite):
    """
        reads hypotheses of lang_name, estimates the p/r and posterior score, and saves them into a cube (list of tables)

        data structure:
            stats = [cube, topn]
            cube = [[size, Z, table], [size, Z, table], ...]
            table = [[ind, score, p, r, f, strs], [ind, score, p, r, f, strs], ...]

        NOTE: topn here is a dict, you can use ind to find the h

        Rank 0 loads the hypotheses and estimates precision/recall, then broadcasts them.
        Every rank computes the tables for its share of the data sizes; ranks other than 0
        send their part to rank 0 and exit the process. Rank 0 sorts the cube by size and
        dumps [cube, topn] to '<lang_name>_stats<suffix>'.

        example script:
            mpiexec -n 12 python parse_hypothesis.py --mode=parse_cube --language=An --finite=3/10
    """
    _dir = 'out/'
    global size
    global rank
    topn = dict()
    prf_dict = {}
    language = instance(lang_name, finite)

    if rank == 0:
        # no truncation only for language 'An' with finite <= 3
        truncate_flag = False if (lang_name == 'An' and finite <= 3) else True
        set_topn = set()
        print 'loading..'
        # NOTE(review): fff() is presumably a stdout flush helper -- confirm
        fff()
        for file_name in listdir(_dir):
            if lang_name + '_' in file_name:
                # NOTE(review): the file object is never closed explicitly
                _set = load(open(_dir + file_name))
                set_topn.update([h for h in _set])

        print 'getting p&r..'
        fff()
        pr_data = language.sample_data_as_FuncData(2048)
        for h in set_topn:
            p, r, h_llcounts = language.estimate_precision_and_recall(
                h, pr_data, truncate=truncate_flag)
            # [precision, recall, F1]; F1 is defined as 0 when p + r == 0
            prf_dict[h] = [p, r, 0 if p + r == 0 else 2 * p * r / (p + r)]
            h.fixed_ll_counts = h_llcounts

        # index -> hypothesis, so table rows can refer to hypotheses by index
        topn = dict(enumerate(set_topn))
        print 'bcasting..'
        fff()

    # every rank receives rank 0's hypotheses and p/r/f values
    topn = comm.bcast(topn, root=0)
    prf_dict = comm.bcast(prf_dict, root=0)

    print rank, 'getting posterior'
    fff()
    # work_list = slice_list(np.arange(0, 72, 6), size)
    # NOTE(review): slice_list presumably splits the data sizes into one chunk per rank -- confirm
    work_list = slice_list(np.arange(120, 264, 12), size)

    cube = []
    for s in work_list[rank]:
        eval_data = language.sample_data_as_FuncData(s)
        for ind, h in topn.iteritems():
            h.likelihood_temperature = 100
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for ind, h in topn.iteritems()])

        # one row per hypothesis, best posterior score first
        table = [[
            ind, h.posterior_score, prf_dict[h][0], prf_dict[h][1],
            prf_dict[h][2], h.fixed_ll_counts
        ] for ind, h in topn.iteritems()]
        table.sort(key=lambda x: x[1], reverse=True)
        cube += [[s, Z, table]]

        print rank, s, 'done'
        fff()

    if rank == 0:
        # gather the partial cubes of all other ranks
        for i in xrange(1, size):
            cube += comm.recv(source=i)
    else:
        comm.send(cube, dest=0)
        print rank, 'table sent'
        fff()
        # worker ranks terminate here; only rank 0 reaches the dump below
        sys.exit(0)

    cube.sort(key=lambda x: x[0])
    # NOTE(review): the output file is not closed explicitly
    dump([cube, topn], open(lang_name + '_stats' + suffix, 'w'))
Ejemplo n.º 46
0
def parse_nonadjacent(_dir, temperature):
    """
        1. read raw hypos
        2. get fixed llcnts
        3. compute posterior given different data pool sizes

        NOTE: if _dir is previously dumped topn then load it
    """

    if 'nonadjacent_topn' not in _dir:
        topn = set()
        for filename in os.listdir(_dir):
            if 'nonadjacent' in filename and 'seq' not in filename:
                print 'load', filename
                _set = load(open(_dir + filename))
                topn.update([h for h in _set])
        topn = list(topn)

        # fix the llcnts to save time and make curve smooth
        print 'get llcnts...'
        topn = gen_fixlen_llcnts(topn, 5)
        dump(topn, open(_dir + '_nonadjacent_topn' + suffix, 'w'))

    else:
        print 'load', _dir
        topn = load(open(_dir))

    # find all correct hypotheses
    topn = list(topn)
    correct_set = set()

    for i in xrange(len(topn)):

        flag = True
        for k, v in topn[i].fixed_ll_counts.iteritems():
            if len(k) < 2:
                continue
            elif k[0] == 'a' and k[-1] in 'b':
                continue
            elif k[0] == 'c' and k[-1] in 'bd':
                continue
            elif k[0] == 'e' and k[-1] in 'bdf':
                continue
            flag = False
            break

        if flag: correct_set.add(i)

    print len(correct_set), 'of', len(topn), 'are correct'

    # get posterior
    w_list = range(2, 25, 1)
    amount_list = range(24, 144, 5)
    posterior_seq = []
    for i in xrange(len(w_list)):
        pool_size = w_list[i]
        language = LongDependency(max_length=pool_size)
        eval_data = [
            FunctionData(input=[],
                         output={
                             e: float(amount_list[i]) / pool_size
                             for e in language.str_sets
                         })
        ]

        for h in topn:
            h.likelihood_temperature = temperature
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for h in topn])

        prob = 0
        for i in xrange(len(topn)):
            if i in correct_set:
                prob += np.exp(topn[i].posterior_score - Z)
        print 'pool_size', pool_size, 'prob', prob
        posterior_seq.append([pool_size, prob])

        #debug
        _list = [h for h in topn]
        _list.sort(key=lambda x: x.posterior_score, reverse=True)
        for i in xrange(3):
            print 'prob: ', np.exp(_list[i].posterior_score - Z),
            print h.fixed_ll_counts
            print _list[i]
        print '=' * 50
        fff()

    dump(posterior_seq, open('nonadjacent_posterior_seq' + suffix, 'w'))
Ejemplo n.º 47
0
def dis_pos(jump, is_plot, file_name, axb_bound, x_bound):
    """
    1. read posterior sequence
    2. set bound for axb and x hypotheses
    3. plot

    Loads the four files 'non_seq0_<file_name>' .. 'non_seq3_<file_name>', combines them, and
    for every (axb_bound, x_bound) pair on a 0.1..0.9 grid computes three sequences over the
    combined entries: normalized posterior mass of hypotheses above axb_bound, mass of
    hypotheses above x_bound, and their difference. Pairs whose difference sequence does not
    drop by more than 1e-4 between consecutive entries are printed, dumped to
    'nonadjacent_<axb>_<x><suffix>' and optionally plotted.

    jump -- step of the x axis range(2, 65, jump) used for plotting
    is_plot -- plot when equal to 'yes'
    file_name -- suffix of the input file names

    NOTE(review): the axb_bound and x_bound arguments are overwritten by the grid loops
    below, so the values passed in are never used.

    run: serial
    """
    # space_seq, pr_dict = load(open('non_seq%i_' % i + file_name))
    print 'loading..'
    fff()
    # each loaded item unpacks as a (space_seq, pr_dict) pair; the file objects are not closed explicitly
    _set = [load(open('non_seq%i_' % i + file_name)) for i in xrange(4)]

    print 'avging..'
    fff()
    # the first file's structures are used as accumulators and updated in place
    avg_space_seq, avg_pr_dict = _set.pop(0)
    for space_seq, pr_dict in _set:
        for i in xrange(len(space_seq)):
            prob_dict, ada_dict = space_seq[i]
            avg_prob_dict, avg_ada_dict = avg_space_seq[i]
            for h in prob_dict:
                # log-space addition of the scores; ada values are added directly
                avg_prob_dict[h] = logsumexp([avg_prob_dict[h], prob_dict[h]])
                avg_ada_dict[h] += ada_dict[h]
        for h in pr_dict:
            avg_pr_dict[h] += pr_dict[h]

    for prob_dict, ada_dict in avg_space_seq:
        for h in prob_dict:
            # NOTE(review): a log-space mean over 4 files would subtract log(4), not 4. The same
            # constant is applied to every entry and Z below is computed from these same values,
            # so it cancels in the normalized probabilities -- confirm intent.
            prob_dict[h] -= 4
            ada_dict[h] /= 4
    for h in avg_pr_dict:
        avg_pr_dict[h] /= 4

    for axb_bound in np.arange(0.1, 1, 0.1):
        for x_bound in np.arange(0.1, 1, 0.1):
            seq = []
            seq1 = []
            seq2 = []
            for seen in avg_space_seq:
                # seen[0]: hypothesis -> log score, seen[1]: hypothesis -> averaged ada value
                Z = logsumexp([p for h, p in seen[0].iteritems()])

                axb_prob = -Infinity
                x_prob = -Infinity

                for h, v in seen[0].iteritems():
                    if avg_pr_dict[h] > axb_bound:
                        axb_prob = logsumexp([axb_prob, v])
                    if seen[1][h] > x_bound: x_prob = logsumexp([x_prob, v])

                seq.append(np.exp(axb_prob - Z))
                seq1.append(np.exp(x_prob - Z))
                seq2.append(np.exp(axb_prob - Z) - np.exp(x_prob - Z))
                print 'done'
                fff()

            # keep this pair only if the difference never drops by more than 1e-4 between neighbours
            flag = True
            for i in xrange(len(seq2) - 1):
                if seq2[i] - seq2[i + 1] > 1e-4:
                    flag = False
                    break
            if not flag: continue

            print axb_bound, x_bound, '=' * 50
            print 'axb_prob: ', seq
            print 'x_prob: ', seq1
            print 'difference_prob: ', seq2
            fff()
            dump([seq, seq1, seq2],
                 open('nonadjacent_%.2f_%.2f' % (axb_bound, x_bound) + suffix,
                      'w'))

            if is_plot == 'yes':
                # NOTE(review): assumes range(2, 65, jump) has as many points as seq -- confirm
                f, axarr = plt.subplots(1, 3)
                axarr[0].plot(range(2, 65, jump), seq)
                axarr[1].plot(range(2, 65, jump), seq1)
                axarr[2].plot(range(2, 65, jump), seq2)

                # plt.legend(handles=[x])
                plt.ylabel('posterior')
                plt.xlabel('poo_size')
                plt.show()
Ejemplo n.º 48
0
 def Z(self):
     """
     Log normalizer: logsumexp of the posterior_score of every stored item
     """
     scores = [item.posterior_score for item in self.get_all(sorted=False)]
     return logsumexp(scores)
Ejemplo n.º 49
0
def parse_plot(lang_name, finite, is_plot):
    """
        Compute, for a range of data sizes, either the posterior-weighted F-score
        (options.WFS == 'yes') or the posterior mass of hypotheses with precision < 0.3 and
        recall > 0.9, then dump the [size, value] sequence to '<lang_name>_seq<suffix>' and
        optionally plot it.

        Rank 0 loads the hypotheses from 'out/final/' and estimates precision/recall (also
        dumped to '<lang_name>_prf_dict<suffix>'); both are broadcast to all ranks. Every rank
        handles its share of the data sizes; ranks other than 0 send their results to rank 0
        and exit the process.

        lang_name -- language name; also used to pick the input files and name the outputs
        finite -- passed to instance() together with lang_name
        is_plot -- plot when equal to 'yes'

        run: mpi supported

        example:
            mpiexec -n 12 python parse_hypothesis.py --mode=parse_plot --language=An --finite=3 --plot=yes --wfs=yes
    """
    _dir = 'out/final/'
    global size
    global rank
    topn = set()
    prf_dict = {}
    language = instance(lang_name, finite)

    if rank == 0:
        print 'loading..'
        # NOTE(review): fff() is presumably a stdout flush helper -- confirm
        fff()
        for file_name in listdir(_dir):
            if lang_name + '_' in file_name:
                # NOTE(review): the file object is never closed explicitly
                _set = load(open(_dir + file_name))
                topn.update([h for h in _set])

        print 'getting p&r..'
        fff()
        pr_data = language.sample_data_as_FuncData(1024)
        for h in topn:
            p, r = language.estimate_precision_and_recall(h, pr_data)
            # [precision, recall, F1]; F1 is defined as 0 when p + r == 0
            prf_dict[h] = [p, r, 0 if p + r == 0 else 2 * p * r / (p + r)]

        dump(prf_dict, open(lang_name + '_prf_dict' + suffix, 'w'))

    # every rank receives rank 0's hypotheses and p/r/f values
    topn = comm.bcast(topn, root=0)
    prf_dict = comm.bcast(prf_dict, root=0)

    print rank, 'getting posterior'
    fff()
    # NOTE(review): slice_list presumably splits the data sizes into one chunk per rank -- confirm
    work_list = slice_list(np.arange(235, 300, 5), size)
    seq = []

    # label used for the progress output and the plot's y axis
    pnt_str = 'Weighted F-score' if options.WFS == 'yes' else 'Posterior Probability'
    for s in work_list[rank]:
        eval_data = language.sample_data_as_FuncData(s)
        for h in topn:
            h.likelihood_temperature = 100
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for h in topn])

        if options.WFS == 'yes':
            # F1 of each hypothesis weighted by its normalized posterior
            tmp = sum(
                [prf_dict[h][2] * np.exp(h.posterior_score - Z) for h in topn])
            # TODO
            # else: tmp = sum([np.exp(h.posterior_score - Z) for h in topn if prf_dict[h][2] > 0.9])
        else:
            # posterior mass of hypotheses with low precision (< 0.3) but high recall (> 0.9)
            tmp = sum([
                np.exp(h.posterior_score - Z) for h in topn
                if (prf_dict[h][0] < 0.3 and prf_dict[h][1] > 0.9)
            ])

        if options.PROB == 'yes':
            dump([topn, Z], open(lang_name + '_prob_' + str(s) + suffix, 'w'))

        seq.append([s, tmp])
        print 'size: %.1f' % s, '%s: %.2f' % (pnt_str, tmp)
        fff()

        #debug
        # show the three hypotheses with the highest posterior score
        _list = [h for h in topn]
        _list.sort(key=lambda x: x.posterior_score, reverse=True)
        for i in xrange(3):
            print 'prob: ', np.exp(_list[i].posterior_score -
                                   Z), 'p,r: ', prf_dict[_list[i]][:2],
            print Counter([_list[i]() for _ in xrange(256)])
            print _list[i]
        print '=' * 50
        fff()

    if rank == 0:
        # gather the partial sequences of all other ranks
        for i in xrange(1, size):
            seq += comm.recv(source=i)
    else:
        comm.send(seq, dest=0)
        # worker ranks terminate here; only rank 0 reaches the code below
        sys.exit(0)

    seq.sort(key=lambda x: x[0])
    dump(seq, open(lang_name + '_seq' + suffix, 'w'))

    if is_plot == 'yes':
        x, y = zip(*seq)
        plt.plot(x, y)

        plt.ylabel(pnt_str)
        plt.xlabel('Size of Data')
        plt.title(lang_name)
        plt.show()
Ejemplo n.º 50
0
def make_pos(jump, temp):
    """
    1. read raw output
    2. compute precision & recall on nonadjacent and adjacent contents
    3. evaluate posterior probability on different data sizes
    4. dump the sequence

    run: mpiexec -n 4
    """

    print 'loading..'
    fff()
    rec = load_hypo('out/simulations/nonadjacent/', ['0'])

    print 'estimating pr'
    fff()
    pr_dict = {}
    _set = set()
    cnt_tmp = {}
    for e in rec:
        for h in e[1]:
            if h in _set: continue
            cnt = Counter([h() for _ in xrange(256)])
            # cnt = Counter([h() for _ in xrange(10)])
            cnt_tmp[h] = cnt
            base = sum(cnt.values())
            num = 0
            for k, v in cnt.iteritems():
                if k is None or len(k) < 2: continue
                if k[0] == 'a' and k[-1] == 'b': num += v
            pr_dict[h] = float(num) / base
            _set.add(h)

    work_list = range(2, 24, jump)
    space_seq = []
    for i in work_list:
        language = LongDependency(max_length=i)

        eval_data = {}
        for e in language.str_sets:
            eval_data[e] = 144.0 / len(language.str_sets)
        eval_data = [FunctionData(input=[], output=eval_data)]

        prob_dict = {}
        ada_dict = {}
        test_list = []

        for h in _set:
            h.likelihood_temperature = temp
            prob_dict[h] = h.compute_posterior(eval_data)
            p, r = language.estimate_precision_and_recall(h, cnt_tmp[h])
            ada_dict[h] = 2 * p * r / (p + r) if p + r != 0 else 0

            test_list.append([
                h.posterior_score, ada_dict[h], pr_dict[h], cnt_tmp[h],
                str(h)
            ])

        Z = logsumexp([h.posterior_score for h in _set])
        test_list.sort(key=lambda x: x[0], reverse=True)

        weighted_x = 0
        weighted_axb = 0
        for e in test_list:
            weighted_x += np.exp(e[0] - Z) * e[1]
            weighted_axb += np.exp(e[0] - Z) * e[2]
        f = open('non_w' + suffix, 'a')
        print >> f, weighted_x, weighted_axb
        f.close()
        # print rank, i, '='*50
        # for i_t in xrange(3):
        #     print 'prob: ', np.exp(test_list[i_t][0] - Z), 'x_f-score',  test_list[i_t][1], 'axb_f-score',  test_list[i_t][2]
        #     print test_list[i_t][3]
        # print test_list[i_t][5].compute_posterior(eval_data)
        # print language.estimate_precision_and_recall(test_list[i_t][5], cnt_tmp[test_list[i_t][5]])
        # fff()
        # dump(test_list, open('test_list_'+str(rank)+'_'+str(i)+suffix, 'w'))

        # space_seq.append([prob_dict, ada_dict])
        print 'rank', rank, i, 'done'
        fff()

    dump([space_seq, pr_dict], open('non_seq' + str(rank) + suffix, 'w'))
Ejemplo n.º 51
0
def make_staged_posterior_seq(_dir, temperature, lang_name, dtype):
    """
        script: python parse_hypothesis.py --mode=make_staged_posterior_seq --file=file --temp=1 --language=AnBn --dtype=staged/uniform

        1. read raw file
        2. compute fixed Counter
        3. compute posterior for different amounts

        dumped posterior format: [topn, [z,amount,finite,[s1,s2,....]], [], [], ....]

        NOTE: if _dir is previously dumped posterior seq, then we use it
    """

    if not (os.path.isfile(_dir) and 'posterior_seq' in _dir):

        topn = set()

        for filename in os.listdir(_dir):
            if ('staged' in filename
                    or 'normal' in filename) and 'seq' not in filename:
                print 'load', filename
                _set = load(open(_dir + filename))
                topn.update([h for h in _set])
        topn = list(topn)

        # fix the llcnts to save time and make curve smooth
        print 'get llcnts...'
        for h in topn:
            llcnts = Counter([h() for _ in xrange(2048)])
            h.fixed_ll_counts = llcnts

        seq = []
        seq.append(topn)

        for amount, finite in mk_staged_wlist(0, 200, 2, [48, 96]):

            print 'posterior on', amount, finite

            if dtype == 'staged':
                language = instance(lang_name, finite)
                eval_data = language.sample_data_as_FuncData(amount)
            elif dtype == 'uniform':
                eval_data = uniform_data(amount, 12)

            for h in topn:
                h.likelihood_temperature = temperature
                h.compute_posterior(eval_data)

            Z = logsumexp([h.posterior_score for h in topn])
            seq.append([Z, amount, finite, [h.posterior_score for h in topn]])

        dump(seq, open(dtype + '_posterior_seq' + suffix, 'w'))

    else:
        seq = load(open(_dir))

    # ====================== compute KL based on seq =======================

    print 'compute kl seq...'
    kl_seq = []
    topn = seq.pop(0)
    for i in xrange(len(seq) - 1):
        kl_seq.append([seq[i][1], compute_kl2(seq[i], seq[i + 1])])

    dump(kl_seq, open(dtype + '_kl_seq' + suffix, 'w'))
Ejemplo n.º 52
0
def make_pos2(jump, temp):
    """
    1. read raw output
    2. compute precision & recall on nonadjacent and adjacent contents
    3. evaluate posterior probability on different data sizes
    4. dump the sequence

    run: mpiexec -n 4
    """

    print 'loading..'
    fff()
    rec = load_hypo('out/simulations/nonadjacent/', ['0'])

    # TODO one do this
    print 'estimating pr'
    fff()
    pr_dict = {}
    _set = set()
    cnt_tmp = {}
    for e in rec:
        for h in e[1]:
            if h in _set: continue
            cnt = Counter([h() for _ in xrange(1024)])
            cnt_tmp[h] = cnt
            base = sum(cnt.values())
            num = 0
            for k, v in cnt.iteritems():
                if k is None or len(k) < 2: continue
                if k[0] + k[-1] in ['ab', 'cd', 'ef']: num += v
            pr_dict[h] = float(num) / base

            # fix the h_output
            h.h_output = cnt
            _set.add(h)

    work_list = range(2, 17, jump)
    for i in work_list:
        language = LongDependency(max_length=i)

        eval_data = {}
        for e in language.str_sets:
            eval_data[e] = 144.0 / len(language.str_sets)
        eval_data = [FunctionData(input=[], output=eval_data)]

        score = np.zeros(len(_set), dtype=np.float64)
        prec = np.zeros(len(_set), dtype=np.float64)

        # prob_dict = {}
        # test_list = []

        for ind, h in enumerate(_set):
            h.likelihood_temperature = temp
            score[ind] = h.compute_posterior(eval_data)
            prec[ind] = pr_dict[h]
            # prob_dict[h] = h.compute_posterior(eval_data)
            # test_list.append([h.posterior_score, pr_dict[h], cnt_tmp[h], str(h), h])

        # test_list.sort(key=lambda x: x[0], reverse=True)
        # Z = logsumexp([h.posterior_score for h in _set])
        #
        # weighted_axb = sum([np.exp(e[0] - Z) * e[1] for e in test_list])
        # print i, weighted_axb
        # for i_t in xrange(3):
        #     print 'prob: ', np.exp(test_list[i_t][0] - Z), 'axb_f-score',  test_list[i_t][1]
        #     print test_list[i_t][2]
        #     # print test_list[i_t][4].compute_posterior(eval_data)
        #     # print language.estimate_precision_and_recall(test_list[i_t][5], cnt_tmp[test_list[i_t][5]])
        # print '='*50
        # fff()

        #
        # f = open('non_w'+suffix, 'a')
        # print >> f, Z, weighted_axb
        # print
        # f.close()
        #
        # print 'size: %i' % i, Z, weighted_axb; fff()

        if rank != 0:
            comm.send(score, dest=0)
            comm.send(prec, dest=0)
            sys.exit(0)
        else:
            for r in xrange(size - 1):
                score += comm.recv(source=r + 1)
                prec += comm.recv(source=r + 1)
            score /= size
            prec /= size
            Z = logsumexp(score)

            weighted_axb = np.sum(np.exp(score - Z) * prec)

            f = open('non_w' + suffix, 'a')
            print >> f, Z, weighted_axb
            print i, Z, weighted_axb
            fff()
            f.close()
Ejemplo n.º 53
0
def parse_nonadjacent(temperature):
    """
        load the hypothesis space and compute weighted F-scores of nonadjacent dependency on different pool sizes.
        replace the make_pos function

        example script:
            mpiexec -n 12 python parse_hypothesis.py --mode=nonadjacent_mk --temp=100
    """
    eval_data_size = 1024
    global size
    global rank
    pr_dict = {}
    _set = set()

    if rank == 0:
        print 'loading..'
        fff()
        rec = load_hypo('out/simulations/nonadjacent/', ['_'])

        print 'estimating pr'
        fff()

        for e in rec:
            for h in e[1]:

                if h in _set: continue

                cnt = Counter([h() for _ in xrange(eval_data_size)])
                num = 0
                for k, v in cnt.iteritems():
                    if k is None or len(k) < 2: continue
                    if k[0] + k[-1] in ['ab', 'cd', 'ef']: num += v

                pr_dict[h] = float(num) / eval_data_size
                _set.add(h)

        #debug
        _list = [[h, pr] for h, pr in pr_dict.iteritems()]
        _list.sort(key=lambda x: x[1], reverse=True)
        for i in xrange(10):
            print 'p,r: ', _list[i][1],
            print Counter([_list[i][0]() for _ in xrange(256)])
            print _list[i][0]
        print '=' * 50
        fff()

    print "sync..."
    fff()
    pr_dict = comm.bcast(pr_dict, root=0)
    _set = comm.bcast(_set, root=0)

    # work_list = slice_list(np.arange(2, 65, 2), size)
    work_list = slice_list(np.arange(10, 66, 5), size)
    seq = []
    for s in work_list[rank]:
        wfs = 0.0
        language = LongDependency(max_length=s)
        eval_data = [
            FunctionData(input=[],
                         output={
                             e: float(eval_data_size) / s
                             for e in language.str_sets
                         })
        ]

        for h in _set:
            h.likelihood_temperature = temperature
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for h in _set])
        seq.append([
            s,
            sum([pr_dict[h] * np.exp(h.posterior_score - Z) for h in _set])
        ])

        #debug
        _list = [h for h in _set]
        _list.sort(key=lambda x: x.posterior_score, reverse=True)
        print 'pool size: ', s
        for i in xrange(3):
            print 'prob: ', np.exp(_list[i].posterior_score -
                                   Z), 'p,r: ', pr_dict[_list[i]],
            print Counter([_list[i]() for _ in xrange(256)])
            print _list[i]
        print '=' * 50
        fff()

    if rank == 0:
        for i in xrange(1, size):
            seq += comm.recv(source=i)
    else:
        comm.send(seq, dest=0)
        sys.exit(0)

    seq.sort(key=lambda x: x[0])
    f = open('nonadjacent_wfs_seq' + suffix, 'w')
    for s, wfs in seq:
        print >> f, s, wfs
    f.close()
Ejemplo n.º 54
0
def parse_cube(lang_name, finite):
    """
        reads hypotheses of lang_name, estimates the p/r and posterior score, and saves them into a cube (list of tables)

        data structure:
            stats = [cube, topn]
            cube = [[size, Z, table], [size, Z, table], ...]
            table = [[ind, score, p, r, f, strs], [ind, score, p, r, f, strs], ...]

        NOTE: topn here is a dict, you can use ind to find the h

        example script:
            mpiexec -n 12 python parse_hypothesis.py --mode=parse_cube --language=An --finite=3/10
    """
    _dir = 'out/'
    global size
    global rank
    topn = dict()
    prf_dict = {}
    language = instance(lang_name, finite)

    if rank == 0:
        truncate_flag = False if (lang_name == 'An' and finite <= 3) else True
        set_topn = set()
        print 'loading..'; fff()
        for file_name in listdir(_dir):
            if lang_name + '_' in file_name:
                _set = load(open(_dir+file_name))
                set_topn.update([h for h in _set])

        print 'getting p&r..'; fff()
        pr_data = language.sample_data_as_FuncData(2048)
        for h in set_topn:
            p, r, h_llcounts = language.estimate_precision_and_recall(h, pr_data, truncate=truncate_flag)
            prf_dict[h] = [p, r, 0 if p+r == 0 else 2*p*r/(p+r)]
            h.fixed_ll_counts = h_llcounts

        topn = dict(enumerate(set_topn))
        print 'bcasting..'; fff()

    topn = comm.bcast(topn, root=0)
    prf_dict = comm.bcast(prf_dict, root=0)

    print rank, 'getting posterior'; fff()
    # work_list = slice_list(np.arange(0, 72, 6), size)
    work_list = slice_list(np.arange(120, 264, 12), size)

    cube = []
    for s in work_list[rank]:
        eval_data = language.sample_data_as_FuncData(s)
        for ind, h in topn.iteritems():
            h.likelihood_temperature = 100
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for ind, h in topn.iteritems()])

        table = [[ind, h.posterior_score, prf_dict[h][0], prf_dict[h][1], prf_dict[h][2], h.fixed_ll_counts] for ind, h in topn.iteritems()]
        table.sort(key=lambda x: x[1], reverse=True)
        cube += [[s, Z, table]]

        print rank, s, 'done'; fff()

    if rank == 0:
        for i in xrange(1, size):
            cube += comm.recv(source=i)
    else:
        comm.send(cube, dest=0)
        print rank, 'table sent'; fff()
        sys.exit(0)

    cube.sort(key=lambda x: x[0])
    dump([cube, topn], open(lang_name+'_stats'+suffix, 'w'))
Ejemplo n.º 55
0
        # print h.prior, h.likelihood
        # print h
        # print sorted(list(h.top_strings))
        # print sorted(list(top_data_strings))
        # print top_data_strings - h.top_strings
        # print "---------------"

    print "# Computed hypotheses for ", options.LANG

    precision, recall = numpy.zeros(options.NDATA), numpy.zeros(options.NDATA)

    for i, di in enumerate(data_range):

        posteriors = [h.prior + h.likelihood * float(di) / float(LARGE_SAMPLE) for h in hypotheses]
        # posteriors = [h.posteriors[i] for h in hypotheses]
        Z = logsumexp(posteriors)

        # print [(h.accuracy, exp(p-Z)) for h,p in zip(hypotheses, posteriors)]

        precision[i] = sum([h.precision*exp(p-Z) for h,p in zip(hypotheses, posteriors)])
        recall[i]    = sum([h.recall   *exp(p-Z) for h,p in zip(hypotheses, posteriors)])
    print "# Computed precision and recall for ", options.LANG, precision[-1], recall[-1]

    ####################################################################################################################
    # Plot it
    ####################################################################################################################

    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(2,1.5))
    p = fig.add_subplot(111)
    p.semilogx(data_range, precision, linewidth=3)