Example #1
def lp_propose(grammar, x, y, resampleProbability=lambdaOne, xZ=None):
    """
    Returns the log probability of starting at x and ending up at y via a regeneration move.
    Any node is a candidate if the trees are identical except for what's below those nodes
    (although what's below *can* be identical!).

    NOTE: This does NOT take into account insert/delete moves.
    NOTE: Not so simple, because we must count multiple paths.
    NOTE: This is currently not correct, because it mishandles bound variables, which now have
    unique names. It also seems to add too many rules to the grammar, probably via recurse_up.
    """
    RP = -Infinity

    if isinstance(x, FunctionNode) and isinstance(y, FunctionNode) and x.returntype == y.returntype:

        # compute the normalizer
        if xZ is None:
            xZ = x.sample_node_normalizer(resampleProbability=resampleProbability)

        # Well we could select x's root to go to Y, but we must recompute y under the current grammar
        with BVRuleContextManager(grammar, x, recurse_up=True):
            RP = logplusexp(RP, log(1.0*resampleProbability(x)) - log(xZ) + grammar.log_probability(y))

        if x.name == y.name and x.args is not None and y.args is not None and len(x.args) == len(y.args):

            # how many kids are not equal, and where was the last?
            mismatch_count, mismatch_index = 0, 0
            for i, (xa, ya) in enumerate(zip(x.args, y.args)):
                if xa != ya: # checks whole subtree!
                    mismatch_count += 1
                    mismatch_index = i
                if mismatch_count > 1: break # can't win

            if mismatch_count > 1: # more than one mismatched child: only regenerating at x itself could produce y, and that path was already counted above

                pass

            elif mismatch_count == 1: # we could propose to x, or x.args[mismatch_index], but nothing else (nothing else will fix the mismatch)

                with BVRuleContextManager(grammar, x, recurse_up=False): # recurse, but keep track of bv
                    RP = logplusexp(RP, lp_propose(grammar, x.args[mismatch_index], y.args[mismatch_index], resampleProbability=resampleProbability, xZ=xZ))

            else: # identical trees -- we could propose to any, so that's just the tree probability below convolved with the resample p

                for xi in x.iterate_subnodes(grammar, recurse_up=True):
                    if xi is not x: # but we already counted ourself (NOTE: Must be "is", not ==)
                        # Here we use grammar.log_probability since the grammar may have changed with bv
                        RP = logplusexp(RP, log(resampleProbability(xi)*1.0) - log(xZ) + grammar.log_probability(xi))

    return RP
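
The snippets on this page lean on a few log-space helpers from LOTlib.Miscellaneous whose definitions are not shown. The following is a minimal sketch of the behavior the examples assume (the real implementations may differ in edge-case handling):

from math import log, exp, expm1

Infinity = float('inf')

def logplusexp(a, b):
    # numerically stable log(exp(a) + exp(b))
    if a == -Infinity: return b
    if b == -Infinity: return a
    m = max(a, b)
    return m + log(exp(a - m) + exp(b - m))

def log1mexp(a):
    # log(1 - exp(a)) for a <= 0
    return log(-expm1(a))

lambdaOne = lambda *args: 1.0                   # default resampleProbability
lambdaMinusInfinity = lambda *args: -Infinity   # defaultdict factory for log(0)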
Example #2
def probs_data_rule(grammar, rule, data, probs=np.arange(0, 2, 0.2), num_iters=10000, alpha=0.9):
    """Return the log probability of the human data for each setting of a given rule's probability.

    Args:
        grammar (LOTlib.Grammar): The grammar.
        rule (LOTlib.GrammarRule): A specific rule whose probability we vary. Use get_rule
            to get the GrammarRule for a name, e.g. 'union_'.
        data (list): List of (input_data, output_data) pairs; see prob_data.
        probs (iterable): Values to try for the rule's probability.

    Returns:
        list: Log probability of the human data for each value in `probs`.

    Example:
        >>> data = [([2, 8, 16], {4: (10, 2), 6: (4, 8), 12: (7, 5)}),      # (data set 1)
        ...         ([3, 9, 13], {6: (11, 1), 5: (3, 9), 12: (8, 4)})]      # (data set 2)
        >>> probs_data_rule(grammar, rule, data, probs=[0., 1., 2., 3., 4.])
        [-0.923, -2.48, -5.12, -0.44, -6.36]

    """
    dist = []
    orig_p = rule.p
    for p in probs:
        rule.p = p
        # logplusexp takes two arguments, so fold it over the per-dataset log probabilities
        p_human = reduce(logplusexp, [prob_data(grammar, d[0], d[1], num_iters, alpha) for d in data], -Infinity)
        dist.append(p_human)
    rule.p = orig_p
    return dist
Example #3
def prob_data(grammar, input_data, output_data, num_iters=10000, alpha=0.9):
    """Compute the probability of generating human data given our grammar & input data.

    Args:
        grammar (LOTlib.Grammar): The grammar.
        input_data (list): List of integers, the likelihood of the model is initially computed with these.
        output_data (dict): Maps each output value to a (# yes, # no) tuple of human responses.

    Returns:
         float: Estimated log probability of generating the human data.

    """
    model_likelihoods = likelihood_data(grammar, input_data, output_data, num_iters, alpha)
    p_output = -Infinity

    for o, (num_yes, num_no) in output_data.items():
        p = model_likelihoods[o]    # log probability the model says "yes" to o
        k = num_yes                 # num. yes responses
        n = num_yes + num_no        # num. trials
        bc = factorial(n) / (factorial(k) * factorial(n-k))             # binomial coefficient
        p_o = log(bc) + (k*p) + (n-k)*log1mexp(p)                       # binomial likelihood, log version
        p_output = logplusexp(p_output, p_o)
        # linear version: bc * pow(exp(p), k) * pow(1 - exp(p), n - k)

    return p_output
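
A quick standalone check that the log version above matches the linear version (nothing here depends on LOTlib):

from math import log, exp, factorial

p = log(0.7)                # log probability of a "yes" response
k, n = 10, 12               # 10 yes responses out of 12 trials
bc = factorial(n) / (factorial(k) * factorial(n - k))
log_version = log(bc) + k*p + (n - k)*log(1.0 - exp(p))
linear_version = bc * exp(p)**k * (1.0 - exp(p))**(n - k)
assert abs(exp(log_version) - linear_version) < 1e-12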
Example #4
def cons_d(x, y):
    """Concatenation of two string distributions: out[a + b] accumulates P(a) * P(b) in log space."""
    out = defaultdict(lambdaMinusInfinity)

    for a, av in x.items():
        for b, bv in y.items():
            out[a + b] = logplusexp(out[a + b], av + bv)
    return out
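
For intuition, here is cons_d applied to two small string distributions (using the helper sketch near the top of this page):

from math import log, exp

x = {'a': log(0.5), 'b': log(0.5)}
y = {'b': log(0.75), 'c': log(0.25)}
out = cons_d(x, y)
print(exp(out['ab']))   # 'a' + 'b': 0.5 * 0.75 = 0.375
print(exp(out['bc']))   # 'b' + 'c': 0.5 * 0.25 = 0.125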
Example #5
def or_d(x, y):
    out = defaultdict(lambdaMinusInfinity)
    # inclusive or: true for (True, False), (False, True), and (True, True)
    out[True] = logplusexp(
        logplusexp(x.get(True, -Infinity) + y.get(False, -Infinity),
                   x.get(False, -Infinity) + y.get(True, -Infinity)),
        x.get(True, -Infinity) + y.get(True, -Infinity)
    )
    out[False] = log1mexp(out[True])
    return out
Example #6
def likelihood_data(grammar, input_data, output_data, num_iters=10000, alpha=0.9):
    """Generate a set of hypotheses, and use these to estimate likelihood of generating the human data.

    This is taken as a weighted sum over all hypotheses.

    Args:
        grammar (LOTlib.Grammar): The grammar.
        input_data (list): List of input integers.
        output_data (dict): Maps each output value to a (# yes, # no) tuple; see prob_data.

    Returns:
        dict: Each output key maps to the summed likelihood of that single data point. Keys are the same as
        those of argument `output_data`.

    """
    hypotheses = mh_sample(input_data, grammar=grammar, num_iters=num_iters, alpha=alpha)
    Z = normalizing_constant(hypotheses)
    likelihoods = defaultdict(lambda: -Infinity)

    for h in hypotheses:
        w = h.posterior_score - Z
        for o in output_data.keys():
            old_likelihood = h.likelihood
            # TODO: is h.compute_likelihood updating posterior_score each loop?
            weighted_likelihood = h.compute_likelihood([o]) + w
            h.likelihood = old_likelihood
            likelihoods[o] = logplusexp(likelihoods[o], weighted_likelihood)

    return likelihoods
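
The normalizing_constant helper is not shown on this page. Given that Z is subtracted from each posterior_score to form a weight, it is presumably a log-sum-exp over the sampled hypotheses; a sketch under that assumption:

def normalizing_constant(hypotheses):
    # log sum of exp(posterior_score) over all hypotheses
    # (in Python 3, reduce lives in functools)
    return reduce(logplusexp, [h.posterior_score for h in hypotheses], -Infinity)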
Example #7
File: Z.py Project: ebigelow/LOTlib
    def add(self, x):
        if (not self.unique) or x not in self.set:
            v = getattr(x, self.key)
            if not isnan(v):
                self.Z = logplusexp(self.Z, v)

        if self.unique:
            self.set.add(x)
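
The rest of the class is not shown. From the attributes this method touches, a minimal harness would look something like the following (the constructor signature is an assumption for illustration):

class Z(object):
    def __init__(self, key='posterior_score', unique=True):
        self.key = key          # attribute of x to accumulate
        self.unique = unique    # if True, count each x at most once
        self.set = set()
        self.Z = -Infinity      # running log normalizer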
Example #8
def if_d(prb, x, y):
    # mixture: out = P(prb=True) * x + P(prb=False) * y, in log space
    out = defaultdict(lambdaMinusInfinity)
    pt = prb[True]
    pf = prb[False]
    for a, av in x.items():
        out[a] = av + pt
    for b, bv in y.items():
        out[b] = logplusexp(out[b], bv + pf)

    return out
Example #9
def prob_data_rule(grammar, rule, data, p, num_iters=10000, alpha=0.9):
    """Return the probabilities of set of data given a single p value for a rule."""
    orig_p = rule.p
    rule.p = p
    p_human = -Infinity  # identity element for logplusexp
    for d in data:
        # get probability of producing this data pair, add to total
        p_human_d = prob_data(grammar, d[0], d[1], num_iters, alpha)
        p_human = logplusexp(p_human, p_human_d)
    rule.p = orig_p
    return p_human
Example #10
def compute_outcomes(f, *args, **kwargs):
    """
    Return a dictionary of outcomes using our RandomContext tools, giving each possible trace (up to the
    given depth) and its probability.
    f here is a function of a context, as in f(context, *args).

    kwargs['Cfirst'] controls whether the context is the first or the last argument to f. It cannot go
    anywhere else.

    In kwargs you can pass 'catchandpass' as a tuple of exceptions to catch and silently ignore.
    """
    out = defaultdict(lambdaMinusInfinity)  # dict from outcomes to log probabilities that we accumulate

    cs = ContextSet()          # this is the "open" set of contexts we need to explore
    cs.add(RandomContext(cs))  # add a single context with no history

    i = 0
    while len(cs) > 0:
        # pop an element from the context set
        # TODO: we should probably use a heapq, taking the highest-probability sequences first
        context = cs.pop()

        try:
            # does the context go at the beginning or the end of f's arguments?
            if kwargs.get('Cfirst', True):
                v = f(context, *args)  # when we call context.flip, we may update cs with new paths to explore
            else:
                newargs = args + (context,)
                v = f(*newargs)

            out[v] = logplusexp(out[v], context.lp)  # add up the lp for this outcome
        except kwargs.get('catchandpass', ()) as e:  # () catches nothing by default; None would raise a TypeError
            pass
        except ContextSizeException:  # prune that path
            pass

        if i >= kwargs.get('maxit', 1000):
            return out  ## TODO: could either return the partial answer here or raise an exception

        if len(cs) > kwargs.get('maxcontext', 1000):
            # sometimes we can generate way too many contexts, so let's avoid that
            raise TooManyContextsException

        i += 1

    return out
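
As a toy use: assuming RandomContext exposes a flip(p) that returns True or False and pushes the unexplored branch back onto the context set (as the comment about context.flip suggests), enumerating two weighted coin flips looks like:

def two_flips(context):
    # each flip forks the trace; compute_outcomes explores both branches
    return (context.flip(0.6), context.flip(0.6))

outcomes = compute_outcomes(two_flips)
# expected: {(True, True): log(0.36), (True, False): log(0.24),
#            (False, True): log(0.24), (False, False): log(0.16)}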
Example #11
def add_to_collapsed_trees(t):
    resps = ';'.join(map(str, get_tree_set_responses(t, TESTING_SET)))
    tprior = grammar.log_probability(t)

    if resps in collapsed_forms: # add this tree's mass to the existing collapsed form
        collapsed_prob = grammar.log_probability(collapsed_forms[resps])
        total_prob = logplusexp(collapsed_prob, tprior)
        collapsed_forms[resps].my_log_probability = total_prob
        if tprior > collapsed_forms[resps].display_tree_probability: # display the most concise form
            t.my_log_probability = total_prob # carry the accumulated mass over to the new display tree
            collapsed_forms[resps] = t
            collapsed_forms[resps].display_tree_probability = tprior
    else:
        collapsed_forms[resps] = t
        collapsed_forms[resps].display_tree_probability = tprior
        t.my_log_probability = tprior # FunctionNode uses this value when we call log_probability()
        print ">>", all_tree_count, len(collapsed_forms),  t, tprior
Example #12
    def next(self):

        nxt = MultipleChainMCMC.next(self)  # get the next one
        idx = self.chain_idx
        if nxt not in self.seen:
            self.chainZ[idx] = logplusexp(self.chainZ[idx], nxt.posterior_score)
            self.seen.add(nxt)

        # process the situation where we need to re-organize
        if self.nsamples % (self.within_steps * self.nchains) == 0 and self.nsamples > 0:
            self.refresh()

        self.nsamples += 1

        return nxt
Example #13
    def compute_single_likelihood(self, datum):
        assert isinstance(datum.output, dict)

        hp = self(*datum.input)  # output dictionary, output->probabilities
        assert isinstance(hp, dict)

        s = 0.0
        for k, dc in datum.output.items():

            # probability of generating k under random typing; the +1 is for an EOS marker
            lp = -log(self.alphabet_size+1)*(len(k)+1) + log(1.0-datum.alpha)
            if k in hp:
                lp = logplusexp(lp, hp[k] + log(datum.alpha)) # if we could have been generated
            s += dc*lp

        return s
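
A standalone numeric reading of the random-typing term above (alphabet_size and alpha are made-up values for illustration):

from math import log, exp

alphabet_size, alpha = 2, 0.9
k = 'ab'
# len(k)+1 slots (including EOS), each uniform over alphabet_size+1 symbols,
# weighted by the noise probability (1 - alpha)
lp = -log(alphabet_size + 1) * (len(k) + 1) + log(1.0 - alpha)
print(exp(lp))   # 0.1 * (1/3)**3 = 0.0037...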
Example #14
File: Z.py Project: wrongu/LOTlib
    def process(self, x):
        v = getattr(x, self.key)
        if not isnan(v):
            self.Z = logplusexp(self.Z, v)

        return x
Example #15
def equal_d(x, y):
    peq = -Infinity
    for a, v in x.items():
        peq = logplusexp(peq, v + y.get(a, -Infinity))  # P(x=a,y=a)
    return {True: peq, False: log1mexp(peq)}
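
Using the helper sketch from the top of this page, equal_d marginalizes the chance that two independent draws coincide:

from math import log, exp

x = {'a': log(0.5), 'b': log(0.5)}
y = {'a': log(0.25), 'b': log(0.75)}
d = equal_d(x, y)
print(exp(d[True]))    # 0.5*0.25 + 0.5*0.75 = 0.5
print(exp(d[False]))   # 0.5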
Example #16
def car_d(x):
    out = defaultdict(lambdaMinusInfinity)
    for a, av in x.items():
        v = a[1] if len(a) > 1 else ''
        out[v] = logplusexp(out[v], av)
    return out