class Top(SampleStream):
    """
    This stores the top samples and then *only* on exit does it allow them to pass through.
    (It can't before exit, since it won't know what the top samples are!)
    """

    def __init__(self, N=1000, key='posterior_score', sorted=True):
        """
        :param N: How many samples to store.
        :param key: The key we sort by.
        :param sorted: When we output, do we output sorted? (slightly slower)
        :return:
        """
        self.__dict__.update(locals())
        SampleStream.__init__(self)
        self.top = TopN(N=N, key=key)

    def process(self, x):
        """ Override process so that outputs are NOT sent to children (until exit). """
        self.top.add(x)
        return None  # Do not pass through

    def __exit__(self, t, value, traceback):
        # Only on exit do I give my data (the top samples) to my outputs.
        # NOTE: this must be self.sorted; the original passed the builtin
        # sorted function here, which is always truthy.
        for v in self.top.get_all(sorted=self.sorted):
            # Cannot just call self.process_and_push, since self.process always returns None
            if v is not None:
                for a in self.outputs:
                    a.process_and_push(v)
        return SampleStream.__exit__(self, t, value, traceback)
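# Usage sketch for Top (hedged): EchoStream is an illustrative stand-in child
# stream, not confirmed LOTlib API; it is attached via .outputs, which the
# __exit__ above iterates over. Assumes SampleStream supports the
# context-manager protocol, as the __exit__ override implies.
class EchoStream(SampleStream):
    def process(self, x):
        print x.posterior_score, x  # fires only when Top flushes on __exit__
        return x

top = Top(N=10)
top.outputs.append(EchoStream())
with top:
    for h in break_ctrlc(MHSampler(make_hypothesis(), make_data(), steps=1000)):
        top.process(h)  # buffered in the TopN; nothing reaches EchoStream yet
# exiting the with-block pushes the stored top 10 to EchoStream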
def run(damount):
    lexicon, L, hugeData = normalize(damount)
    words = target.all_words()

    def propose(current_state, bag=lexicon, probs=L):
        # Round-robin over the words: each call resamples one word's
        # hypothesis from the (log-)weighted bag.
        mod = len(current_state.all_words())
        proposal = copy(current_state)
        w = words[propose.inx % mod]
        proposal.value[w].value = weighted_sample(bag[w], probs=probs[w], log=True).value
        propose.inx += 1
        return proposal
    propose.inx = 0

    proposer = lambda x: propose(x)

    h0 = KinshipLexicon(alpha=options.alpha, epsilon=options.epsilon, s=options.s)
    for w in target.all_words():
        h0.set_word(w, LOTHypothesis(my_grammar, display='lambda recurse_, C, X: %s'))

    gs = Gibbs(h0, hugeData, proposer=proposer, steps=options.samples)
    hyps = TopN(N=options.top_count)
    for s, h in enumerate(gs):
        hyps.add(h)
        print h.prior, h.likelihood, h

    return hyps
def runparts(x, datamt):
    # problem: right now only recording the last partition, never saving from the others.
    print "Start: " + str(x) + " on this many: " + str(datamt)
    messup = TopN(options.top)
    try:
        # make a new TopN for each data amount
        topn = TopN(N=200, key="posterior_score")
        for p in break_ctrlc(partitions):
            print "Starting on partition ", p
            # Now we have to go in and fill in the nodes that are nonterminals.
            # We can do this with generate.
            v = grammar.generate(deepcopy(p))
            h0 = MyHypothesis(grammar, value=v)
            size = datamt
            data = [FunctionData(input=[], output={
                'n i k': size, 'h i N': size, 'f a n': size, 'g i f': size, 'm a N': size, 'f a m': size, 'g i k': size, 'k a n': size,
                'f a f': size, 'g i n': size, 'g i m': size, 'g i s': size, 's i f': size, 's i n': size, 'n i s': size, 's i m': size,
                's i k': size, 'h a N': size, 'f i N': size, 'h i m': size, 'h i n': size, 'h a m': size, 'n i N': size, 'h i k': size,
                'f a s': size, 'f i n': size, 'h i f': size, 'n i m': size, 'g i N': size, 'h a g': size, 's i N': size, 'n i n': size,
                'f i m': size, 's i s': size, 'h i s': size, 'n a s': size, 'k a s': size, 'f i s': size, 'n i f': size, 'm i n': size,
                's a s': size, 'f a g': size, 'k a g': size, 'k a f': size, 's a m': size, 'n a f': size, 'n a g': size, 'm i N': size,
                's a g': size, 'f i k': size, 'k a m': size, 'n a n': size, 's a f': size, 'n a m': size, 'm a s': size, 'h a f': size,
                'h a s': size, 'n a N': size, 'm i s': size, 's a n': size, 's a N': size, 'm i k': size, 'f a N': size, 'm i m': size,
                'm a g': size, 'm a f': size, 'f i f': size, 'k a N': size, 'h a n': size, 'm a n': size, 'm a m': size, 'm i f': size})]
            for h in break_ctrlc(MHSampler(h0, data, steps=options.steps, trace=False)):
                topn.add(h)
        return set(topn)
    except Exception:
        # (was a bare except, which would also swallow KeyboardInterrupt/SystemExit)
        # if we fail, we can return a blank TopN
        return messup
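# The output dictionary above assigns the same count to every syllable string;
# a sketch that builds it programmatically instead of as a literal (the
# syllable list is copied from the literal above, elided here for brevity):
syllables = ['n i k', 'h i N', 'f a n', 'g i f', 'm a N', 'f a m', 'g i k', 'k a n']
# ... plus the remaining strings from the literal ...
data = [FunctionData(input=[], output={s: size for s in syllables})]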
def construct_hypothesis_space(data_size):
    all_hypotheses = TopN()
    print 'Data size: ', data_size
    for i in range(RUNS):
        print 'Run: ', i
        hypotheses = TopN(25)
        data = generate_data(data_size)
        learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)
        for w in target.all_words():
            learner.set_word(w, make_my_hypothesis())
        j = 0
        for h in MHSampler(learner, data, SAMPLES, skip=0):
            hypotheses.add(h)
            j += 1
            if j > 0 and j % 1000 == 0:
                # checkpoint the current hypothesis set every 1000 samples
                with open('data/hypset_' + GRAMMAR_TYPE + '_' + str(data_size) + '_' + str(j) + '.pickle', 'w') as f:
                    pickle.dump(hypotheses, f)
                # sstr = str(h)
                # sstr = re.sub("[_ ]", "", sstr)
                # sstr = re.sub("presup", u"\u03BB A B . presup", sstr)
                # print sstr
        all_hypotheses.update(hypotheses)
    return all_hypotheses
def probe_MHsampler(h, language, options, name, size=64, data=None,
                    init_size=None, iters_per_stage=None, sampler=None, ret_sampler=False):
    get_data = language.sample_data_as_FuncData
    evaluation_data = get_data(size, max_length=options.FINITE)

    if data is None:
        if init_size is None:
            data = evaluation_data
        else:
            data = get_data(n=size, max_length=init_size)

    if sampler is None:
        sampler = MHSampler(h, data)
    else:
        sampler.data = data

    best_hypotheses = TopN(N=options.TOP_COUNT)

    iter = 0
    for h in sampler:
        if iter == options.STEPS:
            break
        if iter % 100 == 0:
            print '---->', iter

        best_hypotheses.add(h)

        if iter % options.PROBE == 0:
            # re-score the stored top hypotheses on held-out evaluation data
            for h in best_hypotheses:
                h.compute_posterior(evaluation_data)
            Z = logsumexp([h.posterior_score for h in best_hypotheses])

            pr_data = get_data(1024, max_length=options.FINITE)
            weighted_score = 0
            for h in best_hypotheses:
                precision, recall = language.estimate_precision_and_recall(h, pr_data)
                if precision + recall != 0:
                    f_score = precision * recall / (precision + recall)
                    weighted_score += np.exp(h.posterior_score - Z) * f_score
            weighted_score *= 2  # completes F1 = 2PR/(P+R)

            to_file([[iter, Z, weighted_score]], name)

        if init_size is not None and iter % iters_per_stage == 0:
            # staged curriculum: grow the max sequence length every iters_per_stage steps
            init_size += 2
            sampler.data = get_data(n=size, max_length=init_size)

        iter += 1

    if ret_sampler:
        return sampler
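# The probe computes a posterior-weighted F1 over the stored top hypotheses.
# Isolated as a helper (a sketch; estimate_precision_and_recall is the method
# used above, posterior_weighted_f1 itself is a hypothetical name):
import numpy as np

def posterior_weighted_f1(hyps, Z, language, pr_data):
    total = 0.0
    for h in hyps:
        p, r = language.estimate_precision_and_recall(h, pr_data)
        if p + r != 0:
            total += np.exp(h.posterior_score - Z) * (2.0 * p * r / (p + r))
    return total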
def run(options, ndata):
    if LOTlib.SIG_INTERRUPTED:
        return 0, set()

    language = eval(options.LANG + "()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE

    # best_ll is the per-observation log-likelihood an exact model could
    # achieve: sum_k p_k log p_k over the empirical distribution
    z = sum(data[0].output.values())
    if z > 0:
        best_ll = sum([(p / z) * log(p / z) for p in data[0].output.values()])
    else:
        best_ll = 0.0

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', "'%s'" % t, None, 1.0)

    # set up the hypothesis
    h0 = IncrementalLexiconHypothesis(grammar=grammar, alphabet_size=len(language.terminals()))
    h0.set_word(0, h0.make_hypothesis(grammar=grammar))  # make the first word at random
    h0.N = 1

    tn = TopN(N=options.TOP_COUNT)

    for outer in xrange(options.N):  # how many words do we add?
        if LOTlib.SIG_INTERRUPTED:
            return 0, set()

        # re-set the posterior, or else it's something weird
        h0.compute_posterior(data)

        # now run mcmc
        for h in break_ctrlc(MHSampler(h0, data, steps=options.STEPS)):
            h.best_ll = best_ll  # just store this
            tn.add(copy(h))

            if options.TRACE:
                print h.posterior_score, h.prior, h.likelihood, h.likelihood / ndata, h
                v = h()
                sortedv = sorted(v.items(), key=operator.itemgetter(1), reverse=True)
                print "{" + ', '.join(["'%s':%s" % i for i in sortedv]) + "}"

        # and start from where we ended
        h0 = copy(h)
        h0.deepen()

    return ndata, tn
def run(data, TOP=100, STEPS=1000):
    # if LOTlib.SIG_INTERRUPTED:
    #     return ""
    # data = [FunctionData(input=(), output={lst: len(lst)})]

    h0 = MyHypothesis()
    tn = TopN(N=TOP)

    # run the sampler
    # counter = Counter()
    for h in MHSampler(h0, data, steps=STEPS, acceptance_temperature=1.0, likelihood_temperature=1.0):
        # counter[h] += 1
        tn.add(h)

    # normalize the stored scores and return (hypothesis, probability) pairs
    z = logsumexp([h.posterior_score for h in tn])
    sort_post_probs = [(h, exp(h.posterior_score - z)) for h in tn.get_all(sorted=True)][::-1]
    return sort_post_probs
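# Consuming the result (a sketch; assumes tn.get_all(sorted=True) yields
# worst-to-best, so the [::-1] above puts the best hypothesis first):
# pairs = run(data)
# best_h, best_p = pairs[0]
# print best_p, best_h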
def runme(chain, dataamt):
    if LOTlib.SIG_INTERRUPTED:
        return ()

    data = make_data(dataamt)
    tn = TopN(options.top)
    h0 = make_hypothesis()
    for h in break_ctrlc(MHSampler(h0, data, steps=options.steps, skip=0)):
        # print h.posterior_score, h.prior, h.likelihood, h
        h.likelihood_per_data = h.likelihood / dataamt
        tn.add(h)
    return tn
def myrun(observed_set):
    if LOTlib.SIG_INTERRUPTED:
        return set()

    h0 = NumberGameHypothesis(grammar=grammar)
    data = [FunctionData(input=[], output=observed_set, alpha=ALPHA)]

    tn = TopN(N=options.TOP_COUNT)
    for h in break_ctrlc(MHSampler(h0, data, steps=options.STEPS)):
        tn.add(h)

    print "# Finished %s" % str(observed_set)
    return set(tn.get_all())
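# Example invocation (a sketch; the observed set is illustrative):
# for h in myrun(set([2, 4, 8, 16])):
#     print h.posterior_score, h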
def standard_sample(make_hypothesis, make_data, show_skip=9, show=True, N=100,
                    save_top='top.pkl', alsoprint='None', **kwargs):
    """
    Just a simplified interface for sampling, allowing printing (showing), returning the top,
    and saving. This is used by many examples, and is meant to easily allow running with a
    variety of parameters.

    NOTE: show_skip is a skip *only* on printing.
    **kwargs get passed to the sampler.
    """
    if LOTlib.SIG_INTERRUPTED:
        return TopN()  # So we don't waste time!

    h0 = make_hypothesis()
    data = make_data()

    best_hypotheses = TopN(N=N)

    f = eval(alsoprint)

    sampler = MHSampler(h0, data, **kwargs)

    # # TODO change acceptance temperature over time
    # sampler.acceptance_temperature = 0.5

    for i, h in enumerate(break_ctrlc(sampler)):
        # if i % 10000 == 0 and i != 0:
        #     sampler.acceptance_temperature = min(1.0, sampler.acceptance_temperature + 0.1)
        #     print '=' * 50
        #     print 'change acc temperature to', sampler.acceptance_temperature

        best_hypotheses.add(h)

        if show and i % (show_skip + 1) == 0:
            print i, \
                h.posterior_score, \
                h.prior, \
                h.likelihood, \
                f(h) if f is not None else '', \
                qq(cleanFunctionNodeString(h))

    if save_top is not None:
        print "# Saving top hypotheses"
        with open(save_top, 'w') as f:
            pickle.dump(best_hypotheses, f)

    return best_hypotheses
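# Typical call (a sketch; make_hypothesis/make_data come from the caller's
# model, and steps is forwarded to MHSampler via **kwargs):
# top = standard_sample(make_hypothesis, make_data, N=50, steps=100000, show_skip=999)
# for h in top.get_all(sorted=True):
#     print h.posterior_score, h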
def partitionMCMC(data, partitions):
    print data
    topn = TopN(N=200, key="posterior_score")
    for p in break_ctrlc(partitions):
        print "Starting on partition ", p
        # Now we have to go in and fill in the nodes that are nonterminals
        v = grammar.generate(deepcopy(p))
        # h0 = MyHypothesis(grammar, value=v)
        h0 = make_hypothesis()
        print h0
        for h in break_ctrlc(MHSampler(h0, data, steps=5000, skip=0)):
            # Show the partition and the hypothesis
            print h.posterior_score, p, h, howyoudoin(h)
            topn.add(h)
    return set(topn)
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return 0, set()

    language = eval(options.LANG + "()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE
    # print data

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = IncrementalLexiconHypothesis(grammar=grammar)

    tn = TopN(N=options.TOP_COUNT)

    for outer in xrange(options.N):  # how many words do we add?
        # add to the grammar
        grammar.add_rule('SELFF', '%s' % (outer), None, 1.0)

        # Add one more word to the lexicon
        h0.set_word(outer, h0.make_hypothesis(grammar=grammar))
        h0.N = outer + 1
        assert len(h0.value.keys()) == h0.N == outer + 1

        # now run mcmc
        for h in break_ctrlc(MHSampler(h0, data, steps=options.STEPS)):
            tn.add(h)
            # print h.posterior_score, h
            # print getattr(h, 'll_counts', None)

        # and start from where we ended
        h0 = deepcopy(h)  # must deepcopy

    return ndata, tn
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    language = eval(options.LANG + "()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE
    # print data

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")

    print "# Starting on ", h0

    tn = TopN(N=options.TOP_COUNT)

    # print h0.compute_posterior(data)
    # for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
    # # for h in MHSampler(h0, data, steps=options.STEPS, trace=True):
    #     print h.posterior_score, h
    #     print getattr(h, 'll_counts', None)

    outpath = prefix + 'hypotheses_' + options.LANG + '_' + str(rank) + '_' + str(ndata) + '_' + suffix + ".txt"
    with open(outpath, 'a') as ofile:
        for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
            tn.add(h)
            # print h.posterior_score, getattr(h, 'll_counts', None), h
            if i % options.SKIP == 0 and h.posterior_score > -Infinity:
                print >>ofile, i, ndata, h.posterior_score, h.prior, h.likelihood, h.likelihood / ndata
                print >>ofile, getattr(h, 'll_counts', None)
                print >>ofile, h, '\0'  # must add \0 when not a Lexicon

    return tn
def run(data_amount):
    print "Starting chain on %s data points" % data_amount
    data = makeLexiconData(target, four_gen_tree_context, n=data_amount,
                           alpha=options.alpha, verbose=True)

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(w, LOTHypothesis(my_grammar, display='lambda recurse_, C, X: %s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0, data, options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        hyps.add(h)

    import pickle
    print 'Writing ' + data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl'
    with open('Chains/' + data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl', 'w') as f:
        pickle.dump(hyps, f)

    return hyps
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    language = eval(options.LANG + "()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE
    print data

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = MyHypothesis(grammar=grammar, N=options.N)
    tn = TopN(N=options.TOP_COUNT)

    outpath = prefix + 'hypotheses_' + options.LANG + '_' + str(rank) + '_' + str(ndata) + '_' + suffix + ".txt"
    with open(outpath, 'a') as ofile:
        for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
            tn.add(h)
            # print h.posterior_score, getattr(h, 'll_counts', None), h
            if i % options.SKIP == 0:
                print >>ofile, "\n"
                print >>ofile, i, ndata, h.posterior_score, h.prior, h.likelihood, h.likelihood / ndata
                print >>ofile, getattr(h, 'll_counts', None),
                print >>ofile, h  # ends in \0 so we can sort with sort -g -z

    return tn
def run(hypothesis, data_amount):
    print "Starting chain on %s data points" % data_amount
    data = makeLexiconData(target, four_gen_tree_context, n=data_amount,
                           alpha=options.alpha, verbose=True)

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(w, LOTHypothesis(grammar=my_grammar, value=hypothesis.value[w].value,
                                     display='lambda recurse_, C, X: %s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0, data, options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 100 == 0:
            pass  # print h.likelihood, h.prior, h
        hyps.add(h)

    import pickle
    print 'Writing ' + data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl'
    with open('Chains/' + data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl', 'w') as f:
        pickle.dump(hyps, f)

    return hyps
def run(damount):
    lexicon, L, hugeData = normalize(damount)
    words = target.all_words()

    def propose(current_state, bag=lexicon, probs=L):
        mod = len(current_state.all_words())
        proposal = copy(current_state)
        w = words[propose.inx % mod]
        proposal.value[w].value = weighted_sample(bag[w], probs=probs[w], log=True).value
        propose.inx += 1
        return proposal
    propose.inx = 0

    proposer = lambda x: propose(x)

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target.all_words():
        h0.set_word(w, LOTHypothesis(my_grammar, display='lambda recurse_, C, X: %s'))

    gs = Gibbs(h0, hugeData, proposer=proposer, steps=options.samples)
    hyps = TopN(N=options.top_count)
    for s, h in enumerate(gs):
        hyps.add(h)
    return hyps
def run(data_size, my_finite_trees):
    data = generate_data(data_size)

    # the prior for each tree
    prior = np.array([x.compute_prior() for x in my_finite_trees])
    prior = prior - logsumexp(prior)

    # the likelihood weights for each hypothesis
    weights = np.array([my_weight_function(h) for h in my_finite_trees])

    # response[h, di] gives the response of the h'th tree to data di
    response = np.array([mapto012(get_tree_set_responses(t, data)) for t in my_finite_trees])

    # Now actually run:
    hypset = TopN(N=TOP_COUNT)

    learner = VectorizedLexicon_DistanceMetricProposal(target.all_words(), my_finite_trees, prior)
    databundle = [response, weights]
    generator = MHSampler(learner, databundle, STEPS, skip=SKIP)
    for g in generator:
        hypset.add(VectorizedLexicon_to_SimpleLexicon(g), g.posterior_score)

    return hypset
grammar.add_rule('PREDICATE', 'is_size_(x, "miniature")', None, 1.0)
grammar.add_rule('PREDICATE', 'is_size_(x, "intermediate")', None, 1.0)
grammar.add_rule('PREDICATE', 'is_size_(x, "colossal")', None, 1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hypothesis
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis

def make_hypothesis(grammar=grammar, **kwargs):
    return RationalRulesLOTHypothesis(grammar=grammar, rrAlpha=1.0, **kwargs)

if __name__ == "__main__":
    import pickle
    from LOTlib import break_ctrlc
    from LOTlib.TopN import TopN
    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler

    hyps = TopN(N=1000)
    mhs = MHSampler(make_hypothesis(), make_data(), 1000000,
                    likelihood_temperature=1., prior_temperature=1.)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        h.ll_decay = 0.
        hyps.add(h)

    with open('HypothesisSpace.pkl', 'w') as f:
        pickle.dump(hyps, f)
# (fragment: the lines below appear to be the tail of add_to_collapsed_trees(t),
#  inside an `if resps in collapsed_forms:` branch whose head is not shown)
        collapsed_prob = grammar.log_probability(collapsed_forms[resps])
        collapsed_forms[resps].my_log_probability = logplusexp(collapsed_prob, tprior)
        if tprior > collapsed_forms[resps].display_tree_probability:  # display the most concise form
            collapsed_forms[resps] = t
            collapsed_forms[resps].display_tree_probability = tprior
    else:
        collapsed_forms[resps] = t
        collapsed_forms[resps].display_tree_probability = tprior
        t.my_log_probability = tprior  # FunctionNode uses this value when we call log_probability()
        print ">>", all_tree_count, len(collapsed_forms), t, tprior

############################################
### Now actually enumerate trees

for t in grammar.enumerate(d=DEPTH):
    if 'presup_(False' in str(t):
        continue
    if not check_expansion(t):
        continue
    if t.count_subnodes() <= MAX_NODES:
        add_to_collapsed_trees(t)
        all_tree_count += 1
        print ">", t, grammar.log_probability(t)

## for kinder saving and unsaving:
upq = TopN()
for k in collapsed_forms.values():
    upq.add(LOTHypothesis(grammar, k, display='lambda context: %s'), 0.0)
with open(OUT, 'w') as f:
    pickle.dump(upq, f)

print "Total tree count: ", all_tree_count
print "Loading Space 1: " + options.filename
with open(options.filename, 'r') as f:
    d.update(pickle.load(f))

if options.filename2 is not None:
    print "Loading Space 2: " + options.filename2
    with open(options.filename2, 'r') as f:
        d.update(pickle.load(f))

Mass = set()

for a in range(1, 25, 2) + range(25, 251, 25):
    print "Grabbing Top " + str(options.Nsize) + " from " + str(a) + ' dp'
    data = makeLexiconData(target, four_gen_tree_context, n=a)
    simplicity_mass = TopN(N=options.Nsize)
    reuse_mass = TopN(N=options.Nsize)
    for h in d:
        # score each hypothesis under the reuse prior...
        h.posterior_score = h.compute_likelihood(data) + compute_reuse_prior(h)
        reuse_mass.add(h)
        # ...and under the standard (simplicity) posterior
        h.compute_posterior(data)
        simplicity_mass.add(h)
    Mass.update(simplicity_mass)
    Mass.update(reuse_mass)

print "Writing output file for " + str(len(Mass)) + ' hypotheses.'
with open(options.out_path, 'w') as f:
    pickle.dump(Mass, f)
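# The rescoring pattern above, isolated as a helper (a sketch; `score` is any
# function assigning a log score to a hypothesis, and TopN ranks by
# posterior_score by default):
def top_under(hyps, score, N):
    best = TopN(N=N)
    for h in hyps:
        h.posterior_score = score(h)
        best.add(h)
    return best

# e.g. (hypothetical):
# reuse_mass = top_under(d, lambda h: h.compute_likelihood(data) + compute_reuse_prior(h), options.Nsize)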
print "Loading Space 1: " + options.filename  # (opening reconstructed by analogy with the block above)
with open(options.filename, 'r') as f:
    d.update(pickle.load(f))

if options.filename2 is not None:
    print "Loading Space 2: " + options.filename2
    with open(options.filename2, 'r') as f:
        d.update(pickle.load(f))

Mass = set()

for a in range(1, 25, 2) + range(25, 251, 25):
    print "Grabbing Top " + str(options.Nsize) + " from " + str(a) + ' dp'
    data = makeZipfianLexiconData(target, four_gen_tree_context, n=a, alpha=0.9, s=0.0, epsilon=0.0)
    simplicity_mass = TopN(N=options.Nsize)
    reuse_mass = TopN(N=options.Nsize)
    for h in d:
        h.posterior_score = h.compute_likelihood(data) + compute_reuse_prior(h)
        reuse_mass.add(h)
        h.compute_posterior(data)
        simplicity_mass.add(h)
    Mass.update(simplicity_mass)
    Mass.update(reuse_mass)

print "Writing output file for " + str(len(Mass)) + ' hypotheses.'
with open(options.out_path, 'w') as f:
    pickle.dump(Mass, f)
def runparts(size, x, p):
    # problem: right now only recording the last partition, never saving from the others.
    print "Start: " + str(x) + " on this many: " + str(size)
    try:
        # make a new TopN for each data amount
        topn = TopN(N=200, key="posterior_score")
        print "Starting on partition ", p
        # Now we have to go in and fill in the nodes that are nonterminals.
        # We can do this with generate.
        v = grammar.generate(copy(p))
        h0 = MyHypothesis(grammar, value=v)
        data = [FunctionData(input=[], output={
            'n i k': size, 'h i N': size, 'f a n': size, 'g i f': size, 'm a N': size, 'f a m': size, 'g i k': size, 'k a n': size,
            'f a f': size, 'g i n': size, 'g i m': size, 'g i s': size, 's i f': size, 's i n': size, 'n i s': size, 's i m': size,
            's i k': size, 'h a N': size, 'f i N': size, 'h i m': size, 'h i n': size, 'h a m': size, 'n i N': size, 'h i k': size,
            'f a s': size, 'f i n': size, 'h i f': size, 'n i m': size, 'g i N': size, 'h a g': size, 's i N': size, 'n i n': size,
            'f i m': size, 's i s': size, 'h i s': size, 'n a s': size, 'k a s': size, 'f i s': size, 'n i f': size, 'm i n': size,
            's a s': size, 'f a g': size, 'k a g': size, 'k a f': size, 's a m': size, 'n a f': size, 'n a g': size, 'm i N': size,
            's a g': size, 'f i k': size, 'k a m': size, 'n a n': size, 's a f': size, 'n a m': size, 'm a s': size, 'h a f': size,
            'h a s': size, 'n a N': size, 'm i s': size, 's a n': size, 's a N': size, 'm i k': size, 'f a N': size, 'm i m': size,
            'm a g': size, 'm a f': size, 'f i f': size, 'k a N': size, 'h a n': size, 'm a n': size, 'm a m': size, 'm i f': size})]
        for h in break_ctrlc(MHSampler(h0, data, steps=options.steps, trace=False)):
            # print "\t", h.posterior_score, h
            topn.add(h)
        return size, set(topn)
    except Exception as e:
        print "*** Exception ignored: ", e
        # if we fail, we can return a blank TopN
        return size, set()
print_star("")
print from_seq, to_seq

data = [FunctionData(alpha=ALPHA, input=[from_seq], output={to_seq: len(to_seq)})]

h0 = MyHypothesis()
step = 0
tn = TopN(N=N_H)

# Stream from the sampler to a printer
for h in MHSampler(h0, data, steps=STEPS, acceptance_temperature=5.):
    tn.add(h)

print
for h in tn.get_all(sorted=True):
    out = h(from_seq)
    if len(out) >= len(to_seq):
        hd = hamming_distance(out[:len(to_seq)], to_seq)
    else:
        hd = 15  # fixed penalty when the output is shorter than the target
    print h.posterior_score, h.likelihood, h.prior, h, hd
    print out[:len(to_seq)]
print_star("")
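# hamming_distance is assumed here to count mismatched positions between
# equal-length sequences; a minimal sketch if it isn't already in scope:
def hamming_distance(a, b):
    return sum(x != y for x, y in zip(a, b))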
import pickle

from LOTlib import break_ctrlc
from LOTlib.TopN import TopN
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from Model import *
from TargetConcepts import TargetConcepts

NDATA = 20       # How many data points for each function?
NSTEPS = 100000
BEST_N = 500     # How many from each hypothesis to store

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = TopN(N=BEST_N)

if __name__ == "__main__":
    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TargetConcepts):
        # Set up the hypothesis
        h0 = make_hypothesis()

        # Set up some data
        data = make_data(NDATA, f)

        # Now run some MCMC, adding each sample individually (the original
        # passed the whole sampler generator to fs.add in a single call)
        fs = TopN(N=BEST_N, key="posterior_score")
        for h in break_ctrlc(MHSampler(h0, data, steps=NSTEPS, trace=False)):
            fs.add(h)

        all_hypotheses.update(fs)

    with open("hypotheses.pkl", 'w') as out_f:  # out_f avoids shadowing the loop's f
        pickle.dump(all_hypotheses, out_f)
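# Reloading the saved hypothesis space later (sketch):
# with open("hypotheses.pkl", 'r') as f:
#     all_hypotheses = pickle.load(f)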
# for h in MHSampler(h0, data_1, steps=5000):
#     print h.posterior_score

# Running and show only the top choice
from LOTlib.TopN import TopN

topChoice = TopN(N=10)
posProbs = []
stepNum = 40000

for step, h in enumerate(MHSampler(h0, make_data(data_size=1), steps=stepNum)):
    if step % 5000 == 0:
        print('current step: %d, current posterior score: %f' % (step, h.posterior_score))
    posProbs.append(h.posterior_score)
    topChoice.add(h)
    h0 = h

# The commented blocks below repeat the same loop for data_size = 2 and 3,
# chaining h0 across stages (see the consolidated sketch after this block).
# for step, h in enumerate(MHSampler(h0, make_data(data_size=2), steps=stepNum)):
#     if step % 5000 == 0:
#         print('current step: %d, current posterior score: %f' % (step, h.posterior_score))
#     posProbs.append(h.posterior_score)
#     topChoice.add(h)
#     h0 = h

# for step, h in enumerate(MHSampler(h0, make_data(data_size=3), steps=stepNum)):
#     if step % 5000 == 0:
#         print('current step: %d, current posterior score: %f' % (step, h.posterior_score))
#     posProbs.append(h.posterior_score)
#     topChoice.add(h)
#     h0 = h
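# The repeated per-data_size blocks above collapse into one loop (a sketch):
# for ds in [1, 2, 3]:
#     for step, h in enumerate(MHSampler(h0, make_data(data_size=ds), steps=stepNum)):
#         if step % 5000 == 0:
#             print('current step: %d, current posterior score: %f' % (step, h.posterior_score))
#         posProbs.append(h.posterior_score)
#         topChoice.add(h)
#         h0 = h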