Example #1
def standard_sample(make_hypothesis, make_data, skip=9, show=True, N=100, save_top='top.pkl', alsoprint='None', **kwargs):
    """
        A simplified interface for sampling that supports printing (showing), returning the top hypotheses, and saving them.
        This is used by many examples, and is meant to make it easy to run with a variety of parameters.
        NOTE: skip only controls how often samples are printed.
        **kwargs are passed on to the sampler.
    """
    if LOTlib.SIG_INTERRUPTED:
        return TopN()  # So we don't waste time!

    h0 = make_hypothesis()
    data = make_data()

    best_hypotheses = TopN(N=N)

    f = eval(alsoprint)

    for i, h in enumerate(break_ctrlc(MHSampler(h0, data, **kwargs))):
        best_hypotheses.add(h)

        if show and i%(skip+1) == 0:
            print i, \
                h.posterior_score, \
                h.prior, \
                h.likelihood, \
                f(h) if f is not None else '', \
                qq(cleanFunctionNodeString(h))

    if save_top is not None:
        print "# Saving top hypotheses"
        with open(save_top, 'w') as f:
            pickle.dump(best_hypotheses, f)

    return best_hypotheses
Example #2
def run():
    data = generate_data(target, NDATA, data_sd) # generate some data
    h0 = MAPSymbolicRegressionHypothesis(grammar, args=['x']+CONSTANT_NAMES)
    h0.CONSTANT_VALUES = numpy.zeros(NCONSTANTS) ## TODO: Move this to an initializer

    from LOTlib.Inference.MetropolisHastings import MHSampler
    for h in lot_iter(MHSampler(h0, data, STEPS, skip=SKIP, trace=False)):
        print h.posterior_score, h.likelihood, h.prior, h.CONSTANT_VALUES, qq(h)
Example #3
 def __str__(self):
     """
         By default this appends a \0 so that we can sort -z if we want (e.g. if we print a posterior first)
     """
     return '\n' + '\n'.join([
         "%-15s: %s" % (qq(w), str(v))
         for w, v in sorted(self.value.iteritems())
     ]) + '\0'
Example #4
    def process(self, x):
        # print "PrintH.process ", x

        print >>self.file_, self.prefix, \
              round(x.posterior_score,3), \
              round(x.prior,3), \
              round(x.likelihood,3), \
              qq(x)
              # qq(cleanFunctionNodeString(x))
        return x
Example #5
    def process(self, x):
        # print "PrintH.process ", x

        print >>self.file_, self.prefix, \
              round(x.posterior_score,3), \
              round(x.prior,3), \
              round(x.likelihood,3), \
              qq(x)
        # qq(cleanFunctionNodeString(x))
        return x
Example #6
def standard_sample(make_hypothesis, make_data, show_skip=9, show=True, N=100, save_top='top.pkl', alsoprint='None', **kwargs):
    """
        A simplified interface for sampling that supports printing (showing), returning the top hypotheses, and saving them.
        This is used by many examples, and is meant to make it easy to run with a variety of parameters.
        NOTE: show_skip only controls how often samples are printed.
        **kwargs are passed on to the sampler.
    """
    if LOTlib.SIG_INTERRUPTED:
        return TopN()  # So we don't waste time!

    h0 = make_hypothesis()
    data = make_data()


    best_hypotheses = TopN(N=N)

    f = eval(alsoprint)

    sampler = MHSampler(h0, data, **kwargs)

#    # TODO: change acceptance temperature over time
#    sampler.acceptance_temperature = 0.5

    for i, h in enumerate(break_ctrlc(sampler)):

#        if i % 10000 == 0 and i != 0:
#            sampler.acceptance_temperature = min(1.0, sampler.acceptance_temperature+0.1)
#            print '='*50
#            print 'change acc temperature to', sampler.acceptance_temperature 

        best_hypotheses.add(h)

        if show and i%(show_skip+1) == 0:

            print i, \
                h.posterior_score, \
                h.prior, \
                h.likelihood, \
                f(h) if f is not None else '', \
                qq(cleanFunctionNodeString(h))

    if save_top is not None:
        print "# Saving top hypotheses"
        with open(save_top, 'w') as f:
            pickle.dump(best_hypotheses, f)

    return best_hypotheses
Example #7
           bv_type='INNER-BOOL',
           bv_args=['OBJECT'],
           bv_prefix='F')

# Define a predicate that will just check if something is in a BASE-SET
g.add_rule('lambdaDefinePredicate',
           'lambda', ['lambdaDefinePredicateINNER'],
           1.0,
           bv_type='OBJECT',
           bv_args=None,
           bv_prefix='z')
# The function on objects that allows them to be put into classes (analogous to a logical model here)
g.add_rule('lambdaDefinePredicateINNER', 'is_in_', ['OBJECT', 'BASE-SET'], 1.0)

# After we've defined F, these are used to construct the concept
g.add_rule('INNER-BOOL', 'and_', ['INNER-BOOL', 'INNER-BOOL'], 1.0)
g.add_rule('INNER-BOOL', 'or_', ['INNER-BOOL', 'INNER-BOOL'], 1.0)
g.add_rule('INNER-BOOL', 'not_', ['INNER-BOOL'], 1.0)

g.add_rule('OBJECT', 'x', None, 1.0)
g.add_rule('OBJECT', 'y', None, 1.0)
g.add_rule('OBJECT', '', ['BASE-OBJECT'], 1.0)  # maybe or maybe not?

# BASE-SET is here a set of BASE-OBJECTS (non-args)
g.add_rule('BASE-SET', 'set_add_', ['BASE-OBJECT', 'BASE-SET'], 1.0)
g.add_rule('BASE-SET', 'set_', [], 1.0)

g.add_rule('BASE-OBJECT', qq('p1'), None, 1.0)
g.add_rule('BASE-OBJECT', qq('p2'), None, 1.0)
g.add_rule('BASE-OBJECT', qq('n1'), None, 1.0)
g.add_rule('BASE-OBJECT', qq('n2'), None, 1.0)
Example #8
# -*- coding: utf-8 -*-
"""
A simple symbolic regression demo

"""
from LOTlib import lot_iter
from LOTlib.Hypotheses.GaussianLOTHypothesis import GaussianLOTHypothesis
from LOTlib.Inference.MetropolisHastings import MHSampler
from LOTlib.Miscellaneous import qq
from LOTlib.Examples.SymbolicRegression.Grammar import grammar
from Data import generate_data

CHAINS = 4
STEPS = 50000
SKIP = 0

if __name__ == "__main__":

    print grammar

    # generate some data
    data = generate_data(50) # how many data points?

    # starting hypothesis -- here this generates at random
    h0 = GaussianLOTHypothesis(grammar)

    for h in lot_iter(MHSampler(h0, data, STEPS, skip=SKIP)):
        print h.posterior_score, qq(h)
Example #9
    def next(self):
        if LOTlib.SIG_INTERRUPTED or self.samples_yielded >= self.steps:
            raise StopIteration
        else:
            for _ in lot_iter(xrange(self.skip+1)):

                self.proposal, fb = self.proposer(self.current_sample)

                # either compute this, or use the memoized version
                np, nl = self.compute_posterior(self.proposal, self.data)

                #print np, nl, current_sample.prior, current_sample.likelihood
                # NOTE: It is important that we re-compute with the temperatures, since these may be altered externally by ParallelTempering and others
                prop = (np/self.prior_temperature+nl/self.likelihood_temperature)
                cur  = (self.current_sample.prior/self.prior_temperature + self.current_sample.likelihood/self.likelihood_temperature)

                if MH_acceptance(cur, prop, fb, acceptance_temperature=self.acceptance_temperature):
                    self.current_sample = self.proposal
                    self.was_accepted = True
                    self.acceptance_count += 1
                else:
                    self.was_accepted = False

                self.internal_sample(self.current_sample)
                self.proposal_count += 1

            if self.trace:
                print self.current_sample.posterior_score, self.current_sample.likelihood, self.current_sample.prior, qq(self.current_sample)

            self.samples_yielded += 1
            return self.current_sample
Example #10
    def next(self):
        """Generate another sample."""

        if self.samples_yielded >= self.steps:
            raise StopIteration
        else:
            for _ in xrange(self.skip+1):

                self.proposal, fb = self.proposer(self.current_sample)

                # print self.proposal
                assert self.proposal is not self.current_sample, "*** Proposal cannot be the same as the current sample!"
                assert self.proposal.value is not self.current_sample.value, "*** Proposal's value cannot be the same object as the current sample's value!"

                # Call myself so memoized subclasses can override
                self.compute_posterior(self.proposal, self.data)

                np, nl = self.proposal.prior, self.proposal.likelihood

                # Note: It is important that we re-compute with the temperatures, since these may be altered
                #    externally by ParallelTempering and others
                prop = (np/self.prior_temperature +
                        nl/self.likelihood_temperature)
                cur = (self.current_sample.prior/self.prior_temperature +
                       self.current_sample.likelihood/self.likelihood_temperature)
                
                # print "# Current:", self.current_sample
                # print "# Proposal:", self.proposal
                
                if MH_acceptance(cur, prop, fb, acceptance_temperature=self.acceptance_temperature):
                    self.current_sample = self.proposal
                    self.was_accepted = True
                    self.acceptance_count += 1
                else:
                    self.was_accepted = False

                self.internal_sample(self.current_sample)
                self.proposal_count += 1

            if self.trace:
                print self.current_sample.posterior_score, self.current_sample.likelihood, self.current_sample.prior, qq(self.current_sample)

            self.samples_yielded += 1
            return self.current_sample
Example #11
 def display(self):
     for h in self.get_all():
         print h.posterior_score, h.prior, h.likelihood, qq(h)
Example #12
# After we've defined F, these are used to construct the concept
grammar.add_rule('INNER-BOOL', 'and_', ['INNER-BOOL', 'INNER-BOOL'], 1.0)
grammar.add_rule('INNER-BOOL', 'or_', ['INNER-BOOL', 'INNER-BOOL'], 1.0)
grammar.add_rule('INNER-BOOL', 'not_', ['INNER-BOOL'], 1.0)

grammar.add_rule('OBJECT', 'x', None, 1.0)
grammar.add_rule('OBJECT', 'y', None, 1.0)

# BASE-SET is here a set of BASE-OBJECTS (non-args)
grammar.add_rule('BASE-SET', 'set_add_', ['BASE-OBJECT', 'BASE-SET'], 1.0)
grammar.add_rule('BASE-SET', 'set_', [], 1.0)

objects = [t + str(i) for t, i in itertools.product('pnx', range(3))]

for o in objects:
    grammar.add_rule('BASE-OBJECT', qq(o), None, 1.0)

#from LOTlib.Subtrees import *
#for t in generate_trees(grammar):
#print t

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set up data -- true output means attraction (p=positive; n=negative)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
data = []

for a, b in itertools.product(objects, objects):

    myinput = [a, b]

    # opposites (n/p) interact; x interacts with nothing
Example #13
grammar.add_rule('lambdaDefinePredicateINNER', 'is_in_', ['OBJECT', 'BASE-SET'], 1.0)

# After we've defined F, these are used to construct the concept
grammar.add_rule('INNER-BOOL', 'and_', ['INNER-BOOL', 'INNER-BOOL'], 1.0)
grammar.add_rule('INNER-BOOL', 'or_', ['INNER-BOOL', 'INNER-BOOL'], 1.0)
grammar.add_rule('INNER-BOOL', 'not_', ['INNER-BOOL'], 1.0)

grammar.add_rule('OBJECT', 'x', None, 1.0)
grammar.add_rule('OBJECT', 'y', None, 1.0)

# BASE-SET is here a set of BASE-OBJECTS (non-args)
grammar.add_rule('BASE-SET', 'set_add_', ['BASE-OBJECT', 'BASE-SET'], 1.0)
grammar.add_rule('BASE-SET', 'set_', [], 1.0)

for o in OBJECTS:
    grammar.add_rule('BASE-OBJECT', qq(o), None, 1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData

# Set up data -- true output means attraction (p=positive; n=negative)
def make_data(n=1):

    data = []

    for _ in xrange(n):
        for a,b in itertools.product(OBJECTS, OBJECTS):
Example #14
        return str(self.value)

    def __call__(self, *args):
        try:
            return LOTHypothesis.__call__(self, *args)
        except EvaluationException:
            return None


def make_hypothesis(**kwargs):
    """Define a new kind of LOTHypothesis, that gives regex strings.

    These have a special interpretation function that compiles differently than straight python eval.
    """
    return RegexHypothesis(**kwargs)


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Main
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

if __name__ == "__main__":

    from LOTlib.Inference.Samplers.StandardSample import standard_sample
    from LOTlib import break_ctrlc
    from LOTlib.Miscellaneous import qq

    for h in break_ctrlc(
            standard_sample(make_hypothesis, make_data, steps=10000)):
        print h.posterior_score, h.prior, h.likelihood, qq(h)
Example #15
from Data import generate_data
from Grammar import grammar, NCONSTANTS

STEPS = 500000
SKIP = 0
data_sd = 0.1  # the SD of the data
NDATA = 50
MEMOIZE = 1000  # 0 means don't memoize

## The target function for symbolic regression
target = lambda x: 3. * x + sin(4.3 / x)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# starting hypothesis -- here this generates at random

data = generate_data(target, NDATA, data_sd)  # generate some data
h0 = MAPSymbolicRegressionHypothesis(grammar)
h0.CONSTANT_VALUES = numpy.zeros(
    NCONSTANTS)  ## TODO: Move this to an initializer

from LOTlib.Inference.MetropolisHastings import mh_sample
for h in mh_sample(h0,
                   data,
                   STEPS,
                   skip=SKIP,
                   trace=False,
                   debug=False,
                   memoize=MEMOIZE):
    print h.posterior_score, h.likelihood, h.prior, h.CONSTANT_VALUES, qq(h)
Example #16
        print h.posterior_score, h.prior, h.likelihood, h.likelihood_temperature
        print h

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Play around with some different inference schemes
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=0.01)
#for i, h in lot_iter(enumerate(mh_sample(h0, data, 400000000, skip=0, debug=False))):
    #print h.posterior_score, h.prior, h.likelihood, qq(re.sub(r"\n", ";", str(h)))

from LOTlib.Inference.IncreaseTemperatureMH import increase_temperature_mh_sample

h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=0.01)
for i, h in lot_iter(enumerate(increase_temperature_mh_sample(h0, data, 400000000, skip=0, increase_amount=1.50))):
    print h.posterior_score, h.prior, h.likelihood, qq(re.sub(r"\n", ";", str(h)))


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Run on a single computer, printing out
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#fbs = FiniteBestSet(N=100)
#h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=0.051)
#for i, h in lot_iter(enumerate(mh_sample(h0, data, 400000000, skip=0, debug=False))):
    #fbs.add(h, h.posterior_score)

    #if i%100==0:
        #print h.posterior_score, h.prior, h.likelihood #, re.sub(r"\n", ";", str(h))
        #print h

#for h in fbs.get_all(sorted=True):
Example #17
    def next(self):
        if LOTlib.SIG_INTERRUPTED or self.samples_yielded >= self.steps:
            raise StopIteration
        else:
            for _ in lot_iter(xrange(self.skip + 1)):

                self.proposal, fb = self.proposer(self.current_sample)

                # either compute this, or use the memoized version
                np, nl = self.compute_posterior(self.proposal, self.data)

                #print np, nl, current_sample.prior, current_sample.likelihood
                # NOTE: It is important that we re-compute with the temperatures, since these may be altered externally by ParallelTempering and others
                prop = (np / self.prior_temperature +
                        nl / self.likelihood_temperature)
                cur = (self.current_sample.prior / self.prior_temperature +
                       self.current_sample.likelihood /
                       self.likelihood_temperature)

                if MH_acceptance(
                        cur,
                        prop,
                        fb,
                        acceptance_temperature=self.acceptance_temperature):
                    self.current_sample = self.proposal
                    self.was_accepted = True
                    self.acceptance_count += 1
                else:
                    self.was_accepted = False

                self.internal_sample(self.current_sample)
                self.proposal_count += 1

            if self.trace:
                print self.current_sample.posterior_score, self.current_sample.likelihood, self.current_sample.prior, qq(
                    self.current_sample)

            self.samples_yielded += 1
            return self.current_sample
Example #18
from Data import data
from Grammar import grammar
from Utilities import make_h0


def run(*args):
    """The running function."""
    # starting hypothesis -- here this generates at random
    h0 = GaussianLOTHypothesis(grammar)

    # We store the top 100 from each run
    pq = FiniteBestSet(N=100, max=True, key="posterior_score")
    pq.add(MHSampler(h0, data, STEPS, skip=SKIP))

    return pq


if __name__ == "__main__":

    CHAINS = 10
    STEPS = 10000000
    SKIP = 0

    finitesample = FiniteBestSet(max=True) # the finite sample of all
    results = map(run, [ [None] ] * CHAINS ) # Run on a single core
    finitesample.merge(results)

    ## and display
    for r in finitesample.get_all(decreasing=False, sorted=True):
        print r.posterior_score, r.prior, r.likelihood, qq(str(r))
Example #19
from LOTlib.DataAndObjects import FunctionData
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Miscellaneous import qq

from MAPSymbolicRegressionHypothesis import MAPSymbolicRegressionHypothesis, grammar

from Data import generate_data
from Grammar import grammar, NCONSTANTS

STEPS = 500000
SKIP = 0
data_sd = 0.1 # the SD of the data
NDATA = 50
MEMOIZE = 1000 # 0 means don't memoize

## The target function for symbolic regression
target = lambda x: 3.*x + sin(4.3/x)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# starting hypothesis -- here this generates at random

data = generate_data(target, NDATA, data_sd) # generate some data
h0 = MAPSymbolicRegressionHypothesis(grammar)
h0.CONSTANT_VALUES = numpy.zeros(NCONSTANTS) ## TODO: Move this to an initializer

from LOTlib.Inference.MetropolisHastings import mh_sample
for h in mh_sample(h0, data, STEPS, skip=SKIP, trace=False, debug=False, memoize=MEMOIZE):
    print h.posterior_score, h.likelihood, h.prior, h.CONSTANT_VALUES, qq(h)
Example #20
        if options.EVAL_DATA > 0:
            eval_data = make_data(options.EVAL_DATA)


    # choose the appropriate map function
    args = list(itertools.product([make_hypothesis],[make_data], data_amounts * options.CHAINS) )

    # set the output codec -- needed to display lambda to stdout
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    seen = set()
    for fs in MPI_unorderedmap(run, numpy.random.permutation(args)):
        assert is_master_process()

        for h in fs:

            if h not in seen:
                seen.add(h)

                if eval_data is not None:
                    h.compute_posterior(eval_data) # evaluate on the big data
                    print h.posterior_score, h.prior, h.likelihood / options.EVAL_DATA, \
                            alsoprint(h) if alsoprint is not None else '',\
                            qq(cleanFunctionNodeString(h))


    import pickle
    with open(options.OUT_PATH, 'w') as f:
        pickle.dump(seen, f)

Example #21
from LOTlib.Miscellaneous import qq

# What are the objects we may use?
OBJECTS              = ['JOHN', 'MARY', 'SUSAN', 'BILL']
SEMANTIC_1PREDICATES = ['SMILED', 'LAUGHED', 'MAN', 'WOMAN']
SEMANTIC_2PREDICATES = ['SAW', 'LOVED']

## Define the grammar
grammar = Grammar()

grammar.add_rule('START', '', ['FUNCTION'], 2.0)
grammar.add_rule('START', '', ['BOOL'], 1.0)
grammar.add_rule('START', '', ['OBJECT'], 1.0)

for m in SEMANTIC_1PREDICATES:
    grammar.add_rule('BOOL', 'C.relation_', [ qq(m), 'OBJECT'], 1.0)

for m in SEMANTIC_2PREDICATES:
    grammar.add_rule('BOOL', 'C.relation_', [ qq(m), 'OBJECT', 'OBJECT'], 1.0)

for o in OBJECTS:
    grammar.add_rule('OBJECT', qq(o), None, 1.0)

grammar.add_rule('BOOL', 'exists_', ['FUNCTION.O2B', 'C.objects'], 1.00) # can quantify over objects->bool functions
grammar.add_rule('BOOL', 'forall_', ['FUNCTION.O2B', 'C.objects'], 1.00)
grammar.add_rule('FUNCTION.O2B', 'lambda', ['BOOL'], 1.0, bv_type='OBJECT')

grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0)
grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0)
grammar.add_rule('BOOL', 'not_', ['BOOL'], 1.0)
Example #22
        display_option_summary(options)

        eval_data = None
        if options.EVAL_DATA > 0:
            eval_data = make_data(options.EVAL_DATA)


    # choose the appropriate map function
    args = list(itertools.product([make_hypothesis],[make_data], data_amounts * options.CHAINS) )

    # set the output codec -- needed to display lambda to stdout
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    seen = set()
    for fs in MPI_unorderedmap(run, numpy.random.permutation(args)):
        assert is_master_process()

        for h in fs:

            if h not in seen:
                seen.add(h)

                if eval_data is not None:
                    h.compute_posterior(eval_data) # evaluate on the big data
                    print h.prior, h.likelihood / options.EVAL_DATA, qq(cleanFunctionNodeString(h))

    import pickle
    with open(options.OUT_PATH, 'w') as f:
        pickle.dump(seen, f)

Example #23
grammar = lot_grammar

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load the hypotheses
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# map each concept to a hypothesis
with open('hypotheses/lot_hypotheses-10.pkl', 'r') as f:
    hypotheses = pickle.load(f)

print "# Loaded hypotheses: ", len(hypotheses)

# - - logging - - - - - - - -
with open(LOG+"/hypotheses.txt", 'w') as f:
    for i, h in enumerate(hypotheses):
        print >>f, i, qq(h)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load the human data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Load the concepts from the human data
from Data import load_human_data

human_nyes, human_ntrials = load_human_data()
print "# Loaded human data"

observed_sets = set([ k[0] for k in human_nyes.keys() ])

## TRIM TO FEWER
# observed_sets = set(list(observed_sets)[:100])
Example #24
 def __str__(self):
     return ('\n'.join([
         u"%-15s: %s" % (qq(w), lambdastring(v.value))
         for w, v in sorted(self.value.iteritems())
     ]) + '\0').encode('utf-8')
Example #25
 def __str__(self):
     return ('\n'.join([u"%-15s: %s" % (qq(w), lambdastring(v.value)) for w, v in sorted(self.value.iteritems())]) + '\0').encode('utf-8')
Example #26
'''
from LOTlib.Inference.Proposals.InsertDeleteProposal import InsertDeleteProposal
h0 = NumberExpression(grammar, proposal_function=InsertDeleteProposal(grammar))
'''

# store hypotheses we've found
allhyp = TopN(N=1000)

# ========================================================================================================
# Run the standard RationalRules sampler

mh_sampler = MHSampler(h0, data, STEPS, skip=SKIP)

for h in lot_iter(mh_sampler):
    if TRACE:
        print q(get_knower_pattern(h)), h.posterior_score, h.compute_prior(), h.compute_likelihood(data), qq(h)

    # add h to our priority queue, with priority of its log probability, h.posterior_score
    allhyp.add(h)

# ========================================================================================================
#  now re-evaluate everything we found on new data
'''
huge_data = generate_data(LARGE_DATA_SIZE)

save this with a huge data set -- eval with average ll
H = allhyp.get_sorted()

compute the posterior for each hypothesis
[ h.compute_posterior(huge_data) for h in H]
Example #27
if __name__ == "__main__":
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # Main running

    if is_master_process():
        display_option_summary(options)
        huge_data = generate_data(options.LARGE_DATA_SIZE)

    # choose the appropriate map function
    argarray = map(lambda x: [x], options.DATA_AMOUNTS * options.CHAINS)

    seen = set()
    for fs in MPI_unorderedmap(run, numpy.random.permutation(argarray)):
        for h in fs.get_all():
            if h not in seen:
                seen.add(h)
                h.compute_posterior(huge_data)

                if h.prior > float("-inf"):
                    print h.prior, \
                        h.likelihood /float(options.LARGE_DATA_SIZE), \
                        q(get_knower_pattern(h)), \
                        qq(h)

            sys.stdout.flush()

    import pickle
    with open(options.OUT_PATH, 'w') as f:
        pickle.dump(seen, f)
Example #28

def make_h0(value=None):
    return GaussianLOTHypothesis(grammar, value=value)


if __name__ == "__main__":

    # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # the running function

    def run(*args):

        # starting hypothesis -- here this generates at random
        h0 = GaussianLOTHypothesis(grammar,
                                   prior_temperature=PRIOR_TEMPERATURE)

        # We store the top 100 from each run
        pq = FiniteBestSet(100, max=True, key="posterior_score")
        pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

        return pq

    finitesample = FiniteBestSet(max=True)  # the finite sample of all
    results = map(run, [[None]] * CHAINS)  # Run on a single core
    finitesample.merge(results)

    ## and display
    for r in finitesample.get_all(decreasing=False, sorted=True):
        print r.posterior_score, r.prior, r.likelihood, qq(str(r))
Example #29
 def __repr__(self):
     return qq(str(self.utterance))+' in '+ str(self.context) + " from " + str(self.possible_utterances)
Example #30

# After we've defined F, these are used to construct the concept
grammar.add_rule('INNER-BOOL', 'and_', ['INNER-BOOL', 'INNER-BOOL'], 1.0)
grammar.add_rule('INNER-BOOL', 'or_', ['INNER-BOOL', 'INNER-BOOL'], 1.0)
grammar.add_rule('INNER-BOOL', 'not_', ['INNER-BOOL'], 1.0)

grammar.add_rule('OBJECT', 'x', None, 1.0)
grammar.add_rule('OBJECT', 'y', None, 1.0)
grammar.add_rule('OBJECT', '', ['BASE-OBJECT'], 1.0) # maybe or maybe not?

# BASE-SET is here a set of BASE-OBJECTS (non-args)
grammar.add_rule('BASE-SET', 'set_add_', ['BASE-OBJECT', 'BASE-SET'], 1.0)
grammar.add_rule('BASE-SET', 'set_', [], 1.0)

grammar.add_rule('BASE-OBJECT', qq('p1'), None, 1.0)
grammar.add_rule('BASE-OBJECT', qq('p2'), None, 1.0)
grammar.add_rule('BASE-OBJECT', qq('n1'), None, 1.0)
grammar.add_rule('BASE-OBJECT', qq('n2'), None, 1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set up data -- true output means attraction (p=positive; n=negative)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

data = [ FunctionData(input=[ "p1", "n1" ], output=True),
                 FunctionData(input=[ "p1", "n2" ], output=True),
                 FunctionData(input=[ "p1", "p1" ], output=False),
                 FunctionData(input=[ "p1", "p2" ], output=False),

                 FunctionData(input=[ "p2", "n1" ], output=True),
                 FunctionData(input=[ "p2", "n2" ], output=True),
Example #31
# Load the hypotheses
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# map each concept to a hypothesis
with open('hypotheses.pkl', 'r') as f:
    # with open('hypotheses/hypotheses-1.pkl', 'r') as f:
    concept2hypotheses = pickle.load(f)

hypotheses = set()
for hset in concept2hypotheses.values():
    hypotheses.update(hset)

print "# Loaded %s hypotheses" % len(hypotheses)
with open(LOG + "/hypotheses.txt", 'w') as f:
    for i, h in enumerate(hypotheses):
        print >> f, i, qq(h)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load the human data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# We will map tuples of concept-list, set, response to counts.
import pandas
import math
from collections import Counter
human_data = pandas.read_csv('HumanData/TurkData-Accuracy.txt',
                             sep='\t',
                             low_memory=False,
                             index_col=False)
human_yes, human_no = Counter(), Counter()
for r in xrange(human_data.shape[0]):  # for each row
Example #32
"""
Define a new kind of LOTHypothesis, that gives regex strings.

These have a special interpretation function that compiles differently than straight python eval.

"""
from LOTlib import lot_iter
from LOTlib.Inference.MetropolisHastings import MHSampler
from LOTlib.Miscellaneous import qq
from Model import *

if __name__ == "__main__":
    for h in lot_iter(MHSampler(make_h0(), data, steps=10000)):
        print h.posterior_score, h.prior, h.likelihood, qq(h)
Example #33
    pr_data = language.sample_data_as_FuncData(1024, max_length=options.FINITE)
    p = []
    r = []
    print 'compute precision and recall..'
    for h in hypotheses:
        precision, recall = language.estimate_precision_and_recall(h, pr_data)
        p.append(precision)
        r.append(recall)

    # Now go through each hypothesis and print out some summary stats
    for data_size in DATA_RANGE:
        print 'get stats from size : ', data_size

        evaluation_data = language.sample_data_as_FuncData(data_size, max_length=options.FINITE)

        # Now update everyone's posterior
        for h in hypotheses:
            h.compute_posterior(evaluation_data)

        # compute the normalizing constant. This is the log of the sum of the probabilities
        Z = logsumexp([h.posterior_score for h in hypotheses])

        f = open('out' + suffix, 'a')
        cnt = 0
        for h in hypotheses:
            #compute the number of different strings we generate
            generated_strings = set([h() for _ in xrange(1000)])
            print >> f, data_size, np.exp(h.posterior_score-Z), h.posterior_score, h.prior, \
                h.likelihood, len(generated_strings), qq(h), p[cnt], r[cnt]
            cnt += 1
        f.close()
Example #34
        eval_data = None
        if options.EVAL_DATA > 0:
            eval_data = make_data(options.EVAL_DATA)

    # choose the appropriate map function
    args = list(
        itertools.product([make_hypothesis], [make_data],
                          data_amounts * options.CHAINS))

    # set the output codec -- needed to display lambda to stdout
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    seen = set()
    for fs in MPI_unorderedmap(run, numpy.random.permutation(args)):
        assert is_master_process()

        for h in fs:

            if h not in seen:
                seen.add(h)

                if eval_data is not None:
                    h.compute_posterior(eval_data)  # evaluate on the big data
                    print h.posterior_score, h.prior, h.likelihood / options.EVAL_DATA, \
                            alsoprint(h) if alsoprint is not None else '',\
                            qq(cleanFunctionNodeString(h))

    import pickle
    with open(options.OUT_PATH, 'w') as f:
        pickle.dump(seen, f)
Example #35
 def __repr__(self):
     return qq(str(self.utterance)) + ' in ' + str(
         self.context) + " from " + str(self.possible_utterances)
Example #36
    args = list(itertools.product([make_hypothesis], [make_data], DATA_RANGE))

    # run on MPI
    results = MPI_map(run, args)

    # collapse all returned sets
    hypotheses = set()
    for r in results:
        hypotheses.update(r) # add the ith's results to the set

    # Now go through each hypothesis and print out some summary stats
    for data_size in DATA_RANGE:

        evaluation_data = make_data(data_size)

        # Now update everyone's posterior
        for h in hypotheses:
            h.compute_posterior(evaluation_data)

        # compute the normalizing constant. This is the log of the sum of the probabilities
        Z = logsumexp([h.posterior_score for h in hypotheses])

        for h in hypotheses:
            #compute the number of different strings we generate
            generated_strings = set([h() for _ in xrange(1000)])

            # Print out some info. We can use np.exp(h.posterior_score-Z) here because Z is computed via logsumexp, which is more numerically stable.
            # This is the probability at this amount of data.
            print data_size, np.exp(h.posterior_score-Z), h.posterior_score, h.prior, h.likelihood, len(generated_strings), qq(h)

Example #37
 def __str__(self):
     """
         By default this appends a \0 so that we can sort -z if we want (e.g. if we print a posterior first)
     """
     return '\n'+'\n'.join(["%-15s: %s" % (qq(w), str(v)) for w, v in sorted(self.value.iteritems())]) + '\0'
Example #38
from LOTlib.Grammar import Grammar
from LOTlib.Miscellaneous import qq
from Shared import OBJECTS, SEMANTIC_1PREDICATES, SEMANTIC_2PREDICATES


grammar = Grammar()

grammar.add_rule('START', '', ['FUNCTION'], 2.0)
grammar.add_rule('START', '', ['BOOL'], 1.0)
grammar.add_rule('START', '', ['OBJECT'], 1.0)

for m in SEMANTIC_1PREDICATES:
    grammar.add_rule('BOOL', 'C.relation_', [ qq(m), 'OBJECT'], 1.0)

for m in SEMANTIC_2PREDICATES:
    grammar.add_rule('BOOL', 'C.relation_', [ qq(m), 'OBJECT', 'OBJECT'], 1.0)

for o in OBJECTS:
    grammar.add_rule('OBJECT', qq(o), None, 1.0)

grammar.add_rule('BOOL', 'exists_', ['FUNCTION.O2B', 'C.objects'], 1.00) # can quantify over objects->bool functions
grammar.add_rule('BOOL', 'forall_', ['FUNCTION.O2B', 'C.objects'], 1.00)
grammar.add_rule('FUNCTION.O2B', 'lambda', ['BOOL'], 1.0, bv_type='OBJECT')

grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0)
grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0)
grammar.add_rule('BOOL', 'not_', ['BOOL'], 1.0)

# And for outermost functions
grammar.add_rule('FUNCTION', 'lambda', ['START'], 1.0, bv_type='OBJECT')
Example #39
                probs=[v.posterior_score for v in population],
                log=True)

            try:
                kid = mutate(crossover(mom, dad))
            except (ProposalFailedException, NodeSamplingException):
                continue

            kid.compute_posterior(data)
            yield kid

            nextpopulation.append(kid)

            # # if MH_acceptance(population[i].posterior_score, kid.posterior_score, 0.0):
            # if kid.posterior_score > population[i].posterior_score:
            #     population[i] = kid
            #     yield kid
        population = nextpopulation


if __name__ == "__main__":
    from LOTlib import break_ctrlc
    from LOTlib.Examples.Number.Model import make_hypothesis, make_data
    from LOTlib.Miscellaneous import qq
    data = make_data(400)

    for h in break_ctrlc(
            genetic_algorithm(make_hypothesis, data, mutate_lot,
                              crossover_lot)):
        print h.posterior_score, h.get_knower_pattern(), qq(h)
Example #40
            mom = weighted_sample(population, probs=[v.posterior_score for v in population], log=True)
            dad = weighted_sample(population, probs=[v.posterior_score for v in population], log=True)

            try:
                kid = mutate(crossover(mom, dad))
            except (ProposalFailedException, NodeSamplingException):
                continue

            kid.compute_posterior(data)
            yield kid

            nextpopulation.append(kid)

            # # if MH_acceptance(population[i].posterior_score, kid.posterior_score, 0.0):
            # if kid.posterior_score > population[i].posterior_score:
            #     population[i] = kid
            #     yield kid
        population = nextpopulation

if __name__ == "__main__":
    from LOTlib import break_ctrlc
    from LOTlib.Examples.Number.Model import make_hypothesis, make_data
    from LOTlib.Miscellaneous import qq
    data = make_data(400)

    for h in break_ctrlc(genetic_algorithm(make_hypothesis, data, mutate_lot, crossover_lot)):
        print h.posterior_score, h.get_knower_pattern(), qq(h)