Example #1
 FunctionData(input=[],
              output={
                  'h e s': size,
                  'm e s': size,
                  'm e g': size,
                  'h e g': size,
                  'm e n': size,
                  'h e m': size,
                  'm e k': size,
                  'k e s': size,
                  'h e k': size,
                  'k e N': size,
                  'k e g': size,
                  'h e n': size,
                  'm e N': size,
                  'k e n': size,
                  'h e N': size,
                  'f e N': size,
                  'g e N': size,
                  'n e N': size,
                  'n e s': size,
                  'f e n': size,
                  'g e n': size,
                  'g e m': size,
                  'f e m': size,
                  'g e k': size,
                  'f e k': size,
                  'f e g': size,
                  'f e s': size,
                  'n e g': size,
                  'k e m': size,
                  'n e m': size,
                  'g e s': size,
                  'n e k': size
              })
Example #2
        while (i < len(categories)) and (phase[i] != 'g'):
            selected_so_far.append(selected[i])
            categories_so_far[selected[i]] = categories[selected[i]]
            i += 1

        if ((i < len(categories)) and (i > 0)):

            stim = "".join(categories_so_far)
            all_stim.append(stim)
            all_cats.append(categories)
            all_obs.append(observed_cats)
            tot_guess += len(categories) - i
            #print selected_so_far
            # print stim
            # print
            data = [FunctionData(alpha=1. - 1e-7, input=all_C, output=stim)]
            # output="".join(categories))]

            #output=stim)]

            h0 = MyHypothesis()

            MAP = None
            best_post = -float("inf")
            best_out = ""

            n_comp = n_compatible(stim, concepts)
            print stim, n_comp

            while n_compatible(stim, concepts) < 2:
Example #3
def make_data(N=20, f=TargetConcepts[0]):
    data = []
    for _ in xrange(N):
        o = sample_one(all_objects)
        data.append(FunctionData(input=[o], output=f(o), alpha=0.90))
    return data
Example #4
def make_staged_seq2(jump, temp):
    """
    run: mpiexec -n 12
    """
    rec = load_hypo('out/simulations/staged/',
                    ['staged', 'normal0', 'normal1'])
    seen = set()
    work_list = slice_list(range(size), 3)
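    # ranks are split into three groups: staged input (max_length grows
    # with the data amount), fixed-length a^n b^n input, and uniform input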

    for e in rec:
        for h in e[1]:
            if h in seen: continue
            seen.add(h)

    if rank in work_list[0]:
        seq = []
        infos = [[i, min(4 * ((int(i) - 1) / 48 + 1), 12)]
                 for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1] + (e[1] % 2 != 0))
            eval_data = language.sample_data_as_FuncData(e[0])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    elif rank in work_list[1]:
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            language = AnBn(max_length=e[1])
            eval_data = language.sample_data_as_FuncData(e[0])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    else:
        seq = []
        infos = [[i, 12] for i in [10**e for e in np.arange(0, 2.2, 0.1)]]

        for e in infos:
            prob_dict = {}
            eval_data = uniform_data(e[0], e[1])

            for h in seen:
                h.likelihood_temperature = temp
                prob_dict[h] = h.compute_posterior(eval_data)

            seq.append(prob_dict)
            print 'rank: ', rank, e, 'done'
            fff()

    # TODO: is this initial empty-data point actually needed?
    from copy import deepcopy
    dict_0 = deepcopy(seq[0])
    for h in dict_0:
        dict_0[h] = h.compute_posterior(
            [FunctionData(input=[], output=Counter())])
    seq.insert(0, dict_0)
    dump(seq, open('seq' + str(rank) + suffix, 'w'))
Example #5
def make_pos2(jump, temp):
    """
    1. read raw output
    2. compute precision & recall on nonadjacent and adjacent dependencies
    3. evaluate posterior probability on different data sizes
    4. dump the sequence

    run: mpiexec -n 4
    """

    print 'loading..'
    fff()
    rec = load_hypo('out/simulations/nonadjacent/', ['0'])

    # TODO: only one rank needs to do this
    print 'estimating pr'
    fff()
    pr_dict = {}
    _set = set()
    cnt_tmp = {}
    for e in rec:
        for h in e[1]:
            if h in _set: continue
            cnt = Counter([h() for _ in xrange(1024)])
            cnt_tmp[h] = cnt
            base = sum(cnt.values())
            num = 0
            for k, v in cnt.iteritems():
                if k is None or len(k) < 2: continue
                if k[0] + k[-1] in ['ab', 'cd', 'ef']: num += v
            pr_dict[h] = float(num) / base

            # fix the h_output
            h.h_output = cnt
            _set.add(h)

    work_list = range(2, 17, jump)
    for i in work_list:
        language = LongDependency(max_length=i)

        eval_data = {}
        for e in language.str_sets:
            eval_data[e] = 144.0 / len(language.str_sets)
        eval_data = [FunctionData(input=[], output=eval_data)]

        score = np.zeros(len(_set), dtype=np.float64)
        prec = np.zeros(len(_set), dtype=np.float64)

        # prob_dict = {}
        # test_list = []

        for ind, h in enumerate(_set):
            h.likelihood_temperature = temp
            score[ind] = h.compute_posterior(eval_data)
            prec[ind] = pr_dict[h]
            # prob_dict[h] = h.compute_posterior(eval_data)
            # test_list.append([h.posterior_score, pr_dict[h], cnt_tmp[h], str(h), h])

        # test_list.sort(key=lambda x: x[0], reverse=True)
        # Z = logsumexp([h.posterior_score for h in _set])
        #
        # weighted_axb = sum([np.exp(e[0] - Z) * e[1] for e in test_list])
        # print i, weighted_axb
        # for i_t in xrange(3):
        #     print 'prob: ', np.exp(test_list[i_t][0] - Z), 'axb_f-score',  test_list[i_t][1]
        #     print test_list[i_t][2]
        #     # print test_list[i_t][4].compute_posterior(eval_data)
        #     # print language.estimate_precision_and_recall(test_list[i_t][5], cnt_tmp[test_list[i_t][5]])
        # print '='*50
        # fff()

        #
        # f = open('non_w'+suffix, 'a')
        # print >> f, Z, weighted_axb
        # print
        # f.close()
        #
        # print 'size: %i' % i, Z, weighted_axb; fff()
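        # average the per-rank scores, then compute the posterior-weighted
        # axb score on rank 0 and append it to the output file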

        if rank != 0:
            comm.send(score, dest=0)
            comm.send(prec, dest=0)
            sys.exit(0)
        else:
            for r in xrange(size - 1):
                score += comm.recv(source=r + 1)
                prec += comm.recv(source=r + 1)
            score /= size
            prec /= size
            Z = logsumexp(score)

            weighted_axb = np.sum(np.exp(score - Z) * prec)

            f = open('non_w' + suffix, 'a')
            print >> f, Z, weighted_axb
            print i, Z, weighted_axb
            fff()
            f.close()
Example #6
#for t in generate_trees(grammar):
#print t

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set up data -- true output means attraction (p=positive; n=negative)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
data = []

for a, b in itertools.product(objects, objects):

    myinput = [a, b]

    # opposites (n/p) interact; x interacts with nothing
    myoutput = (a[0] != b[0]) and (a[0] != 'x') and (b[0] != 'x')

    data.append(FunctionData(input=myinput, output=myoutput))
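
# For concreteness, assuming objects holds strings like 'p1', 'n1', 'x1'
# (it is defined elsewhere), the loop above labels pairs as:
#   ('p1', 'n1') -> True   (opposite charges attract)
#   ('p1', 'p2') -> False  (like charges do not)
#   ('p1', 'x1') -> False  ('x' interacts with nothing)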

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Run mcmc
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if __name__ == "__main__":

    from LOTlib.Proposals.RegenerationProposal import RegenerationProposal
    #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] )
    mp = RegenerationProposal(grammar)

    from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
    h0 = LOTHypothesis(
        grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp
    )  # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary
Example #7
def make_data(n=1, alpha=0.99):
    data = []
    for x in xrange(1, 10):
        data.append( FunctionData(input=['even', x], output=(x % 2 == 0), alpha=alpha) )
        data.append( FunctionData(input=['odd',  x], output=(x % 2 == 1), alpha=alpha) )
    return data*n
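
# Usage sketch: make_data(2) returns 36 points (two copies of the 18
# even/odd judgments); the first is
# FunctionData(input=['even', 1], output=False, alpha=0.99)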
Example #8
# Build up the info about the data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData

L = []  # likelihood of each hypothesis on each data point
GroupLength = []
NYes = []
NTrials = []
Output = []

domain = range(1, 101)

for os in observed_sets:

    datum = FunctionData(input=[], output=os, alpha=ALPHA)

    # compute the likelihood for all the data here
    for h in hypotheses:
        h.cached_set = h()
        h.stored_likelihood = h.compute_single_likelihood(
            datum, cached_set=h.cached_set)

    L.append([h.stored_likelihood for h in hypotheses])  # each likelihood

    gl = 0  # how many did we actually add?
    for i in domain:
        k = tuple([os, i])

        if k in human_nyes and k in human_ntrials:
            gl += 1
Example #9
# -*- coding: utf-8 -*-
from LOTlib.Hypotheses.GaussianLOTHypothesis import GaussianLOTHypothesis
from LOTlib.DataAndObjects import FunctionData
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Inference.MetropolisHastings import mh_sample
from LOTlib.Miscellaneous import qq
from Grammar import grammar
"""
        This uses Galileo's data on a falling ball. See: http://www.amstat.org/publications/jse/v3n1/datasets.dickey.html
        See also, Jeffreys, W. H., and Berger, J. O. (1992), "Ockham's Razor and Bayesian Analysis," American Scientist, 80, 64-72 (Erratum, p. 116).
"""

# NOTE: these must be floats, else we get hung up on powers of ints
data_sd = 50.0
data = [
    FunctionData(input=[1000.], output=1500., ll_sd=data_sd),
    FunctionData(input=[828.], output=1340., ll_sd=data_sd),
    FunctionData(input=[800.], output=1328., ll_sd=data_sd),
    FunctionData(input=[600.], output=1172., ll_sd=data_sd),
    FunctionData(input=[300.], output=800., ll_sd=data_sd),
    FunctionData(input=[0.], output=0.,
                 ll_sd=data_sd)  # added 0,0 since it makes physical sense.
]

CHAINS = 10
STEPS = 10000000
SKIP = 0
PRIOR_TEMPERATURE = 1.0
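
# A minimal sketch of the sampling loop this script presumably runs, using
# the grammar imported above; GaussianLOTHypothesis's constructor arguments
# are an assumption, and mh_sample is called as in the other examples here:
h0 = GaussianLOTHypothesis(grammar)
for h in mh_sample(h0, data, STEPS):
    print h.posterior_score, qq(h)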

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Define the grammar
Example #10
def make_data(n=1, alpha=0.99, *args, **kwargs):
    # Set up data -- true output means attraction (p=positive; n=negative)
    return [
        FunctionData(input=["p1", "n1"], output=True, alpha=alpha),
        FunctionData(input=["p1", "n2"], output=True, alpha=alpha),
        FunctionData(input=["p1", "p1"], output=False, alpha=alpha),
        FunctionData(input=["p1", "p2"], output=False, alpha=alpha),
        FunctionData(input=["p2", "n1"], output=True, alpha=alpha),
        FunctionData(input=["p2", "n2"], output=True, alpha=alpha),
        FunctionData(input=["p2", "p1"], output=False, alpha=alpha),
        FunctionData(input=["p2", "p2"], output=False, alpha=alpha),
        FunctionData(input=["n1", "n1"], output=False, alpha=alpha),
        FunctionData(input=["n1", "n2"], output=False, alpha=alpha),
        FunctionData(input=["n1", "p1"], output=True, alpha=alpha),
        FunctionData(input=["n1", "p2"], output=True, alpha=alpha),
        FunctionData(input=["n2", "n1"], output=False, alpha=alpha),
        FunctionData(input=["n2", "n2"], output=False, alpha=alpha),
        FunctionData(input=["n2", "p1"], output=True, alpha=alpha),
        FunctionData(input=["n2", "p2"], output=True, alpha=alpha)
    ] * n
Example #11

import re
import os
from collections import defaultdict
from LOTlib.DataAndObjects import FunctionData, Obj

CONCEPT_DIR="Concepts"
concept2data = defaultdict(list)
for pth in os.listdir(CONCEPT_DIR):

    if not re.search(r"L[34]", pth): # skip these!
        with open(CONCEPT_DIR+"/"+pth, 'r') as f:
            description = f.next() # the first line of the file

            for l in f:
                parts = re.split(r"\t", l.strip())
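                # parts[0] holds the '#t'/'#f' labels; each remaining
                # tab-separated field describes one object as 'shape,color,size'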

                # parse the true/false
                output = [ x == "#t" for x in re.findall("\#[tf]", parts[0])]

                # parse the set
                input = []
                for theobj in parts[1:]:
                    x = re.split(r",", theobj) # split within obj via commas
                    input.append( Obj(shape=x[0], color=x[1], size=int(x[2])) )

                concept2data[pth].append( FunctionData(input=input, output=output) )
Example #12
#for i in xrange(100):
#print grammar.generate()

# Or we can make them as hypotheses (functions of S):
#for i in xrange(100):
#print LOTHypothesis(grammar, args=['S'])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Or real inference:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData, Obj  # for nicely managing data
from LOTlib.Inference.MetropolisHastings import mh_sample  # for running MCMC

# Make up some data -- here just one set containing {red, red, green} colors
data = [ FunctionData(input=[ {Obj(color='red'), Obj(color='red'), Obj(color='green')} ], \
                      output=True) ]

# Create an initial hypothesis
h0 = LOTHypothesis(grammar, args=['S'])

# OR if we want to specify and use insert/delete proposals
#from LOTlib.Proposals import *
#h0 = LOTHypothesis(grammar, proposal_function=MixtureProposal(grammar, [RegenerationProposal(grammar), InsertDeleteProposal(grammar)] ) )

if __name__ == "__main__":

    # MCMC!
    for h in mh_sample(h0, data, 4000):  # run sampler
        #for h in unique(mh_sample(h0, data, 4000)): # get unique samples
        # hypotheses' .prior, .likelihood, and .posterior_score are set in mh_sample
        print h.likelihood, h.prior, h.posterior_score, h
Example #13
    # ========================================================================================================
    # Process command line arguments
    # ========================================================================================================
    (options, args) = parser.parse_args()

    suffix = time.strftime('_' + options.NAME + '_%m%d_%H%M%S',
                           time.localtime())
    prefix = '../out/simulations/skewed/'

    # ========================================================================================================
    # Running
    # ========================================================================================================
    language = AnBn()

    show_info('running skewed input case..')
    rec = probe_MHsampler(make_hypothesis('AnBn'), language, options,
                          prefix + 'skewed_out_' + str(rank) + suffix)

    show_info('running normal input case..')
    CASE += 1
    cnt = Counter()
    num = 64.0 * 2 / options.FINITE
    for i in xrange(1, options.FINITE / 2 + 1):
        cnt['a' * i + 'b' * i] = num
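    # cnt now spreads 64 total observations evenly over a^i b^i strings
    # (num per string, FINITE/2 strings)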

    rec1 = probe_MHsampler(make_hypothesis('AnBn'),
                           language,
                           options,
                           prefix + 'normal_out' + str(rank) + suffix,
                           data=[FunctionData(input=[], output=cnt)])
Example #14
def make_data(n):
    return [FunctionData(input=[], output={val : n for val in DATA_STRINGS}, alpha=0.999)]
Example #15
File: Shared.py Project: sa-/LOTlib
grammar.add_rule('COLOR', q('mauve'), None, 1.0)

grammar.add_rule('SHAPE', q('square'), None, 1.0)
grammar.add_rule('SHAPE', q('circle'), None, 1.0)
grammar.add_rule('SHAPE', q('triangle'), None, 1.0)
grammar.add_rule('SHAPE', q('diamond'), None, 1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Make up some data
# Let's give data from a simple conjunction (note this example data is not exhaustive)

from LOTlib.DataAndObjects import FunctionData, Obj

# FunctionData takes a list of arguments and a return value. The arguments
# are objects, which are handled automatically by is_color_ and is_shape_.
data = [ FunctionData( [Obj(shape='square', color='red')], True), \
         FunctionData( [Obj(shape='square', color='blue')], False), \
         FunctionData( [Obj(shape='triangle', color='blue')], False), \
         FunctionData( [Obj(shape='triangle', color='red')], False), \
         ]

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Other standard exports

from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis


def make_h0(value=None):
    return RationalRulesLOTHypothesis(grammar=DNF, value=value, rrAlpha=1.0)

Example #16
from Run import *
from LOTlib.Grammar import Grammar
from LOTlib.DataAndObjects import FunctionData

# --------------------------------------------------------------------------------------------------------
# Mixture model

if __name__ == "__main__":

    path = os.getcwd()

    interval_data = [
        FunctionData(input=[16], output={
            99: (30, 5),
            64: (5, 30)
        })
    ]

    math_data = [FunctionData(input=[16], output={99: (5, 30), 64: (30, 5)})]

    # run(grammar=mix_grammar, mixture_model=1, data=math_data,
    #     ngh='enum7', domain=100, alpha=0.9,
    #     iters=120000, skip=120, cap=1000,
    #     print_stuff='', pickle_file='out/mix_math_120k.p',
    #     csv_file=path+'/out/mix_math_120k')

    grammar_n = 10000
    skip = 10
    cap = grammar_n / skip

    hypotheses = []
Example #17
"""

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # ========================================================================================================
    # Process command line arguments
    # ========================================================================================================
    (options, args) = parser.parse_args()

    suffix = time.strftime('_' + options.NAME + '_%m%d_%H%M%S', time.localtime())
    prefix = '../out/simulations/skewed/'

    # ========================================================================================================
    # Running
    # ========================================================================================================
    language = AnBn()

    show_info('running skewed input case..')
    rec = probe_MHsampler(make_hypothesis('AnBn'), language, options, prefix + 'skewed_out_' + str(rank) + suffix)

    show_info('running normal input case..')
    CASE += 1
    cnt = Counter()
    num = 64.0 * 2 / options.FINITE
    for i in xrange(1, options.FINITE/2+1):
        cnt['a'*i+'b'*i] = num

    rec1 = probe_MHsampler(make_hypothesis('AnBn'), language, options, prefix + 'normal_out' + str(rank) + suffix, data=[FunctionData(input=[], output=cnt)])
Example #18
# Define a grammar object
# Defaultly this has a start symbol called 'START' but we want to call
# it 'EXPR'
grammar = Grammar(start='EXPR')

# Define some operations
grammar.add_rule('EXPR', '(%s + %s)', ['EXPR', 'EXPR'], 1.0)
grammar.add_rule('EXPR', '(%s * %s)', ['EXPR', 'EXPR'], 1.0)
grammar.add_rule('EXPR', '(float(%s) / float(%s))', ['EXPR', 'EXPR'], 1.0)
grammar.add_rule('EXPR', '(-%s)', ['EXPR'], 1.0)

# And define some numbers. We'll give them a 1/n^2 probability
for n in xrange(1, 10):
    grammar.add_rule('EXPR', str(n), None, 10.0 / n**2)
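
# e.g. '1' enters with weight 10.0, '2' with 2.5, '3' with ~1.1, so small
# numbers are much more likely to be generated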

data = [FunctionData(input=[6], output=12, alpha=0.95)]

#h = MyHypothesis()
#print h.compute_prior(), h.compute_likelihood(data), h
# define a "starting hypothesis". This one is essentially copied by
# all proposers, so the sampler doesn't need to know its type or anything.

h0 = MyHypothesis()
from collections import Counter

count = Counter()
for h in MHSampler(h0, data, steps=10000):
    count[h] += 1

#for h in sorted(count.keys(), key=lambda x: count[x]):
#    print count[h], h.posterior_score, h
Example #19
# BASE-SET is here a set of BASE-OBJECTS (non-args)
grammar.add_rule('BASE-SET', 'set_add_', ['BASE-OBJECT', 'BASE-SET'], 1.0)
grammar.add_rule('BASE-SET', 'set_', [], 1.0)
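# together these two rules generate sets of any size: set_() is the empty
# set and set_add_ adds one BASE-OBJECT to an existing BASE-SET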

grammar.add_rule('BASE-OBJECT', qq('p1'), None, 1.0)
grammar.add_rule('BASE-OBJECT', qq('p2'), None, 1.0)
grammar.add_rule('BASE-OBJECT', qq('n1'), None, 1.0)
grammar.add_rule('BASE-OBJECT', qq('n2'), None, 1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set up data -- true output means attraction (p=positive; n=negative)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

data = [
    FunctionData(input=["p1", "n1"], output=True),
    FunctionData(input=["p1", "n2"], output=True),
    FunctionData(input=["p1", "p1"], output=False),
    FunctionData(input=["p1", "p2"], output=False),
    FunctionData(input=["p2", "n1"], output=True),
    FunctionData(input=["p2", "n2"], output=True),
    FunctionData(input=["p2", "p1"], output=False),
    FunctionData(input=["p2", "p2"], output=False),
    FunctionData(input=["n1", "n1"], output=False),
    FunctionData(input=["n1", "n2"], output=False),
    FunctionData(input=["n1", "p1"], output=True),
    FunctionData(input=["n1", "p2"], output=True),
    FunctionData(input=["n2", "n1"], output=False),
    FunctionData(input=["n2", "n2"], output=False),
    FunctionData(input=["n2", "p1"], output=True),
    FunctionData(input=["n2", "p2"], output=True)
Example #20
def make_data(size=options.datasize):
    return [FunctionData(input=[],
                         output={
                             'h e s': size, 'm e s': size, 'm e g': size,
                             'h e g': size, 'm e n': size, 'h e m': size,
                             'm e k': size, 'k e s': size, 'h e k': size,
                             'k e N': size, 'k e g': size, 'h e n': size,
                             'm e N': size, 'k e n': size, 'h e N': size,
                             'f e N': size, 'g e N': size, 'n e N': size,
                             'n e s': size, 'f e n': size, 'g e n': size,
                             'g e m': size, 'f e m': size, 'g e k': size,
                             'f e k': size, 'f e g': size, 'f e s': size,
                             'n e g': size, 'k e m': size, 'n e m': size,
                             'g e s': size, 'n e k': size
                         })]
Example #21
def parse_nonadjacent(_dir, temperature):
    """
        1. read raw hypos
        2. get fixed llcnts
        3. compute posterior given different data pool sizes

        NOTE: if _dir is previously dumped topn then load it
    """

    if 'nonadjacent_topn' not in _dir:
        topn = set()
        for filename in os.listdir(_dir):
            if 'nonadjacent' in filename and 'seq' not in filename:
                print 'load', filename
                _set = load(open(_dir + filename))
                topn.update([h for h in _set])
        topn = list(topn)

        # fix the llcnts to save time and make curve smooth
        print 'get llcnts...'
        topn = gen_fixlen_llcnts(topn, 5)
        dump(topn, open(_dir + '_nonadjacent_topn' + suffix, 'w'))

    else:
        print 'load', _dir
        topn = load(open(_dir))

    # find all correct hypotheses
    topn = list(topn)
    correct_set = set()
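
    # a hypothesis counts as correct if every string it generates respects
    # the nonadjacent pairings: strings starting with 'a' must end in 'b',
    # 'c' in 'b'/'d', and 'e' in 'b'/'d'/'f'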

    for i in xrange(len(topn)):

        flag = True
        for k, v in topn[i].fixed_ll_counts.iteritems():
            if len(k) < 2:
                continue
            elif k[0] == 'a' and k[-1] in 'b':
                continue
            elif k[0] == 'c' and k[-1] in 'bd':
                continue
            elif k[0] == 'e' and k[-1] in 'bdf':
                continue
            flag = False
            break

        if flag: correct_set.add(i)

    print len(correct_set), 'of', len(topn), 'are correct'

    # get posterior
    w_list = range(2, 25, 1)
    amount_list = range(24, 144, 5)
    posterior_seq = []
    for i in xrange(len(w_list)):
        pool_size = w_list[i]
        language = LongDependency(max_length=pool_size)
        eval_data = [
            FunctionData(input=[],
                         output={
                             e: float(amount_list[i]) / pool_size
                             for e in language.str_sets
                         })
        ]

        for h in topn:
            h.likelihood_temperature = temperature
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for h in topn])

        prob = 0
        for i in xrange(len(topn)):
            if i in correct_set:
                prob += np.exp(topn[i].posterior_score - Z)
        print 'pool_size', pool_size, 'prob', prob
        posterior_seq.append([pool_size, prob])

        #debug
        _list = [h for h in topn]
        _list.sort(key=lambda x: x.posterior_score, reverse=True)
        for i in xrange(3):
            print 'prob: ', np.exp(_list[i].posterior_score - Z),
            print _list[i].fixed_ll_counts
            print _list[i]
        print '=' * 50
        fff()

    dump(posterior_seq, open('nonadjacent_posterior_seq' + suffix, 'w'))
Example #22
female(michelle).
parent(michelle, sasha).
parent(michelle, malia).
parent(barak, sasha).
parent(barak, malia).
female(sasha).
female(malia).

parent(baraksr, barak).
parent(ann, barak).

parent(hussein, baraksr).
parent(akumu, baraksr).
"""

data = [FunctionData(input=["grandparent(baraksr, QUERY)"], output="sahsa", alpha=0.99),
        FunctionData(input=["grandparent(baraksr, QUERY)"], output="malia", alpha=0.99),
        FunctionData(input=["grandparent(ann, QUERY)"], output="sahsa", alpha=0.99),
        FunctionData(input=["grandparent(ann, QUERY)"], output="malia", alpha=0.99),
        FunctionData(input=["grandparent(hussein, QUERY)"], output="barak", alpha=0.99),
        FunctionData(input=["grandparent(akumu, QUERY)"], output="barak", alpha=0.99)
        ]


def make_hypothesis(**kwargs):
    return PrologHypothesis(base_facts=BASE_FACTS, **kwargs)

def make_data(n=1):
    return data*n
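
# A usage sketch, assuming LOTlib's MHSampler is imported elsewhere in
# this module:
#
#     h0 = make_hypothesis()
#     for h in MHSampler(h0, make_data(2), steps=1000):
#         print h.posterior_score, h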

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Example #23
def make_pos(jump, temp):
    """
    1. read raw output
    2. compute precision & recall on nonadjacent and adjacent dependencies
    3. evaluate posterior probability on different data sizes
    4. dump the sequence

    run: mpiexec -n 4
    """

    print 'loading..'
    fff()
    rec = load_hypo('out/simulations/nonadjacent/', ['0'])

    print 'estimating pr'
    fff()
    pr_dict = {}
    _set = set()
    cnt_tmp = {}
    for e in rec:
        for h in e[1]:
            if h in _set: continue
            cnt = Counter([h() for _ in xrange(256)])
            # cnt = Counter([h() for _ in xrange(10)])
            cnt_tmp[h] = cnt
            base = sum(cnt.values())
            num = 0
            for k, v in cnt.iteritems():
                if k is None or len(k) < 2: continue
                if k[0] == 'a' and k[-1] == 'b': num += v
            pr_dict[h] = float(num) / base
            _set.add(h)

    work_list = range(2, 24, jump)
    space_seq = []
    for i in work_list:
        language = LongDependency(max_length=i)

        eval_data = {}
        for e in language.str_sets:
            eval_data[e] = 144.0 / len(language.str_sets)
        eval_data = [FunctionData(input=[], output=eval_data)]

        prob_dict = {}
        ada_dict = {}
        test_list = []

        for h in _set:
            h.likelihood_temperature = temp
            prob_dict[h] = h.compute_posterior(eval_data)
            p, r = language.estimate_precision_and_recall(h, cnt_tmp[h])
            ada_dict[h] = 2 * p * r / (p + r) if p + r != 0 else 0

            test_list.append([
                h.posterior_score, ada_dict[h], pr_dict[h], cnt_tmp[h],
                str(h)
            ])

        Z = logsumexp([h.posterior_score for h in _set])
        test_list.sort(key=lambda x: x[0], reverse=True)

        weighted_x = 0
        weighted_axb = 0
        for e in test_list:
            weighted_x += np.exp(e[0] - Z) * e[1]
            weighted_axb += np.exp(e[0] - Z) * e[2]
        f = open('non_w' + suffix, 'a')
        print >> f, weighted_x, weighted_axb
        f.close()
        # print rank, i, '='*50
        # for i_t in xrange(3):
        #     print 'prob: ', np.exp(test_list[i_t][0] - Z), 'x_f-score',  test_list[i_t][1], 'axb_f-score',  test_list[i_t][2]
        #     print test_list[i_t][3]
        # print test_list[i_t][5].compute_posterior(eval_data)
        # print language.estimate_precision_and_recall(test_list[i_t][5], cnt_tmp[test_list[i_t][5]])
        # fff()
        # dump(test_list, open('test_list_'+str(rank)+'_'+str(i)+suffix, 'w'))

        # space_seq.append([prob_dict, ada_dict])
        print 'rank', rank, i, 'done'
        fff()

    dump([space_seq, pr_dict], open('non_seq' + str(rank) + suffix, 'w'))
Example #24
def runparts(size, x, p):
    # problem: right now only the last partition is recorded; results from
    # the others are never saved
    print "Start: " + str(x) + " on this many: " + str(size)
    try:
        #make new TopN for each data amount
        topn = TopN(N=200, key="posterior_score")
        print "Starting on partition ", p

        # Now we have to go in and fill in the nodes that are nonterminals
        # We can do this with generate
        v = grammar.generate(copy(p))

        h0 = MyHypothesis(grammar, value=v)
        data = [
            FunctionData(input=[],
                         output={
                             'n i k': size,
                             'h i N': size,
                             'f a n': size,
                             'g i f': size,
                             'm a N': size,
                             'f a m': size,
                             'g i k': size,
                             'k a n': size,
                             'f a f': size,
                             'g i n': size,
                             'g i m': size,
                             'g i s': size,
                             's i f': size,
                             's i n': size,
                             'n i s': size,
                             's i m': size,
                             's i k': size,
                             'h a N': size,
                             'f i N': size,
                             'h i m': size,
                             'h i n': size,
                             'h a m': size,
                             'n i N': size,
                             'h i k': size,
                             'f a s': size,
                             'f i n': size,
                             'h i f': size,
                             'n i m': size,
                             'g i N': size,
                             'h a g': size,
                             's i N': size,
                             'n i n': size,
                             'f i m': size,
                             's i s': size,
                             'h i s': size,
                             'n a s': size,
                             'k a s': size,
                             'f i s': size,
                             'n i f': size,
                             'm i n': size,
                             's a s': size,
                             'f a g': size,
                             'k a g': size,
                             'k a f': size,
                             's a m': size,
                             'n a f': size,
                             'n a g': size,
                             'm i N': size,
                             's a g': size,
                             'f i k': size,
                             'k a m': size,
                             'n a n': size,
                             's a f': size,
                             'n a m': size,
                             'm a s': size,
                             'h a f': size,
                             'h a s': size,
                             'n a N': size,
                             'm i s': size,
                             's a n': size,
                             's a N': size,
                             'm i k': size,
                             'f a N': size,
                             'm i m': size,
                             'm a g': size,
                             'm a f': size,
                             'f i f': size,
                             'k a N': size,
                             'h a n': size,
                             'm a n': size,
                             'm a m': size,
                             'm i f': size
                         })
        ]

        for h in break_ctrlc(
                MHSampler(h0, data, steps=options.steps, trace=False)):
            # print "\t", h.posterior_score, h
            topn.add(h)

        return size, set(topn)

    except Exception as e:
        print "*** Exception ignored: ", e
        #if we fail, we can return a blank TopN
        return size, set()
Example #25
def parse_nonadjacent(temperature):
    """
        load the hypothesis space and compute weighted F-scores of nonadjacent dependency on different pool sizes.
        replace the make_pos function

        example script:
            mpiexec -n 12 python parse_hypothesis.py --mode=nonadjacent_mk --temp=100
    """
    eval_data_size = 1024
    global size
    global rank
    pr_dict = {}
    _set = set()

    if rank == 0:
        print 'loading..'
        fff()
        rec = load_hypo('out/simulations/nonadjacent/', ['_'])

        print 'estimating pr'
        fff()

        for e in rec:
            for h in e[1]:

                if h in _set: continue

                cnt = Counter([h() for _ in xrange(eval_data_size)])
                num = 0
                for k, v in cnt.iteritems():
                    if k is None or len(k) < 2: continue
                    if k[0] + k[-1] in ['ab', 'cd', 'ef']: num += v

                pr_dict[h] = float(num) / eval_data_size
                _set.add(h)

        #debug
        _list = [[h, pr] for h, pr in pr_dict.iteritems()]
        _list.sort(key=lambda x: x[1], reverse=True)
        for i in xrange(10):
            print 'p,r: ', _list[i][1],
            print Counter([_list[i][0]() for _ in xrange(256)])
            print _list[i][0]
        print '=' * 50
        fff()

    print "sync..."
    fff()
    pr_dict = comm.bcast(pr_dict, root=0)
    _set = comm.bcast(_set, root=0)
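
    # after the broadcast every rank holds the same hypotheses and precision
    # estimates; each rank then evaluates its own slice of pool sizes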

    # work_list = slice_list(np.arange(2, 65, 2), size)
    work_list = slice_list(np.arange(10, 66, 5), size)
    seq = []
    for s in work_list[rank]:
        wfs = 0.0
        language = LongDependency(max_length=s)
        eval_data = [
            FunctionData(input=[],
                         output={
                             e: float(eval_data_size) / s
                             for e in language.str_sets
                         })
        ]

        for h in _set:
            h.likelihood_temperature = temperature
            h.compute_posterior(eval_data)

        Z = logsumexp([h.posterior_score for h in _set])
        seq.append([
            s,
            sum([pr_dict[h] * np.exp(h.posterior_score - Z) for h in _set])
        ])

        #debug
        _list = [h for h in _set]
        _list.sort(key=lambda x: x.posterior_score, reverse=True)
        print 'pool size: ', s
        for i in xrange(3):
            print 'prob: ', np.exp(_list[i].posterior_score -
                                   Z), 'p,r: ', pr_dict[_list[i]],
            print Counter([_list[i]() for _ in xrange(256)])
            print _list[i]
        print '=' * 50
        fff()

    if rank == 0:
        for i in xrange(1, size):
            seq += comm.recv(source=i)
    else:
        comm.send(seq, dest=0)
        sys.exit(0)

    seq.sort(key=lambda x: x[0])
    f = open('nonadjacent_wfs_seq' + suffix, 'w')
    for s, wfs in seq:
        print >> f, s, wfs
    f.close()
Example #26
def make_data(size=options.datasize):
    return [
        FunctionData(input=[],
                     output={
                         'n i k': size,
                         'h i N': size,
                         'f a n': size,
                         'g i f': size,
                         'm a N': size,
                         'f a m': size,
                         'g i k': size,
                         'k a n': size,
                         'f a f': size,
                         'g i n': size,
                         'g i m': size,
                         'g i s': size,
                         's i f': size,
                         's i n': size,
                         'n i s': size,
                         's i m': size,
                         's i k': size,
                         'h a N': size,
                         'f i N': size,
                         'h i m': size,
                         'h i n': size,
                         'h a m': size,
                         'n i N': size,
                         'h i k': size,
                         'f a s': size,
                         'f i n': size,
                         'h i f': size,
                         'n i m': size,
                         'g i N': size,
                         'h a g': size,
                         's i N': size,
                         'n i n': size,
                         'f i m': size,
                         's i s': size,
                         'h i s': size,
                         'n a s': size,
                         'k a s': size,
                         'f i s': size,
                         'n i f': size,
                         'm i n': size,
                         's a s': size,
                         'f a g': size,
                         'k a g': size,
                         'k a f': size,
                         's a m': size,
                         'n a f': size,
                         'n a g': size,
                         'm i N': size,
                         's a g': size,
                         'f i k': size,
                         'k a m': size,
                         'n a n': size,
                         's a f': size,
                         'n a m': size,
                         'm a s': size,
                         'h a f': size,
                         'h a s': size,
                         'n a N': size,
                         'm i s': size,
                         's a n': size,
                         's a N': size,
                         'm i k': size,
                         'f a N': size,
                         'm i m': size,
                         'm a g': size,
                         'm a f': size,
                         'f i f': size,
                         'k a N': size,
                         'h a n': size,
                         'm a n': size,
                         'm a m': size,
                         'm i f': size
                     })
    ]
Example #27
def make_data(n=1, alpha=0.999):
    return [FunctionData(input=[Obj(shape='square', color='red')], output=True, alpha=alpha),
            FunctionData(input=[Obj(shape='square', color='blue')], output=False, alpha=alpha),
            FunctionData(input=[Obj(shape='triangle', color='blue')], output=False, alpha=alpha),
            FunctionData(input=[Obj(shape='triangle', color='red')], output=False, alpha=alpha)]*n
Example #28
    ALPHA = 0.001
    STEPS = 1000
    N_H = 20

    seqs_trans = {}

    c1 = vanilla_conditions(True, False)[0:2]
    c2 = vanilla_conditions(False, True)[0:1]

    for to_seq in c1:
        for from_seq in c2:
            print_star("")
            print from_seq, to_seq
            data = [
                FunctionData(alpha=ALPHA,
                             input=[from_seq],
                             output={to_seq: len(to_seq)})
            ]
            h0 = MyHypothesis()
            step = 0
            tn = TopN(N=N_H)
            # Stream from the sampler to a printer
            for h in MHSampler(h0,
                               data,
                               steps=STEPS,
                               acceptance_temperature=5.):
                tn.add(h)

            print

            for h in tn.get_all(sorted=True):
Example #29
def uniform_data(size, max_length=None):
    """Spread `size` total observations evenly over a^i b^i strings up to max_length."""
    cnt = Counter()
    num = size * 2 / max_length  # per-string count; there are max_length/2 strings
    for i in xrange(1, max_length / 2 + 1):
        cnt['a' * i + 'b' * i] = num
    return [FunctionData(input=[], output=cnt)]
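
# e.g. uniform_data(12, max_length=12) gives one FunctionData whose output is
# Counter({'ab': 2, 'aabb': 2, 'aaabbb': 2, ..., 'aaaaaabbbbbb': 2})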
Example #30
File: Shared.py Project: sa-/LOTlib
    elif fn.name == 'question_': return '(%s)?' % to_regex(fn.args[0])
    elif fn.name == 'or_': return '(%s|%s)' % tuple(map(to_regex, fn.args))
    elif fn.name == 'str_append_':
        return '%s%s' % (fn.args[0], to_regex(fn.args[1]))
    elif fn.name == 'terminal_':
        return '%s' % fn.args[0]
    elif fn.name == '':
        return to_regex(fn.args[0])
    else:
        assert False, fn


##########################################################
# Define some data

data = [ FunctionData(input=['aaaa'], output=True),\
         FunctionData(input=['aaab'], output=False),\
         FunctionData(input=['aabb'], output=False),\
         FunctionData(input=['aaba'], output=False),\
         FunctionData(input=['aca'],  output=True),\
         FunctionData(input=['aaca'], output=True),\
         FunctionData(input=['a'],    output=True) ]

##########################################################
# make_h0


def make_h0(value=None):
    return RegexHypothesis(grammar, value=value, ALPHA=0.999)