Example #1
import pickle

def construct_hypothesis_space(data_size):
    all_hypotheses = TopN()
    print('Data size:', data_size)
    for i in range(RUNS):
        print('Run:', i)
        hypotheses = TopN(25)
        data = generate_data(data_size)
        learner = GriceanQuantifierLexicon(make_my_hypothesis,
                                           my_weight_function)
        for w in target.all_words():
            learner.set_word(w, make_my_hypothesis())
        j = 0
        for h in MHSampler(learner, data, SAMPLES, skip=0):
            hypotheses.add(h)
            j += 1
            # Checkpoint the current top hypotheses to disk every 1000 samples.
            if j % 1000 == 0:
                with open('data/hypset_' + GRAMMAR_TYPE + '_' + str(data_size) +
                          '_' + str(j) + '.pickle', 'wb') as f:
                    pickle.dump(hypotheses, f)
            #sstr = str(h)
            #sstr = re.sub("[_ ]", "", sstr)
            #sstr = re.sub("presup", u"\u03BB A B . presup", sstr)
            #print sstr
        all_hypotheses.update(hypotheses)
    return all_hypotheses
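The function above checkpoints each run's top hypotheses under data/ and returns the pooled set. A minimal driver sketch, not part of the original project, assuming the project-level constants (RUNS, SAMPLES, GRAMMAR_TYPE) are defined and picking an arbitrary range of data sizes:

if __name__ == "__main__":
    # Pool the best hypotheses found across several data amounts (N=100 is arbitrary).
    pooled = TopN(N=100)
    for data_size in range(100, 1100, 100):
        pooled.update(construct_hypothesis_space(data_size))
    # Hypothetical output path; any writable location works.
    with open('data/pooled_hypotheses.pickle', 'wb') as f:
        pickle.dump(pooled, f)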
Example #2
import pickle

def get_hypotheses():
    all_hypotheses = TopN()
    for data_size in range(100, MAX_DATA_SIZE, 100):
        # Load the largest checkpoint available for this data size, falling back
        # to smaller sample counts if a later checkpoint was never written.
        hypotheses = None
        sample_size = 10000
        while hypotheses is None:
            try:
                with open('data/hypset_' + GRAMMAR_TYPE + '_' + str(data_size) +
                          '_' + str(sample_size) + '.pickle', 'rb') as f:
                    hypotheses = pickle.load(f)
            except IOError:
                sample_size -= 1000
        all_hypotheses.update(hypotheses)
    return all_hypotheses
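Probing sample sizes one by one works, but an alternative sketch is to glob for whatever checkpoints Example #1 actually wrote and load the one with the largest sample count. The helper below is hypothetical and assumes the same 'data/hypset_<grammar>_<datasize>_<samples>.pickle' naming scheme and a defined GRAMMAR_TYPE:

import glob
import pickle

def load_latest_checkpoint(data_size):
    # Hypothetical helper: pick the checkpoint with the largest sample count.
    pattern = 'data/hypset_' + GRAMMAR_TYPE + '_' + str(data_size) + '_*.pickle'
    paths = glob.glob(pattern)
    if not paths:
        return None
    latest = max(paths, key=lambda p: int(p.rsplit('_', 1)[1].split('.')[0]))
    with open(latest, 'rb') as f:
        return pickle.load(f)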
Example #3
File: Run.py Project: TerryLew/BinLOTlib
import pickle

from LOTlib import break_ctrlc
from LOTlib.TopN import TopN
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from Model import *
from TargetConcepts import TargetConcepts

NDATA = 20 # How many data points for each function?
NSTEPS = 100000
BEST_N = 500 # How many top hypotheses to store for each concept

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = TopN(N=BEST_N)

if __name__ == "__main__":
    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TargetConcepts):

        # Set up the hypothesis
        h0 = make_hypothesis()

        # Set up some data
        data = make_data(NDATA, f)

        # Now run some MCMC
        fs = TopN(N=BEST_N, key="posterior_score")
        fs.add(break_ctrlc(MHSampler(h0, data, steps=NSTEPS, trace=False)))

        all_hypotheses.update(fs)

    with open("hypotheses.pkl", 'wb') as f:
        pickle.dump(all_hypotheses, f)
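A minimal sketch of reading the saved set back for later analysis, assuming the same LOTlib environment so the pickled hypothesis classes can be resolved:

import pickle

with open("hypotheses.pkl", "rb") as f:
    saved = pickle.load(f)

# Inspect the surviving hypotheses; iterating a TopN is assumed to yield its
# stored hypotheses, as in the LOTlib examples above.
for h in saved:
    print(h.posterior_score, h)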
Example #4
    # Bundle the responses and their weights as the data for the sampler.
    databundle = [response, weights]
    generator = MHSampler(learner, databundle, STEPS, skip=SKIP)
    for g in generator:
        # Convert each sampled vectorized lexicon back to a SimpleLexicon and
        # store it in the top-N set, scored by its posterior.
        hypset.add(VectorizedLexicon_to_SimpleLexicon(g), g.posterior_score)
    return hypset


####################################################################################
## Main running
####################################################################################

# Load the trees from a file
with open(IN_PATH, 'rb') as f:
    my_finite_trees = pickle.load(f)
print("# Done loading", len(my_finite_trees), "trees")

# Give an index to each hypothesis
hyp2index = {}
for i, h in enumerate(my_finite_trees):
    hyp2index[h] = i

# Compute or load proposals
get_proposal_dist(my_finite_trees)

DATA_AMOUNTS = range(0, 2050, 100)
allret = map(run, DATA_AMOUNTS)

# Combine into a single hypothesis set and save
outhyp = TopN()
for r in allret:
    outhyp.update(r)
with open(OUT_PATH, 'wb') as f:
    pickle.dump(outhyp, f)