def run(data_amount):
    """Run one MH chain on a data set of the given size; return the top hypotheses found."""
    print "Starting chain on %s data points" % data_amount
    data = makeVariableLexiconData(eval(options.word), options.word, the_context,
                                   n=data_amount, s=options.s,
                                   alpha=options.alpha, verbose=True)

    # Initial hypothesis: a lexicon with a single word, mapped to an empty LOT hypothesis.
    h0 = KinshipLexicon(words=[options.word], alpha=options.alpha)
    h0.set_word(options.word, LOTHypothesis(grammar, value=None, args=['recurse_', 'C', 'X']))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0, data, options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        # if samples_yielded % 100 == 0:
        #     print h.prior, h.likelihood, h
        hyps.add(h)

    return hyps
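# A minimal sketch of how run() might be driven (hypothetical: the original driver code
# is not shown in this file). `options.out_loc` is an assumed output-path flag; any such
# option would need to exist in the script's actual option parser.
# if __name__ == "__main__":
#     top_hypotheses = run(options.data_size)
#     with open(options.out_loc, 'w') as f:
#         pickle.dump(top_hypotheses, f)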
    for dp in hypothesized_word_data:
        if dp in true_word_data:
            correct_count += 1

    # Figure out if it was characteristic or defining
    hyptype = hypothesis.value[w].value.args[0].returntype

    # Add it to the results. Ideally we would have a line like this for every
    # different context and for all the contexts together.
    findings.append([hyptype, hypothesis.prior,
                     hypothesis.point_ll[0], hypothesis.point_ll[1], hypothesis.point_ll[2],
                     w, correct_count,
                     len(hypothesized_word_data), len(true_word_data),
                     hypothesis.value])

    return findings


#############################################################################################
# MAIN CODE
#############################################################################################

# Generate one large data set for each of the three contexts (s = 0, 1, 2).
print "Making data sets of size %s . . ." % options.data_size
huge_data_0 = makeVariableLexiconData(eval(options.word), options.word, the_context,
                                      n=options.data_size, s=0, alpha=options.alpha)
huge_data_1 = makeVariableLexiconData(eval(options.word), options.word, the_context,
                                      n=options.data_size, s=1, alpha=options.alpha)
huge_data_2 = makeVariableLexiconData(eval(options.word), options.word, the_context,
                                      n=options.data_size, s=2, alpha=options.alpha)

# Pool hypotheses from every pickled chain output in the input directory.
print "Loading hypothesis space . . ."
hypothesis_space = set()
for i in os.listdir(options.input_loc):
    with open(options.input_loc + i, 'r') as f:
        hypothesis_space.update(pickle.load(f))

print "Assessing hypotheses . . ."
results = []
result_strings = []
for s, h in enumerate(hypothesis_space):
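    # Hypothetical sketch of the loop body, which is truncated in this file: score each
    # hypothesis against the three context data sets and collect the findings. `assess_hyp`
    # stands in for the evaluation function whose tail appears above; its real name and
    # the exact normalization of point_ll are assumptions, not shown in the source.
    # h.point_ll = [h.compute_likelihood(huge_data_0) / float(options.data_size),
    #               h.compute_likelihood(huge_data_1) / float(options.data_size),
    #               h.compute_likelihood(huge_data_2) / float(options.data_size)]
    # results.extend(assess_hyp(h))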