def main(n, parser_dir, text_loc=None): parser = redshift.parser.load_parser(parser_dir) text = open(text_loc) if text_loc is not None else sys.stdin w = 0; r = 0 w_scores = []; r_scores = [] for sent_str in text: versions = [sent_str] seen = set() seen.add(sent_str.strip()) tokens = sent_str.split() if len(tokens) < 3: continue for i in range(n): random.shuffle(tokens) reordering = ' '.join(tokens) if reordering not in seen: versions.append(reordering) seen.add(reordering) if len(seen) < 3: continue parsed = redshift.io_parse.read_pos('\n'.join(versions)) parser.add_parses(parsed) scores = parsed.scores idx, score = max(enumerate(scores), key=lambda i_s: i_s[1]) if idx == 0: r += 1 r_scores.append(score) else: w += 1 w_scores.append(score) print r, w, float(r) / r + w
def main(parser_dir, text_loc, out_dir, use_gold=False, profile=False, debug=False): if debug: redshift.parser.set_debug(debug) if not os.path.exists(out_dir): os.mkdir(out_dir) yield "Loading parser" parser = redshift.parser.load_parser(parser_dir) sentences = redshift.io_parse.read_pos(open(text_loc).read()) #sentences.connect_sentences(1700) if profile: cProfile.runctx("parser.add_parses(sentences,gold=gold_sents)", globals(), locals(), "Profile.prof") s = pstats.Stats("Profile.prof") s.strip_dirs().sort_stats("time").print_stats() else: t1 = time.time() parser.add_parses(sentences) t2 = time.time() print '%d sents took %0.3f ms' % (sentences.length, (t2 - t1) * 1000.0) sentences.write_parses(open(os.path.join(out_dir, 'parses'), 'w'))
def wrapped(l1):
    """Train a fresh parser with regularization strength C=l1 and return
    its dev-set accuracy as a percentage; the result is also recorded in
    wrapped.models keyed by l1."""
    model = redshift.parser.Parser(parser_dir, solver_type=solver_type,
                                   clean=True, C=l1)
    gold_dev = redshift.io_parse.read_conll(dev_loc.open().read())
    training_data = redshift.io_parse.read_conll(train_loc.open().read())
    model.train(training_data)
    dev_sents = redshift.io_parse.read_pos(get_pos(dev_loc.open().read()))
    accuracy = 100 * model.add_parses(dev_sents, gold=gold_dev)
    wrapped.models[l1] = accuracy
    return accuracy
def main(parser_dir, text_loc, out_dir, use_gold=False, profile=False, debug=False): if debug: redshift.parser.set_debug(debug) if not os.path.exists(out_dir): os.mkdir(out_dir) yield "Loading parser" parser = redshift.parser.load_parser(parser_dir) sentences = redshift.io_parse.read_pos(open(text_loc).read()) #sentences.connect_sentences(1700) if profile: cProfile.runctx("parser.add_parses(sentences,gold=gold_sents)", globals(), locals(), "Profile.prof") s = pstats.Stats("Profile.prof") s.strip_dirs().sort_stats("time").print_stats() else: t1 = time.time() parser.add_parses(sentences) t2 = time.time() print '%d sents took %0.3f ms' % (sentences.length, (t2-t1)*1000.0) sentences.write_parses(open(os.path.join(out_dir, 'parses'), 'w'))