Ejemplo n.º 1
0
def main(n, parser_dir, text_loc=None):
    parser = redshift.parser.load_parser(parser_dir)
    text = open(text_loc) if text_loc is not None else sys.stdin
    w = 0; r = 0
    w_scores = []; r_scores = []
    for sent_str in text:
        versions = [sent_str]
        seen = set()
        seen.add(sent_str.strip())
        tokens = sent_str.split()
        if len(tokens) < 3:
            continue
        for i in range(n):
            random.shuffle(tokens)
            reordering = ' '.join(tokens)
            if reordering not in seen:
                versions.append(reordering)
                seen.add(reordering)
        if len(seen) < 3:
            continue
        parsed = redshift.io_parse.read_pos('\n'.join(versions))
        parser.add_parses(parsed)
        scores = parsed.scores
        idx, score = max(enumerate(scores), key=lambda i_s: i_s[1])
        if idx == 0:
            r += 1
            r_scores.append(score)
        else:
            w += 1
            w_scores.append(score) 
    print r, w, float(r) / r + w
Ejemplo n.º 2
0
def main(parser_dir,
         text_loc,
         out_dir,
         use_gold=False,
         profile=False,
         debug=False):
    if debug:
        redshift.parser.set_debug(debug)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    yield "Loading parser"
    parser = redshift.parser.load_parser(parser_dir)
    sentences = redshift.io_parse.read_pos(open(text_loc).read())
    #sentences.connect_sentences(1700)
    if profile:
        cProfile.runctx("parser.add_parses(sentences,gold=gold_sents)",
                        globals(), locals(), "Profile.prof")
        s = pstats.Stats("Profile.prof")
        s.strip_dirs().sort_stats("time").print_stats()
    else:
        t1 = time.time()
        parser.add_parses(sentences)
        t2 = time.time()
        print '%d sents took %0.3f ms' % (sentences.length, (t2 - t1) * 1000.0)
    sentences.write_parses(open(os.path.join(out_dir, 'parses'), 'w'))
Ejemplo n.º 3
0
 def wrapped(l1):
     """Train a parser with regularisation constant *l1*, return its
     dev-set accuracy (percent), and memoise it on ``wrapped.models``."""
     # A fresh, clean parser per C value; the remaining settings come
     # from the enclosing scope.
     model = redshift.parser.Parser(parser_dir, solver_type=solver_type,
                                    clean=True, C=l1)
     train_sents = redshift.io_parse.read_conll(train_loc.open().read())
     model.train(train_sents)
     # Evaluate on the dev set against its gold parses.
     gold_dev = redshift.io_parse.read_conll(dev_loc.open().read())
     dev_sents = redshift.io_parse.read_pos(get_pos(dev_loc.open().read()))
     score = model.add_parses(dev_sents, gold=gold_dev) * 100
     wrapped.models[l1] = score
     return score
Ejemplo n.º 4
0
def main(parser_dir, text_loc, out_dir, use_gold=False, profile=False, debug=False):
    if debug:
        redshift.parser.set_debug(debug)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    yield "Loading parser"
    parser = redshift.parser.load_parser(parser_dir)
    sentences = redshift.io_parse.read_pos(open(text_loc).read())
    #sentences.connect_sentences(1700)
    if profile:
        cProfile.runctx("parser.add_parses(sentences,gold=gold_sents)",
                        globals(), locals(), "Profile.prof")
        s = pstats.Stats("Profile.prof")
        s.strip_dirs().sort_stats("time").print_stats()
    else:
        t1 = time.time()
        parser.add_parses(sentences)
        t2 = time.time()
        print '%d sents took %0.3f ms' % (sentences.length, (t2-t1)*1000.0)
    sentences.write_parses(open(os.path.join(out_dir, 'parses'), 'w'))