class Subcats(Tabulation('frames',
                         value_maker=Counter,
                         reducer=len,
                         additional_info_maker=compose(
                             '; '.join,
                             lambda glosses: map(lambda gloss: '\n\\glosE{%s}{}' % gloss, glosses),
                             lambda pairs: map(operator.itemgetter(0), pairs),
                             lambda counts: counts.most_common(5)),
                         separator='&',
                         row_terminator=' ',
                         additional_row_terminator='\\\\ \n'),
              Filter):
    '''Tabulates verb subcategorisation frames.

    Rows are keyed by frame signature; each cell holds a Counter over the
    lexical items seen with that frame. The additional-info column renders
    the five most frequent items as \\glosE gloss macros (LaTeX output,
    '&'-separated columns).
    '''

    def __init__(self):
        super(Subcats, self).__init__()

    def accept_derivation(self, bundle):
        '''Counts the lexical heads of non-coordinated VPs per frame signature.'''
        # Match a VP dominating a verbal tag (VV/VA/VC/VE, captured as =T)
        # but no CC child, i.e. skip coordinated VPs.
        for match, context in find_all(bundle.derivation,
                                       r'/VP/ < /V[VACE]/=T ! < /CC/',
                                       with_context=True):
            head = ':'.join(context.t.text())
            self.frames[signature(match)][head] += 1

    def output(self):
        super(Subcats, self).output()
lex.input(expression) for tok in iter(lex.token, None): debug("\t%s %s", tok.type, tok.value) queries = [yacc.parse(expression) for expression in query_callback_map.keys()] for node in nodes(deriv): for query_expr, query_str in izip(queries, query_callback_map.keys()): context = Context() if query_expr.is_satisfied_by(node, context): if context: query_callback_map[query_str](node, **smash_key_case(context)) else: query_callback_map[query_str](node) find_all = tgrep find_first = compose(curry(take, 1), find_all) SmallSubtreeThreshold = 10 def find_small(*args, **kwargs): matches = tgrep(*args, **kwargs) with_context = kwargs['with_context'] if with_context: return ifilter(lambda (match, context): match.leaf_count() <= SmallSubtreeThreshold, matches) else: return ifilter(lambda match: match.leaf_count() <= SmallSubtreeThreshold, matches) SmallSentenceThreshold = 18 def find_small_sents(*args, **kwargs): deriv = args[0]
# cache the last locator sequence so it can be referred to quickly
last_locator_bits = None
for (sec, doc), commands in group_by_derivation(
        parse_instruction_lines(sys.stdin.readlines())):
    cur_trees = load_trees(base, sec, doc, extension, guessers_to_use)
    for ((_, _, deriv), command) in commands:
        print(command)
        cur_bundle = cur_trees[deriv]
        # BUG FIX: maxsplit was 2, so any instruction containing a space
        # produced three fields and the two-name unpack raised ValueError.
        # Split once: everything after the first space is the instruction.
        locator, instr = command.split(' ', 1)
        # Desugar each ';'-separated locator component against the previous
        # locator sequence and the current derivation, coercing numeric
        # components to int.
        locator_bits = list(flatten(map(
            compose(maybe_int,
                    lambda value: desugar(value, last_locator_bits,
                                          cur_bundle.derivation)),
            locator.split(';'))))
        cur_bundle.derivation = process(cur_bundle.derivation, locator_bits, instr)
        last_locator_bits = locator_bits
    # Write tree back here
    # NOTE(review): original indentation was lost; writing per document,
    # inside the outer loop, so every edited document is persisted — confirm.
    write_doc(opts.out, extension, sec, doc, cur_trees)
def group_by_derivation(lines):
    '''Groups parsed instruction lines by (section, document).

    Each element of lines is ((sec, doc, deriv), line). The lines are sorted
    by their full spec first, since groupby only merges adjacent runs.'''
    def spec_key(item):
        spec, line = item
        return spec

    def doc_key(item):
        # The grouping key ignores the derivation index.
        (sec, doc, deriv), line = item
        return (sec, doc)

    return groupby(sorted(lines, key=spec_key), doc_key)


# cache the last locator sequence so it can be referred to quickly
last_locator_bits = None
for (sec, doc), commands in group_by_derivation(
        parse_instruction_lines(sys.stdin.readlines())):
    cur_trees = load_trees(base, sec, doc, extension, guessers_to_use)
    for ((_, _, deriv), command) in commands:
        print(command)
        cur_bundle = cur_trees[deriv]
        # BUG FIX: maxsplit was 2, so any instruction containing a space
        # produced three fields and the two-name unpack raised ValueError.
        # Split once: everything after the first space is the instruction.
        locator, instr = command.split(' ', 1)
        # Desugar each ';'-separated locator component against the previous
        # locator sequence and the current derivation, coercing numeric
        # components to int.
        locator_bits = list(flatten(map(
            compose(maybe_int,
                    lambda value: desugar(value, last_locator_bits,
                                          cur_bundle.derivation)),
            locator.split(';'))))
        cur_bundle.derivation = process(cur_bundle.derivation, locator_bits, instr)
        last_locator_bits = locator_bits
    # Write tree back here
    # NOTE(review): original indentation was lost; writing per document,
    # inside the outer loop, so every edited document is persisted — confirm.
    write_doc(opts.out, extension, sec, doc, cur_trees)
queries = [ yacc.parse(expression) for expression in query_callback_map.keys() ] for node in nodes(deriv): for query_expr, query_str in izip(queries, query_callback_map.keys()): context = Context() if query_expr.is_satisfied_by(node, context): if context: query_callback_map[query_str](node, **smash_key_case(context)) else: query_callback_map[query_str](node) find_all = tgrep find_first = compose(curry(take, 1), find_all) SmallSubtreeThreshold = 10 def find_small(*args, **kwargs): matches = tgrep(*args, **kwargs) with_context = kwargs['with_context'] if with_context: return ifilter( lambda (match, context): match.leaf_count() <= SmallSubtreeThreshold, matches) else: return ifilter(