Example #1
class Subcats(Tabulation(
        'frames', 
        value_maker=Counter, reducer=len, 
        additional_info_maker=compose(
            '; '.join, 
            lambda vs: map(lambda v: '\n\\glosE{%s}{}' % v, vs),
            lambda vs: map(operator.itemgetter(0), vs),
            lambda e: e.most_common(5)),
        separator='&',
        row_terminator=' ',
        additional_row_terminator='\\\\ \n',
        ),
    Filter):
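    '''Tabulates verb subcategorisation frames against the verbs that head them.

    Each frame signature maps to a Counter of head verbs; the report gives, per
    frame, how many distinct verbs were seen with it (reducer=len) and its five
    most frequent verbs rendered as glosE gloss macros, joined with "; ".'''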
    
    def __init__(self):
        super(Subcats, self).__init__()
        
    def accept_derivation(self, bundle):
        # Each match is a VP immediately dominating a verbal tag (VV/VA/VC/VE,
        # bound to T) but no coordinating conjunction; the verb's text is
        # counted under the VP's frame signature.
        for node, ctx in find_all(bundle.derivation, r'/VP/ < /V[VACE]/=T ! < /CC/', with_context=True):
            lex = ':'.join(ctx.t.text())
            self.frames[signature(node)][lex] += 1
        
    def output(self):
        super(Subcats, self).output()
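
A minimal sketch of how a filter like this might be driven; the `bundles` iterable (and whatever produces it) is assumed here and is not part of the example:

subcats = Subcats()
for bundle in bundles:              # each bundle is assumed to expose a .derivation tree
    subcats.accept_derivation(bundle)
subcats.output()                    # emits the '&'-separated tabulation rows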
Example #2
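            # Run each query expression through the lexer (PLY-style
            # lex.input / lex.token) purely so its token stream can be logged.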
            lex.input(expression)
            for tok in iter(lex.token, None):
                debug("\t%s %s", tok.type, tok.value)
    
    # Compile each query expression once, then test every node of the derivation
    # against every query below; a match invokes the mapped callback, with any
    # named captures from the match context passed as keyword arguments.
    queries = [yacc.parse(expression) for expression in query_callback_map.keys()]
    for node in nodes(deriv):
        for query_expr, query_str in izip(queries, query_callback_map.keys()):
            context = Context()
            if query_expr.is_satisfied_by(node, context):
                if context:
                    query_callback_map[query_str](node, **smash_key_case(context))
                else:
                    query_callback_map[query_str](node)
    
# find_all is tgrep itself; find_first yields only the first match of the stream.
find_all = tgrep
find_first = compose(curry(take, 1), find_all)

SmallSubtreeThreshold = 10
def find_small(*args, **kwargs):
    # Keep only tgrep matches whose subtree spans at most SmallSubtreeThreshold leaves.
    matches = tgrep(*args, **kwargs)
    with_context = kwargs.get('with_context', False)

    if with_context:
        return ifilter(lambda (match, context): match.leaf_count() <= SmallSubtreeThreshold,
                       matches)
    else:
        return ifilter(lambda match: match.leaf_count() <= SmallSubtreeThreshold, matches)
            
SmallSentenceThreshold = 18
def find_small_sents(*args, **kwargs):
    deriv = args[0]
Example #3

# cache the last locator sequence so it can be referred to quickly
last_locator_bits = None

# Process the edit commands one document at a time: load that document's trees,
# apply each locator/instruction pair to the derivation it names, then write
# the document back out.
for (sec, doc), commands in group_by_derivation(
        parse_instruction_lines(sys.stdin.readlines())):
    cur_trees = load_trees(base, sec, doc, extension, guessers_to_use)

    for ((_, _, deriv), command) in commands:
        print command

        cur_bundle = cur_trees[deriv]

        locator, instr = command.split(' ', 2)
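        # The locator is a ';'-separated path: desugar() expands each component
        # against the previous locator and the current derivation, and maybe_int
        # coerces numeric components to ints.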
        locator_bits = list(
            flatten(
                map(
                    compose(
                        maybe_int, lambda value: desugar(
                            value, last_locator_bits, cur_bundle.derivation)),
                    locator.split(';'))))

        cur_bundle.derivation = process(cur_bundle.derivation, locator_bits,
                                        instr)

        last_locator_bits = locator_bits

    # Write tree back here
    write_doc(opts.out, extension, sec, doc, cur_trees)
Example #4
def group_by_derivation(lines):
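    '''Sort the (spec, line) pairs by their full (sec, doc, deriv) spec, then
    group them by (sec, doc), so that all edits touching one document are
    handled together; the *_comparator helpers are key extractors for
    sorted/groupby, not comparators.'''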
    def doc_comparator( ( (sec, doc, deriv), line) ): return (sec, doc)
    def spec_comparator( ( spec, line ) ): return spec
    return groupby(sorted(lines, key=spec_comparator), doc_comparator)

# cache the last locator sequence so it can be referred to quickly
last_locator_bits = None

for (sec, doc), commands in group_by_derivation(parse_instruction_lines(sys.stdin.readlines())):
    cur_trees = load_trees(base, sec, doc, extension, guessers_to_use)

    for ((_, _, deriv), command) in commands:
        print command

        cur_bundle = cur_trees[deriv]

        locator, instr = command.split(' ', 2)
        locator_bits = list(flatten(map(
            compose(maybe_int, 
                    lambda value: 
                       desugar(value, last_locator_bits, cur_bundle.derivation)), 
            locator.split(';'))))

        cur_bundle.derivation = process(cur_bundle.derivation, locator_bits, instr)

        last_locator_bits = locator_bits

    # Write tree back here
    write_doc(opts.out, extension, sec, doc, cur_trees)

Example #5
    queries = [
        yacc.parse(expression) for expression in query_callback_map.keys()
    ]
    for node in nodes(deriv):
        for query_expr, query_str in izip(queries, query_callback_map.keys()):
            context = Context()
            if query_expr.is_satisfied_by(node, context):
                if context:
                    query_callback_map[query_str](node,
                                                  **smash_key_case(context))
                else:
                    query_callback_map[query_str](node)


find_all = tgrep
find_first = compose(curry(take, 1), find_all)

SmallSubtreeThreshold = 10


def find_small(*args, **kwargs):
    matches = tgrep(*args, **kwargs)
    with_context = kwargs.get('with_context', False)

    if with_context:
        return ifilter(
            lambda (match, context): match.leaf_count() <= SmallSubtreeThreshold,
            matches)
    else:
        return ifilter(