Esempio n. 1
0
def create_passage_yields(p, constructions=None, reference=None, verbose=False):
    """
    Group the tags of the edges extracted for each construction by the yield of the edge's child.

    :param p: passage handed to extract_edges
    :param constructions: forwarded unchanged to extract_edges
    :param reference: forwarded unchanged to extract_edges
    :param verbose: forwarded unchanged to extract_edges
    :returns OrderedDict: Construction ->
                   dict: get_yield(edge.child), i.e. set of terminal indices (excluding punctuation) ->
                         list of tags of the Construction's edges whose yield
                         (excluding remotes and punctuation) is that set
    """
    yield_tags = OrderedDict()
    extracted = extract_edges(p, constructions=constructions, reference=reference, verbose=verbose)
    for construction, edges in extracted.items():
        tags_by_yield = {}
        # Register the (possibly still empty) mapping first so that constructions
        # without any edge are present in the result too.
        yield_tags[construction] = tags_by_yield
        for edge in edges:
            terminals = get_yield(edge.child)
            if terminals not in tags_by_yield:
                tags_by_yield[terminals] = []
            tags_by_yield[terminals].append(edge.tag)
    return yield_tags
Esempio n. 2
0
def create_passage_yields(p, constructions=None, reference=None, verbose=False):
    """
    Map every extracted construction to the tags of its edges, bucketed by child yield.

    All keyword arguments are passed straight through to extract_edges.

    :returns OrderedDict: Construction ->
                   dict: yield of the edge's child as computed by get_yield
                         (set of terminal indices, excluding punctuation) ->
                         list of edge tags (not the edges themselves) for the edges of the
                         Construction whose yield (excluding remotes and punctuation) is that set
    """
    result = OrderedDict()
    per_construction = extract_edges(
        p, constructions=constructions, reference=reference, verbose=verbose)
    for construction, edges in per_construction.items():
        buckets = result[construction] = {}
        for edge in edges:
            key = get_yield(edge.child)
            try:
                tags = buckets[key]
            except KeyError:
                # First edge seen with this yield: open a new bucket for it.
                tags = buckets[key] = []
            tags.append(edge.tag)
    return result
Esempio n. 3
0
def main(args):
    """
    Print, for every passage, the edges extracted for each construction.

    Passages for which no construction yields any edge produce no output.

    :param args: object with ``passages``, ``constructions`` and ``verbose`` attributes
                 (presumably the parsed command-line arguments)
    """
    for passage in get_passages_with_progress_bar(args.passages):
        extracted = constructions.extract_edges(
            passage, constructions=args.constructions, verbose=args.verbose)
        # Keep only the constructions that actually matched something.
        matched = [(construction, edges) for construction, edges in extracted.items() if edges]
        if not matched:
            continue
        # NOTE(review): presumably keeps these prints from clashing with the progress bar - confirm.
        with tqdm.external_write_mode():
            if not args.verbose:
                print("%s:" % passage.ID)
            for construction, edges in matched:
                print("  %s:" % construction.description)
                for edge in edges:
                    print("    %s [%s %s]" % (edge, edge.tag, edge.child))
            # Blank line separating this passage's report from the next.
            print()
Esempio n. 4
0
from ucca import constructions
from ucca.ioutil import read_files_and_dirs

if __name__ == "__main__":
    # The imports above this block only bring in names from ucca, so ArgumentParser
    # would be undefined here (NameError) without this import.
    from argparse import ArgumentParser

    # Command-line interface: one or more passage files plus construction options.
    argparser = ArgumentParser(
        description="Extract linguistic constructions from UCCA corpus.")
    argparser.add_argument("passages",
                           nargs="+",
                           help="the corpus, given as xml/pickle file names")
    # NOTE(review): presumably registers the option that populates args.constructions
    # (read below) - confirm against ucca.constructions.add_argument.
    constructions.add_argument(argparser, False)
    argparser.add_argument("-v",
                           "--verbose",
                           action="store_true",
                           help="print tagged text for each passage")
    args = argparser.parse_args()
    for passage in read_files_and_dirs(args.passages):
        # In verbose mode the passage header is printed up front, before extraction
        # runs, and for every passage whether or not anything is extracted.
        if args.verbose:
            print("%s:" % passage.ID)
        extracted = constructions.extract_edges(
            passage, constructions=args.constructions, verbose=args.verbose)
        # Report only passages where at least one construction has edges.
        if any(extracted.values()):
            # Non-verbose mode prints the header only for passages with results.
            if not args.verbose:
                print("%s:" % passage.ID)
            for construction, edges in extracted.items():
                if edges:
                    print("  %s:" % construction.description)
                    for edge in edges:
                        print("    %s [%s %s]" % (edge, edge.tag, edge.child))
            # Blank line between passages.
            print()
Esempio n. 5
0
from argparse import ArgumentParser

from ucca import constructions
from ucca.ioutil import read_files_and_dirs

if __name__ == "__main__":
    # Build the command-line interface.
    parser = ArgumentParser(
        description="Extract linguistic constructions from UCCA corpus.",
    )
    parser.add_argument(
        "passages",
        nargs="+",
        help="the corpus, given as xml/pickle file names",
    )
    # NOTE(review): presumably adds the option behind args.constructions - confirm.
    constructions.add_argument(parser, False)
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="print tagged text for each passage",
    )
    args = parser.parse_args()

    for passage in read_files_and_dirs(args.passages):
        # Verbose mode: header first, for every passage, before extraction runs.
        if args.verbose:
            print("%s:" % passage.ID)
        found = constructions.extract_edges(
            passage,
            constructions=args.constructions,
            verbose=args.verbose,
        )
        # Nothing extracted for any construction: no report for this passage.
        if not any(found.values()):
            continue
        # Non-verbose mode: header only for passages that have results.
        if not args.verbose:
            print("%s:" % passage.ID)
        for construction, edges in found.items():
            if not edges:
                continue
            print("  %s:" % construction.description)
            for edge in edges:
                print("    %s [%s %s]" % (edge, edge.tag, edge.child))
        # Blank line between passages.
        print()
Esempio n. 6
0
def extract_and_check(p, constructions=None, expected=None):
    """
    Extract construction edges from a passage and optionally check the per-construction counts.

    :param p: passage handed to extract_edges
    :param constructions: forwarded unchanged to extract_edges
    :param expected: dict: construction name -> expected number of extracted edges;
                     if None, extraction still runs but nothing is checked
    :raises AssertionError: if the actual counts differ from ``expected``; the message
                            lists ``name=count`` pairs for the actual and expected sides
    """
    d = extract_edges(p, constructions=constructions)
    if expected is None:
        return
    hist = {c.name: len(e) for c, e in d.items()}

    def describe(counts):
        # Show the counts as well as the names: listing names alone makes both sides
        # of the message look identical when only a count differs.
        try:
            pairs = sorted(counts.items(), key=lambda item: str(item[0]))
        except AttributeError:
            # Not a mapping; fall back to its repr rather than masking the failure.
            return repr(counts)
        return ",".join("%s=%s" % pair for pair in pairs)

    assert hist == expected, " != ".join(describe(h) for h in (hist, expected))