Ejemplo n.º 1
0
def copy_annotation(passages, conllu, by_id=False, as_array=True,
                    as_extra=True, verbose=False, lang=None):
    """
    Copy layer-0 annotation onto each passage from passages read from `conllu`.

    This is a generator: nothing is read until the first passage is requested.

    :param passages: iterable of passages to receive the annotation
    :param conllu: source of annotated passages, handed to the passage reader
    :param by_id: if true, read all annotated passages up front and match them
                  to `passages` by ID; otherwise consume them in order, one
                  annotated passage per input passage
    :param as_array: if true, overwrite the layer-0 `docs()` contents of the
                     passage with those of the annotated passage
    :param as_extra: if true, call `copy_tok_to_extra` for each pair of
                     terminals (paired positionally with `zip`)
    :param verbose: if true, print the ID of each annotated passage used
    :param lang: forwarded to `copy_tok_to_extra`
    :return: generator of the same passages, after annotation was copied
    :raise ValueError: if no annotated passage is available for a passage
    """
    if by_id:
        # Index every annotated passage by ID so lookups can be out of order.
        source = {}
        for sentence in get_passages_with_progress_bar(
                conllu, converters=CONVERTERS, desc="Reading '%s'" % conllu):
            source[sentence.ID] = sentence
    else:
        # Lazy stream: annotated passages are consumed in lockstep with input.
        source = get_passages(conllu, converters=CONVERTERS)

    for passage in passages:
        try:
            if by_id:
                annotated = source[passage.ID]
            else:
                annotated = next(source)
        except (KeyError, StopIteration) as e:
            raise ValueError(
                "Missing annotation for passage ID '%s', by_id=%s" %
                (passage.ID, by_id)) from e

        if verbose:
            with external_write_mode():
                print("Reading annotation from '%s'" % annotated.ID)

        if as_array:
            # Read the source first, then replace the target contents in place.
            annotated_docs = annotated.layer(layer0.LAYER_ID).docs()
            passage.layer(layer0.LAYER_ID).docs()[:] = annotated_docs

        if as_extra:
            own_terminals = passage.layer(layer0.LAYER_ID).all
            annotated_terminals = annotated.layer(layer0.LAYER_ID).all
            for target, origin in zip(own_terminals, annotated_terminals):
                copy_tok_to_extra(origin, target, lang=lang)

        yield passage
Ejemplo n.º 2
0
def main(args):
    """
    Visualize each passage as TikZ text, standoff text, or a matplotlib figure.

    :param args: parsed command-line arguments; the attributes read here are
                 `out_dir`, `tikz`, `standoff`, `passages`, `sentences`,
                 `node_ids` and `format`
    """
    if args.out_dir:
        os.makedirs(args.out_dir, exist_ok=True)
        if not args.tikz:
            # Figures are saved to files, so pick the non-interactive backend
            # before pyplot gets imported further down.
            import matplotlib
            matplotlib.use('Agg')

    textual = args.tikz or args.standoff
    if textual and not args.out_dir:
        # Text goes to stdout: read without a progress bar.
        stream = get_passages(args.passages)
    else:
        stream = get_passages_with_progress_bar(args.passages,
                                                desc="Visualizing")
    if args.sentences:
        stream = (part for whole in stream for part in split2sentences(whole))

    for passage in stream:
        if args.tikz:
            print_text(args, visualization.tikz(passage),
                       passage.ID + ".tikz.txt")
            continue
        if args.standoff:
            print_text(args, visualization.standoff(passage),
                       passage.ID + ".ann")
            continue

        import matplotlib.pyplot as plt
        # Figure size scales with the number of layer-0 units.
        n_terminals = len(passage.layer(layer0.LAYER_ID).all)
        width = n_terminals * 19 / 27
        height = width * 10 / 19
        plt.figure(passage.ID, figsize=(width, height))
        visualization.draw(passage, node_ids=args.node_ids)
        if not args.out_dir:
            plt.show()
            continue
        target = os.path.join(args.out_dir, passage.ID + "." + args.format)
        plt.savefig(target)
        plt.close()
Ejemplo n.º 3
0
def read(fp, text=None, prefix=None):
    """
    Yield a graph for every passage listed in the open file `fp`.

    Each line of `fp` is stripped and resolved relative to the directory that
    contains `fp` itself. Passages whose conversion raises are reported with
    `print` and skipped.

    :param fp: open file object with a `name` attribute, one entry per line
    :param text: forwarded to `passage2graph`
    :param prefix: forwarded to `passage2graph`
    :return: generator of `(graph, None)` pairs
    """
    base_dir = Path(fp.name).parent
    # Consume the listing eagerly so `fp` is fully read before loading starts.
    listed = [base_dir / line.strip() for line in fp]
    for passage in get_passages(str(path) for path in listed):
        try:
            graph = passage2graph(passage, text, prefix)
        except Exception as error:
            # Best effort: report the failure and move on to the next passage.
            print(error)
        else:
            yield graph, None
Ejemplo n.º 4
0
 def run(self, guessed: List[str], ref: List[str], **kwargs):
     """
     Evaluate guessed passages against reference passages and print a summary.

     Passages from both sides are indexed by ID; only IDs present on both
     sides are evaluated, in sorted ID order.

     :param guessed: handed to `get_passages` to load the guessed passages
     :param ref: handed to `get_passages` to load the reference passages
     :param kwargs: accepted and discarded
     """
     del kwargs
     indexed = []
     for source in (guessed, ref):
         by_id = {}
         for passage in get_passages(source, converters=self.converters()):
             by_id[passage.ID] = passage
         indexed.append(by_id)
     guessed_by_id, ref_by_id = indexed

     results = []
     for passage_id, guessed_passage in sorted(guessed_by_id.items()):
         if passage_id in ref_by_id:
             results.append(
                 self.evaluate(guessed_passage, ref_by_id[passage_id]))
     SummaryStatistics.aggregate(results).print()
Ejemplo n.º 5
0
def main(args):
    """
    Join passages into one passage per group and write each result to a file.

    With `args.join_by_prefix`, passages are grouped by their ID minus its
    last three characters and every group is joined separately; otherwise all
    passages are joined into a single passage named after the first one.

    :param args: parsed command-line arguments; the attributes read here are
                 `filenames`, `join_by_prefix`, `remarks`, `outdir`, `prefix`
                 and `binary`
    """
    passages = list(get_passages(args.filenames))
    if not passages:
        # Nothing was loaded: avoid indexing passages[0] on an empty list.
        print("No passages to join", file=sys.stderr)
        return
    if args.join_by_prefix:
        subsets = defaultdict(list)
        for passage in passages:
            subsets[passage.ID[:-3]].append(passage)
    else:
        subsets = {passages[0].ID: passages}
    for passage_id, subset in sorted(subsets.items()):
        print("Joining passages " + ", ".join(passage.ID for passage in subset), file=sys.stderr)
        # Join only the members of this group, not the whole input list.
        joined = ucca.convert.join_passages(subset, passage_id=passage_id, remarks=args.remarks)
        outfile = "%s/%s.%s" % (args.outdir, args.prefix + joined.ID, "pickle" if args.binary else "xml")
        print("Writing joined passage file '%s'..." % outfile, file=sys.stderr)
        passage2file(joined, outfile, args.binary)
Ejemplo n.º 6
0
def read_specs(args, converters=None):
    """
    Yield one AnnotationSpecification per filename pattern.

    Patterns come from `args.filenames` and, if `args.list_file` is set, from
    that file as well. Each non-blank, non-comment line of the list file is
    split on whitespace into up to seven fields: pattern, out_dir, lang,
    udpipe, stanfordnlp, conllu, join. Fields missing from a line fall back
    to the corresponding attribute of `args`.

    :param args: parsed command-line arguments; the attributes read here are
                 `filenames`, `list_file`, `out_dir`, `lang`, `udpipe`,
                 `stanfordnlp`, `conllu` and `join`
    :param converters: forwarded to `get_passages`
    :return: generator of AnnotationSpecification objects
    :raise IOError: if a pattern matches no file
    """
    specs = [(pattern, args.out_dir, args.lang, args.udpipe, args.stanfordnlp,
              args.conllu, args.join) for pattern in args.filenames]
    if args.list_file:
        with open(args.list_file, encoding="utf-8") as f:
            for line in f:
                fields = line.split()
                # Skip blank lines (which would produce an empty spec and fail
                # on spec[0]) and comment lines, including indented ones.
                if fields and not fields[0].startswith("#"):
                    specs.append(fields)
    for spec in specs:
        pattern = spec[0]
        filenames = sorted(glob(pattern))
        if not filenames:
            raise IOError("Not found: " + pattern)
        yield AnnotationSpecification(
            passages=get_passages(filenames, converters=converters),
            out_dir=spec[1] if len(spec) > 1 else args.out_dir,
            lang=spec[2] if len(spec) > 2 else args.lang,
            udpipe=spec[3] if len(spec) > 3 else args.udpipe,
            stanfordnlp=spec[4] if len(spec) > 4 else args.stanfordnlp,
            conllu=spec[5] if len(spec) > 5 else args.conllu,
            join=spec[6] if len(spec) > 6 else args.join)