Example #1
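# Validate one passage: normalize it in place first if configured, collect the
# validation errors, and print them immediately when running in strict mode.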
def validate_passage(self, passage):
    if self.normalization:
        normalize(passage, extra=self.extra)
    errors = list(validate(passage, linkage=self.linkage))
    if self.strict:
        print_errors(passage.ID, errors)
    return passage.ID, errors
Example #2
def main(args):
    splitter = Splitter.read_file(args.sentences,
                                  enum=args.enumerate,
                                  suffix_format=args.suffix_format,
                                  suffix_start=args.suffix_start)
    os.makedirs(args.outdir, exist_ok=True)
    i = 0
    for passage in get_passages_with_progress_bar(args.filenames, "Splitting"):
        for sentence in splitter.split(
                passage) if splitter else split2sentences(
                    passage,
                    remarks=args.remarks,
                    lang=args.lang,
                    ids=map(str, count(i)) if args.enumerate else None):
            i += 1
            outfile = os.path.join(
                args.outdir, args.prefix + sentence.ID +
                (".pickle" if args.binary else ".xml"))
            if args.verbose:
                with external_write_mode():
                    print("Writing passage file for sentence '%s'..." %
                          outfile,
                          file=sys.stderr)
            if args.normalize:
                normalize(sentence)
            passage2file(sentence, outfile, binary=args.binary)
    if splitter and len(splitter.matched_indices) < len(splitter.sentences):
        print("Unmatched sentences:",
              *[
                  s for i, s in enumerate(splitter.sentences)
                  if i not in splitter.matched_indices
              ],
              sep="\n")
Example #3
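# Top-level validation generator: after optional normalization, either yield
# UCCA-specific errors or run the generic, format-dependent constraint checks
# over the terminals of layer 0 and the nodes of layer 1.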
def validate(passage, normalization=False, extra_normalization=False, ucca_validation=False, output_format=None,
             **kwargs):
    del kwargs
    if normalization:
        normalize(passage, extra=extra_normalization)
    if ucca_validation:
        yield from ucca_validations.validate(passage)
    else:  # Generic validations depending on format-specific constraints
        try:
            constraints = CONSTRAINTS[passage.extra.get("format", output_format)]()
        except KeyError as e:
            raise ValueError("No validations defined for '%s' format" % output_format) from e
        yield from detect_cycles(passage)
        l0 = passage.layer(layer0.LAYER_ID)
        l1 = passage.layer(layer1.LAYER_ID)
        for terminal in l0.all:
            yield from check_orphan_terminals(constraints, terminal)
            yield from check_root_terminal_children(constraints, l1, terminal)
            yield from check_multiple_incoming(constraints, terminal)
        yield from check_top_level_allowed(constraints, l1)
        for node in l1.all:
            yield from check_multigraph(constraints, node)
            yield from check_implicit_children(constraints, node)
            yield from check_multiple_incoming(constraints, node)
            yield from check_top_level_only(constraints, l1, node)
            yield from check_required_outgoing(constraints, node)
            yield from check_tag_rules(constraints, node)
Example #4
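# Finish parsing one item: flush the classifiers, build the output passage,
# optionally normalize and write it per output format, then report evaluation
# scores and per-passage action accuracy.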
def finish(self, status, display=True, write=False, accuracies=None):
    self.model.classifier.finished_item(self.training)
    for model in self.models[1:]:
        model.classifier.finished_item(renew=False)  # So that dynet.renew_cg happens only once
    if not self.training or self.config.args.verify:
        self.out = self.state.create_passage(verify=self.config.args.verify, format=self.out_format)
    if write:
        for out_format in self.config.args.formats or [self.out_format]:
            if self.config.args.normalize and out_format == "ucca":
                normalize(self.out)
            ioutil.write_passage(self.out, output_format=out_format, binary=out_format == "pickle",
                                 outdir=self.config.args.outdir, prefix=self.config.args.prefix,
                                 converter=get_output_converter(out_format), verbose=self.config.args.verbose,
                                 append=self.config.args.join, basename=self.config.args.join)
    if self.oracle and self.config.args.verify:
        self.verify(self.out, self.passage)
    ret = (self.out,)
    if self.evaluation:
        ret += (self.evaluate(self.evaluation),)
        status = "%-14s %s F1=%.3f" % (status, self.eval_type, self.f1)
    if display:
        self.config.print("%s%.3fs %s" % (self.accuracy_str, self.duration, status), level=1)
    if accuracies is not None:
        accuracies[self.passage.ID] = self.correct_action_count / self.action_count if self.action_count else 0
    return ret
Example #5
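# Submit the annotation/review tasks listed in a file; each passage is
# normalized and validated first, and only error-free passages are submitted.
# Outcomes are logged as tab-separated lines.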
def submit_tasks(self, filename, log_file, **kwargs):
    del kwargs
    log_file = open(log_file, 'w')
    with open(filename) as f:
        task_ids = list(f.readlines())
    for task_id in task_ids:
        try:
            task_id = task_id.strip()
            task = self.get_user_task(int(task_id))
            if task['type'] not in ['ANNOTATION', 'REVIEW']:
                print(task_id, "NOT AN ANNOTATION/REVIEW TASK", file=log_file, sep="\t", flush=True)
                continue
            try:
                passage = next(iter(convert.from_json(task)))
            except ValueError as e:
                raise ValueError("Failed reading json for task %s:\n%s" % (task_id, json.dumps(task))) from e
            # validate the task
            normalization.normalize(passage)
            validation_errors = list(validation.validate(passage, linkage=False))
            if len(validation_errors) == 0:
                self.submit_task(**task)
                print(task_id, "SUBMITTED", file=log_file, sep="\t", flush=True)
            else:
                for error in validation_errors:
                    print(task_id, error, file=log_file, sep="\t", flush=True)
        except requests.exceptions.HTTPError as e:
            print(task_id, "HTTP Request Error: " + str(e), file=log_file, sep="\t", flush=True)
Example #6
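# Conversion driver: map edge labels, normalize unless writing plain text, set
# the language attribute, write each passage, and exit on the first one with
# validation errors (formats with no validations defined are skipped).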
def main(args):
    os.makedirs(args.out_dir, exist_ok=True)
    for passage in iter_passages(args.filenames,
                                 desc="Converting",
                                 input_format=args.input_format,
                                 prefix=args.prefix,
                                 mark_aux=args.mark_aux,
                                 annotate=args.annotate,
                                 wikification=args.wikification,
                                 label_map_file=args.label_map,
                                 output_format=args.output_format):
        map_labels(passage, args.label_map)
        if args.normalize and args.output_format != "txt":
            normalize(passage, extra=args.extra_normalization)
        if args.lang:
            passage.attrib["lang"] = args.lang
        write_passage(passage, **vars(args))
        if args.validate:
            try:
                errors = list(
                    validate(passage,
                             ucca_validation=args.ucca_validation,
                             output_format=args.output_format))
            except ValueError:
                continue
            if errors:
                print_errors(errors, passage.ID)
                sys.exit(1)
Example #7
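# Near-duplicate of Example #5: here the passage is taken directly from
# convert.from_json(task) instead of from the first element of its result.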
def submit_tasks(self, filename, log_file, **kwargs):
    del kwargs
    log_file = open(log_file, 'w')
    with open(filename) as f:
        task_ids = list(f.readlines())
    for task_id in task_ids:
        try:
            task_id = task_id.strip()
            task = self.get_user_task(int(task_id))
            if task['type'] not in ['ANNOTATION', 'REVIEW']:
                print(task_id, "NOT AN ANNOTATION/REVIEW TASK", file=log_file, sep="\t", flush=True)
                continue
            try:
                passage = convert.from_json(task)
            except ValueError as e:
                raise ValueError("Failed reading json for task %s:\n%s" % (task_id, json.dumps(task))) from e
            # validate the task
            normalization.normalize(passage)
            validation_errors = list(validation.validate(passage, linkage=False))
            if len(validation_errors) == 0:
                self.submit_task(**task)
                print(task_id, "SUBMITTED", file=log_file, sep="\t", flush=True)
            else:
                for error in validation_errors:
                    print(task_id, error, file=log_file, sep="\t", flush=True)
        except requests.exceptions.HTTPError as e:
            print(task_id, "HTTP Request Error: " + str(e), file=log_file, sep="\t", flush=True)
Example #8
def evaluate(guessed, ref, converter=None, verbose=False, constructions=DEFAULT,
             units=False, fscore=True, errors=False, normalize=True, eval_type=None, ref_yield_tags=None, **kwargs):
    """
    Compare two passages and return requested diagnostics and scores, possibly printing them too.
    NOTE: since normalize=True by default, this method is destructive: it modifies the given passages before evaluation.
    :param guessed: Passage object to evaluate
    :param ref: reference Passage object to compare to
    :param converter: optional function to apply to passages before evaluation
    :param verbose: whether to print the results
    :param constructions: names of construction types to include in the evaluation
    :param units: whether to evaluate common units
    :param fscore: whether to compute precision, recall and f1 score
    :param errors: whether to print the mistakes
    :param normalize: flatten centers and move common functions to root before evaluation - modifies passages
    :param eval_type: specific evaluation type(s) to limit to
    :param ref_yield_tags: reference passage for fine-grained evaluation
    :return: Scores object
    """
    del kwargs
    if converter is not None:
        guessed = converter(guessed)
        ref = converter(ref)
    if normalize:  # FIXME clone passages to avoid modifying the original ones
        for passage in (guessed, ref):
            normalization.normalize(passage)  # flatten Cs inside Cs
        move_functions(guessed, ref)  # move common Fs to be under the root, FIXME should be before normalize

    if isinstance(eval_type, str):
        eval_type = [eval_type]
    evaluator = Evaluator(verbose, constructions, units, fscore, errors)
    return Scores((evaluation_type, evaluator.get_scores(guessed, ref, evaluation_type, r=ref_yield_tags))
                  for evaluation_type in (eval_type or EVAL_TYPES))
Example #9
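# Parse passages (behind a progress bar unless verbose), normalize the parser
# output with extra rules, then optionally write it and evaluate it against
# the gold passage, aggregating the scores.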
def main(args):
    for spec in read_specs(args, converters=FROM_FORMAT):
        scores = []
        if not args.verbose:
            spec.passages = tqdm(
                spec.passages,
                unit=" passages",
                desc="Parsing " +
                (spec.out_dir if spec.out_dir != "." else spec.lang))
        for passage, parsed in parse(spec.passages, spec.lang, spec.udpipe,
                                     args.verbose):
            map_labels(parsed, args.label_map)
            normalize(parsed, extra=True)
            if args.write:
                write_passage(parsed, args)
            if args.evaluate:
                evaluator = EVALUATORS.get(args.output_format)
                converter = TO_FORMAT.get(args.output_format)
                if converter is not None:
                    passage, parsed = map(converter, (passage, parsed))
                if evaluator is not None:
                    scores.append(
                        evaluator.evaluate(parsed,
                                           passage,
                                           verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
Example #10
def evaluate(guessed, ref, converter=None, verbose=False, constructions=DEFAULT,
             units=False, fscore=True, errors=False, normalize=True, eval_type=None, ref_yield_tags=None, **kwargs):
    """
    Compare two passages and return requested diagnostics and scores, possibly printing them too.
    NOTE: since normalize=True by default, this method is destructive: it modifies the given passages before evaluation.
    :param guessed: Passage object to evaluate
    :param ref: reference Passage object to compare to
    :param converter: optional function to apply to passages before evaluation
    :param verbose: whether to print the results
    :param constructions: names of construction types to include in the evaluation
    :param units: whether to evaluate common units
    :param fscore: whether to compute precision, recall and f1 score
    :param errors: whether to print the mistakes
    :param normalize: flatten centers and move common functions to root before evaluation - modifies passages
    :param eval_type: specific evaluation type to limit to
    :param ref_yield_tags: reference passage for fine-grained evaluation
    :return: Scores object
    """
    del kwargs
    if converter is not None:
        guessed = converter(guessed)
        ref = converter(ref)
    if normalize:
        for passage in (guessed, ref):
            normalization.normalize(passage)  # flatten Cs inside Cs
        move_functions(guessed, ref)  # move common Fs to be under the root

    evaluator = Evaluator(verbose, constructions, units, fscore, errors)
    return Scores((evaluation_type, evaluator.get_scores(guessed, ref, evaluation_type, r=ref_yield_tags))
                  for evaluation_type in ([eval_type] if eval_type else EVAL_TYPES))
Example #12
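# Download one annotation task, convert it from JSON, optionally normalize and
# validate it, log task metadata, and write the resulting passage to disk.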
def download_task(self, task_id, normalize=False, write=True, validate=None, binary=None, log=None, out_dir=None,
                  prefix=None, by_external_id=False, verbose=False, write_valid_only=False, **kwargs):
    del kwargs
    task = self.get_user_task(task_id)
    user_id = task["user"]["id"]
    try:
        passage = from_json(task, by_external_id=by_external_id)
    except ValueError as e:
        raise ValueError("Failed reading json for task %s:\n%s" % (task_id, json.dumps(task))) from e
    if normalize:
        try:
            normalization.normalize(passage)
        except AssertionError as e:
            raise ValueError("Failed normalizing task %s:\n%s" % (task_id, json.dumps(task))) from e
    if log:
        print(passage.ID, task_id, user_id, task["user_comment"], task["created_at"], task["updated_at"],
              file=log, sep="\t", flush=True)
    ret = passage, task_id, user_id
    if validate or write_valid_only:
        for error in validation.validate(passage, linkage=False):
            if validate:
                print(passage.ID, task_id, user_id, error, file=validate, sep="\t", flush=True)
            if write_valid_only:
                return ret
    if write:
        write_passage(passage, binary=binary, outdir=out_dir, prefix=prefix, verbose=verbose)
    return ret
Example #13
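# Test helper: normalizing the unnormalized passage should make it equal to
# the expected normalized one.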
def normalize_and_compare(unnormalized, normalized, extra=False):
    p1 = unnormalized()
    p2 = normalized()
    if unnormalized != normalized:
        assert not p1.equals(p2), "Unnormalized and normalized passage: %s == %s" % (str(p1), str(p2))
    normalize(p1, extra=extra)
    assert p1.equals(p2), "Normalized passage: %s != %s" % (str(p1), str(p2))
Example #15
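# Fix tokenization in each passage, logging changes as CSV; fixed passages are
# normalized and written out.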
def main(args):
    os.makedirs(args.outdir, exist_ok=True)
    words_set = read_dict(args.words_set)
    with open(args.logfile, "w", newline="", encoding="utf-8") as outfile:
        cw = csv.writer(outfile)
        for passage in get_passages_with_progress_bar(args.filenames, "Fixing tokenization"):
            fixed = fix_tokenization(passage, words_set, lang=args.lang, cw=cw)
            if fixed is not None:
                outfile.flush()
                normalize(fixed)
                write_passage(fixed, outdir=args.outdir, binary=args.binary, prefix=args.prefix, verbose=args.verbose)
Example #16
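# Normalize passages in bulk and write them back out.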
def main(args):
    if args.outdir:
        os.makedirs(args.outdir, exist_ok=True)
    for p in get_passages_with_progress_bar(args.filenames,
                                            desc="Normalizing",
                                            converters={}):
        normalize(p, extra=args.extra)
        write_passage(p,
                      outdir=args.outdir,
                      prefix=args.prefix,
                      binary=args.binary,
                      verbose=False)
Example #17
def main(args):
    splitter = Splitter.read_file(args.sentences, enum=args.enumerate)
    os.makedirs(args.outdir, exist_ok=True)
    i = 0
    for passage in get_passages_with_progress_bar(args.filenames, "Splitting"):
        for sentence in splitter.split(passage) if splitter else split2sentences(
                passage, remarks=args.remarks, lang=args.lang, ids=map(str, count(i)) if args.enumerate else None):
            i += 1
            outfile = os.path.join(args.outdir, args.prefix + sentence.ID + (".pickle" if args.binary else ".xml"))
            with external_write_mode():
                print("Writing passage file for sentence '%s'..." % outfile, file=sys.stderr)
            if args.normalize:
                normalize(sentence)
            passage2file(sentence, outfile, binary=args.binary)
Example #18
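# Like Example #1, but also validates multigraph constraints and appends the
# annotating user and task IDs to the reported passage ID when available.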
def validate_passage(self, passage):
    if self.normalization:
        normalize(passage, extra=self.extra)
    errors = list(validate(passage, linkage=self.linkage, multigraph=self.multigraph))
    passage_id = passage.ID
    user_id = passage.attrib.get("userID")
    if user_id:
        passage_id += " " + user_id
    task_id = passage.attrib.get("annotationID")
    if task_id:
        passage_id += " " + task_id
    if self.strict:
        print_errors(passage_id, errors)
    return passage_id, errors
Example #20
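# Variant of Example #6 that forwards the whole argument namespace as keyword
# arguments to iter_passages, write_passage and validate.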
def main(args):
    os.makedirs(args.out_dir, exist_ok=True)
    kwargs = vars(args)
    for passage in iter_passages(args.filenames, desc="Converting", **kwargs):
        map_labels(passage, args.label_map)
        if args.normalize and args.output_format != "txt":
            normalize(passage, extra=args.extra_normalization)
        if args.lang:
            passage.attrib["lang"] = args.lang
        write_passage(passage, **kwargs)
        if args.validate:
            try:
                errors = list(validate(passage, **kwargs))
            except ValueError:
                continue
            if errors:
                print_errors(errors, passage.ID)
                sys.exit(1)
Example #21
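# Round-trip evaluation: read each file, convert it to a passage and back to
# its native format, then score the reconstruction against the original,
# aggregating scores across files.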
def main(args):
    scores = []
    for pattern in args.filenames:
        filenames = glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            print("\rConverting '%s'" % filename, end="")
            if args.out_dir or args.verbose:
                print(flush=True)
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            converters = CONVERTERS.get(passage_format, CONVERTERS["amr"])
            evaluator = EVALUATORS.get(passage_format, EVALUATORS["amr"]).evaluate
            with open(filename, encoding="utf-8") as f:
                for passage, ref, passage_id in converters[0](f, passage_id=basename, return_original=True):
                    if args.normalize:
                        normalize(passage, extra=args.extra_normalization)
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        outfile = "%s/%s.xml" % (args.out_dir, passage.ID)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = converters[1](passage, wikification=args.wikification, use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" % (filename, passage_format)) from e
                    if args.out_dir:
                        outfile = "%s/%s%s" % (args.out_dir, passage.ID, ext)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        s = evaluator(guessed, ref, verbose=args.verbose > 1)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" % filename) from e
                    scores.append(s)
                    if args.verbose:
                        print(passage_id)
                        s.print()
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores(scores).print()
Example #22
def main(args):
    splitter = Splitter.read_file(args.sentences, enum=args.enumerate,
                                  suffix_format=args.suffix_format, suffix_start=args.suffix_start)
    os.makedirs(args.outdir, exist_ok=True)
    i = 0
    for passage in get_passages_with_progress_bar(args.filenames, "Splitting"):
        for sentence in splitter.split(passage) if splitter else split2sentences(
                passage, remarks=args.remarks, lang=args.lang, ids=map(str, count(i)) if args.enumerate else None):
            i += 1
            outfile = os.path.join(args.outdir, args.prefix + sentence.ID + (".pickle" if args.binary else ".xml"))
            if args.verbose:
                with external_write_mode():
                    print(sentence, file=sys.stderr)
                    print("Writing passage file for sentence '%s'..." % outfile, file=sys.stderr)
            if args.normalize:
                normalize(sentence)
            passage2file(sentence, outfile, binary=args.binary)
    if splitter and len(splitter.matched_indices) < len(splitter.sentences):
        print("", "Unmatched sentences:", *[s for i, s in enumerate(splitter.sentences)
                                            if i not in splitter.matched_indices], sep="\n")
Example #23
def main(args):
    os.makedirs(args.outdir, exist_ok=True)
    i = 0
    for passage in get_passages_with_progress_bar(args.filenames, "Splitting"):
        for paragraph in split2paragraphs(
                passage,
                remarks=args.remarks,
                lang=args.lang,
                ids=map(str, count(i)) if args.enumerate else None):
            i += 1
            outfile = os.path.join(
                args.outdir, args.prefix + paragraph.ID +
                (".pickle" if args.binary else ".xml"))
            if args.verbose:
                with external_write_mode():
                    print(paragraph, file=sys.stderr)
                    print("Writing passage file for paragraph '%s'..." %
                          outfile,
                          file=sys.stderr)
            if args.normalize:
                normalize(paragraph)
            passage2file(paragraph, outfile, binary=args.binary)
Example #24
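# Conversion driver variant with input splitting; exits on the first passage
# that fails validation.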
def main(args):
    os.makedirs(args.out_dir, exist_ok=True)
    for passage in iter_passages(args.filenames,
                                 desc="Converting",
                                 input_format=args.input_format,
                                 prefix=args.prefix,
                                 split=args.split,
                                 mark_aux=args.mark_aux,
                                 annotate=args.annotate):
        map_labels(passage, args.label_map)
        if args.normalize:
            normalize(passage, extra=args.extra_normalization)
        if args.lang:
            passage.attrib["lang"] = args.lang
        write_passage(passage, args)
        if args.validate:
            errors = list(
                validate(passage,
                         ucca_validation=args.ucca_validation,
                         output_format=args.output_format))
            if errors:
                print_errors(errors, passage.ID)
                sys.exit(1)
Example #25
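# Round-trip evaluation like Example #21, with a per-file progress bar showing
# a running F1 score.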
def main(args):
    if args.out_dir:
        os.makedirs(args.out_dir, exist_ok=True)
    scores = []
    for pattern in args.filenames:
        for filename in glob(pattern) or [pattern]:
            file_scores = []
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            if passage_format == "txt":
                passage_format = args.format
            in_converter, out_converter = CONVERTERS.get(
                passage_format, CONVERTERS[args.format])
            evaluate = EVALUATORS.get(passage_format, EVALUATORS[args.format])
            with open(filename, encoding="utf-8") as f:
                t = tqdm(in_converter(f,
                                      passage_id=basename,
                                      return_original=True),
                         unit=" passages",
                         desc=("Converting '%s'" % filename) +
                         ((", writing to '%s'" %
                           args.out_dir) if args.out_dir else ""))
                for passage, ref, passage_id in t:
                    if args.normalize:
                        normalize(passage, extra=args.extra_normalization)
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        outfile = os.path.join(args.out_dir,
                                               passage.ID + ".xml")
                        if args.verbose:
                            with ioutil.external_write_mode():
                                print("Writing '%s'..." % outfile,
                                      file=sys.stderr,
                                      flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = out_converter(passage,
                                                wikification=args.wikification,
                                                use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" %
                                         (filename, passage_format)) from e
                    if args.out_dir:
                        outfile = os.path.join(args.out_dir, passage.ID + ext)
                        if args.verbose:
                            with ioutil.external_write_mode():
                                print("Writing '%s'..." % outfile,
                                      file=sys.stderr,
                                      flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        s = evaluate(guessed,
                                     ref,
                                     verbose=args.verbose > 1,
                                     units=args.units)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" %
                                         filename) from e
                    file_scores.append(s)
                    if args.verbose:
                        with ioutil.external_write_mode():
                            print(passage_id)
                            s.print()
                    t.set_postfix(F1="%.2f" %
                                  (100.0 * Scores(file_scores).average_f1()))
            scores += file_scores
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores(scores).print()
Example #26
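    # Visualize passages: after label mapping and optional normalization, emit
    # TikZ code or draw each passage with matplotlib, saving the output to a
    # directory or displaying it interactively.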
    argparser.add_argument("passages", nargs="+", help="Passages in any format")
    argparser.add_argument("--tikz", action="store_true", help="print tikz code rather than showing plots")
    argparser.add_argument("--out-dir", help="directory to save figures in (otherwise displayed immediately)")
    group = argparser.add_mutually_exclusive_group()
    group.add_argument("--no-normalize", action="store_false", dest="normalize", help="do not normalize passage")
    group.add_argument("-e", "--extra-normalization", action="store_true", help="more normalization rules")
    argparser.add_argument("--label-map", help="CSV file specifying mapping of input edge labels to output edge labels")
    argparser.add_argument("-i", "--node-ids", action="store_true", help="print tikz code rather than showing plots")
    args = argparser.parse_args()

    if args.out_dir:
        os.makedirs(args.out_dir, exist_ok=True)
    for passage in get_passages_with_progress_bar(args.passages, desc="Visualizing", converters=FROM_FORMAT):
        map_labels(passage, args.label_map)
        if args.normalize:
            normalize(passage, extra=args.extra_normalization)
        if args.tikz:
            tikz = visualization.tikz(passage, node_ids=args.node_ids)
            if args.out_dir:
                with open(os.path.join(args.out_dir, passage.ID + ".tikz.txt"), "w") as f:
                    print(tikz, file=f)
            else:
                with tqdm.external_write_mode():
                    print(tikz)
        else:
            plt.figure(figsize=(19, 10))
            visualization.draw(passage, node_ids=args.node_ids)
            if args.out_dir:
                plt.savefig(os.path.join(args.out_dir, passage.ID + ".png"))
            else:
                mng = plt.get_current_fig_manager()