コード例 #1
0
ファイル: test_evaluation.py プロジェクト: louismartin/ucca
def test_evaluate(create1, create2, f1, units, errors):
    """Evaluate two freshly created objects against each other and check the scores.

    Also verifies that an object with no validation errors before the call to
    ``evaluate`` still has none afterwards.

    :param create1: zero-argument factory for the first object passed to ``evaluate``
    :param create2: zero-argument factory for the second object passed to ``evaluate``
    :param f1: expected value handed to ``check_primary_remote`` together with the scores
    :param units: forwarded to ``evaluate`` as ``units``
    :param errors: forwarded to ``evaluate`` as ``errors``
    """
    created = (create1(), create2())

    def snapshot():
        # One list of validation errors per created object, in creation order
        return [list(validate(item, linkage=False)) for item in created]

    errors_before = snapshot()
    scores = evaluate(*created, units=units, errors=errors)
    errors_after = snapshot()
    for before, after in zip(errors_before, errors_after):
        # Only objects that started out clean are required to stay clean
        assert before or not after
    check_primary_remote(scores, f1)
コード例 #2
0
def validate(passage, normalization=False, extra_normalization=False, ucca_validation=False, output_format=None,
             **kwargs):
    """Lazily yield the validation errors found in a passage.

    :param passage: object to check; its ``extra`` mapping and ``layer()`` method are read
    :param normalization: if true, call ``normalize`` on the passage before validating
    :param extra_normalization: passed to ``normalize`` as ``extra``
    :param ucca_validation: if true, delegate entirely to ``ucca_validations.validate``;
                            otherwise run the generic, constraint-driven checks
    :param output_format: fallback key into ``CONSTRAINTS`` used when ``passage.extra``
                          has no ``"format"`` entry
    :param kwargs: ignored
    :return: generator of whatever the individual checks yield
    :raises ValueError: if no constraints are registered for the selected format
    """
    del kwargs
    if normalization:
        normalize(passage, extra=extra_normalization)
    if ucca_validation:
        yield from ucca_validations.validate(passage)
        return
    # Generic validations depending on format-specific constraints.
    # The passage's own "format" entry takes precedence over output_format, so the
    # error message must report the key that was actually looked up.
    format_name = passage.extra.get("format", output_format)
    try:
        constraints = CONSTRAINTS[format_name]()
    except KeyError as e:
        raise ValueError("No validations defined for '%s' format" % format_name) from e
    yield from detect_cycles(passage)
    l0 = passage.layer(layer0.LAYER_ID)
    l1 = passage.layer(layer1.LAYER_ID)
    for terminal in l0.all:
        yield from check_orphan_terminals(constraints, terminal)
        yield from check_root_terminal_children(constraints, l1, terminal)
        yield from check_multiple_incoming(constraints, terminal)
    yield from check_top_level_allowed(constraints, l1)
    for node in l1.all:
        yield from check_multigraph(constraints, node)
        yield from check_implicit_children(constraints, node)
        yield from check_multiple_incoming(constraints, node)
        yield from check_top_level_only(constraints, l1, node)
        yield from check_required_outgoing(constraints, node)
        yield from check_tag_rules(constraints, node)
コード例 #3
0
ファイル: download_task.py プロジェクト: louismartin/ucca
 def download_task(self, task_id, normalize=False, write=True, validate=None, binary=None, log=None, out_dir=None,
                   prefix=None, by_external_id=False, verbose=False, write_valid_only=False, **kwargs):
     """Fetch one task, convert it to a passage, and optionally normalize, log, validate and write it.

     :param task_id: ID passed to ``self.get_user_task``
     :param normalize: if true, run ``normalization.normalize`` on the passage
     :param write: if true, write the passage with ``write_passage``
     :param validate: optional writable file; every validation error is printed to it, tab-separated
     :param binary: forwarded to ``write_passage``
     :param log: optional writable file; one tab-separated line describing the task is printed to it
     :param out_dir: forwarded to ``write_passage`` as ``outdir``
     :param prefix: forwarded to ``write_passage``
     :param by_external_id: forwarded to ``from_json``
     :param verbose: forwarded to ``write_passage``
     :param write_valid_only: if true, skip writing when the passage has any validation error
     :param kwargs: ignored
     :return: tuple ``(passage, task_id, user_id)``
     :raises ValueError: if the task JSON cannot be converted, or normalization fails an assertion
     """
     del kwargs
     task = self.get_user_task(task_id)
     user_id = task["user"]["id"]
     try:
         passage = from_json(task, by_external_id=by_external_id)
     except ValueError as e:
         raise ValueError("Failed reading json for task %s:\n%s" % (task_id, json.dumps(task))) from e
     if normalize:
         try:
             normalization.normalize(passage)
         except AssertionError as e:
             raise ValueError("Failed normalizing task %s:\n%s" % (task_id, json.dumps(task))) from e
     if log:
         print(passage.ID, task_id, user_id, task["user_comment"], task["created_at"], task["updated_at"],
               file=log, sep="\t", flush=True)
     ret = passage, task_id, user_id
     if validate or write_valid_only:
         # Collect every error first: returning from inside the loop would log only
         # the first one when both `validate` and `write_valid_only` are set.
         errors = list(validation.validate(passage, linkage=False))
         if validate:
             for error in errors:
                 print(passage.ID, task_id, user_id, error, file=validate, sep="\t", flush=True)
         if errors and write_valid_only:
             return ret  # invalid passage: report it, but do not write it
     if write:
         write_passage(passage, binary=binary, outdir=out_dir, prefix=prefix, verbose=verbose)
     return ret
コード例 #4
0
 def submit_tasks(self, filename, log_file, **kwargs):
     """Submit every valid annotation/review task listed in a file, logging the outcome of each.

     :param filename: path of a text file with one integer task ID per line; blank lines are skipped
     :param log_file: path of the tab-separated log file, opened for writing
     :param kwargs: ignored
     :raises ValueError: if a non-blank line is not an integer, or a task's JSON cannot be converted
     """
     del kwargs
     # The context manager closes the log even if a task raises midway through.
     with open(log_file, 'w') as log:
         with open(filename) as f:
             task_ids = [line.strip() for line in f]
         for task_id in task_ids:
             if not task_id:
                 continue  # a blank line is not a task ID; int("") would raise
             try:
                 task = self.get_user_task(int(task_id))
                 if task['type'] not in ['ANNOTATION', 'REVIEW']:
                     print(task_id, "NOT AN ANNOTATION/REVIEW TASK", file=log, sep="\t", flush=True)
                     continue
                 try:
                     passage = next(iter(convert.from_json(task)))
                 except ValueError as e:
                     raise ValueError("Failed reading json for task %s:\n%s" % (task_id, json.dumps(task))) from e
                 # validate the task
                 normalization.normalize(passage)
                 validation_errors = list(validation.validate(passage, linkage=False))
                 if not validation_errors:
                     self.submit_task(**task)
                     print(task_id, "SUBMITTED", file=log, sep="\t", flush=True)
                 else:
                     for error in validation_errors:
                         print(task_id, error, file=log, sep="\t", flush=True)
             except requests.exceptions.HTTPError as e:
                 # HTTP failures are logged per task so the remaining tasks are still processed
                 print(task_id, "HTTP Request Error: "+str(e), file=log, sep="\t", flush=True)
コード例 #5
0
ファイル: submit_tasks.py プロジェクト: danielhers/ucca
 def submit_tasks(self, filename, log_file, **kwargs):
     """Read task IDs from a file and submit each annotation/review task that validates cleanly.

     One tab-separated line per outcome (submitted, skipped, validation error, HTTP error)
     is printed to the log.

     :param filename: path of a text file with one integer task ID per line; blank lines are skipped
     :param log_file: path of the log file, opened for writing
     :param kwargs: ignored
     :raises ValueError: if a non-blank line is not an integer, or a task's JSON cannot be converted
     """
     del kwargs
     # Opened via `with` so the handle is released on every exit path, including exceptions.
     with open(log_file, 'w') as log:
         with open(filename) as f:
             task_ids = [line.strip() for line in f]
         for task_id in task_ids:
             if not task_id:
                 continue  # tolerate empty lines instead of failing on int("")
             try:
                 task = self.get_user_task(int(task_id))
                 if task['type'] not in ['ANNOTATION', 'REVIEW']:
                     print(task_id, "NOT AN ANNOTATION/REVIEW TASK", file=log, sep="\t", flush=True)
                     continue
                 try:
                     passage = convert.from_json(task)
                 except ValueError as e:
                     raise ValueError("Failed reading json for task %s:\n%s" % (task_id, json.dumps(task))) from e
                 # validate the task
                 normalization.normalize(passage)
                 validation_errors = list(validation.validate(passage, linkage=False))
                 if not validation_errors:
                     self.submit_task(**task)
                     print(task_id, "SUBMITTED", file=log, sep="\t", flush=True)
                 else:
                     for error in validation_errors:
                         print(task_id, error, file=log, sep="\t", flush=True)
             except requests.exceptions.HTTPError as e:
                 # Log the HTTP failure and carry on with the next task
                 print(task_id, "HTTP Request Error: "+str(e), file=log, sep="\t", flush=True)
コード例 #6
0
def test_evaluate_self(create, valid):
    """Check that ``validate`` reports errors exactly when the created object is expected to be invalid.

    :param create: zero-argument factory for the object to validate
    :param valid: truthy if no validation errors are expected, falsy if at least one is
    """
    subject = create()
    found = list(validate(subject))
    if not valid:
        # An invalid object must produce at least one error
        assert found, subject
        return
    assert not found, subject
コード例 #7
0
 def validate_passage(self, passage):
     """Validate one passage, optionally normalizing it first.

     Reads ``self.normalization``, ``self.extra``, ``self.linkage`` and ``self.strict``.

     :param passage: object to validate; its ``ID`` attribute identifies it in the result
     :return: tuple ``(passage.ID, list of validation errors)``
     """
     if self.normalization:
         normalize(passage, extra=self.extra)
     found = [*validate(passage, linkage=self.linkage)]
     if not self.strict:
         return passage.ID, found
     # In strict mode the errors are also handed to print_errors before returning
     print_errors(passage.ID, found)
     return passage.ID, found
コード例 #8
0
ファイル: test_validation.py プロジェクト: danielhers/ucca
def test_evaluate_self(create, valid):
    """Assert that validation finds errors if and only if ``valid`` is falsy.

    :param create: zero-argument factory for the object passed to ``validate``
    :param valid: expected validity of the created object
    """
    candidate = create()
    has_errors = bool(list(validate(candidate)))
    # Valid objects must have no errors; invalid ones must have at least one
    assert has_errors != bool(valid), candidate
コード例 #9
0
def main(args):
    """Validate all passages named in ``args.filenames`` and print a report.

    Prints one line per error, showing the passage ID only on the first line of
    each passage, and exits with status 1 if any error was found. Otherwise
    prints a confirmation message.

    :param args: object with a ``filenames`` attribute passed to ``get_passages_with_progress_bar``
    """
    failures = {}
    for passage in get_passages_with_progress_bar(args.filenames, desc="Validating"):
        passage_id = passage.ID
        found = list(validate(passage))
        if found:
            failures[passage_id] = found
    if not failures:
        print("No errors found.")
        return
    # Pad the ID column to the longest ID so the "|" separators line up
    id_len = max(len(passage_id) for passage_id in failures)
    for passage_id, es in sorted(failures.items()):
        for i, e in enumerate(es):
            label = passage_id if i == 0 else ""
            print("%-*s|%s" % (id_len, label, e))
    sys.exit(1)
コード例 #10
0
ファイル: validate.py プロジェクト: ruixiangcui/ucca
 def validate_passage(self, passage):
     """Validate one passage and label the result with its ID plus any user/annotation IDs.

     Reads ``self.normalization``, ``self.extra``, ``self.linkage``, ``self.multigraph``
     and ``self.strict``.

     :param passage: object to validate; its ``ID`` and ``attrib`` mapping are read
     :return: tuple ``(label, list of validation errors)``, where the label is ``passage.ID``
              followed by the non-empty ``"userID"`` and ``"annotationID"`` attributes,
              separated by spaces
     """
     if self.normalization:
         normalize(passage, extra=self.extra)
     found = [*validate(passage, linkage=self.linkage, multigraph=self.multigraph)]
     label = passage.ID
     # Append the optional identifiers in a fixed order, skipping missing or empty ones
     for key in ("userID", "annotationID"):
         value = passage.attrib.get(key)
         if value:
             label += " " + value
     if self.strict:
         print_errors(label, found)
     return label, found