Example 1
def main(filenames, write, **kwargs):
    uploader = TaskUploader(**kwargs)
    downloader = TaskDownloader(**kwargs)
    scores = []
    try:
        for pattern in filenames:
            matched = glob(pattern)  # do not rebind the argument we are iterating over
            if not matched:
                raise IOError("Not found: " + pattern)
            for ref in read_files_and_dirs(matched):
                print("Converting passage " + ref.ID + "... ", end="")
                task = uploader.upload_task(ref)
                guessed = downloader.download_task(task["id"],
                                                   write=write,
                                                   **kwargs)
                score = evaluate(guessed, ref, **kwargs)
                print("F1=%.3f" % score.average_f1())
                scores.append(score)
    except HTTPError as e:
        try:
            raise ValueError(e.response.json()) from e
        except JSONDecodeError:
            raise ValueError(e.response.text) from e
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()
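
The collect-evaluate-aggregate pattern above recurs in every example on this page. A minimal standalone sketch of it, assuming ucca's evaluation module as used in the snippet (evaluate returns a Scores object; Scores.aggregate combines them); score_all and its arguments are hypothetical:

from ucca.evaluation import evaluate, Scores

def score_all(guessed_passages, ref_passages):  # hypothetical helper
    scores = []
    for guessed, ref in zip(guessed_passages, ref_passages):
        score = evaluate(guessed, ref)          # per-passage Scores object
        print("F1=%.3f" % score.average_f1())   # this passage's average F1
        scores.append(score)
    aggregated = Scores.aggregate(scores)       # combine counts across passages
    aggregated.print()                          # print the aggregate summary
    return aggregated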
Example 2
 def get_metric(self, reset: bool = False):
     metrics = {}
     score_list = [
         item for sublist in self.scores.values() for item in sublist
     ]
     agg_score = Scores.aggregate(score_list)
     labeled_average_f1 = agg_score.average_f1(LABELED)
     unlabeled_average_f1 = agg_score.average_f1(UNLABELED)
     metrics['labeled_average_F1'] = labeled_average_f1
     metrics['unlabeled_average_F1'] = unlabeled_average_f1
     for dataset_label in self.scores:
         dataset_prefix = f'{dataset_label}_'  # if len(self.scores.keys()) > 1 else ""
         agg_score = Scores.aggregate(self.scores[dataset_label])
         labeled_average_f1 = agg_score.average_f1(LABELED)
         unlabeled_average_f1 = agg_score.average_f1(UNLABELED)
         metrics[f'{dataset_prefix}labeled_average_F1'] = labeled_average_f1
         metrics[
             f'{dataset_prefix}unlabeled_average_F1'] = unlabeled_average_f1
         titles = agg_score.titles()
         values = agg_score.fields()
         for title, value in zip(titles, values):
             metrics[f'{dataset_prefix}{title}'] = float(value)
     if reset:
         self.reset()
     return metrics
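
The flatten-then-aggregate step above is worth reading in isolation: all per-dataset score lists are merged for one overall aggregate, then each dataset is aggregated on its own. A hedged sketch with only Scores.aggregate assumed from the snippet; aggregate_metrics is hypothetical:

from ucca.evaluation import Scores

def aggregate_metrics(scores_by_dataset):  # hypothetical: label -> list of Scores
    all_scores = [s for lst in scores_by_dataset.values() for s in lst]  # flatten
    overall = Scores.aggregate(all_scores)        # corpus-level aggregate
    per_dataset = {label: Scores.aggregate(lst)   # one aggregate per dataset
                   for label, lst in scores_by_dataset.items()}
    return overall, per_dataset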
Example 3
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames", nargs="+",
                           help="file names to convert and evaluate")
    argparser.add_argument("-f", "--format", required=True, choices=convert.CONVERTERS,
                           help="input file format")
    argparser.add_argument("-T", "--tree", action="store_true",
                           help="remove multiple parents to get a tree")
    args = argparser.parse_args()

    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            ref = file2passage(filename)
            try:
                guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
                scores.append(evaluate(guessed, ref, fscore=True, verbose=False,
                                       units=False, errors=False))
            except Exception as e:
                raise ValueError("Error evaluating conversion of %s" % filename) from e
    if len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()

    sys.exit(0)
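
The test in this example is a round trip: serialize the reference passage to the chosen format, parse it back, and score the result against the original. A minimal sketch, assuming ucca's convert module as used above; round_trip_f1 is hypothetical and "conll" is just one illustrative member of convert.CONVERTERS:

from ucca import convert
from ucca.evaluation import evaluate

def round_trip_f1(ref, fmt="conll"):     # hypothetical helper
    to_fmt = convert.TO_FORMAT[fmt]      # Passage -> lines in the target format
    from_fmt = convert.FROM_FORMAT[fmt]  # lines -> iterator of Passages
    guessed = next(from_fmt(to_fmt(ref), ref.ID))
    return evaluate(guessed, ref).average_f1()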
Example 4
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames",
                           nargs="+",
                           help="file names to convert and evaluate")
    argparser.add_argument("-f",
                           "--format",
                           required=True,
                           choices=convert.CONVERTERS,
                           help="input file format")
    argparser.add_argument("-T",
                           "--tree",
                           action="store_true",
                           help="remove multiple parents to get a tree")
    argparser.add_argument(
        "-s",
        "--strict",
        action="store_true",
        help="stop immediately if failed to convert or evaluate a file")
    argparser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="print evaluation results for each file separately")
    args = argparser.parse_args()

    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            sys.stdout.write("\rConverting %s" % filename)
            sys.stdout.flush()
            ref = file2passage(filename)
            try:
                guessed = next(
                    converter2(converter1(ref, tree=args.tree), ref.ID))
                scores.append(evaluate(guessed, ref, verbose=args.verbose))
            except Exception as e:
                if args.strict:
                    raise ValueError("Error evaluating conversion of %s" %
                                     filename) from e
                else:
                    print("Error evaluating conversion of %s: %s" %
                          (filename, e),
                          file=sys.stderr)
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()

    sys.exit(0)
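
The --strict flag above switches between two standard failure policies: chain the exception with "raise ... from e" and stop, or report to stderr and keep going. A small sketch of the same switch; try_eval and its arguments are hypothetical:

import sys

def try_eval(fn, name, strict=False):  # hypothetical wrapper
    try:
        return fn()
    except Exception as e:
        if strict:
            raise ValueError("Error evaluating %s" % name) from e  # stop, keep the cause
        print("Error evaluating %s: %s" % (name, e), file=sys.stderr)  # log and continue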
Example 5
def main(task_ids, by_filename=False, validate=None, log=None, **kwargs):
    kwargs["write"] = False
    if by_filename:
        task_ids_from_file = []
        for filename in task_ids:
            with open(filename, 'r') as f:
                task_ids_from_file += zip(
                    *list(map(str.split, filter(None, map(str.strip, f)))))
        task_ids = task_ids_from_file
    else:
        task_ids = [[task_id] for task_id in task_ids]
    assert len(task_ids) == 2, "Got %d lists of task IDs instead of two" % len(
        task_ids)
    downloader = TaskDownloader(**kwargs)
    scores = []
    validate_h = open(validate, "w", encoding="utf-8") if validate else None
    log_h = open(log, "w", encoding="utf-8") if log else None
    if log:
        fields = ["guessed", "ref"] + Scores.field_titles(
            eval_type=LABELED) + Scores.field_titles(eval_type=UNLABELED)
        print(*fields, file=log_h, sep="\t", flush=True)
    for task_id_pair in tqdm(list(zip(*task_ids)),
                             unit=" tasks",
                             desc="Evaluating"):
        passage_pair = []
        for task_id in task_id_pair:
            passage, *_ = downloader.download_task(task_id,
                                                   validate=validate_h,
                                                   **kwargs)
            passage_pair.append(passage)
        score = evaluate(*passage_pair, **kwargs)
        if log:
            fields = list(task_id_pair) + score.fields(
                eval_type=LABELED) + score.fields(eval_type=UNLABELED)
            print(*fields, file=log_h, sep="\t", flush=True)
        scores.append(score)
    if validate:
        validate_h.close()
    if log:
        log_h.close()
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()
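
The zip(*...) expression used when reading IDs from a file is a plain transpose: every line holds a guessed and a reference task ID, and zip(*rows) turns the list of rows into parallel columns. A sketch with a hypothetical file name:

with open("pairs.txt", encoding="utf-8") as f:  # hypothetical file of "guessed ref" lines
    rows = [line.split() for line in f if line.strip()]
guessed_ids, ref_ids = zip(*rows)  # two parallel tuples of task IDs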
Example 6
def main(args):
    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for ref in get_passages_with_progress_bar(args.filenames,
                                              desc="Converting"):
        try:
            guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
            scores.append(evaluate(guessed, ref, verbose=args.verbose))
        except Exception as e:
            if args.strict:
                raise ValueError("Error evaluating conversion of %s" %
                                 ref.ID) from e
            else:
                with tqdm.external_write_mode():
                    print("Error evaluating conversion of %s: %s" %
                          (ref.ID, e),
                          file=sys.stderr)
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores.aggregate(scores).print()
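
tqdm.external_write_mode() above lets the error message reach stderr without garbling the active progress bar. A minimal sketch of the same pattern; the loop body is hypothetical:

import sys
from tqdm import tqdm

for item in tqdm(range(100), unit=" items"):
    if item == 42:  # hypothetical failure
        with tqdm.external_write_mode():
            print("problem with item %d" % item, file=sys.stderr)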
Example 7
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames",
                           nargs="+",
                           help="file names to convert and evaluate")
    argparser.add_argument("-f",
                           "--format",
                           required=True,
                           choices=convert.CONVERTERS,
                           help="input file format")
    argparser.add_argument("-T",
                           "--tree",
                           action="store_true",
                           help="remove multiple parents to get a tree")
    args = argparser.parse_args()

    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            ref = file2passage(filename)
            guessed = next(converter2(converter1(ref), ref.ID))
            scores.append(
                evaluate(guessed,
                         ref,
                         fscore=True,
                         verbose=True,
                         units=False,
                         errors=False))
    if len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()

    sys.exit(0)
Example 8
def main(args):
    guessed, ref = [
        ioutil.read_files_and_dirs((x, ), converters=FROM_FORMAT)
        for x in (args.guessed, args.ref)
    ]
    if len(guessed) != len(ref):
        raise ValueError(
            "Number of passages to compare does not match: %d != %d" %
            (len(guessed), len(ref)))
    if len(guessed) > 1:
        guessed_by_id = {
            g.ID: g
            for g in tqdm(
                guessed, desc="Reading " + args.guessed, unit=" passages")
        }
        try:
            guessed = [
                guessed_by_id[p.ID] for p in tqdm(
                    ref, desc="Reading " + args.ref, unit=" passages")
            ]
        except KeyError as e:
            raise ValueError("Passage IDs do not match") from e
    results = [
        evaluate(g, r, errors=True) for g, r in zip(
            tqdm(guessed, desc="Evaluating", unit=" passages"), ref)
    ]
    confusion_matrix = Scores.aggregate(
        results).evaluators[LABELED].results[PRIMARY].errors.most_common()
    label_map = {}
    for (g, r), _ in confusion_matrix:
        g, *_ = g.partition("|")
        prefix, *_ = g.partition(":")
        if not any(l.startswith(prefix)
                   for l in label_map):  # drop suffix for most common label
            g = prefix
        if g not in label_map:
            label_map[g], *_ = r.partition("|")
    with open(args.out_file, "w", encoding="utf-8") as f:
        csv.writer(f).writerows(
            tqdm(sorted(label_map.items()),
                 desc="Writing " + args.out_file,
                 unit=" rows"))
Example 9
File: tune.py Project: viksit/ucca
 def print_title(file):
     # "+" is needed here: without it, implicit string-literal concatenation
     # merges the header into the join separator instead of prefixing the titles
     print("learning rate, decay factor, average unlabeled f1, " +
           ", ".join(Scores.field_titles()),
           file=file)
Example 10
File: tune.py Project: borgr/ucca
 def print_title(file):
     # "+" added: adjacent string literals would otherwise merge into the join separator
     print("learning rate, decay factor, average unlabeled f1, " + ", ".join(Scores.field_titles()), file=file)
Example 11
File: tune.py Project: ml-lab/tupa
 def get_field_titles(self):
     return list(self.params) + ["average_labeled_f1"] + Scores.field_titles()
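
Scores.field_titles() pairs with score.fields() (as in the log file of Example 5): the titles name the columns that fields() fills, so together they form a natural TSV header plus rows. A hedged sketch; write_scores_tsv and the file name are hypothetical:

from ucca.evaluation import Scores

def write_scores_tsv(scores, path="scores.tsv"):         # hypothetical helper
    with open(path, "w", encoding="utf-8") as f:
        print(*Scores.field_titles(), sep="\t", file=f)  # header row
        for score in scores:
            print(*score.fields(), sep="\t", file=f)     # one row per Scores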