def main(filenames, write, **kwargs):
    uploader = TaskUploader(**kwargs)
    downloader = TaskDownloader(**kwargs)
    scores = []
    try:
        for pattern in filenames:
            filenames = glob(pattern)
            if not filenames:
                raise IOError("Not found: " + pattern)
            for ref in read_files_and_dirs(filenames):
                print("Converting passage " + ref.ID + "... ", end="")
                task = uploader.upload_task(ref)
                guessed = downloader.download_task(task["id"], write=write, **kwargs)
                score = evaluate(guessed, ref, **kwargs)
                print("F1=%.3f" % score.average_f1())
                scores.append(score)
    except HTTPError as e:
        try:
            raise ValueError(e.response.json()) from e
        except JSONDecodeError:
            raise ValueError(e.response.text) from e
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()
def get_metric(self, reset: bool = False):
    metrics = {}
    score_list = [item for sublist in self.scores.values() for item in sublist]
    agg_score = Scores.aggregate(score_list)
    labeled_average_f1 = agg_score.average_f1(LABELED)
    unlabeled_average_f1 = agg_score.average_f1(UNLABELED)
    metrics['labeled_average_F1'] = labeled_average_f1
    metrics['unlabeled_average_F1'] = unlabeled_average_f1
    for dataset_label in self.scores:
        dataset_prefix = f'{dataset_label}_'  # if len(self.scores.keys()) > 1 else ""
        agg_score = Scores.aggregate(self.scores[dataset_label])
        labeled_average_f1 = agg_score.average_f1(LABELED)
        unlabeled_average_f1 = agg_score.average_f1(UNLABELED)
        metrics[f'{dataset_prefix}labeled_average_F1'] = labeled_average_f1
        metrics[f'{dataset_prefix}unlabeled_average_F1'] = unlabeled_average_f1
        titles = agg_score.titles()
        values = agg_score.fields()
        for title, value in zip(titles, values):
            metrics[f'{dataset_prefix}{title}'] = float(value)
    if reset:
        self.reset()
    return metrics
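# Hedged usage sketch, not part of the original code: it assumes a scorer object that
# exposes the get_metric() method above and returns a dict mapping metric names to
# floats, with per-dataset keys prefixed by the dataset label. The function name and
# the "epoch" argument are hypothetical.
def log_metrics(scorer, epoch):
    metrics = scorer.get_metric(reset=True)
    for name, value in sorted(metrics.items()):
        print("epoch %d\t%s\t%.3f" % (epoch, name, value))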
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames", nargs="+", help="file names to convert and evaluate")
    argparser.add_argument("-f", "--format", required=True, choices=convert.CONVERTERS, help="input file format")
    argparser.add_argument("-T", "--tree", action="store_true", help="remove multiple parents to get a tree")
    args = argparser.parse_args()
    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            ref = file2passage(filename)
            try:
                guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
                scores.append(evaluate(guessed, ref, fscore=True, verbose=False, units=False, errors=False))
            except Exception as e:
                raise ValueError("Error evaluating conversion of %s" % filename, e)
    if len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()
    sys.exit(0)
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames", nargs="+", help="file names to convert and evaluate")
    argparser.add_argument("-f", "--format", required=True, choices=convert.CONVERTERS, help="input file format")
    argparser.add_argument("-T", "--tree", action="store_true", help="remove multiple parents to get a tree")
    argparser.add_argument("-s", "--strict", action="store_true",
                           help="stop immediately if failed to convert or evaluate a file")
    argparser.add_argument("-v", "--verbose", action="store_true",
                           help="print evaluation results for each file separately")
    args = argparser.parse_args()
    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            sys.stdout.write("\rConverting %s" % filename)
            sys.stdout.flush()
            ref = file2passage(filename)
            try:
                guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
                scores.append(evaluate(guessed, ref, verbose=args.verbose))
            except Exception as e:
                if args.strict:
                    raise ValueError("Error evaluating conversion of %s" % filename) from e
                else:
                    print("Error evaluating conversion of %s: %s" % (filename, e), file=sys.stderr)
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()
    sys.exit(0)
def main(task_ids, by_filename=False, validate=None, log=None, **kwargs):
    kwargs["write"] = False
    if by_filename:
        task_ids_from_file = []
        for filename in task_ids:
            with open(filename, 'r') as f:
                task_ids_from_file += zip(*list(map(str.split, filter(None, map(str.strip, f)))))
        task_ids = task_ids_from_file
    else:
        task_ids = [[task_id] for task_id in task_ids]
    assert len(task_ids) == 2, "Got %d lists of task IDs instead of two" % len(task_ids)
    downloader = TaskDownloader(**kwargs)
    scores = []
    validate_h = open(validate, "w", encoding="utf-8") if validate else None
    log_h = open(log, "w", encoding="utf-8") if log else None
    if log:
        fields = ["guessed", "ref"] + Scores.field_titles(eval_type=LABELED) + Scores.field_titles(eval_type=UNLABELED)
        print(*fields, file=log_h, sep="\t", flush=True)
    for task_id_pair in tqdm(list(zip(*task_ids)), unit=" tasks", desc="Evaluating"):
        passage_pair = []
        for task_id in task_id_pair:
            passage, *_ = downloader.download_task(task_id, validate=validate_h, **kwargs)
            passage_pair.append(passage)
        score = evaluate(*passage_pair, **kwargs)
        if log:
            fields = list(task_id_pair) + score.fields(eval_type=LABELED) + score.fields(eval_type=UNLABELED)
            print(*fields, file=log_h, sep="\t", flush=True)
        scores.append(score)
    if validate:
        validate_h.close()
    if log:
        log_h.close()
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames", nargs="+", help="file names to convert and evaluate")
    argparser.add_argument("-f", "--format", required=True, choices=convert.CONVERTERS, help="input file format")
    argparser.add_argument("-T", "--tree", action="store_true", help="remove multiple parents to get a tree")
    argparser.add_argument("-s", "--strict", action="store_true",
                           help="stop immediately if failed to convert or evaluate a file")
    argparser.add_argument("-v", "--verbose", action="store_true",
                           help="print evaluation results for each file separately")
    args = argparser.parse_args()
    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            sys.stdout.write("\rConverting %s" % filename)
            sys.stdout.flush()
            ref = file2passage(filename)
            try:
                guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
                scores.append(evaluate(guessed, ref, verbose=args.verbose))
            except Exception as e:
                if args.strict:
                    raise ValueError("Error evaluating conversion of %s" % filename) from e
                else:
                    print("Error evaluating conversion of %s: %s" % (filename, e), file=sys.stderr)
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()
    sys.exit(0)
def main(args):
    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for ref in get_passages_with_progress_bar(args.filenames, desc="Converting"):
        try:
            guessed = next(converter2(converter1(ref, tree=args.tree), ref.ID))
            scores.append(evaluate(guessed, ref, verbose=args.verbose))
        except Exception as e:
            if args.strict:
                raise ValueError("Error evaluating conversion of %s" % ref.ID) from e
            else:
                with tqdm.external_write_mode():
                    print("Error evaluating conversion of %s: %s" % (ref.ID, e), file=sys.stderr)
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()
def main():
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames", nargs="+", help="file names to convert and evaluate")
    argparser.add_argument("-f", "--format", required=True, choices=convert.CONVERTERS, help="input file format")
    argparser.add_argument("-T", "--tree", action="store_true", help="remove multiple parents to get a tree")
    args = argparser.parse_args()
    converter1 = convert.TO_FORMAT[args.format]
    converter2 = convert.FROM_FORMAT[args.format]
    scores = []
    for pattern in args.filenames:
        filenames = glob.glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            ref = file2passage(filename)
            guessed = next(converter2(converter1(ref), ref.ID))
            scores.append(evaluate(guessed, ref, fscore=True, verbose=True, units=False, errors=False))
    if len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()
    sys.exit(0)
def main(task_ids, by_filename=False, validate=None, log=None, **kwargs):
    kwargs["write"] = False
    if by_filename:
        task_ids_from_file = []
        for filename in task_ids:
            with open(filename, 'r') as f:
                task_ids_from_file += zip(*list(map(str.split, filter(None, map(str.strip, f)))))
        task_ids = task_ids_from_file
    else:
        task_ids = [[task_id] for task_id in task_ids]
    assert len(task_ids) == 2, "Got %d lists of task IDs instead of two" % len(task_ids)
    downloader = TaskDownloader(**kwargs)
    scores = []
    validate_h = open(validate, "w", encoding="utf-8") if validate else None
    log_h = open(log, "w", encoding="utf-8") if log else None
    if log:
        fields = ["guessed", "ref"] + Scores.field_titles(eval_type=LABELED) + Scores.field_titles(eval_type=UNLABELED)
        print(*fields, file=log_h, sep="\t", flush=True)
    for task_id_pair in tqdm(list(zip(*task_ids)), unit=" tasks", desc="Evaluating"):
        passage_pair = []
        for task_id in task_id_pair:
            passage, *_ = downloader.download_task(task_id, validate=validate_h, **kwargs)
            passage_pair.append(passage)
        score = evaluate(*passage_pair, **kwargs)
        if log:
            fields = list(task_id_pair) + score.fields(eval_type=LABELED) + score.fields(eval_type=UNLABELED)
            print(*fields, file=log_h, sep="\t", flush=True)
        scores.append(score)
    if validate:
        validate_h.close()
    if log:
        log_h.close()
    print()
    if len(scores) > 1:
        print("Aggregated scores:")
        Scores.aggregate(scores).print()
def main(args):
    guessed, ref = [ioutil.read_files_and_dirs((x,), converters=FROM_FORMAT) for x in (args.guessed, args.ref)]
    if len(guessed) != len(ref):
        raise ValueError("Number of passages to compare does not match: %d != %d" % (len(guessed), len(ref)))
    if len(guessed) > 1:
        guessed_by_id = {g.ID: g for g in tqdm(guessed, desc="Reading " + args.guessed, unit=" passages")}
        try:
            guessed = [guessed_by_id[p.ID] for p in tqdm(ref, desc="Reading " + args.ref, unit=" passages")]
        except KeyError as e:
            raise ValueError("Passage IDs do not match") from e
    results = [evaluate(g, r, errors=True)
               for g, r in zip(tqdm(guessed, desc="Evaluating", unit=" passages"), ref)]
    confusion_matrix = Scores.aggregate(results).evaluators[LABELED].results[PRIMARY].errors.most_common()
    label_map = {}
    for (g, r), _ in confusion_matrix:
        g, *_ = g.partition("|")
        prefix, *_ = g.partition(":")
        if not any(l.startswith(prefix) for l in label_map):  # drop suffix for most common label
            g = prefix
        if g not in label_map:
            label_map[g], *_ = r.partition("|")
    with open(args.out_file, "w", encoding="utf-8") as f:
        csv.writer(f).writerows(tqdm(sorted(label_map.items()), desc="Writing " + args.out_file, unit=" rows"))
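# Hedged sketch, an assumption rather than part of the original code: each row of the
# CSV written above holds one "guessed,ref" label pair, so the file can be read back
# into a dict that maps predicted labels to reference labels. The function name and
# path argument are hypothetical.
import csv

def load_label_map(path):
    with open(path, encoding="utf-8") as f:
        return dict(tuple(row) for row in csv.reader(f) if row)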
def print_title(file):
    print("learning rate, decay factor, average unlabeled f1, " + ", ".join(Scores.field_titles()), file=file)
def get_field_titles(self):
    return [p for p in self.params.keys()] + ["average_labeled_f1"] + Scores.field_titles()
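# Hedged sketch, not part of the original code: print_title and get_field_titles above
# suggest a grid-search log with a comma-separated header line and one row per trial.
# The print_row helper below is hypothetical; it assumes a params dict of hyperparameter
# values and a score object exposing average_f1() and fields() as used elsewhere in this
# section.
def print_row(params, score, file):
    fields = [str(v) for v in params.values()] + ["%.3f" % score.average_f1()] + list(map(str, score.fields()))
    print(", ".join(fields), file=file)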