import json
import os

import click

# NOTE: the import path for these helpers is assumed from the surrounding
# codebase; adjust it if your checkout organizes the modules differently.
from comet.models import download_model, load_checkpoint


def score(model, source, hypothesis, reference, cuda, to_json):
    # Read and strip the parallel source / hypothesis / reference files.
    source = [s.strip() for s in source.readlines()]
    hypothesis = [s.strip() for s in hypothesis.readlines()]
    reference = [s.strip() for s in reference.readlines()]

    # Zip the three lists into a list of {"src": ..., "mt": ..., "ref": ...} dicts.
    data = {"src": source, "mt": hypothesis, "ref": reference}
    data = [dict(zip(data, t)) for t in zip(*data.values())]

    # Load a local checkpoint if the path exists; otherwise treat the
    # argument as a model name and download it.
    model = load_checkpoint(model) if os.path.exists(model) else download_model(model)
    data, scores = model.predict(data, cuda, show_progress=True)

    # Optionally dump segment-level predictions to a JSON file.
    if isinstance(to_json, str):
        with open(to_json, "w") as outfile:
            json.dump(data, outfile, ensure_ascii=False, indent=4)
        click.secho(f"Predictions saved in: {to_json}.", fg="yellow")

    # The system-level score is the average of the segment-level scores.
    click.secho(
        "COMET system score: {}.".format(sum(scores) / len(scores)), fg="yellow"
    )
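# A minimal usage sketch for score(), assuming it is called directly rather
# than through a CLI wrapper. The file names below are hypothetical;
# score() expects open file handles, a checkpoint path or downloadable
# model name, a cuda flag, and an optional JSON output path.
if __name__ == "__main__":
    with open("src.txt") as src, open("mt.txt") as mt, open("ref.txt") as ref:
        score(
            model="wmt-large-da-estimator-1719",  # example model name
            source=src,
            hypothesis=mt,
            reference=ref,
            cuda=True,
            to_json="predictions.json",
        )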
import argparse
import json

# NOTE: import path assumed from the surrounding codebase.
from comet.models import load_checkpoint

parser = argparse.ArgumentParser(
    description="Evaluates a COMET model against relative preferences."
)
parser.add_argument(
    "--checkpoint",
    required=True,
    help="Path to the Model checkpoint we want to test.",
    type=str,
)
parser.add_argument(
    "--test_path",
    required=True,
    help="Path to the test json with translated documents.",
    type=str,
)
args = parser.parse_args()
model = load_checkpoint(args.checkpoint)

seg_micro_scores, seg_macro_scores, seg_y = [], [], []
sys_micro, sys_macro, sys_y = [], [], []
with open(args.test_path) as json_file:
    data = json.load(json_file)

for system in data:
    print("Scoring {} system:".format(system))
    docs = [data[system][d] for d in data[system]]
    human_scores = [d["z_score"] for d in docs]

    # document_predict is assumed to return per-document micro- and
    # macro-averaged scores alongside the annotated data.
    _, micro_scores, macro_scores = model.document_predict(
        docs, cuda=True, show_progress=True
    )

    # System-level scores are the averages over all documents of a system.
    sys_micro.append(sum(micro_scores) / len(micro_scores))
    sys_macro.append(sum(macro_scores) / len(macro_scores))
    sys_y.append(sum(human_scores) / len(human_scores))
    print("MICRO {} system-level score: {}".format(system, sys_micro[-1]))
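# The human z-scores are collected so that, after the loop, predicted
# system-level scores can be correlated with human judgments. A minimal
# sketch of that step, assuming scipy is available as a dependency:
from scipy.stats import pearsonr

print("MICRO system-level Pearson r: {}".format(pearsonr(sys_micro, sys_y)[0]))
print("MACRO system-level Pearson r: {}".format(pearsonr(sys_macro, sys_y)[0]))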
import unittest

# NOTE: import path assumed from the surrounding codebase.
from comet.models import load_checkpoint


# Hypothetical test-case wrapper name; the test method itself is unchanged.
class TestLoadCheckpoint(unittest.TestCase):
    def test_load_unvalid_checkpoint(self):
        # load_checkpoint should raise when the checkpoint path does not exist.
        with self.assertRaises(Exception) as context:
            load_checkpoint("folder/that/does/not/exist/")
        self.assertEqual(
            str(context.exception), "folder/that/does/not/exist/ file not found!"
        )
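# Allows running this test module directly; unittest discovery also works.
if __name__ == "__main__":
    unittest.main()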