aggregated_metrics = {} for curr_fold_data in fold_cache.fold_info: for metric, metric_value in curr_fold_data["results"].items(): existing = aggregated_metrics.get(metric, []) existing.append(metric_value) aggregated_metrics[metric] = existing for metric, metric_values in aggregated_metrics.items(): logging.info(f"- {metric}: mean={np.mean(metric_values): .4f} +- sd={np.std(metric_values): .4f}\n" f"\t all fold scores: {metric_values}") else: logging.info(f"Using single train/dev/test split...") if args.fixed_split: logging.info("Using fixed dataset split") train_docs, dev_docs, test_docs = fixed_split(tgt_docs, args.target_dataset) else: train_docs, dev_docs, test_docs = split_into_sets(tgt_docs, train_prop=0.7, dev_prop=0.15, test_prop=0.15) combined_train = src_docs + train_docs curr_tok2id, _ = extract_vocab(combined_train, lowercase=True, top_n=args.max_vocab_size) curr_tok2id = {tok: all_tok2id[tok] for tok in curr_tok2id} model = create_model_instance(args.model_name, tok2id=curr_tok2id) model.train(epochs=args.num_epochs, train_docs=combined_train, dev_docs=dev_docs) # Reload best checkpoint model = NoncontextualController.from_pretrained(model.path_model_dir) model.evaluate(test_docs) model.visualize()
logging.info(f"Final all-in-one scores (over {OUTER_K} folds)") for metric, metric_values in aio_metrics.items(): logging.info( f"- {metric}: mean={np.mean(metric_values): .4f} +- sd={np.std(metric_values): .4f}\n" f"\t all fold scores: {metric_values}") logging.info(f"Final each-in-own scores (over {OUTER_K} folds)") for metric, metric_values in eio_metrics.items(): logging.info( f"- {metric}: mean={np.mean(metric_values): .4f} +- sd={np.std(metric_values): .4f}\n" f"\t all fold scores: {metric_values}") else: logging.info(f"Using single train/dev/test split...") if args.fixed_split: logging.info("Using fixed dataset split") train_docs, dev_docs, test_docs = fixed_split( documents, args.dataset) else: train_docs, dev_docs, test_docs = split_into_sets(documents, train_prop=0.7, dev_prop=0.15, test_prop=0.15) model = create_model_instance(args.model_name) model.train(epochs=args.num_epochs, train_docs=train_docs, dev_docs=dev_docs) # Reload best checkpoint model = BaselineController.from_pretrained(model.path_model_dir) model.evaluate(test_docs) model.visualize()