# Predict and evaluate on the dev split of every configured dataset, then
# clean up the training directory.
#
# NOTE(review): every dataset writes to the same dev.conllu / dev_results.json
# paths, so with more than one dataset each iteration overwrites the previous
# one's output — confirm this is intended (or make the filenames per-dataset).
dev_pred = os.path.join(serialization_dir, "dev.conllu")
dev_eval = os.path.join(serialization_dir, "dev_results.json")
for dataset_name, dataset_config in predict_params["dataset_reader"]["datasets"].items():
    dev_file = dataset_config["dev"]
    eval_type = dataset_config["evaluation"]
    util.predict_and_evaluate_model(args.predictor, predict_params, serialization_dir,
                                    dev_file, dev_pred, dev_eval, eval_type)

# If we want to use trainer>num_serialized_models_to_keep we need to comment
# this automatic cleanup.
util.cleanup_training(serialization_dir, keep_archive=not args.cleanup_archive)
# Train the model; a KeyboardInterrupt skips straight ahead to evaluation
# instead of aborting the whole run.
try:
    util.cache_vocab(train_params)
    train_model(train_params, serialization_dir, recover=bool(args.resume))
except KeyboardInterrupt:
    logger.warning("KeyboardInterrupt, skipping training")

# Predict and evaluate on the dev and test splits of the archived config.
dev_file = predict_params["validation_data_path"]
test_file = predict_params["test_data_path"]

dev_pred = os.path.join(serialization_dir, "dev.conllu")
dev_eval = os.path.join(serialization_dir, "dev_results.json")
test_pred = os.path.join(serialization_dir, "test.conllu")
test_eval = os.path.join(serialization_dir, "test_results.json")

# Only run a separate dev evaluation when dev and test are distinct files.
if dev_file != test_file:
    util.predict_and_evaluate_model(args.predictor, predict_params, serialization_dir,
                                    dev_file, dev_pred, dev_eval)
util.predict_and_evaluate_model(args.predictor, predict_params, serialization_dir,
                                test_file, test_pred, test_eval)

if args.archive_bert:
    # RUNNING ON English only BERT
    bert_config = "config/archive/bert-base-cased/bert_config.json"
    util.archive_bert_model(serialization_dir, bert_config)

util.cleanup_training(serialization_dir, keep_archive=not args.cleanup_archive)
parser.add_argument("--archive_latest", action="store_true", help="Archive the latest trained model")
parser.add_argument("--device", default=None, type=int, help="CUDA device number; set to -1 for CPU")
parser.add_argument("--lazy", action="store_true", help="Lazy load dataset")
parser.add_argument("--batch_size", default=1, type=int, help="The size of each prediction batch")
parser.add_argument("--sigmorphon", action="store_true", help="Use Sigmorphon evaluation instead of UD")

args = parser.parse_args()

import_submodules("udify")

# Command-line flags are layered on top of the archived training config.
overrides = {}
if args.device is not None:
    overrides["trainer"] = {"cuda_device": args.device}
if args.lazy:
    overrides["dataset_reader"] = {"lazy": args.lazy}

configs = [Params(overrides), Params.from_file(os.path.join(args.archive_dir, "config.json"))]
params = util.merge_configs(configs)

if args.archive_latest:
    archive_model(args.archive_dir)

pred_file = os.path.join(args.archive_dir, args.pred_file)

# With an eval file we both predict and score; otherwise prediction only.
if args.eval_file:
    eval_file = os.path.join(args.archive_dir, args.eval_file)
    util.predict_and_evaluate_model("udify_predictor", params, args.archive_dir, args.input_file,
                                    pred_file, eval_file, batch_size=args.batch_size)
else:
    util.predict_model("udify_predictor", params, args.archive_dir, args.input_file, pred_file)