def predict_model_with_archive(predictor: str, params: Params, archive: str,
                               input_file: str, output_file: str, batch_size: int = None):
    if 'cuda_device' in params['trainer']:
        cuda_device = params['trainer']['cuda_device']
        from allennlp.common.checks import check_for_gpu
        check_for_gpu(cuda_device)
        archive = load_archive(archive, cuda_device=cuda_device)
    else:
        archive = load_archive(archive)

    # Replace the archived configuration with the configuration passed in via `params`.
    for item in archive.config.duplicate():
        del archive.config[item]
    for item in params:
        archive.config[item] = params.as_dict()[item]

    predictor = MachampPredictor.from_archive(archive, predictor)

    if batch_size is None:
        batch_size = params['data_loader']['batch_sampler']['batch_size']

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
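# Hedged usage sketch for the MaChAmp-style predict_model_with_archive above (not part of the
# original source): the paths and the "machamp_predictor" name are assumptions, and standard
# AllenNLP imports (Params, load_archive, _PredictManager) are expected at module level.
def _example_machamp_prediction():
    params = Params.from_file("logs/my_run/config.json")    # hypothetical config from training
    predict_model_with_archive(
        predictor="machamp_predictor",                      # assumed registered predictor name
        params=params,
        archive="logs/my_run/model.tar.gz",                 # hypothetical archive path
        input_file="data/test.conllu",
        output_file="predictions/test.out.conllu",
    )                                                       # batch_size falls back to the data_loader setting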
def predict_model(predictor: str, params: Params, archive_dir: str,
                  input_file: str, output_file: str, batch_size: int = 1):
    """
    Predict output annotations from the given model and input file and produce an output file.

    :param predictor: the type of predictor to use, e.g., "udify_predictor"
    :param params: the Params of the model
    :param archive_dir: the directory containing the saved model archive
    :param input_file: the input file to predict
    :param output_file: the output file to save
    :param batch_size: the batch size, set this higher to speed up GPU inference
    """
    cuda_device = params["trainer"]["cuda_device"]
    check_for_gpu(cuda_device)
    archive = load_archive(os.path.join(archive_dir, "model.tar.gz"), cuda_device=cuda_device)
    predictor = Predictor.from_archive(archive, predictor)

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
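# Hedged usage sketch for predict_model above (not part of the original source): directory and
# file names are hypothetical. The Params are read back from the run's config so the trainer's
# cuda_device is reused at inference time.
def _example_predict_model():
    params = Params.from_file(os.path.join("logs/udify_model", "config.json"))  # hypothetical path
    predict_model(
        predictor="udify_predictor",      # example predictor name from the docstring above
        params=params,
        archive_dir="logs/udify_model",   # directory that contains model.tar.gz
        input_file="data/en_ewt-ud-test.conllu",
        output_file="predictions/en_ewt-ud-test.conllu",
        batch_size=32,                    # larger batches speed up GPU inference
    )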
def file_iface_predictor(input_file, output_file):
    # `predictor` and `batch_size` are not parameters here; they are expected to be
    # available from the enclosing scope where this helper is defined.
    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
def predict(self, args: argparse.Namespace, predictor: Predictor):
    manager = _PredictManager(
        predictor,
        args.input_file,
        args.output_file,
        args.batch_size,
        not args.silent,
        args.use_dataset_reader,
    )
    manager.run()
def predict_model_with_archive(predictor: str, params: Params, archive: str,
                               input_file: str, output_file: str, batch_size: int = 1):
    cuda_device = params["trainer"]["cuda_device"]
    check_for_gpu(cuda_device)
    archive = load_archive(archive, cuda_device=cuda_device)
    predictor = Predictor.from_archive(archive, predictor)

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
def init_load_model():
    """
    The [`run`](./train.md#run) command only knows about the registered classes in the
    ``allennlp`` codebase. In particular, once you start creating your own `Model` s and so
    forth, it won't work for them, unless you use the ``--include-package`` flag or you make
    your code available as a plugin (see [`plugins`](./plugins.md)).
    """
    global predict_manager, cuda_place

    import_plugins()
    parser = create_parser("allennlp")
    args = parser.parse_args()

    args.archive_file = r"models/ace05_event/model.tar.gz"
    args.cuda_device = cuda_place
    args.input_file = r"data/ace-event/processed-data/default-settings/json/test.json"
    args.output_file = r"predictions/event_predict.json"
    args.use_dataset_reader = True
    args.include_package = "dygie"
    args.predictor = "dygie"
    args.weights_file = None
    args.overrides = ''
    args.dataset_reader_choice = 'validation'
    args.batch_size = 1
    args.silent = False

    # print(dir(args))
    # print(args.include_package)
    # for package_name in args.include_package:
    #     import_module_and_submodules_new(package_name)
    import_module_and_submodules_new('dygie')
    import_module_and_submodules_new('dygie.predictors.dygie')
    import_module_and_submodules_new('dygie.models.dygie')

    # predictor = DyGIEPredictor.from_path(os.path.join(current_path, 'models/ace05_event/model.tar.gz'), predictor_name="dygie")
    predictor = predict._get_predictor(args)
    predict_manager = predict._PredictManager(
        predictor,
        args.input_file,
        args.output_file,
        args.batch_size,
        not args.silent,
        args.use_dataset_reader,
    )
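# Hedged usage sketch (not part of the original source): init_load_model() builds the module-level
# predict_manager once so the DyGIE archive is loaded a single time; `cuda_place` is assumed to be
# a module-level variable that must be set before the call.
def _example_dygie_prediction():
    global cuda_place
    cuda_place = 0          # assumed GPU id; -1 would request CPU
    init_load_model()       # loads models/ace05_event/model.tar.gz and builds predict_manager
    predict_manager.run()   # writes predictions to predictions/event_predict.json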
def pred(cuda_device=0,
         archive_file="/backup3/jcxu/exComp/tmp_expsc74o5pf7/model.tar.gz",
         weights_file="/backup3/jcxu/exComp/tmp_expsc74o5pf7/best.th",
         predictor='lstm-tagger',
         input_file="/backup3/jcxu/exComp/example.txt"):
    # Write a small JSON-lines input file, one {"sentence": ...} object per line.
    with open(input_file, 'w') as fd:
        json.dump({"sentence": "This is a useful sentence."}, fd)
        fd.write("\n")
        json.dump({"sentence": "This is a green, blue and useful sentence."}, fd)
        fd.write("\n")
        json.dump({"sentence": "This is a useless sentence."}, fd)

    check_for_gpu(cuda_device)
    archive = load_archive(archive_file,
                           weights_file=weights_file,
                           cuda_device=cuda_device,
                           overrides="")
    # predictor = SentenceTaggerPredictor(archive, dataset_reader=PosDatasetReader())
    predictor = Predictor.from_archive(archive, 'sentence-tagger')

    # output_file=None and print_to_console=True: predictions are printed rather than written to disk.
    manager = _PredictManager(predictor, input_file, None, 1, True, False)
    manager.run()
def run(_):
    """Run model."""
    # Imports are required to make Registrable modules visible without passing parameter
    util.import_module_and_submodules("combo.commands")
    util.import_module_and_submodules("combo.models")
    util.import_module_and_submodules("combo.training")

    if FLAGS.mode == "train":
        checks.file_exists(FLAGS.config_path)
        params = common.Params.from_file(FLAGS.config_path, ext_vars=_get_ext_vars())
        model_params = params.get("model").as_ordered_dict()
        serialization_dir = tempfile.mkdtemp(prefix="allennlp", dir=FLAGS.serialization_dir)
        model = train.train_model(params, serialization_dir=serialization_dir, file_friendly_logging=True)
        logger.info(f"Training model stored in: {serialization_dir}")

        if FLAGS.finetuning_training_data_path:
            for f in FLAGS.finetuning_training_data_path:
                checks.file_exists(f)

            # Loading will be performed from stored model.tar.gz
            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            params = common.Params.from_file(FLAGS.config_path, ext_vars=_get_ext_vars(finetuning=True))
            # Replace model definition with pretrained archive
            params["model"] = {
                "type": "from_archive",
                "archive_file": serialization_dir + "/model.tar.gz",
            }
            serialization_dir = tempfile.mkdtemp(prefix="allennlp", suffix="-finetuning",
                                                 dir=FLAGS.serialization_dir)
            model = train.train_model(params.duplicate(), serialization_dir=serialization_dir,
                                      file_friendly_logging=True)

            # Make finetuning model serialization independent from training serialization
            # Storing model definition instead of archive
            params["model"] = model_params
            params.to_file(os.path.join(serialization_dir, archival.CONFIG_NAME))
            archival.archive_model(serialization_dir)
            logger.info(f"Finetuned model stored in: {serialization_dir}")

        if FLAGS.test_path and FLAGS.output_file:
            checks.file_exists(FLAGS.test_path)
            params = common.Params.from_file(FLAGS.config_path, ext_vars=_get_ext_vars())["dataset_reader"]
            params.pop("type")
            dataset_reader = dataset.UniversalDependenciesDatasetReader.from_params(params)
            predictor = predict.SemanticMultitaskPredictor(model=model, dataset_reader=dataset_reader)
            test_trees = dataset_reader.read(FLAGS.test_path)
            with open(FLAGS.output_file, "w") as file:
                for tree in test_trees:
                    file.writelines(
                        api.sentence2conllu(predictor.predict_instance(tree),
                                            keep_semrel=dataset_reader.use_sem).serialize())
    else:
        use_dataset_reader = FLAGS.conllu_format
        predictor = _get_predictor()
        if FLAGS.input_file == "-":
            use_dataset_reader = False
            predictor.without_sentence_embedding = True
        if use_dataset_reader:
            predictor.line_to_conllu = True
        if FLAGS.silent:
            logging.getLogger("allennlp.common.params").disabled = True

        manager = allen_predict._PredictManager(
            predictor,
            FLAGS.input_file,
            FLAGS.output_file,
            FLAGS.batch_size,
            not FLAGS.silent,
            use_dataset_reader,
        )
        manager.run()
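# Hedged invocation sketch for run(_) above (not part of the original source): the module path
# "combo.main" and the config file name are assumptions; the flag names are only those referenced
# in the function, and since they are absl flags the "--flag value" / "--flag=value" forms
# should both be accepted.
#
#   python -m combo.main --mode train --config_path config.template.jsonnet \
#       --serialization_dir /tmp/combo_runs --test_path data/test.conllu --output_file preds.conllu
#
#   python -m combo.main --mode predict --input_file data/input.conllu \
#       --output_file preds.conllu --batch_size 32 --conllu_format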