Example #1
def predict_model_with_archive(predictor: str,
                               params: Params,
                               archive: str,
                               input_file: str,
                               output_file: str,
                               batch_size: int = None):

    if 'cuda_device' in params['trainer']:
        cuda_device = params['trainer']['cuda_device']
        from allennlp.common.checks import check_for_gpu
        check_for_gpu(cuda_device)
        archive = load_archive(archive, cuda_device=cuda_device)
    else:
        archive = load_archive(archive)

    # Overwrite the archived config with the supplied params
    for item in archive.config.duplicate():
        del archive.config[item]
    for item in params:
        archive.config[item] = params.as_dict()[item]

    predictor = MachampPredictor.from_archive(archive, predictor)

    if batch_size is None:
        batch_size = params['data_loader']['batch_sampler']['batch_size']

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
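A sketch of calling this variant; the paths and the registered predictor name are illustrative, and the loaded config must contain the trainer section (and, if batch_size is omitted, the data_loader section) that the function reads.

# Hypothetical usage sketch: paths and the predictor name are illustrative.
from allennlp.common import Params

params = Params.from_file("configs/params.json")   # must contain a "trainer" section
predict_model_with_archive("machamp_predictor",
                           params,
                           "logs/model.tar.gz",
                           "data/test.conllu",
                           "predictions/test.out",
                           batch_size=32)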
Example #2
def predict_model(predictor: str,
                  params: Params,
                  archive_dir: str,
                  input_file: str,
                  output_file: str,
                  batch_size: int = 1):
    """
    Predict output annotations from the given model and input file, and write them to an output file.
    :param predictor: the type of predictor to use, e.g., "udify_predictor"
    :param params: the Params of the model
    :param archive_dir: the saved model archive
    :param input_file: the input file to predict
    :param output_file: the output file to save
    :param batch_size: the batch size; set this higher to speed up GPU inference
    """
    cuda_device = params["trainer"]["cuda_device"]

    check_for_gpu(cuda_device)
    archive = load_archive(os.path.join(archive_dir, "model.tar.gz"),
                           cuda_device=cuda_device)

    predictor = Predictor.from_archive(archive, predictor)

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
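A sketch of how this might be called, following the docstring above; the config and data paths are illustrative, and Params.from_file is the usual way to obtain the params argument.

# Hypothetical usage sketch: config and data paths are illustrative;
# "udify_predictor" is the predictor name the docstring mentions.
from allennlp.common import Params

params = Params.from_file("config/udify_base.json")   # must contain trainer.cuda_device
predict_model("udify_predictor",
              params,
              "logs/udify",                            # directory holding model.tar.gz
              "data/en_ewt-ud-test.conllu",
              "predictions/en_ewt-ud-test.conllu",
              batch_size=32)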
Example #3
    def file_iface_predictor(input_file, output_file):

        manager = _PredictManager(predictor,
                                  input_file,
                                  output_file,
                                  batch_size,
                                  print_to_console=False,
                                  has_dataset_reader=True)
        manager.run()
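This snippet is a nested helper that closes over predictor and batch_size from an enclosing scope. One way the surrounding factory could look (the wrapper name and signature are assumptions, not from the source):

def make_file_iface_predictor(predictor, batch_size=1):
    # Hypothetical enclosing factory: binds an already-loaded predictor and a
    # batch size, and returns the file-based interface defined above.
    def file_iface_predictor(input_file, output_file):
        manager = _PredictManager(predictor,
                                  input_file,
                                  output_file,
                                  batch_size,
                                  print_to_console=False,
                                  has_dataset_reader=True)
        manager.run()
    return file_iface_predictor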
Example #4
    def predict(self, args: argparse.Namespace, predictor: Predictor):
        manager = _PredictManager(
            predictor,
            args.input_file,
            args.output_file,
            args.batch_size,
            not args.silent,          # print_to_console
            args.use_dataset_reader,  # has_dataset_reader
        )
        manager.run()
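A sketch of driving this method without the CLI parser; the attribute names follow the method body above, the values are illustrative.

# Hypothetical usage sketch: building the Namespace by hand.
import argparse

args = argparse.Namespace(input_file="data/test.jsonl",
                          output_file="predictions/test.jsonl",
                          batch_size=16,
                          silent=True,              # True disables console printing
                          use_dataset_reader=True)
# subcommand.predict(args, predictor)   # predictor: an already-loaded Predictor instance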
Example #5
def predict_model_with_archive(predictor: str, params: Params, archive: str,
                               input_file: str, output_file: str, batch_size: int = 1):
    cuda_device = params["trainer"]["cuda_device"]

    check_for_gpu(cuda_device)
    archive = load_archive(archive,
                           cuda_device=cuda_device)

    predictor = Predictor.from_archive(archive, predictor)

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
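This variant takes the archive path directly; a minimal sketch of calling it for CPU inference, building a Params that carries only the key the function actually reads (cuda_device of -1 selects the CPU in AllenNLP). Paths are illustrative, and "sentence-tagger" is the registered predictor name used in Example #7 below.

# Hypothetical usage sketch: minimal params, CPU inference, illustrative paths.
from allennlp.common import Params

params = Params({"trainer": {"cuda_device": -1}})
predict_model_with_archive("sentence-tagger",
                           params,
                           "logs/model.tar.gz",
                           "data/test.jsonl",
                           "predictions/test.jsonl",
                           batch_size=8)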
Example #6
def init_load_model():
    """
    The [`run`](./train.md#run) command only knows about the registered classes in the ``allennlp``
    codebase. In particular, once you start creating your own `Model` s and so forth, it won't
    work for them, unless you use the ``--include-package`` flag or you make your code available
    as a plugin (see [`plugins`](./plugins.md)).
    """
    global predict_manager, cuda_place
    import_plugins()

    parser = create_parser("allennlp")
    args = parser.parse_args()
    args.archive_file = r"models/ace05_event/model.tar.gz"
    args.cuda_device = cuda_place
    args.input_file = r"data/ace-event/processed-data/default-settings/json/test.json"
    args.output_file = r"predictions/event_predict.json"
    args.use_dataset_reader = True
    args.include_package = "dygie"
    args.predictor = "dygie"
    args.weights_file = None
    args.overrides = ''
    args.dataset_reader_choice = 'validation'
    args.batch_size = 1
    args.silent = False

    # print(dir(args))
    # print(args.include_package)
    # for package_name in args.include_package:
    #     import_module_and_submodules_new(package_name)
    import_module_and_submodules_new('dygie')
    import_module_and_submodules_new('dygie.predictors.dygie')
    import_module_and_submodules_new('dygie.models.dygie')
    # predictor = DyGIEPredictor.from_path(os.path.join(current_path, 'models/ace05_event/model.tar.gz'), predictor_name="dygie")
    predictor = predict._get_predictor(args)

    predict_manager = predict._PredictManager(
        predictor,
        args.input_file,
        args.output_file,
        args.batch_size,
        not args.silent,
        args.use_dataset_reader,
    )
Example #7
def pred(cuda_device=0,
         archive_file="/backup3/jcxu/exComp/tmp_expsc74o5pf7/model.tar.gz",
         weights_file="/backup3/jcxu/exComp/tmp_expsc74o5pf7/best.th",
         predictor='lstm-tagger',
         input_file="/backup3/jcxu/exComp/example.txt"):
    with open(input_file, 'w') as fd:
        json.dump({"sentence": "This is a useful sentence."}, fd)
        fd.write("\n")
        json.dump({"sentence": "This is a gree, blue and useful sentence."},
                  fd)
        fd.write("\n")
        json.dump({"sentence": "This is a useless sentence."}, fd)
    check_for_gpu(cuda_device)
    archive = load_archive(archive_file,
                           weights_file=weights_file,
                           cuda_device=cuda_device,
                           overrides="")
    # predictor = SentenceTaggerPredictor(archive, dataset_reader=PosDatasetReader())
    predictor = Predictor.from_archive(archive, 'sentence-tagger')

    # output_file=None and print_to_console=True: predictions are written to stdout
    manager = _PredictManager(predictor, input_file, None, 1, True, False)
    manager.run()
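A sketch of calling pred with the hard-coded paths overridden; the archive and weights paths are placeholders and must point at a real trained tagger.

# Hypothetical usage sketch: placeholder paths, CPU inference.
pred(cuda_device=-1,
     archive_file="models/tagger/model.tar.gz",
     weights_file="models/tagger/best.th",
     input_file="example.txt")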
Example #8
def run(_):
    """Run model."""
    # Imports are required to make Registrable modules visible without passing parameter
    util.import_module_and_submodules("combo.commands")
    util.import_module_and_submodules("combo.models")
    util.import_module_and_submodules("combo.training")

    if FLAGS.mode == "train":
        checks.file_exists(FLAGS.config_path)
        params = common.Params.from_file(FLAGS.config_path,
                                         ext_vars=_get_ext_vars())
        model_params = params.get("model").as_ordered_dict()
        serialization_dir = tempfile.mkdtemp(prefix="allennlp",
                                             dir=FLAGS.serialization_dir)
        model = train.train_model(params,
                                  serialization_dir=serialization_dir,
                                  file_friendly_logging=True)
        logger.info(f"Training model stored in: {serialization_dir}")

        if FLAGS.finetuning_training_data_path:
            for f in FLAGS.finetuning_training_data_path:
                checks.file_exists(f)

            # Loading will be performed from stored model.tar.gz
            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            params = common.Params.from_file(
                FLAGS.config_path, ext_vars=_get_ext_vars(finetuning=True))
            # Replace model definition with pretrained archive
            params["model"] = {
                "type": "from_archive",
                "archive_file": serialization_dir + "/model.tar.gz",
            }
            serialization_dir = tempfile.mkdtemp(prefix="allennlp",
                                                 suffix="-finetuning",
                                                 dir=FLAGS.serialization_dir)
            model = train.train_model(params.duplicate(),
                                      serialization_dir=serialization_dir,
                                      file_friendly_logging=True)

            # Make finetuning model serialization independent from training serialization
            # Storing model definition instead of archive
            params["model"] = model_params
            params.to_file(
                os.path.join(serialization_dir, archival.CONFIG_NAME))
            archival.archive_model(serialization_dir)

            logger.info(f"Finetuned model stored in: {serialization_dir}")

        if FLAGS.test_path and FLAGS.output_file:
            checks.file_exists(FLAGS.test_path)
            params = common.Params.from_file(
                FLAGS.config_path, ext_vars=_get_ext_vars())["dataset_reader"]
            params.pop("type")
            dataset_reader = dataset.UniversalDependenciesDatasetReader.from_params(
                params)
            predictor = predict.SemanticMultitaskPredictor(
                model=model, dataset_reader=dataset_reader)
            test_trees = dataset_reader.read(FLAGS.test_path)
            with open(FLAGS.output_file, "w") as file:
                for tree in test_trees:
                    file.writelines(
                        api.sentence2conllu(
                            predictor.predict_instance(tree),
                            keep_semrel=dataset_reader.use_sem).serialize())
    else:
        use_dataset_reader = FLAGS.conllu_format
        predictor = _get_predictor()
        if FLAGS.input_file == "-":
            use_dataset_reader = False
            predictor.without_sentence_embedding = True
        if use_dataset_reader:
            predictor.line_to_conllu = True
        if FLAGS.silent:
            logging.getLogger("allennlp.common.params").disabled = True
        manager = allen_predict._PredictManager(
            predictor,
            FLAGS.input_file,
            FLAGS.output_file,
            FLAGS.batch_size,
            not FLAGS.silent,
            use_dataset_reader,
        )
        manager.run()
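run(_) reads abseil flags throughout; a rough sketch of how the flags it touches might be declared and the program launched. The names match the FLAGS accesses above, but the defaults and help strings are assumptions, not taken from the source.

# Hypothetical flag declarations covering only the FLAGS accessed in run(_);
# defaults and help texts are illustrative.
from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_enum("mode", "predict", ["train", "predict"], "Whether to train or predict.")
flags.DEFINE_string("config_path", None, "Training configuration file.")
flags.DEFINE_string("serialization_dir", None, "Directory for model archives.")
flags.DEFINE_list("finetuning_training_data_path", [], "Optional finetuning data files.")
flags.DEFINE_string("test_path", None, "Evaluation data path.")
flags.DEFINE_string("input_file", None, "Prediction input ('-' reads raw text).")
flags.DEFINE_string("output_file", None, "Prediction output path.")
flags.DEFINE_boolean("conllu_format", True, "Treat prediction input as CoNLL-U.")
flags.DEFINE_boolean("silent", False, "Suppress console output.")
flags.DEFINE_integer("batch_size", 32, "Prediction batch size.")

if __name__ == "__main__":
    app.run(run)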