Example #1
def predict_model(predictor: str,
                  params: Params,
                  archive_dir: str,
                  input_file: str,
                  output_file: str,
                  batch_size: int = 1):
    """
    Predict annotations for the given input file using the given model and write them to an output file.
    :param predictor: the type of predictor to use, e.g., "udify_predictor"
    :param params: the Params of the model
    :param archive_dir: the directory containing the saved model archive
    :param input_file: the input file to predict
    :param output_file: the output file to write predictions to
    :param batch_size: the batch size; set this higher to speed up GPU inference
    """
    cuda_device = params["trainer"]["cuda_device"]

    check_for_gpu(cuda_device)
    archive = load_archive(os.path.join(archive_dir, "model.tar.gz"),
                           cuda_device=cuda_device)

    predictor = Predictor.from_archive(archive, predictor)

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
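A hedged invocation sketch for predict_model; the config path, archive directory, and file names below are hypothetical:

# Sketch only: all paths and the predictor name are placeholders.
params = Params.from_file(os.path.join("logs/udify", "config.json"))
predict_model("udify_predictor",
              params,
              archive_dir="logs/udify",
              input_file="data/test.conllu",
              output_file="predictions.conllu",
              batch_size=32)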
Example #2
    def __init__(self, model: config.Model):
        self.model = model
        self.app = Flask(model.id)
        self.configure_logging()

        o = json.dumps(model.overrides) if model.overrides is not None else ""
        archive = load_archive(model.archive_file, overrides=o)
        self.predictor = Predictor.from_archive(archive, model.predictor_name)

        self.interpreters = self.load_interpreters()
        self.attackers = self.load_attackers()

        self.configure_error_handling()

        # By creating the LRU caches when the class is instantiated, we can
        # be sure that the caches are specific to the instance, and not the class,
        # i.e. every instance will have its own set of caches.

        @lru_cache(maxsize=1024)
        def predict_with_cache(inputs: str) -> JsonDict:
            return self.predict(json.loads(inputs))

        @lru_cache(maxsize=1024)
        def interpret_with_cache(interpreter_id: str, inputs: str) -> JsonDict:
            return self.interpret(interpreter_id, json.loads(inputs))

        @lru_cache(maxsize=1024)
        def attack_with_cache(attacker_id: str, attack: str) -> JsonDict:
            return self.attack(attacker_id, json.loads(attack))

        self.predict_with_cache = predict_with_cache
        self.interpret_with_cache = interpret_with_cache
        self.attack_with_cache = attack_with_cache

        self.setup_routes()
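The cached wrappers above take a JSON string rather than a dict because functools.lru_cache requires hashable arguments. A hedged usage sketch from inside a route handler (the payload is illustrative):

# Sketch only: serialize the request body so it can serve as the cache key.
payload = {"sentence": "AllenNLP makes building models easy."}
result = self.predict_with_cache(json.dumps(payload, sort_keys=True))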
Example #3
def predict_ensemble(test_df):
    """
    Predict input ``test_df`` with columns ``review`` and ``rating``
    """
    model_paths = glob('output_*/model.tar.gz')
    all_predicted_labels = []
    instances = None
    for model_path in model_paths:
        archive = load_archive(model_path)  # load trained model
        wongnai_predictor = Predictor.from_archive(archive,
                                                   'wongnai_predictor')
        if instances is None:
            # tokenize the reviews once, reusing the first model's dataset reader
            instances = [
                wongnai_predictor._dataset_reader.text_to_instance(
                    word_tokenize(review)) for review in list(test_df.review)
            ]
        predicted_labels = [
            int(
                wongnai_predictor.predict_instance(instance)
                ['predicted_label']) for instance in instances
        ]
        all_predicted_labels.append(predicted_labels)
    all_predicted_labels = np.array(all_predicted_labels)
    # majority vote across models for each review
    predicted_labels_vote = mode(all_predicted_labels.T,
                                 axis=-1).mode.ravel()
    test_df['rating'] = predicted_labels_vote
    return test_df.drop('review', axis=1)
Example #4
 def __init__(self,
              name: str,
              model_path: str = None,
              model_online_path: str = None,
              description: str = '',
              model_type: str = None) -> None:
     """A class specifically created for wrapping the predictors from 
     Allennlp: https://allenai.github.io/allennlp-docs/api/allennlp.predictors.html
     
     Parameters
     ----------
     name : str
     The name of the predictor.
     model_path : str, optional
         A local model path if you are using local models, by default None.
         This and ``model_online_path`` cannot both be None.
     model_online_path : str, optional
         An online model path, by default None
     description : str, optional
         A sentence describing the predictor., by default ''
     model_type : str, optional
         The model type as used in Allennlp, by default None
     
     Returns
     -------
     None
     """
     model = None
     if model_path:
         archive = load_archive(model_path)
         model = AllenPredictor.from_archive(archive, model_type)
     elif model_online_path:
         model = AllenPredictor.from_path(model_online_path, model_type)
     self.predictor = model
     Predictor.__init__(self, name, description, model, ['accuracy'])
Example #5
def load_model(vocab_path: str,
               model_path: str,
               predictor_name: str,
               device: int = -1):
    model_config = None
    files = os.listdir(model_path)
    for file in files:
        if file.endswith("config.json"):
            model_config = file

    # If model_config is None, this is an LSTM/Transformer
    # encoder-decoder without BERT.
    if model_config is None:
        config_override = {
            "vocabulary.directory": vocab_path,  # override the vocabulary path
            "dataset_reader.vocab_path": vocab_path,
            "model.text_field_embedder.token_embedders.pretrained_file":
            None  # override the pretrained embedding path
        }
    else:
        config_override = {
            "vocabulary.directory": vocab_path,  # override the vocabulary path
            "dataset_reader.vocab_path": vocab_path,
            "dataset_reader.model_name": model_path,
            "model.model_name": os.path.join(model_path, model_config)
        }
    archive = load_archive(os.path.join(model_path, "model.tar.gz"),
                           cuda_device=device,
                           overrides=json.dumps(config_override))
    predictor = Predictor.from_archive(archive, predictor_name)
    return predictor
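A hedged usage sketch for load_model; the vocabulary directory, model directory, and predictor name are placeholders:

# Sketch only: paths and names are illustrative.
predictor = load_model(vocab_path="data/vocabulary",
                       model_path="checkpoints/seq2seq",
                       predictor_name="seq2seq",
                       device=0)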
Example #6
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)

    return Predictor.from_archive(archive, args.predictor)
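The helper only reads a few attributes off the argparse.Namespace, so a hedged sketch can drive it with a hand-built namespace (all values are placeholders):

# Sketch only: attribute values are illustrative.
args = argparse.Namespace(archive_file="model.tar.gz",
                          weights_file=None,
                          cuda_device=-1,
                          overrides="",
                          predictor="sentence-tagger")
predictor = _get_predictor(args)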
Example #7
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)

    return Predictor.from_archive(archive, args.predictor)
Example #8
def run_on_one_gpu(root, input: List, cuda_device_id):
    arc = load_archive(
        archive_file=os.path.join(root, "elmo-constituency-parser-2018.03.14.tar.gz"),
        cuda_device=cuda_device_id)
    predictor = Predictor.from_archive(archive=arc)
    # run the constituency parser on every input sentence
    return [predictor.predict(sentence=sentence) for sentence in input]
Example #9
 def post(self):
     question = request.json['question']
     passage = request.json['passage']
     archive = load_archive(
         'https://s3-us-west-2.amazonaws.com/allennlp/models/bidaf-model-2017.09.15-charpad.tar.gz'
     )
     predictor = Predictor.from_archive(archive, 'machine-comprehension')
     answering = predictor.predict(question, passage)
     return answering
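Loading the archive inside post() repeats the expensive download and model load on every request. A hedged variant that loads the predictor once and reuses it across requests (the Resource subclass name is hypothetical; the URL and predictor name come from the example above):

# Sketch only: load the archive once at import time.
ARCHIVE = load_archive(
    'https://s3-us-west-2.amazonaws.com/allennlp/models/bidaf-model-2017.09.15-charpad.tar.gz')
PREDICTOR = Predictor.from_archive(ARCHIVE, 'machine-comprehension')

class Answer(Resource):  # hypothetical flask_restful Resource
    def post(self):
        question = request.json['question']
        passage = request.json['passage']
        return PREDICTOR.predict(question=question, passage=passage)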
Example #10
 def __init__(self, model_name: str = 'fine-grained-ner'):
   assert model_name in MODELS_MAPPING, \
     'Unknown model name: "{}". Available models: {}'.format(model_name, ', '.join(MODELS_MAPPING.keys()))
   model_url = MODELS_MAPPING[model_name]
   try:
     cuda_device = torch.cuda.current_device()
   except Exception:
     cuda_device = -1
   self._predictor = Predictor.from_archive(load_archive(model_url, cuda_device=cuda_device))
Example #11
def get_oie_predictor():
    if torch.cuda.is_available():
        archive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/openie-model.2018-08-20.tar.gz",
            cuda_device=0)
    else:
        archive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/openie-model.2018-08-20.tar.gz"
        )
    return Predictor.from_archive(archive)
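A hedged usage sketch for the predictor returned above; predict_json is the generic Predictor entry point, and the sentence is illustrative:

# Sketch only: the Open IE output is expected to contain "verbs" and "words" keys.
predictor = get_oie_predictor()
result = predictor.predict_json({"sentence": "John gave Mary a book in the park."})
print(result["verbs"])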
Example #12
    def _load_predictor(self):
        """ Attempts loading of model locally, otherwise downloads file

        Returns:
            predictor : Dependent on type of model
        """
        Path(Path.joinpath(file_dir, "archived_models")).mkdir(parents=True,
                                                               exist_ok=True)
        if not Path(self.model_path).exists():
            print("Downloading archived model for %s" % self.model_name)
            urllib.request.urlretrieve(self.model_url, self.model_path)

        archived_model = load_archive(self.model_path)
        if self.model_name == "open-information-extraction":
            predictor = Predictor.from_archive(
                archived_model, self.model_name)  # Reverts to SRL otherwise
        else:
            predictor = Predictor.from_archive(archived_model)
        return predictor
Example #13
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)
    predictor = Predictor.from_archive(archive, args.predictor)
    if "dependency_srl" in args.predictor:
        predictor.set_files(args.input_file.replace("txt", "predict"))
    return predictor
Example #14
def get_srl_predictor():
    if torch.cuda.is_available():
        archive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/bert-base-srl-2019.06.17.tar.gz",
            cuda_device=0)
    else:
        archive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/bert-base-srl-2019.06.17.tar.gz"
        )

    return Predictor.from_archive(archive)
Example #15
def get_predictor(args):
    archive = load_archive(args.archive_file,
                           weights_file=None,
                           cuda_device=args.cuda_device,
                           overrides="")

    # Otherwise, use the mapping
    model_type = archive.config.get("model").get("type")
    if model_type != 'srl':
        raise Exception('the given model is not for srl.')
    return Predictor.from_archive(archive, 'semantic-role-labeling')
Example #16
def main():
    args = get_args()
    print(args)
    # program arguments
    load = args.load
    store = args.store
    verbose = args.v
    # hyper-params for gradient inference
    regularization = args.a
    learning_rate = args.l
    inference_iterations = args.i
    enable_cuda = args.c
    pickle_path = args.p

    # load data samples
    instances = []
    # load from data files
    if load in ['test', 'development', 'train', 'selected']:
        datapath = os.getcwd() + '/data/' + load
        srl_reader = CustomSrlReader(
            token_indexers={"elmo": ELMoTokenCharactersIndexer()})
        test_dataset = srl_reader.subsampled_read(datapath)
        instances = [i for i in test_dataset]
    # load from pickle files
    elif load in ['failed', 'fixed', 'gzero', 'fixed+failed']:
        if load == 'fixed+failed':
            instances = load_and_deserialize('fixed', pickle_path)
            instances += load_and_deserialize('failed', pickle_path)
        else:
            instances = load_and_deserialize(load, pickle_path)
    # init vocabulary and iterator
    vocab = Vocabulary.from_instances(instances)
    iterator = BasicIterator(batch_size=1)
    iterator.index_with(vocab)

    # load pre-trained model
    archive = load_archive("srl-model-2018.05.25.tar.gz")
    original_predictor = Predictor.from_archive(archive)
    model = CustomSemanticRoleLabeler.from_srl(original_predictor._model)

    # init and invoke inference method
    gbi = GradientBasedInference(model=model,
                                 learning_rate=learning_rate,
                                 alpha=regularization,
                                 store=store,
                                 enable_cuda=enable_cuda)
    for instance in iterator(instances, num_epochs=1):
        y_hat = gbi.gradient_inference(instance,
                                       iterations=inference_iterations,
                                       num_samples=len(instances),
                                       verbose=verbose)
        gbi.print_stats()

    gbi.append_stats(args)
Example #17
def cli_predict(granularity: str, fold: str, config_path: str):
    with open(config_path) as f:
        conf = toml.load(f)
    serialization_dir = conf["serialization_dir"]
    log.info("Loading model from: %s", serialization_dir)
    archive = load_archive(os.path.join(serialization_dir, "model.tar.gz"), cuda_device=0)
    predictor = Predictor.from_archive(archive, "qb.predictor.QbPredictor")
    # pylint: disable=protected-access
    dataset_reader = predictor._dataset_reader
    tokenizer = dataset_reader._tokenizer
    token_indexers = dataset_reader._token_indexers
Example #18
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides,
                           config_file=args.config_file)

    return Predictor.from_archive(
        archive,
        args.predictor,
        dataset_reader_to_load=args.dataset_reader_choice)
Example #19
def parse(data, which=0):
    if which != 2:
        # depend parse
        darchive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/biaffine-dependency-parser-ptb-2018.08.23.tar.gz"
        )
        dpred = Predictor.from_archive(darchive, 'biaffine-dependency-parser')

    if which != 1:
        # const parse
        carchive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo-constituency-parser-2018.03.14.tar.gz"
        )
        cpred = Predictor.from_archive(carchive, 'constituency-parser')

    for d in data:
        if which != 2:
            dep = dpred.predict_json({"sentence": d.sentence})
            d.depend = dep
        if which != 1:
            con = cpred.predict_json({"sentence": d.sentence})
            d.const = con
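parse() mutates the passed objects in place; a hedged sketch with a minimal stand-in data class (the real items only need a .sentence attribute):

# Sketch only: Item is a placeholder for the caller's data objects.
class Item:
    def __init__(self, sentence):
        self.sentence = sentence
        self.depend = None
        self.const = None

items = [Item("The quick brown fox jumps over the lazy dog.")]
parse(items, which=0)  # which=0 runs both the dependency and constituency parsers
print(items[0].const["trees"])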
Example #20
    def __init__(self, nlp):
        archive = load_archive(ner_model_archive)
        self.predictor = Predictor.from_archive(
            archive, predictor_name="atc-entity-tagger")

        Token.set_extension("atc_ent_bilou_", default="")
        Token.set_extension("atc_ent_type_", default="")
        Token.set_extension("atc_ent_type", default=0)
        Doc.set_extension("atc_ents", default=())

        for tag in valid_tags:
            nlp.vocab.strings.add(tag)
        self.vocab = nlp.vocab
Example #21
 def predict_result(self):
     archive = load_archive('../pure_hscrf/model.tar.gz')
     predictor = Predictor.from_archive(archive, 'CON_NER_predictor')
     reader = predictor._dataset_reader
     while True:
         sent = input()
         if sent == "":
             exit(0)
         tokens = [Token(token) for token in sent]
         tags = ["O"] * len(tokens)
         instance = reader.text_to_instance(tokens, tags)
         result = predictor.predict_instance(instance)
         print(result)
Example #22
def cli_generate_guesses(
    config_path: str,
    granularity: List[str] = [],
    char_skip: int = 25,
    max_n_guesses: int = 10,
    trickme_path: str = None,
    generation_fold: List[str] = constants.GENERATION_FOLDS,
):
    with open(config_path) as f:
        conf = toml.load(f)
    serialization_dir = conf["serialization_dir"]
    log.info("Loading model from: %s", serialization_dir)
    archive = load_archive(os.path.join(serialization_dir, "model.tar.gz"), cuda_device=0)
    predictor = Predictor.from_archive(archive, "qb.predictor.QbPredictor")
    # pylint: disable=protected-access
    dataset_reader = predictor._dataset_reader
    tokenizer = dataset_reader._tokenizer
    token_indexers = dataset_reader._token_indexers
    for g in granularity:
        if g == "first":
            first_sentence = True
            full_question = False
            partial_question = False
        elif g == "full":
            first_sentence = False
            full_question = True
            partial_question = False
        elif g == "char":
            first_sentence = False
            full_question = False
            partial_question = True
        else:
            raise ValueError("Invalid granularity")

        log.info("Generating guesses for: %s", generation_fold)
        for fold in generation_fold:
            log.info("Guesses for fold %s", fold)
            df = generate_guesses(
                model=archive.model,
                tokenizer=tokenizer,
                token_indexers=token_indexers,
                max_n_guesses=max_n_guesses,
                fold=fold,
                first_sentence=first_sentence,
                full_question=full_question,
                partial_question=partial_question,
                char_skip=char_skip,
                trickme_path=trickme_path,
            )
            path = os.path.join(serialization_dir, guess_df_path(g, fold))
            df.to_pickle(path)
Example #23
def main(args):
	#UDify setup (from predict.py file)
	import_submodules("udify")

	predictor = "udify_predictor"

	#load model
	archive = load_archive(args.archive) #cuda_device=cuda_device)
	predictor = Predictor.from_archive(archive, predictor)
	encoder = predictor._model.text_field_embedder.token_embedder_bert.bert_model

	#save BERT encoder state_dict
	with open(args.encoder_ckpt, 'wb') as f:
		torch.save(encoder.state_dict(), f)
Example #24
def run_on_one_gpu(path_model, input: List = None, cuda_device_id: int = 0):
    arc = load_archive(
        archive_file=path_model,
        cuda_device=cuda_device_id)
    predictor = Predictor.from_archive(archive=arc)
    bag = []
    for s in input:
        out = predictor.predict(
            sentence=s
        )
        print(out["trees"])

        bag.append(out['trees'])
    return bag
Example #25
    def test_ranked_logical_forms_present(self):
        archive_path = "fixtures/trained_models/seq2seq_model.tar.gz"
        archive = load_archive(archive_path)
        predictor = Predictor.from_archive(archive, 'wikitables-reranker')

        inputs = {"question": "Who is a good boy?",
                  "table": "Dog\tType\nFido\tgood\nDofi\tbad",
                  "logical_forms":
                  ["(select_string (filter_in all_rows string_column:type string:good) string_column:dog)",
                   "(select_string (first all_rows) string_column:dog)"]}
        result = predictor.predict_json(inputs)
        assert result["ranked_logical_forms"] == \
                ["(select_string (first all_rows) string_column:dog)",
                 "(select_string (filter_in all_rows string_column:type string:good) string_column:dog)"]
Example #26
def main(archive_file: str):
    archive = load_archive(archive_file)
    predictor = Predictor.from_archive(archive)

    embedding = predictor._model.embedding_in
    vocab = predictor._model.vocab

    #write_embeddings(embedding, "./junks/text8_emb.txt", vocab)

    print(get_synonyms('one', embedding, vocab))
    print(get_synonyms('december', embedding, vocab))
    print(get_synonyms('flower', embedding, vocab))
    print(get_synonyms('design', embedding, vocab))
    print(get_synonyms('snow', embedding, vocab))
Example #27
    def __init__(self):
        cuda_device = 0
        archive_file = 'model/model.tar.gz'
        predictor_name = 'sentence_classifier_predictor'

        archive = load_archive(archive_file=archive_file,
                               cuda_device=cuda_device)

        predictor = Predictor.from_archive(archive,
                                           predictor_name=predictor_name)

        self.predictor = predictor
        label_map = archive.model.vocab.get_index_to_token_vocabulary('labels')
        self.labels = [label for _, label in sorted(label_map.items())]
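A hedged sketch of how self.labels lines up with the predictor output; the input format and the "probs" output key are assumptions about this particular classifier:

# Sketch only: assumes the predictor returns a probability vector under "probs".
output = self.predictor.predict_json({"sentence": "The acting was superb."})
probs = output["probs"]
best = max(range(len(probs)), key=lambda i: probs[i])
print(self.labels[best], probs[best])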
Example #28
    def __init__(self, model: config.Model):
        self.model = model
        self.app = Flask(model.id)
        self.configure_logging()

        o = json.dumps(model.overrides) if model.overrides is not None else ""
        archive = load_archive(model.archive_file, overrides=o)
        self.predictor = Predictor.from_archive(archive, model.predictor_name)

        self.interpreters = self.load_interpreters()
        self.attackers = self.load_attackers()

        self.configure_error_handling()
        self.setup_routes()
Example #29
def load_model(model_path: str, predictor_name: str, device: int = -1):
    model_config = "bert_config.json"
    files = os.listdir(model_path)
    for file in files:
        if file.endswith("config.json"):
            model_config = file

    config_override = {
        "dataset_reader.model_name": model_path,
        "model.model_name": os.path.join(model_path, model_config),
        "model.task_pretrained_file": None
    }
    archive = load_archive(os.path.join(model_path, "model.tar.gz"),
                           cuda_device=device, overrides=json.dumps(config_override))
    predictor = Predictor.from_archive(archive, predictor_name)
    return predictor
Example #30
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    params = Params.from_file(args.extractor_config_file)

    model = Model.from_params(vocab=None, params=params.pop('model'))
    if args.cuda_device >= 0:
        model.to(args.cuda_device)
    else:
        model.to(None)

    archive = Archive(model=model, config=params)

    return Predictor.from_archive(
        archive,
        args.predictor,
        dataset_reader_to_load=args.dataset_reader_choice)
Example #31
def predict(comment):
    """
    Snippet to predict sentiment of Wongnai comment
    """
    from allennlp.models.archival import load_archive
    from allennlp.predictors.predictor import Predictor
    from wongnai.wongnai_reader import WongnaiDatasetReader
    from wongnai.wongnai_classifier import WongnaiCommentClassifier
    from wongnai.wongnai_predictor import WongnaiCommentPredictor
    from pythainlp import word_tokenize

    archive = load_archive('model.tar.gz')
    wongnai_predictor = Predictor.from_archive(archive, 'wongnai_predictor')
    prediction = wongnai_predictor.predict_json(
        {"comment": word_tokenize(comment)})
    print(prediction)