def predict_model(predictor: str, params: Params, archive_dir: str,
                  input_file: str, output_file: str, batch_size: int = 1):
    """
    Predict output annotations from the given model and input file and produce an output file.

    :param predictor: the type of predictor to use, e.g., "udify_predictor"
    :param params: the Params of the model
    :param archive_dir: the directory containing the saved model archive
    :param input_file: the input file to predict
    :param output_file: the output file to save
    :param batch_size: the batch size; set this higher to speed up GPU inference
    """
    cuda_device = params["trainer"]["cuda_device"]
    check_for_gpu(cuda_device)

    archive = load_archive(os.path.join(archive_dir, "model.tar.gz"),
                           cuda_device=cuda_device)
    predictor = Predictor.from_archive(archive, predictor)

    manager = _PredictManager(predictor,
                              input_file,
                              output_file,
                              batch_size,
                              print_to_console=False,
                              has_dataset_reader=True)
    manager.run()
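A minimal invocation sketch; the config path, archive directory, and CoNLL-U file names below are hypothetical stand-ins, and the config is assumed to define trainer.cuda_device:

import os
from allennlp.common import Params

params = Params.from_file("logs/udify/config.json")  # hypothetical path
predict_model("udify_predictor", params, "logs/udify",
              "data/test.conllu", "predictions/test.conllu",  # hypothetical files
              batch_size=32)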
def __init__(self, model: config.Model):
    self.model = model
    self.app = Flask(model.id)
    self.configure_logging()

    o = json.dumps(model.overrides) if model.overrides is not None else ""
    archive = load_archive(model.archive_file, overrides=o)
    self.predictor = Predictor.from_archive(archive, model.predictor_name)

    self.interpreters = self.load_interpreters()
    self.attackers = self.load_attackers()
    self.configure_error_handling()

    # By creating the LRU caches when the class is instantiated, we can
    # be sure that the caches are specific to the instance, and not the class,
    # i.e. every instance will have its own set of caches.
    @lru_cache(maxsize=1024)
    def predict_with_cache(inputs: str) -> JsonDict:
        return self.predict(json.loads(inputs))

    @lru_cache(maxsize=1024)
    def interpret_with_cache(interpreter_id: str, inputs: str) -> JsonDict:
        return self.interpret(interpreter_id, json.loads(inputs))

    @lru_cache(maxsize=1024)
    def attack_with_cache(attacker_id: str, attack: str) -> JsonDict:
        return self.attack(attacker_id, json.loads(attack))

    self.predict_with_cache = predict_with_cache
    self.interpret_with_cache = interpret_with_cache
    self.attack_with_cache = attack_with_cache

    self.setup_routes()
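Because lru_cache keys on the raw argument string, equal inputs only share a cache entry if they serialize identically; a hedged usage sketch (``server`` is a hypothetical instance of the class above):

import json

payload = json.dumps({"sentence": "AllenNLP is great."}, sort_keys=True)
result = server.predict_with_cache(payload)  # computes and caches
result = server.predict_with_cache(payload)  # served from the LRU cache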
def predict_ensemble(test_df):
    """
    Predict input ``test_df`` with columns ``review`` and ``rating``
    """
    model_paths = glob('output_*/model.tar.gz')
    instances = None
    all_predicted_labels = []
    for model_path in model_paths:
        archive = load_archive(model_path)  # load trained model
        wongnai_predictor = Predictor.from_archive(archive, 'wongnai_predictor')
        if instances is None:
            # Tokenize once, reusing the dataset reader of the first loaded
            # model (all models share the same reader configuration).
            instances = [wongnai_predictor._dataset_reader.text_to_instance(word_tokenize(review))
                         for review in list(test_df.review)]
        predicted_labels = [int(wongnai_predictor.predict_instance(instance)['predicted_label'])
                            for instance in instances]
        all_predicted_labels.append(predicted_labels)
    all_predicted_labels = np.array(all_predicted_labels)
    # Majority vote across models for each review
    predicted_labels_vote = mode(all_predicted_labels.T, axis=-1).mode.ravel()
    test_df['rating'] = predicted_labels_vote
    return test_df.drop('review', axis=1)
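A hedged usage sketch; it assumes trained archives exist under output_*/ directories and that a (hypothetical) test.csv provides the review and rating columns:

import pandas as pd

test_df = pd.read_csv('test.csv')        # columns: review, rating (hypothetical file)
submission = predict_ensemble(test_df)   # rating filled by majority vote
submission.to_csv('submission.csv', index=False)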
def __init__(self, name: str, model_path: str = None,
             model_online_path: str = None, description: str = '',
             model_type: str = None) -> None:
    """A class specifically created for wrapping the predictors from AllenNLP:
    https://allenai.github.io/allennlp-docs/api/allennlp.predictors.html

    Parameters
    ----------
    name : str
        The name of the predictor.
    model_path : str, optional
        A local model path if you are using local models, by default None.
        This and ``model_online_path`` cannot both be None.
    model_online_path : str, optional
        An online model path, by default None.
    description : str, optional
        A sentence describing the predictor, by default ''.
    model_type : str, optional
        The model type as used in AllenNLP, by default None.

    Returns
    -------
    None
    """
    model = None
    if model_path:
        archive = load_archive(model_path)
        model = AllenPredictor.from_archive(archive, model_type)
    elif model_online_path:
        model = AllenPredictor.from_path(model_online_path, model_type)
    self.predictor = model

    Predictor.__init__(self, name, description, model, ['accuracy'])
def load_model(vocab_path: str, model_path: str, predictor_name: str, device: int = -1):
    model_config = None
    files = os.listdir(model_path)
    for file in files:
        if file.endswith("config.json"):
            model_config = file

    # If model_config is None, the archive is an LSTM/Transformer
    # encoder-decoder without BERT.
    if model_config is None:
        config_override = {
            "vocabulary.directory": vocab_path,  # override the vocabulary path
            "dataset_reader.vocab_path": vocab_path,
            # override the pretrained word-vector path
            "model.text_field_embedder.token_embedders.pretrained_file": None
        }
    else:
        config_override = {
            "vocabulary.directory": vocab_path,  # override the vocabulary path
            "dataset_reader.vocab_path": vocab_path,
            "dataset_reader.model_name": model_path,
            "model.model_name": os.path.join(model_path, model_config)
        }
    archive = load_archive(os.path.join(model_path, "model.tar.gz"),
                           cuda_device=device,
                           overrides=json.dumps(config_override))
    predictor = Predictor.from_archive(archive, predictor_name)
    return predictor
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)
    return Predictor.from_archive(archive, args.predictor)
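A sketch of wiring ``_get_predictor`` to a command line; the flag names are assumptions chosen to match the attributes the function reads, and model.tar.gz is a placeholder:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--archive-file", dest="archive_file", default="model.tar.gz")  # placeholder
parser.add_argument("--weights-file", dest="weights_file", default=None)
parser.add_argument("--cuda-device", dest="cuda_device", type=int, default=-1)
parser.add_argument("--overrides", default="")
parser.add_argument("--predictor", default=None)  # None falls back to the archive's default
predictor = _get_predictor(parser.parse_args())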
def run_on_one_gpu(root, sentences: List, cuda_device_id):
    arc = load_archive(
        archive_file=os.path.join(root, "elmo-constituency-parser-2018.03.14.tar.gz"),
        cuda_device=cuda_device_id)
    predictor = Predictor.from_archive(archive=arc)
    for sentence in sentences:
        predictor.predict(sentence=sentence)
def post(self):
    question = request.json['question']
    passage = request.json['passage']
    # Note: the archive is re-loaded on every request; caching the predictor
    # at module or class level would avoid repeating this expensive step.
    archive = load_archive(
        'https://s3-us-west-2.amazonaws.com/allennlp/models/bidaf-model-2017.09.15-charpad.tar.gz'
    )
    predictor = Predictor.from_archive(archive, 'machine-comprehension')
    answering = predictor.predict(question, passage)
    return answering
def __init__(self, model_name: str = 'fine-grained-ner'):
    assert model_name in MODELS_MAPPING, \
        'Unknown model name: "{}". Available models: {}'.format(
            model_name, ', '.join(MODELS_MAPPING.keys()))
    model_url = MODELS_MAPPING[model_name]

    try:
        cuda_device = torch.cuda.current_device()
    except Exception:
        cuda_device = -1

    self._predictor = Predictor.from_archive(load_archive(model_url, cuda_device=cuda_device))
def get_oie_predictor():
    # Run on GPU 0 when CUDA is available, otherwise on the CPU.
    cuda_device = 0 if torch.cuda.is_available() else -1
    archive = load_archive(
        "https://s3-us-west-2.amazonaws.com/allennlp/models/openie-model.2018-08-20.tar.gz",
        cuda_device=cuda_device)
    return Predictor.from_archive(archive)
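A hedged usage sketch; the ``verbs``/``description`` keys follow the Open IE predictor's JSON output format:

predictor = get_oie_predictor()
output = predictor.predict(sentence="John gave Mary a book.")
for verb in output.get("verbs", []):
    print(verb["description"])  # one bracketed extraction per detected predicate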
def _load_predictor(self):
    """
    Attempts loading of model locally, otherwise downloads file

    Returns:
        predictor : Dependent on type of model
    """
    Path(Path.joinpath(file_dir, "archived_models")).mkdir(parents=True, exist_ok=True)
    if not Path(self.model_path).exists():
        print("Downloading archived model for %s" % self.model_name)
        urllib.request.urlretrieve(self.model_url, self.model_path)

    archived_model = load_archive(self.model_path)
    if self.model_name == "open-information-extraction":
        predictor = Predictor.from_archive(archived_model, self.model_name)
    else:
        # Reverts to the archive's default predictor (SRL) otherwise
        predictor = Predictor.from_archive(archived_model)
    return predictor
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)
    predictor = Predictor.from_archive(archive, args.predictor)
    if "dependency_srl" in args.predictor:
        predictor.set_files(args.input_file.replace("txt", "predict"))
    return predictor
def get_srl_predictor():
    # Run on GPU 0 when CUDA is available, otherwise on the CPU.
    cuda_device = 0 if torch.cuda.is_available() else -1
    archive = load_archive(
        "https://s3-us-west-2.amazonaws.com/allennlp/models/bert-base-srl-2019.06.17.tar.gz",
        cuda_device=cuda_device)
    return Predictor.from_archive(archive)
def get_predictor(args):
    archive = load_archive(args.archive_file,
                           weights_file=None,
                           cuda_device=args.cuda_device,
                           overrides="")

    # Verify that the archived model is actually an SRL model.
    model_type = archive.config.get("model").get("type")
    if model_type != 'srl':
        raise Exception('the given model is not for srl.')
    return Predictor.from_archive(archive, 'semantic-role-labeling')
def main():
    args = get_args()
    print(args)

    # program arguments
    load = args.load
    store = args.store
    verbose = args.v

    # hyper-params for gradient inference
    regularization = args.a
    learning_rate = args.l
    inference_iterations = args.i
    enable_cuda = args.c
    pickle_path = args.p

    # load data samples
    instances = []
    # load from data files
    if load in ['test', 'development', 'train', 'selected']:
        datapath = os.getcwd() + '/data/' + load
        srl_reader = CustomSrlReader(token_indexers={"elmo": ELMoTokenCharactersIndexer()})
        test_dataset = srl_reader.subsampled_read(datapath)
        instances = [i for i in test_dataset]
    # load from pickle files
    elif load in ['failed', 'fixed', 'gzero', 'fixed+failed']:
        if load == 'fixed+failed':
            instances = load_and_deserialize('fixed', pickle_path)
            instances += load_and_deserialize('failed', pickle_path)
        else:
            instances = load_and_deserialize(load, pickle_path)

    # init vocabulary and iterator
    vocab = Vocabulary.from_instances(instances)
    iterator = BasicIterator(batch_size=1)
    iterator.index_with(vocab)

    # load pre-trained model
    archive = load_archive("srl-model-2018.05.25.tar.gz")
    original_predictor = Predictor.from_archive(archive)
    model = CustomSemanticRoleLabeler.from_srl(original_predictor._model)

    # init and invoke inference method
    gbi = GradientBasedInference(model=model,
                                 learning_rate=learning_rate,
                                 alpha=regularization,
                                 store=store,
                                 enable_cuda=enable_cuda)
    for instance in iterator(instances, num_epochs=1):
        y_hat = gbi.gradient_inference(instance,
                                       iterations=inference_iterations,
                                       num_samples=len(instances),
                                       verbose=verbose)

    gbi.print_stats()
    gbi.append_stats(args)
def cli_predict(granularity: str, fold: str, config_path: str):
    with open(config_path) as f:
        conf = toml.load(f)
    serialization_dir = conf["serialization_dir"]
    log.info("Loading model from: %s", serialization_dir)
    archive = load_archive(os.path.join(serialization_dir, "model.tar.gz"), cuda_device=0)
    predictor = Predictor.from_archive(archive, "qb.predictor.QbPredictor")
    # pylint: disable=protected-access
    dataset_reader = predictor._dataset_reader
    tokenizer = dataset_reader._tokenizer
    token_indexers = dataset_reader._token_indexers
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides,
                           config_file=args.config_file)
    return Predictor.from_archive(
        archive, args.predictor, dataset_reader_to_load=args.dataset_reader_choice)
def parse(data, which=0):
    if which != 2:
        # dependency parse
        darchive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/biaffine-dependency-parser-ptb-2018.08.23.tar.gz"
        )
        dpred = Predictor.from_archive(darchive, 'biaffine-dependency-parser')
    if which != 1:
        # constituency parse
        carchive = load_archive(
            "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo-constituency-parser-2018.03.14.tar.gz"
        )
        cpred = Predictor.from_archive(carchive, 'constituency-parser')

    for d in data:
        if which != 2:
            dep = dpred.predict_json({"sentence": d.sentence})
            d.depend = dep
        if which != 1:
            con = cpred.predict_json({"sentence": d.sentence})
            d.const = con
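A hedged usage sketch for ``parse``; ``Datum`` is a hypothetical stand-in for whatever objects the caller stores sentences in:

class Datum:
    def __init__(self, sentence):
        self.sentence = sentence
        self.depend = None   # filled with the dependency-parse JSON
        self.const = None    # filled with the constituency-parse JSON

data = [Datum("The cat sat on the mat.")]
parse(data, which=0)           # which=0 runs both parsers
print(data[0].const["trees"])  # constituency parser output includes "trees"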
def __init__(self, nlp):
    archive = load_archive(ner_model_archive)
    self.predictor = Predictor.from_archive(archive, predictor_name="atc-entity-tagger")

    Token.set_extension("atc_ent_bilou_", default="")
    Token.set_extension("atc_ent_type_", default="")
    Token.set_extension("atc_ent_type", default=0)
    Doc.set_extension("atc_ents", default=())

    for tag in valid_tags:
        nlp.vocab.strings.add(tag)
    self.vocab = nlp.vocab
def predict_result(self):
    archive = load_archive('../pure_hscrf/model.tar.gz')
    predictor = Predictor.from_archive(archive, 'CON_NER_predictor')
    # AllenNLP attaches the dataset reader to the predictor
    reader = predictor._dataset_reader
    while True:
        sent = input()
        if sent == "":
            exit(0)
        tokens = [Token(token) for token in sent]
        tags = ["O"] * len(tokens)  # one placeholder tag per token
        instance = reader.text_to_instance(tokens, tags)
        result = predictor.predict_instance(instance)
        print(result)
def cli_generate_guesses(
    config_path: str,
    granularity: List[str] = [],
    char_skip: int = 25,
    max_n_guesses: int = 10,
    trickme_path: str = None,
    generation_fold: List[str] = constants.GENERATION_FOLDS,
):
    with open(config_path) as f:
        conf = toml.load(f)
    serialization_dir = conf["serialization_dir"]
    log.info("Loading model from: %s", serialization_dir)
    archive = load_archive(os.path.join(serialization_dir, "model.tar.gz"), cuda_device=0)
    predictor = Predictor.from_archive(archive, "qb.predictor.QbPredictor")
    # pylint: disable=protected-access
    dataset_reader = predictor._dataset_reader
    tokenizer = dataset_reader._tokenizer
    token_indexers = dataset_reader._token_indexers

    for g in granularity:
        if g == "first":
            first_sentence = True
            full_question = False
            partial_question = False
        elif g == "full":
            first_sentence = False
            full_question = True
            partial_question = False
        elif g == "char":
            first_sentence = False
            full_question = False
            partial_question = True
        else:
            raise ValueError("Invalid granularity")

        log.info("Generating guesses for: %s", generation_fold)
        for fold in generation_fold:
            log.info("Guesses for fold %s", fold)
            df = generate_guesses(
                model=archive.model,
                tokenizer=tokenizer,
                token_indexers=token_indexers,
                max_n_guesses=max_n_guesses,
                fold=fold,
                first_sentence=first_sentence,
                full_question=full_question,
                partial_question=partial_question,
                char_skip=char_skip,
                trickme_path=trickme_path,
            )
            path = os.path.join(serialization_dir, guess_df_path(g, fold))
            df.to_pickle(path)
def main(args):
    # UDify setup (from predict.py file)
    import_submodules("udify")
    predictor = "udify_predictor"

    # load model
    archive = load_archive(args.archive)  # optionally pass cuda_device here
    predictor = Predictor.from_archive(archive, predictor)
    encoder = predictor._model.text_field_embedder.token_embedder_bert.bert_model

    # save BERT encoder state_dict
    with open(args.encoder_ckpt, 'wb') as f:
        torch.save(encoder.state_dict(), f)
def run_on_one_gpu(path_model, sentences: List = None, cuda_device_id: int = 0):
    arc = load_archive(archive_file=path_model, cuda_device=cuda_device_id)
    predictor = Predictor.from_archive(archive=arc)
    bag = []
    for s in sentences:
        out = predictor.predict(sentence=s)
        print(out["trees"])
        bag.append(out["trees"])
    return bag
def test_ranked_logical_forms_present(self):
    archive_path = "fixtures/trained_models/seq2seq_model.tar.gz"
    archive = load_archive(archive_path)
    predictor = Predictor.from_archive(archive, 'wikitables-reranker')
    inputs = {"question": "Who is a good boy?",
              "table": "Dog\tType\nFido\tgood\nDofi\tbad",
              "logical_forms": ["(select_string (filter_in all_rows string_column:type string:good) string_column:dog)",
                                "(select_string (first all_rows) string_column:dog)"]}
    result = predictor.predict_json(inputs)
    assert result["ranked_logical_forms"] == \
        ["(select_string (first all_rows) string_column:dog)",
         "(select_string (filter_in all_rows string_column:type string:good) string_column:dog)"]
def main(archive_file: str):
    archive = load_archive(archive_file)
    predictor = Predictor.from_archive(archive)
    embedding = predictor._model.embedding_in
    vocab = predictor._model.vocab
    # write_embeddings(embedding, "./junks/text8_emb.txt", vocab)
    print(get_synonyms('one', embedding, vocab))
    print(get_synonyms('december', embedding, vocab))
    print(get_synonyms('flower', embedding, vocab))
    print(get_synonyms('design', embedding, vocab))
    print(get_synonyms('snow', embedding, vocab))
def __init__(self):
    cuda_device = 0
    archive_file = 'model/model.tar.gz'
    predictor_name = 'sentence_classifier_predictor'
    archive = load_archive(archive_file=archive_file, cuda_device=cuda_device)
    predictor = Predictor.from_archive(archive, predictor_name=predictor_name)
    self.predictor = predictor
    # Recover label names ordered by their vocabulary index, so that the
    # i-th model output corresponds to self.labels[i].
    label_map = archive.model.vocab.get_index_to_token_vocabulary('labels')
    self.labels = [label for _, label in sorted(label_map.items())]
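A hedged usage sketch; ``SentenceClassifier`` is a hypothetical name for the class above, and the ``probs`` output key is an assumption about this predictor's JSON format:

clf = SentenceClassifier()  # hypothetical class name
output = clf.predictor.predict_json({"sentence": "Great service!"})
# "probs" is assumed to be a per-label probability list aligned with clf.labels
best = max(range(len(clf.labels)), key=lambda i: output["probs"][i])
print(clf.labels[best])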
def __init__(self, model: config.Model):
    self.model = model
    self.app = Flask(model.id)
    self.configure_logging()

    o = json.dumps(model.overrides) if model.overrides is not None else ""
    archive = load_archive(model.archive_file, overrides=o)
    self.predictor = Predictor.from_archive(archive, model.predictor_name)

    self.interpreters = self.load_interpreters()
    self.attackers = self.load_attackers()
    self.configure_error_handling()
    self.setup_routes()
def load_model(model_path: str, predictor_name: str, device: int = -1):
    model_config = "bert_config.json"
    files = os.listdir(model_path)
    for file in files:
        if file.endswith("config.json"):
            model_config = file

    config_override = {
        "dataset_reader.model_name": model_path,
        "model.model_name": os.path.join(model_path, model_config),
        "model.task_pretrained_file": None
    }
    archive = load_archive(os.path.join(model_path, "model.tar.gz"),
                           cuda_device=device,
                           overrides=json.dumps(config_override))
    predictor = Predictor.from_archive(archive, predictor_name)
    return predictor
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    params = Params.from_file(args.extractor_config_file)
    model = Model.from_params(vocab=None, params=params.pop('model'))
    if args.cuda_device >= 0:
        model.to(args.cuda_device)

    # Wrap the freshly built model in an Archive so the standard
    # Predictor.from_archive entry point can be reused.
    archive = Archive(model=model, config=params)
    return Predictor.from_archive(
        archive, args.predictor, dataset_reader_to_load=args.dataset_reader_choice)
def predict(comment):
    """
    Snippet to predict sentiment of Wongnai comment
    """
    from allennlp.models.archival import load_archive
    from allennlp.predictors.predictor import Predictor
    from wongnai.wongnai_reader import WongnaiDatasetReader
    from wongnai.wongnai_classifier import WongnaiCommentClassifier
    from wongnai.wongnai_predictor import WongnaiCommentPredictor
    from pythainlp import word_tokenize

    archive = load_archive('model.tar.gz')
    wongnai_predictor = Predictor.from_archive(archive, 'wongnai_predictor')
    prediction = wongnai_predictor.predict_json({"comment": word_tokenize(comment)})
    print(prediction)
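A quick hedged example; it assumes a trained model.tar.gz in the working directory. The argument is a short Thai review meaning "very delicious":

predict("อร่อยมาก")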