def load_model_from_args(args, checkpoint_path=None, load_best_model=False):
    """Build a ``ModelSupervisedParaphrasingTemplate`` from parsed arguments.

    Args:
        args: Argument object handed to ``args_to_params``.
        checkpoint_path: Optional checkpoint location. When it is not
            ``None`` it is passed to ``load_model`` together with the
            freshly created model.
        load_best_model: Forwarded unchanged to ``load_model``.

    Returns:
        The model after it has been moved to ``get_device()``.
    """
    # Only the model parameters are needed; the optimizer parameters that
    # ``args_to_params`` also returns are ignored here.
    model_params, _ = args_to_params(args)
    _, _, wordvec_tensor = load_word2vec_from_file()

    model = ModelSupervisedParaphrasingTemplate(model_params, wordvec_tensor)
    model = model.to(get_device())

    if checkpoint_path is None:
        return model

    load_model(checkpoint_path, model=model, load_best_model=load_best_model)
    return model
def _load_all_type_datasets(dataset_fun, debug_dataset=False, data_types=None, data_path=None, name=None):
    """Create one dataset per data type and attach the shared vocabulary.

    Args:
        dataset_fun: Callable that builds a dataset. It is called as
            ``dataset_fun(data_type, shuffle_data=...)`` when ``data_path``
            is ``None`` and with the extra ``data_path``/``name`` keyword
            arguments otherwise.
        debug_dataset: When true, the first data type is replaced by the
            second one (``'dev'`` for the default types).
        data_types: Optional sequence of data type names. Defaults to
            ``['train', 'dev', 'test']``. The sequence is never modified.
        data_path: Optional path forwarded to ``dataset_fun``.
        name: Forwarded to ``dataset_fun``; only used when ``data_path`` is
            given.

    Returns:
        List of datasets, in the order of the (possibly adjusted) data types.
    """
    _, word2id_dict, _ = load_word2vec_from_file()

    if data_types is None:
        data_types = ['dev' if debug_dataset else 'train', 'dev', 'test']
    else:
        # Work on a private copy: the debug substitution below must not leak
        # into the list object owned by the caller.
        data_types = list(data_types)
        if debug_dataset:
            data_types[0] = data_types[1]

    dataset_list = []
    for data_type in data_types:
        # Shuffling is enabled for every type whose name contains 'train'.
        shuffle_data = 'train' in data_type
        if data_path is None:
            dataset = dataset_fun(data_type, shuffle_data=shuffle_data)
        else:
            dataset = dataset_fun(data_type,
                                  data_path=data_path,
                                  shuffle_data=shuffle_data,
                                  name=name)
        dataset.set_vocabulary(word2id_dict)
        dataset.print_statistics()
        dataset_list.append(dataset)
    return dataset_list
def create_dataset(templates):
    """Build a paraphrase dataset from tab-separated template lines.

    Each entry of ``templates`` is split once on a tab into a paraphrase
    index and a sentence. Every sentence is paired with a neighbouring
    sentence that carries the same index: the following one is preferred,
    the preceding one is the fallback. If neither neighbour matches, an
    error is printed and the process exits with status 1.

    Args:
        templates: Sequence of strings of the form ``"<index>\\t<sentence>"``.

    Returns:
        A ``DialogueParaphraseDataset`` holding one ``DialogueContextParData``
        entry per input line, with the vocabulary set and the index reset.
    """
    dataset = DialogueParaphraseDataset(data_path=None, data_type='train', shuffle_data=False)

    split_lines = [line.split("\t", 1) for line in templates]
    par_indices = [fields[0] for fields in split_lines]
    sentences = [fields[1] for fields in split_lines]

    num_sentences = len(sentences)
    for pos, sent in enumerate(sentences):
        current_index = par_indices[pos]
        if pos + 1 < num_sentences and par_indices[pos + 1] == current_index:
            partner_sent = sentences[pos + 1]
        elif pos > 0 and current_index == par_indices[pos - 1]:
            partner_sent = sentences[pos - 1]
        else:
            print(
                "[!] ERROR: Neither the sentence before or after had the same index..."
            )
            print(current_index)
            sys.exit(1)

        entry = DialogueContextParData(paraphrases=[partner_sent, sent],
                                       contexts=[["Test"]] * 2,
                                       max_len=80,
                                       randomized=False)
        dataset.data_list.append(entry)

    _, word2id_dict, _ = load_word2vec_from_file()
    dataset.set_vocabulary(word2id_dict)
    dataset.reset_index()
    return dataset
def create_model(checkpoint_path, model_type, model_params):
    """Create a frozen ``MultiTaskEncoder`` restored from a checkpoint.

    Args:
        checkpoint_path: Checkpoint location passed to ``load_model``.
        model_type: Forwarded to the ``MultiTaskEncoder`` constructor.
        model_params: Forwarded to the ``MultiTaskEncoder`` constructor.

    Returns:
        The model with ``requires_grad`` disabled on every parameter and
        ``eval()`` already called.
    """
    # Only the third element returned by the loader is needed here.
    _, _, wordvec_tensor = load_word2vec_from_file()

    model = MultiTaskEncoder(model_type, model_params, wordvec_tensor)
    load_model(checkpoint_path, model=model)

    # Freeze all parameters.
    for weight in model.parameters():
        weight.requires_grad = False
    model.eval()
    return model
def __init__(self, tasks, model_type, model_params, optimizer_params, multitask_params, batch_size, checkpoint_path, debug=False):
    """Set up vocabulary, encoder, tasks, sampler, optimizer and checkpointing.

    Args:
        tasks: Iterable of task descriptions, each passed to ``create_task``.
        model_type: Forwarded to the ``MultiTaskEncoder`` constructor.
        model_params: Forwarded to ``MultiTaskEncoder`` and ``create_task``.
        optimizer_params: Passed to ``self._create_optimizer``.
        multitask_params: Passed to the ``MultiTaskSampler`` constructor.
        batch_size: Stored on the instance and given to the sampler.
        checkpoint_path: Passed to ``self._prepare_checkpoint``.
        debug: Forwarded to ``create_task``.

    Raises:
        AssertionError: If no task was created.
    """
    _, word2id, wordvec_tensor = load_word2vec_from_file()
    self.word2id = word2id
    self.batch_size = batch_size

    self.model = MultiTaskEncoder(model_type, model_params, wordvec_tensor)

    created_tasks = []
    for task_spec in tasks:
        created_tasks.append(create_task(self.model, task_spec, model_params, debug=debug))
    self.tasks = created_tasks
    assert len(self.tasks) > 0, "Please specify at least one task to train on."

    self.multitask_sampler = MultiTaskSampler(self.tasks, multitask_params, batch_size)
    self._create_optimizer(optimizer_params)
    self._prepare_checkpoint(checkpoint_path)
def load_model_from_args(args, checkpoint_path=None, load_best_model=False):
    """Build a ``MultiTaskEncoder`` and its tasks from parsed arguments.

    Args:
        args: Argument object handed to ``args_to_params``.
        checkpoint_path: Optional checkpoint location. When it is not
            ``None``, model and tasks are passed to ``load_model``.
        load_best_model: Forwarded unchanged to ``load_model``.

    Returns:
        Tuple ``(model, tasks)`` where ``tasks`` is the list of objects
        created by ``create_task`` (always with ``debug=True``).
    """
    # Optimizer and multitask parameters are returned as well but not used.
    task_specs, model_type, model_params, _, _ = args_to_params(args)
    _, _, wordvec_tensor = load_word2vec_from_file()

    model = MultiTaskEncoder(model_type, model_params, wordvec_tensor)

    tasks = []
    for task_spec in task_specs:
        tasks.append(create_task(model, task_spec, model_params, debug=True))

    if checkpoint_path is not None:
        load_model(checkpoint_path,
                   model=model,
                   tasks=tasks,
                   load_best_model=load_best_model)
    return model, tasks
def task_to_dataset(sentences, labels, label_dict=None):
    """Wrap tokenised sentences and their labels in a ``DatasetTemplate``.

    Args:
        sentences: Iterable of token sequences. Tokens that are not ``str``
            are decoded as UTF-8.
        labels: Iterable of labels, paired with ``sentences`` via ``zip``.
        label_dict: Optional label explanation. When it is not ``None`` it
            is registered through ``add_label_explanation``.

    Returns:
        A ``DatasetTemplate("all")`` whose data list holds one ``SentData``
        per sentence/label pair.
    """
    _, word2id, _ = load_word2vec_from_file()

    def _as_text(token):
        # Tokens may arrive as bytes; everything else is assumed to be str.
        return token if isinstance(token, str) else token.decode('UTF-8')

    data_batch = []
    for tokens, label in zip(sentences, labels):
        sentence_text = " ".join([_as_text(token) for token in tokens])
        entry = SentData(sentence=sentence_text, label=label)
        entry.translate_to_dict(word2id)
        data_batch.append(entry)

    dataset = DatasetTemplate("all")
    dataset.set_data_list(data_batch)
    if label_dict is not None:
        dataset.add_label_explanation(label_dict)
    return dataset
def prepare(params, samples):
    """Attach the vocabulary to ``params`` and record out-of-vocabulary words.

    Every sample is joined into a lower-cased, space-separated string. Each
    resulting word that is missing from the vocabulary is added as a key to
    the module-level ``UNKNOWN_WORDS`` mapping. All keys collected so far
    are then written, one per line, to ``senteval_unknown_words.txt``.

    Args:
        params: Object that receives the vocabulary as ``params.word2id``.
        samples: Iterable of token sequences. Tokens that are not ``str``
            are decoded as UTF-8.

    Returns:
        None.
    """
    global UNKNOWN_WORDS
    _, word2id, _ = load_word2vec_from_file()
    params.word2id = word2id

    sentence_texts = [
        ' '.join([w if isinstance(w, str) else w.decode('UTF-8') for w in s]).lower()
        for s in samples
    ]
    words = ' '.join(sentence_texts).split(" ")
    for w in words:
        if w not in word2id:
            UNKNOWN_WORDS[w] = ''
    print("Number of unknown words: " + str(len(UNKNOWN_WORDS.keys())))

    # Tokens were decoded from UTF-8, so write the file as UTF-8 explicitly;
    # the platform default encoding may not be able to represent them.
    with open("senteval_unknown_words.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(list(UNKNOWN_WORDS.keys())))
    return
def supervised_args_to_params(args):
    """Translate parsed arguments into supervised model/optimizer parameters.

    Builds the nested ``model_params`` dictionary (general settings plus the
    ``dialogue_module``, ``template_module`` and ``paraphrase_module``
    sub-dictionaries) and hands it to ``general_args_to_params``.

    Args:
        args: Argument object providing the attributes read below.

    Returns:
        Tuple ``(model_params, optimizer_params)`` as returned by
        ``general_args_to_params``.
    """
    _, _, wordvec_tensor = load_word2vec_from_file()

    # General model settings.
    model_params = {
        "embed_word_dim": 300,
        "embed_dropout": args.embed_dropout,
        "finetune_embeds": args.finetune_embeds,
        "share_encoder": args.share_encoder,
        "teacher_forcing_ratio": args.teacher_forcing_ratio,
        "teacher_forcing_annealing": args.teacher_forcing_annealing
    }
    embed_dim = model_params["embed_word_dim"]

    dialogue_module = {
        "type": args.dialogue_model,
        "input_size": embed_dim,
        "hidden_size": args.dialogue_hidden_size,
        "num_layers": args.dialogue_num_layers,
        "dropout": args.dialogue_dropout,
        "bert_model": args.dialogue_bert_model,
        "bert_finetune_layers": args.dialogue_bert_finetune_layers
    }
    model_params["dialogue_module"] = dialogue_module

    template_module = {
        "type": args.template_model,
        "input_size": embed_dim,
        "hidden_size": args.template_hidden_size,
        "num_layers": args.template_num_layers,
        "dropout": args.template_dropout,
        "bert_model": args.template_bert_model,
        "bert_finetune_layers": args.template_bert_finetune_layers
    }
    model_params["template_module"] = template_module

    paraphrase_module = {
        "type": args.paraphrase_model,
        # One output class per row of the loaded word vector tensor.
        "num_classes": wordvec_tensor.shape[0],
        "hidden_size": args.paraphrase_hidden_size,
        "num_layers": args.paraphrase_num_layers,
        "input_dropout": args.paraphrase_input_dropout,
        "lstm_dropout": args.paraphrase_lstm_dropout,
        "output_dropout": args.paraphrase_output_dropout
    }
    model_params["paraphrase_module"] = paraphrase_module

    model_params, optimizer_params = general_args_to_params(args, model_params=model_params)
    return model_params, optimizer_params
def __init__(self, model_params, optimizer_params, batch_size, checkpoint_path, debug=False):
    """Set up vocabulary, model, task, optimizer and checkpointing.

    Args:
        model_params: Passed to ``self._create_model`` and
            ``self._create_task``.
        optimizer_params: Passed to ``self._create_optimizer``.
        batch_size: Stored on the instance as ``self.batch_size``.
        checkpoint_path: Passed to ``self._prepare_checkpoint``.
        debug: Forwarded to ``self._create_task``.
    """
    # Vocabulary
    _, word2id, wordvec_tensor = load_word2vec_from_file()
    self.word2id = word2id
    self.batch_size = batch_size

    # Model, moved to the active device
    model = self._create_model(model_params, wordvec_tensor)
    self.model = model.to(get_device())

    # Task
    self.task = self._create_task(model_params, debug=debug)

    # Optimizer and checkpoints
    self._create_optimizer(optimizer_params)
    self._prepare_checkpoint(checkpoint_path)
def discriminator_args_to_params(args):
    """Translate parsed arguments into discriminator model/optimizer parameters.

    Builds the nested ``model_params`` dictionary (general settings plus the
    ``slot_encoder_module``, ``encoder_module`` and ``discriminator_module``
    sub-dictionaries) and hands it to ``general_args_to_params``.

    Args:
        args: Argument object providing the attributes read below.

    Returns:
        Tuple ``(model_params, optimizer_params)`` as returned by
        ``general_args_to_params``.
    """
    # NOTE(review): the loaded values are not used in this function; the call
    # is kept because the loader may have side effects — confirm before
    # removing it.
    load_word2vec_from_file()

    # General model settings.
    model_params = {
        "embed_word_dim": 300,
        "embed_dropout": args.embed_dropout,
        "finetune_embeds": args.finetune_embeds,
        "semantic_size": args.semantic_size,
        "style_size": args.style_size,
        "use_VAE": args.use_VAE,
        "use_semantic_specific_attn": args.use_semantic_specific_attn,
        "num_context_turns": args.num_context_turns,
        "slot_value_embeddings": args.slot_value_embeddings,
        "use_small_dataset": args.use_small_dataset
    }
    embed_dim = model_params["embed_word_dim"]
    semantic_size = model_params["semantic_size"]
    style_size = model_params["style_size"]

    slot_encoder_module = {
        "use_CBOW": args.slots_CBOW,
        "hidden_size": embed_dim
    }
    model_params["slot_encoder_module"] = slot_encoder_module

    encoder_module = {
        "type": args.encoder_model,
        "input_size": embed_dim,
        "hidden_size": args.encoder_hidden_size,
        "num_layers": args.encoder_num_layers,
        "dropout": args.encoder_dropout,
        "semantic_size": semantic_size,
        "style_size": style_size,
        "share_attention": not args.encoder_separate_attentions
    }
    model_params["encoder_module"] = encoder_module

    discriminator_module = {
        "type": args.discriminator_model,
        "hidden_size": args.discriminator_hidden_size,
        "num_hidden_layers": args.discriminator_num_layers,
        "input_dropout": args.discriminator_dropout,
        "semantic_size": semantic_size,
        "style_size": style_size
    }
    model_params["discriminator_module"] = discriminator_module

    model_params, optimizer_params = general_args_to_params(args, model_params)
    return model_params, optimizer_params
def load_our_model(checkpoint_path):
    """Return the module-wide model instance, loading it on first use.

    The model is stored in the module-level ``OUR_MODEL`` variable. As long
    as that variable is not ``None`` it is returned as is and
    ``checkpoint_path`` is ignored.

    Args:
        checkpoint_path: Location passed to ``load_args`` and ``load_model``
            when the model has to be created.

    Returns:
        The ``ModelUnsupervisedContextParaphrasingTemplate`` instance, moved
        to ``get_device()`` and with ``eval()`` called.
    """
    global OUR_MODEL
    if OUR_MODEL is not None:
        return OUR_MODEL

    args = load_args(checkpoint_path)
    print("-> Loading model...")
    model_params, _ = unsupervised_args_to_params(args)
    _, _, wordvec_tensor = load_word2vec_from_file()

    model = ModelUnsupervisedContextParaphrasingTemplate(model_params, wordvec_tensor)
    print(checkpoint_path)
    load_model(checkpoint_path, model=model, load_best_model=True)

    model = model.to(get_device())
    model.eval()
    OUR_MODEL = model
    return OUR_MODEL
def unsupervised_args_to_params(args):
    """Translate parsed arguments into unsupervised model/optimizer parameters.

    Builds the nested ``model_params`` dictionary (general settings plus the
    ``slot_encoder_module``, ``encoder_module`` and ``decoder_module``
    sub-dictionaries) and hands it to ``general_args_to_params``.

    Args:
        args: Argument object providing the attributes read below.

    Returns:
        Tuple ``(model_params, optimizer_params)`` as returned by
        ``general_args_to_params``.
    """
    _, _, wordvec_tensor = load_word2vec_from_file()

    # General model settings.
    model_params = {
        "embed_word_dim": 300,
        "embed_dropout": args.embed_dropout,
        "finetune_embeds": args.finetune_embeds,
        "switch_rate": args.switch_rate,
        "teacher_forcing_ratio": args.teacher_forcing_ratio,
        "teacher_forcing_annealing": args.teacher_forcing_annealing,
        "VAE_loss_scaling": args.VAE_loss_scaling,
        "VAE_annealing_iters": args.VAE_annealing_iters,
        "VAE_annealing_func": args.VAE_annealing_func,
        "VAE_scheduler": args.VAE_scheduler,
        "cosine_loss_scaling": args.cosine_loss_scaling,
        "cosine_counter_loss": args.cosine_counter_loss,
        "style_loss_scaling": args.style_loss_scaling,
        "style_loss_module": args.style_loss_module,
        "style_loss_stop_grads": args.style_loss_stop_grads,
        "style_loss_annealing_iters": args.style_loss_annealing_iters,
        "semantics_dropout": args.semantics_dropout,
        "semantic_full_dropout": args.semantic_full_dropout,
        "semantic_size": args.semantic_size,
        "style_size": args.style_size,
        # A non-positive response style size falls back to the style size.
        "response_style_size": (args.response_style_size
                                if args.response_style_size > 0
                                else args.style_size),
        "num_context_turns": args.num_context_turns,
        "pure_style_loss": args.pure_style_loss,
        "positional_embedding_factor": args.positional_embedding_factor,
        "pretraining_iterations": args.pretraining_iterations,
        "pretraining_second_task": args.pretraining_second_task,
        "only_paraphrasing": args.only_paraphrasing,
        # The command line flag is negative, the parameter is positive.
        "slot_value_embeddings": not args.no_slot_value_embeddings,
        "use_semantic_specific_attn": args.use_semantic_specific_attn,
        "style_exponential_dropout": args.style_exponential_dropout,
        "style_full_dropout": args.style_full_dropout
    }
    embed_dim = model_params["embed_word_dim"]
    semantic_size = model_params["semantic_size"]
    style_size = model_params["style_size"]
    response_style_size = model_params["response_style_size"]

    slot_encoder_module = {
        "use_CBOW": args.slots_CBOW,
        "hidden_size": embed_dim
    }
    model_params["slot_encoder_module"] = slot_encoder_module

    encoder_module = {
        "type": args.encoder_model,
        "input_size": embed_dim,
        "hidden_size": args.encoder_hidden_size,
        "num_layers": args.encoder_num_layers,
        "dropout": args.encoder_dropout,
        "semantic_size": semantic_size,
        "style_size": style_size,
        "response_style_size": response_style_size,
        "share_attention": not args.encoder_separate_attentions,
        "use_prototype_styles": args.use_prototype_styles,
        "num_prototypes": args.num_prototypes,
        "use_semantic_for_context_proto": args.use_semantic_for_context_proto,
        "no_prototypes_for_context": args.no_prototypes_for_context
    }
    model_params["encoder_module"] = encoder_module

    decoder_module = {
        "type": args.decoder_model,
        # One output class per row of the loaded word vector tensor.
        "num_classes": wordvec_tensor.shape[0],
        "hidden_size": args.decoder_hidden_size,
        "num_layers": args.decoder_num_layers,
        "input_dropout": args.decoder_input_dropout,
        "lstm_dropout": args.decoder_lstm_dropout,
        "output_dropout": args.decoder_output_dropout,
        "concat_features": args.decoder_concat_features,
        "lstm_additional_input": args.decoder_lstm_additional_input,
        "semantic_size": semantic_size,
        # The style size is always included, independent of the encoder's
        # "use_prototype_styles" setting.
        "style_size": style_size + response_style_size
    }
    model_params["decoder_module"] = decoder_module

    model_params, optimizer_params = general_args_to_params(args, model_params)
    return model_params, optimizer_params