def run_infer():
    vocab_dir = params['vocab_dir']
    clean_data_dir = params['clean_data_dir']
    test_dataset_dir = params['test_dataset_dir']
    data_producer = data_helper.DataSet(vocab_dir, clean_data_dir)
    sos_id = data_producer.tokenizer.subtoken_to_id_dict['<s>']
    eos_id = data_producer.tokenizer.subtoken_to_id_dict['</s>']
    params['sos_id'] = sos_id
    params['eos_id'] = eos_id
    vocab_size = len(data_producer.tokenizer.subtoken_list)
    params['vocab_size'] = vocab_size
    params['maximum_iterations'] = 30
    params["batch_size"] = 1
    decode_mode = params['decode_mode']
    model = Seq2SeqModel(params)
    # Interactive inference loop: read a sentence, encode it, and decode the model output.
    while True:
        sentence = input('you say: ')
        subtoken = data_producer.tokenizer.encode(
            pre_process(sentence, keep_sep=True))
        output = model.infer(subtoken)
        if decode_mode == 'greedy':
            output = [int(i) for i in output[0]]
            res = data_producer.tokenizer.decode(output)
            print(res)
            print('\n')
        elif decode_mode == 'beam_search':
            # Print every beam hypothesis with its decoded text.
            for j in range(len(output)):
                print("output[{}]".format(j), output[j])
                res = [int(i) for i in list(output[j])]
                res = data_producer.tokenizer.decode(res)
                print('answer {}:{}'.format(j, res))
            print('\n')
def init_model(opt, rating_tokens_tensor):
    logging.info('====================== Model Parameters =========================')
    if opt.model_type == 'hss':
        overall_model = HSSModel(opt)
    elif opt.model_type == 'multi_task_basic':
        overall_model = MultiTaskBasicClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == 'word_attn_modulate':
        overall_model = AttnModulateClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == 'hre_max':
        overall_model = HirEncMultiTaskBasicModel(opt)
    elif opt.model_type == 'external_feed':
        overall_model = ExternalFeedClassifySeq2Seq(opt)
    elif opt.model_type == "external_soft_feed":
        overall_model = ExternalSoftFeedClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_ex_soft_feed":
        overall_model = MultiViewExternalSoftFeedClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_attn_modulate":
        overall_model = MultiViewAttnModulateClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == "multi_view_multi_task_basic":
        overall_model = MultiViewMultiTaskBasicClassifySeq2Seq(opt, rating_tokens_tensor)
    elif opt.model_type == "rnn_enc_single_classifier":
        overall_model = RnnEncSingleClassifier(opt)
    elif opt.model_type == "seq2seq":
        overall_model = Seq2SeqModel(opt)
    else:
        raise ValueError("Invalid model type")
    overall_model.to(opt.device)
    if opt.train_from:
        logging.info("loading previous checkpoint from %s" % opt.train_from)
        # TODO: load the saved model and override the current one
    if opt.w2v:
        # NOTE: the pretrained embedding having the same dimension
        # as args.emb_dim should already be trained
        embedding, _ = io.make_embedding(opt.idx2word, opt.w2v)
        overall_model.set_embedding(embedding)
    return overall_model
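# A minimal sketch of the checkpoint restore left as a TODO in init_model above.
# It assumes opt.train_from points to a plain state_dict saved with
# torch.save(overall_model.state_dict(), path); wrapped checkpoints with extra
# keys (optimizer state, epoch counters, ...) would need unpacking first.
def load_pretrained_model(overall_model, opt):
    checkpoint = torch.load(opt.train_from, map_location=opt.device)
    overall_model.load_state_dict(checkpoint)
    return overall_model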
def __init__(self, opt):
    super(MultiTaskBasicModel, self).__init__()
    memory_bank_size = 2 * opt.encoder_size if opt.bidirectional else opt.encoder_size
    self.seq2seq_model = Seq2SeqModel(opt)
    if opt.classifier_type == "max":
        self.classifier_model = MaxPoolClassifier(
            memory_bank_size, opt.num_classes, opt.classifier_dropout, opt.ordinal)
    elif opt.classifier_type == "word_attn":
        self.classifier_model = WordAttnClassifier(
            opt.query_hidden_size, memory_bank_size, opt.num_classes,
            opt.attn_mode, opt.classifier_dropout, opt.ordinal)
    elif opt.classifier_type == "word_attn_no_query":
        self.classifier_model = WordAttnNoQueryClassifier(
            memory_bank_size, opt.num_classes, opt.classifier_dropout, opt.ordinal)
    elif opt.classifier_type == "word_multi_hop_attn":
        self.classifier_model = WordMultiHopAttnClassifier(
            opt.query_hidden_size, memory_bank_size, opt.num_classes,
            opt.attn_mode, opt.classifier_dropout, opt.ordinal)
    else:
        raise ValueError("Invalid classifier type")
    self.model_type = opt.model_type
    self.classifier_type = opt.classifier_type
def __init__(self, args, vocab, transition_system):
    super(Reconstructor, self).__init__()
    if args.no_copy:
        self.seq2seq = Seq2SeqModel(
            src_vocab=vocab.code,
            tgt_vocab=vocab.source,
            embed_size=args.embed_size,
            hidden_size=args.hidden_size,
            dropout=args.dropout,
            label_smoothing=args.src_token_label_smoothing,
            cuda=args.cuda)
    else:
        self.seq2seq = Seq2SeqWithCopy(
            src_vocab=vocab.code,
            tgt_vocab=vocab.source,
            embed_size=args.embed_size,
            hidden_size=args.hidden_size,
            dropout=args.dropout,
            cuda=args.cuda)
    self.vocab = vocab
    self.args = args
    self.transition_system = transition_system
te_path = "trans_test_data.pkl"

# Preprocess the dataset (NTU here for demo) to generate data for
# Step 1: seq2seq unsupervised training
# Step 2: classification
dsamp_train, dsamp_test, \
    fea, lab, seq_len_new, \
    test_fea, test_lab, test_seq_len_new = preprocess_pipeline(base_path,
                                                               tr_path,
                                                               te_path,
                                                               mode="cross_subject_data",
                                                               dsamp_frame=50)

# Build the Seq2Seq model using either the fixed-state or fixed-weight strategy
# (modify loop_fn in Seq2SeqModel to switch to the fixed-weight strategy;
# the default is the fixed-state strategy).
tf.reset_default_graph()
sess = get_session()
model = Seq2SeqModel(max_seq_len, input_size, rnn_size, batch_size, lr,
                     train_keep_prob)
sess = get_session()
sess.run(tf.global_variables_initializer())

start_time = timeit.default_timer()
knn_score = []
train_loss_li = []
max_score = 0.0

# Training
for i in range(1, iterations + 1):
    encoder_inputs, decoder_inputs, seq_len_enc = mini_batch(dsamp_train,
                                                             seq_len=50,
                                                             input_size=75,
                                                             batch_size=256)
    _, gradient_norm, train_loss = model.step(sess, encoder_inputs,
def run_train():
    model = Seq2SeqModel(params)
    model.train(batch_iter, data_producer.tokenizer)
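# run_train() above relies on module-level params, batch_iter, and data_producer.
# A possible setup, mirroring run_infer() earlier; the get_batch helper used to
# build batch_iter is an assumption, not something shown in these snippets.
data_producer = data_helper.DataSet(params['vocab_dir'], params['clean_data_dir'])
params['sos_id'] = data_producer.tokenizer.subtoken_to_id_dict['<s>']
params['eos_id'] = data_producer.tokenizer.subtoken_to_id_dict['</s>']
params['vocab_size'] = len(data_producer.tokenizer.subtoken_list)
batch_iter = data_producer.get_batch(params['batch_size'])  # hypothetical helper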
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = []
# dataset.extend(loadCornellDataset('data/cornell movie-dialogs corpus'))
# dataset.extend(loadConvAI2Dataset('data/ConvAI2'))
dataset.extend(loadNUCCDataset('data/nucc'))

dataloader = TextDataloader(dataset,
                            max_length=32,
                            min_count=3,
                            batch_size=args.batch_size,
                            shuffle=True)
voc = dataloader.getVoc()

model = Seq2SeqModel(device, SOS_token, voc.num_words).to(device)
if args.load:
    model.load_state_dict(torch.load(args.load))

if not args.eval:
    model.train()
    for epoch in range(args.iteration):
        for i, data in enumerate(dataloader):
            inputs, lengths, targets, mask, max_target_len = data
            inputs = inputs.to(device)
            lengths = lengths.to(device)
            targets = targets.to(device)
            mask = mask.to(device)
import argparse

import tensorflow as tf
from keras import backend as K

from model.seq2seq import Seq2SeqModel
from config.config import ConfigTrain

if __name__ == '__main__':
    # Construct the argument parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-config", "--config",
        help="main configuration path, containing main parameters " +
             "except the training dataset. See ./config/config_delex.yaml " +
             "for an example")
    ap.add_argument("-train", "--train", help="training data path")
    args = vars(ap.parse_args())
    config_path = args['config']
    train_path = args['train']

    config = ConfigTrain(main_config_path=config_path, train_path=train_path)

    optimizer = tf.keras.optimizers.Adam()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')

    seq2seq = Seq2SeqModel(config=config, optimizer=optimizer,
                           loss_object=loss_object)
    seq2seq.train()

    K.clear_session()
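# Example invocation of the training script above; the script name and the
# training-data path are placeholders, while the config path is the example
# referenced in the argument help text:
#   python train.py --config ./config/config_delex.yaml --train ./data/train.txt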