def test_Seq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim))]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), peek=True)]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), depth=2)]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), peek=True, depth=2)]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1)

    # With teacher forcing the ground-truth targets are fed as decoder inputs,
    # so the model takes [x, y] as input during training.
    model = Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                    output_length=output_length,
                    input_shape=(input_length, input_dim),
                    peek=True, depth=2, teacher_force=True)
    model.compile(loss='mse', optimizer='sgd')
    model.fit([x, y], y, epochs=1)
def training():
    params = parameters()  # model parameter dict
    # NOTE: bool() of any non-empty string is True (bool('False') == True),
    # so boolean options are compared against the literal string 'true'.
    model_param_dict = {
        'num_units': int(params.get('modelparam', 'num_units')),
        'num_layers': int(params.get('modelparam', 'num_layers')),
        'vocab_size': int(params.get('modelparam', 'vocab_size')),
        'embedding_size': int(params.get('modelparam', 'embedding_size')),
        'beam_size': int(params.get('modelparam', 'beam_size')),
        'use_attention': params.get('modelparam', 'use_attention').lower() == 'true',
        'use_beam_search': params.get('modelparam', 'use_beam_search').lower() == 'true',
        'start_token_idx': int(params.get('modelparam', 'start_token_idx')),
        'end_token_idx': int(params.get('modelparam', 'end_token_idx')),
        'max_gradient_norm': float(params.get('modelparam', 'max_gradient_norm')),
    }
    # The parameters below are documented in config.ini.
    batch_size = int(params.get('trainparam', 'batch_size'))
    learning_rate = float(params.get('trainparam', 'learning_rate'))
    keep_prob = float(params.get('trainparam', 'keep_prob'))
    epochs = int(params.get('trainparam', 'epochs'))
    modelsaved_dir = params.get('trainparam', 'checkpoint_dir')
    savedname = params.get('trainparam', 'checkpoint_name')

    _, _, questionbatch, answerbatch, qlengthbatch, alengthbatch = data(DIR, epochs, batch_size)
    seq2seq = Seq2Seq(**model_param_dict)
    decode_outputs = seq2seq.model(questionbatch, answerbatch, qlengthbatch,
                                   alengthbatch, 'train', batch_size, keep_prob)
    train_op, loss, summary_merge, predicts = seq2seq.train(
        decode_outputs, answerbatch, alengthbatch, learning_rate)

    # save and restore the model
    ckpt = tf.train.get_checkpoint_state(modelsaved_dir)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters..')
            saver.restore(sess, tf.train.latest_checkpoint(modelsaved_dir))
        else:
            print('Create model from scratch..')
            sess.run(tf.global_variables_initializer())
        step = 0
        summary_writer = tf.summary.FileWriter(modelsaved_dir, graph=sess.graph)
        while True:
            try:
                step += 1
                # Do not name this fetch `loss`: that name is already bound to
                # the graph tensor, and rebinding it would make the next
                # sess.run() fetch a numpy float instead of a Tensor, raising:
                # "Fetch argument 10.112038 has invalid type <class 'numpy.float32'>,
                # must be a string or Tensor. (Can not convert a float32 into a
                # Tensor or Operation.)"
                _, temploss, tempsummary = sess.run([train_op, loss, summary_merge])
                print('run step: ', step, end='\r')
                if step % int(params.get('trainparam', 'steps_per_checkpoint')) == 0:
                    perplexity = math.exp(float(temploss)) if temploss < 300 else float('inf')
                    print('save at step: ', step, 'perplexity: ', perplexity)
                    summary_writer.add_summary(tempsummary, step)
                    checkpoint_path = os.path.join(modelsaved_dir, savedname)
                    saver.save(sess, checkpoint_path, global_step=step)
            except tf.errors.OutOfRangeError:
                # Raised once the input pipeline has served all epochs; a bare
                # `except:` here would also swallow genuine errors.
                print('done')
                break
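# A minimal sketch of the parameters() helper used by training() and chatbot()
# above, assuming it simply wraps configparser around a config.ini file whose
# .get(section, key) calls match the usage in those functions. The file path
# is an assumption, not part of the original code.
import configparser

def parameters(path='config.ini'):
    # Returns a ConfigParser; params.get('modelparam', 'num_units') etc.
    # resolve against the [modelparam] and [trainparam] sections of the file.
    config = configparser.ConfigParser()
    config.read(path)
    return config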
def start_train(args, data):
    use_cuda = args.gpu != 0
    MAX_ITER = 5000
    epochs = 50
    print_every = 5000
    plot_every = 100
    start_time = time.time()
    plot_losses = []  # stores losses for plotting
    print_loss_total = 0
    input_dim = args.vocab_size
    output_dim = args.vocab_size
    embedding_dim = args.embedding_size
    hidden_dim = args.hidden_size
    learning_rate = args.learning_rate
    input_word = "怎么获得立减券"  # "How do I get a discount coupon?"
    text = data.text2index(input_word)
    rfile = open('../data/questions_viewer.txt', 'w', encoding='utf-8')

    seq2seq_model = Seq2Seq(input_dim, embedding_dim, hidden_dim, output_dim,
                            use_cuda=use_cuda, learning_rate=learning_rate)
    if use_cuda:
        seq2seq_model = seq2seq_model.cuda()
    seq2seq_model.train()
    print(len(data.source_index))

    for epoch in range(epochs):
        # valid_targets, valid_sources, valid_targets_lengths, valid_source_lengths = data.get_valid_batch()
        # valid_loss = seq2seq_model.seq2seq_train(valid_targets, valid_sources)
        for iter, (source, target) in enumerate(zip(data.source_index, data.target_index)):
            # print(source, target)
            loss = seq2seq_model.seq2seq_train(source, target)
            print_loss_total += loss
            if iter % print_every == 0:
                seq2seq_model.encoder_scheduler.step()
                seq2seq_model.decoder_scheduler.step()
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                time_dif = get_time_dif(start_time)
                print('Epoch {:>3}/{} - Training Loss: {:>6.6f} Time:{}'.format(
                    epoch, epochs, print_loss_avg, time_dif))
        # Save a checkpoint and log a sample prediction once per epoch.
        torch.save(seq2seq_model.state_dict(), args.module_path)
        predict_indices = seq2seq_model.seq2seq_predict(text)
        predict_result = data.index2text(predict_indices)
        print(predict_result)
        rfile.write("".join(predict_result))
        rfile.write('\n')
    rfile.close()
def _create_model(conf):
    """ Creates a simple model using the given configuration """
    model = Seq2Seq(**conf['seq2seq'])
    output = _add_layers(model.output, conf['top'])
    model = Model(model.input, output)
    return model
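# A hypothetical configuration for _create_model, only to illustrate the
# expected shape of `conf`: a 'seq2seq' entry of Seq2Seq keyword arguments and
# a 'top' entry consumed by _add_layers. The exact keys depend on your
# Seq2Seq and _add_layers implementations; every value below is an assumption.
example_conf = {
    'seq2seq': {
        'output_dim': 64,
        'hidden_dim': 128,
        'output_length': 10,
        'input_shape': (20, 32),
    },
    'top': [{'type': 'dense', 'units': 1}],  # assumed _add_layers layer spec
}
# model = _create_model(example_conf)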
def start_test(args, data):
    # Interactive testing: read one question at a time from stdin.
    use_cuda = args.gpu != 0
    input_dim = args.vocab_size
    output_dim = args.vocab_size
    embedding_dim = args.embedding_size
    hidden_dim = args.hidden_size
    args.mode = 'test'

    # Build and load the model once, outside the input loop.
    seq2seq_model = Seq2Seq(input_dim, embedding_dim, hidden_dim, output_dim,
                            use_cuda=use_cuda)
    if use_cuda:
        seq2seq_model = seq2seq_model.cuda()
    seq2seq_model.load_state_dict(torch.load(args.module_path))
    seq2seq_model.eval()

    while True:
        input_word = input('Please enter your question: ')
        input_word = data.clean_str(input_word)
        word1s = pseg.cut(input_word)
        # Replace nouns with a '*' placeholder before the vocabulary lookup.
        x = []
        for w in word1s:
            if w.flag == 'n':
                x.append('*')
            else:
                x.append(w.word)
        print(x)
        text = data.text2index(x)
        # predict_indices = seq2seq_model.seq2seq_predict(text)
        # predict_result = data.index2text(predict_indices)
        # print(predict_result)
        predict_sample_indices = seq2seq_model.beamsearch(text)
        for predict_indices in predict_sample_indices:
            predict_result = data.index2text(predict_indices)
            print("".join(predict_result[:-1]))
def main():
    args = set_arguments()
    seq2seq = Seq2Seq(args)
    # pdb.set_trace()
    if "train" in args.mode:
        trainer = Trainer(args, seq2seq)
        trainer.train()
    print("\ttask =", args.task)
    print("\tunits =", args.units)
    print("\t=" * 50)
    pdb.set_trace()
def __init_model(self):
    log('Initializing an empty model')
    self.model = Seq2Seq(
        input_size=len(self.input_vocab),
        output_size=len(self.output_vocab),
        hidden_size=constants.HIDDEN_SIZE,
        learning_rate=constants.LEARNING_RATE,
        teacher_forcing_ratio=constants.TEACHER_FORCING_RATIO,
        device=constants.DEVICE)
    log(str(self.model))
def build_model(voca_size=29331, hidden_size=512, lstm_layers_num=2,
                batch_size=10, max_epochs=200000, retrain=False):
    """ train a model with mini-batch gradient descent """
    model = None
    if not retrain:
        try:
            # Pickled models must be opened in binary mode.
            with open(path_pkl, "rb") as f:
                model = pickle.load(f)
            return model
        except Exception:
            print "Model does not pre-exist..."
            print "Will train a new model..."

    # shapes: (sent_size, example_num)
    encoderInputs, decoderInputs, decoderTarget = load_train_data(path_train, 20000)
    num_batchs = encoderInputs.shape[1] // batch_size
    model = Seq2Seq(voca_size, hidden_size, lstm_layers_num, learning_rate=0.1)
    batch_idx = 0
    for ep in xrange(max_epochs):
        enIpt = encoderInputs[:, batch_idx * batch_size:(batch_idx + 1) * batch_size]
        deIpt = decoderInputs[:, batch_idx * batch_size:(batch_idx + 1) * batch_size]
        deTgt = decoderTarget[:, batch_idx * batch_size:(batch_idx + 1) * batch_size]
        enMsk = get_mask(enIpt)
        deMsk = get_mask(deIpt)
        loss, costs = model.train(enIpt, enMsk, deIpt, deMsk, deTgt)
        if ep % 20 == 0:
            print "in epoch %d/%d..." % (ep, max_epochs)
        if batch_idx == 0:
            ot = "in epoch %d/%d..." % (ep, max_epochs) + " loss: " + str(loss)
            print ot
            logging.info(ot)
        batch_idx = (batch_idx + 1) % num_batchs
    # Persisting the trained model is left disabled:
    # with open(path_pkl, "wb") as mf:
    #     pickle.dump(model, mf)
    return model
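# A minimal sketch of the get_mask helper referenced in build_model, assuming
# the token matrices are shaped (sent_size, example_num) and that index 0 is
# the padding token; both assumptions are inferred from the slicing pattern
# above, not from the original source.
import numpy as np

def get_mask(token_matrix, pad_id=0):
    # 1.0 for real tokens, 0.0 for padding positions.
    return (token_matrix != pad_id).astype('float32')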
def interpreter(data_path, model_path):
    """
    Run this function if you want to talk to the seq2seq model.
    Type "exit" to end the conversation.
    :param data_path: the path of the corpus the model was trained on
    :param model_path: the path of the trained model
    :return:
    """
    # load the dictionary
    corpus = ConvCorpus(file_path=None)
    corpus.load(load_dir=data_path)
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild the seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id), feature_num=args.feature_num,
                    hidden_num=args.hidden_num, batch_size=1, gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # run the conversation system
    print('The system is ready to run, please talk to me!')
    print('( If you want to end a talk, please type "exit". )')
    print('')
    while True:
        print('>> ', end='')
        sentence = input()
        if sentence == 'exit':
            print('See you again!')
            break

        input_vocab = [unicodedata.normalize('NFKC', word.lower())
                       for word in word_tokenize(sentence)]
        input_vocab.reverse()
        input_vocab.insert(0, "<eos>")

        # convert words into IDs, dropping out-of-vocabulary words
        input_sentence = [corpus.dic.token2id[word] for word in input_vocab
                          if corpus.dic.token2id.get(word) is not None]

        model.initialize()  # initialize the cell state
        sentence = model.generate(input_sentence,
                                  sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id,
                                  id2word=corpus.dic)
        print("-> ", sentence)
        print('')
def main(args):
    # Construct Solver
    # data
    tr_dataset = AudioDataset(args.train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out)
    cv_dataset = AudioDataset(args.valid_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                num_workers=args.num_workers)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
                                num_workers=args.num_workers)
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}
    # model
    encoder = Encoder(args.einput, args.ehidden, args.elayer,
                      dropout=args.edropout,
                      bidirectional=args.ebidirectional,
                      rnn_type=args.etype)
    decoder = Decoder(vocab_size, args.dembed, sos_id, eos_id,
                      args.dhidden, args.dlayer,
                      bidirectional_encoder=args.ebidirectional)
    model = Seq2Seq(encoder, decoder)
    print(model)
    model.cuda()
    # optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.l2)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.l2)
    else:
        print("Unsupported optimizer: %s" % args.optimizer)
        return
    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
def predict():
    model = Seq2Seq(
        args.size_layer,
        args.num_layers,
        args.learning_rate,
        args.vocab_file,
        args.bert_config,
        args.is_training,
    )
    dictionary_output, rev_dictionary_output = model.tokenizer.vocab, model.tokenizer.inv_vocab
    dictionary_input, rev_dictionary_input = model.tokenizer.vocab, model.tokenizer.inv_vocab
    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                tf.logging.info("restore model from path: %s", ckpt.model_checkpoint_path)
                # load the pre-trained model
                saver = tf.train.Saver(max_to_keep=4)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                tf.logging.error("model path wrong !!")
                return
            while True:
                text = input("input your dream: ")
                inputs = ['[SEP]'] + list(text) + ['[CLS]']
                inputs_ids = model.tokenizer.convert_tokens_to_ids(inputs)
                segment_ids = [0] * len(inputs_ids)
                input_mask = [1] * len(inputs_ids)
                predicted2 = sess.run(model.predicting_ids,
                                      feed_dict={
                                          model.input_ids: [inputs_ids],
                                          model.input_mask: [input_mask],
                                          model.segment_ids: [segment_ids],
                                          model.dropout: 1.0
                                      })
                # IDs 0-3 are special tokens and are stripped from the output.
                print('dream:',
                      ''.join([rev_dictionary_input[n] for n in inputs_ids
                               if n not in [0, 1, 2, 3]]))
                print('dream decoding:',
                      ''.join([rev_dictionary_output[n] for n in predicted2[0]
                               if n not in [0, 1, 2, 3]]), '')
                print("*" * 20)
def predict():
    sep_word = thulac.thulac(seg_only=True)
    model = Seq2Seq(batch_size=1, forward_only=True)
    model_path = './models/0612/'
    vocab_en, _ = utils.read_vocabulary(config.TRAIN_ENC_VOCABULARY)
    _, vocab_de = utils.read_vocabulary(config.TRAIN_DEC_VOCABULARY)
    with tf.Session() as sess:
        # restore the previous training run
        ckpt = tf.train.get_checkpoint_state(model_path)
        if ckpt is not None:
            print('found model: ', ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('model not found')
        while True:
            input_string = raw_input('me > ')
            # quit
            if input_string == 'quit':
                exit()
            # canned answers for predefined personal questions
            personal_ans = utils.check_pre_ques(input_string.decode('utf-8'))
            if personal_ans is not None:
                print('AI > ' + personal_ans)
                continue
            input_string_vec = []
            aseq = sep_word.cut(input_string, text=True)
            for words in aseq.split(' '):
                input_string_vec.append(vocab_en.get(words, config.UNK_ID))
            bucket_id = min([b for b in range(len(config.BUCKETS))
                             if config.BUCKETS[b][0] > len(input_string_vec)])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(input_string_vec, [])]}, bucket_id)
            _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                             target_weights, bucket_id, True)
            outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
            if config.EOS_ID in outputs:
                outputs = outputs[:outputs.index(config.EOS_ID)]
            response = "".join([tf.compat.as_str(vocab_de[output])
                                for output in outputs])
            print('AI > ' + response)
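# A minimal sketch of utils.check_pre_ques as used in predict() above,
# assuming it returns a canned reply for a small set of predefined questions
# and None otherwise; the table contents here are illustrative assumptions.
PRE_ANSWERS = {
    u'你是谁': u'我是一个聊天机器人。',  # "Who are you?" -> "I am a chatbot."
}

def check_pre_ques(question):
    # None signals "no canned answer; fall through to the model".
    return PRE_ANSWERS.get(question)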
def chatbot():
    """
    Use this to chat with your own chatbot.
    :return:
    """
    questionholder = tf.placeholder(shape=[None, None], dtype=tf.int32)
    quelengthholder = tf.placeholder(shape=[None], dtype=tf.int32)
    params = parameters()
    # As in training(): bool() of a non-empty string is always True, so the
    # boolean options are compared against the literal string 'true'.
    model_param_dict = {
        'num_units': int(params.get('modelparam', 'num_units')),
        'num_layers': int(params.get('modelparam', 'num_layers')),
        'vocab_size': int(params.get('modelparam', 'vocab_size')),
        'embedding_size': int(params.get('modelparam', 'embedding_size')),
        'beam_size': int(params.get('modelparam', 'beam_size')),
        'use_attention': params.get('modelparam', 'use_attention').lower() == 'true',
        'use_beam_search': params.get('modelparam', 'use_beam_search').lower() == 'true',
        'start_token_idx': int(params.get('modelparam', 'start_token_idx')),
        'end_token_idx': int(params.get('modelparam', 'end_token_idx')),
        'max_gradient_norm': float(params.get('modelparam', 'max_gradient_norm')),
    }
    modelsaved_dir = params.get('trainparam', 'checkpoint_dir')
    seq2seq = Seq2Seq(**model_param_dict)
    decode_outputs = seq2seq.model(source_input=questionholder,
                                   source_length=quelengthholder,
                                   mode='inference',
                                   batch_size=1,
                                   keep_probs=1.0)
    predict_ids = decode_outputs.predicted_ids

    ckpt = tf.train.get_checkpoint_state(modelsaved_dir)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters..')
            saver.restore(sess, tf.train.latest_checkpoint(modelsaved_dir))
        else:
            raise ValueError('There is no chatbot baby in {}'.format(modelsaved_dir))
        question = 'start'
        print('Hello, I\'m ibot, nice to meet you!')
        # An empty line of input ends the conversation.
        while question:
            question = input(':: ')
            questionbatch, question_length = sentence2ids(question)
            answer_ids = sess.run(predict_ids,
                                  feed_dict={questionholder: questionbatch,
                                             quelengthholder: question_length})
            answer = ids2sentence(answer_ids, model_param_dict['beam_size'])
            print(answer)  # show the decoded reply
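# Minimal sketches of the sentence2ids / ids2sentence helpers referenced in
# chatbot(), assuming a word2idx/idx2word vocabulary pair and beam-search
# output shaped [batch, time, beam_width]; every name here is an assumption.
def sentence2ids(sentence):
    ids = [word2idx.get(word, word2idx['<unk>']) for word in sentence.split()]
    return [ids], [len(ids)]  # a batch of one sentence plus its length

def ids2sentence(answer_ids, beam_size):
    # Take the first (best-scoring) beam of the single batch element.
    best_beam = [step[0] for step in answer_ids[0]]
    return ' '.join(idx2word[i] for i in best_beam)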
def interface(_input):
    # run prediction on a single input string
    model = Seq2Seq().to(config.device)
    model.load_state_dict(torch.load("./models/model.pkl"))

    input = list(str(_input))
    input_len = torch.LongTensor([len(input)])  # [1]
    input = torch.LongTensor([config.ns.transform(input)])  # [1, max_len]
    with torch.no_grad():
        input = input.to(config.device)
        input_len = input_len.to(config.device)
        _, decoder_predict = model.evaluate(input, input_len)  # [batch_size, max_len, vocab_size]

    # map the predicted indices back to tokens with inverse_transform
    pred = [config.ns.inverse_transform(i) for i in decoder_predict]
    print(_input, "---->", pred[0])
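# A minimal sketch of the config.ns sequence transformer assumed by
# interface(): it maps token lists to fixed-length index lists and back.
# The class name, max_len, and special-token choices are illustrative
# assumptions, not the original implementation.
class NumSequence:
    PAD, UNK = 0, 1

    def __init__(self, vocab, max_len=10):
        self.word2idx = {w: i + 2 for i, w in enumerate(vocab)}
        self.idx2word = {i: w for w, i in self.word2idx.items()}
        self.max_len = max_len

    def transform(self, tokens):
        # truncate to max_len, then right-pad with PAD
        ids = [self.word2idx.get(t, self.UNK) for t in tokens[:self.max_len]]
        return ids + [self.PAD] * (self.max_len - len(ids))

    def inverse_transform(self, ids):
        return [self.idx2word.get(int(i), '<UNK>') for i in ids
                if int(i) != self.PAD]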
def create_model(session, FLAGS):
    config = OrderedDict(sorted(FLAGS.__flags.items()))
    model = Seq2Seq(config, 'train')

    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("reloading model parameters...")
        model.restore(session, ckpt.model_checkpoint_path)
    else:
        if not os.path.exists(FLAGS.model_dir):
            os.makedirs(FLAGS.model_dir)
        print("create new model parameters...")
        session.run(tf.global_variables_initializer())
    return model
def create_model(gen_config):
    encoder = Encoder(gen_config.vocab_size, gen_config.emb_dim,
                      gen_config.hidden_size, n_layers=2, dropout=0.5)
    decoder = Decoder(gen_config.emb_dim, gen_config.hidden_size,
                      gen_config.vocab_size, n_layers=1, dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    optimizer = optim.Adam(seq2seq.parameters(), lr=gen_config.lr)
    return seq2seq, optimizer
def main(**kwargs):
    vocab, vocab_rsd = default_build_vocab('./data/vocab.txt')
    vocab_size = len(vocab)
    print 'vocabulary size is %d' % vocab_size
    data = Seq2SeqIter(data_path=None,
                       source_path='./data/a.txt',
                       target_path='./data/b.txt',
                       vocab=vocab,
                       vocab_rsd=vocab_rsd,
                       batch_size=10,
                       max_len=25,
                       data_name='data',
                       label_name='label',
                       split_char='\n',
                       text2id=None,
                       read_content=None,
                       model_parallel=False)
    print 'training data size is %d' % data.size
    model = Seq2Seq(seq_len=25,
                    batch_size=10,
                    num_layers=1,
                    input_size=vocab_size,
                    embed_size=150,
                    hidden_size=150,
                    output_size=vocab_size,
                    dropout=0.0,
                    mx_ctx=CTX)
    model.train(dataset=data, epoch=5)
def main():
    X_indices, Y_indices, X_char2idx, Y_char2idx, X_idx2char, Y_idx2char = preprocess_data()
    model = Seq2Seq(rnn_size=50,
                    n_layers=2,
                    encoder_embedding_dim=15,
                    decoder_embedding_dim=15,
                    X_word2idx=X_char2idx,
                    Y_word2idx=Y_char2idx)
    model.fit(X_indices, Y_indices)
    model.infer('common', X_idx2char, Y_idx2char)
    model.infer('apple', X_idx2char, Y_idx2char)
    model.infer('zhedong', X_idx2char, Y_idx2char)
def lstmmodel(neurons, layer, drop, batch_size, epochs, backday, n_outputs):
    # Build sliding windows: each sample uses `backday` past steps of all
    # features to predict the next `n_outputs` steps of the first feature.
    x_train, y_train = [], []
    x_valid, y_valid = [], []
    x_test, y_test = [], []
    for i in range(backday, train_size - n_outputs):
        x_train.append(train[i - backday:i, :])
        y_train.append(train[i:i + n_outputs, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))
    for i in range(backday, valid_size - n_outputs):
        x_valid.append(valid[i - backday:i, :])
        y_valid.append(valid[i:i + n_outputs, 0])
    x_valid, y_valid = np.array(x_valid), np.array(y_valid)
    y_valid = y_valid.reshape((y_valid.shape[0], y_valid.shape[1], 1))
    for i in range(backday, test_size - n_outputs):
        x_test.append(test[i - backday:i, :])
        y_test.append(test[i:i + n_outputs, 0])
    x_test, y_test = np.array(x_test), np.array(y_test)
    y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], 1))

    print(neurons, layer, drop, batch_size, epochs, backday, n_outputs)
    model = Seq2Seq(output_dim=1,
                    hidden_dim=neurons,
                    output_length=n_outputs,
                    input_shape=(x_train.shape[1], x_train.shape[2]),
                    peek=False,
                    depth=layer,
                    dropout=drop)
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='mean_absolute_error', patience=10)
    model.fit(x_train, y_train,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(x_valid, y_valid),
              verbose=0,
              callbacks=[early_stopping])
    # evaluate() returns [loss, mae] given metrics=['mae']
    loss, mae = model.evaluate(x_test, y_test)
    print('Test Mean Absolute Error: %f using %s,%s,%s,%s,%s,%s'
          % (mae, neurons, layer, drop, batch_size, epochs, backday))
    return model, mae, [neurons, layer, drop, batch_size, epochs, backday]
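# lstmmodel() relies on module-level arrays train/valid/test and their lengths
# train_size/valid_size/test_size. A hypothetical setup under the assumption
# that `scaled` is a 2-D array of shape (timesteps, features) split
# chronologically; all names and split ratios here are illustrative.
import numpy as np

scaled = np.random.random((1000, 4))  # placeholder for the real scaled data
n1, n2 = int(len(scaled) * 0.7), int(len(scaled) * 0.85)
train, valid, test = scaled[:n1], scaled[n1:n2], scaled[n2:]
train_size, valid_size, test_size = len(train), len(valid), len(test)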
def build_model(config, en_vocab_size, cn_vocab_size):
    # build the model
    encoder = Encoder(en_vocab_size, config.emb_dim, config.hid_dim,
                      config.n_layers, config.dropout)
    decoder = Decoder(cn_vocab_size, config.emb_dim, config.hid_dim,
                      config.n_layers, config.dropout, config.attention)
    model = Seq2Seq(encoder, decoder, device)
    print(model)
    # build the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    print(optimizer)
    if config.load_model:
        model = load_model(model, config.load_model_path)
    model = model.to(device)
    return model, optimizer
def __init__(self):
    self.du = data_unit.DataUnit(**data_config)
    self.save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    self.batch_size = 1
    tf.reset_default_graph()
    self.model = Seq2Seq(batch_size=self.batch_size,
                         encoder_vocab_size=self.du.vocab_size,
                         decoder_vocab_size=self.du.vocab_size,
                         mode='decode',
                         **model_config)
    self.sess = tf.InteractiveSession()
    self.init = tf.global_variables_initializer()
    self.sess.run(self.init)
    self.model.load(self.sess, self.save_path)
def eval(args):
    batch_size = 32
    enc = RNNEncoder(300, args.embedding_file)
    dec = RNNDecoder(300, args.embedding_file)
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    model = Seq2Seq(enc, dec, device).to(device)
    ckpt = torch.load(args.model_path)
    model.load_state_dict(ckpt['state_dict'])
    model.eval()

    embedding_matrix = pickle.load(open(args.embedding_file, 'rb'))
    tokenizer = Tokenizer(lower=True)
    tokenizer.set_vocab(embedding_matrix.vocab)
    eval_data = pickle.load(open(args.test_data_path, 'rb'))
    eval_loader = DataLoader(eval_data,
                             batch_size=batch_size,
                             num_workers=0,
                             shuffle=False,
                             collate_fn=eval_data.collate_fn)

    prediction = {}
    with torch.no_grad():
        for batch in tqdm(eval_loader):
            pred = model(batch, 0)  # the second argument is presumably the teacher-forcing ratio
            pred = torch.argmax(pred, dim=2)  # (batch, seq_len)
            for i in range(len(pred)):
                # keep everything before </s> and strip the leading start token
                prediction[batch['id'][i]] = tokenizer.decode(
                    pred[i]).split('</s>')[0].split(' ', 1)[1]

    pred_output = [json.dumps({'id': key, 'predict': value})
                   for key, value in sorted(prediction.items(),
                                            key=lambda item: item[0])]
    with open(args.output_path, 'w') as output_file:
        output_file.write('\n'.join(pred_output))
        output_file.write('\n')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, required=True)
    parser.add_argument('--train_percent', type=float, default=0.8)
    parser.add_argument('--ts_dim', type=int, default=3)
    parser.add_argument('--input_length', type=int, default=24 * 2 * 6)
    parser.add_argument('--output_length', type=int, default=24 * 1 * 6)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--hidden_size', type=int, default=128)
    parser.add_argument('--num_epochs', type=int, default=30)
    parser.add_argument('--early_stopping', type=int, default=5)
    parser.add_argument('--checkpoint_path', type=str,
                        default="../checkpoints/seq2seq.h5")
    config = parser.parse_args()

    model = Seq2Seq(hidden_size=config.hidden_size,
                    ts_dim=config.ts_dim,
                    input_length=config.input_length,
                    output_length=config.output_length,
                    teacher_forcing=True)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.9, step_size=1)

    time_series_list = joblib.load(os.path.join(config.data_path, 'ts.joblib'))
    trainer = Trainer(model=model,
                      time_series_list=time_series_list,
                      criterion=criterion,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      batch_size=config.batch_size,
                      num_epochs=config.num_epochs,
                      early_stopping=config.early_stopping,
                      train_valid_split=config.train_percent,
                      checkpoint_path=config.checkpoint_path,
                      plot=False,
                      offet_for_plot=50)
    trainer.train()
def build_model(options):
    model = Seq2Seq.load(ImageCaptioning,
                         options.model_path,
                         tok_dir=options.tokenizer_path,
                         use_obj=options.obj)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    num_gpu = torch.cuda.device_count()
    generator = BeamDecoder(model,
                            beam_width=options.beam_width,
                            max_len_a=options.max_len_a,
                            max_len_b=options.max_len_b,
                            len_penalty_ratio=options.len_penalty_ratio)
    if options.fp16:
        generator = amp.initialize(generator, opt_level="O2")
    if num_gpu > 1:
        generator = DataParallelModel(generator)
    return generator, model.text_processor
def build_graph(self, name="train"):
    graph = tf.Graph()
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with graph.as_default(), tf.container(name):
        self.logger.info("Building {} graph...".format(name))
        model = Seq2Seq(self.data, self.config)
        sess = tf.Session(config=tf_config, graph=graph)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())
        sess.run(model.data_iterator.initializer)
        if self.config.checkpoint_dir and name == "train":
            self.logger.info('Loading checkpoint from {}'.format(
                self.config.checkpoint_dir))
            model.load(sess)
            self.global_step = model.global_step_tensor.eval(sess)
    return model, sess
def main(_):
    converter = TextConverter(filename=FLAGS.converter_name + '_converter.pkl')
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    model = Seq2Seq('sample',
                    converter.vocab_size,
                    lstm_size=FLAGS.lstm_size,
                    num_steps=FLAGS.num_steps,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size,
                    bidirectional=FLAGS.bidirectional,
                    beam_search=FLAGS.beam_search,
                    beam_width=FLAGS.beam_width)
    model.load(FLAGS.checkpoint_path)

    max_len = FLAGS.num_steps
    while True:
        inp = input('Input (Q to quit): ')
        if inp == 'Q':
            break
        # Truncate long inputs; right-pad short ones with zeros to max_len.
        inp = converter.sentence_to_idxs(inp)
        if len(inp) > max_len:
            inp = inp[:max_len]
        else:
            inp = inp + [0] * (max_len - len(inp))
        if FLAGS.beam_search:
            decoder_outputs = model.sample(inp)
            predicted_ids = decoder_outputs.predicted_ids[0]
            parent_ids = decoder_outputs.parent_ids[0]
            sentences = converter.beam_to_sentences(predicted_ids, parent_ids)
            for i, s in enumerate(sentences):
                print('Output %d: %s' % (i, s))
        else:
            sample_id = model.sample(inp)
            output = converter.idxs_to_words(sample_id[0])
            print('Output: %s' % output)
        print('--------------------')
def predict():
    dictionary_input, rev_dictionary_input = read_vocab(args.vocab_file)
    dictionary_output, rev_dictionary_output = dictionary_input, rev_dictionary_input
    model = Seq2Seq(args.size_layer,
                    args.num_layers,
                    args.embedded_size,
                    len(dictionary_input),
                    len(dictionary_output),
                    args.learning_rate,
                    dictionary_input)
    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                tf.logging.info("restore model from path: %s", ckpt.model_checkpoint_path)
                # load the pre-trained model
                saver = tf.train.Saver(max_to_keep=4)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                tf.logging.error("model path wrong !!")
                return
            while True:
                text = input("input your dream: ")
                input_test = [" ".join(list(text))]
                input_test = str_idx(input_test, dictionary_input, dictionary_input['UNK'])
                batch_x, _ = pad_sentence_batch(input_test, dictionary_input["PAD"])
                predicted2 = sess.run(model.predicting_ids,
                                      feed_dict={model.X: batch_x})
                for i in range(len(batch_x)):
                    # IDs 0-3 are special tokens and are stripped from the output.
                    print('dream:',
                          ''.join([rev_dictionary_input[n] for n in batch_x[i]
                                   if n not in [0, 1, 2, 3]]))
                    print('dream decoding:',
                          ''.join([rev_dictionary_output[n] for n in predicted2[i]
                                   if n not in [0, 1, 2, 3]]), '\n')
                print("*" * 20)
def test_run(data_path, model_path, n_show=10):
    """
    Test function. The input is training data; the output should reproduce the
    correct sentences from the training phase.
    :return:
    """
    corpus = ConvCorpus(file_path=None)
    corpus.load(load_dir=data_path)
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild the seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id), feature_num=args.feature_num,
                    hidden_num=args.hidden_num, batch_size=1, gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # run an interpreter
    for num, input_sentence in enumerate(corpus.posts):
        id_sequence = input_sentence.copy()
        input_sentence.reverse()
        input_sentence.insert(0, corpus.dic.token2id["<eos>"])

        model.initialize()  # initialize the cell state
        sentence = model.generate(input_sentence,
                                  sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id,
                                  id2word=corpus.dic)
        print("teacher : ", " ".join([corpus.dic[w_id] for w_id in id_sequence]))
        print("correct :", " ".join([corpus.dic[w_id] for w_id in corpus.cmnts[num]]))
        print("-> ", sentence)
        print('')
        if num == n_show:
            break
def test_albert_seq2seq_init(self):
    path_dir_name = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(path_dir_name, "sample.txt")

    with tempfile.TemporaryDirectory() as tmpdirname:
        processor = TextProcessor()
        processor.train_tokenizer([data_path],
                                  vocab_size=1000,
                                  to_save_dir=tmpdirname,
                                  languages={"<en>": 0, "<fa>": 1})
        seq2seq = Seq2Seq(text_processor=processor)
        src_inputs = torch.tensor([
            [1, 2, 3, 4, 5, processor.pad_token_id(), processor.pad_token_id()],
            [1, 2, 3, 4, 5, 6, processor.pad_token_id()],
        ])
        tgt_inputs = torch.tensor([
            [6, 8, 7, processor.pad_token_id(), processor.pad_token_id()],
            [6, 8, 7, 8, processor.pad_token_id()],
        ])
        src_mask = (src_inputs != processor.pad_token_id())
        tgt_mask = (tgt_inputs != processor.pad_token_id())
        src_langs = torch.tensor([[0], [0]]).squeeze()
        tgt_langs = torch.tensor([[1], [1]]).squeeze()

        seq_output = seq2seq(src_inputs, tgt_inputs, src_mask, tgt_mask,
                             src_langs, tgt_langs, log_softmax=True)
        assert list(seq_output.size()) == [5, processor.vocab_size()]

        seq_output = seq2seq(src_inputs, tgt_inputs, src_mask, tgt_mask,
                             src_langs, tgt_langs)
        assert list(seq_output.size()) == [5, processor.vocab_size()]
def test(embedded, batch_size=100):
    print("Prediction test")
    model = Seq2Seq(embedded.voca_size)
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state("./model")
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        enc_input, dec_input, targets = embedded.batch(batch_size)
        expect, outputs, accuracy = model.test(sess, enc_input, dec_input, targets)

        expect = embedded.decode(expect)
        outputs = embedded.decode(outputs)

        # Pick a random question/answer pair. Integer division is required:
        # random.randrange needs an int upper bound.
        pick = random.randrange(0, len(expect) // 2)
        input = embedded.decode([embedded.test[pick * 2]], True)
        expect = embedded.decode([embedded.test[pick * 2 + 1]], True)
        outputs = embedded.cut_eos(outputs[pick])

        print("\nAccuracy:", accuracy)
        print("Random sample\n")
        print("Input:", input)
        print("Expected:", expect)
        print("Predicted:", ' '.join(outputs))
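# A minimal sketch of the embedded.cut_eos helper used in test(), assuming the
# decoded output is a list of tokens terminated by an '<eos>' marker; the
# marker string is an assumption.
def cut_eos(tokens, eos='<eos>'):
    # Drop the end-of-sequence marker and everything after it.
    return tokens[:tokens.index(eos)] if eos in tokens else tokens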