def test_Seq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [Seq2Seq(output_dim=output_dim, output_length=output_length,
                       input_shape=(input_length, input_dim))]
    models += [Seq2Seq(output_dim=output_dim, output_length=output_length,
                       input_shape=(input_length, input_dim), peek=True)]
    models += [Seq2Seq(output_dim=output_dim, output_length=output_length,
                       input_shape=(input_length, input_dim), depth=2)]
    models += [Seq2Seq(output_dim=output_dim, output_length=output_length,
                       input_shape=(input_length, input_dim), peek=True, depth=2)]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, nb_epoch=1)
def start_test(args, data):
    # read one sentence at a time from the user
    if args.gpu == 0:
        use_cuda = False
    else:
        use_cuda = True

    input_dim = args.vocab_size
    output_dim = args.vocab_size
    embedding_dim = args.embedding_size
    hidden_dim = args.hidden_size
    args.mode = 'test'

    while True:
        input_word = input('Please enter your question: ')
        text = data.text2index(input_word)
        if use_cuda:
            seq2seq_model = Seq2Seq(input_dim, embedding_dim, hidden_dim, output_dim,
                                    use_cuda=use_cuda).cuda()
        else:
            seq2seq_model = Seq2Seq(input_dim, embedding_dim, hidden_dim, output_dim,
                                    use_cuda=use_cuda)
        seq2seq_model.load_state_dict(torch.load(args.module_path))
        seq2seq_model.eval()
        # predict_indices = seq2seq_model.seq2seq_predict(text)
        # predict_result = data.index2text(predict_indices)
        # print(predict_result)
        predict_sample_indices = seq2seq_model.beamsearch(text)
        for predict_indices in predict_sample_indices:
            predict_result = data.index2text(predict_indices)
            print("".join(predict_result[:-1]))
def __init__(self, config):
    super(AEMapper, self).__init__()
    self.config = config
    self.source_seq2seq = Seq2Seq(config)
    self.target_seq2seq = Seq2Seq(config)
    self.mapper = layers.FeedForward(config.context_size,
                                     config.num_layers * config.decoder_hidden_size,
                                     num_layers=1,
                                     activation=config.activation)
def test_Seq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim))]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), peek=True)]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), depth=2)]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), peek=True, depth=2)]

    # for model in models:
    #     model.compile(loss='mse', optimizer='sgd')
    #     model.summary()
    #     model.fit(x, y, epochs=1)

    model = Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                    output_length=output_length,
                    input_shape=(input_length, input_dim),
                    peek=True, depth=2, teacher_force=True)
    model.compile(loss='mse', optimizer='sgd')
    model.fit([x, y], y, epochs=1)

    for layer in model.layers:
        print(layer)
        if "RecurrentSequential" in str(layer):
            print(K.get_value(layer.x))
def train():
    model = Seq2Seq()
    data_loader = DataLoader()

    with tf.Session() as sess:
        checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir)
        if checkpoint:
            print('Restoring the model from checkpoint...', checkpoint)
            model.saver.restore(sess, checkpoint)
        else:
            print('Creating a new model...')
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        input_batch, output_batch, target_batch = data_loader.make_batch(data_loader.seq_data)

        for step in range(FLAGS.max_steps):
            _, loss = model.train(sess, input_batch, output_batch, target_batch)
            model.write_logs(sess, writer, input_batch, output_batch, target_batch)
            print('Step', '%04d' % (step + 1), 'cost =', '{:.6f}'.format(loss))

        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.checkpoint_name)
        model.saver.save(sess, checkpoint_path)
        print('Optimization finished!')
def test():
    model = Seq2Seq()
    data_loader = DataLoader()
    print(FLAGS.input_size)

    with tf.Session() as sess:
        checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir)
        if checkpoint:
            print('Restoring the model from checkpoint...', checkpoint)
            model.saver.restore(sess, checkpoint)
            while True:
                word = input('Enter an english word: ')
                seq_data = [word, 'P' * len(word)]
                input_batch, output_batch, target_batch = data_loader.make_batch([seq_data])
                prediction = tf.argmax(model.logits, 2)
                result = sess.run(prediction,
                                  feed_dict={model.enc_input: input_batch,
                                             model.dec_input: output_batch,
                                             model.targets: target_batch})
                decoded = [data_loader.char_list[i] for i in result[0]]
                try:
                    end = decoded.index('E')
                    translated = ''.join(decoded[:end])
                except ValueError:
                    translated = ''.join(decoded)
                print(word, ' -> ', translated)
        else:
            print('No trained model was found.')
def setUp(self):
    # data stuff
    self.batch_size = 32
    self.window_size = 64
    self.hidden_size = 256
    self.overlap_size = 32
    input_file = '../data/songs-utf-8.txt'
    self.assertTrue(os.path.exists(input_file))

    # training stuff
    self.learning_rate = 0.001
    self.decay_rate = 0.999
    self.decay_steps = 100
    self.epochs = 10
    self.skip_steps = 20

    # sampling stuff
    self.temp = 0.7
    self.seed = list(string.ascii_uppercase) + ['Š', 'Đ', 'Č', 'Ć', 'Ž']
    self.seed = list(filter(lambda v: v not in ['X', 'Y', 'Q', 'W'], self.seed))
    print('seed', self.seed)

    # clear everything that might be in the default graph from previous tests
    tf.reset_default_graph()

    # construction stuff
    self.seq2seq = Seq2Seq(input_file=input_file,
                           window_size=self.window_size,
                           overlap_size=self.overlap_size,
                           batch_size=self.batch_size,
                           hidden_size=self.hidden_size)
def eval():
    model = Seq2Seq().to(config.device)
    model.load_state_dict(torch.load("./models/model.pkl"))
    loss_list = []
    acc_list = []
    data_loader = get_dataloader(train=False)  # load the test set
    with torch.no_grad():
        for idx, (input, target, input_len, target_len) in enumerate(data_loader):
            input = input.to(config.device)
            # target stays on CPU: [batch_size, max_len]
            input_len = input_len.to(config.device)

            # decoder_outputs: [batch_size, max_len, vocab_size]
            # decoder_predict: [batch_size, max_len]
            decoder_outputs, decoder_predict = model.evaluate(input, input_len)
            loss = F.nll_loss(decoder_outputs.view(-1, len(config.ns)),
                              target.to(config.device).view(-1),
                              ignore_index=config.ns.PAD)
            loss_list.append(loss.item())

            # inverse-transform target and decoder_predict back into tokens
            target_inverse_tranformed = [config.ns.inverse_transform(i) for i in target.numpy()]
            predict_inverse_tranformed = [config.ns.inverse_transform(i) for i in decoder_predict]
            cur_eq = [1 if target_inverse_tranformed[i] == predict_inverse_tranformed[i] else 0
                      for i in range(len(target_inverse_tranformed))]
            acc_list.extend(cur_eq)
            # print(np.mean(cur_eq))

    print("mean acc:{} mean loss:{:.6f}".format(np.mean(acc_list), np.mean(loss_list)))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', type=str, default="../output/")
    parser.add_argument('--checkpoint_path', type=str, default="../checkpoints/seq2seq.h5")
    parser.add_argument('--ts_path', type=str, default="../processed_data/train/ts.joblib")
    parser.add_argument('--ts_dim', type=int, default=3)
    parser.add_argument('--input_length', type=int, default=64)
    parser.add_argument('--output_length', type=int, default=16)
    parser.add_argument('--hidden_size', type=int, default=128)
    config = parser.parse_args()

    model = Seq2Seq(hidden_size=config.hidden_size,
                    ts_dim=config.ts_dim,
                    input_length=config.input_length,
                    output_length=config.output_length,
                    teacher_forcing=True)
    ts = joblib.load(config.ts_path)
    inferer = Inference(model, ts,
                        output_dir=config.output_dir,
                        checkpoint_path=config.checkpoint_path)
    inferer.infer()
def main():
    BATCH_SIZE = 128
    X_indices, Y_indices, X_char2idx, Y_char2idx, X_idx2char, Y_idx2char = preprocess_data()
    X_train = X_indices[BATCH_SIZE:]
    Y_train = Y_indices[BATCH_SIZE:]
    X_test = X_indices[:BATCH_SIZE]
    Y_test = Y_indices[:BATCH_SIZE]

    model = Seq2Seq(
        rnn_size=50,
        n_layers=2,
        X_word2idx=X_char2idx,
        encoder_embedding_dim=15,
        Y_word2idx=Y_char2idx,
        decoder_embedding_dim=15,
    )
    model.fit(X_train, Y_train, val_data=(X_test, Y_test),
              batch_size=BATCH_SIZE, n_epoch=50)
    model.infer('common', X_idx2char, Y_idx2char)
    model.infer('apple', X_idx2char, Y_idx2char)
    model.infer('zhedong', X_idx2char, Y_idx2char)
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    train_data, train_lang = loaddata(path, FLAGS.num_steps)
    vocab_size = train_lang.vocab_size

    converter = TextConverter(lang=train_lang, max_vocab=FLAGS.max_vocab)
    converter.save_lang(filename=FLAGS.name + '_converter.pkl')

    g = batch_generator(train_data, FLAGS.batch_size, FLAGS.max_steps)

    model = Seq2Seq('train',
                    vocab_size,
                    batch_size=FLAGS.batch_size,
                    num_steps=FLAGS.num_steps,
                    max_steps=FLAGS.max_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size,
                    max_iters=FLAGS.max_iters,
                    bidirectional=FLAGS.bidirectional,
                    beam_search=False)

    model.train(g,
                converter,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def test_run(data_path, model_path, n_show=10):
    """
    Test function.
    The input is training data; the output should be the corresponding correct sentence
    from the training phase.
    :return:
    """
    corpus = ConvCorpus(file_path=None)
    corpus.load(load_dir=data_path)
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id), feature_num=args.feature_num,
                    hidden_num=args.hidden_num, batch_size=1, gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # run an interpreter
    for num, input_sentence in enumerate(corpus.posts):
        id_sequence = input_sentence.copy()
        # input_sentence.reverse()
        # input_sentence.insert(0, corpus.dic.token2id["<eos>"])

        model.initialize()  # initialize cell
        sentence = model.generate(input_sentence,
                                  sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id,
                                  id2word=corpus.dic)
        print("teacher : ", " ".join([corpus.dic[w_id] for w_id in id_sequence]))
        print("correct :", " ".join([corpus.dic[w_id] for w_id in corpus.cmnts[num]]))
        print("-> ", sentence)
        print('')

        if num == n_show:
            break
def setUp(self):
    vocab_size = 13
    wordvec_size = 100
    hidden_size = 100
    self.seq2seq = Seq2Seq(vocab_size, wordvec_size, hidden_size)
    self.xs = np.random.randint(0, 13, (13, 100))
    self.ts = np.random.randint(0, 13, (13, 100))
def main():
    # Dataset
    dataset = PadDataset(WORKING_DIR, EMBEDDING_SIZE, diff_vocab=DIFF_VOCAB,
                         embedding_path=EMBEDDING_PATH,
                         limit_encode=LIMIT_ENCODE, limit_decode=LIMIT_DECODE)
    print("112")
    encoder_vocab_size = dataset.length_vocab_encode()
    decoder_vocab_size = dataset.length_vocab_decode()
    print("Steps per epoch %d" % (int(math.ceil(float(dataset.datasets["train"].number_of_samples)
                                                / float(BATCH_SIZE)))))

    # Initialising Model
    embeddings_encoder = dataset.vocab.embeddings_encoder
    embeddings_encoder = torch.Tensor(embeddings_encoder).cuda()
    embeddings_decoder = dataset.vocab.embeddings_decoder
    embeddings_decoder = torch.Tensor(embeddings_decoder).cuda()

    content_encoder = Encoder(encoder_vocab_size, embeddings_encoder, EMBEDDING_SIZE, HIDDEN_SIZE).cuda()
    print("123")
    query_encoder = Encoder(encoder_vocab_size, embeddings_encoder, EMBEDDING_SIZE, HIDDEN_SIZE).cuda()
    print("ddf")
    decoder = Decoder(EMBEDDING_SIZE, embeddings_decoder, HIDDEN_SIZE, decoder_vocab_size).cuda()
    print("adsf")
    seq2seqwattn = Seq2Seq(content_encoder, query_encoder, decoder).cuda()
    print("adsdf")

    run_this = run_model(dataset, seq2seqwattn)
    print('rehc')
    run_this.run_training()
    print('124124')
def lstmmodel(neurons, layer, drop, batch_size, epochs, backday, n_outputs):
    x_train, y_train = [], []
    x_valid, y_valid = [], []
    x_test, y_test = [], []

    for i in range(backday, train_size - n_outputs):
        x_train.append(train[i - backday:i, :])
        y_train.append(train[i:i + n_outputs, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))

    for i in range(backday, valid_size - n_outputs):
        x_valid.append(valid[i - backday:i, :])
        y_valid.append(valid[i:i + n_outputs, 0])
    x_valid, y_valid = np.array(x_valid), np.array(y_valid)
    y_valid = y_valid.reshape((y_valid.shape[0], y_valid.shape[1], 1))

    for i in range(backday, test_size - n_outputs):
        x_test.append(test[i - backday:i, :])
        y_test.append(test[i:i + n_outputs, 0])
    x_test, y_test = np.array(x_test), np.array(y_test)
    y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], 1))

    print(neurons, layer, drop, batch_size, epochs, backday, n_outputs)
    model = Seq2Seq(output_dim=1, hidden_dim=neurons, output_length=n_outputs,
                    input_shape=(x_train.shape[1], x_train.shape[2]),
                    peek=False, depth=layer, dropout=drop)
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='mean_absolute_error', patience=10)
    model.fit(x_train, y_train, epochs=epochs, batch_size=len(x_train),
              validation_data=(x_valid, y_valid), verbose=0, callbacks=[early_stopping])
    loss, accuracy = model.evaluate(x_test, y_test)
    print('Test Mean Absolute Error: %f using %f,%f,%f,%f,%f,%f'
          % (accuracy, neurons, layer, drop, batch_size, epochs, backday))
    return model, accuracy, [neurons, layer, drop, batch_size, epochs, backday]
def main(**args):
    vocab, vocab_rsd = default_build_vocab('./data/vocab.txt')
    vocab_size = len(vocab)
    print('vocabulary size is %d' % vocab_size)

    data = Seq2SeqIter(data_path='./data/data.pickle',
                       source_path='./data/a.txt',
                       target_path='./data/b.txt',
                       vocab=vocab,
                       vocab_rsd=vocab_rsd,
                       batch_size=10,
                       max_len=25,
                       data_name='data',
                       label_name='label',
                       split_char='\n',
                       text2id=None,
                       read_content=None,
                       model_parallel=False)
    print('training data size is %d' % data.size)

    model = Seq2Seq(seq_len=25,
                    batch_size=10,
                    num_layers=1,
                    input_size=vocab_size,
                    embed_size=150,
                    hidden_size=150,
                    output_size=vocab_size,
                    dropout=0.0,
                    mx_ctx=CTX)
    model.train(dataset=data, epoch=5)
def __build_model(self):
    return Seq2Seq(input_size=self.__source_lang.n_words,
                   output_size=self.__target_lang.n_words,
                   hidden_size=constants.HIDDEN_SIZE,
                   learning_rate=constants.LEARNING_RATE,
                   teacher_forcing_ratio=constants.TEACHER_FORCING_RATIO,
                   device=constants.DEVICE)
def predict():
    """
    Produce a reply to the chat message entered by the user.
    :return:
    """
    du = data_unit.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    batch_size = 1
    tf.reset_default_graph()
    model = Seq2Seq(batch_size=batch_size,
                    encoder_vocab_size=du.vocab_size,
                    decoder_vocab_size=du.vocab_size,
                    mode='decode',
                    **model_config)
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        model.load(sess, save_path)
        while True:
            q = input('Please enter a chat message: ')
            if q is None or q.strip() == '':
                print('-----------------------------')
                continue
            if q == r'\b':
                print('Goodbye!')
                exit()
            q = q.strip()
            indexs = du.transform_sentence(q)
            x = np.asarray(indexs).reshape((1, -1))
            xl = np.asarray(len(indexs)).reshape((1,))
            pred = model.predict(sess, np.array(x), np.array(xl))
            print('Q: ', du.transform_indexs(x[0]))
            print('A: ', du.transform_indexs(pred[0]))
            print('-----------------------------')
def train(embedded, batch_size=100, epoch=100):
    model = Seq2Seq(embedded.voca_size)

    with tf.Session() as sess:
        checkpoint = tf.train.get_checkpoint_state("./model")
        if checkpoint and tf.train.checkpoint_exists(checkpoint.model_checkpoint_path):
            print("Restoring the model from", checkpoint.model_checkpoint_path)
            model.saver.restore(sess, checkpoint.model_checkpoint_path)
        else:
            print("Creating a new model")
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter("./logs", sess.graph)
        total_batch = int(math.ceil(len(embedded.test) / float(batch_size)))

        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = embedded.batch(batch_size)
            _, loss = model.train(sess, enc_input, dec_input, targets)
            if (step + 1) % 100 == 0:
                model.write_logs(sess, writer, enc_input, dec_input, targets)
                print('Step:', '%06d' % model.global_step.eval(),
                      'cost =', '{:.6f}'.format(loss))

        checkpoint_path = os.path.join("./model", "conversation.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        print('Done')
def get_trained_model(task='autoencoder', data_name='autoencoder', units=256, random_seed=2):
    import evaluator
    from seq2seq import Seq2Seq
    from argparse import ArgumentParser

    parser = ArgumentParser()
    args = parser.parse_args(args=[])
    args.mode = "analysis"
    args.task = task
    args.data_name = data_name
    args.units = units
    args.random_seed = random_seed
    args.model_path = "../saved_model/%s_units=%s_seed=%d" % (
        args.data_name, args.units, args.random_seed)

    seq2seq = Seq2Seq(args)
    seq2seq.load_seq2seq(args.model_path)
    print("\tmode=%s, units=%d, model_path=%s" % (seq2seq.mode, seq2seq.units, seq2seq.model_path))

    if task == 'autoencoder' or task == 'autoenc-last':
        whole_accuracy, each_accuracy = evaluator.evaluate_autoencoder(seq2seq=seq2seq)
    else:
        raise ValueError('unknown task: %s' % task)
    assert whole_accuracy > 0.93 and each_accuracy > 0.99, "Load model failed."
    return seq2seq
def train():
    with tf.Graph().as_default() as graph:
        model = Seq2Seq(600, [7800, 300])
        queue = InputPipeline(
            ['{0}/frames/conversations_new.tfrecords'.format(options.data_dir)],
            batch_size=64,
            n_epochs=1000,
            capacity=1e4)

        input_seq, target_seq, input_seq_len, target_seq_len, history, history_size, history_seq_len = queue.inputs()

        loss = model.graph(input_seq, target_seq, input_seq_len, target_seq_len,
                           history, history_size, history_seq_len)

        global_step = tf.Variable(0, trainable=False, name='global_step')
        train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
            model.loss, global_step=global_step)

        sv = tf.train.Supervisor(graph=graph,
                                 logdir='{0}/seq2seq_v7/{1}'.format(options.output_dir, options.run_name),
                                 saver=tf.train.Saver(max_to_keep=None),
                                 summary_op=tf.summary.merge_all(),
                                 global_step=global_step)

        with sv.managed_session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            while not sv.should_stop():
                sess.run(train_op)
def main():
    # train data (the right-hand side of this assignment is missing in the original)
    train_data, teach_data =

    # train
    auto_encoder = AutoEncoderBase(Seq2Seq(128, 128))
    auto_encoder.model_train()
def test_seq2seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim))]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), peek=True)]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), depth=2)]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=output_length,
                       input_shape=(input_length, input_dim), peek=True, depth=2)]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=epoch_num)

    model = Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                    output_length=output_length,
                    input_shape=(input_length, input_dim),
                    peek=True, depth=2, teacher_force=True)
    model.compile(loss='mse', optimizer='sgd')
    model.fit([x, y], y, epochs=epoch_num)
def test_Seq2Seq():
    x = np.random.random((batch, max_encoder_length, input_dim))
    y = np.random.random((batch, max_decoder_length, output_dim))

    models = []
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=max_decoder_length,
                       input_shape=(max_encoder_length, input_dim))]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=max_decoder_length,
                       input_shape=(max_encoder_length, input_dim), peek=True)]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=max_decoder_length,
                       input_shape=(max_encoder_length, input_dim), depth=2)]
    models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                       output_length=max_decoder_length,
                       input_shape=(max_encoder_length, input_dim), peek=True, depth=2)]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1)

    model = Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                    output_length=max_decoder_length,
                    input_shape=(max_encoder_length, input_dim),
                    peek=True, depth=2, teacher_force=True)
    model.compile(loss='mse', optimizer='sgd')
    model.fit([x, y], y, epochs=1)
def training():
    params = parameters()
    # model parameters dict
    model_param_dict = {'num_units': int(params.get('modelparam', 'num_units')),
                        'num_layers': int(params.get('modelparam', 'num_layers')),
                        'vocab_size': int(params.get('modelparam', 'vocab_size')),
                        'embedding_size': int(params.get('modelparam', 'embedding_size')),
                        'beam_size': int(params.get('modelparam', 'beam_size')),
                        'use_attention': bool(params.get('modelparam', 'use_attention')),
                        'use_beam_search': bool(params.get('modelparam', 'use_beam_search')),
                        'start_token_idx': int(params.get('modelparam', 'start_token_idx')),
                        'end_token_idx': int(params.get('modelparam', 'end_token_idx')),
                        'max_gradient_norm': float(params.get('modelparam', 'max_gradient_norm'))}

    # for comments on the parameters below, see config.ini
    batch_size = int(params.get('trainparam', 'batch_size'))
    learning_rate = float(params.get('trainparam', 'learning_rate'))
    keep_prob = float(params.get('trainparam', 'keep_prob'))
    epochs = int(params.get('trainparam', 'epochs'))
    modelsaved_dir = params.get('trainparam', 'checkpoint_dir')
    savedname = params.get('trainparam', 'checkpoint_name')

    _, _, questionbatch, answerbatch, qlengthbatch, alengthbatch = data(DIR, epochs, batch_size)

    seq2seq = Seq2Seq(**model_param_dict)
    decode_outputs = seq2seq.model(questionbatch, answerbatch, qlengthbatch, alengthbatch,
                                   'train', batch_size, keep_prob)
    train_op, loss, summary_merge, predicts = seq2seq.train(decode_outputs, answerbatch,
                                                            alengthbatch, learning_rate)

    # for saving and restoring the model
    ckpt = tf.train.get_checkpoint_state(modelsaved_dir)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters..')
            saver.restore(sess, tf.train.latest_checkpoint(modelsaved_dir))
        else:
            print('Creating model from scratch..')
            sess.run(tf.global_variables_initializer())

        step = 0
        summary_writer = tf.summary.FileWriter(modelsaved_dir, graph=sess.graph)
        while True:
            try:
                step += 1
                # Do not name this result `loss`: that name already refers to the loss
                # tensor built above, and shadowing it would make the next sess.run fetch
                # a numpy float instead of a Tensor, raising e.g.
                # "Fetch argument 10.112038 has invalid type <class 'numpy.float32'>,
                #  must be a string or Tensor."
                _, temploss, tempsummary = sess.run([train_op, loss, summary_merge])
                print('run step: ', step, end='\r')
                if step % int(params.get('trainparam', 'steps_per_checkpoint')) == 0:
                    perplexity = math.exp(float(temploss)) if temploss < 300 else float('inf')
                    print('save at step: ', step, 'perplexity: ', perplexity)
                    summary_writer.add_summary(tempsummary, step)
                    checkpoint_path = os.path.join(modelsaved_dir, savedname)
                    saver.save(sess, checkpoint_path, global_step=step)
            except:
                print('done')
                break
def start_train(args, data):
    if args.gpu == 0:
        use_cuda = False
    else:
        use_cuda = True

    MAX_ITER = 5000
    epochs = 50
    print_every = 5000
    plot_every = 100
    start_time = time.time()
    plot_losses = []  # stores losses for plotting
    print_loss_total = 0

    input_dim = args.vocab_size
    output_dim = args.vocab_size
    embedding_dim = args.embedding_size
    hidden_dim = args.hidden_size
    learning_rate = args.learning_rate

    input_word = "怎么获得立减券"
    text = data.text2index(input_word)
    rfile = open('../data/questions_viewer.txt', 'w', encoding='utf-8')

    if use_cuda:
        seq2seq_model = Seq2Seq(input_dim, embedding_dim, hidden_dim, output_dim,
                                use_cuda=use_cuda, learning_rate=learning_rate).cuda()
    else:
        seq2seq_model = Seq2Seq(input_dim, embedding_dim, hidden_dim, output_dim,
                                use_cuda=use_cuda, learning_rate=learning_rate)
    seq2seq_model.train()
    print(len(data.source_index))

    for epoch in range(epochs):
        # valid_targets, valid_sources, valid_targets_lengths, valid_source_lengths = data.get_valid_batch()
        # valid_loss = seq2seq_model.seq2seq_train(valid_targets, valid_sources)
        for iter, (source, target) in enumerate(zip(data.source_index, data.target_index)):
            # print(source, target)
            loss = seq2seq_model.seq2seq_train(source, target)
            print_loss_total += loss
            if iter % print_every == 0:
                seq2seq_model.encoder_scheduler.step()
                seq2seq_model.decoder_scheduler.step()
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                time_dif = get_time_dif(start_time)
                print('Epoch {:>3}/{} - Training Loss: {:>6.6f}  Time: {}'.format(
                    epoch, epochs, print_loss_avg, time_dif))
        torch.save(seq2seq_model.state_dict(), args.module_path)
        predict_indices = seq2seq_model.seq2seq_predict(text)
        predict_result = data.index2text(predict_indices)
        print(predict_result)
        rfile.write("".join(predict_result))
        rfile.write('\n')
def _create_model(conf):
    """ Creates a simple model using the given configuration """
    model = Seq2Seq(**conf['seq2seq'])
    output = _add_layers(model.output, conf['top'])
    model = Model(model.input, output)
    return model
def __init_model(self):
    log('Initializing an empty model')
    self.model = Seq2Seq(
        input_size=len(self.input_vocab),
        output_size=len(self.output_vocab),
        hidden_size=constants.HIDDEN_SIZE,
        learning_rate=constants.LEARNING_RATE,
        teacher_forcing_ratio=constants.TEACHER_FORCING_RATIO,
        device=constants.DEVICE)
    log(str(self.model))
def main():
    args = set_arguments()
    seq2seq = Seq2Seq(args)
    # pdb.set_trace()

    if "train" in args.mode:
        trainer = Trainer(args, seq2seq)
        trainer.train()

    print("\ttask =", args.task)
    print("\tunits =", args.units)
    print("\t=" * 50)
    pdb.set_trace()
def interpreter(data_path, model_path):
    """
    Run this function if you want to talk to the seq2seq model.
    Type "exit" to end the conversation.
    :param data_path: the path of the corpus the model was trained on
    :param model_path: the path of the trained model
    :return:
    """
    # call dictionary class
    corpus = ConvCorpus(file_path=None)
    corpus.load(load_dir=data_path)
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id), feature_num=args.feature_num,
                    hidden_num=args.hidden_num, batch_size=1, gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # run conversation system
    print('The system is ready to run, please talk to me!')
    print('( If you want to end a talk, please type "exit". )')
    print('')
    while True:
        print('>> ', end='')
        sentence = input()
        if sentence == 'exit':
            print('See you again!')
            break

        input_vocab = [unicodedata.normalize('NFKC', word.lower())
                       for word in word_tokenize(sentence)]
        input_vocab.reverse()
        input_vocab.insert(0, "<eos>")

        # convert words into IDs
        input_sentence = [corpus.dic.token2id[word] for word in input_vocab
                          if corpus.dic.token2id.get(word) is not None]

        model.initialize()  # initialize cell
        sentence = model.generate(input_sentence,
                                  sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id,
                                  id2word=corpus.dic)
        print("-> ", sentence)
        print('')