def training_result(num=0):
    # Check a single training result.
    # The model is trained on [English word, Korean word] pairs, but at prediction
    # time the Korean word is unknown, so the decoder input is filled with the
    # meaningless padding value 'P': ['word', 'PPPP'].
    start = num
    end = num + 1
    encoder_inputs, decoder_inputs, targets_vector, target_weights = tool.make_batch(
        encoderinputs[start:end], decoderinputs[start:end],
        targets_[start:end], targetweights[start:end])

    # Embed the encoder/decoder word indices as vectors for the feed_dict.
    encoder_vector = []
    decoder_vector = []
    temp_encoder = []
    temp_decoder = []
    for j in range(encoder_size):
        temp_word = ix_to_word[encoder_inputs[j][0]][:3]
        temp_encoder.append(word_to_vector[temp_word])
    for j in range(decoder_size):
        temp_word = ix_to_word[decoder_inputs[j][0]][:3]
        temp_decoder.append(word_to_vector[temp_word])
    encoder_vector.append(np.array(temp_encoder))
    decoder_vector.append(np.array(temp_decoder))
    targets_vector = np.transpose(targets_vector)

    # The model output has shape [batch size, time step, input], so take argmax
    # over the last (input) dimension to turn the highest-probability entry at
    # each time step into the predicted word index.
    prediction = tf.argmax(model, 2)
    result = sess.run(prediction,
                      feed_dict={enc_input: encoder_vector,
                                 dec_input: decoder_vector,
                                 targets: targets_vector})

    # Map the resulting indices back to words and build the output strings.
    # print(result[0])
    result_target = ""
    predict_target = ""
    training_resultd = ""
    for target_index in targets_vector[0]:
        result_target += ix_to_word[target_index]
        result_target += " "
    for result_index in result[0]:
        predict_target += ix_to_word[result_index]
        predict_target += " "
    training_resultd = (str(num) + "\ntarget : " + result_target +
                        "\npredict : " + predict_target)
    # Everything after 'E', which marks the end of the output, could be dropped here.
    # end = decoded.index('E')
    return training_resultd
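# Usage sketch (an assumption, not part of the original script): sess, model and
# the vocabulary tables above must already be in scope. It simply prints the
# target/prediction comparison for the first few training samples.
for sample_index in range(3):
    print(training_result(sample_index))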
start = 0
end = batch_size
index = 0
while current_step < 10000001:
    if end > len(title):
        start = 0
        end = batch_size
    if index > len(test_title):
        index = 0

    # Get a batch and make a step.
    start_time = time.time()
    encoder_inputs, decoder_inputs, targets, target_weights = tool.make_batch(
        encoderinputs[start:end], decoderinputs[start:end],
        targets_[start:end], targetweights[start:end])

    if current_step % steps_per_checkpoint == 0:
        # For evaluation, overwrite the decoder inputs (except the first symbol)
        # with <PAD> so the model cannot see the answer it is supposed to predict.
        for i in range(decoder_size - 2):
            decoder_inputs[i + 1] = np.array([word_to_ix['<PAD>']] * batch_size)
        output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                   targets, target_weights, True)
        predict = [np.argmax(logit, axis=1)[0] for logit in output_logits]
        predict = ' '.join(ix_to_word[ix] for ix in predict)
        real = [word[0] for word in targets]
        real = ' '.join(ix_to_word[ix] for ix in real)
        print('\n----\n step : %s \n time : %s \n LOSS : %s \n prediction : %s \n'
              ' trimmed answer : %s \n answer : %s \n----'
              % (current_step, step_time, loss, predict, real, title[start]))
        loss, step_time = 0.0, 0.0

    if (current_step) % 100 == 0:
        _encoder_inputs, _decoder_inputs, _targets, _target_weights = tool.make_batch(
    forward_only=forward_only)
sess.run(tf.global_variables_initializer())
step_time, loss = 0.0, 0.0
current_step = 0
start = 0
end = batch_size

while current_step < 10000001:
    if end > len(title):
        start = 0
        end = batch_size

    # Get a batch and make a step.
    start_time = time.time()
    encoder_inputs, decoder_inputs, targets, target_weights = tool.make_batch(
        encoderinputs[start:end], decoderinputs[start:end],
        targets_[start:end], targetweights[start:end])

    if current_step % steps_per_checkpoint == 0:
        # For evaluation, overwrite the decoder inputs (except the first symbol)
        # with <PAD> so the model cannot see the answer it is supposed to predict.
        for i in range(decoder_size - 2):
            decoder_inputs[i + 1] = np.array([word_to_ix['<PAD>']] * batch_size)
        output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                   targets, target_weights, True)
        predict = [np.argmax(logit, axis=1)[0] for logit in output_logits]
        predict = ' '.join(ix_to_word[ix][0] for ix in predict)
        real = [word[0] for word in targets]
        real = ' '.join(ix_to_word[ix][0] for ix in real)
        print('\n----\n step : %s \n time : %s \n LOSS : %s \n prediction : %s \n'
              ' trimmed answer : %s \n answer : %s \n----'
              % (current_step, step_time, loss, predict, real, title[start]))
        loss, step_time = 0.0, 0.0

    step_loss = model.step(sess, encoder_inputs, decoder_inputs, targets,
                           target_weights, False)
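    # (A sketch, not original code.) The tail of this loop is omitted above; in this
    # style of training loop the step time and loss are typically accumulated and
    # averaged over steps_per_checkpoint, and the batch window is advanced:
    step_time += (time.time() - start_time) / steps_per_checkpoint
    loss += step_loss / steps_per_checkpoint
    current_step += 1
    start += batch_size
    end += batch_size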
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=model, labels=targets))  # sparse_softmax_cross_entropy_with_logits
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)  # Adam optimizer

#########
# Train the neural network model
######
sess = tf.Session()
sess.run(tf.global_variables_initializer())

start = 0
end = batch_size
# Fetch the encoder data, decoder data and target data for the first batch.
encoder_inputs, decoder_inputs, targets_vector, target_weights = tool.make_batch(
    encoderinputs[start:end], decoderinputs[start:end],
    targets_[start:end], targetweights[start:end])

encoder_vector = []
decoder_vector = []
# These batches have to be transposed before they are used.
for i in range(batch_size):
    # Embed each word of the batch.
    temp_encoder = []
    temp_decoder = []
    for j in range(encoder_size):
        temp_word = ix_to_word[encoder_inputs[j][i]][:3]
        temp_encoder.append(word_to_vector[temp_word])
    for j in range(decoder_size):
        temp_word = ix_to_word[decoder_inputs[j][i]][:3]
        # if temp_word == "<PA":  # reduce the number of PAD tokens
        # if j == 5:
        #     break
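# A minimal, self-contained sketch of the shape convention used above (the
# placeholder names and the sizes 10 and 5000 are assumptions, not values from the
# original code): sparse softmax cross-entropy pairs float logits of shape
# [batch, time, vocab] with integer labels of shape [batch, time].
import tensorflow as tf

example_logits = tf.placeholder(tf.float32, [None, 10, 5000])  # [batch, time, vocab]
example_labels = tf.placeholder(tf.int64, [None, 10])          # [batch, time]
example_cost = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=example_logits, labels=example_labels))
example_train_op = tf.train.AdamOptimizer(0.001).minimize(example_cost)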
decoderinputs_test_tmp = []
targets__test_tmp = []
targetweights_test_tmp = []
test_all = 0
test_correct = 0
for k in range(0, int(test_size * (1 / batch_size))):
    test_all += batch_size
    test_start = k * batch_size
    test_end = k * batch_size + batch_size
    encoderinputs_test_tmp = encoderinputs_test[test_start:test_end]
    decoderinputs_test_tmp = decoderinputs_test[test_start:test_end]
    targets__test_tmp = targets__test[test_start:test_end]
    targetweights_test_tmp = targetweights_test[test_start:test_end]
    title_test_tmp = title_test[test_start:test_end]
    encoder_inputs, decoder_inputs, targets, target_weights = tool.make_batch(
        encoderinputs_test_tmp, decoderinputs_test_tmp,
        targets__test_tmp, targetweights_test_tmp)

    # For evaluation, overwrite the decoder inputs (except the first symbol)
    # with <PAD> so the model cannot see the answer it is supposed to predict.
    for i in range(decoder_size - 2):
        decoder_inputs[i + 1] = np.array([word_to_ix['<PAD>']] * batch_size)
    output_logits = model.step(sess, encoder_inputs, decoder_inputs, targets,
                               target_weights, True)

    # Count how many predictions in this batch match the true title.
    for i in range(0, len(np.argmax(output_logits[0], axis=1))):
        # print(ix_to_word[np.argmax(output_logits[0], axis=1)[i]] + ' ' + title_tmp[i])
        if title_test_tmp[i] == "직장생활":
            # Special case: this label counts as correct when the model predicts
            # its shortened form "직장생".
            if ix_to_word[np.argmax(output_logits[0], axis=1)[i]] == "직장생":
                test_correct += 1
        else:
            if ix_to_word[np.argmax(output_logits[0], axis=1)[i]] == title_test_tmp[i]:
                test_correct += 1
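# A small follow-up sketch (not in the original fragment): once the evaluation loop
# above has finished, the running counters can be reported as a test accuracy.
if test_all > 0:
    print('test accuracy : %.4f (%d / %d)'
          % (float(test_correct) / test_all, test_correct, test_all))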
def train(batch_size=2, epoch=100):
    model = Seq2Seq(vocab_size)

    with tf.Session() as sess:
        # TODO: Loading the session and saving summaries for logging should be
        # moved into the Seq2Seq model itself.
        ckpt = tf.train.get_checkpoint_state("./model2")
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Restoring the model from", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating a new model.")
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter("./logs", sess.graph)

        step_time, loss = 0.0, 0.0
        current_step = 0
        start = 0
        end = batch_size

        while current_step < 10000001:
            if end > len(title):
                start = 0
                end = batch_size

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, targets, target_weights = tool.make_batch(
                encoderinputs[start:end], decoderinputs[start:end],
                targets_[start:end], targetweights[start:end])

            # Embed each word of the batch (rebuilt every step so the lists do not
            # grow across iterations).
            encoder_vector = []
            decoder_vector = []
            for batch_size_ in range(batch_size):
                temp_encoder = []
                temp_decoder = []
                for j in range(encoder_size):
                    temp_word = ix_to_word[encoder_inputs[j][batch_size_]][:3]
                    temp_encoder.append(word_to_vector[temp_word])
                for j in range(decoder_size):
                    temp_word = ix_to_word[decoder_inputs[j][batch_size_]][:3]
                    temp_decoder.append(word_to_vector[temp_word])
                encoder_vector.append(np.array(temp_encoder))
                decoder_vector.append(np.array(temp_decoder))

            targets_vector = np.transpose(targets)

            # temp_word = ix_to_word[decoder_inputs[j][i]][:3]
            # temp_decoder.append(word_to_vector[temp_word])
            # encoder_vector.append(np.array(temp_encoder))
            # decoder_vector.append(np.array(temp_decoder))

            # for i in range(encoder_size):
            #     temp_encoder = []
            #     for j in range(batch_size):
            #         temp_word = ix_to_word[encoder_inputs[i][j]][:3]
            #         temp_encoder.append(word_to_vector[temp_word])
            #     encoder_vector.append(np.array(temp_encoder))
            #
            # for i in range(decoder_size):
            #     temp_decoder = []
            #     for j in range(batch_size):
            #         temp_word = ix_to_word[decoder_inputs[i][j]][:3]
            #         temp_decoder.append(word_to_vector[temp_word])
            #     decoder_vector.append(np.array(temp_decoder))

            # for step in range(total_batch * epoch):
            #     enc_input, dec_input, targets = dialog.next_batch(batch_size)

            _, loss = model.train(sess, encoder_vector, decoder_vector, targets_vector)

            if (current_step + 1) % 100 == 0:
                model.write_logs(sess, writer, encoder_inputs, decoder_inputs, targets_vector)
                print('Step:', '%06d' % model.global_step.eval(),
                      'cost =', '{:.6f}'.format(loss))

            # Advance to the next batch.
            current_step += 1
            start += batch_size
            end += batch_size

        checkpoint_path = os.path.join("./model2", "news.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        print('Optimization finished!')
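# Usage sketch (an assumption about how the module is launched; not shown in the
# original fragment):
if __name__ == '__main__':
    train(batch_size=2, epoch=100)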