def decode_line(sess, model, enc_vocab, rev_dec_vocab, sentence):
    # Get token-ids for the input sentence.
    token_ids = prepareData.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab)
    # Which bucket does it belong to?
    bucket_id = min(
        [b for b in range(len(_buckets)) if _buckets[b][0] > len(token_ids)])
    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        {bucket_id: [(token_ids, [])]}, bucket_id)
    # Get output logits for the sentence.
    _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                     target_weights, bucket_id, True)
    # This is a greedy decoder - outputs are just argmaxes of output_logits.
    outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
    # If there is an EOS symbol in outputs, cut them at that point.
    if prepareData.EOS_ID in outputs:
        outputs = outputs[:outputs.index(prepareData.EOS_ID)]
    return " ".join(
        [tf.compat.as_str(rev_dec_vocab[output]) for output in outputs])
def decode_line(sess, model, enc_vocab, rev_dec_vocab, sentence):
    # Convert the sentence into a list of token ids.
    # tf.compat.as_bytes() encodes the sentence as utf-8, whether or not the
    # incoming sentence has already been encoded.
    token_ids = prepareData.sentence_to_token_ids(tf.compat.as_bytes(sentence), enc_vocab)
    # Decide which bucket the question belongs to; take the smallest one that fits.
    bucket_id = min(
        [b for b in range(len(_buckets)) if _buckets[b][0] > len(token_ids)])
    # Turn the token ids into the inputs the model expects.
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        {bucket_id: [(token_ids, [])]}, bucket_id)
    # Run the model to get its prediction.
    _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                     target_weights, bucket_id, True)
    # Convert the outputs into word ids (greedy argmax over the logits).
    outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
    # If an EOS symbol was generated, cut off everything after it.
    if prepareData.EOS_ID in outputs:
        outputs = outputs[:outputs.index(prepareData.EOS_ID)]
    return " ".join(
        [tf.compat.as_str(rev_dec_vocab[output]) for output in outputs])
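# Sketch (not from the original code): the bucket lookup above raises
# ValueError when the question is longer than the largest bucket, because
# min() is called on an empty list. A minimal guard, assuming _buckets is the
# same list of (encoder_length, decoder_length) pairs used above, could clamp
# to the last bucket and let the caller truncate token_ids if needed.
def pick_bucket(token_ids, buckets):
    """Return the smallest bucket whose encoder length fits token_ids."""
    candidates = [b for b in range(len(buckets)) if buckets[b][0] > len(token_ids)]
    if candidates:
        return min(candidates)
    return len(buckets) - 1  # fall back to the largest bucket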
def test2():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        enc_vocab_path = os.path.join(
            gConfig['data_directory'], "vocab%d.enc" % gConfig['enc_vocab_size'])
        dec_vocab_path = os.path.join(
            gConfig['data_directory'], "vocab%d.dec" % gConfig['dec_vocab_size'])
        enc_vocab, _ = prepareData.initialize_vocabulary(enc_vocab_path)
        _, rev_dec_vocab = prepareData.initialize_vocabulary(dec_vocab_path)

        # Decode from standard input.
        sys.stdout.write("ask> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            # Get token-ids for the input sentence.
            sentence = sentence.strip('\n')
            token_ids = prepareData.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), enc_vocab)
            # print("question token ids:", token_ids)
            # Which bucket does it belong to?
            bucket_id = min([
                b for b in range(len(_buckets)) if _buckets[b][0] > len(token_ids)
            ])
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(token_ids, [])]}, bucket_id)
            # Get output logits for the sentence.
            _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                             target_weights, bucket_id, True)
            # This is a greedy decoder - outputs are just argmaxes of output_logits.
            outputs = [
                int(np.argmax(logit, axis=1)) for logit in output_logits
            ]
            # If there is an EOS symbol in outputs, cut them at that point.
            if prepareData.EOS_ID in outputs:
                outputs = outputs[:outputs.index(prepareData.EOS_ID)]
            # Print out the answer sentence corresponding to outputs.
            # print("answer token ids:", outputs)
            result = "".join([
                tf.compat.as_str(rev_dec_vocab[output]) for output in outputs
            ])
            print("answer> " + result)
            print("ask> ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
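# Illustration (not part of the chatbot): why int(np.argmax(logit, axis=1))
# yields one token id per decoder step. Each entry of output_logits is an
# array of shape [batch_size, vocab_size]; with model.batch_size = 1 the
# argmax over axis=1 is a length-1 array that int() collapses to a plain id.
# The logits below are made-up numbers for a toy 4-word vocabulary.
def _greedy_argmax_demo():
    import numpy as np
    fake_output_logits = [
        np.array([[0.1, 2.5, 0.3, 0.0]]),  # step 0 -> token id 1
        np.array([[0.0, 0.2, 3.1, 0.4]]),  # step 1 -> token id 2
        np.array([[4.0, 0.1, 0.2, 0.3]]),  # step 2 -> token id 0
    ]
    return [int(np.argmax(logit, axis=1)) for logit in fake_output_logits]  # [1, 2, 0]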
def test2():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        enc_vocab_path = os.path.join(FLAGS.data_directory,
                                      "vocab%d.enc" % FLAGS.enc_vocab_size)
        dec_vocab_path = os.path.join(FLAGS.data_directory,
                                      "vocab%d.dec" % FLAGS.dec_vocab_size)
        enc_vocab, _ = prepareData.initialize_vocabulary(enc_vocab_path)
        _, rev_dec_vocab = prepareData.initialize_vocabulary(dec_vocab_path)

        test_data_path = os.path.join(FLAGS.data_directory, "test.enc")
        with open(test_data_path, 'r', encoding='utf-8') as f:
            for line in f.readlines():
                # test.enc is space-separated; drop the spaces and the trailing
                # newline to recover the raw question sentence.
                x_list = line.strip().split(' ')
                sentence = "".join(x_list)
                token_ids = prepareData.sentence_to_token_ids(
                    tf.compat.as_bytes(sentence), enc_vocab)
                bucket_id = min([
                    b for b in range(len(_buckets)) if _buckets[b][0] > len(token_ids)
                ])
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: [(token_ids, [])]}, bucket_id)
                _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                                 target_weights, bucket_id, True)
                # This is a greedy decoder - outputs are just argmaxes of output_logits.
                outputs = [
                    int(np.argmax(logit, axis=1)) for logit in output_logits
                ]
                # If there is an EOS symbol in outputs, cut them at that point.
                if prepareData.EOS_ID in outputs:
                    outputs = outputs[:outputs.index(prepareData.EOS_ID)]
                result = "".join([
                    tf.compat.as_str(rev_dec_vocab[output]) for output in outputs
                ])
                with open(FLAGS.result_path, 'a', encoding='utf-8') as ff:
                    ff.write("ask: " + sentence + "\n")
                    ff.write("answer: " + result + "\n")
                    ff.write("\n")
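# For reference, each line of test.enc produces one blank-line-separated block
# in FLAGS.result_path, of the form (sentences shown as placeholders):
#
#   ask: <question with the token spaces removed>
#   answer: <greedy-decoded reply>
#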
def test():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        enc_vocab_path = os.path.join(FLAGS.data_directory,
                                      "vocab%d.enc" % FLAGS.enc_vocab_size)
        dec_vocab_path = os.path.join(FLAGS.data_directory,
                                      "vocab%d.dec" % FLAGS.dec_vocab_size)
        enc_vocab, _ = prepareData.initialize_vocabulary(enc_vocab_path)
        _, rev_dec_vocab = prepareData.initialize_vocabulary(dec_vocab_path)

        sys.stdout.write("me> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            sentence = sentence.strip('\n')
            token_ids = prepareData.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), enc_vocab)
            bucket_id = min([
                b for b in range(len(_buckets)) if _buckets[b][0] > len(token_ids)
            ])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(token_ids, [])]}, bucket_id)
            # Get output logits for the sentence.
            _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                             target_weights, bucket_id, True)
            # This is a greedy decoder - outputs are just argmaxes of output_logits.
            outputs = [
                int(np.argmax(logit, axis=1)) for logit in output_logits
            ]
            # If there is an EOS symbol in outputs, cut them at that point.
            if prepareData.EOS_ID in outputs:
                outputs = outputs[:outputs.index(prepareData.EOS_ID)]
            result = "".join([
                tf.compat.as_str(rev_dec_vocab[output]) for output in outputs
            ])
            print("AI> " + result)
            print("me> ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
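# Sketch (reuse suggestion, not the original code): test() repeats the
# decoding steps that decode_line above already wraps. Assuming the
# decode_line, create_model, FLAGS and prepareData defined in this module,
# the interactive loop can simply delegate to it. decode_line joins tokens
# with spaces while test() joins them directly, so the spaces are stripped
# before printing.
def test_with_decode_line():
    with tf.Session() as sess:
        model = create_model(sess, True)
        model.batch_size = 1  # decode one sentence at a time
        enc_vocab_path = os.path.join(FLAGS.data_directory,
                                      "vocab%d.enc" % FLAGS.enc_vocab_size)
        dec_vocab_path = os.path.join(FLAGS.data_directory,
                                      "vocab%d.dec" % FLAGS.dec_vocab_size)
        enc_vocab, _ = prepareData.initialize_vocabulary(enc_vocab_path)
        _, rev_dec_vocab = prepareData.initialize_vocabulary(dec_vocab_path)

        sys.stdout.write("me> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            reply = decode_line(sess, model, enc_vocab, rev_dec_vocab,
                                sentence.strip('\n'))
            print("AI> " + reply.replace(" ", ""))
            print("me> ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()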