import sys

import numpy as np
import tensorflow as tf

# `du` (the data-utilities module), `filepaths`, `vocab_dict`, `vocab_list`,
# `buckets`, `load_model`, `list_transpose`, and `get_batch` are assumed to
# be defined elsewhere in this module.


def get_copy_dict(vocab):
    # Build a copy dictionary from the copy-paraphrase file: each line's
    # first token id maps to the remaining token ids on that line.
    lines = du.read_data(filepaths['copypp'])
    ids = du.convert_to_token_ids(lines, vocab)
    copy_dict = {}
    for line in ids:
        copy_dict[line[0]] = line[1:]
    return copy_dict
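# Usage sketch (hypothetical; not called anywhere in this file): look up the
# copyable paraphrase tokens for a single source word. `vocab_dict` and
# `vocab_list` are the assumed module-level word-to-id map and id-to-word list.
def _show_copy_candidates(word):
    copy_dict = get_copy_dict(vocab_dict)
    candidates = copy_dict.get(vocab_dict[word], [])
    return [vocab_list[i] for i in candidates]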
def predict():
    tf.reset_default_graph()
    with tf.Session() as sess:
        model = load_model(sess)
        while True:
            sys.stdout.write("Please input a sentence...\n")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            if not sentence:  # EOF: stdin was closed
                break
            sentence = sentence.strip().split()
            # [B = 1, T]
            ids = du.convert_to_token_ids([sentence], vocab_dict)
            encoder_inputs = list_transpose(ids)
            # The model output is a list with one element per decoder step;
            # each element is a batch_size x vocab_size np.array.
            predict_ids = model.predict(sess, encoder_inputs)
            predict_words = [vocab_list[token_id[0]]
                             for token_id in predict_ids[0].tolist()]
            # Truncate at the first <EOS> (the tag itself is dropped too).
            truncate = len(predict_words)
            for i in range(truncate):
                if predict_words[i] == '<EOS>':
                    truncate = i
                    break
            predict_sentence = ' '.join(predict_words[:truncate])
            print('Predicted paraphrase: %s' % predict_sentence)
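# `list_transpose` is defined elsewhere in this module. A minimal sketch of
# its assumed behavior, shown for reference only: it flips the batch-major
# [B, T] list of token-id lists into the time-major [T, B] layout that the
# encoder consumes.
def _list_transpose_sketch(batch_major):
    return [list(step) for step in zip(*batch_major)]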
def predict_a_lot():
    # Batch variant of predict(): reads sentences from stdin until EOF, so it
    # can be driven by a redirected file, and prints only the paraphrases.
    tf.reset_default_graph()
    with tf.Session() as sess:
        model = load_model(sess)
        while True:
            try:
                sentence = input()
            except EOFError:  # stdin exhausted
                break
            if len(sentence.strip().split()) == 0:
                continue
            # print(sentence)
            sentence = sentence.strip().split()
            # [B = 1, T]
            ids = du.convert_to_token_ids([sentence], vocab_dict)
            encoder_inputs = list_transpose(ids)
            # The model output is a list with one element per decoder step;
            # each element is a batch_size x vocab_size np.array.
            predict_ids = model.predict(sess, encoder_inputs)
            predict_words = [vocab_list[token_id[0]]
                             for token_id in predict_ids[0].tolist()]
            # Truncate at the first <EOS> (the tag itself is dropped too).
            truncate = len(predict_words)
            for i in range(truncate):
                if predict_words[i] == '<EOS>':
                    truncate = i
                    break
            predict_sentence = ' '.join(predict_words[:truncate])
            print(predict_sentence)
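# The <EOS> truncation loop above is repeated in every prediction function;
# an equivalent one-line helper (a sketch, not wired in anywhere) would be:
def _truncate_at_eos(words, eos='<EOS>'):
    return words[:words.index(eos)] if eos in words else words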
def predict():
    # NOTE: this second definition of predict() shadows the bucket-free
    # version above; it routes the input sentence through the seq2seq buckets.
    tf.reset_default_graph()
    with tf.Session() as sess:
        model = load_model(sess, forward_only=True)
        batch_size = model.batch_size = 1
        while True:
            sys.stdout.write("Please input a sentence...\n")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            if not sentence:  # EOF: stdin was closed
                break
            sentence = sentence.strip().split()
            ids_tmp = du.convert_to_token_ids([sentence], vocab_dict)
            ids = ids_tmp[0]
            length = len(ids)
            # Pick the first bucket whose encoder size exceeds the sentence
            # length; bucket_idx stays -1 if no bucket is large enough.
            bucket_idx = -1
            for (i, (bucket_size, _)) in enumerate(buckets):
                if length < bucket_size:
                    bucket_idx = i
                    break
            if bucket_idx == -1:
                print("Sorry, this sentence is too long :-(")
            else:
                enc_inputs_all = [[] for _ in range(len(buckets))]
                dec_inputs_all = [[] for _ in range(len(buckets))]
                enc_inputs_all[bucket_idx] = [ids]
                dec_inputs_all[bucket_idx] = [[]]
                encoder_inputs, decoder_inputs, target_weights = get_batch(
                    enc_inputs_all, dec_inputs_all, bucket_idx, batch_size)
                # `outputs` is a list of buckets[bucket_idx][1] elements;
                # each element is a batch_size x vocab_size np.array.
                outputs = model.step(sess, encoder_inputs, decoder_inputs,
                                     target_weights, bucket_idx,
                                     forward_only=True)
                # Greedy decoding: take the argmax token at every step.
                predict_ids = [int(np.argmax(output, axis=1))
                               for output in outputs]
                predict_words = [vocab_list[token_id]
                                 for token_id in predict_ids]
                # Truncate at the first <EOS> (the tag itself is dropped too).
                truncate = len(predict_words)
                for i in range(truncate):
                    if predict_words[i] == '<EOS>':
                        truncate = i
                        break
                predict_sentence = ' '.join(predict_words[:truncate])
                print('Predicted paraphrase: %s' % predict_sentence)
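# The bucket lookup above, factored into a standalone sketch with a worked
# example on hypothetical buckets: with buckets = [(5, 10), (10, 15), (20, 25)],
# a 7-token sentence selects bucket 1 (the first encoder size strictly larger
# than 7), while a 25-token sentence returns -1 and is rejected as too long.
def _select_bucket(length, buckets):
    for i, (bucket_size, _) in enumerate(buckets):
        if length < bucket_size:
            return i
    return -1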