Example #1
def get_copy_dict(vocab):
    lines = du.read_data(filepaths['copypp'])
    ids = du.convert_to_token_ids(lines, vocab)
    copy_dict = {}
    for line in ids:
        copy_dict[line[0]] = line[1:]
    return copy_dict
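The function above keys each converted line on its first token id and stores the remaining ids as the value. A minimal sketch of the same mapping, assuming du.convert_to_token_ids returns one list of token ids per input line (the toy ids below are made up):

ids = [
    [101, 7, 42, 9],   # key 101 maps to the remaining ids [7, 42, 9]
    [205, 3, 8],       # key 205 maps to [3, 8]
]
copy_dict = {line[0]: line[1:] for line in ids}
print(copy_dict.get(101, []))  # -> [7, 42, 9]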
Example #2
def predict():
    tf.reset_default_graph()
    with tf.Session() as sess:
        model = load_model(sess)

        while True:
            sys.stdout.write("Please input a sentence...\n")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            sentence = sentence.strip().split()

            # [B = 1, T]
            ids = du.convert_to_token_ids([sentence], vocab_dict)
            encoder_inputs = list_transpose(ids)

            # outputs is a list with buckets[bucket_idx][1] elements;
            # each element is a batch_size * vocab_size np.array
            predict_ids = model.predict(sess, encoder_inputs)
            predict_words = [
                vocab_list[token_ids[0]] for token_ids in predict_ids[0].tolist()
            ]
            # Truncate everything after <EOS>
            truncate = len(predict_words)
            for i in range(truncate):
                if predict_words[i] == '<EOS>':
                    truncate = i
                    break
            predict_sentence = ' '.join(predict_words[:truncate])
            print('Predicted paraphrase: %s' % predict_sentence)
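list_transpose is not shown in these examples; judging from the [B = 1, T] comment, it presumably converts the batch-major id matrix into the time-major layout ([T, B]) that TF1 seq2seq models expect. A minimal sketch under that assumption:

def list_transpose(batch):
    # Transpose a batch-major [B, T] list of lists to time-major [T, B];
    # assumes all rows have equal length (pad beforehand if they do not).
    return [list(step) for step in zip(*batch)]

assert list_transpose([[4, 8, 15]]) == [[4], [8], [15]]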
Example #3
def predict_a_lot():
    tf.reset_default_graph()

    with tf.Session() as sess:
        model = load_model(sess)

        while True:
            sentence = input().strip().split()
            if len(sentence) == 0:
                continue

            # [B = 1, T]
            ids = du.convert_to_token_ids([sentence], vocab_dict)
            encoder_inputs = list_transpose(ids)

            # outputs is a list with buckets[bucket_idx][1] elements;
            # each element is a batch_size * vocab_size np.array
            predict_ids = model.predict(sess, encoder_inputs)
            predict_words = [
                vocab_list[token_ids[0]] for token_ids in predict_ids[0].tolist()
            ]
            # Truncate everything after <EOS>
            truncate = len(predict_words)
            for i in range(truncate):
                if predict_words[i] == '<EOS>':
                    truncate = i
                    break
            predict_sentence = ' '.join(predict_words[:truncate])
            print(predict_sentence)
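The <EOS> truncation loop is repeated verbatim in Examples #2 through #4 and could be factored into a small helper. A sketch (the helper name is my own):

def truncate_at_eos(words, eos_token='<EOS>'):
    # Return the prefix of words up to, but excluding, the first EOS token.
    try:
        return words[:words.index(eos_token)]
    except ValueError:  # no EOS was produced; keep the full sequence
        return words

assert truncate_at_eos(['a', 'b', '<EOS>', 'c']) == ['a', 'b']
assert truncate_at_eos(['a', 'b']) == ['a', 'b']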
Example #4
def predict():
    tf.reset_default_graph()
    with tf.Session() as sess:
        model = load_model(sess, forward_only=True)

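        # Decode one sentence at a time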
        batch_size = model.batch_size = 1
        while True:
            sys.stdout.write("Please input a sentence...\n")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            sentence = sentence.strip().split()

            ids_tmp = du.convert_to_token_ids([sentence], vocab_dict)
            ids = ids_tmp[0]

            length = len(ids)
            bucket_idx = -1
            for i, (bucket_size, _) in enumerate(buckets):
                if length < bucket_size:
                    bucket_idx = i
                    break
            if bucket_idx == -1:  # no bucket can hold this sentence
                print("Sorry, this sentence is too long :-(")
            else:
                enc_inputs_all = [[] for _ in range(len(buckets))]
                dec_inputs_all = [[] for _ in range(len(buckets))]
                enc_inputs_all[bucket_idx] = [ids]
                dec_inputs_all[bucket_idx] = [[]]
                encoder_inputs, decoder_inputs, target_weights = get_batch(
                    enc_inputs_all, dec_inputs_all, bucket_idx, batch_size)
                # outputs is a list with buckets[bucket_idx][1] elements;
                # each element is a batch_size * vocab_size np.array
                outputs = model.step(sess,
                                     encoder_inputs,
                                     decoder_inputs,
                                     target_weights,
                                     bucket_idx,
                                     forward_only=True)
                # Greedy decoding: keep the highest-scoring id at each timestep
                predict_ids = [
                    int(np.argmax(output, axis=1)[0]) for output in outputs
                ]
                predict_words = [vocab_list[token_id] for token_id in predict_ids]
                # Truncate everything after <EOS>
                truncate = len(predict_words)
                for i in range(truncate):
                    if predict_words[i] == '<EOS>':
                        truncate = i
                        break
                predict_sentence = ' '.join(predict_words[:truncate])
                print('Predicted paraphrase: %s' % predict_sentence)
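Example #4 decodes greedily: each decoder timestep produces a batch_size * vocab_size logits array, and only the single highest-scoring vocabulary id is kept. A self-contained sketch of that step with NumPy (the toy logits below are made up):

import numpy as np

outputs = [
    np.array([[0.1, 2.0, 0.3, 0.0]]),  # argmax over the vocab axis -> id 1
    np.array([[1.5, 0.2, 0.1, 0.9]]),  # -> id 0
]
predict_ids = [int(np.argmax(logits, axis=1)[0]) for logits in outputs]
assert predict_ids == [1, 0]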