Example #1
def predict():
    def _get_test_dataset():
        with open(TEST_DATASET_PATH) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join([
        'results',
        str(FLAGS.num_layers),
        str(FLAGS.size),
        str(FLAGS.vocab_size)
    ])
    results_path = os.path.join(FLAGS.results_dir, results_filename)

    with tf.Session() as sess, open(results_path, 'w') as results_fh:
        # Create model and load parameters.
        model = create_model(sess, forward_only=True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab_path = os.path.join(FLAGS.data_dir,
                                  "vocab%d.in" % FLAGS.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        test_dataset = _get_test_dataset()

        for sentence in test_dataset:
            # Get token-ids for the input sentence.
            predicted_sentence = get_predicted_sentence(
                sentence, vocab, rev_vocab, model, sess)
            print(sentence + ' -> ' + predicted_sentence)

            results_fh.write(predicted_sentence + '\n')
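This first variant decodes every line of a test file and writes one reply per line. It relies on module-level FLAGS and TEST_DATASET_PATH plus the project's own create_model and get_predicted_sentence helpers, none of which are shown here. A minimal sketch of how such flags are typically declared with TensorFlow 1.x; the names follow the attributes used above, while the default values and the test path are placeholders, not the project's actual settings:

import tensorflow as tf

tf.app.flags.DEFINE_integer("num_layers", 2, "Number of stacked RNN layers.")
tf.app.flags.DEFINE_integer("size", 512, "Number of units per layer.")
tf.app.flags.DEFINE_integer("vocab_size", 40000, "Vocabulary size.")
tf.app.flags.DEFINE_string("data_dir", "data", "Directory holding the vocab file.")
tf.app.flags.DEFINE_string("results_dir", "results", "Directory for decoded output.")
FLAGS = tf.app.flags.FLAGS

TEST_DATASET_PATH = "data/test.txt"  # placeholder; the real project supplies its own path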
Example #2
def chat():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, forward_only=True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        vocab_path = os.path.join(FLAGS.data_dir,
                                  "vocab%d.in" % FLAGS.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
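        # Strip everything except Chinese characters and Chinese punctuation,
        # then apply add_space to each remaining character (presumably to
        # insert spaces so the input is character-tokenized).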
        sentence = re.sub(u'[^\u4e00-\u9fa5,。;:?!‘’“”、]', '',
                          sentence.decode('utf-8'))
        sentence = re.sub(u'(?P<chinese>[\u4e00-\u9fa5,。;:?!‘’“”、])',
                          add_space, sentence)

        while sentence:
            predicted_sentence = get_predicted_sentence(
                sentence, vocab, rev_vocab, model, sess)
            print(predicted_sentence)
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            sentence = re.sub(u'[^\u4e00-\u9fa5,。;:?!‘’“”、]', '',
                              sentence.decode('utf-8'))
            sentence = re.sub(u'(?P<chinese>[\u4e00-\u9fa5,。;:?!‘’“”、])',
                              add_space, sentence)
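The add_space helper itself is not part of this example. Given that it is passed to re.sub as the replacement callable for each character matched by the named group chinese, a plausible sketch is simply appending a space after every match; this is an assumption about the missing helper, not the project's actual code. Note also that the sentence.decode('utf-8') calls imply Python 2; under Python 3, sys.stdin.readline() already returns str and the decode step would be dropped.

def add_space(match):
    # Hypothetical helper: emit the matched Chinese character followed by a
    # space so the downstream tokenizer sees one token per character.
    return match.group('chinese') + ' '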
Example #3
def chat(args):
    with tf.Session() as sess:
        # Create model and load parameters.
        args.batch_size = 1  # We decode one sentence at a time.
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir,
                                  "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()

        while sentence:
            predicted_sentence = get_predicted_sentence(
                args, sentence, vocab, rev_vocab, model, sess)
            # print(predicted_sentence)
            if isinstance(predicted_sentence, list):
                for sent in predicted_sentence:
                    print("  (%s) -> %s" % (sent['prob'], sent['dec_inp']))
            else:
                print(sentence, ' -> ', predicted_sentence)

            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
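Unlike the previous variant, this chat passes an args namespace through to get_predicted_sentence and handles two possible return shapes: a plain string, or a list of candidate dictionaries (presumably produced by beam search) carrying a probability and a decoded reply. A hypothetical value illustrating the list shape the loop expects; the keys 'prob' and 'dec_inp' come from the example, the contents are invented:

predicted_sentence = [
    {'prob': 0.42, 'dec_inp': 'i am fine , thanks .'},
    {'prob': 0.17, 'dec_inp': 'not bad .'},
]
# The loop above would print each candidate as "  (0.42) -> i am fine , thanks ."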
Example #4
def predict(args, debug=False):
    def _get_test_dataset():
        # with open(args.test_dataset_path) as test_fh:
        with open(args.input_name) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join([
        'results',
        str(args.num_layers),
        str(args.size),
        str(args.vocab_size)
    ])
    # results_path = os.path.join(args.results_dir, results_filename+'.txt')
    results_path = str(args.output_name)

    with tf.Session() as sess, open(results_path, 'w') as results_fh:
        # Create model and load parameters.
        args.batch_size = 1
        model = create_model(sess, args)

        # Load vocabularies.
        vocab_path = os.path.join(args.data_dir,
                                  "vocab%d.in" % args.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)

        test_dataset = _get_test_dataset()

        for sentence in test_dataset:
            # Get token-ids for the input sentence.
            predicted_sentence = get_predicted_sentence(args,
                                                        sentence,
                                                        vocab,
                                                        rev_vocab,
                                                        model,
                                                        sess,
                                                        debug=debug)
            if isinstance(predicted_sentence, list):
                print("%s : (%s)" % (sentence, datetime.now()))
                # results_fh.write("%s : (%s)\n" % (sentence, datetime.now()))
                for sent in predicted_sentence:
                    print("  (%s) -> %s" % (sent['prob'], sent['dec_inp']))
                    # results_fh.write("  (%f) -> %s\n" % (sent['prob'], sent['dec_inp']))
                    results_fh.write("%s\n" % (sent['dec_inp']))
            else:
                print(sentence, ' -> ', predicted_sentence)
                # results_fh.write("%s -> %s\n" % (sentence, predicted_sentence))
                results_fh.write("%s\n" % (predicted_sentence))
            # break

    # The with-statement has already closed results_fh at this point.
    print("results written to %s" % results_path)
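This variant reads its configuration from an args object rather than tf.app.flags. A minimal sketch of how that namespace might be assembled with argparse; the attribute names mirror the ones used above, while the defaults are placeholders rather than the project's real settings:

import argparse

def build_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--size', type=int, default=512)
    parser.add_argument('--vocab_size', type=int, default=40000)
    parser.add_argument('--data_dir', default='data')
    parser.add_argument('--input_name', default='test.txt',
                        help='File with one test sentence per line.')
    parser.add_argument('--output_name', default='results.txt',
                        help='File that receives one decoded reply per line.')
    return parser.parse_args()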
Example #5
def predict():

    # Load the PMI statistics (pickles need binary mode) and the list of
    # candidate nouns.
    with open('pkl_tianya/q_table.pkl', 'rb') as f:
        qtable = pickle.load(f)
    #with open('pkl_file/n_table.pkl', 'rb') as f:
    #    ntable = pickle.load(f)
    with open('pkl_tianya/co_table.pkl', 'rb') as f:
        cotable = pickle.load(f)
    with open('/home/zyma/work/data_daily_punct/nouns2500.in') as f:
        nouns = [ele.strip() for ele in f.readlines()]

    def _get_test_dataset():
        with open(TEST_DATASET_PATH) as test_fh:
            test_sentences = [s.strip() for s in test_fh.readlines()]
        return test_sentences

    results_filename = '_'.join([
        'results',
        str(FLAGS.num_layers),
        str(FLAGS.size),
        str(FLAGS.vocab_size)
    ])
    results_path = os.path.join(FLAGS.results_dir, results_filename)
    #ss = u'你好'
    #ss = ss.encode('utf-8')
    #print(ss)
    with tf.Session() as sess, open(results_path, 'a') as results_fh:
        #with tf.Session() as sess:
        # Create model and load parameters.
        bw_model, fw_model = create_model(sess)
        bw_model.batch_size = 1
        fw_model.batch_size = 1
        # Load vocabularies.
        vocab_path = os.path.join(FLAGS.data_dir,
                                  "vocab%d.in" % FLAGS.vocab_size)
        vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
        print(list(vocab.items())[:20])

        test_dataset = _get_test_dataset()
        #test_dataset = test_dataset[374:]
        #predicted_sentences = beam_search(test_dataset,vocab,rev_vocab,model,sess)
        #results_fh.write('\n'.join(predicted_sentences))

        for sentence in test_dataset:
            # Get token-ids for the input sentence.
            #best,predicted_sentences,scores = beam_search(sentence, vocab, rev_vocab, model, sess)
            key_word = sentencePMI(sentence, cotable, qtable, nouns)
            print('key_word:%s' % key_word)
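            # Two-pass decoding: the backward model generates text conditioned
            # on the input and the keyword, its output is truncated to 10
            # tokens and reversed, and the forward model then completes the
            # reply from that reversed prefix.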
            bw_sentence = get_predicted_sentence(sentence, key_word, vocab,
                                                 rev_vocab, bw_model, sess)
            print(bw_sentence)
            bw_sentence = bw_sentence.split()[:10]
            bw_sentence.reverse()
            bw_sentence = ' '.join(bw_sentence)
            print('bw_sentence:%s' % bw_sentence)
            predicted_sentences = get_predicted_sentence(
                sentence, bw_sentence, vocab, rev_vocab, fw_model, sess)
            print(sentence + ' -> ' + predicted_sentences)
            #predicted_sentences = predicted_sentences.split()
            #predicted_sentences = ' '.join(predicted_sentences[:1])
            #predicted_sentences = get_predicted_sentence(sentence,None,vocab,rev_vocab,fw_model,sess)
            #print(sentence+' ---> '+predicted_sentences)
            #print ('\n'.join([str(ele)+','+predicted_sentences[ind] for ind,ele in enumerate(scores)]))
            #print(len(scores))
            #results_fh.write(best+'\n')

            results_fh.write(predicted_sentences + '(%s)' % key_word + '\n')
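The keyword extractor sentencePMI is not shown in this example. A rough sketch of what such a helper could look like, assuming qtable maps a word to its corpus count, cotable maps a (word, noun) pair to a co-occurrence count, and the chosen keyword is the candidate noun with the highest average pointwise mutual information against the sentence's words; the table layouts and the corpus-size constant are assumptions, not the project's actual data structures:

import math

def sentence_pmi(sentence, cotable, qtable, nouns, total=1e8):
    # Hypothetical re-implementation: score every candidate noun by its average
    # PMI with the words of the sentence and return the highest-scoring one.
    # `total` stands in for the corpus size that turns counts into probabilities.
    words = sentence.split()
    best_noun, best_score = None, float('-inf')
    for noun in nouns:
        score = 0.0
        for w in words:
            co = cotable.get((w, noun), 0)
            if co and w in qtable and noun in qtable:
                p_xy = co / total
                p_x = qtable[w] / total
                p_y = qtable[noun] / total
                score += math.log(p_xy / (p_x * p_y))
        score /= max(len(words), 1)
        if score > best_score:
            best_noun, best_score = noun, score
    return best_noun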