Example #1
    def __init__(self,
                 model_path,
                 vocab_path,
                 learning_rate=0.01,
                 batch_size=1):
        self.word_to_idx = load_word_dict(vocab_path)
        self.idx_to_word = {v: k for k, v in self.word_to_idx.items()}
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        tf.reset_default_graph()
        self.input_data = tf.placeholder(tf.int32, [batch_size, None])
        self.output_targets = tf.placeholder(tf.int32, [batch_size, None])
        # init model
        self.model = rnn_model(model='lstm',
                               input_data=self.input_data,
                               output_data=self.output_targets,
                               vocab_size=len(self.word_to_idx),
                               rnn_size=128,
                               num_layers=2,
                               batch_size=batch_size,
                               learning_rate=learning_rate)
        saver = tf.train.Saver(tf.global_variables())
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        self.sess = tf.Session()
        # init op
        self.sess.run(init_op)
        # restore the latest checkpoint (assumes one exists under model_path)
        checkpoint = tf.train.latest_checkpoint(model_path)
        saver.restore(self.sess, checkpoint)
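All of these examples use the TensorFlow 1.x graph API and lean on project-level helpers (rnn_model, load_word_dict, the token constants, and a conf/config settings module) defined elsewhere in their repositories. A minimal preamble they assume would look roughly like the sketch below; the TF 2.x compatibility line is my assumption and is only needed if you run the snippets under TF 2.x:

import os

import numpy as np
import tensorflow as tf  # TF 1.x API; under TF 2.x: import tensorflow.compat.v1 as tf; tf.disable_v2_behavior()

# Assumed project-level helpers (not defined in these snippets):
# rnn_model, load_word_dict, process_data, generate_batch, to_word,
# START_TOKEN, END_TOKEN, UNK_TOKEN, and a conf/config settings module.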
Example #2
def ppl(sentence_list):
    ppl_list = []
    # load data dict
    word_to_int = load_word_dict(conf.word_dict_path)
    # init params
    batch_size = 1
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int32, [batch_size, None])
    output_targets = tf.placeholder(tf.int32, [batch_size, None])
    # init model
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=batch_size,
                           learning_rate=conf.learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        # init op
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(conf.model_dir)
        saver.restore(sess, checkpoint)
        print("loading model from the checkpoint {0}".format(checkpoint))

        # infer each sentence
        for sentence in sentence_list:
            ppl = 0
            # data idx
            x = [word_to_int.get(c, word_to_int[UNK_TOKEN]) for c in sentence]
            x = [word_to_int[START_TOKEN]] + x + [word_to_int[END_TOKEN]]
            print('x:', x)
            # reshape
            y = np.array(x[1:]).reshape((-1, batch_size))
            x = np.array(x[:-1]).reshape((-1, batch_size))
            print(x.shape)
            print(y.shape)
            # get each word perplexity
            word_count = x.shape[0]
            for i in range(word_count):
                perplexity = sess.run(end_points['perplexity'],
                                      feed_dict={
                                          input_data: x[i:i + 1, :],
                                          output_targets: y[i:i + 1, :]
                                      })
                print('{0} -> {1}, perplexity: {2}'.format(
                    x[i:i + 1, :], y[i:i + 1, :], perplexity))
                if i == 0 or i == word_count - 1:
                    # skip the START -> first-char and last-char -> END steps
                    continue
                ppl += perplexity
            # average per-step perplexity over the inner (word_count - 2) characters
            ppl /= (word_count - 2)
            print('perplexity:' + str(ppl))
            ppl_list.append(ppl)
    return ppl_list
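A minimal usage sketch for ppl (the sentences are my own illustration; a trained checkpoint must already exist under conf.model_dir). A fluent sentence should come back with a lower average perplexity than a scrambled one:

# Hypothetical call: compare a fluent sentence with a scrambled variant.
scores = ppl(['我是中国人', '中国是我人'])
print(scores)  # the first, fluent sentence should get the lower score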
Example #3
def generate(begin_word):
    batch_size = 1
    word_to_idx = load_word_dict(config.word_dict_path)
    vocabularies = list(word_to_idx.keys())
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int32, [batch_size, None])
    # output_data=None puts rnn_model in inference mode; the batch_size
    # argument is presumably ignored then (the feed below is shape (1, 1))
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=None,
                           vocab_size=len(word_to_idx),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=64,
                           learning_rate=0.0002)

    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(config.model_dir)
        saver.restore(sess, checkpoint)
        print("loading model from the checkpoint {0}".format(checkpoint))
        x = np.array([list(map(word_to_idx.get, START_TOKEN))])
        predict, last_state = sess.run(
            [end_points['prediction'], end_points['last_state']],
            feed_dict={input_data: x})
        if begin_word:
            word = begin_word
        else:
            word = to_word(predict, vocabularies)
        sentence = ''
        i = 0
        while word not in (END_TOKEN, START_TOKEN, UNK_TOKEN):
            sentence += word
            i += 1
            if i >= 24:
                break
            x = np.zeros((1, 1))
            try:
                x[0, 0] = word_to_idx[word]
            except KeyError:
                print("please enter a chinese char again.")
                break
            predict, last_state = sess.run(
                [end_points['prediction'], end_points['last_state']],
                feed_dict={input_data: x,
                           end_points['initial_state']: last_state})
            word = to_word(predict, vocabularies)

        return sentence
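A usage sketch for generate (my assumption: the seed should be a single in-vocabulary character, since an unknown one hits the KeyError branch and stops generation):

# Hypothetical call: seed generation with one vocabulary character.
print(generate('春'))  # emits up to 24 characters, or stops at END_TOKEN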
Example #4
def main(_):
    # build vocab and word dict
    data_vector, word_to_int = process_data(config.train_word_path, config.word_dict_path, config.cutoff_frequency)
    # batch data
    batches_inputs, batches_outputs = generate_batch(config.batch_size, data_vector, word_to_int)
    # placeholder
    input_data = tf.placeholder(tf.int32, [config.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [config.batch_size, None])
    # create model
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=config.batch_size,
                           learning_rate=config.learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    # start
    with tf.Session(config=tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)) as sess:
        # init
        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(config.model_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("restore from the checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])
        print('start training...')
        try:
            for epoch in range(start_epoch, config.epochs):
                n = 0
                n_chunk = len(data_vector) // config.batch_size
                for batch in range(n_chunk):
                    loss, _, _, perplexity = sess.run([
                        end_points['total_loss'],
                        end_points['last_state'],
                        end_points['train_op'],
                        end_points['perplexity']
                    ], feed_dict={input_data: batches_inputs[n], output_targets: batches_outputs[n]})
                    n += 1
                    print('Epoch: %d, batch: %d, training loss: %.6f, ppl: %.1f' % (epoch, batch, loss, perplexity))
                if epoch % config.num_save_epochs == 0:
                    saver.save(sess, os.path.join(config.model_dir, config.model_prefix), global_step=epoch)
                    print('Save model to %s, epoch: %d' % (os.path.join(config.model_dir, config.model_prefix), epoch))
        except KeyboardInterrupt:
            print('Interrupted manually; saving checkpoint...')
            saver.save(sess, os.path.join(config.model_dir, config.model_prefix), global_step=epoch)
            print('Checkpoint saved; next run will resume from epoch {}.'.format(epoch))
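The main(_) signature with a throwaway argument is the one expected by tf.app.run(), the standard TF 1.x entry point, so the full script presumably ends with something like:

if __name__ == '__main__':
    tf.app.run()  # parses flags, then invokes main(_)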
Example #5
def main(_):
    # build vocab and word dict
    data_vector, word_to_int = process_data(conf.train_word_path, conf.word_dict_path)
    # batch data
    batches_inputs, batches_outputs = generate_batch(conf.batch_size, data_vector, word_to_int)
    # placeholder
    input_data = tf.placeholder(tf.int32, [conf.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [conf.batch_size, None])
    # create model
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=conf.batch_size,
                           learning_rate=conf.learning_rate)
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    # start
    with tf.Session() as sess:
        # init
        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(conf.model_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("restore from the checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])
        print('start training...')
        try:
            for epoch in range(start_epoch, conf.epochs):
                n = 0
                n_chunk = len(data_vector) // conf.batch_size
                for batch in range(n_chunk):
                    loss, _, _, perplexity = sess.run([
                        end_points['total_loss'],
                        end_points['last_state'],
                        end_points['train_op'],
                        end_points['perplexity']
                    ], feed_dict={input_data: batches_inputs[n], output_targets: batches_outputs[n]})
                    n += 1
                    print('Epoch: %d, batch: %d, training loss: %.6f, ppl: %.1f' % (epoch, batch, loss, perplexity))
                if epoch % conf.num_save_epochs == 0:
                    saver.save(sess, os.path.join(conf.model_dir, conf.model_prefix), global_step=epoch)
        except KeyboardInterrupt:
            print('Interrupted manually; saving checkpoint...')
            saver.save(sess, os.path.join(conf.model_dir, conf.model_prefix), global_step=epoch)
            print('Checkpoint saved; next run will resume from epoch {}.'.format(epoch))
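The resume logic works because Saver.save(..., global_step=epoch) appends "-<epoch>" to the checkpoint path, so the epoch number can be parsed back out of the file name. A worked example (the path is hypothetical):

# saver.save(sess, 'model/lm', global_step=12) writes checkpoint 'model/lm-12';
# splitting on '-' recovers the epoch to resume from.
checkpoint = 'model/lm-12'
start_epoch = int(checkpoint.split('-')[-1])  # -> 12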
Example #6
def generate(begin_word):
    batch_size = 1
    word_to_int = load_word_dict(conf.word_dict_path)
    vocabularies = list(word_to_int.keys())
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int32, [batch_size, None])
    # output_data=None puts rnn_model in inference mode; the batch_size
    # argument is presumably ignored then (the feed below is shape (1, 1))
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=None,
                           vocab_size=len(word_to_int),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=64,
                           learning_rate=0.0002)

    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        checkpoint = tf.train.latest_checkpoint(conf.model_dir)
        saver.restore(sess, checkpoint)
        print("loading model from the checkpoint {0}".format(checkpoint))
        x = np.array([list(map(word_to_int.get, START_TOKEN))])
        predict, last_state = sess.run([end_points['prediction'],
                                        end_points['last_state']],
                                       feed_dict={input_data: x})
        if begin_word:
            word = begin_word
        else:
            word = to_word(predict, vocabularies)
        sentence = ''
        i = 0
        while word not in (END_TOKEN, START_TOKEN, UNK_TOKEN):
            sentence += word
            i += 1
            if i >= 24:
                break
            x = np.zeros((1, 1))
            try:
                x[0, 0] = word_to_int[word]
            except KeyError:
                print("please enter a chinese char again.")
                break
            predict, last_state = sess.run([end_points['prediction'], end_points['last_state']],
                                           feed_dict={input_data: x, end_points['initial_state']: last_state})
            word = to_word(predict, vocabularies)

        return sentence
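When begin_word is falsy, the opening character is sampled from the model's own first prediction via to_word, so a fully model-driven call is simply:

# Hypothetical call: let the model choose its own opening character.
print(generate(None))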