Esempio n. 1
0
def chatbot_api(infos):
    """Answer a single chat message with the pretrained seq2seq model.

    Builds a fresh decode-mode Seq2Seq graph (batch size 1), restores the
    checkpoint from BASE_MODEL_DIR/MODEL_NAME, and returns the decoded
    reply for *infos*.

    :param infos: the user's chat message (str, or None)
    :return: the model's reply, or a prompt string when the input is empty
    """
    du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    batch_size = 1  # decode one sentence at a time
    tf.reset_default_graph()
    model = Seq2Seq(batch_size=batch_size,
                    encoder_vocab_size=du.vocab_size,
                    decoder_vocab_size=du.vocab_size,
                    mode='decode',
                    **model_config)
    # Let the session grow GPU memory on demand instead of grabbing it all.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        model.load(sess, save_path)
        # Original wrapped this in `while True` with an unreachable
        # `continue` after `return`; the body always returned on the first
        # pass, so a single straight-line pass is equivalent.
        q = infos
        if q is None or q.strip() == '':
            return "请输入聊天信息"
        q = q.strip()
        indexs = du.transform_sentence(q)  # sentence -> vocab indexes
        x = np.asarray(indexs).reshape((1, -1))
        xl = np.asarray(len(indexs)).reshape((1, ))  # actual sequence length
        # x and xl are already ndarrays; the original's extra np.array()
        # wrappers were redundant copies.
        pred = model.predict(sess, x, xl)
        result = du.transform_indexs(pred[0])  # indexes -> sentence
        return result
Esempio n. 2
0
def train():
    """
    Train the seq2seq model.

    Builds a train-mode Seq2Seq graph, writes the graph for TensorBoard,
    optionally restores an existing checkpoint, and exports the model.

    NOTE(review): this snippet appears truncated by the scrape — `steps`
    is computed but never used, no training loop follows, and the trailing
    triple-quote opens an unterminated string.  Confirm against the full
    original source before relying on it.
    :return:
    """
    du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    steps = int(len(du) / batch_size) + 1  # batches per epoch (unused here)

    # Let the session request GPU memory on demand instead of all upfront.
    tf.reset_default_graph()
    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.9
    config.gpu_options.allow_growth = True

    with tf.Graph().as_default():
        with tf.Session(config=config) as sess:
            # Define the model.
            model = Seq2Seq(batch_size=batch_size,
                            encoder_vocab_size=du.vocab_size,
                            decoder_vocab_size=du.vocab_size,
                            mode='train',
                            **model_config)

            init = tf.global_variables_initializer()
            writer = tf.summary.FileWriter('./graph/nlp', sess.graph)
            sess.run(init)
            if continue_train:
                model.load(sess, save_path)
            model.export(sess)
            '''
Esempio n. 3
0
def train():
    """
    Train the seq2seq model on the pickled training split.

    Loads `trainset` from dataset.pkl, runs `n_epoch` epochs of mini-batch
    training, plots the per-epoch mean loss, and saves the final checkpoint.
    :return:
    """
    du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    trainset, _, _, _ = pickle.load(open('dataset.pkl', 'rb'))
    # Hoisted: the original rebuilt list(trainset) on every single batch.
    train_list = list(trainset)
    steps = int(len(train_list) / batch_size) + 1
    print(trainset.shape)
    # Let the session request GPU memory on demand instead of all upfront.
    tf.reset_default_graph()
    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.9
    config.gpu_options.allow_growth = True

    with tf.Graph().as_default():
        with tf.Session(config=config) as sess:
            # Define the model.
            model = Seq2Seq(batch_size=batch_size,
                            encoder_vocab_size=du.vocab_size,
                            decoder_vocab_size=du.vocab_size,
                            mode='train',
                            **model_config)

            init = tf.global_variables_initializer()
            sess.run(init)
            if continue_train:
                model.load(sess, save_path)
            # One entry at epoch 1 / step 0 plus one per epoch end, giving
            # n_epoch + 1 points to match epoch_list below.
            loss_list = []

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                # Renamed from `time` — that name shadows the stdlib module.
                for step in bar:
                    x, xl, y, yl = du.next_batch(batch_size, train_list)
                    max_len = np.max(yl)  # longest answer in this batch
                    y = y[:, 0:max_len]  # trim padding beyond the longest answer
                    cost, lr = model.train(sess, x, xl, y, yl, keep_prob)
                    costs.append(cost)
                    if epoch == 1 and step == 0:
                        loss_list.append(np.mean(costs))  # initial loss point
                    if step == steps - 1:
                        loss_list.append(np.mean(costs))  # epoch-end mean loss
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))
            epoch_list = list(range(0, n_epoch + 1))  # x-axis: 0..n_epoch
            plt.xlabel('Epoch Number')
            plt.ylabel('Loss Value')
            plt.plot(epoch_list, loss_list)
            plt.show()
            model.save(sess, save_path=save_path)
Esempio n. 4
0
def train():
    """Train the seq2seq model with pre-trained embeddings.

    Builds a train-mode Seq2Seq graph, writes the graph for TensorBoard,
    optionally restores an existing checkpoint, loads pre-trained word
    embeddings into both encoder and decoder, then runs `n_epoch` epochs
    of mini-batch training, saving a checkpoint after every epoch.
    :return:
    """
    du = DataProcessing.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    # len(du) is the number of QA pairs in the processed corpus.
    steps = int(len(du) / batch_size) + 1

    # Request GPU memory on demand rather than reserving it all upfront.
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Graph().as_default():
        with tf.Session(config=config) as sess:
            model = Seq2Seq(batch_size=batch_size,
                            encoder_vocab_size=du.vocab_size,
                            decoder_vocab_size=du.vocab_size,
                            mode='train',
                            **model_config)

            writer = tf.summary.FileWriter('./graph/nlp', sess.graph)
            sess.run(tf.global_variables_initializer())
            if continue_train:
                model.load(sess, save_path)

            # Seed both embedding tables from the same pre-trained matrix.
            emb = pickle.load(open('data/emb.pkl', 'rb'))
            model.feed_embedding(sess, encoder=emb, decoder=emb)

            for epoch in range(1, n_epoch + 1):
                epoch_costs = []
                progress = tqdm(range(steps),
                                total=steps,
                                desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in progress:
                    # x: questions, xl: their true lengths;
                    # y: answers,   yl: their true lengths.
                    x, xl, y, yl = du.next_batch(batch_size)
                    longest = np.max(yl)
                    y = y[:, 0:longest]  # drop padding beyond the longest answer
                    cost, lr = model.train(sess, x, xl, y, yl, keep_prob)
                    epoch_costs.append(cost)
                    progress.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(epoch_costs), lr))
                model.save(sess, save_path=save_path)
Esempio n. 5
0
def handle_data(test_size=13503, validate_size=13503):
    """Split the processed corpus into train/validate/test sets.

    Randomly samples `test_size` QA pairs (without replacement) for the
    test set, then `validate_size` of the remainder for validation;
    everything left over becomes the training set.  The three splits plus
    the DataUnit are pickled to dataset.pkl.

    The split sizes were previously hard-coded magic constants (13503);
    they are now parameters with the same defaults, so existing callers
    are unaffected.

    :param test_size: number of QA pairs in the test split
    :param validate_size: number of QA pairs in the validation split
    :return: None (writes dataset.pkl as a side effect)
    """
    du = DataProcessing.DataUnit(**data_config)
    fullset = np.array(du.data)
    # Draw the test split.
    index_test = np.random.choice(fullset.shape[0], test_size, replace=False)
    testset = fullset[index_test]
    index_rest = np.delete(np.arange(fullset.shape[0]), index_test)
    restset = fullset[index_rest]
    # Draw the validation split from what remains.
    index_validate = np.random.choice(restset.shape[0], validate_size,
                                      replace=False)
    validateset = restset[index_validate]
    index_train = np.delete(np.arange(restset.shape[0]), index_validate)
    trainset = restset[index_train]
    print(testset.shape)
    print(validateset.shape)
    print(trainset.shape)
    # Context manager ensures the file handle is closed (the original
    # left an unclosed open() inside the dump call).
    with open('dataset.pkl', 'wb') as f:
        pickle.dump((trainset, validateset, testset, du), f)
Esempio n. 6
0
 def __init__(self):
     """Build the decode-mode Seq2Seq model and restore its checkpoint.

     Creates a batch-size-1 decode graph, opens a long-lived session on
     self.sess, and loads the latest weights from
     BASE_MODEL_DIR/MODEL_NAME.
     """
     print("init111")
     self.du = DataProcessing.DataUnit(**data_config)
     checkpoint_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
     tf.reset_default_graph()
     self.model = Seq2Seq(batch_size=1,
                          encoder_vocab_size=self.du.vocab_size,
                          decoder_vocab_size=self.du.vocab_size,
                          mode='decode',
                          **model_config)
     # Soft placement lets TF fall back to CPU for ops without a GPU kernel.
     session_config = tf.ConfigProto(allow_soft_placement=True,
                                     log_device_placement=False)
     self.sess = tf.Session(config=session_config)
     self.sess.run(tf.global_variables_initializer())
     self.model.load(self.sess, checkpoint_path)