Example #1
File: main.py Project: sjqzhang/QA
import time

import tensorflow as tf

import qaData

# sess, lstm, saver, and the hyperparameters / file paths used below are
# module-level names defined elsewhere in main.py.
def train():
    print("重新训练,请保证计算机拥有至少8G空闲内存与2G空闲显存")
    # 准备训练数据
    print("正在准备训练数据,大约需要五分钟...")
    qTrain, aTrain, lTrain, qIdTrain = qaData.loadData(trainingFile, word2idx,
                                                       unrollSteps, True)
    qDevelop, aDevelop, lDevelop, qIdDevelop = qaData.loadData(
        developFile, word2idx, unrollSteps, True)
    # Offset the dev question ids so they do not collide with the training ids
    trainQuestionCounts = qIdTrain[-1]
    for i in range(len(qIdDevelop)):
        qIdDevelop[i] += trainQuestionCounts
    tqs, tta, tfa = [], [], []
    for question, trueAnswer, falseAnswer in qaData.trainingBatchIter(
            qTrain + qDevelop, aTrain + aDevelop, lTrain + lDevelop,
            qIdTrain + qIdDevelop, batchSize):
        tqs.append(question)
        tta.append(trueAnswer)
        tfa.append(falseAnswer)
    print("加载完成!")
    # 开始训练
    print("开始训练,全部训练过程大约需要12小时")
    sess.run(tf.global_variables_initializer())
    lr = learningRate  # use a local variable to avoid shadowing the module-level name
    for i in range(lrDownCount):
        optimizer = tf.train.GradientDescentOptimizer(lr)
        trainOp = optimizer.apply_gradients(zip(grads, tvars),
                                            global_step=globalStep)
        for epoch in range(epochs):
            for question, trueAnswer, falseAnswer in zip(tqs, tta, tfa):
                startTime = time.time()
                feed_dict = {
                    lstm.inputQuestions: question,
                    lstm.inputTrueAnswers: trueAnswer,
                    lstm.inputFalseAnswers: falseAnswer,
                    lstm.keep_prob: dropout
                }
                _, step, _, _, loss = sess.run(
                    [trainOp, globalStep, lstm.trueCosSim, lstm.falseCosSim, lstm.loss],
                    feed_dict)
                timeUsed = time.time() - startTime
                print("step:", step, "loss:", loss, "time:", timeUsed)
            saver.save(sess, saveFile)
        lr *= lrDownRate
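The loss that lstm.loss minimizes is not shown in these excerpts. Given the margin hyperparameter and the trueCosSim/falseCosSim tensors fetched above, it is presumably the usual margin-based ranking loss over cosine similarities; a minimal sketch under that assumption (the actual QaLSTMNet definition may differ):

import tensorflow as tf

def ranking_loss(trueCosSim, falseCosSim, margin=0.1):
    # Loss is zero once the true answer outscores the false one by at least `margin`.
    return tf.reduce_sum(tf.maximum(0.0, margin - trueCosSim + falseCosSim))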
Example #2
import time

import numpy as np
import tensorflow as tf

import qaData

# sess, lstm, saver, and the hyperparameters / file paths used below are
# module-level names defined elsewhere in the project.
def train():
    print("重新训练,请保证计算机拥有至少8G空闲内存与2G空闲显存")
    # 准备训练数据
    print("正在准备训练数据,大约需要五分钟...")
    qTrain, aTrain, lTrain, qIdTrain = qaData.loadData(trainingFile, word2idx,
                                                       unrollSteps, True)

    tqs, tta, tfa = [], [], []
    for question, trueAnswer, falseAnswer in qaData.trainingBatchIter(
            qTrain, aTrain, lTrain, qIdTrain, batchSize):
        tqs.append(question)
        tta.append(trueAnswer)
        tfa.append(falseAnswer)
    tqs = np.array(tqs)
    tta = np.array(tta)
    tfa = np.array(tfa)
    print("加载完成!")
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(tqs)))
    tqs_shuffled = tqs[shuffle_indices]
    tta_shuffled = tta[shuffle_indices]
    tfa_shuffled = tfa[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(0.1 * float(len(tqs)))
    tqs_train, tqs_dev = tqs_shuffled[:dev_sample_index], tqs_shuffled[dev_sample_index:]
    tta_train, tta_dev = tta_shuffled[:dev_sample_index], tta_shuffled[dev_sample_index:]
    tfa_train, tfa_dev = tfa_shuffled[:dev_sample_index], tfa_shuffled[dev_sample_index:]

    print("开始训练,全部训练过程大约需要12小时")
    sess.run(tf.global_variables_initializer())
    lr = learningRate  # 引入局部变量,防止shadow name
    for i in range(lrDownCount):
        optimizer = tf.train.GradientDescentOptimizer(lr)
        trainOp = optimizer.apply_gradients(zip(grads, tvars),
                                            global_step=globalStep)
        for epoch in range(epochs):
            for question, trueAnswer, falseAnswer in zip(
                    tqs_train, tta_train, tfa_train):
                startTime = time.time()
                feed_dict = {
                    lstm.inputQuestions: question,
                    lstm.inputTrueAnswers: trueAnswer,
                    lstm.inputFalseAnswers: falseAnswer,
                    lstm.keep_prob: dropout
                }
                _, step, _, _, loss = sess.run(
                    [trainOp, globalStep, lstm.trueCosSim, lstm.falseCosSim, lstm.loss],
                    feed_dict)
                timeUsed = time.time() - startTime
                print("step:", step, "loss:", loss, "time:", timeUsed)
            saver.save(sess, saveFile)
        lr *= lrDownRate
    with open(resultFile, 'w') as file:
        for question, answer in zip(tqs_dev, tta_dev):
            feed_dict = {
                lstm.inputTestQuestions: question,
                lstm.inputTestAnswers: answer,
                lstm.keep_prob: 1.0  # disable dropout at evaluation time
            }
            _, scores = sess.run([globalStep, lstm.result], feed_dict)
            for score in scores:
                file.write("%.9f\n" % score)
        file.write("------------------------------------------\n")
        for question, answer in zip(tqs_dev, tfa_dev):
            feed_dict = {
                lstm.inputTestQuestions: question,
                lstm.inputTestAnswers: answer,
                lstm.keep_prob: 1.0  # disable dropout at evaluation time
            }
            _, scores = sess.run([globalStep, lstm.result], feed_dict)
            for score in scores:
                file.write("%.9f\n" % score)
Example #3
    epochs = 20  # number of full epochs to run before each exponential learning-rate decay
    batchSize = 20  # number of questions processed per batch

    rnnSize = 100  # number of hidden units in the LSTM cell
    margin = 0.1  # M, the constant margin of the ranking loss

    unrollSteps = 100  # maximum number of words per sentence
    max_grad_norm = 5  # gradient clipping: if the L2 norm of the gradients exceeds max_grad_norm, scale them down proportionally

    allow_soft_placement = True  # allow soft device placement
    gpuMemUsage = 0.75  # maximum fraction of GPU memory to use

    # Load the test data
    print("Loading the test data, this takes about a minute...")
    embedding, word2idx = qaData.loadEmbedding(embeddingFile)
    qTest, aTest, _, qIdTest = qaData.loadData(testingFile, word2idx,
                                               unrollSteps)
    print("Test data loaded")
    # Configure TensorFlow
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement)
        with tf.Session(config=session_conf).as_default() as sess:
            # Load the LSTM network
            print("Loading the LSTM network, this takes about three minutes...")
            globalStep = tf.Variable(0, name="global_step", trainable=False)
            lstm = QaLSTMNet(batchSize, unrollSteps, embedding, embeddingSize,
                             rnnSize, margin)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(lstm.loss, tvars),
                                              max_grad_norm)
            saver = tf.train.Saver()
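tf.clip_by_global_norm rescales all gradients jointly: when the L2 norm of the concatenated gradient vector exceeds max_grad_norm, every gradient is multiplied by max_grad_norm / global_norm, preserving their relative directions. A numpy sketch of that rule, for illustration only:

import numpy as np

def clip_by_global_norm(grads, max_norm):
    # Joint L2 norm across all gradient arrays.
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if global_norm > max_norm:
        grads = [g * (max_norm / global_norm) for g in grads]
    return grads, global_norm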
Example #4
File: main.py Project: sjqzhang/QA
    batchSize = 20  # number of questions processed per batch

    rnnSize = 100  # number of hidden units in the LSTM cell
    margin = 0.1  # M, the constant margin of the ranking loss

    unrollSteps = 100  # maximum number of words per sentence
    max_grad_norm = 5  # gradient clipping: if the L2 norm of the gradients exceeds max_grad_norm, scale them down proportionally

    allow_soft_placement = True  # allow soft device placement
    gpuMemUsage = 0.75  # maximum fraction of GPU memory to use
    gpuDevice = "/gpu:0"  # GPU device name

    # Load the test data
    print("Loading the test data, this takes about a minute...")
    embedding, word2idx = qaData.loadEmbedding(embeddingFile)
    qTest, aTest, _, qIdTest = qaData.loadData(testingFile, word2idx, unrollSteps)
    print("测试数据加载完成")
    # 配置TensorFlow
    with tf.Graph().as_default(), tf.device(gpuDevice):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpuMemUsage)
        session_conf = tf.ConfigProto(allow_soft_placement=allow_soft_placement, gpu_options=gpu_options)
        with tf.Session(config=session_conf).as_default() as sess:
            # Load the LSTM network
            print("Loading the LSTM network, this takes about three minutes...")
            globalStep = tf.Variable(0, name="global_step", trainable=False)
            lstm = QaLSTMNet(batchSize, unrollSteps, embedding, embeddingSize, rnnSize, margin)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(lstm.loss, tvars), max_grad_norm)
            saver = tf.train.Saver()
            print("加载完成!")