Example #1
import time

import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn import BasicRNNCell  # also available as tf.nn.rnn_cell.BasicRNNCell

# loadWordIdsFromFiles, loadInfo, loadSession, save and drawPerplexity are
# project-local helpers (data loading plus checkpoint bookkeeping), assumed
# importable from the surrounding package.
def runMoreRnn(path=None, epochs=10, saveResult=True):
	trainData, validData, testData, wordId = loadWordIdsFromFiles()
	trainData = np.array(trainData, np.int32)  # word IDs are integers, not floats
	# validData = np.array(validData, np.int32)
	testData = np.array(testData, np.int32)
	vocabSz = len(wordId)

	info = loadInfo('rnn', path)
	learnRate = info['learning rate']
	batchSz = info['batch size']
	embedSz = info['embed size']
	rnnSz = info['rnn size']
	winSz = info['win size']
	numWin = (trainData.shape[0] - 1) // (batchSz * winSz)
	# each batch has winSz * numWin words
	batchLen = winSz * numWin

	testNumWin = (testData.shape[0] - 1) // (batchSz * winSz)
	testBatchLen = winSz * testNumWin

	inp = tf.placeholder(tf.int32, shape=[batchSz, winSz])
	# ans = tf.placeholder(tf.int32, shape=[batchSz * winSz])
	ans = tf.placeholder(tf.int32, shape=[batchSz, winSz])

	E = tf.Variable(tf.random_normal([vocabSz, embedSz], stddev=0.1))
	embed = tf.nn.embedding_lookup(E, inp)

	rnn = BasicRNNCell(rnnSz, activation='relu')
	initialState = rnn.zero_state(batchSz, tf.float32)
	output, nextState = tf.nn.dynamic_rnn(rnn, embed, initial_state=initialState)
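	# output: [batchSz, winSz, rnnSz]; nextState: [batchSz, rnnSz]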
	# output = tf.reshape(output, [batchSz * winSz, rnnSz])

	W = tf.Variable(tf.random_normal([rnnSz, vocabSz], stddev=.1))
	B = tf.Variable(tf.random_normal([vocabSz], stddev=.1))
	# logits = tf.matmul(output, W) + B
	logits = tf.tensordot(output, W, [[2], [0]]) + B
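	# tensordot contracts the rnnSz axis, so logits is [batchSz, winSz, vocabSz]
	# and lines up with the 2-D `ans` labels -- no flattening needed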

	ents = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ans)
	loss = tf.reduce_sum(ents)
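	# summed (not averaged) cross-entropy; it is normalized into perplexity after training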
	train = tf.train.GradientDescentOptimizer(learnRate).minimize(loss)

	trainPerp = np.zeros(epochs + 1, dtype=np.float32)
	trainPerp[0] = info['train perplexity']
	testPerp = np.zeros(epochs + 1, dtype=np.float32)
	testPerp[0] = info['test perplexity']
	with tf.Session() as sess:
		loadSession(sess, 'rnn', path)
		startTime = time.time()
		epoch = 0
		print('epoch:', end=' ')
		while epoch < epochs:
			epoch += 1
			win = 0
			state = sess.run(initialState)
			testState = sess.run(initialState)
			# print(state, testState)
			winStart, winEnd = 0, winSz
			while win < numWin:
				inInp = np.array([trainData[i * batchLen + winStart:i * batchLen + winEnd] for i in range(batchSz)])
				# inAns = np.reshape(np.array([trainData[i * batchLen + winStart + 1: i * batchLen + winEnd + 1] for i in range(batchSz)]), batchSz * winSz)
				inAns = np.array([trainData[i * batchLen + winStart + 1: i * batchLen + winEnd + 1] for i in range(batchSz)])
				# carry the RNN state across windows by feeding it into initialState;
				# feeding nextState would not affect the loss, which depends only on initialState
				_, state, outLoss = sess.run([train, nextState, loss], {inp: inInp, ans: inAns, initialState: state})
				trainPerp[epoch] += outLoss
				if win < testNumWin:
					inInp = np.array([testData[i * testBatchLen + winStart:i * testBatchLen + winEnd] for i in range(batchSz)])
					# inAns = np.reshape(np.array([testData[i * testBatchLen + winStart + 1: i * testBatchLen + winEnd + 1] for i in range(batchSz)]), batchSz * winSz)
					inAns = np.array([testData[i * testBatchLen + winStart + 1: i * testBatchLen + winEnd + 1] for i in range(batchSz)])
					# same state hand-off for the test stream
					testState, testOutLoss = sess.run([nextState, loss], {inp: inInp, ans: inAns, initialState: testState})
					testPerp[epoch] += testOutLoss
				winStart, winEnd = winEnd, winEnd + winSz
				win += 1
			print(epoch + info['epochs'], end=' ')
		# perplexity = exp(total cross-entropy / number of predicted words);
		# each epoch predicts batchSz * batchLen train words and batchSz * testBatchLen test words
		trainPerp[1:] = np.exp(trainPerp[1:] / (batchSz * batchLen))
		testPerp[1:] = np.exp(testPerp[1:] / (batchSz * testBatchLen))
		print(f'\nelapsed: {time.time() - startTime}')
		print('train perplexity:', trainPerp[-1])
		print('test perplexity:', testPerp[-1])

		info['epochs'] += epochs
		info['train perplexity'] = trainPerp[-1]
		info['test perplexity'] = testPerp[-1]
		if saveResult:
			save(sess, info)
	drawPerplexity(trainPerp, testPerp, info['epochs'] - epochs)
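The index arithmetic above is the heart of the RNN driver: one flat stream of word IDs is cut into batchSz parallel streams of batchLen words, and each training step advances a winSz-wide window over all streams at once, with the labels shifted one word ahead. A standalone sketch with made-up sizes (independent of the project helpers) makes the slicing concrete:

import numpy as np

data = np.arange(23)                               # stand-in for trainData word IDs
batchSz, winSz = 2, 3
numWin = (data.shape[0] - 1) // (batchSz * winSz)  # 3 windows per stream
batchLen = winSz * numWin                          # 9 words per stream

winStart, winEnd = 0, winSz                        # first window
inInp = np.array([data[i * batchLen + winStart:i * batchLen + winEnd]
                  for i in range(batchSz)])
inAns = np.array([data[i * batchLen + winStart + 1:i * batchLen + winEnd + 1]
                  for i in range(batchSz)])
print(inInp)  # [[ 0  1  2]  [ 9 10 11]]
print(inAns)  # [[ 1  2  3]  [10 11 12]] -- each label is the next word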
Example #2
import time

import numpy as np
import tensorflow as tf

# Reuses the same project-local helpers as Example #1: loadWordIdsFromFiles,
# loadInfo, loadSession, save and drawPerplexity.
def runMoreBigram(path=None, epochs=10, saveResult=True):
    trainData, validData, testData, wordId = loadWordIdsFromFiles()
    trainData = np.array(trainData, np.int32)  # word IDs are integers, not floats
    # validData = np.array(validData, np.int32)
    testData = np.array(testData, np.int32)
    vocabSz = len(wordId)

    info = loadInfo('bigram', path)
    batchSz = info['batch size']
    embedSz = info['embed size']
    learnRate = info['learning rate']

    # shapes are left unspecified: training feeds batchSz-long slices, while the
    # test evaluation below feeds the whole test sequence at once
    inp = tf.placeholder(tf.int32)
    ans = tf.placeholder(tf.int32)
    E = tf.Variable(tf.random_normal([vocabSz, embedSz], stddev=0.1))
    embed = tf.nn.embedding_lookup(E, inp)

    dropRate = tf.placeholder(tf.float32)
    embed = tf.nn.dropout(embed, rate=dropRate)
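    # dropout on the embeddings; dropRate is fed 0.5 while training and 0.0 at test time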

    W = tf.Variable(tf.random_normal([embedSz, vocabSz], stddev=.1))
    B = tf.Variable(tf.random_normal([vocabSz], stddev=.1))
    logits = tf.matmul(embed, W) + B

    # dropRate = tf.placeholder(tf.float32)
    # logits = tf.nn.dropout(logits, rate=dropRate)

    ents = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=ans)
    loss = tf.reduce_sum(ents)
    train = tf.train.GradientDescentOptimizer(learnRate).minimize(loss)

    sess = tf.Session()
    loadSession(sess, 'bigram', path)

    startTime = time.time()
    trainPerp = np.zeros(epochs + 1, dtype=np.float32)
    trainPerp[0] = info['train perplexity']
    testPerp = np.zeros(epochs + 1, dtype=np.float32)
    testPerp[0] = info['test perplexity']
    epoch = 0
    print('epoch:', end=' ')
    while epoch < epochs:
        epoch += 1
        for i in range((trainData.shape[0] - 1) // batchSz):  # -1 keeps the shifted label slice in bounds
            inInp = trainData[i * batchSz:(i + 1) * batchSz]
            inAns = trainData[i * batchSz + 1:(i + 1) * batchSz + 1]

            # sess.run(train, feed_dict={inp: inp0, ans: ans0})
            outLoss, _ = sess.run([loss, train],
                                  feed_dict={
                                      inp: inInp,
                                      ans: inAns,
                                      dropRate: 0.5
                                  })
            trainPerp[epoch] += outLoss
        testPerp[epoch] = sess.run(loss,
                                   feed_dict={
                                       inp: testData[:-1],
                                       ans: testData[1:],
                                       dropRate: 0.0
                                   })
        print(epoch + info['epochs'], end=' ')
    # perplexity = exp(total cross-entropy / number of predicted words)
    trainPerp[1:] = np.exp(trainPerp[1:] /
                           ((trainData.shape[0] - 1) // batchSz * batchSz))
    testPerp[1:] = np.exp(testPerp[1:] / (testData.shape[0] - 1))
    print(f'\nelapsed: {time.time() - startTime}')
    print('train perplexity:', trainPerp[-1])
    print('test perplexity:', testPerp[-1])

    info['epochs'] += epochs
    info['train perplexity'] = trainPerp[-1]
    info['test perplexity'] = testPerp[-1]
    if saveResult:
        save(sess, info)
    sess.close()
    drawPerplexity(trainPerp, testPerp, info['epochs'] - epochs)
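Both drivers resume training from a checkpoint written by a previous run: loadInfo restores the hyperparameters, loadSession restores the weights, and save writes the updated model back. A minimal driver, assuming such a checkpoint already exists (the argument values are illustrative, not from the project):

if __name__ == '__main__':
    # Run only one of these per process: each builds its model in the default
    # TF1 graph, so mixing them would create clashing variables.
    runMoreBigram(path=None, epochs=5, saveResult=False)
    # runMoreRnn(path=None, epochs=5, saveResult=False)  # Example #1's RNN variant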