for i in xrange(500):
    mlp_bigram.traintext(train_text, add_se=False)
    print "Error rate: %0.5f. Epoch: %s. Training time so far: %0.1fm" % (mlp_bigram.testtext(test_text), i+1, (time.clock()-s_time)/60.)
    if (i+1) % 50 == 0:
        # checkpoint the model every 50 epochs
        mlp_bigram.savemodel("./data/MlpBigram.model.epoch%s.obj" % i)
e_time = time.clock()
duration = e_time - s_time
print "MlpBigram train over!! The total training time is %.2fm." % (duration / 60.)

#############
#  Testing  #
#############
theano.sandbox.cuda.use('cpu')
# reload the final checkpoint (saved at i=499, i.e. after epoch 500)
mlp_bigram = MlpBigram(nlpdict, backup_file_path="./data/MlpBigram.model.epoch499.obj")

print mlp_bigram.likelihood(u"国家主席江泽")
print nlpdict.gettoken(mlp_bigram.predict(u"国家主席江"))

# test text
f = open('./data/pku_test.txt')
test_text = unicode(f.read(), 'utf-8')
f.close()
print "Test size is: %s" % len(test_text)

ce, logs = mlp_bigram.crossentropy(test_text)
print "Cross-entropy is:", ce
print "Perplexity is:", numpy.exp2(ce)
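The perplexity printed above is just 2 raised to the per-character cross-entropy in bits. The toy probabilities in the following standalone sketch are made up for illustration (they are not produced by MlpBigram), but the arithmetic is the same:

# -*- coding: utf-8 -*-
import numpy

# Hypothetical probabilities a character-bigram model might assign to
# each character of a 4-character test string (illustrative values only).
probs = numpy.array([0.25, 0.10, 0.50, 0.05])

# Cross-entropy: mean negative log2-probability per character, in bits.
ce = -numpy.mean(numpy.log2(probs))

# Perplexity 2**ce is the model's effective branching factor per character.
print "Cross-entropy is:", ce            # ~2.661 bits
print "Perplexity is:", numpy.exp2(ce)   # ~6.32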
#############
# Training  #
#############
# training text
f = open('./data/text.txt')
text = unicode(f.read(), 'utf-8')
text = text.replace(" ", "")  # remove word-separator spaces from the raw text
f.close()
len_text = len(text)
print "Train size is: %s" % len_text

rnnlm = RnnLM(nlpdict, n_hidden=30, lr=0.09, batch_size=50)
print "Rnn training start!"

# quick sanity check: train and evaluate on the same 100-character slice
train_text = text[:100]
test_text = text[:100]
s_time = time.clock()
for i in xrange(50):
    rnnlm.traintext(train_text)
    test_res = rnnlm.testtext(test_text)
    print "Error rate: %.5f. Epoch: %s. Training time so far: %0.1fm" % (test_res[0], i+1, (time.clock()-s_time)/60.)
    # decode the predicted token indices back into characters
    print ''.join(nlpdict.gettoken(t) for t in test_res[1])
e_time = time.clock()
duration = e_time - s_time
print "RnnLM train over!! The total training time is %.2fm." % (duration / 60.)
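For intuition about what the recurrent model computes at each step, here is a minimal numpy sketch of a plain Elman-style recurrent step over character indices. The RnnLM class above is implemented in Theano and its internals may differ; the sizes, weights, and index sequence below are illustrative assumptions:

# -*- coding: utf-8 -*-
import numpy

numpy.random.seed(0)
n_vocab, n_hidden = 20, 30          # toy sizes; RnnLM above uses n_hidden=30

# Randomly initialised parameters (illustrative, untrained).
W_in  = numpy.random.uniform(-0.1, 0.1, (n_vocab, n_hidden))   # input -> hidden
W_h   = numpy.random.uniform(-0.1, 0.1, (n_hidden, n_hidden))  # hidden -> hidden
W_out = numpy.random.uniform(-0.1, 0.1, (n_hidden, n_vocab))   # hidden -> output

def softmax(x):
    e = numpy.exp(x - x.max())
    return e / e.sum()

# One pass over a toy index sequence: at each step the hidden state mixes
# the current character with the previous state, then the output layer
# gives a distribution over the next character.
h = numpy.zeros(n_hidden)
for idx in [3, 7, 1, 12]:
    h = numpy.tanh(W_in[idx] + numpy.dot(h, W_h))
    p_next = softmax(numpy.dot(h, W_out))
    print "predicted next index:", p_next.argmax()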