def main():
    """Train the GRU language model for NEPOCH epochs.

    Picks a timestamped output filename when MODEL_OUTPUT_FILE is empty,
    loads the training data, builds a GRUTheano model, times one SGD step,
    and then calls train_with_sgd once per epoch.

    All configuration is read from module-level names (MODEL_OUTPUT_FILE,
    VOCABULARY_SIZE, EMBEDDING_DIM, HIDDEN_DIM, INPUT_DATA_FILE,
    LEARNING_RATE, NEPOCH, PRINT_EVERY).
    """
    # MODEL_OUTPUT_FILE is assigned below, which would otherwise make it a
    # local name for the whole function and turn the `if not ...` test into
    # an UnboundLocalError. Declaring it global also keeps the generated
    # name visible to module-level code such as a save callback.
    global MODEL_OUTPUT_FILE
    if not MODEL_OUTPUT_FILE:
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, VOCABULARY_SIZE, EMBEDDING_DIM, HIDDEN_DIM)

    # Load data
    x_train, y_train, word_to_index, index_to_word = load_data(INPUT_DATA_FILE, VOCABULARY_SIZE)

    # Build model
    model = GRUTheano(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1)

    # Print SGD step time
    t1 = time.time()
    model.sgd_step(x_train[10], y_train[10], LEARNING_RATE)
    t2 = time.time()
    print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))
    sys.stdout.flush()

    # We do this every few examples to understand what's going on
    # NOTE(review): the callback below is commented out, yet train_with_sgd
    # is still given callback=sgd_callback. This only works if sgd_callback
    # is defined at module level outside this function - confirm.
    #def sgd_callback(model, num_examples_seen):
    #  dt = datetime.now().isoformat()
    #  loss = model.calculate_loss(x_train[:10000], y_train[:10000])
    #  print("\n%s (%d)" % (dt, num_examples_seen))
    #  print("--------------------------------------------------")
    #  print("Loss: %f" % loss)
    #  generate_sentences(model, 10, index_to_word, word_to_index)
    #  save_model_parameters_theano(model, MODEL_OUTPUT_FILE)
    #  print("\n")
    #  sys.stdout.flush()

    # One train_with_sgd call per epoch (nepoch=1 each time).
    for epoch in range(NEPOCH):
        train_with_sgd(model, x_train, y_train, learning_rate=LEARNING_RATE, nepoch=1, decay=0.9,
                       callback_every=PRINT_EVERY, callback=sgd_callback)
for i, sent in enumerate(tokenized_sentences): tokenized_sentences[i] = [ w if w in word_to_index else unknown_token for w in sent ] print "\nExample sentence: '%s'" % sentences[0] print "\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0] # Create the training data print("Creating training data") X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences]) y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences]) model = GRUTheano(vocabulary_size, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1) t1 = time.time() model.sgd_step(X_train[10], y_train[10], _LEARNING_RATE) t2 = time.time() print "SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.) if _MODEL_FILE != None: ut.load_model_parameters_theano(_MODEL_FILE, model) for epoch in range(_NEPOCH): train_with_sgd(model, X_train, y_train, nepoch=1, learning_rate=_LEARNING_RATE,
# Replace all words not in our vocabulary with the unknown token # todo needs cleaner text preprocessing for i, sent in enumerate(tokenized_sentences): tokenized_sentences[i] = [ w if w in word_to_index else unknown_token for w in sent ] # Create the training data X_train = numpy.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences]) y_train = numpy.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences]) ######################################################################## construct RNN print "constructing model..." # todo try a smarter initialization - wrt vanishing gradients model = GRUTheano(vocabulary_size, hidden_dim=HIDDEN_DIM) #gradient_check_theano(model, X_train[10], y_train[10], h=0.0000001, error_threshold=0.01) ######################################################################## train if RETRAIN: # run a single step to get a feel for training time print "run a single step..." t1 = time.time() model.sgd_step(X_train[10], y_train[10], LEARNING_RATE) t2 = time.time() print "SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.) #print " loading model parameters..." #if MODEL_FILE != None:
# Run configuration, overridable through environment variables of the same name.
EMBEDDING_DIM = int(os.getenv("EMBEDDING_DIM", "48"))
HIDDEN_DIM = int(os.getenv("HIDDEN_DIM", "128"))
NEPOCH = int(os.getenv("NEPOCH", "20"))
MODEL_OUTPUT_FILE = os.getenv("MODEL_OUTPUT_FILE")
INPUT_DATA_FILE = os.getenv("INPUT_DATA_FILE", "./data/reddit-comments-2015.csv")
PRINT_EVERY = int(os.getenv("PRINT_EVERY", "25000"))

# No output file configured: derive one from the current time and the
# vocabulary / embedding / hidden sizes.
if not MODEL_OUTPUT_FILE:
    ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
    MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (
        ts,
        VOCABULARY_SIZE,
        EMBEDDING_DIM,
        HIDDEN_DIM,
    )

# Training data and vocabulary mappings.
x_train, y_train, word_to_index, index_to_word = load_data(
    INPUT_DATA_FILE, VOCABULARY_SIZE
)

# The model under training.
model = GRUTheano(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1)

# Time one SGD step on the example at index 10 and report it in milliseconds.
t1 = time.time()
model.sgd_step(x_train[10], y_train[10], LEARNING_RATE)
t2 = time.time()
print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))
sys.stdout.flush()


# Progress hook, run every few examples to see how training is going.
def sgd_callback(model, num_examples_seen):
    """Print a timestamped loss report.

    The loss is computed by model.calculate_loss on the first 10000
    entries of x_train / y_train. The timestamp is taken before the loss
    is computed.
    """
    stamp = datetime.now().isoformat()
    current_loss = model.calculate_loss(x_train[:10000], y_train[:10000])
    print("\n%s (%d)" % (stamp, num_examples_seen))
    print("--------------------------------------------------")
    print("Loss: %f" % current_loss)
path = 'GRU-epo41-voc3700-hid128.dat.npz' model = load_model_parameters_theano(path, modelClass=GRUTheano) generate_sentences(model, 5, index_to_word, word_to_index) exit for epoch in range(start_epoch, NEPOCH): if epoch>0: path = 'results/GRU-epo%s-voc%s-hid%s.dat.npz' %(epoch, VOCABULARY_SIZE, HIDDEN_DIM) print('Loading file %s' %path) print('Training %s of %s epochs' %(epoch,NEPOCH)) model = load_model_parameters_theano(path, modelClass=GRUTheano) else: # Build model from scratch model = GRUTheano(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1) train_with_sgd(model, x_train, y_train, \ learning_rate=LEARNING_RATE, \ nepoch=80, \ startfrom=epoch, \ decay=0.9, \ callback_every=PRINT_EVERY, \ callback=sgd_callback) """ # load vocabulary xx, yy, vocab, word_to_index, index_to_word = loadText('pg11.txt', \
W=model.W.get_value(), V=model.V.get_value(), b=model.b.get_value(), c=model.c.get_value()) print "Saved model parameters to %s." % outfile if not MODEL_OUTPUT_FILE: ts = datetime.now().strftime("%Y-%m-%d-%H-%M") MODEL_OUTPUT_FILE = "./data/GRU-%s-%s-%s-%s.dat" % ( lyric_document, ts, len(num2word), HIDDEN_DIM) # 建立模型,第一个参数是 word_dim,即词汇数量 # 第二个参数,隐藏层节点数。 第三个参数,训练时向后回溯的步数。 model = GRUTheano(len(num2word), hidden_dim=HIDDEN_DIM, bptt_truncate=BPTT_STEPS) """ #这几行代码测试执行一步训练需要多少时间。 # Print SGD step time t1 = time.time() model.sgd_step(x_train[10], y_train[10], LEARNING_RATE) t2 = time.time() print "SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.) sys.stdout.flush() """ # We do this every few examples to understand what's going on def sgd_callback(model, num_examples_seen): dt = datetime.now().isoformat()
# Run configuration, overridable through environment variables.
HIDDEN_DIM = int(os.environ.get("HIDDEN_DIM", "1024"))
NEPOCH = int(os.environ.get("NEPOCH", "20"))
MODEL_OUTPUT_FILE = os.environ.get("MODEL_OUTPUT_FILE", "./model.txt")
INPUT_DATA_FILE = os.environ.get("INPUT_DATA_FILE", "/home/zhouh/Data/nmt/corpus.en")
# Read the dictionary path from its own variable. It used to look up
# "INPUT_DATA_FILE", so overriding the corpus path silently replaced the
# dictionary path as well and the dictionary could never be set on its own.
INPUT_DICT_FILE = os.environ.get("INPUT_DICT_FILE", "/home/zhouh/Data/nmt/corpus.en.pkl")
PRINT_EVERY = int(os.environ.get("PRINT_EVERY", "25000"))

# Because MODEL_OUTPUT_FILE has a default, this branch only runs when the
# environment variable is set to an empty string.
if not MODEL_OUTPUT_FILE:
    ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
    MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, VOCABULARY_SIZE, EMBEDDING_DIM, HIDDEN_DIM)

# Load data
x_train, y_train, word_to_index, index_to_word = load_data(INPUT_DATA_FILE, INPUT_DICT_FILE)

# Build model
model = GRUTheano(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1)

# Print SGD step time
t1 = time.time()
model.sgd_step(x_train[10], y_train[10], LEARNING_RATE)
t2 = time.time()
# Parenthesised so the line parses both as a Python 2 print statement and as
# a print() call, matching the other prints in this script.
print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))
sys.stdout.flush()


# We do this every few examples to understand what's going on
def sgd_callback(model, num_examples_seen):
    """Print a timestamped loss report.

    The loss is computed by model.calculate_loss on the first 10000
    entries of x_train / y_train.
    """
    dt = datetime.now().isoformat()
    loss = model.calculate_loss(x_train[:10000], y_train[:10000])
    print("\n%s (%d)" % (dt, num_examples_seen))
    print("--------------------------------------------------")
    print("Loss: %f" % loss)
print "\ntest loss: " + str(type(model)) print "Expected Loss for random predictions: %f" % np.log(model.word_dim) print "Actual loss: %f" % model.calculate_loss(X_train[:100], y_train[:100]) def test_performance(model, learning_rate): print "\ntest performance: " + str(type(model)) t1 = time.time() model.sgd_step(X_train[10], y_train[10], learning_rate) t2 = time.time() print "SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.) model_gru = GRUTheano(word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1) model_theano = RNNTheano(word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1) model_rnn = RNNNumpy(word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1) test_performance(model_gru, _LEARNING_RATE) test_performance(model_theano, _LEARNING_RATE) test_performance(model_rnn, _LEARNING_RATE) test_loss(model_gru) test_loss(model_theano) test_loss(model_rnn)