Esempio n. 1
0
def main():
    """Build a ``GRUTheano`` model, time one SGD step and train it.

    Reads the module-level settings ``MODEL_OUTPUT_FILE``,
    ``VOCABULARY_SIZE``, ``EMBEDDING_DIM``, ``HIDDEN_DIM``,
    ``INPUT_DATA_FILE``, ``LEARNING_RATE``, ``NEPOCH`` and ``PRINT_EVERY``.

    Side effects:
        Rebinds the module-level ``MODEL_OUTPUT_FILE`` to a timestamped
        default name when it is empty or ``None``, prints the duration of
        a single SGD step, and calls ``train_with_sgd`` once per epoch.
    """
    # MODEL_OUTPUT_FILE is assigned below. Without this declaration the
    # assignment would make the name function-local and the ``if not ...``
    # check would raise UnboundLocalError before anything else runs.
    global MODEL_OUTPUT_FILE
    if not MODEL_OUTPUT_FILE:
        ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
        MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, VOCABULARY_SIZE, EMBEDDING_DIM, HIDDEN_DIM)

    # Load data
    x_train, y_train, word_to_index, index_to_word = load_data(INPUT_DATA_FILE, VOCABULARY_SIZE)

    # Build model
    model = GRUTheano(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1)

    # Time one SGD step on a single training example and report it.
    t1 = time.time()
    model.sgd_step(x_train[10], y_train[10], LEARNING_RATE)
    t2 = time.time()
    print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))
    sys.stdout.flush()

    # NOTE(review): the local ``sgd_callback`` below is commented out, so the
    # training loop relies on a module-level ``sgd_callback`` being defined
    # elsewhere in the file -- confirm, otherwise the loop raises NameError.
    #
    # We do this every few examples to understand what's going on
    #def sgd_callback(model, num_examples_seen):
    #  dt = datetime.now().isoformat()
    #  loss = model.calculate_loss(x_train[:10000], y_train[:10000])
    #  print("\n%s (%d)" % (dt, num_examples_seen))
    #  print("--------------------------------------------------")
    #  print("Loss: %f" % loss)
    #  generate_sentences(model, 10, index_to_word, word_to_index)
    #  save_model_parameters_theano(model, MODEL_OUTPUT_FILE)
    #  print("\n")
    #  sys.stdout.flush()

    # One call per epoch (nepoch=1), so the callback interval and the decay
    # are applied by train_with_sgd separately on every pass.
    for epoch in range(NEPOCH):
        train_with_sgd(model, x_train, y_train, learning_rate=LEARNING_RATE, nepoch=1, decay=0.9,
                       callback_every=PRINT_EVERY, callback=sgd_callback)
# Replace every word that is not in the vocabulary with the unknown token,
# so that each remaining word has an entry in word_to_index.
for i, sent in enumerate(tokenized_sentences):
    tokenized_sentences[i] = [
        w if w in word_to_index else unknown_token for w in sent
    ]

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("\nExample sentence: '%s'" % sentences[0])
print("\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0])

# Create the training data: the input is each sentence without its last
# token, the target is the same sentence without its first token.
# NOTE(review): if the sentences differ in length these are ragged lists;
# recent NumPy releases require dtype=object for that -- confirm against
# the NumPy version in use before changing.
print("Creating training data")
X_train = np.asarray([[word_to_index[w] for w in sent[:-1]]
                      for sent in tokenized_sentences])
y_train = np.asarray([[word_to_index[w] for w in sent[1:]]
                      for sent in tokenized_sentences])

model = GRUTheano(vocabulary_size, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1)

# Time one SGD step on a single training example.
t1 = time.time()
model.sgd_step(X_train[10], y_train[10], _LEARNING_RATE)
t2 = time.time()
print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))

# Optionally load previously saved parameters into the model.
if _MODEL_FILE is not None:
    ut.load_model_parameters_theano(_MODEL_FILE, model)

for epoch in range(_NEPOCH):
    train_with_sgd(model,
                   X_train,
                   y_train,
                   nepoch=1,
                   learning_rate=_LEARNING_RATE,
Esempio n. 3
0
# Replace all words not in our vocabulary with the unknown token
# todo needs cleaner text preprocessing
for i, sent in enumerate(tokenized_sentences):
    tokenized_sentences[i] = [
        w if w in word_to_index else unknown_token for w in sent
    ]

# Create the training data: the input is each sentence without its last
# token, the target is the same sentence without its first token.
X_train = numpy.asarray([[word_to_index[w] for w in sent[:-1]]
                         for sent in tokenized_sentences])
y_train = numpy.asarray([[word_to_index[w] for w in sent[1:]]
                         for sent in tokenized_sentences])

######################################################################## construct RNN
# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("constructing model...")  # todo try a smarter initialization - wrt vanishing gradients
model = GRUTheano(vocabulary_size, hidden_dim=HIDDEN_DIM)

#gradient_check_theano(model, X_train[10], y_train[10], h=0.0000001, error_threshold=0.01)

######################################################################## train

if RETRAIN:
    # run a single step to get a feel for training time
    print("run a single step...")
    t1 = time.time()
    model.sgd_step(X_train[10], y_train[10], LEARNING_RATE)
    t2 = time.time()
    print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))

    #print " loading model parameters..."
    #if MODEL_FILE != None:
Esempio n. 4
0
# Run settings. Each value is read from the environment variable of the same
# name and falls back to the literal default given here.
EMBEDDING_DIM = int(os.environ.get("EMBEDDING_DIM", "48"))
HIDDEN_DIM = int(os.environ.get("HIDDEN_DIM", "128"))
NEPOCH = int(os.environ.get("NEPOCH", "20"))
# No default: stays None when the variable is unset (handled just below).
MODEL_OUTPUT_FILE = os.environ.get("MODEL_OUTPUT_FILE")
INPUT_DATA_FILE = os.environ.get("INPUT_DATA_FILE", "./data/reddit-comments-2015.csv")
PRINT_EVERY = int(os.environ.get("PRINT_EVERY", "25000"))

# When no output file was configured, derive a name from the current
# timestamp (minute resolution) and the model dimensions.
if not MODEL_OUTPUT_FILE:
  ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
  MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, VOCABULARY_SIZE, EMBEDDING_DIM, HIDDEN_DIM)

# Load the training data and the two vocabulary mappings.
x_train, y_train, word_to_index, index_to_word = load_data(INPUT_DATA_FILE, VOCABULARY_SIZE)

# Build model
model = GRUTheano(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1)

# Time one SGD step on a single training example and print it.
t1 = time.time()
model.sgd_step(x_train[10], y_train[10], LEARNING_RATE)
t2 = time.time()
print ("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))
# Flush so the timing line is written out before the next statements run.
sys.stdout.flush()

# We do this every few examples to understand what's going on
def sgd_callback(model, num_examples_seen):
  dt = datetime.now().isoformat()
  loss = model.calculate_loss(x_train[:10000], y_train[:10000])
  print("\n%s (%d)" % (dt, num_examples_seen))
  print("--------------------------------------------------")
  print("Loss: %f" % loss)
Esempio n. 5
0
  # Load saved parameters from a fixed file and print five generated
  # sentences.
  path = 'GRU-epo41-voc3700-hid128.dat.npz'
  model = load_model_parameters_theano(path, modelClass=GRUTheano)
  generate_sentences(model, 5, index_to_word, word_to_index)
  # A bare ``exit`` only names the built-in and does nothing, so execution
  # used to fall through into the code below. Call sys.exit() so the script
  # really stops here; the import is local because the file's import block
  # is not visible from this spot.
  import sys
  sys.exit()


# Train once per epoch index in [start_epoch, NEPOCH). Every index above zero
# first reloads the parameter file named after that index; index zero (or
# lower) starts from a freshly constructed model.
for epoch in range(start_epoch, NEPOCH):
  if epoch <= 0:
    # Build model from scratch
    model = GRUTheano(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1)
  else:
    path = 'results/GRU-epo%s-voc%s-hid%s.dat.npz' % (epoch, VOCABULARY_SIZE, HIDDEN_DIM)
    print('Loading file %s' % path)
    print('Training  %s of %s epochs' % (epoch, NEPOCH))
    model = load_model_parameters_theano(path, modelClass=GRUTheano)

  # Line continuation is implicit inside the parentheses, so no trailing
  # backslashes are needed.
  train_with_sgd(
      model,
      x_train,
      y_train,
      learning_rate=LEARNING_RATE,
      nepoch=80,
      startfrom=epoch,
      decay=0.9,
      callback_every=PRINT_EVERY,
      callback=sgd_callback,
  )



"""
# load vocabulary 
xx, yy, vocab, word_to_index, index_to_word = loadText('pg11.txt', \
Esempio n. 6
0
             W=model.W.get_value(),
             V=model.V.get_value(),
             b=model.b.get_value(),
             c=model.c.get_value())
    print "Saved model parameters to %s." % outfile


# When no output file was configured, derive one under ./data from the
# document name, the current timestamp (minute resolution), the vocabulary
# size and the hidden dimension.
if not MODEL_OUTPUT_FILE:
    ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
    MODEL_OUTPUT_FILE = "./data/GRU-%s-%s-%s-%s.dat" % (
        lyric_document, ts, len(num2word), HIDDEN_DIM)

# Build the model. The first argument is word_dim, i.e. the vocabulary size.
# The second argument is the number of hidden-layer units. The third argument
# is the number of steps to backtrack through time during training.
model = GRUTheano(len(num2word),
                  hidden_dim=HIDDEN_DIM,
                  bptt_truncate=BPTT_STEPS)
# The triple-quoted string below is disabled code, not documentation: it holds
# a snippet that times a single SGD training step. As a bare string expression
# it is never executed.
"""
#这几行代码测试执行一步训练需要多少时间。
# Print SGD step time
t1 = time.time()
model.sgd_step(x_train[10], y_train[10], LEARNING_RATE)
t2 = time.time()
print "SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.)
sys.stdout.flush()
"""


# We do this every few examples to understand what's going on
def sgd_callback(model, num_examples_seen):
    dt = datetime.now().isoformat()
Esempio n. 7
0
# Run settings. Each value is read from the environment variable of the same
# name and falls back to the literal default given here.
HIDDEN_DIM = int(os.environ.get("HIDDEN_DIM", "1024"))
NEPOCH = int(os.environ.get("NEPOCH", "20"))
MODEL_OUTPUT_FILE = os.environ.get("MODEL_OUTPUT_FILE", "./model.txt")
INPUT_DATA_FILE = os.environ.get("INPUT_DATA_FILE", "/home/zhouh/Data/nmt/corpus.en")
# Fixed copy-paste error: this used to read the INPUT_DATA_FILE variable, so
# overriding the corpus path silently pointed the dictionary path at the same
# file and the dictionary could never be overridden on its own.
INPUT_DICT_FILE = os.environ.get("INPUT_DICT_FILE", "/home/zhouh/Data/nmt/corpus.en.pkl")
PRINT_EVERY = int(os.environ.get("PRINT_EVERY", "25000"))

# MODEL_OUTPUT_FILE has a non-empty default, so this branch only runs when the
# environment variable is explicitly set to an empty string.
if not MODEL_OUTPUT_FILE:
  ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
  MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, VOCABULARY_SIZE, EMBEDDING_DIM, HIDDEN_DIM)

# Load the training data and the two vocabulary mappings.
x_train, y_train, word_to_index, index_to_word = load_data(INPUT_DATA_FILE, INPUT_DICT_FILE)

# Build model
model = GRUTheano(VOCABULARY_SIZE, hidden_dim=HIDDEN_DIM, bptt_truncate=-1)

# Time one SGD step on a single training example. Single-argument print(...)
# behaves the same on Python 2 and Python 3, whereas the bare print statement
# is a SyntaxError on Python 3.
t1 = time.time()
model.sgd_step(x_train[10], y_train[10], LEARNING_RATE)
t2 = time.time()
print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))
sys.stdout.flush()

# We do this every few examples to understand what's going on
def sgd_callback(model, num_examples_seen):
  dt = datetime.now().isoformat()
  loss = model.calculate_loss(x_train[:10000], y_train[:10000])
  print("\n%s (%d)" % (dt, num_examples_seen))
  print("--------------------------------------------------")
  print("Loss: %f" % loss)
Esempio n. 8
0
    print "\ntest loss: " + str(type(model))
    print "Expected Loss for random predictions: %f" % np.log(model.word_dim)
    print "Actual loss: %f" % model.calculate_loss(X_train[:100],
                                                   y_train[:100])


def test_performance(model, learning_rate):
    """Print how long one SGD step of ``model`` takes, in milliseconds.

    Args:
        model: Object exposing ``sgd_step(x, y, learning_rate)``.
        learning_rate: Value passed through to ``model.sgd_step``.

    The step is run on the example at index 10 of the module-level
    ``X_train`` / ``y_train``. Nothing is returned; the model's type and the
    measured wall-clock time are written to standard output.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("\ntest performance: " + str(type(model)))
    t1 = time.time()
    model.sgd_step(X_train[10], y_train[10], learning_rate)
    t2 = time.time()
    print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))


# Instantiate the three model classes with the same vocabulary size and hidden
# dimension so that their timings and losses are comparable.
model_gru = GRUTheano(
    word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1)
model_theano = RNNTheano(
    word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1)
model_rnn = RNNNumpy(
    word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1)

# First time one SGD step on every model, in the order GRU, Theano RNN,
# NumPy RNN ...
for _candidate in (model_gru, model_theano, model_rnn):
    test_performance(_candidate, _LEARNING_RATE)

# ... then report the loss of every model, in the same order.
for _candidate in (model_gru, model_theano, model_rnn):
    test_loss(_candidate)