import sys
from datetime import datetime

import utils as ut  # assuming the tutorial's utils module provides the save/load helpers


def train_with_sgd(model, X_train, y_train, learning_rate=0.005, nepoch=1, evaluate_loss_after=5, verbose=True):
    # We keep track of the losses so we can plot them later
    losses = []
    num_examples_seen = 0
    for epoch in range(nepoch):
        if verbose:
            print("Epoch %d" % epoch)
        # Optionally evaluate the loss
        if (epoch % evaluate_loss_after == 0):
            loss = model.calculate_loss(X_train, y_train)
            losses.append((num_examples_seen, loss))
            time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            print("%s: Loss after num_examples_seen=%d epoch=%d: %f" % (time, num_examples_seen, epoch, loss))
            # Halve the learning rate if the loss increased
            if (len(losses) > 1 and losses[-1][1] > losses[-2][1]):
                learning_rate = learning_rate * 0.5
                print("Setting learning rate to %f" % learning_rate)
            sys.stdout.flush()
            # ADDED! Saving model parameters
            ut.save_model_parameters_theano("./data/rnn-theano-%d-%d-%s.npz" % (model.hidden_dim, model.word_dim, time), model)
        # For each training example...
        for i in range(len(y_train)):
            # One SGD step
            model.sgd_step(X_train[i], y_train[i], learning_rate)
            num_examples_seen += 1
    return losses  # so callers can plot the training curve
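
# The (num_examples_seen, loss) pairs collected above are meant for plotting the
# training curve. A minimal sketch of that plot, assuming matplotlib is
# available; plot_losses is a hypothetical helper, not part of the tutorial:
def plot_losses(losses):
    import matplotlib.pyplot as plt
    examples_seen, loss_values = zip(*losses)  # unzip the (count, loss) pairs
    plt.plot(examples_seen, loss_values, marker='o')
    plt.xlabel('Examples seen')
    plt.ylabel('Loss')
    plt.show()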
def sgd_callback(model, num_examples_seen):
    dt = datetime.now().isoformat()
    # Evaluate on a fixed slice of the training data to keep the check cheap
    loss = model.calculate_loss(x_train[:5000], y_train[:5000])
    print("\n%s (%d)" % (dt, num_examples_seen))
    print("--------------------------------------------------")
    print("Loss: %f" % loss)
    # generate_sentences(model, 10, index_to_word, word_to_index)
    ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
    # _EMBEDDING_DIM is assumed here; the original interpolated
    # "_HIDDEN_DIM, HIDDEN_DIM", which looked like a duplicated-name typo.
    MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, vocabulary_size, _EMBEDDING_DIM, _HIDDEN_DIM)
    ut.save_model_parameters_theano(model, MODEL_OUTPUT_FILE)
    print("\n")
    sys.stdout.flush()
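
# sgd_callback above is meant to be invoked periodically from inside a training
# loop. A minimal sketch of such a loop; the train_with_callback name and the
# callback_every parameter are assumptions, though model.sgd_step matches the
# snippets above:
def train_with_callback(model, x_train, y_train, learning_rate=0.001,
                        nepoch=20, callback_every=10000, callback=None):
    num_examples_seen = 0
    for epoch in range(nepoch):
        for i in range(len(y_train)):
            model.sgd_step(x_train[i], y_train[i], learning_rate)
            num_examples_seen += 1
            # Report the loss and checkpoint every callback_every examples
            if callback and num_examples_seen % callback_every == 0:
                callback(model, num_examples_seen)
    return model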
def train_with_sgd(model, X_train, y_train, learning_rate=0.005, nepoch=1, evaluate_loss_after=5):
    # We keep track of the losses so we can plot them later
    losses = []
    num_examples_seen = 0
    for epoch in range(nepoch):
        # Optionally evaluate the loss
        if (epoch % evaluate_loss_after == 0):
            loss = model.calculate_loss(X_train, y_train)
            losses.append((num_examples_seen, loss))
            time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            print("%s: Loss after num_examples_seen=%d epoch=%d: %f" % (time, num_examples_seen, epoch, loss))
            # Adjust the learning rate if the loss increases
            if (len(losses) > 1 and losses[-1][1] > losses[-2][1]):
                learning_rate = learning_rate * 0.5
                print("Setting learning rate to %f" % learning_rate)
            sys.stdout.flush()
            # ADDED! Saving model parameters
            save_model_parameters_theano("./data/rnn-theano-%d-%d-%s.npz" % (model.hidden_dim, model.word_dim, time), model)
        # For each training example...
        for i in range(len(y_train)):
            # One SGD step
            model.sgd_step(X_train[i], y_train[i], learning_rate)
            num_examples_seen += 1
    return losses
np.random.seed(10)
model = RNNTheano(vocabulary_size)
# get_ipython().magic(u'timeit model.sgd_step(X_train[10], y_train[10], 0.005)')

# This time, one SGD step takes 70ms on my Mac (without GPU) and 23ms on a
# [g2.2xlarge](https://aws.amazon.com/ec2/instance-types/#g2) Amazon EC2 instance
# with GPU. That's a 15x improvement over our initial implementation and means we
# can train our model in hours or days instead of weeks. There are still a vast
# number of optimizations we could make, but this is good enough for now.
#
# To help you avoid spending days training a model, I have pre-trained a Theano
# model with a hidden layer dimensionality of 50 and a vocabulary size of 8000.
# I trained it for 50 epochs in about 20 hours. The loss was still decreasing,
# and training longer would probably have resulted in a better model, but I was
# running out of time and wanted to publish this post. Feel free to try it out
# yourself and train for longer. You can find the model parameters in
# `data/trained-model-theano.npz` in the Github repository and load them using
# the `load_model_parameters_theano` method:

# In[ ]:

from utils import load_model_parameters_theano, save_model_parameters_theano

model = RNNTheano(vocabulary_size, hidden_dim=50)
losses = train_with_sgd(model, X_train, y_train, nepoch=20)
save_model_parameters_theano('./data/trained-model-theano.npz', model)
# load_model_parameters_theano('./data/trained-model-theano.npz', model)


# ### Generating Text
#
# Now that we have our model we can ask it to generate new text for us! Let's
# implement a helper function to generate new sentences:

# In[ ]:

def generate_sentence(model):
    # We start the sentence with the start token
    new_sentence = [word_to_index[sentence_start_token]]
    # Repeat until we get an end token
    while not new_sentence[-1] == word_to_index[sentence_end_token]:
        next_word_probs = model.forward_propagation(new_sentence)
        sampled_word = word_to_index[unknown_token]
        # We don't want to sample unknown words
        while sampled_word == word_to_index[unknown_token]:
            samples = np.random.multinomial(1, next_word_probs[-1])
            sampled_word = np.argmax(samples)
        new_sentence.append(sampled_word)
    # Drop the start/end tokens and map indices back to words
    sentence_str = [index_to_word[x] for x in new_sentence[1:-1]]
    return sentence_str
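
# The commented-out generate_sentences(...) call in sgd_callback above implies a
# plural helper built on top of generate_sentence. A minimal sketch; the vocab
# arguments are accepted to match that call (generate_sentence reads them from
# module scope), and the min_length filter is an assumption:
def generate_sentences(model, n, index_to_word, word_to_index, min_length=7):
    for _ in range(n):
        sent = []
        # Resample until we get a sentence that isn't trivially short
        while len(sent) < min_length:
            sent = generate_sentence(model)
        print(" ".join(sent))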
print("Found %d unique words tokens." % len(list(word_freq.items()))) vocabulary_size = min(max_vocab_size,len(list(word_freq.items()))) # Get the most common words and build index_to_word and word_to_index vectors vocab = word_freq.most_common(vocabulary_size-1) index_to_word = [x[0] for x in vocab] index_to_word.append(unknown_token) word_to_index = dict([(w,i) for i,w in enumerate(index_to_word)]) print("Using vocabulary size %d." % vocabulary_size) print("The least frequent word in our vocabulary is '%s' and appeared %d times." % (vocab[-1][0], vocab[-1][1])) # Replace all words not in our vocabulary with the unknown token for i, sent in enumerate(tokenized_sentences): tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in sent] #print "\nExample sentence: '%s'" % sentences[0] #print "\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0] # Create the training data X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences]) y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences]) model = RNNTheano(vocabulary_size, hidden_dim=50) losses = train_with_sgd(model, X_train, y_train, nepoch=50) save_model_parameters_theano('./data/trained-model-'+label+'-dim50-t50.npz', model) #load_model_parameters_theano('./data/trained-model-theano.npz', model)
# Get the most common words and build index_to_word and word_to_index vectors
vocab = word_freq.most_common(vocabulary_size - 1)
index_to_word = [x[0] for x in vocab]
index_to_word.append(unknown_token)
word_to_index = dict([(w, i) for i, w in enumerate(index_to_word)])

print("Using vocabulary size %d." % vocabulary_size)
print("The least frequent word in our vocabulary is '%s' and appeared %d times." % (
    vocab[-1][0], vocab[-1][1]))

# Replace all words not in our vocabulary with the unknown token
for i, sent in enumerate(tokenized_sentences):
    tokenized_sentences[i] = [
        w if w in word_to_index else unknown_token for w in sent
    ]

# print("\nExample sentence: '%s'" % sentences[0])
# print("\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0])

# Create the training data
X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])
y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences])

model = RNNTheano(vocabulary_size, hidden_dim=50)
losses = train_with_sgd(model, X_train, y_train, nepoch=50)
save_model_parameters_theano('./data/trained-model-' + label + '-dim50-t50.npz', model)
# load_model_parameters_theano('./data/trained-model-theano.npz', model)
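
# X_train and y_train are the same token sequence offset by one position: the
# network sees a word at each step and learns to predict the one that follows.
# A toy illustration with hypothetical, made-up word indices:
example = [0, 12, 85, 41, 1]   # e.g. SENTENCE_START, "the", "cat", "sat", SENTENCE_END
x_example = example[:-1]       # [0, 12, 85, 41]  input word at each time step
y_example = example[1:]        # [12, 85, 41, 1]  target: the next word at each step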
test_label = dio.get_test_gt()

# Center the data on the training-set mean
mean_ = np.mean(train_data, axis=0)
rand_data -= mean_

model = rnn.RNNTheano()

if TRAIN:
    EPOCH = 1
    # Integer division so we get a whole number of mini-batches
    n = train_data.shape[0] // model.batch_size
    j = 0
    for it in range(EPOCH):
        np.random.shuffle(rand_data)
        for i in range(n):
            # Take the next mini-batch and run one SGD step on it
            dt = rand_data[i * model.batch_size:(i + 1) * model.batch_size]
            rt = model.sgd_step(dt)
            time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            print("%s iteration: %6d, loss: %f" % (time, j, rt[0]))
            j += 1
    if not os.path.exists('models'):
        os.makedirs("models")
    utils.save_model_parameters_theano('models/models.npz', model)

utils.load_model_parameters_theano('models/models.npz', model)

# Binarize the codes for the training set and the test queries
d = train_data - mean_
p = model.predict(d)
B = utils.num2bit(p)

test_data -= mean_
query = model.predict(test_data)
query_b = utils.num2bit(query)

print("start calculating mAP ...")
print(utils.cat_map(B, train_label, query_b, test_label))
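
# utils.num2bit (used above) turns the model's real-valued codes into binary
# hash bits before the mAP evaluation. Its implementation isn't shown in this
# snippet, so the following is only a plausible sketch assuming simple
# thresholding at zero:
def num2bit_sketch(codes):
    import numpy as np
    return (codes > 0).astype(np.int8)  # 1 where the activation is positive, else 0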