Example #1
import sys
from datetime import datetime

import utils as ut  # assumed import for the ut.* helpers used below

def train_with_sgd(model, X_train, y_train, learning_rate=0.005, nepoch=1, evaluate_loss_after=5, verbose=True):
    # We keep track of the losses so we can plot them later
    losses = []
    num_examples_seen = 0
    for epoch in range(nepoch):

        if verbose:
            print("Epoch %d" % epoch)

        # Optionally evaluate the loss
        if epoch % evaluate_loss_after == 0:
            loss = model.calculate_loss(X_train, y_train)
            losses.append((num_examples_seen, loss))
            time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            print("%s: Loss after num_examples_seen=%d epoch=%d: %f" % (time, num_examples_seen, epoch, loss))
            # Halve the learning rate if the loss increased since the last evaluation
            if len(losses) > 1 and losses[-1][1] > losses[-2][1]:
                learning_rate = learning_rate * 0.5
                print("Setting learning rate to %f" % learning_rate)
            sys.stdout.flush()
            # ADDED! Saving model parameters
            ut.save_model_parameters_theano("./data/rnn-theano-%d-%d-%s.npz" % (model.hidden_dim, model.word_dim, time), model)
        # For each training example...
        for i in range(len(y_train)):
            # One SGD step
            model.sgd_step(X_train[i], y_train[i], learning_rate)
            num_examples_seen += 1
    return losses
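
# A minimal usage sketch (hypothetical: it assumes an RNNTheano-style model
# exposing calculate_loss and sgd_step, plus X_train/y_train built as in the
# preprocessing examples further down):
# model = RNNTheano(vocabulary_size, hidden_dim=50)
# losses = train_with_sgd(model, X_train, y_train, nepoch=10, evaluate_loss_after=1)
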
def sgd_callback(model, num_examples_seen):
    dt = datetime.now().isoformat()
    loss = model.calculate_loss(x_train[:5000], y_train[:5000])
    print("\n%s (%d)" % (dt, num_examples_seen))
    print("--------------------------------------------------")
    print("Loss: %f" % loss)
    #generate_sentences(model, 10, index_to_word, word_to_index)
    ts = datetime.now().strftime("%Y-%m-%d-%H-%M")
    MODEL_OUTPUT_FILE = "GRU-%s-%s-%s-%s.dat" % (ts, vocabulary_size,
                                                 EMBEDDING_DIM, HIDDEN_DIM)
    ut.save_model_parameters_theano(model, MODEL_OUTPUT_FILE)
    print("\n")
    sys.stdout.flush()
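
# How this callback might be wired into training (a sketch; it assumes a
# train_with_sgd variant that accepts callback/callback_every keyword
# arguments, unlike the versions shown elsewhere in this file):
# model = train_with_sgd(model, x_train, y_train, learning_rate=LEARNING_RATE,
#                        nepoch=NEPOCH, callback_every=PRINT_EVERY,
#                        callback=sgd_callback)
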
import sys
from datetime import datetime

from utils import save_model_parameters_theano  # assumed import

def train_with_sgd(model, X_train, y_train, learning_rate=0.005, nepoch=1, evaluate_loss_after=5):
    # We keep track of the losses so we can plot them later
    losses = []
    num_examples_seen = 0
    for epoch in range(nepoch):
        # Optionally evaluate the loss
        if epoch % evaluate_loss_after == 0:
            loss = model.calculate_loss(X_train, y_train)
            losses.append((num_examples_seen, loss))
            time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            print("%s: Loss after num_examples_seen=%d epoch=%d: %f" % (time, num_examples_seen, epoch, loss))
            # Halve the learning rate if the loss increased since the last evaluation
            if len(losses) > 1 and losses[-1][1] > losses[-2][1]:
                learning_rate = learning_rate * 0.5
                print("Setting learning rate to %f" % learning_rate)
            sys.stdout.flush()
            # ADDED! Saving model parameters
            save_model_parameters_theano("./data/rnn-theano-%d-%d-%s.npz" % (model.hidden_dim, model.word_dim, time), model)
        # For each training example...
        for i in range(len(y_train)):
            # One SGD step
            model.sgd_step(X_train[i], y_train[i], learning_rate)
            num_examples_seen += 1
    return losses
Example #4
np.random.seed(10)
model = RNNTheano(vocabulary_size)
# get_ipython().magic(u'timeit model.sgd_step(X_train[10], y_train[10], 0.005)')

# This time, one SGD step takes 70ms on my Mac (without GPU) and 23ms on a [g2.2xlarge](https://aws.amazon.com/ec2/instance-types/#g2) Amazon EC2 instance with GPU. That's a 15x improvement over our initial implementation and means we can train our model in hours or days instead of weeks. There are still a vast number of optimizations we could make, but this is good enough for now.
#
# To help you avoid spending days training a model I have pre-trained a Theano model with a hidden layer dimensionality of 50 and a vocabulary size of 8000. I trained it for 50 epochs in about 20 hours. The loss was still decreasing and training longer would probably have resulted in a better model, but I was running out of time and wanted to publish this post. Feel free to try it out yourself and train for longer. You can find the model parameters in `data/trained-model-theano.npz` in the GitHub repository and load them using the `load_model_parameters_theano` method:

# In[ ]:

from utils import load_model_parameters_theano, save_model_parameters_theano

model = RNNTheano(vocabulary_size, hidden_dim=50)
losses = train_with_sgd(model, X_train, y_train, nepoch=20)
save_model_parameters_theano('./data/trained-model-theano.npz', model)
#load_model_parameters_theano('./data/trained-model-theano.npz', model)
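
# To skip training entirely, the pre-trained parameters mentioned above can be
# loaded instead (this assumes data/trained-model-theano.npz from the
# repository is present and was saved with hidden_dim=50):
# model = RNNTheano(vocabulary_size, hidden_dim=50)
# load_model_parameters_theano('./data/trained-model-theano.npz', model)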

# ### Generating Text
#
# Now that we have our model we can ask it to generate new text for us! Let's implement a helper function to generate new sentences:

# In[ ]:


def generate_sentence(model):
    # We start the sentence with the start token
    new_sentence = [word_to_index[sentence_start_token]]
    # Repeat until we get an end token
    while not new_sentence[-1] == word_to_index[sentence_end_token]:
        next_word_probs = model.forward_propagation(new_sentence)
        # Sample the next word, resampling whenever we draw the unknown token
        sampled_word = word_to_index[unknown_token]
        while sampled_word == word_to_index[unknown_token]:
            samples = np.random.multinomial(1, next_word_probs[-1])
            sampled_word = np.argmax(samples)
        new_sentence.append(sampled_word)
    # Drop the start/end tokens and map indices back to words
    return [index_to_word[x] for x in new_sentence[1:-1]]
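
# A usage sketch: sample a few sentences from the trained model (assumes
# generate_sentence returns a list of word strings, as above):
# num_sentences = 10
# for _ in range(num_sentences):
#     print(" ".join(generate_sentence(model)))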
Example #5
        print("Found %d unique words tokens." % len(list(word_freq.items())))
        
        vocabulary_size = min(max_vocab_size,len(list(word_freq.items())))

        # Get the most common words and build index_to_word and word_to_index vectors
        vocab = word_freq.most_common(vocabulary_size-1)
        index_to_word = [x[0] for x in vocab]
        index_to_word.append(unknown_token)
        word_to_index = dict([(w,i) for i,w in enumerate(index_to_word)])

        print("Using vocabulary size %d." % vocabulary_size)
        print("The least frequent word in our vocabulary is '%s' and appeared %d times." % (vocab[-1][0], vocab[-1][1]))

        # Replace all words not in our vocabulary with the unknown token
        for i, sent in enumerate(tokenized_sentences):
            tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in sent]

        #print "\nExample sentence: '%s'" % sentences[0]
        #print "\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0]
                              
        # Create the training data
        X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])
        y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences])
                              
        model = RNNTheano(vocabulary_size, hidden_dim=50)
        losses = train_with_sgd(model, X_train, y_train, nepoch=50)
        save_model_parameters_theano('./data/trained-model-'+label+'-dim50-t50.npz', model)
        #load_model_parameters_theano('./data/trained-model-theano.npz', model)
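
        # For context, word_freq and tokenized_sentences are typically built
        # upstream along these lines (a sketch, assuming nltk and an iterable
        # of raw sentence strings named sentences):
        # import itertools
        # import nltk
        # tokenized_sentences = [nltk.word_tokenize(sent.lower()) for sent in sentences]
        # word_freq = nltk.FreqDist(itertools.chain(*tokenized_sentences))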
        
                              
Example #6
        # Get the most common words and build index_to_word and word_to_index vectors
        vocab = word_freq.most_common(vocabulary_size - 1)
        index_to_word = [x[0] for x in vocab]
        index_to_word.append(unknown_token)
        word_to_index = dict([(w, i) for i, w in enumerate(index_to_word)])

        print "Using vocabulary size %d." % vocabulary_size
        print "The least frequent word in our vocabulary is '%s' and appeared %d times." % (
            vocab[-1][0], vocab[-1][1])

        # Replace all words not in our vocabulary with the unknown token
        for i, sent in enumerate(tokenized_sentences):
            tokenized_sentences[i] = [
                w if w in word_to_index else unknown_token for w in sent
            ]

        #print "\nExample sentence: '%s'" % sentences[0]
        #print "\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0]

        # Create the training data
        X_train = np.asarray([[word_to_index[w] for w in sent[:-1]]
                              for sent in tokenized_sentences])
        y_train = np.asarray([[word_to_index[w] for w in sent[1:]]
                              for sent in tokenized_sentences])

        model = RNNTheano(vocabulary_size, hidden_dim=50)
        losses = train_with_sgd(model, X_train, y_train, nepoch=50)
        save_model_parameters_theano(
            './data/trained-model-' + label + '-dim50-t50.npz', model)
        #load_model_parameters_theano('./data/trained-model-theano.npz', model)
Example #7
import os
from datetime import datetime

import numpy as np

# Assumed imports for the project-local helper modules used below
import dio
import rnn
import utils

# train_data, rand_data, test_data, train_label and TRAIN are assumed to be
# prepared earlier in the original script.
test_label = dio.get_test_gt()

# Center the data on the training mean
mean_ = np.mean(train_data, axis=0)
rand_data -= mean_

model = rnn.RNNTheano()
if TRAIN:
    EPOCH = 1
    n = train_data.shape[0] // model.batch_size
    j = 0
    for it in range(EPOCH):
        np.random.shuffle(rand_data)
        for i in range(n):
            # One mini-batch SGD step
            dt = rand_data[i*model.batch_size:(i+1)*model.batch_size]
            rt = model.sgd_step(dt)
            time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            print("%s  iteration: %6d, loss: %f" % (time, j, rt[0]))
            j += 1
    if not os.path.exists('models'):
        os.makedirs("models")
    utils.save_model_parameters_theano('models/models.npz', model)
utils.load_model_parameters_theano('models/models.npz', model)
# Binarize the database and query predictions, then evaluate retrieval mAP
d = train_data - mean_
p = model.predict(d)
B = utils.num2bit(p)
test_data -= mean_
query = model.predict(test_data)
query_b = utils.num2bit(query)
print("start calculating mAP ...")
print(utils.cat_map(B, train_label, query_b, test_label))
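
# utils.num2bit is not shown in this snippet. A plausible minimal stand-in
# that binarizes the real-valued predictions by sign (an assumption about its
# behavior, not the actual implementation):
def num2bit_sketch(p):
    # Map each real-valued output to a 0/1 bit by thresholding at zero
    return (p >= 0).astype(np.int8)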