def train_numpy():
    """Train an RNNNumpy language model with SGD and optionally save it.

    Reads hyperparameters from ``Config`` and uses the module-level
    ``X_train`` / ``y_train`` datasets. Times a single SGD step first to
    give a rough estimate of total training cost, then runs full SGD
    training and (if ``Config._MODEL_FILE`` is set) saves the parameters.
    """
    model = RNNNumpy(Config._VOCABULARY_SIZE, hidden_dim=Config._HIDDEN_DIM)

    # Time one parameter update to gauge how long full training will take.
    t1 = time.time()
    model.sgd_step(X_train[10], y_train[10], Config._LEARNING_RATE)
    t2 = time.time()
    # FIX: converted Python 2 `print` statements to print() calls so this
    # function matches the rest of the file and runs under Python 3.
    print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))

    model.train_with_sgd(X_train, y_train, nepoch=Config._NEPOCH,
                         learning_rate=Config._LEARNING_RATE)
    # train_with_sgd(model, X_train, y_train, nepoch=_NEPOCH, learning_rate=_LEARNING_RATE)

    # `is not None` is the idiomatic identity test (PEP 8), not `!= None`.
    if Config._MODEL_FILE is not None:
        print("start saving model...")
        save_model_parameters_numpy(Config._MODEL_FILE, model)
        print("model saved!")
# To avoid performing millions of expensive calculations we use # a smaller vocabulary size for checking. grad_check_vocab_size = 100 np.random.seed(10) # re-seed the generator model_test_grad_check = RNNNumpy(grad_check_vocab_size, 10, bptt_truncate=1000) model_test_grad_check.gradient_check([0, 1, 2, 3], [1, 2, 3, 4]) print() print("##########################") print("# Test a single SGD STEP #") print("##########################") np.random.seed(10) model_test_sgd_step = RNNNumpy(vocabulary_size) model_test_sgd_step.sgd_step(X_train[10], y_train[10], 0.005) print() # Train on a small subset of the data to see what happens print("####################################") print("# Test TRAINING on a small dataset #") print("####################################") np.random.seed(10) model_training_small = RNNNumpy(vocabulary_size) # Stochastic Gradient Descent Algorithm def train_with_sgd(model, X_train,
# Limit to 1000 examples to save time print("Expected Loss for random predictions: %f" % np.log(vocabulary_size)) print("Actual loss: %f" % model.calculate_loss(X_train[:1000], y_train[:1000])) # To avoid performing millions of expensive calculations we use a smaller vocabulary size for checking. grad_check_vocab_size = 100 np.random.seed(10) modelcheck = RNNNumpy(grad_check_vocab_size, 10, bptt_truncate=1000) modelcheck.gradient_check([0, 1, 2, 3], [1, 2, 3, 4]) print("\n -------------ccccccccc") # get a sense of how long it would take to train our network: np.random.seed(10) model = RNNNumpy(vocabulary_size) t1 = time.time() model.sgd_step(X_train[10], y_train[10], 0.005) #do 1 step of SGD to test updating of para t2 = time.time() print("SGD Step time with RNNNumpy: %f milliseconds" % ((t2 - t1) * 1000.)) # Train on a small subset of the data to see what happens # np.random.seed(10) # model = RNNNumpy(vocabulary_size) # losses = train_with_sgd(model, X_train[:100], y_train[:100], nepoch=10, evaluate_loss_after=1) ######### Build RNN Theano model ########### print("-----------------------------") model = RNNTheano(vocabulary_size, hidden_dim=_HIDDEN_DIM) t1 = time.time() model.sgd_step(X_train[10], y_train[10], _LEARNING_RATE) t2 = time.time()