def main():
    """Train a DRNNLM on the word-index corpus, pickle it, and print samples.

    Steps, in order:
      1. Construct a ``GloveWrapper`` and take its ``L`` attribute as the
         initial ``L0`` (also passed as ``U0``).
      2. Load the train and test splits through ``get_data``.
      3. Train with ``custom_train_sgd`` on the training data repeated
         ``n_epochs`` times.
      4. Pickle the trained model to ``../data/drnnlm_model.pkl``.
      5. Generate ten sequences and print each one as space-joined words.
    """
    data_root = '../data/wordinds/'

    print('starting L matrix construction')
    gw = GloveWrapper(verbose=True)
    L0 = gw.L

    train_files = os.listdir(data_root + '/train/')
    print('getting train and test data')
    train_x, train_y, train_D, _ = get_data(gw, train_files, 'train/')
    # NOTE(review): the test split is requested with the *training* directory
    # listing; that only works if both directories hold the same file names.
    # The results are not used below - confirm whether this call is needed.
    test_x, test_y, test_D, _ = get_data(gw, train_files, 'test/')

    # One row per value up to the last entry of train_D, 300 columns.
    # NOTE(review): assumes train_D[-1] is the largest index and that 300 is
    # the embedding width - confirm.
    D0 = np.random.randn(train_D[-1] + 1, 300)
    print('got train and test data')

    # Epochs are realised by repeating the training sequences.
    # NOTE(review): assumes these are Python lists, so `*` repeats rather
    # than scales - confirm.
    n_epochs = 25
    train_x = train_x * n_epochs
    train_y = train_y * n_epochs
    train_D = train_D * n_epochs

    model = DRNNLM(L0, D0, U0=L0, alpha=0.05, rseed=10, bptt=3)
    print('constructed model, training...')
    model.custom_train_sgd(train_x, train_y, train_D,
                           apply_to=['H', 'U', 'L', 'D'],
                           printevery=5, costevery=25)
    print('training done')

    print('saving model')
    # Binary mode keeps the pickle stream intact on every platform.
    with open('../data/drnnlm_model.pkl', 'wb') as model_file:
        pkl.dump(model, model_file)
    print('model saved')

    for _ in range(10):
        seq, _ = model.generate_sequence(1, gw.get_index("SSTART"),
                                         gw.get_index("EEND"), maxlen=100)
        # str.join takes exactly one iterable argument.
        print(" ".join(seq_to_words(seq)))
def main():
    """Train a SimpleDRNNLM on the word-index corpus, pickle it, print samples.

    Steps, in order:
      1. Construct a ``GloveWrapper`` and take its ``L`` attribute as the
         initial ``L0`` (also passed as ``U0``).
      2. Load the train and test splits through ``get_data``.
      3. Train with ``custom_train_sgd`` on the training data repeated
         ``n_epochs`` times.
      4. Pickle the trained model to ``../data/simple_drnnlm_model.pkl``.
      5. Generate ten sequences and print each one as space-joined words.

    Runs start to finish without stopping in the debugger.
    """
    data_root = "../data/wordinds/"

    print("starting L matrix construction")
    gw = GloveWrapper(verbose=True)
    L0 = gw.L

    train_files = os.listdir(data_root + "/train/")
    print("getting train and test data")
    train_x, train_y, train_D, _ = get_data(gw, train_files, "train/")
    # NOTE(review): the test split is requested with the *training* directory
    # listing; that only works if both directories hold the same file names.
    # The results are not used below - confirm whether this call is needed.
    test_x, test_y, test_D, _ = get_data(gw, train_files, "test/")

    # One row per value up to the last entry of train_D, 300 columns.
    # NOTE(review): assumes train_D[-1] is the largest index and that 300 is
    # the embedding width - confirm.
    D0 = np.random.randn(train_D[-1] + 1, 300)
    print("got train and test data")

    # Epochs are realised by repeating the training sequences.
    # NOTE(review): assumes these are Python lists, so `*` repeats rather
    # than scales - confirm.
    n_epochs = 25
    train_x = train_x * n_epochs
    train_y = train_y * n_epochs
    train_D = train_D * n_epochs

    model = SimpleDRNNLM(L0, D0, U0=L0, alpha=0.05, rseed=10, bptt=3)
    print("constructed model, training...")
    model.custom_train_sgd(
        train_x,
        train_y,
        train_D,
        apply_to=["H", "U", "L", "D"],
        printevery=5,
        costevery=25,
    )
    print("training done")

    print("saving model")
    # Binary mode keeps the pickle stream intact on every platform.
    with open("../data/simple_drnnlm_model.pkl", "wb") as model_file:
        pkl.dump(model, model_file)
    print("model saved")

    for _ in range(10):
        seq, _ = model.generate_sequence(
            1, gw.get_index("SSTART"), gw.get_index("EEND"), maxlen=100
        )
        print(" ".join(seq_to_words(seq)))
def main():
    """Train a SimpleDRNNLM on the word-index corpus, pickle it, print samples.

    The function constructs a ``GloveWrapper`` (its ``L`` attribute seeds both
    ``L0`` and ``U0``), loads the train and test splits via ``get_data``,
    trains with ``custom_train_sgd`` on the training data repeated
    ``n_epochs`` times, writes the model to
    ``../data/simple_drnnlm_model.pkl`` and prints ten generated sequences
    as space-joined words.

    Runs start to finish without stopping in the debugger.
    """
    data_root = '../data/wordinds/'

    print('starting L matrix construction')
    gw = GloveWrapper(verbose=True)
    L0 = gw.L

    train_files = os.listdir(data_root + '/train/')
    print('getting train and test data')
    train_x, train_y, train_D, _ = get_data(gw, train_files, 'train/')
    # NOTE(review): the test split is requested with the *training* directory
    # listing; that only works if both directories hold the same file names.
    # The results are not used below - confirm whether this call is needed.
    test_x, test_y, test_D, _ = get_data(gw, train_files, 'test/')

    # One row per value up to the last entry of train_D, 300 columns.
    # NOTE(review): assumes train_D[-1] is the largest index and that 300 is
    # the embedding width - confirm.
    D0 = np.random.randn(train_D[-1] + 1, 300)
    print('got train and test data')

    # Epochs are realised by repeating the training sequences.
    # NOTE(review): assumes these are Python lists, so `*` repeats rather
    # than scales - confirm.
    n_epochs = 25
    train_x = train_x * n_epochs
    train_y = train_y * n_epochs
    train_D = train_D * n_epochs

    model = SimpleDRNNLM(L0, D0, U0=L0, alpha=0.05, rseed=10, bptt=3)
    print('constructed model, training...')
    model.custom_train_sgd(train_x, train_y, train_D,
                           apply_to=['H', 'U', 'L', 'D'],
                           printevery=5, costevery=25)
    print('training done')

    print('saving model')
    # Binary mode keeps the pickle stream intact on every platform.
    with open('../data/simple_drnnlm_model.pkl', 'wb') as model_file:
        pkl.dump(model, model_file)
    print('model saved')

    for _ in range(10):
        seq, _ = model.generate_sequence(1, gw.get_index("SSTART"),
                                         gw.get_index("EEND"), maxlen=100)
        print(" ".join(seq_to_words(seq)))
class TextPreprocessor:
    """Turn raw document text into per-sentence lists of word indices.

    Indices come from the ``get_index`` method of a ``GloveWrapper`` that is
    created once per instance.
    """

    def __init__(self):
        """Create the GloveWrapper (verbose mode) used for index lookups."""
        self.glove_vecs = GloveWrapper(verbose=True)

    def doc_to_inds(self, doc):
        """Return one list of indices for each sentence found in ``doc``.

        The text is lower-cased and every newline is replaced by a space
        before NLTK sentence and word tokenization. Each sentence's list
        starts with the index of 'SSTART' and ends with the index of 'EEND'.
        """
        lookup = self.glove_vecs.get_index
        flattened = doc.lower().replace('\n', ' ')

        all_sentences = []
        for sentence in nltk.tokenize.sent_tokenize(flattened):
            tokens = nltk.word_tokenize(sentence)
            # Marker lookups stay inside the loop so the sequence of
            # get_index calls is the same for every sentence.
            row = [lookup('SSTART')]
            row.extend(lookup(token) for token in tokens)
            row.append(lookup('EEND'))
            all_sentences.append(row)
        return all_sentences
import os
import numpy as np
from theano import tensor as T
import cPickle as cpkl
from rnn_lang import get_data
from rnn_slu import RNNSLU
from glove_wrapper import GloveWrapper

# Script: load the word-index training split and train an RNNSLU model on it,
# one sentence at a time, for a fixed number of epochs.

data_root = '../data/wordinds/'

print('starting L matrix construction')
gw = GloveWrapper(verbose=True)

train_files = os.listdir(data_root + '/train/')
print('getting train and test data')
train_x, train_y, train_D, train_class = get_data(gw, train_files, 'train/')

print('initializing')
# First argument is the last entry of train_D plus one.
# NOTE(review): presumably the number of distinct D indices - confirm.
drnn = RNNSLU(train_D[-1] + 1, gw)
print('intialized')

num_epochs = 25
learn_rate = 0.01

for epoch in range(num_epochs):
    for idx, sentence in enumerate(train_x):
        # Inputs and targets are cast to int32 arrays before each update.
        inputs = np.array(sentence).astype('int32')
        targets = np.array(train_y[idx]).astype('int32')
        drnn.sentence_train(inputs, targets, learn_rate, train_D[idx])
        # NOTE(review): normalize() is applied after every sentence update;
        # confirm it is not meant to run once per epoch instead.
        drnn.normalize()
def __init__(self):
    """Create a GloveWrapper in verbose mode and store it on ``self.glove_vecs``."""
    wrapper = GloveWrapper(verbose=True)
    self.glove_vecs = wrapper