class CWS:
    def __init__(self, s):
        self.rnn = RNN(s['ne'], s['de'], s['win'], s['nh'], s['nc'],
                       np.random.RandomState(s['seed']))
        self.s = s

    def fit(self, lex, label):
        s = self.s
        n_sentences = len(lex)
        n_train = int(n_sentences * (1. - s['valid_size']))
        s['clr'] = s['lr']
        best_f = 0
        be = 0
        for e in xrange(s['n_epochs']):
            shuffle([lex, label], s['seed'])
            train_lex, valid_lex = lex[:n_train], lex[n_train:]
            train_label, valid_label = label[:n_train], label[n_train:]
            tic = time.time()
            for i in xrange(n_train):
                cwords = contextwin(train_lex[i], s['win'])
                words = map(lambda x: np.asarray(x).astype('int32'),
                            minibatch(cwords, s['bs']))
                labels = train_label[i]
                for word_batch, label_last_word in zip(words, labels):
                    self.rnn.fit(word_batch, label_last_word, s['clr'])
                    self.rnn.normalize()
                if s['verbose']:
                    print '[learning] epoch %i >> %2.2f%%' % (e + 1, (i + 1) * 100. / n_train), 'completed in %s << \r' % time_format(time.time() - tic),
                    sys.stdout.flush()
            pred_y = self.predict(valid_lex)
            p, r, f = evaluate(pred_y, valid_label)
            print '[learning] epoch %i >> P: %2.2f%% R: %2.2f%% F: %2.2f%%' % (e + 1, p * 100., r * 100., f * 100.), '<< %s used' % time_format(time.time() - tic)
            if f > best_f:
                best_f = f
                be = e
                self.save()
            if s['decay'] and e - be >= 5:
                s['clr'] *= 0.5
            if s['clr'] < 1e-5:
                break

    def predict(self, lex):
        s = self.s
        y = [self.rnn.predict(np.asarray(contextwin(x, s['win'])).astype('int32'))[1:-1]
             for x in lex]
        return y

    def save(self):
        if not os.path.exists('params'):
            os.mkdir('params')
        self.rnn.save()

    def load(self):
        self.rnn.load()
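# A minimal sketch (an assumption, not the project's actual helpers) of the
# contextwin/minibatch pair used above, in the style of the classic Theano
# slot-filling tutorial: contextwin pads the sentence with -1 and returns one
# index window per position; minibatch yields the growing prefixes that give
# the RNN its left context.
def contextwin_sketch(sentence, win):
    assert win % 2 == 1 and win >= 1
    padded = [-1] * (win // 2) + list(sentence) + [-1] * (win // 2)
    return [padded[i:i + win] for i in range(len(sentence))]

def minibatch_sketch(cwords, bs):
    out = [cwords[:i] for i in range(1, min(bs, len(cwords)) + 1)]
    out += [cwords[i - bs:i] for i in range(bs + 1, len(cwords) + 1)]
    return out  # one batch per position, each at most bs windows long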
def create_training_data(self):
    X_train = np.asarray([[self.word_to_index[w] for w in sent[:-1]]
                          for sent in self.tokenized_sentences])
    y_train = np.asarray([[self.word_to_index[w] for w in sent[1:]]
                          for sent in self.tokenized_sentences])
    model = RNN(self.vocabulary_size, self.hidden_dim, 4)
    t1 = time.time()
    model.sgd_step(X_train[10], y_train[10], self.learning_rate)
    t2 = time.time()
    print("SGD step time: %f milliseconds" % ((t2 - t1) * 1000.))
    if self.model_file is not None:
        self.load_model_parameters(self.model_file, model)
    if self.enable_training:
        self.train_with_sgd(model, X_train, y_train, self.learning_rate,
                            self.nepoch)
    return model
def create_model():
    if args.model_type == 'lstm':
        return LSTM(input_size=dset.input_dimension, hidden_size=args.hx,
                    output_size=dset.output_dimension, layers=args.layers,
                    drop=args.drop, rec_drop=args.rec_drop)
    elif args.model_type == 'rnn':
        return RNN(input_size=dset.input_dimension, hidden_size=args.hx,
                   output_size=dset.output_dimension, layers=args.layers,
                   drop=args.drop, rec_drop=args.rec_drop)
    elif args.model_type == 'irnn':
        return IRNN(input_size=dset.input_dimension, hidden_size=args.hx,
                    output_size=dset.output_dimension, layers=args.layers,
                    drop=args.drop, rec_drop=args.rec_drop)
    elif args.model_type == 'gru':
        return GRU(input_size=dset.input_dimension, hidden_size=args.hx,
                   output_size=dset.output_dimension, layers=args.layers,
                   drop=args.drop, rec_drop=args.rec_drop)
    elif args.model_type == 'rnn+':
        # IntersectionRNN requires at least two layers
        if args.layers == 1:
            args.layers = 2
        return IntersectionRNN(input_size=dset.input_dimension,
                               hidden_size=args.hx,
                               output_size=dset.output_dimension,
                               layers=args.layers, drop=args.drop,
                               rec_drop=args.rec_drop)
    elif args.model_type == 'peephole':
        return Peephole(input_size=dset.input_dimension, hidden_size=args.hx,
                        output_size=dset.output_dimension, layers=args.layers,
                        drop=args.drop, rec_drop=args.rec_drop)
    elif args.model_type == 'ugrnn':
        return UGRNN(input_size=dset.input_dimension, hidden_size=args.hx,
                     output_size=dset.output_dimension, layers=args.layers,
                     drop=args.drop, rec_drop=args.rec_drop)
    else:
        raise ValueError('unknown model_type: %s' % args.model_type)
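# Hypothetical usage sketch: create_model reads the module-level `args` and
# `dset` objects; a pair of namespaces like these (all field values are
# illustrative assumptions, not the project's defaults) is enough to drive
# the factory.
from types import SimpleNamespace

args = SimpleNamespace(model_type='gru', hx=128, layers=2, drop=0.1,
                       rec_drop=0.0)
dset = SimpleNamespace(input_dimension=40, output_dimension=10)
model = create_model()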
def main():
    global encoder
    count = 0
    for filename in FILE_NAMES[:-1]:
        count += file_len('clean/' + filename)
    take_size = math.floor(count / 5)
    tmp = labeled_data()
    data = tmp[-1]
    all_labeled_data = shuffle_labled_data(tmp)
    train_test_data = shuffle_labled_data(tmp[:-1])
    tokenizer = tfds.features.text.Tokenizer()
    vocabulary_set = set()
    for text_tensor, _ in all_labeled_data:
        some_tokens = tokenizer.tokenize(text_tensor.numpy())
        vocabulary_set.update(some_tokens)
    vocab_size = len(vocabulary_set)
    encoder = tfds.features.text.TokenTextEncoder(vocabulary_set)
    train_test_data = train_test_data.map(encode_map_fn)
    data = data.map(encode_map_fn)
    data = data.padded_batch(BATCH_SIZE, padded_shapes=([None], []))
    for ex in data.take(5):
        print(ex)
    train_data = train_test_data.skip(take_size).shuffle(BUFFER_SIZE)
    train_data = train_data.padded_batch(BATCH_SIZE, padded_shapes=([None], []))
    test_data = train_test_data.take(take_size)
    test_data = test_data.padded_batch(BATCH_SIZE, padded_shapes=([None], []))
    vocab_size += 1  # account for the 0 padding element
    model = RNN(vocab_size, train_data, test_data)
    result = model.predict_classes(data, batch_size=None, verbose=0)
    for i, v in enumerate(result):
        if v == 0:
            print(i + 1)
def creat_trunk_ply_by_nn(AE_model_dir1, AE_model_dir2, RNN_model_dir,
                          rawPc_ply_dir_list, device, threshold, save_dir):
    rnn_model = RNN().to(device)
    rnn_model.load_state_dict(torch.load(RNN_model_dir))
    AE_model1 = autoencoder.AE_3d_conv().to(device)
    AE_model1.load_state_dict(torch.load(AE_model_dir1))
    rnn_in_feature = get_rnn_in_featur(AE_model1, device, rawPc_ply_dir_list)
    rnn_out = get_rnn_out(rnn_model, rnn_in_feature)
    # AE_model2 = autoencoder.AE_3d_conv().to(device)
    # AE_model2.load_state_dict(torch.load(AE_model_dir2))
    AE_decoder_out = get_AE_decoder_out(AE_model1, rnn_out)
    # point_counts = len(AE_decoder_out[AE_decoder_out > 0.2])
    AE_decoder_out = torch.squeeze(AE_decoder_out)
    print('try to save predicted point cloud (.ply) by NN')
    tensor_to_ply(AE_decoder_out, threshold, save_dir)
def __init__(self, vocab_size, embedding_dim, hidden_dim, n_classes=1,
             bidirectional=False, padding_idx=0):
    super(SentimentRNN, self).__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx)
    self.bridge = nn.Linear(embedding_dim, embedding_dim)
    self.rnn = RNN(embedding_dim, hidden_dim)
    self.out = nn.Linear(hidden_dim, n_classes)
def test(netFile, dataSet, model='RNN', trees=None,
         confusion_matrix_file=None, acti=None):
    if trees is None:
        trees = tr.loadTrees(dataSet)
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s" % netFile
    with open(netFile, 'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)
        if model == 'RNTN':
            nn = RNTN(wvecDim=opts.wvecDim, outputDim=opts.outputDim,
                      numWords=opts.numWords, mbSize=opts.minibatch,
                      rho=opts.rho, acti=acti)
        elif model == 'RNN':
            nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
                     opts.minibatch)
        else:
            raise ValueError('%s is not a valid neural network; so far only '
                             'RNTN, RNN' % model)
        nn.initParams()
        nn.fromFile(fid)
    print "Testing %s..." % model
    cost, correct, guess, total = nn.costAndGrad(trees, test=True)
    correct_sum = 0
    for i in xrange(0, len(correct)):
        correct_sum += (guess[i] == correct[i])
    correctSent = 0
    for tree in trees:
        sentLabel = tree.root.label
        sentPrediction = tree.root.prediction
        if sentLabel == sentPrediction:
            correctSent += 1
    # Generate confusion matrix
    # if confusion_matrix_file is not None:
    #     cm = confusion_matrix(correct, guess)
    #     makeconf(cm, confusion_matrix_file)
    print "%s: Cost %f, Acc %f, Sentence-Level: Acc %f" % (
        dataSet, cost, correct_sum / float(total),
        correctSent / float(len(trees)))
    return (correct_sum / float(total), correctSent / float(len(trees)))
def train_rnn(step, data_path):
    # 27-dimensional vocabulary (presumably a-z plus one end-of-word symbol)
    rnn = RNN(27, 50, 27, lr=0.01)
    gen = yield_sample(data_path)
    for i in range(step):
        word, x, y = next(gen)
        # print(y)
        rnn.inference(x)
        los = rnn.loss(y)
        rnn.bptt(y)
        print("step:%d, loss:%f" % (i, los))
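# A hypothetical sketch of the yield_sample generator assumed above: it walks
# a file of words and yields (word, x, y) where x is the sequence of one-hot
# character vectors and y is the same sequence shifted left by one
# (next-character targets). The encoding details are guesses, not the real
# helper.
import numpy as np

def yield_sample_sketch(data_path, dim=27):
    while True:
        with open(data_path) as f:
            for word in f.read().split():
                idx = [ord(c) - ord('a') for c in word.lower() if c.isalpha()]
                idx.append(dim - 1)        # end-of-word symbol
                if len(idx) < 2:
                    continue
                x = np.eye(dim)[idx[:-1]]  # inputs: all but the last symbol
                y = np.array(idx[1:])      # targets: next-symbol indices
                yield word, x, y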
def test(netFile, dataSet, model='RNN', trees=None):
    if trees is None:
        trees = tr.loadTrees(dataSet)
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s" % netFile
    with open(netFile, 'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)
        if model == 'RNTN':
            nn = RNTN(opts.wvecDim, opts.outputDim, opts.numWords,
                      opts.minibatch)
        elif model == 'RNN':
            nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
                     opts.minibatch)
        elif model == 'RNN2':
            nn = RNN2(opts.wvecDim, opts.middleDim, opts.outputDim,
                      opts.numWords, opts.minibatch)
        elif model == 'RNN3':
            nn = RNN3(opts.wvecDim, opts.middleDim, opts.outputDim,
                      opts.numWords, opts.minibatch)
        elif model == 'DCNN':
            nn = DCNN(opts.wvecDim, opts.ktop, opts.m1, opts.m2, opts.n1,
                      opts.n2, 0, opts.outputDim, opts.numWords, 2,
                      opts.minibatch, rho=1e-4)
            trees = cnn.tree2matrix(trees)
        else:
            raise ValueError('%s is not a valid neural network; so far only '
                             'RNTN, RNN, RNN2, RNN3, and DCNN' % model)
        nn.initParams()
        nn.fromFile(fid)
    print "Testing %s..." % model
    cost, correct, guess, total = nn.costAndGrad(trees, test=True)
    correct_sum = 0
    for i in xrange(0, len(correct)):
        correct_sum += (guess[i] == correct[i])
    # TODO
    # Plot the confusion matrix?
    print "Cost %f, Acc %f" % (cost, correct_sum / float(total))
    return correct_sum / float(total)
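# A minimal sketch for the confusion-matrix TODO above: `correct` and `guess`
# are the per-node label lists returned by costAndGrad, so the matrix can be
# accumulated directly with numpy (no external plotting code required).
import numpy as np

def confusion_sketch(correct, guess, n_classes):
    cm = np.zeros((n_classes, n_classes), dtype=int)
    for c, g in zip(correct, guess):
        cm[c][g] += 1  # rows: true label, columns: prediction
    return cm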
def get_audio_feature_extractor(model_path, gpu=-1):
    if gpu < 0:
        device = torch.device("cpu")
        model_dict = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
    else:
        device = torch.device("cuda:" + str(gpu))
        model_dict = torch.load(
            model_path,
            map_location=lambda storage, loc: storage.cuda(gpu))
    audio_rate = model_dict["audio_rate"]
    audio_feat_len = model_dict['audio_feat_len']
    rnn_gen_dim = model_dict['rnn_gen_dim']
    aud_enc_dim = model_dict['aud_enc_dim']
    video_rate = model_dict["video_rate"]
    encoder = RNN(audio_feat_len, aud_enc_dim, rnn_gen_dim, audio_rate,
                  init_kernel=0.005, init_stride=0.001)
    encoder.to(device)
    encoder.load_state_dict(model_dict['encoder'])
    # consecutive windows overlap by all but one video-frame period
    overlap = audio_feat_len - 1.0 / video_rate
    return encoder, {
        "rate": audio_rate,
        "feature length": audio_feat_len,
        "overlap": overlap
    }
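# Hypothetical usage sketch: the returned metadata dict fixes the
# sliding-window timing, so window start times can be derived without
# touching the encoder itself. The checkpoint path is illustrative.
encoder, meta = get_audio_feature_extractor('audio_encoder.pt', gpu=-1)
step = meta['feature length'] - meta['overlap']  # one window per video frame
starts = [i * step for i in range(5)]            # start times (s) of the first windows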
def load_existed_models(self):
    mdir = self.model_dir
    models = {}
    for fname in os.listdir(mdir):
        if fname.endswith('.h5'):
            print('[Load]', fname)
            rnn = RNN(mdir + fname)
            if rnn.model is not None:
                models[fname] = rnn
    self.models = models
    print('[Load] done.')
def test_forward(self):
    config = {'dim_hidden': 10, 'len': 2}
    l = RNN(config)
    l.accept([26])
    # independent arrays; [np.zeros([26])] * 2 would alias the same buffer
    x = [np.zeros([26]) for _ in range(2)]
    x[0][0] = 1.0
    x[1][1] = 1.0
    l.forward(x)
def test_fit(self):
    config = {'dim_hidden': 10, 'len': 2, 'step_size': 0.01}
    l = RNN(config)
    l.accept([26])
    # independent arrays; [np.zeros([26])] * 2 would alias the same buffer
    x = [np.zeros([26]) for _ in range(2)]
    x[0][0] = 1.0
    x[1][1] = 1.0
    y = np.array([1, 2])
    l.fit(x, y, 100, config)
def eval(opts, data=None):
    # generate and evaluate a test set for analysis
    print('eval start')
    save_path = opts.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    print('graph start')
    tf.reset_default_graph()
    if data:
        X, Y, N = data
    else:
        X, Y, N = inputs.create_inputs(opts, train=False)
    opts.n_inputs = X.shape[0]
    opts.batch_size = opts.n_inputs
    X_pl, Y_pl, N_pl = create_placeholders(X.shape[-1], Y.shape[-1],
                                           opts.rnn_size, X.shape[1])
    train_iter, next_element = create_tf_dataset(X_pl, Y_pl, N_pl,
                                                 opts.batch_size,
                                                 shuffle=False)
    print('rnn start')
    model = RNN(next_element, opts, training=False)
    save_name = opts.activity_name
    print('[*] Testing')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(train_iter.initializer,
                 feed_dict={X_pl: X, Y_pl: Y, N_pl: N})
        # sess.run(train_iter.initializer, feed_dict={X_pl: X, Y_pl: Y})
        print('loading saved')
        model.load()
        save_activity(model, X, Y, N, save_path, save_name)
def test(netFile, dataSet, model='RNN', trees=None):
    if trees is None:
        if dataSet == "train":
            trees = tr.load_trees(TRAIN_DATA_FILE)
        elif dataSet == "dev":
            trees = tr.load_trees(DEV_DATA_FILE)
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s" % netFile
    # f = open(netFile, 'rb')
    # opts = pickle.load(f)
    # _ = pickle.load(f)
    opts = joblib.load(netFile + "_opts")
    _ = joblib.load(netFile + "_cost")
    if model == 'RNTN':
        nn = RNTN(opts.wvecDim, opts.outputDim, opts.numWords, opts.minibatch)
    elif model == 'RNN':
        nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords, opts.minibatch)
    elif model == 'RNN2':
        nn = RNN2(opts.wvecDim, opts.middleDim, opts.outputDim, opts.numWords,
                  opts.minibatch)
    else:
        raise ValueError('%s is not a valid neural network; so far only '
                         'RNTN, RNN, and RNN2' % model)
    nn.initParams()
    # nn.stack = pickle.load(f)
    # nn.stack = np.load(f)
    nn.stack = joblib.load(netFile + "_stack")
    # f.close()
    print "Testing %s..." % model
    cost, correct, guess, total = nn.costAndGrad(trees, test=True)
    correct_sum = 0
    for i in xrange(0, len(correct)):
        correct_sum += (guess[i] == correct[i])
    # confusion matrix
    conf_arr = np.zeros((opts.outputDim, opts.outputDim))
    for i in xrange(len(correct)):
        curr_correct = correct[i]
        curr_guess = guess[i]
        conf_arr[curr_correct][curr_guess] += 1.0
    # makeconf(conf_arr)
    print "Cost %f, Acc %f" % (cost, correct_sum / float(total))
    return correct_sum / float(total)
def predicted_labels(sentence, hypothesis, classifier, network='best-GRU'):
    if network == 'best-GRU':
        # load model
        vocab = ['Europeans', 'Germans', 'Italians', 'Romans', 'all',
                 'children', 'fear', 'hate', 'like', 'love', 'not', 'some']
        rels = ['#', '<', '=', '>', '^', 'v', '|']
        word_dim = 25
        n_hidden = 128
        cpr_dim = 75
        model_path = '/Users/mathijs/Documents/Studie/MoL/thesis/mol_thesis/final_experiments/binary_fol_rnn/nobrackets/models/GRUbinary_2dets_4negs_train_0bracket_pairs1.pt'
        net = RNN('GRU', vocab, rels, word_dim, n_hidden, cpr_dim)
        net.load_state_dict(torch.load(model_path))
    s = [sentence.split()]
    _, hidden_vectors = net.rnn_forward(s, 1, hypothesis=hypothesis)
    test_hiddens = np.array(hidden_vectors[0])
    y_pred = classifier.predict(test_hiddens)
    labels = np.array([y_pred])
    return labels
def runCircTest():
    r0 = 10.0
    r1 = 10.0
    nTrainingPoint = 1000000
    batchSize = 30
    deltaTheta = 0.25
    theta = 0.0
    data = []
    rnn = RNN(inputDim=2, stateDim=20, rate=0.1)
    for i in range(0, nTrainingPoint):
        theta = (theta + deltaTheta) % (2 * np.pi)
        x = r0 * np.cos(theta)
        y = r1 * np.sin(theta)
        data.append(np.array([x, y]))
    for i in range(0, nTrainingPoint, batchSize):
        batch = data[i:i + batchSize]
        rnn.update(batch)
    result = rnn.predict(start=np.array([r0, 0.0]), nStep=10)
    for x, y in result:
        print(x, y)
def main():
    input_size, output_size = 3, 3
    rnn = RNN()
    rnn.add_layer(LSTM(input_size, output_size))
    X_train = [[[1, 0, 0]], [[0, 1, 0]], [[0, 0, 1]]]
    Y_train = [[[0, 1, 0]], [[0, 0, 1]], [[1, 0, 0]]]
    epochs = 1000
    rnn.train(X_train, Y_train, epochs=epochs)
    for p, y in zip(rnn.predict(X_train), Y_train):
        _p = np.zeros_like(p).astype(int)
        _p[:, np.argmax(p)] = 1
        print('%30s %10s %10s' % (p.reshape(1, -1), _p, np.array(y)))
class Scorer(object):
    def __init__(self, char_list, model_path, rnn_type, ninp, nhid, nlayers,
                 device):
        char_list = list(char_list) + ['sil_start', 'sil_end']
        self.inv_vocab_map = dict([(i, c) for (i, c) in enumerate(char_list)])
        self.vocab_map = dict([(c, i) for (i, c) in enumerate(char_list)])
        self.criterion = nn.CrossEntropyLoss()
        self.device = device
        self.rnn = RNN(rnn_type, len(char_list), ninp, nhid,
                       nlayers).to(self.device)
        self.rnn.load_state_dict(torch.load(model_path))
        self.rnn.eval()
        self.history = defaultdict(tuple)

    def get_score(self, string):
        if len(string) < 2:
            return 0, self.rnn.init_hidden(1)
        # list() so the result can be sliced under Python 3 as well
        string_idx = list(map(lambda x: self.vocab_map[x], string))
        input = string_idx[:-1]
        grt = string_idx[1:]
        input, grt = torch.LongTensor(input).to(self.device), \
            torch.LongTensor(grt).to(self.device)
        input = input.view(1, input.size()[0])
        init_hidden = self.rnn.init_hidden(1)
        pred, hidden = self.rnn(input, init_hidden)
        pred = pred.view(-1, pred.size(-1))
        loss = self.criterion(pred, grt)
        # the criterion averages over positions; scale back to a summed log-prob
        return -(len(string_idx) - 1) * loss.item(), hidden

    def get_score_fast(self, strings):
        strings = [''.join(x) for x in strings]
        history_to_update = defaultdict(tuple)
        scores = []
        for string in strings:
            if len(string) <= 2:
                score, hidden_state = self.get_score(string)
                scores.append(score)
                history_to_update[string] = (score, hidden_state)
            elif string in self.history:
                history_to_update[string] = self.history[string]
                scores.append(self.history[string][0])
            elif string[:-1] in self.history:
                # extend a cached prefix by one character
                score, hidden = self.history[string[:-1]]
                input, grt = torch.LongTensor(
                    [self.vocab_map[string[-2]]]).view(1, 1).to(self.device), \
                    torch.LongTensor([self.vocab_map[string[-1]]]).to(self.device)
                pred, hidden = self.rnn(input, hidden)
                loss = self.criterion(pred.view(-1, pred.size(-1)), grt).item()
                history_to_update[string] = (score - loss, hidden)
                scores.append(score - loss)
            else:
                raise ValueError("%s not stored" % (string[:-1]))
        self.history = history_to_update
        return scores
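# Hypothetical usage sketch: beam-search prefixes must grow one character at
# a time so that get_score_fast always finds the previous prefix in its
# cache. The constructor arguments and checkpoint path are illustrative
# assumptions, not values from the source.
import torch

scorer = Scorer('abc', 'char_rnn.pt', 'LSTM', ninp=64, nhid=128, nlayers=2,
                device=torch.device('cpu'))
print(scorer.get_score_fast([['a', 'b']]))       # short string: scored from scratch
print(scorer.get_score_fast([['a', 'b', 'c']]))  # extends the cached 'ab' prefix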
def TrainHebian(identifier, num_epochs=2_000):
    hebian_model = RNN(input_size, hidden_size, output_size)
    # train the model using Hebbian learning
    trainer = Hebian(hebian_model, task, alpha_trace=0.5)
    trainer.TrainHebbian(num_trials=num_epochs)
    F = hebian_model.GetF()
    roots, pca = FindFixedPoints(F, [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1,
                                     -0.1, -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1])
    hebian_model.pca = pca
    hebian_model.save('hebian_model' + str(identifier))
    return hebian_model
def TrainFORCE(identifier, num_epochs=2_000):
    force_model = RNN(input_size, hidden_size, output_size)
    # train model using FORCE
    trainer = Force(force_model, task, alpha=1000)
    trainer.trainForce(num_trials=num_epochs)
    F = force_model.GetF()
    roots, pca = FindFixedPoints(F, [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1,
                                     -0.1, -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1])
    force_model.pca = pca
    force_model.save('force_model' + str(identifier))
    return force_model
def __init__(self, rng, input, h_prev, y_prev, dim, n_feature_maps,
             window_sizes, n_hidden, n_out):
    # self.cnn = CNN(rng=rng, input=input, dim=dim,
    #                n_feature_maps=n_feature_maps, window_sizes=window_sizes)
    # self.rnn = RNN(rng=rng, input=self.cnn.output, h_prev=h_prev,
    #                y_prev=y_prev, n_in=n_feature_maps*len(window_sizes),
    #                n_hidden=n_hidden, n_out=n_out)
    self.avg = Average(input=input, dim=dim)
    self.rnn = RNN(rng=rng, input=self.avg.output, h_prev=h_prev,
                   y_prev=y_prev, n_in=dim, n_hidden=n_hidden, n_out=n_out)
    self.h = self.rnn.h
    self.y = self.rnn.y
    self.output = self.rnn.output
    self.loss = self.rnn.loss
    self.error = self.rnn.error
    # self.params = self.cnn.params + self.rnn.params
    self.params = self.rnn.params
def test():
    data = LoadTestData()
    untrained_models = []
    config = {'ngram': 3, 'est': 'add-delta', 'delta': 0.3}
    untrained_models.append((HMM(config), 'HMM. config: {}'.format(config)))
    config = {
        'ftrs': ('IS_FIRST', 'IS_LAST', 'VAL', 'PRV_VAL', 'NXT_VAL',
                 'FRST_VAL', 'LST_VAL', 'SCND_VAL', 'SCND_LST_VAL')
    }
    untrained_models.append((MEMM(config), 'MEMM. config: {}'.format(config)))
    config = {
        'ftrs': ('IS_FIRST', 'IS_LAST', 'IDX', 'VAL', 'PRV_VAL', 'NXT_VAL',
                 'FRST_VAL', 'LST_VAL', 'SCND_VAL', 'SCND_LST_VAL')
    }
    untrained_models.append(
        (CRF_WORD(config), 'CRF. config: {}'.format(config)))
    trained_models = [(model.prep_data().shuffle(0xfab1e).split(0).train(), name)
                      for model, name in untrained_models]
    config = {
        'n_layers': 3,
        'hidden_dim': 32,
        'embedding': 'mds',
        'win_len': 4,
        'device': 'cpu'
    }
    rnn = RNN(config)
    trained_models.append((rnn.prep_model().load('rnn_model.bin'),
                           'RNN. config: {}'.format(config)))
    for model, name in trained_models:
        trained_model = model
        conf_mat, dist = TestModel(trained_model, data)
        print('\n')
        print(name)
        print('=' * 80)
        print('Vowel metrics:')
        print('-' * 50)
        PrintConfMat(conf_mat)
        print('-' * 50)
        print('Edit distance:')
        print('-' * 50)
        for stage in range(1, 4):
            print('Stage = {}:'.format(stage_names[stage]))
            print(' Average = {}\n Median = {}\n Min = {}\n Max = {}'.format(
                dist[stage][0], dist[stage][1], dist[stage][2], dist[stage][3]))
def test_backward(self):
    config = {'dim_hidden': 10, 'len': 2}
    l = RNN(config)
    l.accept([26])
    # independent arrays; [np.zeros([26])] * 2 would alias the same buffer
    x = [np.zeros([26]) for _ in range(2)]
    x[0][0] = 1.0
    x[1][1] = 1.0
    y = l.forward(x)
    dy = [None] * 2
    loss, dy[0] = utils.cross_entropy(utils.softmax(y[0]), np.array([0]))
    loss, dy[1] = utils.cross_entropy(utils.softmax(y[1]), np.array([1]))
    dW, dU, dV = l.backward(dy)
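# A self-contained sketch of how a backward pass like the one above can be
# verified: compare the analytic gradient of softmax + cross-entropy against
# central finite differences. All names here are local to the sketch.
import numpy as np

def softmax_sketch(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def xent_sketch(z, t):
    return -np.log(softmax_sketch(z)[t])

z = np.random.randn(5)
t = 2
analytic = softmax_sketch(z) - np.eye(5)[t]  # d xent / d z
numeric = np.zeros(5)
eps = 1e-6
for i in range(5):
    zp, zm = z.copy(), z.copy()
    zp[i] += eps
    zm[i] -= eps
    numeric[i] = (xent_sketch(zp, t) - xent_sketch(zm, t)) / (2 * eps)
assert np.allclose(analytic, numeric, atol=1e-5)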
def TrainGenetic(identifier, num_generations=15):
    genetic_model = RNN(input_size, hidden_size, output_size)
    # train model using genetic algorithm
    num_pop = 50
    sigma = 0.01
    trainer = Genetic(genetic_model, task, num_generations)
    trainer.trainGenetic(num_pop, sigma, batch_size=50, num_parents=5,
                         mutation=0.1)
    F = genetic_model.GetF()
    roots, pca = FindFixedPoints(F, [[1], [0.9], [0.8], [0.7], [0.6], [0.5], [0.4], [0.3], [0.2], [0.1],
                                     [-0.1], [-0.2], [-0.3], [-0.4], [-0.5], [-0.6], [-0.7], [-0.8], [-0.9], [-1]])
    genetic_model.pca = pca
    genetic_model.save('genetic_model' + str(identifier))
    return genetic_model
def load_model(input_size):
    model = RNN(input_size, hidden_size, num_layers)
    # load on CPU only
    checkpoint = torch.load('checkpoint.pt', map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    print(model)
    print('model training loss', checkpoint['loss'])
    print('model training epoch', checkpoint['epoch'])
    return model
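# Hypothetical usage sketch: run the restored model on a dummy batch. The
# (batch, seq_len, input_size) layout assumes the checkpointed RNN was built
# with batch_first=True; adjust if the actual model expects otherwise.
import torch

model = load_model(input_size=8)
dummy = torch.randn(1, 20, 8)  # one sequence of 20 steps
with torch.no_grad():
    output = model(dummy)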
def run_model(which='all'):
    if which in ['ann', 'all', 'main', 'standard']:
        model = ANN(emb_size, vocab_size, hid_dim, hid_num, class_num,
                    sent_len).cuda()
        ann_loss = train(model, x, target, ann=True)
        plt.plot(ann_loss, label='ann')
    if which in ['wann', 'all', 'standard']:
        model = WANN(emb_size, vocab_size, hid_dim, hid_num, class_num,
                     sent_len).cuda()
        wann_loss = train(model, x, target, ann=True)
        plt.plot(wann_loss, label='wann')
    if which in ['rnn', 'all', 'main']:
        model = RNN(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        rnn_loss = train(model, x, target)
        plt.plot(rnn_loss, label='rnn')
    if which in ['exrnn', 'all']:
        model = EXRNN(emb_size, vocab_size, hid_dim, hid_num, class_num,
                      2000, 2000).cuda()
        exrnn_loss = train(model, x, target)
        plt.plot(exrnn_loss, label='exrnn')
    if which in ['exmem', 'all']:
        model = EXRNN(emb_size, vocab_size, hid_dim, hid_num, class_num,
                      2000, forget_dim=None).cuda()
        exmem_loss = train(model, x, target)
        plt.plot(exmem_loss, label='exmem')
    if which in ['lstm', 'all', 'main']:
        model = LSTM(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        lstm_loss = train(model, x, target)
        plt.plot(lstm_loss, label='lstm')
    if which in ['gru', 'all', 'main']:
        model = GRU(emb_size, vocab_size, hid_dim, hid_num, class_num).cuda()
        gru_loss = train(model, x, target)
        plt.plot(gru_loss, label='gru')
    # plt.ylim([0, 2])
    plt.legend()
    plt.grid(True)
    plt.show()
def main(save=True):
    """
    Train a model.

    save {bool} - whether to save the trained model (default: True)

    Returns: wrapper RNN class for a Keras model (e.g. keras.models.Sequential)
    """
    startTime = time()
    trainingSet, validationSet, scaler = setup()
    trainGen = DataGenerator(trainingSet, scaler, windowSize=WINDOW_SIZE,
                             lookback=LOOKBACK, sampleRate=SAMPLERATE,
                             prediction=PREDICTION).generator()
    validGen = DataGenerator(validationSet, scaler, windowSize=WINDOW_SIZE,
                             lookback=LOOKBACK, sampleRate=SAMPLERATE,
                             prediction=PREDICTION).generator()
    rnn = RNN(HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, PREDICTION)
    optimizer = rnn.pickOptimizer(OPTIMIZER, lr=LEARNING_RATE)
    rnn.model.compile(loss=LOSS_FUNC, optimizer=optimizer)
    rnn.model.fit_generator(trainGen,
                            steps_per_epoch=STEPS_PER_EPOCH,
                            epochs=EPOCHS,
                            validation_data=validGen,
                            validation_steps=VALIDATION_STEP_PER_EPOCH,
                            verbose=2,
                            shuffle=False)
    endTime = time()
    print(
        f"\nTRAINING DONE. Total time elapsed: {strftime('%H:%M:%S', gmtime(endTime - startTime))}"
    )
    if save:
        weightsFile = constructFilename(BASE_PATH, HIDDEN_NODES, LOOKBACK,
                                        WINDOW_SIZE, SAMPLERATE, PREDICTION,
                                        WEIGHT_EXT)
        architectureFile = constructFilename(BASE_PATH, HIDDEN_NODES,
                                             LOOKBACK, WINDOW_SIZE,
                                             SAMPLERATE, PREDICTION,
                                             ARCHITECT_EXT)
        rnn.saveWeights(weightsFile)
        rnn.saveArchitecture(architectureFile)
    return rnn
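# A sketch of how a model saved this way could be restored, assuming
# saveArchitecture wrote model.to_json() and saveWeights wrote HDF5 weights
# (the standard Keras split-file convention). This is an assumption, not the
# project's documented loader.
from keras.models import model_from_json

def load_rnn_sketch(architectureFile, weightsFile):
    with open(architectureFile) as f:
        model = model_from_json(f.read())
    model.load_weights(weightsFile)
    return model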
def __init__(self, input_dimension=300, output_dimension=1000,
             hidden_dimension=512, num_layers=3, context_dimension=None):
    super(Decoder, self).__init__()
    self.max_sequence_length = 20
    self.input_dimension = input_dimension
    self.output_dimension = output_dimension
    self.context_dimension = context_dimension
    self.hidden_dimension = hidden_dimension
    self.num_layers = num_layers
    self.step_count = 0
    self.example_count = 0
    self.fc = nn.Linear(self.hidden_dimension, self.output_dimension)
    self.generating_activation = nn.Softmax(dim=1)
    if self.context_dimension is None:
        self.rnn = RNN(self.input_dimension, self.hidden_dimension,
                       self.num_layers)
    else:
        self.rnn = ContextEnhancedRNN(self.input_dimension,
                                      self.hidden_dimension,
                                      self.context_dimension,
                                      self.num_layers)
    self.initialize_modules()
def __init__(self, vocab_size, hidden_size=256, lr=2e-3, rnn='gru',
             sampling='sample'):
    super(CHAR_RNN, self).__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.sampling = sampling
    if rnn == 'rnn':
        self.rnn = RNN(self.vocab_size, self.hidden_size)
    elif rnn == 'gru':
        self.rnn = GRU(self.vocab_size, self.hidden_size)
    else:
        raise NotImplementedError()
    self.optimizer = optim.Adam(self.parameters(), lr=lr)
    self.criterion = nn.CrossEntropyLoss()
def __init__(
    self,
    encoder_output_dim,
    decoder_type,
    vocab_size,
    embedding_dim,
    num_layers,
):
    super().__init__()
    # CNN Model
    self.cnn = CNN(encoder_output_dim)
    # Decoder (RNN or Transformer)
    d_model = embedding_dim + encoder_output_dim
    if decoder_type == "rnn":
        self.decoder = RNN(vocab_size, embedding_dim, d_model, num_layers)
    else:
        self.decoder = Transformer(vocab_size, embedding_dim, d_model,
                                   num_layers)
def __init__(
    self,
    training_data_dir,
    logdir,
    autoencoder_config,
    rnn_config,
    z_length=32,
    z_output_fn=normalize,
):
    self.feed = FeedDict(training_data_dir, logdir)
    self.autoencoder = AutoEncoder(z_length=z_length,
                                   z_output_fn=z_output_fn,
                                   **autoencoder_config)
    self.rnn = RNN(z_length=z_length, z_output_fn=z_output_fn, **rnn_config)
    self.sess = tf.Session()
    self.sess.run(tf.initialize_all_variables())
def test(netFile, dataSet, trees=None):
    if trees is None:
        trees = tr.printtree(dataSet)
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s" % netFile
    with open(netFile, 'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)
        nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords, opts.alpha,
                 opts.minibatch)
        nn.initParams()
        nn.fromFile(fid)
    cost, Mis = nn.costAndGrad(trees, test=True)
    print "Cost = %f, Acc = %f" % (cost, 1.0 - Mis)
    return (1.0 - Mis)
'''
Generate examples by predicting with a softmax over characters at each time
step, softmax(output). Maximizing the total log probability of the training
sequence implies that the RNN learns a probability distribution over
sequences; we can sample from this conditional distribution to get the next
character of a generated string and feed it back as the next input to the RNN.
'''
'''
e = 1
for i in range(10):
    X = np.random.rand(100, 2)
    y = np.dot(X[:, 0], X[:, 1])
    net = RNN(2, 30, 1)
    c = net.fit(X, y)
    e = 0.1*np.sqrt(c) + 0.9*e
    print(e)
'''
nout = 2
net = RNN(2, 30, nout)
np.random.seed(123)
X = np.random.rand(10, 10, 2)
y = np.random.rand(10, 2)
'''
for i in range(nout):
    y[:, i+1:, i] = X[:, :-i-1, i]
'''
tresh = 0.5
'''
y[0:, ][X[1:-1, :, 1] > X[:-2, :, 0] + tresh] = 1
y[1][X[1:-1, :, 1] > X[:-2, :, 0] + tresh] = 2
'''
net.fit(X, y)
def __init__(self, name, imsize, patchsize, nhid, numpy_rng, eps, hids_scale,
             feature_network=None, input_feature_layer_name=None,
             metric_feature_layer_name=None, nchannels=1, weight_decay=0.):
    # CALL PARENT CONSTRUCTOR TO SETUP CONVENIENCE FUNCTIONS
    # (SAVE/LOAD, ...)
    super(RATM, self).__init__(name=name)

    self.imsize = imsize
    assert len(patchsize) == 2
    self.patchsize = patchsize
    self.nhid = nhid
    self.numpy_rng = numpy_rng
    self.eps = eps
    self.hids_scale = hids_scale
    self.nchannels = nchannels
    self.weight_decay = weight_decay

    assert hasattr(feature_network, 'forward')
    assert hasattr(feature_network, 'load')
    self.feature_network = feature_network
    self.input_feature_layer_name = input_feature_layer_name
    assert (self.input_feature_layer_name in
            self.feature_network.layers.keys())
    self.metric_feature_layer_name = metric_feature_layer_name
    assert (self.metric_feature_layer_name in
            self.feature_network.layers.keys())
    # TODO: remove this constraint, if everything else works
    assert (
        self.feature_network.layers.keys().index(
            self.metric_feature_layer_name) >
        self.feature_network.layers.keys().index(
            self.input_feature_layer_name))

    ftensor5 = T.TensorType(theano.config.floatX, (False,) * 5)
    self.inputs = ftensor5(name='inputs')
    self.inputs.tag.test_value = numpy_rng.randn(
        16, 5, nchannels, imsize[0], imsize[1]).astype(np.float32)
    self.targets = T.ftensor3(name='targets')
    self.targets.tag.test_value = numpy_rng.randn(
        16, 5, 4).astype(np.float32)
    self.masks = T.fmatrix(name='masks')
    self.masks.tag.test_value = np.ones((16, 5), dtype=np.float32)

    self.batchsize = self.inputs.shape[0]
    self.nframes = self.inputs.shape[1]

    # shuffle axes, such that the time axis is first
    self.inputs_frames = self.inputs.transpose(1, 0, 2, 3, 4)
    self.targets_frames = self.targets.transpose(1, 0, 2)
    self.masks_frames = self.masks.T

    self.attention_mechanism = SelectiveAttentionMechanism(
        imsize=imsize, patchsize=patchsize, eps=self.eps,
        nchannels=nchannels)

    self.targets_widthheight = (self.targets_frames[:, :, 1::2] -
                                self.targets_frames[:, :, ::2])
    self.targets_XYs = (self.targets_frames[:, :, 1::2] +
                        self.targets_frames[:, :, ::2]) / 2.
    self.targets_centers_widthheight = T.concatenate((
        self.targets_XYs, self.targets_widthheight), axis=2)

    self.nin = self.feature_network.layers[
        self.input_feature_layer_name].outputs_shape[1]

    self.rnn = RNN(nin=self.nin, nout=10, nhid=self.nhid,
                   numpy_rng=self.numpy_rng, scale=hids_scale)

    self.wread = theano.shared(
        numpy_rng.uniform(
            low=-.001, high=.001, size=(self.nhid, 7)
        ).astype(np.float32), name='wread')

    self.targets_params = T.concatenate((
        # center x,y
        self.targets_centers_widthheight[
            :, :, :2] / np.array(((imsize[::-1],),), dtype=np.float32),
        # std x
        (self.targets_centers_widthheight[:, :, 2] /
         patchsize[1]).dimshuffle(0, 1, 'x'),
        # stride x
        np.float32(1.5) * (self.targets_centers_widthheight[:, :, 2] /
                           imsize[1]).dimshuffle(0, 1, 'x'),
        # gamma (unused)
        T.ones((self.nframes, self.batchsize, 1)),
        # std y
        (self.targets_centers_widthheight[:, :, 3] /
         patchsize[0]).dimshuffle(0, 1, 'x'),
        # stride y
        np.float32(1.5) * (self.targets_centers_widthheight[:, :, 3] /
                           imsize[0]).dimshuffle(0, 1, 'x'),
    ), axis=2)

    self.targets_params_reshape = self.targets_params.reshape((
        self.nframes * self.batchsize, 7))

    (self.targets_patches, _, _, _, _) = \
        self.attention_mechanism.build_read_graph(
            images_var=self.inputs_frames.reshape((
                self.nframes * self.batchsize, self.nchannels,
                self.imsize[0], self.imsize[1])),
            attention_acts=self.targets_params_reshape)

    self.targets_features = self.feature_network.forward_from_to(
        self.targets_patches,
        to_layer_name=self.metric_feature_layer_name)
    self.targets_features = self.targets_features.reshape((
        self.nframes, self.batchsize,
        T.prod(self.targets_features.shape[1:])))

    self.bread_init = T.concatenate((
        # center x,y
        self.targets_centers_widthheight[
            0, :, :2] / np.array((imsize[::-1],), dtype=np.float32),
        # std x
        (self.targets_centers_widthheight[0, :, 2] /
         patchsize[1]).dimshuffle(0, 'x'),
        # stride x
        np.float32(1.5) * (self.targets_centers_widthheight[0, :, 2] /
                           imsize[1]).dimshuffle(0, 'x'),
        # gamma (unused)
        T.ones((self.batchsize, 1)),
        # std y
        (self.targets_centers_widthheight[0, :, 3] /
         patchsize[0]).dimshuffle(0, 'x'),
        # stride y
        np.float32(1.5) * (self.targets_centers_widthheight[0, :, 3] /
                           imsize[0]).dimshuffle(0, 'x'),
    ), axis=1)

    self.params = [self.wread]  # , self.bread_init_factors]
    self.params.extend(self.rnn.params)
    # we're not using the rnn output layer, so remove params
    self.params.remove(self.rnn.wout)
    self.params.remove(self.rnn.bout)

    def step(x_t, h_tm1, bread, wread):
        (patches_t, window_params_t, muX, muY,
         gX, gY) = self.get_input_patches(x_t, h_tm1, wread, bread)
        features_t = self.feature_network.forward_from_to(
            patches_t,
            from_layer_name=self.feature_network.layers.keys()[0],
            to_layer_name=self.input_feature_layer_name)
        h_t, o_t = self.rnn.step(features_t, h_tm1)
        h_t_norm = T.sqrt(T.sum(h_t**2, axis=-1))
        return (h_t, window_params_t, patches_t, features_t,
                window_params_t, muX, muY, gX, gY, h_t_norm)

    (self.hiddens, breads, self.patches, self.features, self.window_params,
     muX, muY, gX, gY, h_t_norms), self.updates = theano.scan(
        fn=step,
        sequences=self.inputs_frames,
        outputs_info=[
            T.zeros((self.batchsize, self.nhid),
                    dtype=theano.config.floatX),
            self.bread_init,
            None, None, None, None, None, None, None, None],
        non_sequences=[self.wread])

    # vector containing corner mus of window, in order x1, x2, y1, y2
    self._attention_mus = T.concatenate((
        muX[:, :, 0].dimshuffle(0, 1, 'x'),
        muX[:, :, -1].dimshuffle(0, 1, 'x'),
        muY[:, :, 0].dimshuffle(0, 1, 'x'),
        muY[:, :, -1].dimshuffle(0, 1, 'x')), axis=2)
    self._attention_gs = T.concatenate((
        gX.dimshuffle(0, 1, 'x'),
        gY.dimshuffle(0, 1, 'x')), axis=2)

    # get index of layer after feature layer
    after_feat_layer_idx = self.feature_network.layers.keys().index(
        self.input_feature_layer_name) + 1
    self.attention_features = self.feature_network.forward_from_to(
        self.features.reshape((T.prod(self.features.shape[:2]),
                               self.features.shape[2])),
        from_layer_name=self.feature_network.layers.keys()[
            after_feat_layer_idx],
        to_layer_name=self.metric_feature_layer_name
    ).reshape((
        self.nframes, self.batchsize, self.targets_features.shape[2]))

    self._stepcosts = T.mean((
        self.targets_features - self.attention_features)**2, axis=2)
    self._dists = self._stepcosts

    # normalize mask to sum up to 1 for each sequence, to give equal
    # contribution to long and short sequences
    self._stepcosts_masked = (
        self._stepcosts * self.masks_frames) / T.sum(
        self.masks_frames, axis=0, keepdims=True)

    self._cost = (
        T.mean(self._stepcosts_masked) +
        self.weight_decay * (
            T.mean(self.rnn.win**2) + T.mean(self.wread**2)))

    # grads graph will be built when first accessed
    self.__grads = None

    target_centers_widthheight = T.ftensor3('target_centers_widthheight')
    target_centers_widthheight.tag.test_value = numpy_rng.rand(
        16, 5, 4).astype(np.float32)

    print "compiling get_all_patches_and_windows..."
    self.get_all_patches_and_windows = theano.function(
        [self.inputs, target_centers_widthheight],
        [self.patches, self.window_params],
        givens={
            self.targets_centers_widthheight:
            target_centers_widthheight.dimshuffle(1, 0, 2)})
    print "done (with compiling get_all_patches_and_windows)"

    print "compiling get_all_patches_and_windows_and_dists..."
    self.get_all_patches_and_windows_and_probs = theano.function(
        [self.inputs, target_centers_widthheight],
        [self.patches, self.window_params, self._dists],
        givens={
            self.targets_centers_widthheight:
            target_centers_widthheight.dimshuffle(1, 0, 2)})
    print "done (with compiling get_all_patches_and_windows_and_dists)"

    self.get_bbs = theano.function(
        [self.inputs, target_centers_widthheight],
        self._attention_mus,
        givens={
            self.targets_centers_widthheight:
            target_centers_widthheight.dimshuffle(1, 0, 2)})
class RNNHfOptim(BaseEstimator):
    def __init__(self, n_in=5, n_hidden=50, n_out=5, L1_reg=0.00, L2_reg=0.00,
                 activation='tanh', output_type='real',
                 use_symbolic_softmax=False, model="SGRNN",
                 weight_handler=None):
        self.n_in = int(n_in)
        self.n_hidden = int(n_hidden)
        self.n_out = int(n_out)
        self.L1_reg = float(L1_reg)
        self.L2_reg = float(L2_reg)
        self.activation = activation
        self.output_type = output_type
        self.use_symbolic_softmax = use_symbolic_softmax
        self.weight_handler = weight_handler
        self.model = model
        self.ready()
        self.tune_optimizer()

    def tune_optimizer(self, initial_lambda=0.1, mu=0.03,
                       global_backtracking=False, preconditioner=False,
                       max_cg_iterations=250, num_updates=5, validation=None,
                       validation_frequency=1, patience=np.inf,
                       save_progress=None, cg_number_batches=100,
                       gd_number_batches=100, plot_cost_file=None):
        # TODO: document all parameters
        self.initial_lambda = initial_lambda
        self.mu = mu
        self.global_backtracking = global_backtracking
        self.preconditioner = preconditioner
        self.max_cg_iterations = max_cg_iterations
        self.n_updates = num_updates
        self.validation = validation
        self.validation_frequency = validation_frequency
        self.patience = patience
        self.save_progress = save_progress
        self.cg_number_batches = cg_number_batches
        self.gd_number_batches = gd_number_batches
        self.plot_cost_file = plot_cost_file

    def ready(self):
        # input (where first dimension is time)
        self.x = T.matrix()
        # target (where first dimension is time)
        if self.output_type == 'real':
            self.y = T.matrix(name='y', dtype=theano.config.floatX)
        elif self.output_type == 'binary':
            self.y = T.matrix(name='y', dtype='int32')
        elif self.output_type == 'softmax':
            # only vector labels supported
            self.y = T.vector(name='y', dtype='int32')
        else:
            raise NotImplementedError
        # initial hidden state of the RNN
        self.h0 = T.vector()
        # learning rate
        self.lr = T.scalar()

        if self.activation == 'tanh':
            activation = T.tanh
        elif self.activation == 'sigmoid':
            activation = T.nnet.sigmoid
        elif self.activation == 'relu':
            activation = lambda x: x * (x > 0)
        elif self.activation == 'cappedrelu':
            activation = lambda x: T.minimum(x * (x > 0), 6)
        else:
            raise NotImplementedError

        if self.model == "SGRNN":
            if self.weight_handler is None:
                raise NotImplementedError("you need to provide a weighthandler")
            else:
                self.rnn = SGRNN(
                    input=self.x, weight_handler=self.weight_handler,
                    activation=activation, output_type=self.output_type,
                    use_symbolic_softmax=self.use_symbolic_softmax)
        else:
            self.rnn = RNN(
                input=self.x, n_in=self.n_in, n_hidden=self.n_hidden,
                n_out=self.n_out, activation=activation,
                output_type=self.output_type,
                use_symbolic_softmax=self.use_symbolic_softmax)

        if self.output_type == 'real':
            self.predict = theano.function(inputs=[self.x, ],
                                           outputs=self.rnn.y_pred,
                                           mode=mode)
        elif self.output_type == 'binary':
            self.predict_proba = theano.function(
                inputs=[self.x, ], outputs=self.rnn.p_y_given_x, mode=mode)
            self.predict = theano.function(
                inputs=[self.x, ],
                outputs=T.round(self.rnn.p_y_given_x), mode=mode)
        elif self.output_type == 'softmax':
            self.predict_proba = theano.function(
                inputs=[self.x, ], outputs=self.rnn.p_y_given_x, mode=mode)
            self.predict = theano.function(
                inputs=[self.x, ], outputs=self.rnn.y_out, mode=mode)
        else:
            raise NotImplementedError

    def shared_dataset(self, data_xy):
        """ Load the dataset into shared variables """
        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x,
                                            dtype=theano.config.floatX))
        shared_y = theano.shared(np.asarray(data_y,
                                            dtype=theano.config.floatX))
        if self.output_type in ('binary', 'softmax'):
            return shared_x, T.cast(shared_y, 'int32')
        else:
            return shared_x, shared_y

    def __getstate__(self):
        """ Return state sequence. """
        params = self._get_params()  # parameters set in constructor
        weights = [p.get_value() for p in self.rnn.params]
        state = (params, weights)
        return state

    def _set_weights(self, weights):
        """ Set fittable parameters from weights sequence.

        Parameters must be in the order defined by self.params:
        W, W_in, W_out, h0, bh, by
        """
        i = iter(weights)
        for param in self.rnn.params:
            param.set_value(i.next())

    def __setstate__(self, state):
        """ Set parameters from state sequence.

        Parameters must be in the order defined by self.params:
        W, W_in, W_out, h0, bh, by
        """
        params, weights = state
        self.set_params(**params)
        self.ready()
        self._set_weights(weights)

    def save(self, fpath='.', fname=None):
        """ Save a pickled representation of Model state. """
        fpathstart, fpathext = os.path.splitext(fpath)
        if fpathext == '.pkl':
            # User supplied an absolute path to a pickle file
            fpath, fname = os.path.split(fpath)
        elif fname is None:
            # Generate filename based on date
            date_obj = datetime.datetime.now()
            date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
            class_name = self.__class__.__name__
            fname = '%s.%s.pkl' % (class_name, date_str)
        fabspath = os.path.join(fpath, fname)
        file = open(fabspath, 'wb')
        state = self.__getstate__()
        pickle.dump(state, file, protocol=pickle.HIGHEST_PROTOCOL)
        file.close()

    def load(self, path):
        """ Load model parameters from path. """
        file = open(path, 'rb')
        state = pickle.load(file)
        self.__setstate__(state)
        file.close()

    def fit(self, X_train, Y_train):
        self.prepare(X_train, Y_train)
        ###############
        # TRAIN MODEL #
        ###############
        print "starting training ..."
        for i in range(self.n_updates):
            self.train_step(i)

    def prepare(self, X_train, Y_train):
        """ Fit model.

        X_train : ndarray (n_seq x n_steps x n_in)
        Y_train : ndarray (n_seq x n_steps x n_out)
        """
        # SequenceDataset wants a list of sequences;
        # this allows them to be different lengths, but here they're not
        seq = [i for i in X_train]
        targets = [i for i in Y_train]

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print "building model..."
        # TODO: batch_size in parameters.
        self.gradient_dataset = SequenceDataset(
            [seq, targets], batch_size=len(seq) / self.gd_number_batches,
            number_batches=self.gd_number_batches)
        self.cg_dataset = SequenceDataset(
            [seq, targets], batch_size=len(seq) / self.cg_number_batches,
            number_batches=self.cg_number_batches)
        cost = self.rnn.loss(self.y) \
            + self.L1_reg * self.rnn.L1 \
            + self.L2_reg * self.rnn.L2_sqr
        # h and ha are passed for structural damping
        self.opt = hf_optimizer(
            p=self.rnn.params, inputs=[self.x, self.y], s=self.rnn.y_pred,
            costs=[cost], h=self.rnn.h, ha=self.rnn.ha)

    def train_step(self, n):
        self.opt.train(
            self.gradient_dataset, self.cg_dataset, num_updates=n,
            save_progress=self.save_progress,
            plot_cost_file=self.plot_cost_file)

    def continue_training(self, n_updates):
        self.n_updates = n_updates
        for i in range(self.n_updates):
            self.train_step(i)
class Model(object):
    def __init__(self, logger, params=None):
        self.logger = logger
        self.ready(params)

    def ready(self, params=None):
        '''
        Sets up the model.
        '''
        # Creates the shapes of the inputs, target, and other variables.
        self.x = t.matrix()
        self.y = t.vector(name='y', dtype='int32')
        self.h0 = t.vector()
        self.lr = t.scalar()
        # The params to be used (input nodes, output nodes, etc...) are
        # retrieved from the params dictionary. When values are not found,
        # the defaults are used.
        params = self.defaultparams(params)
        self.setparams(params)
        # The actual RNN.
        self.rnn = RNN(input=self.x, n_in=self.n_in, n_hid=self.n_hid,
                       n_out=self.n_out, activation=self.activation)
        # Computes the probabilities of the next token, and the next token
        # itself.
        self.predict_probability = theano.function(
            inputs=[self.x, ], outputs=self.rnn.probability_y)
        self.predict = theano.function(
            inputs=[self.x, ], outputs=self.rnn.y_out)

    def fit(self, x_train, y_train, x_test=None, y_test=None,
            validation_freq=200):
        '''
        Used to train the RNN.

        x_train - the inputs used for training the RNN.
        y_train - the targets used for training the RNN.
        x_test - the inputs used for testing how well the training is going.
            Requires that y_test also be provided, otherwise it is ignored.
        y_test - the targets used for testing how well the training is going.
            Requires that x_test also be provided, otherwise it is ignored.
        validation_freq - how often the training should be interrupted and
            tested for accuracy.
        '''
        if x_test is not None and y_test is not None:
            self.runtests = True
            test_x, test_y = self.share_dataset(x_test, y_test)
            n_test = test_x.get_value(borrow=True).shape[0]
        else:
            self.runtests = False
        train_x, train_y = self.share_dataset(x_train, y_train)
        n_train = train_x.get_value(borrow=True).shape[0]

        '''
        Creates the model.
        '''
        self.logger.info('Building the model...')
        idx = t.lscalar('index')
        l_r = t.scalar(name='l_r', dtype=theano.config.floatX)
        mom = t.scalar(name='mom', dtype=theano.config.floatX)
        cost = self.rnn.loss(self.y) + self.L1_reg * self.rnn.L1 \
            + self.L2_reg * self.rnn.L2_sqr
        train_error = theano.function(inputs=[idx, ],
                                      outputs=self.rnn.loss(self.y),
                                      givens={
                                          self.x: train_x[idx],
                                          self.y: train_y[idx]
                                      })
        if self.runtests:
            test_error = theano.function(inputs=[idx, ],
                                         outputs=self.rnn.loss(self.y),
                                         givens={
                                             self.x: test_x[idx],
                                             self.y: test_y[idx]
                                         })
        # Compute the cost gradients with BPTT.
        gparams = []
        for param in self.rnn.params:
            gparam = t.grad(cost, param)
            gparams.append(gparam)
        updates = {}
        for param, gparam in zip(self.rnn.params, gparams):
            update = self.rnn.updates[param]
            u = mom * update - l_r * gparam
            updates[update] = u
            updates[param] = param + u
        # The function to train the model.
        train_model = theano.function(inputs=[idx, l_r, mom],
                                      outputs=cost,
                                      updates=updates,
                                      givens={
                                          self.x: train_x[idx],
                                          self.y: train_y[idx]
                                      })

        '''
        Train the model.
        '''
        self.logger.info('Training the model...')
        epoch = 0
        while epoch < self.n_epochs:
            epoch += 1
            for i in xrange(n_train):
                t0 = time.time()
                eff_momentum = self.final_momentum \
                    if epoch > self.momentum_switchover \
                    else self.initial_momentum
                example_cost = train_model(i, self.learning_rate,
                                           eff_momentum)
                itr = (epoch - 1) * n_train + i + 1
                if itr % validation_freq == 0:
                    train_losses = [train_error(j) for j in xrange(n_train)]
                    train_losses = np.mean(train_losses)
                    if self.runtests:
                        test_losses = [test_error(j) for j in xrange(n_test)]
                        test_losses = np.mean(test_losses)
                        self.logger.info('epoch {}, seq {} / {}, training losses {}, test losses {}, learning rate {}, elapsed time {}.'.format(
                            epoch, i + 1, n_train, train_losses, test_losses,
                            self.learning_rate, time.time() - t0))
                    else:
                        self.logger.info('epoch {}, seq {} / {}, training losses {}, learning rate {}, elapsed time {}.'.format(
                            epoch, i + 1, n_train, train_losses,
                            self.learning_rate, time.time() - t0))

    def share_dataset(self, data_x, data_y):
        '''
        Load the datasets into shared variables.
        '''
        shared_x = theano.shared(np.asarray(data_x,
                                            dtype=theano.config.floatX))
        shared_y = theano.shared(np.asarray(data_y,
                                            dtype=theano.config.floatX))
        return shared_x, t.cast(shared_y, 'int32')

    def __getstate__(self):
        '''
        Returns the current state of the model and RNN.
        '''
        params = self.getparams()
        weights = self.rnn.getweights()
        return (params, weights)

    def __setstate__(self, state):
        '''
        Sets the parameters for the model and RNN.
        '''
        params, weights = state
        self.setparams(params)
        self.ready()
        self.rnn.setweights(weights)

    def load(self, path):
        '''
        Unpickles a pickled model.
        '''
        fs = open(path, 'rb')
        self.logger.info('Model state loading from file {}.'.format(path))
        state = pickle.load(fs)
        self.__setstate__(state)
        fs.close()
        self.logger.info('Model state loaded.')

    def save(self, path=None):
        '''
        Pickles the model.
        '''
        if path is None:
            path = str(uuid.uuid4())
        fs = open(path, 'wb')
        state = self.__getstate__()
        pickle.dump(state, fs, protocol=pickle.HIGHEST_PROTOCOL)
        fs.close()
        self.logger.info('Model state saved to file {}.'.format(path))

    def setparams(self, params):
        '''
        Sets the parameters of the model and RNN.
        '''
        self.n_in = params.get('n_in')
        self.n_hid = params.get('n_hid')
        self.n_out = params.get('n_out')
        self.n_epochs = params.get('n_epochs')
        self.learning_rate = params.get('learning_rate')
        self.activation = params.get('activation')
        self.L1_reg = params.get('L1_reg')
        self.L2_reg = params.get('L2_reg')
        self.initial_momentum = params.get('initial_momentum')
        self.final_momentum = params.get('final_momentum')
        self.momentum_switchover = params.get('momentum_switchover')

    def getparams(self):
        '''
        Gets the parameters of the model.
        '''
        d = {
            'n_in': self.n_in,
            'n_hid': self.n_hid,
            'n_out': self.n_out,
            'n_epochs': self.n_epochs,
            'learning_rate': self.learning_rate,
            'activation': self.activation,
            'L1_reg': self.L1_reg,
            'L2_reg': self.L2_reg,
            'initial_momentum': self.initial_momentum,
            'final_momentum': self.final_momentum,
            'momentum_switchover': self.momentum_switchover
        }
        return d

    def defaultparams(self, params=None):
        '''
        Returns the default parameters for the model, or ensures that all the
        necessary parameters are present.
        '''
        d = {
            'n_in': 5,
            'n_hid': 50,
            'n_out': 5,
            'n_epochs': 100,
            'learning_rate': 0.01,
            'activation': t.nnet.sigmoid,
            'L1_reg': 0.0,
            'L2_reg': 0.0,
            'initial_momentum': 0.5,
            'final_momentum': 0.9,
            'momentum_switchover': 5
        }
        if params is None:
            return d
        for key in d.keys():
            params[key] = params.get(key) or d.get(key)
        return params
# assumed guard: the opening condition of this block was missing from the
# source; rebuild the cached data file only when it does not already exist
if not os.path.exists(data_file):
    ap = AutoPoetry(file, delimiters, vocabulary_size, start_token,
                    end_token, unknown_token)
    (X, T) = ap.get_training_data()
    with open(data_file, 'wb') as f:
        pickle.dump((X, T, ap), f)
else:
    with open(data_file, 'rb') as f:
        (X, T, ap) = pickle.load(f)
    vocabulary_size = ap.vocabulary_size

# RNN training
n_features = vocabulary_size
n_hiddens = 100
epoch = 100
learning_rate = 1e-1
lr_factor = 0.9
rnn = RNN(n_features, n_hiddens, bptt_truncate=10)
rnn.train(X, T, epoch=epoch, learning_rate=learning_rate,
          lr_factor=lr_factor)

# Generate sentences
num_sentences = 100
senten_min_length = 3
for i in range(num_sentences):
    sent = []
    # We want long sentences, not sentences with one or two words
    while len(sent) < senten_min_length:
        sent = ap.generate_sentence(rnn)
    print(''.join(sent))
def train_rnn(num_batches_per_bunch = 512, batch_size = 1, num_bunches_queue = 5, offset = 0, path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'): voc_list = Vocabulary(path_name + 'train') voc_list.vocab_create() vocab = voc_list.vocab vocab_size = voc_list.vocab_size dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size) dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size ) dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size ) print '..building the model' #symbolic variables for input, target vector and batch index index = T.lscalar('index') x = T.fvector('x') h0 = T.fvector('h0') y = T.ivector('y') learning_rate = T.fscalar('learning_rate') #theano shared variables for train, valid and test train_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True) train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) valid_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True) valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) test_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True) test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) rng = numpy.random.RandomState() classifier = RNN(rng = rng, input = x, intial_hidden = h0, n_in = vocab_size, n_hidden = int(sys.argv[1]), n_out = vocab_size) cost = classifier.negative_log_likelihood(y) ht1_values = numpy.ones((int(sys.argv[1]), ), dtype = 'float32') ht1 = theano.shared(value = ht1_values, name = 'hidden_state') #constructor for learning rate class learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[2]), scale_by=.5, max_epochs=9999,\ min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.) 
log_likelihood = classifier.sum(y) likelihood = classifier.likelihood(y) #test_model test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood], \ givens = {x: test_set_x1, y: test_set_y, h0: ht1}) #validation_model validate_model = theano.function(inputs = [], outputs = [log_likelihood], \ givens = {x: valid_set_x1, y: valid_set_y, h0: ht1}) gradient_param = [] #calculates the gradient of cost with respect to parameters for param in classifier.params: gradient_param.append(T.cast(T.grad(cost, param), 'float32')) updates = [] #updates the parameters for param, gradient in zip(classifier.params, gradient_param): updates.append((param, T.cast(param - learning_rate * gradient - 0.000001 * param, dtype = 'float32'))) #hidden_output = classifier.inputlayer.output #training_model train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.inputlayer.output], updates = updates, \ givens = {x: train_set_x1, y: train_set_y, h0:ht1}) print '.....training' best_valid_loss = numpy.inf start_time = time.time() while(learnrate_schedular.get_rate() != 0): print 'learning_rate:', learnrate_schedular.get_rate() print 'epoch_number:', learnrate_schedular.epoch frames_showed, progress = 0, 0 start_epoch_time = time.time() dataprovider_train.reset() for feats_lab_tuple in dataprovider_train: features, labels = feats_lab_tuple if labels is None or features is None: continue frames_showed += features.shape[0] for temp, i in zip(features, xrange(len(labels))): temp_features1 = numpy.zeros(vocab_size, dtype = 'float32') temp_features1[temp[0]] = 1 train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True) train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True) out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype = 'float32')) ht1.set_value(numpy.asarray(out[1], dtype = 'float32'), borrow = True) progress += 1 if progress%10000==0: end_time_progress = time.time() print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\ %(progress, frames_showed,(end_time_progress-start_epoch_time)) train_set_x1.set_value(numpy.empty((1, ), dtype = 'float32')) train_set_y.set_value(numpy.empty((1), dtype = 'int32')) end_time_progress = time.time() print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\ %(progress, frames_showed,(end_time_progress-start_epoch_time)) #classifier_name = 'MLP' + str(learnrate_schedular.epoch) #save_mlp(classifier, path+exp_name1 , classifier_name) print 'Validating...' 
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # it is also the end of training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
                log_likelihood.append(validate_model())
            valid_set_x1.set_value(numpy.empty((1,), 'float32'))
            valid_set_y.set_value(numpy.empty((1,), 'int32'))
            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds' \
                      % (progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds' \
              % (progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        # Mean negative log-likelihood per validation frame (cross-entropy).
        entropy = -numpy.sum(log_likelihood) / valid_frames_showed
        print entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            # Advance the learning-rate schedule on an improvement.
            learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            # No improvement: stop training by zeroing the rate.
            learnrate_schedular.rate = 0.0

    end_time = time.time()
    print 'The fine-tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:
        features, labels = feats_lab_tuple
        if labels is None or features is None:
            continue
        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds' \
                  % (progress, test_frames_showed, end_time_test_progress - start_test_time)

    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds' \
          % (progress, test_frames_showed, end_time_test_progress - start_test_time)

    #save_posteriors(log_likelihood, likelihoods, weight_path + file_name2)
    print numpy.sum(log_likelihood)
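# The validation loop above scores the model by mean per-frame negative
# log-likelihood. A minimal sketch (illustrative helper, not part of the
# original pipeline) of turning the same summed log-likelihoods into
# perplexity, the more common language-model metric:
import numpy

def perplexity(log_likelihoods, n_frames):
    # Mean negative log-likelihood per frame, then exponentiate.
    entropy = -numpy.sum(log_likelihoods) / float(n_frames)
    return numpy.exp(entropy)

# e.g. perplexity(log_likelihood, valid_frames_showed)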
hidden_size = 300    # size of hidden layer of neurons (H)
seq_length = 50      # number of steps to unroll the RNN
learning_rate = 2e-3

with open('data/input.txt') as f:
    data = f.read().replace('\n', ' ').encode('ascii', 'ignore')

args = {
    'hidden_size': hidden_size,
    'seq_length': seq_length,
    'learning_rate': learning_rate,
    'data': data
}

# Initialize the RNN and run the first step.
rnn = RNN(args)
inputs, hidden, loss = rnn.step()

i = 0
while True:
    inputs, hidden, loss = rnn.step(hidden)
    if i % 100 == 0:
        print "Iteration {}:".format(i)
        print "Loss: {}".format(loss)
        print ''.join(rnn.generate(hidden, inputs[0], 140))
        print ""
    if i % 10000 == 0:
        rnn.save_model()
    i += 1  # advance the iteration counter
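# rnn.generate above samples text from the model. A minimal sketch of
# softmax sampling with a temperature knob, as character-level generators
# commonly do (hypothetical standalone helper, not this RNN class's API):
import numpy as np

def sample_from_logits(logits, temperature=1.0):
    # Scale the logits, apply a numerically stable softmax, then draw
    # one index from the resulting distribution.
    scaled = np.asarray(logits, dtype='float64') / temperature
    probs = np.exp(scaled - np.max(scaled))
    probs /= probs.sum()
    return np.random.choice(len(probs), p=probs)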
class RNNPOSTagger:
    def __init__(self, vocab, tags):
        self.vocab = vocab
        self.tags = tags
        self.tag_ndx = dict(map(swap_tuple, enumerate(self.tags)))
        self.vocab_ndx = dict(map(swap_tuple, enumerate(self.vocab)))

    def save_to_file(self, filename):
        f = open(filename, 'wb')
        context = {}
        context['vocab'] = self.vocab
        context['tags'] = self.tags
        context['n_inputs'] = self.rnn.n_inputs
        context['n_outputs'] = self.rnn.n_outputs
        context['n_hidden'] = self.rnn.n_hidden
        context['U'] = matrix_to_list(self.rnn.U)
        context['V'] = matrix_to_list(self.rnn.V)
        context['W'] = matrix_to_list(self.rnn.W)
        f.write(json.dumps(context))
        f.close()

    def load_from_file(self, filename):
        f = open(filename, 'rb')
        context = json.loads(f.read())
        self.vocab = context['vocab']
        self.tags = context['tags']
        self.__init__(self.vocab, self.tags)
        self.rnn = RNN(context['n_inputs'], context['n_outputs'], context['n_hidden'])
        self.rnn.U = array([array(x) for x in context['U']])
        self.rnn.V = array([array(x) for x in context['V']])
        self.rnn.W = array([array(x) for x in context['W']])
        f.close()

    def train(self, data, hidden_layer_cnt = 40):
        n_input = len(self.vocab)
        n_output = len(self.tags)
        n_hidden = hidden_layer_cnt
        self.rnn = RNN(n_input, n_output, n_hidden)
        training_set = self.prepare_training_set(data)
        n_epochs = 50
        max_rate = 0.0001
        learning_coeff = 1.0
        history_cnt = 10
        learning_rate_history = [0.0 for i in range(history_cnt)]
        history_pointer = 0
        for epoch in range(n_epochs):
            print "Running epoch #%d" % epoch
            curr_rate = self.rnn.train(training_set, 0.5)  # 200.0/(len(training_set))) # * learning_coeff))
            learning_rate_history[history_pointer] = curr_rate
            history_pointer = (history_pointer + 1) % history_cnt
            rate = sum(learning_rate_history)
            #max_rate = max([curr_rate, max_rate])
            #learning_coeff -= (max_rate - curr_rate) / (n_epochs)
            #print curr_rate, max_rate, learning_coeff
            #if rate < 0.1:
            #    break
            if epoch % 20 == 0:
                self.save_to_file('_tmp_save')

    def get_tag(self, word, tag, hidden_state):
        if word in SENTENCE_SEPARATORS:
            hidden_state = self.rnn.get_hidden_state_matrix()
            return word, hidden_state
        input_vector = mat(zeros((len(self.vocab), 1)))
        if word in self.vocab_ndx:
            input_vector[self.vocab_ndx[word], 0] = 1.0
            #self.rnn.reset_hidden()
            res, hidden_state = self.rnn.feed(input_vector, hidden_state)
            res_ndx = res.argmax()
            #print res[res_ndx]
            return self.tags[res_ndx], hidden_state
        else:
            return "", hidden_state

    def prepare_training_set(self, data):
        vocab_size = len(self.vocab)
        tag_count = len(self.tags)
        res = []
        for word, pos in data:
            #x = zeros(vocab_size)
            #y = zeros(tag_count)
            #x[self.vocab_ndx[word]] = 1
            #y[self.tag_ndx[pos]] = 1
            #res += [(x, y)]
            if word in SENTENCE_SEPARATORS:
                res += [(None, None)]
                continue
            res += [(self.vocab_ndx[word], self.tag_ndx[pos])]
        return res
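# save_to_file/load_from_file above round-trip the weight matrices through
# JSON via matrix_to_list. A minimal sketch of that conversion, under the
# assumption that matrix_to_list simply nests the rows as plain lists
# (matrix_to_list_sketch is an illustrative name, not the original helper):
from numpy import array

def matrix_to_list_sketch(m):
    # numpy arrays are not JSON-serializable; nested Python lists are.
    return [[float(v) for v in row] for row in array(m)]

# Reloading then mirrors load_from_file:
#   self.rnn.U = array([array(x) for x in context['U']])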
idx2word = dict((k, v) for v, k in dic['words2idx'].iteritems())

train_lex, train_ne, train_y = train_set
valid_lex, valid_ne, valid_y = valid_set
test_lex, test_ne, test_y = test_set

vocsize = len(dic['words2idx'])
nclasses = len(dic['labels2idx'])
nsentences = len(train_lex)

# instantiate the model
numpy.random.seed(s['seed'])
random.seed(s['seed'])
rnn = RNN(nh = s['nhidden'],
          nc = nclasses,
          ne = vocsize,
          de = s['emb_dimension'],
          cs = s['win'])

# train with early stopping on the validation set
best_f1 = -numpy.inf
s['clr'] = s['lr']
for e in xrange(s['nepochs']):
    # shuffle
    shuffle([train_lex, train_ne, train_y], s['seed'])
    s['ce'] = e
    tic = time.time()
    # consider the whole sentence as a mini-batch and perform one update per sentence
    for i in range(nsentences):
        cwords = contextwin(train_lex[i], s['win'])
        words = map(lambda x: numpy.asarray(x).astype('int32'),
                    minibatch(cwords, s['bs']))
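# contextwin above turns a sentence of word indices into one fixed-size
# window per word. A sketch of the common implementation (padding both ends
# with -1, assuming the embedding table reserves an index for padding):
def contextwin_sketch(l, win):
    assert win % 2 == 1
    l = list(l)
    lpadded = (win // 2) * [-1] + l + (win // 2) * [-1]
    # One window of length win centred on each word.
    return [lpadded[i:i + win] for i in range(len(l))]

# contextwin_sketch([0, 1, 2], 3) -> [[-1, 0, 1], [0, 1, 2], [1, 2, -1]]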
# Concatenate the dev data onto the training data (list.append would return None).
all_train_x = train_x + [i for i in dev_x]
all_train_y = train_y + [j for j in dev_y]

np.random.seed(s['seed'])
random.seed(s['seed'])

'''
nh :: dimension of the hidden layer
nc :: number of classes
ne :: number of word embeddings in the vocabulary
de :: dimension of the word embeddings
cs :: word window context size
'''
rnn = RNN(nh = s['nhidden'],
          nc = len(languages()),
          ne = s['vocab_size'],
          de = s['emb_dimension'],
          cs = s['win'])

best_f1 = -numpy.inf
s['clr'] = s['lr']
for e in range(s['nepochs']):
    # shuffle
    shuffle([train_lex, train_ne, train_y], s['seed'])
    s['ce'] = e
    tic = time.time()
    for i in xrange(nsentences):
        cwords = contextwin(train_lex[i], s['win'])
        words = map(lambda x: numpy.asarray(x).astype('int32'),
                    minibatch(cwords, s['bs']))
        labels = train_y[i]
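# minibatch above groups the context windows cumulatively so that each
# update sees the current word plus up to bs - 1 preceding ones. A sketch
# of that behaviour (hypothetical reimplementation for illustration):
def minibatch_sketch(l, bs):
    out = [l[:i] for i in range(1, min(bs, len(l)) + 1)]
    out += [l[i - bs:i] for i in range(bs + 1, len(l) + 1)]
    assert len(l) == len(out)
    return out

# minibatch_sketch([10, 11, 12], 2) -> [[10], [10, 11], [11, 12]]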
with open('data/input.txt') as f:
    data = f.read().replace('\n', ' ').encode('ascii', 'ignore')
data = data.lower()
data = nltk.word_tokenize(data)

args = {
    'hidden_size': hidden_size,
    'seq_length': seq_length,
    'learning_rate': learning_rate,
    'data': data
}

# Initialize the RNN and run the first step.
rnn = RNN(args)
inputs, hidden, loss = rnn.step()

i = 0
while True:
    inputs, hidden, loss = rnn.step(hidden)
    if i % 100 == 0:
        print "Iteration {}:".format(i)
        print "Loss: {}".format(loss)
        print ' '.join(rnn.generate(hidden, inputs[0], 15))
        print ""
    # if i % 10000 == 0:
    #     rnn.save_model()
    i += 1  # advance the iteration counter
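# With nltk.word_tokenize the vocabulary becomes the set of distinct words
# rather than characters. A minimal sketch of building the index maps such
# a word-level model needs (hypothetical helper; the RNN class here builds
# its own vocabulary from args['data']):
def build_vocab(tokens):
    words = sorted(set(tokens))
    word_to_ix = dict((w, i) for i, w in enumerate(words))
    ix_to_word = dict((i, w) for i, w in enumerate(words))
    return word_to_ix, ix_to_word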
treeTxt = treeTxt + ")"

# Print a sentence.
prnt(train[0].root)
nltktree = Tree.fromstring(treeTxt)
nltktree.pretty_print()

###############################
# Create a toy model for testing.
###############################
numW = len(treeM.loadWordMap())

wvecDim = 10
outputDim = 5

rnn = RNN(wvecDim, outputDim, numW, mbSize = 4)
rnn.initParams()

rnn.L, rnn.W, rnn.b, rnn.Ws, rnn.bs = rnn.stack

# Zero gradients
rnn.dW[:] = 0
rnn.db[:] = 0
rnn.dWs[:] = 0
rnn.dbs[:] = 0
rnn.dL = collections.defaultdict(rnn.defaultVec)

cost = 0.0
correct = []
guess = []
total = 0.0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from rnn import RNN

# Toy input/target sequences: the target is the input shifted by one step.
x = [0, 4, 2, 5, 7]
t = [4, 2, 5, 7, 1]

n_hiddens = 4
n_features = 10

rnn = RNN(n_features, n_hiddens)
rnn.forward_propagation(x)
rnn.check_gradient(x, t)
rnn.train([x], [t], 100)
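# rnn.check_gradient above validates the analytic gradients numerically.
# A generic finite-difference sketch of that idea (illustrative helper,
# independent of this rnn module's actual implementation), assuming a
# zero-argument loss callable that reads the parameter vector in place:
import numpy as np

def numerical_gradient(loss, theta, eps=1e-5):
    grad = np.zeros_like(theta)
    for j in range(len(theta)):
        old = theta[j]
        theta[j] = old + eps
        loss_plus = loss()
        theta[j] = old - eps
        loss_minus = loss()
        theta[j] = old  # restore the parameter
        grad[j] = (loss_plus - loss_minus) / (2 * eps)
    return grad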
def run(args=None):
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("--test", action="store_true", dest="test", default=False)

    # Optimizer
    parser.add_option("--minibatch", dest="minibatch", type="int", default=30)
    parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad")
    parser.add_option("--epochs", dest="epochs", type="int", default=50)
    parser.add_option("--step", dest="step", type="float", default=1e-2)

    parser.add_option("--wvecDim", dest="wvecDim", type="int", default=30)
    parser.add_option("--outputDim", dest="outputDim", type="int", default=2)
    parser.add_option("--alpha", dest="alpha", type="float", default=0.2)

    parser.add_option("--outFile", dest="outFile", type="string", default="models/test.bin")
    parser.add_option("--inFile", dest="inFile", type="string", default="models/test.bin")
    parser.add_option("--data", dest="data", type="string", default="brae.pos")
    parser.add_option("--dev", dest="dev", type="string", default="brae.dev")
    parser.add_option("--wordMap", dest="map", type="string", default="brae.tot")

    (opts, args) = parser.parse_args(args)

    # Make this False if you don't care about your accuracies per epoch; it makes things faster!
    evaluate_accuracy_while_training = True

    # Testing
    if opts.test:
        test(opts.inFile, opts.data)
        return

    print "Loading data..."
    train_accuracies = []
    dev_accuracies = []

    trees = tr.printtree(opts.data)
    opts.numWords = len(tr.loadWordMap(opts.map))

    nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords, opts.alpha, opts.minibatch)
    nn.initParams()

    sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer)

    dev_trees = tr.printtree(opts.dev)

    for e in range(opts.epochs):
        start = time.time()
        print "Running epoch %d" % e
        sgd.run(trees)
        end = time.time()
        print "Time per epoch : %f" % (end - start)

        with open(opts.outFile, 'w') as fid:
            pickle.dump(opts, fid)
            pickle.dump(sgd.costt, fid)
            nn.toFile(fid)
        if evaluate_accuracy_while_training:
            print "testing on training set real quick"
            train_accuracies.append(test(opts.outFile, opts.data, trees))
            print "testing on dev set real quick"
            dev_accuracies.append(test(opts.outFile, opts.dev, dev_trees))

    if evaluate_accuracy_while_training:
        pdb.set_trace()
        print train_accuracies
        print dev_accuracies
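# run() above defaults to --optimizer adagrad. A minimal sketch of the
# AdaGrad update such an optimizer applies per parameter (illustrative,
# not the optimizer.SGD implementation used here):
import numpy as np

def adagrad_update(param, grad, cache, step=1e-2, eps=1e-8):
    # Accumulate squared gradients; scale each coordinate's step by the
    # inverse root of its accumulated history.
    cache += grad ** 2
    param -= step * grad / (np.sqrt(cache) + eps)
    return param, cache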
# -*- coding: utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf8')

from rnn import RNN

# Load a previously saved model and generate a sample from it.
rnn = RNN.load_model("model.json")
inputs, hidden, loss = rnn.step()
print rnn.generate(hidden, inputs[0], 140)