def train(self, X, XY, V, VY, count_dict, word_dict, embed_map):
    """Train the LBL model with minibatch SGD and two-phase early stopping.

    Parameters
    ----------
    X, XY : training contexts and (sparse) next-word targets
    V, VY : validation contexts and targets
    count_dict, word_dict : vocabulary statistics / index maps (word_dict is
        accepted for interface parity; only count_dict is passed to init_params)
    embed_map : pre-trained embedding lookup used to initialise parameters

    Phase 1 trains on X and monitors the validation objective; when the
    'validation_pp' criterion fires, the best saved model is reloaded, the
    validation data is folded into the training set, and phase 2
    ('ll_train_heldout') continues until the phase-1 training objective
    target is reached.
    """
    self.start = self.seed
    self.init_params(embed_map, count_dict, XY)
    inds = np.arange(len(X))
    # Floor division so numbatches is an int under Python 3 as well.
    numbatches = len(inds) // self.batchsize
    curr = 1e20
    counter = 0
    target = None
    num = 15000  # cap on examples used when evaluating the objectives

    # Main loop
    stop.display_phase(1)
    for epoch in range(self.maxepoch):
        self.epoch = epoch
        tic = time.time()
        prng = RandomState(self.seed + epoch + 1)
        prng.shuffle(inds)
        for minibatch in range(numbatches):
            # Strided slicing partitions the shuffled indices into batches.
            batchX = X[inds[minibatch::numbatches]]
            batchY = XY[inds[minibatch::numbatches]]
            (words, acts, preds) = self.forward(batchX)
            self.backward(batchY, preds, acts, words, batchX)
            self.update_params(batchX)
        # NOTE(review): source indentation was lost; assumed per-epoch — confirm.
        self.update_hyperparams()
        toc = time.time()

        # Results and stopping criteria
        obj = self.compute_obj(X[:num], XY[:num])
        obj_val = self.compute_obj(V[:num], VY[:num])
        if self.verbose > 0:
            stop.display_results(epoch, toc - tic, obj, obj_val)
        (curr, counter) = stop.update_result(curr, obj_val, counter)
        if counter == 0:
            # New best validation result: checkpoint and remember the
            # training objective as the phase-2 target.
            stop.save_model(self, self.loc)
            stopping_target = obj
        if stop.criteria_complete(self, epoch, curr, obj, counter,
                                  self.k, obj_val, target):
            if self.criteria == 'maxepoch':
                break
            elif self.criteria == 'validation_pp':
                # Reload best model, merge validation into training data,
                # and switch to the held-out log-likelihood criterion.
                self = stop.load_model(self.loc)
                counter = 0
                X = np.r_[X, V]
                XY = vstack([XY, VY]).tocsr()
                self.criteria = 'll_train_heldout'
                target = stopping_target  # obj
                stop.display_phase(2)
                # np.arange (not range) so prng.shuffle works in place
                # on Python 3 as well.
                inds = np.arange(X.shape[0])
                prng.shuffle(inds)
                numbatches = len(inds) // self.batchsize
            elif self.criteria == 'll_train_heldout':
                break
def train(self, X, XY, V, VY, count_dict, word_dict, embed_map):
    """Train the LBL model with minibatch SGD and two-phase early stopping.

    Parameters
    ----------
    X, XY : training contexts and (sparse) next-word targets
    V, VY : validation contexts and targets
    count_dict, word_dict : vocabulary statistics / index maps (word_dict is
        accepted for interface parity; only count_dict is passed to init_params)
    embed_map : pre-trained embedding lookup used to initialise parameters

    Phase 1 trains on X and monitors the validation objective; when the
    'validation_pp' criterion fires, the best saved model is reloaded, the
    validation data is folded into the training set, and phase 2
    ('ll_train_heldout') continues until the phase-1 training objective
    target is reached.
    """
    self.start = self.seed
    self.init_params(embed_map, count_dict, XY)
    inds = np.arange(len(X))
    # Floor division so numbatches is an int under Python 3 as well.
    numbatches = len(inds) // self.batchsize
    curr = 1e20
    counter = 0
    target = None
    num = 15000  # cap on examples used when evaluating the objectives

    # Main loop
    stop.display_phase(1)
    for epoch in range(self.maxepoch):
        self.epoch = epoch
        tic = time.time()
        prng = RandomState(self.seed + epoch + 1)
        prng.shuffle(inds)
        for minibatch in range(numbatches):
            # Strided slicing partitions the shuffled indices into batches.
            batchX = X[inds[minibatch::numbatches]]
            batchY = XY[inds[minibatch::numbatches]]
            (words, acts, preds) = self.forward(batchX)
            self.backward(batchY, preds, acts, words, batchX)
            self.update_params(batchX)
        # NOTE(review): source indentation was lost; assumed per-epoch — confirm.
        self.update_hyperparams()
        toc = time.time()

        # Results and stopping criteria
        obj = self.compute_obj(X[:num], XY[:num])
        obj_val = self.compute_obj(V[:num], VY[:num])
        if self.verbose > 0:
            stop.display_results(epoch, toc - tic, obj, obj_val)
        (curr, counter) = stop.update_result(curr, obj_val, counter)
        if counter == 0:
            # New best validation result: checkpoint and remember the
            # training objective as the phase-2 target.
            stop.save_model(self, self.loc)
            stopping_target = obj
        if stop.criteria_complete(self, epoch, curr, obj, counter,
                                  self.k, obj_val, target):
            if self.criteria == 'maxepoch':
                break
            elif self.criteria == 'validation_pp':
                # Reload best model, merge validation into training data,
                # and switch to the held-out log-likelihood criterion.
                self = stop.load_model(self.loc)
                counter = 0
                X = np.r_[X, V]
                XY = vstack([XY, VY]).tocsr()
                self.criteria = 'll_train_heldout'
                target = stopping_target  # obj
                stop.display_phase(2)
                # np.arange (not range) so prng.shuffle works in place
                # on Python 3 as well.
                inds = np.arange(X.shape[0])
                prng.shuffle(inds)
                numbatches = len(inds) // self.batchsize
            elif self.criteria == 'll_train_heldout':
                break
def train(self, X, indX, XY, V, indV, VY, IM, count_dict, word_dict,
          embed_map):
    """Train the multimodal LBL model with Theano-compiled SGD steps.

    Parameters
    ----------
    X, XY : training contexts and (sparse) next-word targets
    indX : per-example row indices into IM (image features per context)
    V, VY, indV : validation counterparts of X, XY, indX
    IM : image feature matrix, rows selected via indX / indV
    count_dict, word_dict : vocabulary statistics / index maps (word_dict is
        accepted for interface parity; only count_dict is passed to init_params)
    embed_map : pre-trained embedding lookup used to initialise parameters

    Compiles Theano functions for the objective and one update step, then
    runs a two-phase early-stopping schedule: phase 1 monitors the
    validation objective; on 'validation_pp' the best model is reloaded,
    validation data is folded into the training set, and phase 2
    ('ll_train_heldout') trains toward the phase-1 objective target.
    Per-epoch train/validation objectives are written to train_valid_err.txt.
    """
    self.start = self.seed
    self.init_params(embed_map, count_dict, XY)
    inds = np.arange(len(X))
    # Floor division so numbatches is an int under Python 3 as well.
    numbatches = len(inds) // self.batchsize
    curr = 1e20
    counter = 0
    target = None
    num = 15000  # cap on examples used when evaluating the validation objective

    # Symbolic inputs: int32 word-index contexts, float targets, image
    # features, and the annealed learning-rate / momentum scalars.
    x = T.matrix('x', dtype='int32')
    y = T.matrix('y')
    im = T.matrix('im')
    lr = T.scalar('lr')
    mom = T.scalar('mom')
    (words, acts, IF, preds) = self.forward(x, im)
    obj_T = self.compute_obj(x, im, y)
    compute_obj_T = theano.function([x, im, y], obj_T)
    train_batch = theano.function(
        [x, im, y, lr, mom], obj_T,
        updates=self.update_params(obj_T, x, lr, mom),
        on_unused_input='warn')

    # Context manager guarantees the log is closed on every exit path.
    with open("train_valid_err.txt", 'w') as log_file:
        # Main loop
        stop.display_phase(1)
        for epoch in range(self.maxepoch):
            self.epoch = epoch
            tic = time.time()
            prng = RandomState(self.seed + epoch + 1)
            prng.shuffle(inds)
            obj = 0.0
            for minibatch in range(numbatches):
                # Strided slicing partitions the shuffled indices into batches.
                batchX = X[inds[minibatch::numbatches]].astype(np.int32)
                batchY = XY[inds[minibatch::numbatches]].toarray().astype(
                    theano.config.floatX)
                batchindX = indX[inds[minibatch::numbatches]].astype(
                    np.int32).flatten()
                batchIm = IM[batchindX].astype(theano.config.floatX)
                obj += train_batch(batchX, batchIm, batchY,
                                   self.eta_t, self.p_t)
            # NOTE(review): source indentation was lost; assumed per-epoch — confirm.
            self.update_hyperparams()
            toc = time.time()

            # Results and stopping criteria
            obj_val = compute_obj_T(
                V[:num].astype(np.int32),
                IM[indV[:num].astype(int).flatten()].astype(
                    theano.config.floatX),
                VY[:num].toarray().astype(theano.config.floatX))
            log_file.write('{} {}\n'.format(obj, obj_val))
            if self.verbose > 0:
                stop.display_results(epoch, toc - tic, obj, obj_val)
            (curr, counter) = stop.update_result(curr, obj_val, counter)
            if counter == 0:
                # New best validation result: checkpoint and remember the
                # training objective as the phase-2 target.
                stop.save_model_theano(self, self.loc)
                stopping_target = obj
            if stop.criteria_complete(self, epoch, curr, obj, counter,
                                      self.k, obj_val, target):
                if self.criteria == 'maxepoch':
                    break
                elif self.criteria == 'validation_pp':
                    # Reload best model, merge validation into training
                    # data, and switch to the held-out criterion.
                    stop.load_model_theano(self, self.loc)
                    counter = 0
                    X = np.r_[X, V]
                    XY = vstack([XY, VY]).tocsr()
                    indX = np.r_[indX, indV]
                    self.criteria = 'll_train_heldout'
                    target = stopping_target  # obj
                    stop.display_phase(2)
                    # np.arange (not range) so prng.shuffle works in place
                    # on Python 3 as well.
                    inds = np.arange(X.shape[0])
                    prng.shuffle(inds)
                    numbatches = len(inds) // self.batchsize
                elif self.criteria == 'll_train_heldout':
                    break
def train(self, X, indX, XY, V, indV, VY, IM, count_dict, word_dict,
          embed_map):
    """Train the multimodal LBL model with Theano-compiled SGD steps.

    Parameters
    ----------
    X, XY : training contexts and (sparse) next-word targets
    indX : per-example row indices into IM (image features per context)
    V, VY, indV : validation counterparts of X, XY, indX
    IM : image feature matrix, rows selected via indX / indV
    count_dict, word_dict : vocabulary statistics / index maps (word_dict is
        accepted for interface parity; only count_dict is passed to init_params)
    embed_map : pre-trained embedding lookup used to initialise parameters

    Compiles Theano functions for the objective and one update step, then
    runs a two-phase early-stopping schedule: phase 1 monitors the
    validation objective; on 'validation_pp' the best model is reloaded,
    validation data is folded into the training set, and phase 2
    ('ll_train_heldout') trains toward the phase-1 objective target.
    Per-epoch train/validation objectives are written to train_valid_err.txt.
    """
    self.start = self.seed
    self.init_params(embed_map, count_dict, XY)
    inds = np.arange(len(X))
    # Floor division so numbatches is an int under Python 3 as well.
    numbatches = len(inds) // self.batchsize
    curr = 1e20
    counter = 0
    target = None
    num = 15000  # cap on examples used when evaluating the validation objective

    # Symbolic inputs: int32 word-index contexts, float targets, image
    # features, and the annealed learning-rate / momentum scalars.
    x = T.matrix('x', dtype='int32')
    y = T.matrix('y')
    im = T.matrix('im')
    lr = T.scalar('lr')
    mom = T.scalar('mom')
    (words, acts, IF, preds) = self.forward(x, im)
    obj_T = self.compute_obj(x, im, y)
    compute_obj_T = theano.function([x, im, y], obj_T)
    train_batch = theano.function(
        [x, im, y, lr, mom], obj_T,
        updates=self.update_params(obj_T, x, lr, mom),
        on_unused_input='warn')

    # Context manager guarantees the log is closed on every exit path.
    with open("train_valid_err.txt", 'w') as log_file:
        # Main loop
        stop.display_phase(1)
        for epoch in range(self.maxepoch):
            self.epoch = epoch
            tic = time.time()
            prng = RandomState(self.seed + epoch + 1)
            prng.shuffle(inds)
            obj = 0.0
            for minibatch in range(numbatches):
                # Strided slicing partitions the shuffled indices into batches.
                batchX = X[inds[minibatch::numbatches]].astype(np.int32)
                batchY = XY[inds[minibatch::numbatches]].toarray().astype(
                    theano.config.floatX)
                batchindX = indX[inds[minibatch::numbatches]].astype(
                    np.int32).flatten()
                batchIm = IM[batchindX].astype(theano.config.floatX)
                obj += train_batch(batchX, batchIm, batchY,
                                   self.eta_t, self.p_t)
            # NOTE(review): source indentation was lost; assumed per-epoch — confirm.
            self.update_hyperparams()
            toc = time.time()

            # Results and stopping criteria
            obj_val = compute_obj_T(
                V[:num].astype(np.int32),
                IM[indV[:num].astype(int).flatten()].astype(
                    theano.config.floatX),
                VY[:num].toarray().astype(theano.config.floatX))
            log_file.write('{} {}\n'.format(obj, obj_val))
            if self.verbose > 0:
                stop.display_results(epoch, toc - tic, obj, obj_val)
            (curr, counter) = stop.update_result(curr, obj_val, counter)
            if counter == 0:
                # New best validation result: checkpoint and remember the
                # training objective as the phase-2 target.
                stop.save_model_theano(self, self.loc)
                stopping_target = obj
            if stop.criteria_complete(self, epoch, curr, obj, counter,
                                      self.k, obj_val, target):
                if self.criteria == 'maxepoch':
                    break
                elif self.criteria == 'validation_pp':
                    # Reload best model, merge validation into training
                    # data, and switch to the held-out criterion.
                    stop.load_model_theano(self, self.loc)
                    counter = 0
                    X = np.r_[X, V]
                    XY = vstack([XY, VY]).tocsr()
                    indX = np.r_[indX, indV]
                    self.criteria = 'll_train_heldout'
                    target = stopping_target  # obj
                    stop.display_phase(2)
                    # np.arange (not range) so prng.shuffle works in place
                    # on Python 3 as well.
                    inds = np.arange(X.shape[0])
                    prng.shuffle(inds)
                    numbatches = len(inds) // self.batchsize
                elif self.criteria == 'll_train_heldout':
                    break