def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Optimization method for finding weights that minimize the loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     activation_function=ExpLU,
                     optimizer=optimizer,
                     early_stopping=True,
                     plot_errors=True)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred,
                   title="Perceptron",
                   accuracy=accuracy,
                   legend_labels=np.unique(y))
def main():
    config = {
        "optimizer": "rnn",
        "problem": "mnist",
        "rollout_length": 100,  # This is 100 in the paper
        "learning_rate": 0.1,
        "decay_rate": 0.9,
        "meta_layers": 2,
        "meta_hidden_size": 20,
        "layers": 2,
        "hidden_size": 100,
        "activation": 'relu',
        "preprocess": True,
        "max_to_keep": 3,
        "retrain": False,
        "dim": 10,
        "range_of_means": 10,
        "range_of_stds": 10,
        "summary_dir": "summary",
        "checkpoint_dir": "data_ckpt",
        "batch_size": 10000,
        "training_iters": 4000,
        "log_iters": 100
    }

    # create the experiments dirs
    create_dirs([config["summary_dir"], config["checkpoint_dir"]])

    # create tensorflow session
    sess = tf.Session()

    # create your data generator and an instance of the model you want
    if config["problem"] == "simple":
        data = SimpleDG(config)
        model = LinearRegressionModel(config)
    elif config["problem"] == "mnist":
        data = MNISTDG(config)
        model = MNISTModel(config)
    else:
        raise ValueError("{} is not a valid problem".format(config["problem"]))

    # create tensorboard logger
    # logger = Logger(sess, config)
    # create trainer and pass all the previous components to it
    # trainer = LinearRegressionTrainer(sess, model, data, config, logger)

    sess.run(tf.global_variables_initializer())

    if config["optimizer"] == "sgd":
        optim = SGD(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rms":
        optim = RMSprop(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rnn":
        optim = RNNOptimizer(config)
        losses = learn(optim, model, config["rollout_length"])
        if config["retrain"]:
            optim.train(losses, sess, data)
        else:
            optim.load(sess)
    else:
        raise ValueError("{} is not a valid optimizer".format(config["optimizer"]))

    # initialize variables in the optimizee
    # (can't initialize all here because it would potentially overwrite the trained optimizer)
    sess.run(tf.variables_initializer(
        [var for var in tf.trainable_variables(scope=optim.__class__.__name__)]))

    x = np.arange(config["rollout_length"] + 1)
    for i in range(3):
        sess.run(tf.variables_initializer(
            [var for var in tf.trainable_variables(scope=optim.__class__.__name__)]))
        data.refresh_parameters(seed=i)
        data_x, data_y = next(data.next_batch(config["batch_size"]))
        l = sess.run([losses], feed_dict={"input:0": data_x, "label:0": data_y})
        print(l)
        p1, = plt.semilogy(x, l[0], label=config["optimizer"])
        plt.legend(handles=[p1])

    plt.title('Losses')
    plt.show()

    # TODO compare different optimizers
    data.refresh_parameters()
    data_x, data_y = next(data.next_batch(100, mode="train"))
    pred = sess.run(model.prediction, feed_dict={"input:0": data_x, "label:0": data_y})
    print(list(zip(pred,
                   np.argmax(data_y, axis=1),
                   pred == np.argmax(data_y, axis=1))))

    # calculate accuracy on train and test data
    seed = np.random.randint(low=0, high=1e6)
    data.refresh_parameters(seed=seed)
    data_x, data_y = next(data.next_batch(5000, mode="train"))
    acc = sess.run(model.accuracy, feed_dict={"input:0": data_x, "label:0": data_y})
    print("Train accuracy: {}".format(acc))

    data_x, data_y = next(data.next_batch(5000, mode="test"))
    acc = sess.run(model.accuracy, feed_dict={"input:0": data_x, "label:0": data_y})
    print("Test accuracy: {}".format(acc))
def train_classifier(train, valid, test, W, n_p=10, n_words=10000, n_x=300,
                     n_h=200, patience=10, max_epochs=50, lrate=0.001,
                     n_train=10000, optimizer='RMSprop', batch_size=50,
                     valid_batch_size=50, dispFreq=10, validFreq=100,
                     saveFreq=500, eps=1e-3):
    """
    train, valid, test : datasets
    W : the word embedding initialization
    n_words : vocabulary size
    n_x : word embedding dimension
    n_h : LSTM/GRU number of hidden units
    n_z : latent embedding space for a sentence
    patience : number of epochs to wait before early stopping if no progress
    max_epochs : the maximum number of epochs to run
    lrate : learning rate
    optimizer : method used for optimization
    batch_size : batch size during training
    valid_batch_size : batch size used for the validation/test sets
    dispFreq : display the training progress to stdout every N updates
    validFreq : compute the validation error after this number of updates
    """
    options = {}
    options['n_p'] = n_p
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['optimizer'] = optimizer
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    # if config.method in ['SVGD', 'SVGD_KFAC']: patience = 5

    logger.info('Model options {}'.format(options))
    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')
    assert np.min(train[1]) == 0 and np.max(train[1]) == 1
    n_y = np.max(train[1]) + 1
    options['n_y'] = n_y

    params = init_params(options, W)
    tparams = init_tparams(params)

    (use_noise, x, mask, y, f_pred_prob, f_pred, cost, cache) = build_model(tparams, options)

    lr_theano = tensor.scalar(name='lr')
    ntrain_theano = tensor.scalar(name='ntrain')

    if config.method == 'pSGLD':
        f_grad_shared, f_update = pSGLD(tparams, cost, [x, mask, y],
                                        ntrain_theano, lr_theano)
    elif config.method == 'SGLD':
        f_grad_shared, f_update = SGLD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano)
    elif config.method == 'RMSprop':
        f_grad_shared, f_update = RMSprop(tparams, cost, [x, mask, y], lr_theano)
    elif config.method == 'SVGD':
        f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano, kfac=False)
    elif config.method == 'SVGD_KFAC':
        f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano, kfac=True,
                                       average=True, cache=cache, eps=eps, n_p=n_p)
    elif config.method == 'MIXTURE_KFAC':
        f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano, kfac=True,
                                       average=False, cache=cache, eps=eps, n_p=n_p)

    logger.info('Training model...')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_train_err, best_valid_err, best_test_err = 0., 0., 0.
    bad_counter = 0
    uidx = 0  # the number of updates done
    start_time = time.time()
    n_average = 0

    train_probs = np.zeros((len(train[0]), n_y))
    valid_probs = np.zeros((len(valid[0]), n_y))
    test_probs = np.zeros((len(test[0]), n_y))

    try:
        for eidx in xrange(max_epochs):
            # ['Wemb', 'lstm_encoder_W', 'lstm_encoder_U',
            #  'lstm_encoder_rev_W', 'lstm_encoder_rev_U', 'Wy']
            print(tparams.keys())
            from optimizers import sqr_dist
            tv = tensor.flatten(tparams['Wy'], 2)
            ftv = theano.function([], sqr_dist(tv, tv))
            otv = ftv()
            print(np.min(otv), np.max(otv), np.mean(otv), np.median(otv),
                  np.sum(otv**2) / n_p)

            n_samples = 0
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(config.dropout)

                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)

                if config.method == 'RMSprop':
                    f_update(lrate)
                elif config.method in ['SVGD', 'pSGLD', 'SGLD']:
                    f_update(lrate, n_train)
                elif config.method in ['SVGD_KFAC', 'MIXTURE_KFAC']:
                    f_update(lrate, n_train, x, mask, y)

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN detected')
                    estop = True
                    break

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))

                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                    saveto = 'results/%s.npz' % save_prefix
                    np.savez(saveto, history_errs=history_errs)
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    if eidx < 1:
                        train_err = pred_error(f_pred, prepare_data, train, kf)
                        valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
                        test_err = pred_error(f_pred, prepare_data, test, kf_test)
                        history_errs.append([valid_err, test_err, train_err])
                    else:
                        train_probs_curr = pred_probs(f_pred_prob, prepare_data, train, kf, options)
                        valid_probs_curr = pred_probs(f_pred_prob, prepare_data, valid, kf_valid, options)
                        test_probs_curr = pred_probs(f_pred_prob, prepare_data, test, kf_test, options)

                        train_probs = (n_average * train_probs + train_probs_curr) / (n_average + 1)
                        valid_probs = (n_average * valid_probs + valid_probs_curr) / (n_average + 1)
                        test_probs = (n_average * test_probs + test_probs_curr) / (n_average + 1)
                        n_average += 1

                        train_pred = train_probs.argmax(axis=1)
                        valid_pred = valid_probs.argmax(axis=1)
                        test_pred = test_probs.argmax(axis=1)

                        train_err = (train_pred == np.array(train[1])).sum()
                        train_err = 1. - numpy_floatX(train_err) / len(train[0])
                        valid_err = (valid_pred == np.array(valid[1])).sum()
                        valid_err = 1. - numpy_floatX(valid_err) / len(valid[0])
                        test_err = (test_pred == np.array(test[1])).sum()
                        test_err = 1. - numpy_floatX(test_err) / len(test[0])
                        history_errs.append([valid_err, test_err, train_err])

                    if (uidx == 0 or
                            valid_err <= np.array(history_errs)[:, 0].min()):
                        best_train_err = train_err
                        best_valid_err = valid_err
                        best_test_err = test_err
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))

                    if (len(history_errs) > patience and
                            valid_err >= np.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            logger.info('Early Stop!')
                            estop = True
                            break

            logger.info('Seen {} samples'.format(n_samples))

            if estop:
                break

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()

    logger.info('Train {} Valid {} Test {}'.format(best_train_err, best_valid_err, best_test_err))

    saveto = 'results/%s.npz' % save_prefix
    np.savez(saveto,
             train_err=best_train_err,
             valid_err=best_valid_err,
             test_err=best_test_err,
             history_errs=history_errs)

    logger.info('The code ran for {} epochs, at {} sec/epoch'.format(
        eidx + 1, (end_time - start_time) / (1. * (eidx + 1))))

    return best_train_err, best_valid_err, best_test_err
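# A hypothetical driver for train_classifier (load_corpus is a placeholder name,
# not part of the code above); each split is a (token_id_sequences, labels) pair
# with binary labels, matching the len(train[0]) / np.max(train[1]) usage above:
train, valid, test, W = load_corpus()
best_train_err, best_valid_err, best_test_err = train_classifier(
    train, valid, test, W, max_epochs=50, lrate=0.001, batch_size=50)
print('train/valid/test error: {} {} {}'.format(
    best_train_err, best_valid_err, best_test_err))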
class ModelCompiler(object):

    def __init__(self, model, config, optimizer='SGD'):
        self.model = model(config=config)
        if optimizer == 'SGD':
            self.optimizer = SGD(lr=config['learning_rate'],
                                 decay=config['weight_decay'],
                                 momentum=config['momentum'])
        elif optimizer == 'Adagrad':
            self.optimizer = Adagrad(lr=config['learning_rate'],
                                     decay=config['weight_decay'])
        elif optimizer == 'RMSprop':
            self.optimizer = RMSprop(lr=config['learning_rate'])
        self.config = config

    def share_var(self, data_xy, testing=False, borrow=True):
        if testing:
            assert type(data_xy) == np.ndarray, "using test data in testing step"
            shared_x = theano.shared(np.asarray(data_xy, dtype=theano.config.floatX),
                                     borrow=borrow)
            return shared_x
        else:  # training
            assert type(data_xy) == tuple, "label data is missing"
            data_x, data_y = data_xy
            shared_x = theano.tensor._shared(np.asarray(data_x, dtype=theano.config.floatX),
                                             borrow=borrow)
            shared_y = theano.tensor._shared(np.asarray(data_y, dtype=theano.config.floatX),
                                             borrow=borrow)
            return shared_x, T.cast(shared_y, 'int32')

    def _train_by_sentence_init_(self, x_train, y_train, x_val, y_val, l_t, l_v):
        self.learning_rate_decay = self.config['learning_rate_decay']

        train_set_x, train_set_y = self.share_var((x_train, y_train))
        valid_set_x, valid_set_y = self.share_var((x_val, y_val))

        l_t = T.cast(theano.tensor._shared(np.asarray(l_t, dtype=theano.config.floatX),
                                           borrow=True), 'int32')
        l_v = T.cast(theano.tensor._shared(np.asarray(l_v, dtype=theano.config.floatX),
                                           borrow=True), 'int32')

        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors

        train_model = theano.function(
            inputs=[index],
            outputs=[cost, errors],
            updates=self.optimizer.get_updates(params=params, cost=cost),
            givens={
                x: train_set_x[l_t[index]:l_t[index + 1]],
                y: train_set_y[l_t[index]:l_t[index + 1]]
            }
        )

        validate_model = theano.function(
            inputs=[index],
            outputs=errors,
            givens={
                x: valid_set_x[l_v[index]:l_v[index + 1]],
                y: valid_set_y[l_v[index]:l_v[index + 1]]
            }
        )
        return train_model, validate_model

    def train_by_sentence(self, x_train, y_train, x_val, y_val,
                          index_train, index_val, save_model=False):
        """
        train: {name: 'sentenceID', data: [features], label: [labels]}
        """
        train_model, validate_model = self._train_by_sentence_init_(
            x_train, y_train, x_val, y_val, index_train, index_val)

        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = len(index_train) - 1  # min(self.n_train_batches, patience / 2)

        best_validation_loss = np.inf
        best_val_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print('start training...')
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(len(index_train) - 1):
                minibatch_avg_cost, train_acc = train_model(minibatch_index)
                iter = (epoch - 1) * (len(index_train) - 1) + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    DropoutLayer.SetDropoutOff()
                    validation_losses = [validate_model(i)
                                         for i in xrange(len(index_val) - 1)]
                    this_validation_loss = np.mean(validation_losses)
                    this_val_acc = 1 - this_validation_loss
                    this_train_acc = 1 - train_acc
                    print('epoch %i/%s, cost %.4f , train acc %.4f , val acc %.4f '
                          % (epoch, str(n_epochs), minibatch_avg_cost,
                             this_train_acc, this_val_acc))
                    t_cost.append(round(minibatch_avg_cost, 5))
                    t_acc.append(round(this_train_acc, 5))
                    v_acc.append(round(this_val_acc, 5))

                    if save_model:
                        if this_val_acc > best_val_acc:
                            best_val_acc = this_val_acc
                            folder = "./snapshot_{0}_{1}/".format(epoch, round(best_val_acc, 3))
                            os.mkdir(folder)
                            tools.save_weights(self.layers, folder, epoch)

                    if this_validation_loss < best_validation_loss:
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                    # if this_train_acc - this_val_acc > 0.05:
                    #     done_looping = True
                    #     break
                # if patience <= iter:
                #     done_looping = True
                #     break

            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        self.record = {
            'training loss': t_cost,
            'training accuracy': t_acc,
            'validation accuracy': v_acc
        }
        end_time = time.clock()
        print(('Optimization complete. Best validation score of %f %% \n'
               'obtained at iteration %i, with test performance %f %%')
              % (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    def _train_without_val_init_(self, train_set_x, train_set_y):
        self.learning_rate_decay = self.config['learning_rate_decay']
        batch_size = self.model.batch_size
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_train_batches = n_train_batches
        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors

        train_model = theano.function(
            inputs=[index],
            outputs=[cost, errors],
            updates=self.optimizer.get_updates(params=params, cost=cost),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
        )
        return train_model

    def train_without_val(self, train_set_x, train_set_y, save_model=False):
        train_model = self._train_without_val_init_(train_set_x, train_set_y)

        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = self.n_train_batches

        best_validation_loss = np.inf
        best_train_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print('start training...')
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(self.n_train_batches):
                minibatch_avg_cost, train_acc = train_model(minibatch_index)
                this_train_acc = 1 - train_acc
                print('epoch %i/%s, cost %.4f , train acc %.4f '
                      % (epoch, str(n_epochs), minibatch_avg_cost, this_train_acc))

                if save_model:
                    if this_train_acc > best_train_acc:
                        best_train_acc = this_train_acc
                        folder = "./snapshot_{0}_{1}/".format(epoch, round(best_train_acc, 3))
                        os.mkdir(folder)
                        tools.save_weights(self.layers, folder, epoch)

            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        end_time = time.clock()

    def _train_init_(self, train_set_x, train_set_y, valid_set_x, valid_set_y):
        self.learning_rate_decay = self.config['learning_rate_decay']
        batch_size = self.model.batch_size
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_train_batches = n_train_batches
        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors

        train_model = theano.function(
            inputs=[index],
            outputs=[cost, errors],
            updates=self.optimizer.get_updates(params=params, cost=cost),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
        )

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_valid_batches = n_valid_batches

        validate_model = theano.function(
            inputs=[index],
            outputs=errors,
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]
            }
        )
        return train_model, validate_model

    def train(self, train_set_x, train_set_y, valid_set_x, valid_set_y, save_model=False):
        train_model, validate_model = self._train_init_(train_set_x, train_set_y,
                                                        valid_set_x, valid_set_y)

        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = min(self.n_train_batches, patience / 2)

        best_validation_loss = np.inf
        best_val_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print('start training...')
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(self.n_train_batches):
                minibatch_avg_cost, train_acc = train_model(minibatch_index)
                iter = (epoch - 1) * self.n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    DropoutLayer.SetDropoutOff()
                    validation_losses = [validate_model(i)
                                         for i in xrange(self.n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    this_val_acc = 1 - this_validation_loss
                    this_train_acc = 1 - train_acc
                    print('epoch %i/%s, cost %.4f , train acc %.4f , val acc %.4f '
                          % (epoch, str(n_epochs), minibatch_avg_cost,
                             this_train_acc, this_val_acc))
                    t_cost.append(round(minibatch_avg_cost, 5))
                    t_acc.append(round(this_train_acc, 5))
                    v_acc.append(round(this_val_acc, 5))

                    if save_model:
                        if this_val_acc > best_val_acc:
                            best_val_acc = this_val_acc
                            folder = "./snapshot_{0}_{1}/".format(epoch, round(best_val_acc, 3))
                            os.mkdir(folder)
                            tools.save_weights(self.layers, folder, epoch)

                    if this_validation_loss < best_validation_loss:
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                    # if this_train_acc - this_val_acc > 0.05:
                    #     done_looping = True
                    #     break
                # if patience <= iter:
                #     done_looping = True
                #     break

            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        self.record = {
            'training loss': t_cost,
            'training accuracy': t_acc,
            'validation accuracy': v_acc
        }
        end_time = time.clock()
        print(('Optimization complete. Best validation score of %f %% \n'
               'obtained at iteration %i, with test performance %f %%')
              % (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    def load(self):
        layers = self.model.layers
        dir = self.model.snapshot
        if not os.path.isdir(dir):
            raise IOError('no such snapshot file: %s' % (dir))
        snapshots = glob.glob(dir + '*.npy')
        e = self.config['e_snapshot']
        tools.load_weights(layers, dir, e)

    def predict_by_sentence(self, test_set_x, index_test, load_model=None, dropout=False):
        assert load_model != None, "load_model should be True or False"

        test_set_x = self.share_var(test_set_x, testing=True)
        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        predict_times = len(index_test) - 1
        index_test = T.cast(theano.tensor._shared(np.asarray(index_test, dtype=theano.config.floatX),
                                                  borrow=True), 'int32')

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' % (dir))
            snapshots = glob.glob(dir + '*.npy')
            e = self.config['e_snapshot']
            if dropout == False:
                tools.load_weights(layers, dir, e)
            else:
                tools.dropout_load_weights(layers, dir, e)

        test_model = theano.function(
            inputs=[index],
            outputs=self.model.y_pred,
            givens={
                x: test_set_x[index_test[index]:index_test[(index + 1)]],
            }
        )

        n_test = test_set_x.get_value(borrow=True).shape[0]
        y_pred = np.array([])
        DropoutLayer.SetDropoutOff()
        print("predict on %d samples" % int(n_test))
        for i in xrange(predict_times):
            y_pred = np.concatenate((y_pred, test_model(i)), axis=0)
        return y_pred

    def predict(self, test_set_x, load_model=None, dropout=False):
        assert load_model != None, "load_model should be True or False"

        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' % (dir))
            snapshots = glob.glob(dir + '*.npy')
            e = self.config['e_snapshot']
            if dropout == False:
                tools.load_weights(layers, dir, e)
            else:
                tools.dropout_load_weights(layers, dir, e)

        test_model = theano.function(
            inputs=[index],
            outputs=self.model.y_pred,
            givens={
                x: test_set_x[index:(index + 1)],
            }
        )

        n_test = test_set_x.get_value(borrow=True).shape[0]
        y_pred = np.zeros(n_test)
        DropoutLayer.SetDropoutOff()
        print("predict on %d samples" % int(n_test))
        for i in xrange(n_test):
            y_pred[i] = int(test_model(i))
        return y_pred

    def proba(self, X, load_model=None):
        assert load_model != None, "load_model should be True or False"

        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' % (dir))
            snapshots = glob.glob(dir + '*.npy')
            e = self.config['e_snapshot']
            tools.load_weights(layers, dir, e)

        prob_model = theano.function(
            inputs=[index],
            outputs=self.model.proba,
            givens={
                x: X[index:(index + 1)],
            }
        )

        y_prob = []
        n_test = X.get_value(borrow=True).shape[0]
        DropoutLayer.SetDropoutOff()
        print("getting probabilities on %d samples" % int(n_test))
        for i in xrange(n_test):
            y_prob.append(prob_model(i))
        return np.asarray(y_prob).reshape(n_test, y_prob[0].shape[1])
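# A minimal usage sketch of ModelCompiler (MyNet is a placeholder model class and
# the shared-variable inputs are assumed to be prepared elsewhere; the config keys
# below are the ones actually read by the methods above):
config = {
    'learning_rate': 0.01,
    'weight_decay': 1e-4,
    'momentum': 0.9,
    'learning_rate_decay': True,
    'n_epochs': 20,
    'e_snapshot': 5,
}
compiler = ModelCompiler(MyNet, config, optimizer='RMSprop')
# train_set_x/valid_set_x are Theano shared variables and train_set_y/valid_set_y
# are int32 casts, e.g. built with compiler.share_var((x_train, y_train))
compiler.train(train_set_x, train_set_y, valid_set_x, valid_set_y, save_model=True)
y_pred = compiler.predict(test_set_x, load_model=True)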
# plt.subplot(1, 4, 2)
# plt.imshow(img[1])
# plt.subplot(1, 4, 3)
# plt.imshow(img[2])
# plt.subplot(1, 4, 4)
# plt.imshow(img[3])

model = MNISTNet()
loss = SoftmaxCrossEntropy(num_class=10)

# define your learning rate scheduler: halve the learning rate every 1000 iterations
def func(lr, iteration):
    if iteration % 1000 == 0:
        return lr * 0.5
    else:
        return lr

rms = RMSprop(lr=0.001, decay=0, sheduler_func=func)
l2 = L2(w=0.001)  # L2 regularization with lambda=0.001

model.compile(optimizer=rms, loss=loss, regularization=l2)
train_results, val_results, test_results = model.train(mnist,
                                                       train_batch=30,
                                                       val_batch=1000,
                                                       test_batch=1000,
                                                       epochs=2,
                                                       val_intervals=100,
                                                       test_intervals=300,
                                                       print_intervals=100)
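# Assuming the framework calls sheduler_func(lr, iteration) once per training
# iteration, the callback above halves the learning rate every 1000 iterations:
lr = 0.001
for it in range(1, 3001):
    lr = func(lr, it)
print(lr)  # 0.001 -> 0.0005 -> 0.00025 -> 0.000125 after 3000 iterations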
# Initial x
x0 = np.array([-2., -1.])

# Some global settings
max_iter = 5000
tol = 1e-8

# Optimization methods
gd = GD(fun, jac, lr=0.0005, max_iter=max_iter, tol=tol)
mom1 = GD(fun, jac, lr=0.0005, momentum=0.5, max_iter=max_iter, tol=tol)
mom2 = GD(fun, jac, lr=0.0005, momentum=0.9, max_iter=max_iter, tol=tol)
nest = GD(fun, jac, lr=0.0005, momentum=0.5, nesterov=True, max_iter=max_iter, tol=tol)
agrad = Adagrad(fun, jac, lr=0.1, max_iter=max_iter, tol=tol)
adelta = Adadelta(fun, jac, lr=1., max_iter=max_iter, tol=tol)
rms = RMSprop(fun, jac, lr=0.001, max_iter=max_iter, tol=tol)
adam = Adam(fun, jac, lr=0.01, max_iter=max_iter, tol=tol)

optimizers = [gd, mom1, nest, agrad, adelta, rms, adam]
labels = ['GD', 'Momentum', 'Nesterov', 'Adagrad', 'Adadelta', 'RMSprop', 'Adam']

# Initialise lists for x-values at each iteration, and the final x-value for each
# optimisation method
xall = []
xfinal = []
feval = []

# Loop over all optimizers
for opt in optimizers:
    # Minimise the function
    opt.optimize(x0)
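# For reference, a minimal NumPy sketch of the RMSprop update rule that wrappers
# like the ones above implement (a generic illustration, not the API of any of the
# libraries used in these snippets): keep a running average of squared gradients
# and scale each step by its root.
import numpy as np

def rmsprop_step(x, grad, avg_sq, lr=0.001, rho=0.9, eps=1e-8):
    avg_sq = rho * avg_sq + (1 - rho) * grad ** 2   # running average of g^2
    x = x - lr * grad / (np.sqrt(avg_sq) + eps)     # per-parameter scaled step
    return x, avg_sq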
         backprop_depth=SEQUENCE_LENGTH, stateful=True),
    LSTM(size=512, input_size=512, batch_size=BATCH_SIZE,
         backprop_depth=SEQUENCE_LENGTH, stateful=True),
    TimeDistributed(
        Dense(size=EMBEDDING_LENGTH, input_size=512,
              activation=SparseSoftmax())))

if RESTORE_MODEL_PATH:
    model.loadParams(RESTORE_MODEL_PATH)

optimizer = RMSprop(learning_rate=lambda n: 0.001)
loss_function = VectorCrossEntropy
model.assignOptimizer(optimizer)

if RESTORE_OPTIMIZER_PATH:
    optimizer.load(RESTORE_OPTIMIZER_PATH)

for epoch in range(INITIAL_EPOCH, NR_OF_EPOCHS + INITIAL_EPOCH):
    loss, accuracy = model.train(makeBatches(source, SEQUENCE_LENGTH, EMBEDDING_LENGTH),
                                 lossfunc=loss_function)
    model.saveParams(
        f"{MODEL_PATH}{MODEL_NAME}-{epoch:02d}-loss_{loss:.5f}-acc_{accuracy:.5f}.nn"
    )
    optimizer.save(f"{MODEL_PATH}{epoch:02d}-optimizer.json")