def ready(self):
    # Input matrix of size n_batch * n_sentence
    self.x = T.lmatrix('x')
    # Target class for each sentence in the batch
    self.y = T.ivector('y')

    self.rng = np.random.RandomState(self.init_params['seed'])
    set_rng(self.rng)

    print "CNN building..."
    clf_builder = self.get_clf_builder(self.init_params['clf'])
    self.network = clf_builder(input_var=self.x,
                               batch_size=self.init_params['batch_size'],
                               sentence_len=self.init_params['sent_len'],
                               vocab_size=self.init_params['vocab_size'],
                               word_dimension=self.init_params['word_dim'],
                               word_embedding=self.init_params['word_embedding'],
                               non_static=self.init_params['non_static'],
                               n_out=self.init_params['n_out'],
                               arch_params=self.arch_params)

    # The deterministic keyword disables stochastic behaviour,
    # e.g. it turns off dropout
    self.p_y_given_x = lasagne.layers.get_output(self.network, self.x,
                                                 deterministic=True)
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)

    self.predict_wrap = theano.function(inputs=[self.x],
                                        outputs=self.y_pred,
                                        allow_input_downcast=True)
    self.predict_proba_wrap = theano.function(inputs=[self.x],
                                              outputs=self.p_y_given_x,
                                              allow_input_downcast=True)
    print "CNN building finished"
    self.is_ready = True
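The predict functions above wrap one deterministic output expression. A minimal standalone sketch of the same idiom on a toy softmax network (layer sizes are made up, not the author's architecture):

import numpy as np
import theano
import theano.tensor as T
import lasagne

x = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 4), input_var=x)
l_out = lasagne.layers.DenseLayer(l_in, num_units=3,
                                  nonlinearity=lasagne.nonlinearities.softmax)

# deterministic=True disables stochastic layers such as dropout
p_y_given_x = lasagne.layers.get_output(l_out, deterministic=True)
y_pred = T.argmax(p_y_given_x, axis=1)

predict = theano.function([x], y_pred, allow_input_downcast=True)
predict_proba = theano.function([x], p_y_given_x, allow_input_downcast=True)
print(predict(np.random.rand(2, 4)))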
def test_specified_rng(self, input_layer):
    from lasagne.layers.noise import GaussianNoiseLayer
    input = theano.shared(numpy.ones((100, 100)))
    seed = 123456789

    set_rng(RandomState(seed))
    result = GaussianNoiseLayer(input_layer).get_output_for(input)
    result_eval1 = result.eval()

    set_rng(RandomState(seed))
    result = GaussianNoiseLayer(input_layer).get_output_for(input)
    result_eval2 = result.eval()

    assert numpy.allclose(result_eval1, result_eval2)
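This pytest method relies on an input_layer fixture; a self-contained version of the same reproducibility check, runnable outside the test suite:

import numpy
import theano
from numpy.random import RandomState
from lasagne.random import set_rng
from lasagne.layers import InputLayer
from lasagne.layers.noise import GaussianNoiseLayer

data = theano.shared(numpy.ones((100, 100)))
input_layer = InputLayer((100, 100))

# Re-seeding the global RNG before rebuilding the layer
# yields identical noise draws.
set_rng(RandomState(123456789))
eval1 = GaussianNoiseLayer(input_layer).get_output_for(data).eval()
set_rng(RandomState(123456789))
eval2 = GaussianNoiseLayer(input_layer).get_output_for(data).eval()
assert numpy.allclose(eval1, eval2)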
def test_specified_rng():
    from lasagne.random import set_rng
    from lasagne.init import (Normal, Uniform, GlorotNormal,
                              GlorotUniform, Sparse, Orthogonal)
    from numpy.random import RandomState
    from numpy import allclose

    seed = 123456789
    for init_class in [Normal, Uniform, GlorotNormal,
                       GlorotUniform, Sparse, Orthogonal]:
        set_rng(RandomState(seed))
        sample1 = init_class().sample((100, 100))
        set_rng(RandomState(seed))
        sample2 = init_class().sample((100, 100))
        assert allclose(sample1, sample2), \
            ("random initialization was inconsistent for {}"
             .format(init_class.__name__))
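The property this test checks is what makes two identically-defined networks start from identical weights; a hedged sketch using default DenseLayer initialisers:

import numpy as np
from numpy.random import RandomState
import lasagne
from lasagne.random import set_rng

def build():
    net = lasagne.layers.InputLayer((None, 8))
    return lasagne.layers.DenseLayer(net, num_units=4)  # GlorotUniform by default

set_rng(RandomState(0))
net_a = build()
set_rng(RandomState(0))
net_b = build()

wa = lasagne.layers.get_all_param_values(net_a)
wb = lasagne.layers.get_all_param_values(net_b)
assert all(np.allclose(a, b) for a, b in zip(wa, wb))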
def test_specified_rng(self, input_layer):
    from lasagne.layers.noise import DropoutLayer
    input = theano.shared(numpy.ones((100, 100)))
    seed = 123456789
    rng = get_rng()

    set_rng(RandomState(seed))
    result = DropoutLayer(input_layer).get_output_for(input)
    result_eval1 = result.eval()

    set_rng(RandomState(seed))
    result = DropoutLayer(input_layer).get_output_for(input)
    result_eval2 = result.eval()

    set_rng(rng)  # reset to original RNG for other tests
    assert numpy.allclose(result_eval1, result_eval2)
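Unlike the Gaussian-noise test, this one restores the previous global RNG afterwards. The same save/restore idiom, wrapped in try/finally for user code:

from numpy.random import RandomState
from lasagne.random import get_rng, set_rng

old_rng = get_rng()
try:
    set_rng(RandomState(123456789))
    # ... build layers / draw reproducible samples here ...
finally:
    set_rng(old_rng)  # restore the global RNG no matter what happens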
def do_regression(num_epochs=2,                # No. of epochs to train
                  init_file=None,              # Saved parameters to initialise training
                  epoch_size=680780,           # Whole dataset size
                  valid_size=34848,
                  train_batch_multiple=10637,  # No. of minibatches per batch
                  valid_batch_multiple=1089,   # No. of minibatches per batch
                  train_minibatch_size=64,
                  valid_minibatch_size=32,
                  eval_multiple=50,            # No. of minibatches to ave. in report
                  save_model=True,
                  input_width=19,
                  rng_seed=100009,
                  cross_val=0,                 # Cross-validation subset label
                  dataver=1,                   # Label for different runs/architectures/etc
                  rate_init=1.0,
                  rate_decay=0.999983):

    ###################################################
    ################# 0. User inputs ##################
    ###################################################
    for i in range(1, len(sys.argv)):
        if sys.argv[i].startswith('-'):
            option = sys.argv[i][1:]
            if option == 'i':
                init_file = sys.argv[i + 1]
            elif option[0:2] == 'v=':
                dataver = int(option[2:])
            elif option[0:3] == 'cv=':
                cross_val = int(option[3:])
            elif option[0:3] == 'rs=':
                rng_seed = int(option[3:])
            elif option[0:3] == 'ri=':
                rate_init = np.float32(option[3:])
            elif option[0:3] == 'rd=':
                rate_decay = np.float32(option[3:])

    print("Running with dataver %s" % (dataver))
    print("Running with cross_val %s" % (cross_val))

    ###################################################
    ############# 1. Housekeeping values ##############
    ###################################################
    # Batch size is possibly not equal to epoch size due to memory limits
    train_batch_size = train_batch_multiple * train_minibatch_size
    assert epoch_size >= train_batch_size

    # Number of times we expect the training/validation generator to be called
    max_train_gen_calls = (num_epochs * epoch_size) // train_batch_size

    # Number of evaluations (total minibatches // eval_multiple)
    num_eval = max_train_gen_calls * train_batch_multiple // eval_multiple

    ###################################################
    ###### 2. Define model and theano variables #######
    ###################################################
    if rng_seed is not None:
        print("Setting RandomState with seed=%i" % (rng_seed))
        rng = np.random.RandomState(rng_seed)
        set_rng(rng)

    print("Defining variables...")
    index = T.lscalar()  # Minibatch index
    x = T.tensor3('x')   # Inputs
    y = T.fvector('y')   # Target

    print("Defining model...")
    network_0 = build_1Dregression_v1(input_var=x,
                                      input_width=input_width,
                                      nin_units=12,
                                      h_num_units=[64, 128, 256, 128, 64],
                                      h_grad_clip=1.0,
                                      output_width=1)

    if init_file is not None:
        print("Loading initial model parameters...")
        init_model = np.load(init_file)
        init_params = init_model[init_model.files[0]]
        LL.set_all_param_values([network_0], init_params)

    ###################################################
    ################ 3. Import data ###################
    ###################################################
    # Loading data generation model parameters
    print("Defining shared variables...")
    train_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX),
                                borrow=True)
    train_set_x = theano.shared(np.zeros((1, 1, 1), dtype=theano.config.floatX),
                                borrow=True)
    valid_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX),
                                borrow=True)
    valid_set_x = theano.shared(np.zeros((1, 1, 1), dtype=theano.config.floatX),
                                borrow=True)

    # Validation data (pick a single augmented instance, rand0 here)
    print("Creating validation data...")
    chunk_valid_data = np.load(
        "./valid/data_valid_augmented_cv%s_t%s_rand0.npy"
        % (cross_val, input_width)
    ).astype(theano.config.floatX)
    chunk_valid_answers = np.load(
        "./valid/data_valid_expected_cv%s.npy"
        % (cross_val)
    ).astype(theano.config.floatX)

    print("chunk_valid_answers.shape", chunk_valid_answers.shape)
    print("Assigning validation data...")
    valid_set_y.set_value(chunk_valid_answers[:])
    valid_set_x.set_value(chunk_valid_data.transpose(0, 2, 1))

    # Create output directory
    if not os.path.exists("output_cv%s_v%s" % (cross_val, dataver)):
        os.makedirs("output_cv%s_v%s" % (cross_val, dataver))

    ###################################################
    ########### 4. Create Loss expressions ############
    ###################################################
    print("Defining loss expressions...")
    prediction_0 = LL.get_output(network_0)
    train_loss = aggregate(T.abs_(prediction_0 - y.dimshuffle(0, 'x')))

    valid_prediction_0 = LL.get_output(network_0, deterministic=True)
    valid_loss = aggregate(T.abs_(valid_prediction_0 - y.dimshuffle(0, 'x')))

    ###################################################
    ############ 5. Define update method ##############
    ###################################################
    print("Defining update choices...")
    params = LL.get_all_params(network_0, trainable=True)
    learn_rate = T.scalar('learn_rate', dtype=theano.config.floatX)

    updates = lasagne.updates.adadelta(train_loss, params,
                                       learning_rate=learn_rate)

    ###################################################
    ######### 6. Define train/valid functions #########
    ###################################################
    print("Defining theano functions...")
    train_model = theano.function(
        [index, learn_rate], train_loss,
        updates=updates,
        givens={
            x: train_set_x[(index * train_minibatch_size):
                           ((index + 1) * train_minibatch_size)],
            y: train_set_y[(index * train_minibatch_size):
                           ((index + 1) * train_minibatch_size)]
        }
    )

    validate_model = theano.function(
        [index], valid_loss,
        givens={
            x: valid_set_x[index * valid_minibatch_size:
                           (index + 1) * valid_minibatch_size],
            y: valid_set_y[index * valid_minibatch_size:
                           (index + 1) * valid_minibatch_size]
        }
    )

    ###################################################
    ################ 7. Begin training ################
    ###################################################
    print("Begin training...")
    sys.stdout.flush()

    cum_iterations = 0
    this_train_loss = 0.0
    this_valid_loss = 0.0
    best_valid_loss = np.inf
    best_iter = 0

    train_eval_scores = np.empty(num_eval)
    valid_eval_scores = np.empty(num_eval)
    eval_index = 0
    aug_index = 0

    for batch in range(max_train_gen_calls):
        start_time = time.time()
        chunk_train_data = np.load(
            "./train/data_train_augmented_cv%s_t%s_rand%s.npy"
            % (cross_val, input_width, aug_index)
        ).astype(theano.config.floatX)
        chunk_train_answers = np.load(
            "./train/data_train_expected_cv%s.npy"
            % (cross_val)
        ).astype(theano.config.floatX)

        train_set_y.set_value(chunk_train_answers[:])
        train_set_x.set_value(chunk_train_data.transpose(0, 2, 1))

        # Iterate over minibatches in each batch
        for mini_index in range(train_batch_multiple):
            this_rate = np.float32(rate_init * (rate_decay ** cum_iterations))
            this_train_loss += train_model(mini_index, this_rate)
            cum_iterations += 1

            # Report loss
            if (cum_iterations % eval_multiple == 0):
                this_train_loss = this_train_loss / eval_multiple
                this_valid_loss = np.mean([validate_model(i)
                                           for i in range(valid_batch_multiple)])
                train_eval_scores[eval_index] = this_train_loss
                valid_eval_scores[eval_index] = this_valid_loss

                # Save report every five evaluations
                if ((eval_index + 1) % 5 == 0):
                    np.savetxt("output_cv%s_v%s/training_scores.txt"
                               % (cross_val, dataver),
                               train_eval_scores, fmt="%.5f")
                    np.savetxt("output_cv%s_v%s/validation_scores.txt"
                               % (cross_val, dataver),
                               valid_eval_scores, fmt="%.5f")
                    np.savetxt("output_cv%s_v%s/last_learn_rate.txt"
                               % (cross_val, dataver),
                               [np.array(this_rate)], fmt="%.5f")

                # Save model if best validation score
                if (this_valid_loss < best_valid_loss):
                    best_valid_loss = this_valid_loss
                    best_iter = cum_iterations - 1

                    if save_model:
                        np.savez("output_cv%s_v%s/model.npz"
                                 % (cross_val, dataver),
                                 LL.get_all_param_values(network_0))

                # Reset evaluation reports
                eval_index += 1
                this_train_loss = 0.0
                this_valid_loss = 0.0

        aug_index += 1
        end_time = time.time()
        print("Computing time for batch %d: %f" % (batch, end_time - start_time))

    print("Best validation loss %f after %d epochs"
          % (best_valid_loss, (best_iter * train_minibatch_size // epoch_size)))

    del train_set_x, train_set_y, valid_set_x, valid_set_y
    gc.collect()
    return None
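The schedule in step 7 is a per-minibatch exponential decay, rate_init * rate_decay ** cum_iterations. A quick standalone check of how the default 0.999983 decay behaves:

import numpy as np

rate_init, rate_decay = 1.0, 0.999983
for t in [0, 10 ** 4, 10 ** 5, 10 ** 6]:
    # after t minibatches the rate has shrunk by rate_decay**t
    print("t=%7d  rate=%.3g" % (t, rate_init * rate_decay ** t))
# ~0.84 after 1e4 steps, ~0.18 after 1e5, effectively zero (~4e-8) after 1e6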
def build_resnet_model():
    log.i('BUILDING RESNET MODEL...')

    # Random Seed
    lasagne_random.set_rng(cfg.getRandomState())

    # Input layer for images
    net = l.InputLayer((None, cfg.IM_DIM, cfg.IM_SIZE[1], cfg.IM_SIZE[0]))

    # First Convolution
    net = l.Conv2DLayer(net,
                        num_filters=cfg.FILTERS[0],
                        filter_size=cfg.KERNEL_SIZES[0],
                        pad='same',
                        W=initialization(cfg.NONLINEARITY),
                        nonlinearity=None)

    log.i(("\tFIRST CONV OUT SHAPE:", l.get_output_shape(net),
           "LAYER:", len(l.get_all_layers(net)) - 1))

    # Residual Stacks
    for i in range(0, len(cfg.FILTERS)):
        net = resblock(net,
                       filters=cfg.FILTERS[i] * cfg.RESNET_K,
                       kernel_size=cfg.KERNEL_SIZES[i],
                       stride=2,
                       num_groups=cfg.NUM_OF_GROUPS[i])
        for _ in range(1, cfg.RESNET_N):
            net = resblock(net,
                           filters=cfg.FILTERS[i] * cfg.RESNET_K,
                           kernel_size=cfg.KERNEL_SIZES[i],
                           num_groups=cfg.NUM_OF_GROUPS[i],
                           preactivated=False)
        log.i(("\tRES STACK", i + 1, "OUT SHAPE:", l.get_output_shape(net),
               "LAYER:", len(l.get_all_layers(net)) - 1))

    # Post Activation
    net = batch_norm(net)
    net = l.NonlinearityLayer(net, nonlinearity=nonlinearity(cfg.NONLINEARITY))

    # Pooling
    net = l.GlobalPoolLayer(net)
    log.i(("\tFINAL POOLING SHAPE:", l.get_output_shape(net),
           "LAYER:", len(l.get_all_layers(net)) - 1))

    # Classification Layer
    net = l.DenseLayer(net, len(cfg.CLASSES),
                       nonlinearity=nonlinearity('identity'),
                       W=initialization('identity'))
    net = l.NonlinearityLayer(net, nonlinearity=nonlinearity('softmax'))

    log.i(("\tFINAL NET OUT SHAPE:", l.get_output_shape(net),
           "LAYER:", len(l.get_all_layers(net))))
    log.i("...DONE!")

    # Model stats
    log.i(("MODEL HAS", sum(hasattr(layer, 'W')
                            for layer in l.get_all_layers(net)),
           "WEIGHTED LAYERS"))
    log.i(("MODEL HAS", l.count_params(net), "PARAMS"))

    return net
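The model-stats idiom at the end, counting layers that own a W plus total parameters, works on any Lasagne graph. A toy check with made-up layer sizes:

import lasagne.layers as l

net = l.InputLayer((None, 3, 32, 32))
net = l.Conv2DLayer(net, num_filters=8, filter_size=3, pad='same')
net = l.DenseLayer(net, num_units=10)

# InputLayer has no W, so only the conv and dense layers count
print(sum(hasattr(layer, 'W') for layer in l.get_all_layers(net)))  # 2
print(l.count_params(net))  # weights + biases of both layers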
# make directory for results
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# save ALL parser arguments
with open(os.path.join(save_dir, 'exp_settings.txt'), 'w') as f:
    for key in sorted(args_dict):
        f.write(key + '\t' + str(args_dict[key]) + '\n')

# inject the parsed arguments as bare names (works at script scope)
locals().update(args_dict)
assert lbda is None
assert size == 2000

if seed is None:
    seed = np.random.randint(2**32 - 1)
set_rng(np.random.RandomState(seed))
np.random.seed(seed + 1000)

input_dim, train_x, train_y, valid_x, valid_y, test_x, test_y = \
    get_regression_dataset(dataset,
                           data_path=os.environ['HOME'] + '/BayesianHypernetCW/')
datasets = [get_regression_dataset(dataset,
                                   data_path=os.environ['HOME'] + '/BayesianHypernetCW/')
            for _ in range(n_trials)]

# SET HPARAMS FOR SEARCH (override grid search if provided as flag)
if grid_search:
    # TODO: better grid...
    length_scales = 1000. ** np.arange(-3, 1)
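A condensed sketch of the seeding convention used here (and in the next snippet): draw a seed if none was supplied, then seed Lasagne's global RNG and NumPy's global RNG from related but distinct values:

import numpy as np
from lasagne.random import set_rng

seed = None
if seed is None:
    seed = np.random.randint(2 ** 32 - 1)  # fall back to a random seed
set_rng(np.random.RandomState(seed))  # weight init, dropout masks, etc.
np.random.seed(seed + 1000)           # data shuffling and other numpy draws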
parser.add_argument('--static_bias', default=1, type=int)
parser.add_argument('--alpha', default=2, type=float)
parser.add_argument('--beta', default=1, type=float)
parser.add_argument('--save_dir', default='./models', type=str)
args = parser.parse_args()
print args

if args.flow == '0':
    args.flow = None
elif args.flow == 'IAF' or args.flow == 'RealNVP':
    pass
else:
    raise Exception('flow type {} not supported'.format(args.flow))

set_rng(np.random.RandomState(args.seed))
np.random.seed(args.seed + 1000)

if args.prior == 'log_normal':
    pr = 0
elif args.prior == 'log_laplace':
    pr = 1

if args.dropout:
    dp = 1
else:
    dp = 0

if args.flow is None:
    fl = '0'
else:
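The manual flow validation above can also be expressed with argparse's built-in choices; a sketch (not the author's code):

import argparse

parser = argparse.ArgumentParser()
# argparse rejects anything outside 'choices' with a usage error
parser.add_argument('--flow', default='0', choices=['0', 'IAF', 'RealNVP'])
args = parser.parse_args()
flow = None if args.flow == '0' else args.flow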
    total_nll /= batch_cnt
    total_fer /= batch_cnt

    return total_nll, total_fer


if __name__ == '__main__':
    ###############
    # load config #
    ###############
    parser = get_arg_parser()
    args = parser.parse_args()
    args.save_path = get_save_path(args)

    set_rng(numpy.random.RandomState(111))

    ###################
    # get reload path #
    ###################
    if not args.reload_model:
        reload_path = args.save_path + '_last_model.pkl'
        if os.path.exists(reload_path):
            print('Previously trained model detected: {}'.format(reload_path))
            print('Training continues')
            args.reload_model = reload_path

    ##############
    # print args #
    ##############
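The seed here is hard-coded to 111, so every run shares one RNG stream. A small sketch (with a hypothetical --seed flag, not in the original get_arg_parser) of making it configurable:

import argparse
import numpy
from lasagne.random import set_rng

parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=111)  # hypothetical flag
args = parser.parse_args()
set_rng(numpy.random.RandomState(args.seed))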
def retrain(trainX, trainY, testX, testY, theta_mat, lambda_mat,
            learning_rate=5e-4,
            rate_decay=1.0,
            init_scale=0.2,
            scale_decay=0.998,
            momentum=0.0,
            minibatch_size=64,
            num_epochs=70,
            rng_seed=2017,
            model_path=None,
            model_to_save=None):

    if rng_seed is not None:
        print("Setting RandomState with seed=%i" % (rng_seed))
        rng = np.random.RandomState(rng_seed)
        set_rng(rng)

    index = T.lscalar()  # Minibatch index
    x = T.tensor3('x')   # Inputs
    y = T.fmatrix('y')   # Target

    # Define and initialize the RNN network
    network_0 = build_rnn_net(input_var=x,
                              input_width=time_step,
                              input_dim=feature_dim,
                              nin_units=12,
                              h_num_units=[16, 16],
                              h_grad_clip=5.0,
                              output_width=time_step)

    if not os.path.isfile(model_path):
        print("Model file does not exist!")
        return None
    init_model = np.load(model_path)
    init_params = init_model[init_model.files[0]]
    LL.set_all_param_values([network_0], init_params)

    train_set_y = theano.shared(np.zeros((1, time_step),
                                         dtype=theano.config.floatX),
                                borrow=True)
    train_set_x = theano.shared(np.zeros((1, time_step, feature_dim),
                                         dtype=theano.config.floatX),
                                borrow=True)
    valid_set_y = theano.shared(np.zeros((1, time_step),
                                         dtype=theano.config.floatX),
                                borrow=True)
    valid_set_x = theano.shared(np.zeros((1, time_step, feature_dim),
                                         dtype=theano.config.floatX),
                                borrow=True)
    test_set_x = theano.shared(np.zeros((1, time_step, feature_dim),
                                        dtype=theano.config.floatX),
                               borrow=True)

    theta = theano.shared(np.zeros((time_step, time_step),
                                   dtype=theano.config.floatX))
    lamda = theano.shared(np.zeros((time_step, time_step),
                                   dtype=theano.config.floatX))

    out_x = LL.BatchNormLayer(network_0)

    # Define updates
    params = LL.get_all_params(out_x, trainable=True)
    r = lasagne.regularization.regularize_network_params(out_x, l2)
    semi_x = LL.get_output(out_x, deterministic=True)

    # Define SGCRF in theano expressions
    S_yy = T.dot(y.T, y) / minibatch_size
    S_yx = T.dot(y.T, semi_x) / minibatch_size
    S_xx = T.dot(semi_x.T, semi_x) / minibatch_size

    ilamda = T.nlinalg.matrix_inverse(lamda)
    t1 = T.dot(S_yy, lamda)
    t2 = 2 * T.dot(S_yx, theta)
    t3 = T.dot(T.dot(T.dot(ilamda, theta.T), S_xx), theta)

    det_lamda = T.nlinalg.det(lamda)
    loss = -T.log(det_lamda) + T.nlinalg.trace(t1 + t2 + t3)

    eigen_lamda, _ = T.nlinalg.eig(lamda)
    train_loss = -T.sum(T.log(eigen_lamda)) + T.nlinalg.trace(t1 + t2 + t3)

    lamda_diag = T.nlinalg.diag(lamda)
    regularized_loss = (loss + 1e-4 * r + 1e-3 * l1(theta)
                        + 1e-3 * l1(lamda - lamda_diag))

    learn_rate = T.scalar('learn_rate', dtype=theano.config.floatX)
    momentum = T.scalar('momentum', dtype=theano.config.floatX)  # shadows the (unused) keyword argument
    scale_rate = T.scalar('scale_rate', dtype=theano.config.floatX)

    # Scale the grads of theta, lamda
    new_params = [theta, lamda]
    new_grads = T.grad(regularized_loss, new_params)
    for i in range(len(new_grads)):
        new_grads[i] *= scale_rate

    grads = T.grad(regularized_loss, params)
    params += new_params
    grads += new_grads
    clipped_grads = lasagne.updates.total_norm_constraint(grads, 5.0)
    updates = lasagne.updates.nesterov_momentum(clipped_grads, params,
                                                learning_rate=learn_rate,
                                                momentum=momentum)

    pred_x = LL.get_output(out_x, deterministic=True)
    valid_predictions = -T.dot(T.dot(ilamda, theta.T), pred_x.T).T
    valid_loss = T.mean(T.abs_(pred_x - y))

    train_model = theano.function(
        [index, learn_rate, momentum, scale_rate], train_loss,
        updates=updates,
        givens={
            x: train_set_x[(index * minibatch_size):
                           ((index + 1) * minibatch_size)],
            y: train_set_y[(index * minibatch_size):
                           ((index + 1) * minibatch_size)]
        })

    validate_model = theano.function(
        [index], valid_loss,
        givens={
            x: valid_set_x[index * minibatch_size:
                           (index + 1) * minibatch_size],
            y: valid_set_y[index * minibatch_size:
                           (index + 1) * minibatch_size]
        })

    test_model = theano.function(
        [index], valid_predictions,
        givens={
            x: test_set_x[(index * minibatch_size):
                          ((index + 1) * minibatch_size)],
        })

    this_train_loss = 0.0
    this_valid_loss = 0.0
    best_valid_loss = np.inf
    best_train_loss = np.inf
    best_test_loss = np.inf
    eval_starts = 0
    near_convergence = 1500  # to be set
    eval_multiple = 10
    eval_num = 1000

    train_eval_scores = np.ones(eval_num)
    valid_eval_scores = np.ones(eval_num)
    test_eval_scores = np.ones(eval_num)
    cum_iterations = 0
    eval_index = 0

    theta.set_value(theta_mat.astype(np.float32))
    lamda.set_value(lambda_mat.astype(np.float32))

    batch_num = trainX.shape[0] // minibatch_size
    near_convergence = batch_num * (num_epochs - 10)

    for i in range(num_epochs):
        x_train, y_train, x_cv, y_cv = shuffle_data(trainX, trainY,
                                                    testX, testY)
        train_batch_num = x_train.shape[0] // minibatch_size  # discard last small batch
        valid_batch_num = x_cv.shape[0] // minibatch_size + 1

        start_time = time.time()
        train_set_y.set_value(y_train[:])
        train_set_x.set_value(x_train)
        valid_set_y.set_value(y_cv[:])
        valid_set_x.set_value(x_cv)
        test_set_x.set_value(x_cv)

        # if(num_epochs % 10 == 0):
        #     learning_rate *= 0.7

        # Iterate over minibatches in each batch
        for mini_index in xrange(train_batch_num):
            this_rate = np.float32(learning_rate * (rate_decay ** cum_iterations))
            this_scale_rate = np.float32(init_scale * (scale_decay ** cum_iterations))

            # Adaptive momentum
            this_momentum = 0.99
            if cum_iterations > near_convergence:
                this_momentum = 0.90

            this_train_loss += train_model(mini_index, this_rate,
                                           this_momentum, this_scale_rate)
            cum_iterations += 1

            if np.isnan(this_train_loss):
                print "Training error: loss is NaN, aborting."
                return

            # Begin evaluation and report loss
            if (cum_iterations % eval_multiple == 0 and
                    cum_iterations > eval_starts):
                this_train_loss = this_train_loss / eval_multiple
                this_valid_loss = np.mean([validate_model(k)
                                           for k in xrange(valid_batch_num)])
                predictions = np.concatenate([test_model(k)
                                              for k in xrange(valid_batch_num)])
                this_test_loss = np.mean(np.abs(predictions - y_cv))

                train_eval_scores[eval_index] = this_train_loss
                valid_eval_scores[eval_index] = this_valid_loss
                test_eval_scores[eval_index] = this_test_loss

                # Save model if best validation score
                if (this_valid_loss < best_valid_loss):
                    best_valid_loss = this_valid_loss
                if (this_test_loss < best_test_loss):
                    best_test_loss = this_test_loss
                    # np.savez(model_to_save, LL.get_all_param_values(network_0))

                print("Training Loss:", this_train_loss)
                print("Validation Loss:", this_valid_loss)
                print("Test Loss:", this_test_loss)
                print("Current scale rate:", this_scale_rate)

                eval_index += 1
                this_train_loss = 0.0
                this_valid_loss = 0.0

        end_time = time.time()
        print("Computing time for epoch %d: %f" % (i, end_time - start_time))

        cur_train_loss = np.min(train_eval_scores)
        cur_valid_loss = np.min(valid_eval_scores)
        cur_test_loss = np.min(test_eval_scores)
        print("Best training loss in epoch: %f" % cur_train_loss)
        print("Best validation loss in epoch: %f" % cur_valid_loss)
        print("Best test loss in epoch: %f" % cur_test_loss)

    print("Best loss in training: %f" % best_train_loss)
    print("Best loss in cross-validation: %f" % best_valid_loss)
    print("Best loss in testing: %f" % best_test_loss)

    del train_set_x, train_set_y, valid_set_x, valid_set_y, trainX, trainY
    gc.collect()
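The update rule above combines per-parameter gradient scaling, global norm clipping, and Nesterov momentum. A minimal self-contained sketch of the clip-then-update part on a toy model (layer sizes and rates are made up):

import theano
import theano.tensor as T
import lasagne

x = T.matrix('x')
net = lasagne.layers.InputLayer((None, 4), input_var=x)
net = lasagne.layers.DenseLayer(net, num_units=1, nonlinearity=None)
loss = T.mean(lasagne.layers.get_output(net) ** 2)

params = lasagne.layers.get_all_params(net, trainable=True)
grads = T.grad(loss, params)
# rescale all gradients so their joint norm is at most 5.0
grads = lasagne.updates.total_norm_constraint(grads, max_norm=5.0)
updates = lasagne.updates.nesterov_momentum(grads, params,
                                            learning_rate=0.01, momentum=0.9)
train = theano.function([x], loss, updates=updates)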
from lasagne import objectives
from lasagne import updates
import lasagne

import theano
import theano.tensor as T

import warnings
warnings.filterwarnings("ignore")

######################## CONFIG #########################

# Fixed random seed
RANDOM_SEED = 1337
RANDOM = np.random.RandomState(RANDOM_SEED)
lasagne_random.set_rng(RANDOM)

# Training settings
EPOCHS = 20
LR_START = 0.0005
LR_END = 0.000001

################### DATASET HANDLING ####################
DATASET_PATH = 'GSTB_Dataset'

def parseDataset():
    # Subfolders are used as class labels
    classes = [folder for folder in sorted(os.listdir(DATASET_PATH))]
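The config fixes LR_START and LR_END over 20 epochs, but the snippet cuts off before the schedule itself. One plausible choice (an assumption, not confirmed by this snippet) is log-linear interpolation between the two rates:

import numpy as np

EPOCHS = 20
LR_START = 0.0005
LR_END = 0.000001

# Hypothetical schedule: log-linear interpolation from LR_START to LR_END
lr_schedule = np.logspace(np.log10(LR_START), np.log10(LR_END), num=EPOCHS)
print(lr_schedule[0], lr_schedule[-1])  # 0.0005 ... 1e-06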