def test_norm_constraint_dim6_raises():
    import numpy as np
    import pytest
    import theano
    from lasagne.updates import norm_constraint

    max_norm = 0.01
    param = theano.shared(
        np.random.randn(1, 2, 3, 4, 5, 6).astype(theano.config.floatX))

    with pytest.raises(ValueError) as excinfo:
        norm_constraint(param, max_norm)
    assert "Unsupported tensor dimensionality" in str(excinfo.value)
def clip_grads(grads, clip, clip_type):
    if clip > 0.1:
        if clip_type == "norm":
            grads = [
                norm_constraint(p, clip) if p.ndim > 1 else T.clip(p, -clip, clip)
                for p in grads
            ]
        elif clip_type == "global":
            norm = T.sqrt(T.sum([T.sum(T.sqr(g)) for g in grads]) * 2) + 1e-7
            scale = clip * T.min([1 / norm, 1. / clip]).astype("float32")
            grads = [g * scale for g in grads]
    return grads
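A hedged usage sketch for the helper above (not from the original source): it assumes clip_grads is defined in the same module as the imports below, and the toy model (W, b, loss) is purely illustrative.

# Hedged usage sketch: feed clip_grads the symbolic gradients returned by T.grad.
import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import adam, norm_constraint  # norm_constraint is used inside clip_grads

W = theano.shared(np.random.randn(3, 4).astype(theano.config.floatX))
b = theano.shared(np.zeros(4, dtype=theano.config.floatX))
x = T.matrix('x')
loss = T.mean(T.sqr(T.dot(x, W) + b))

grads = T.grad(loss, [W, b])
grads = clip_grads(grads, clip=5.0, clip_type="norm")   # per-tensor max-norm clipping
updates = adam(grads, [W, b], learning_rate=1e-3)
train_fn = theano.function([x], loss, updates=updates)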
def optimiser(self, num_samples, approximate_by_css, css_num_samples, grad_norm_constraint,
              update, update_kwargs, saved_update=None, iwae=False):
    """Optimiser for the AUTR model. Calculates the gradients and the updates in order to
    optimise the objective function based on SGVB.

    :param num_samples: (int) the number of samples in the SGVB MC part
    :param grad_norm_constraint: (instance) includes any gradient constraints (such as clipping)
    :param update: (updater) optimiser update function
    :param update_kwargs: (dictionary) kwargs for the update function
    :param saved_update: (bool) if we want to use previously saved updates
    :param iwae: (bool) if we are to use IWAE instead of SGVB

    :return optimiser, updates: returns the optimiser function and the corresponding updates"""

    # input tensors
    x = T.imatrix('x')  # N * max(L)
    y = T.imatrix('y')  # N * max(L)

    # KL annealing tensor
    beta = T.scalar('beta')

    # optimiser tensors
    elbo, kl, log_p_x, log_p_y = self.symbolic_elbo(x, y, num_samples, beta,
                                                    approximate_by_css=approximate_by_css,
                                                    css_num_samples=css_num_samples,
                                                    iwae=iwae)

    # all the parameters of the recognition + generative model
    params = (self.recognition_model.get_params() +
              self.generative_model_x.get_params() +
              self.generative_model_y.get_params())

    # gradients with respect to the elbo. Since Theano performs gradient descent, we minimise
    # the negative objective function instead of maximising the objective function.
    grads = T.grad(-elbo, params)

    # if we have gradient constraints, apply them
    if grad_norm_constraint is not None:
        grads = [norm_constraint(g, grad_norm_constraint) if g.ndim > 1 else g
                 for g in grads]

    # add the calculated gradients and parameters to the kwargs of the updater
    update_kwargs['loss_or_grads'] = grads
    update_kwargs['params'] = params

    # get updates from parameters and gradients
    updates = update(**update_kwargs)

    # if we have previously saved updates, apply them
    if saved_update is not None:
        for u, v in zip(updates, saved_update.keys()):
            u.set_value(v.get_value())

    # compile the theano function that calculates the elbo while
    # also running optimisation on the parameters of the model.
    optimiser = theano.function(inputs=[x, y, beta],
                                outputs=[elbo, kl, log_p_x, log_p_y],
                                updates=updates,
                                allow_input_downcast=True)

    return optimiser, updates
def __init__(self, incoming, num_units, Wfc=init.Normal(), nonlinearity=rectify,
             mnc=False, b=init.Constant(0.), **kwargs):
    super(DenseLayer, self).__init__(incoming)
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.num_inputs = int(np.prod(self.input_shape[1:]))
    # _srng is a shared Theano random stream, seeded from the global Lasagne RNG,
    # used for any stochastic behaviour of the layer
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

    self.W = self.add_param(Wfc, (self.num_inputs, self.num_units), name="W")
    # max norm constraint (mnc carries the maximum norm value when enabled)
    if mnc:
        self.W = updates.norm_constraint(self.W, mnc)
    self.b = self.add_param(b, (num_units,), name="b", regularizable=False)
def optimiser(self, num_samples, grad_norm_constraint, update, update_kwargs,
              saved_update=None):
    """
    :param num_samples: scalar
    :param grad_norm_constraint: ...
    :param update: ...
    :param update_kwargs: ...
    :param saved_update: ...
    :return:
    """

    x = T.imatrix('x')  # N x max(L)
    x_m = T.imatrix('x_m')  # N x max(L)
    beta = T.scalar('beta')  # scalar
    drop_mask = T.matrix('drop_mask')  # N x max(L)

    if self.teacher_forcing:
        elbo, kl, pp = self.symbolic_elbo(x, x_m, num_samples, beta, drop_mask)
    else:
        elbo, kl, pp, scan_updates = self.symbolic_elbo(x, x_m, num_samples, beta, drop_mask)

    params = (self.generative_model.get_params() +
              self.recognition_model.get_params() +
              [self.all_embeddings])
    grads = T.grad(-elbo, params, disconnected_inputs='ignore')

    if grad_norm_constraint is not None:
        grads = [norm_constraint(g, grad_norm_constraint) if g.ndim > 1 else g
                 for g in grads]

    update_kwargs['loss_or_grads'] = grads
    update_kwargs['params'] = params

    updates = update(**update_kwargs)

    if not self.teacher_forcing:
        for var in scan_updates:
            updates[var] = scan_updates[var]

    if saved_update is not None:
        for u, v in zip(updates, saved_update.keys()):
            u.set_value(v.get_value())

    optimiser = theano.function(inputs=[x, x_m, beta, drop_mask],
                                outputs=[elbo, kl, pp],
                                updates=updates,
                                allow_input_downcast=True,
                                on_unused_input='ignore',
                                )

    return optimiser, updates
def optimiser(self, num_samples, approximate_by_css, css_num_samples, grad_norm_constraint,
              update, update_kwargs, saved_update=None):
    x = T.imatrix('x')  # N * max(L)
    beta = T.scalar('beta')

    elbo, kl = self.symbolic_elbo(x, num_samples, beta,
                                  approximate_by_css=approximate_by_css,
                                  css_num_samples=css_num_samples)

    params = (self.generative_model.get_params() +
              self.recognition_model.get_params() +
              [self.all_embeddings])
    grads = T.grad(-elbo, params)

    if grad_norm_constraint is not None:
        grads = [norm_constraint(g, grad_norm_constraint) if g.ndim > 1 else g
                 for g in grads]

    update_kwargs['loss_or_grads'] = grads
    update_kwargs['params'] = params

    updates = update(**update_kwargs)

    if saved_update is not None:
        for u, v in zip(updates, saved_update.keys()):
            u.set_value(v.get_value())

    optimiser = theano.function(
        inputs=[x, beta],
        outputs=[elbo, kl],
        updates=updates,
        allow_input_downcast=True,
        on_unused_input='ignore',
    )

    return optimiser, updates
def test_norm_constraint(ndim):
    import numpy as np
    import theano
    from lasagne.updates import norm_constraint
    from lasagne.utils import compute_norms

    max_norm = 0.01
    param = theano.shared(
        np.random.randn(*((25,) * ndim)).astype(theano.config.floatX))

    update = norm_constraint(param, max_norm)
    apply_update = theano.function([], [], updates=[(param, update)])
    apply_update()

    assert param.dtype == update.dtype
    assert np.max(compute_norms(param.get_value())) <= max_norm * (1 + PCT_TOLERANCE)
def test_norm_constraint_norm_axes():
    import numpy as np
    import theano
    from lasagne.updates import norm_constraint
    from lasagne.utils import compute_norms

    max_norm = 0.01
    norm_axes = (0, 2)
    param = theano.shared(
        np.random.randn(10, 20, 30, 40).astype(theano.config.floatX))

    update = norm_constraint(param, max_norm, norm_axes=norm_axes)
    apply_update = theano.function([], [], updates=[(param, update)])
    apply_update()

    assert param.dtype == update.dtype
    assert (np.max(compute_norms(param.get_value(), norm_axes=norm_axes))
            <= max_norm * (1 + PCT_TOLERANCE))
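A hedged NumPy-only illustration of what the norm_axes test above checks (the reduction below mirrors what compute_norms does under the assumption that it sums squares over the given axes; nothing here is taken from the original test file).

# With a (10, 20, 30, 40) tensor and norm_axes=(0, 2), one norm is computed per
# remaining (axis-1, axis-3) slice; norm_constraint rescales each slice to max_norm.
import numpy as np

param = np.random.randn(10, 20, 30, 40).astype('float32')
norms = np.sqrt(np.sum(param ** 2, axis=(0, 2)))
print(norms.shape)  # (20, 40)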
def __init__(self, incoming, num_units, Wfc=init.Normal(), nonlinearity=rectify,
             mnc=False, g=init.Constant(1.), b=init.Constant(0.), **kwargs):
    super(WeightNormLayer, self).__init__(incoming)
    self.num_units = num_units
    self.nonlinearity = nonlinearity
    self.num_inputs = int(np.prod(self.input_shape[1:]))
    self._srng = RandomStreams(get_rng().randint(1, 2147462579))

    self.W_norm = self.add_param(Wfc, (self.num_inputs, self.num_units), name="W_norm")
    self.g = self.add_param(g, (self.num_units,), name="g")
    self.b = self.add_param(b, (self.num_units,), name="b", regularizable=False)

    W_axes_to_sum = 0
    W_dimshuffle_args = ['x', 0]
    self.W = self.W_norm * (
        self.g / T.sqrt(T.sum(T.square(self.W_norm), axis=W_axes_to_sum)))

    # max norm constraint (mnc carries the maximum norm value when enabled)
    if mnc:
        self.W = updates.norm_constraint(self.W, mnc)
def iter_update(epoch):
    losses = []
    # self.learning_rate.set_value(self.learning_rate.get_value() *
    #                              np.array(0.99, dtype=theano.config.floatX))
    for i in xrange(nb_batches):
        losses.append(self._iter_update_batch(i))
        # max norm: use the per-layer max-norm value paired with each layer
        # (the original passed the whole self.max_norm list here)
        if self.max_norm is not None:
            for max_norm_layer, layer in zip(self.max_norm, self._layers):
                layer.W = updates.norm_constraint(layer.W, max_norm_layer)
    losses = np.array(losses)

    d = OrderedDict()
    d["epoch"] = epoch
    # d["loss_train_std"] = losses.std()
    # d["loss_train"] = losses.mean()
    d["loss_train"] = self._get_loss(self.X_train, self.y_train_transformed, 1.)
    d["accuracy_train"] = (self.predict(self.X_train) == self.y_train).mean()
    if X_valid is not None and y_valid is not None:
        d["loss_valid"] = self._get_loss(X_valid, y_valid_transformed, 1.)
        if self.is_classification == True:
            d["accuracy_valid"] = (self.predict(X_valid) == y_valid).mean()
    if self.verbose > 0:
        if (epoch % self.report_each) == 0:
            print(tabulate([d], headers="keys"))
    self._stats.append(d)
    return d
def main():
    configure_theano()
    options = parse_options()
    config_file = 'config/leave_one_out.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    #  '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    # X = reorder_data(X, (26, 44), 'f', 'c')
    # print('performing sequencewise mean image removal...')
    # X = sequencewise_mean_image_subtraction(X, video_lens)
    # visualize_images(X[550:650], (26, 44))
    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    test_subject_ids = [options['test_subj']]
    train_subject_ids = range(1, 54)
    for subj in test_subject_ids:
        train_subject_ids.remove(subj)

    if 'results' in options:
        results_file = options['results']
        f = open(results_file, mode='a')

    print(train_subject_ids)
    print(test_subject_ids)

    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
        test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens,
                   train_subject_ids, test_subject_ids)

    assert train_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # featurewise normalize dct features
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    if do_finetune:
        print('performing finetuning on pretrained encoder: {}'.format(ae_pretrained))
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_X, train_X)

    if save_finetune:
        print('saving finetuned encoder: {}...'.format(ae_finetuned))
        pickle.dump(ae, open(ae_finetuned, 'wb'))

    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        ae = pickle.load(open(ae_finetuned, 'rb'))
        ae.initialize()

    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        ae_diff = pickle.load(open(ae_finetuned_diff, 'rb'))
        ae_diff.initialize()

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    '''
    network = create_end_to_end_model(dbn, (None, None, 1144), inputs,
                                      (None, None), mask, 250, window)
    '''
    network = adenet_v5.create_model(ae, ae_diff, (None, None, 1144), inputs,
                                     (None, None), mask,
                                     (None, None, 90), dct,
                                     (None, None, 1144), inputs_diff,
                                     250, window, 10)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                            cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                                        test_cost, allow_input_downcast=True)
    val_fn = theano.function([inputs, mask, dct, inputs_diff, window],
                             test_predictions, allow_input_downcast=True)

    # We'll train the network with 10 epochs of 30 minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 10
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    MAX_LOSS = 0.2
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                        batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlens)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlens,
                                     integral_lens_val, np.max(test_vidlens))
    X_diff_val = gen_seq_batch_from_idx(test_X_diff, idxs_val, test_vidlens,
                                        integral_lens_val, np.max(test_vidlens))

    def early_stop(cost_window):
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs, train_vidlens,
                                            integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, X_diff_val,
                                      WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate, savefilename='valid_cost')

    if 'results' in options:
        print('writing to results file: {}...'.format(options['results']))
        f.write('{}, {}, {}\n'.format(test_subject_ids[0], best_cr, best_val))
        f.close()
def main():
    configure_theano()
    config_file = 'config/normal.ini'
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('loading config file: {}'.format(config_file))
    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets),))

    # indexes for a particular letter
    # idx = [i for i, elem in enumerate(test_targets) if elem == 20]

    # resize the input data to 40 x 30
    # train_data_resized = resize_images(train_data).astype(np.float32)
    # normalize the inputs [0 - 1]
    # train_data_resized = normalize_input(train_data_resized, centralize=True)
    # test_data_resized = resize_images(test_data).astype(np.float32)
    # test_data_resized = normalize_input(test_data_resized, centralize=True)

    if do_finetune:
        print('fine-tuning...')
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_data, train_data)
        res = dbn.predict(test_data)
        # print(res.shape)
        visualize_reconstruction(test_data[300:336], res[300:336])

    if save_finetune:
        pickle.dump(dbn, open(ae_finetuned, 'wb'))

    if load_finetune:
        print('loading pre-trained encoding layers...')
        dbn = pickle.load(open(ae_finetuned, 'rb'))
        dbn.initialize()
        # res = dbn.predict(test_data)
        # visualize_reconstruction(test_data[300:336], res[300:336])
        # exit()

    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network = baseline_end2end.create_model(dbn, (None, None, 1200), inputs,
                                            (None, None), mask, 250)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png', verbose=True)
    # exit()
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function([inputs, targets, mask], cost,
                            updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost,
                                         allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask], test_cost,
                                        allow_input_downcast=True)
    val_fn = theano.function([inputs, mask], test_predictions,
                             allow_input_downcast=True)

    # We'll train the network with 10 epochs of 30 minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    MAX_LOSS = 0.2
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)

    def early_stop(cost_window):
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > decay_start:  # 20, 8
            lr.set_value(lr.get_value() * lr_decay)

    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
               'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate, 'e2e_valid_cost')
def main():
    configure_theano()
    config_file = 'config/separate_train.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    lstm_units = int(config.get('training', 'lstm_units'))
    output_units = int(config.get('training', 'output_units'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    #  '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')  # .reshape((-1, 26, 44), order='f').reshape((-1, 26 * 44))
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))

    train_X, train_y, train_vidlens, train_subjects, \
        val_X, val_y, val_vidlens, val_subjects, \
        test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens,
                   train_subject_ids, val_subject_ids, test_subject_ids)

    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    if do_finetune:
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_X, train_X)
        recon = dbn.predict(test_X)
        visualize_reconstruction(reorder_data(test_X[800:864], (26, 44)),
                                 reorder_data(recon[800:864], (26, 44)),
                                 shape=(26, 44))

    if save_finetune:
        pickle.dump(dbn, open(ae_finetuned, 'wb'))

    if load_finetune:
        print('loading pre-trained encoding layers...')
        dbn = pickle.load(open(ae_finetuned, 'rb'))
        dbn.initialize()
        # recon = dbn.predict(test_X)
        # visualize_reconstruction(reorder_data(test_X[800:864], (26, 44)),
        #                          reorder_data(recon[800:864], (26, 44)),
        #                          shape=(26, 44))

    encoder = extract_encoder(dbn)
    train_X = encoder.predict(train_X)
    val_X = encoder.predict(val_X)
    test_X = encoder.predict(test_X)

    # train_X = concat_first_second_deltas(train_X, train_vidlens)
    # val_X = concat_first_second_deltas(val_X, val_vidlens)
    # test_X = concat_first_second_deltas(test_X, test_vidlens)

    # featurewise normalize
    train_X, mean, std = featurewise_normalize_sequence(train_X)
    val_X = (val_X - mean) / std
    test_X = (test_X - mean) / std

    # recon = dbn.predict(test_X)
    # visualize_reconstruction(test_X[550:650], recon[550:650], (26, 44))
    # exit()

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing lstm classifier...')
    network = lstm_classifier_baseline.create_model((None, None, 50), inputs,
                                                    (None, None), mask,
                                                    lstm_units, output_units)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.apply_momentum(sgd(cost, all_params, learning_rate=lr), all_params, 0.1)

    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function([inputs, targets, mask], cost,
                            updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost,
                                         allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask], test_cost,
                                        allow_input_downcast=True)
    val_fn = theano.function([inputs, mask], test_predictions,
                             allow_input_downcast=True)

    # We'll train the network with 10 epochs of 30 minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    STRIP_SIZE = 3
    MAX_LOSS = 0.2
    VALIDATION_WINDOW = 10
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')
def main():
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    diff_data = load_mat_file(config.get('data', 'diff'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures']
    diff_data_matrix = diff_data['dataMatrix']

    # samplewise normalize
    # print('sameplewise mean normalize...')
    # data_matrix = normalize_input(data_matrix)
    # diff_data_matrix = normalize_input(diff_data_matrix)
    # diff_data_matrix = compute_diff_images(data_matrix, vid_len_vec.reshape((-1,))).astype('float32')

    # mean remove
    # dct_feats = dct_feats[:, 0:30]
    # dct_feats = sequencewise_mean_image_subtraction(dct_feats, vid_len_vec.reshape((-1,)))

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets),))
    train_diff_data = diff_data_matrix[indexes == True]
    test_diff_data = diff_data_matrix[indexes == False]

    # split the dct features + featurewise mean normalize
    train_dct = dct_feats[indexes == True].astype(np.float32)
    test_dct = dct_feats[indexes == False].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    if do_finetune:
        print('fine-tuning...')
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_data, train_data)
        res = ae.predict(test_data)
        # print(res.shape)
        visualize_reconstruction(test_data[300:336], res[300:336])

    if save_finetune:
        pickle.dump(ae, open(ae_finetuned, 'wb'))

    if load_finetune:
        print('loading pre-trained encoding layers...')
        ae = pickle.load(open(ae_finetuned, 'rb'))
        ae.initialize()

    if load_finetune_diff:
        print('loading pre-trained diff image encoding layers...')
        diff_ae = pickle.load(open(ae_finetuned_diff, 'rb'))
        diff_ae.initialize()

    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs_raw = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs_raw = T.tensor3('inputs_raw', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    '''
    network = adenet_v1.create_model(dbn, (None, None, 1200), inputs,
                                     (None, None), mask,
                                     (None, None, 90), dct, 250, window)

    network = deltanet.create_model(dbn, (None, None, 1200), inputs,
                                    (None, None), mask, 250, window)

    network = adenet_v2.create_model(dbn, (None, None, 1200), inputs,
                                     (None, None), mask,
                                     (None, None, 90), dct, 250, window)

    network = adenet_v2.create_model(ae, (None, None, 1200), inputs_raw,
                                     (None, None), mask,
                                     (None, None, 90), dct, 250, window)
    '''
    network, l_fuse = adenet_v3.create_model(ae, diff_ae, (None, None, 1200), inputs_raw,
                                             (None, None), mask,
                                             (None, None, 90), dct,
                                             (None, None, 1200), inputs_diff,
                                             250, window, 26, fusiontype)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'adenet_v3.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function([inputs_raw, mask, dct, inputs_diff, window],
                             test_predictions, allow_input_downcast=True)

    # We'll train the network with 10 epochs of 30 minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 25
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    MAX_LOSS = 0.2
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlen_vec,
                                     integral_lens_val, np.max(test_vidlen_vec))
    diff_val = gen_seq_batch_from_idx(test_diff_data, idxs_val, test_vidlen_vec,
                                      integral_lens_val, np.max(test_vidlen_vec))

    def early_stop(cost_window):
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlen_vec,
                                       integral_lens, np.max(train_vidlen_vec))
            diff = gen_seq_batch_from_idx(train_diff_data, batch_idxs, train_vidlen_vec,
                                          integral_lens, np.max(train_vidlen_vec))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, diff, WINDOW_SIZE)
            print('\r', end='')
        cost = compute_train_cost(X, y, m, d, diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, diff_val,
                                      WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
               'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate, 'e2e_valid_cost')

    if options['write_results']:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))
def main():
    configure_theano()
    config_file = 'config/trimodal.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    use_adascale = config.getboolean('models', 'use_adascale')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    #  '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    # X = reorder_data(X, (26, 44), 'f', 'c')
    # print('performing sequencewise mean image removal...')
    # X = sequencewise_mean_image_subtraction(X, video_lens)
    # visualize_images(X[550:650], (26, 44))
    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    train_subject_ids = read_data_split_file('data/train_val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print(train_subject_ids)
    print(test_subject_ids)

    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
        test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens,
                   train_subject_ids, test_subject_ids)

    assert train_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # featurewise normalize dct features
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    if do_finetune:
        print('performing finetuning on pretrained encoder: {}'.format(ae_pretrained))
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_X, train_X)

    if save_finetune:
        print('saving finetuned encoder: {}...'.format(ae_finetuned))
        pickle.dump(ae, open(ae_finetuned, 'wb'))

    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        ae = pickle.load(open(ae_finetuned, 'rb'))
        ae.initialize()

    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        ae_diff = pickle.load(open(ae_finetuned_diff, 'rb'))
        ae_diff.initialize()

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    '''
    network = create_end_to_end_model(dbn, (None, None, 1144), inputs,
                                      (None, None), mask, 250, window)
    '''
    network, adascale = adenet_v5.create_model(ae, ae_diff, (None, None, 1144), inputs,
                                               (None, None), mask,
                                               (None, None, 90), dct,
                                               (None, None, 1144), inputs_diff,
                                               250, window, 10, use_adascale)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function([inputs, mask, dct, inputs_diff, window],
                             test_predictions, allow_input_downcast=True)

    # We'll train the network with 10 epochs of 30 minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    MAX_LOSS = 0.2
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                        batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlens)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlens,
                                     integral_lens_val, np.max(test_vidlens))
    X_diff_val = gen_seq_batch_from_idx(test_X_diff, idxs_val, test_vidlens,
                                        integral_lens_val, np.max(test_vidlens))

    def early_stop(cost_window):
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs, train_vidlens,
                                            integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, X_diff_val,
                                      WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            if use_adascale:
                adascale_param = las.layers.get_all_param_values(adascale, scaling_param=True)

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    if use_adascale:
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate, savefilename='valid_cost')
def norm_constraint(self, max_norm, norm_axes=None, epsilon=1e-7):
    self.constraints.append(
        lambda x: norm_constraint(x, max_norm=max_norm,
                                  norm_axes=norm_axes, epsilon=epsilon))
    return self
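A hedged usage sketch for the fluent method above. The enclosing class is not shown in the original, so ConstraintBuilder below is a minimal hypothetical stand-in that only provides the `constraints` list the method relies on; everything else is illustrative.

# Minimal, hypothetical builder with the same fluent interface as the method above.
import numpy as np
import theano
from lasagne.updates import norm_constraint


class ConstraintBuilder(object):
    def __init__(self):
        self.constraints = []

    def norm_constraint(self, max_norm, norm_axes=None, epsilon=1e-7):
        self.constraints.append(lambda x: norm_constraint(
            x, max_norm=max_norm, norm_axes=norm_axes, epsilon=epsilon))
        return self


W = theano.shared(np.random.randn(100, 50).astype(theano.config.floatX))
builder = ConstraintBuilder().norm_constraint(max_norm=3.0)  # fluent chaining
constrained_W = W
for constrain in builder.constraints:
    constrained_W = constrain(constrained_W)  # wrap the expression with each registered constraint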
def main(): def signal_handler(signal, frame): global terminate terminate = True print('terminating...'.format(terminate)) signal.signal(signal.SIGINT, signal_handler) configure_theano() options = parse_options() X, X_val = generate_data() # X = np.reshape(X, (-1, 1, 30, 40))[:-5] print('X type and shape:', X.dtype, X.shape) print('X.min():', X.min()) print('X.max():', X.max()) # X_val = np.reshape(X_val, (-1, 1, 30, 40))[:-1] print('X_val type and shape:', X_val.dtype, X_val.shape) print('X_val.min():', X_val.min()) print('X_val.max():', X_val.max()) # we need our target to be 1 dimensional X_out = X.reshape((X.shape[0], -1)) X_val_out = X_val.reshape((X_val.shape[0], -1)) print('X_out:', X_out.dtype, X_out.shape) print('X_val_out', X_val_out.dtype, X_val_out.shape) # X_noisy = apply_gaussian_noise(X_out) # visualize_reconstruction(X_noisy[0:25], X_out[0:25], shape=(28, 28)) # X = np.reshape(X_noisy, (-1, 1, 28, 28)) print('constructing and compiling model...') # input_var = T.tensor4('input', dtype='float32') input_var = T.tensor3('input', dtype='float32') target_var = T.matrix('output', dtype='float32') lr = theano.shared(np.array(0.8, dtype=theano.config.floatX), name='learning_rate') lr_decay = np.array(0.9, dtype=theano.config.floatX) # try building a reshaping layer # network = create_model(input_var, (None, 1, 30, 40), options) l_input = InputLayer((None, None, 1200), input_var, name='input') l_input = ReshapeLayer(l_input, (-1, 1, 30, 40), name='reshape_input') # l_input = InputLayer((None, 1, 30, 40), input_var, name='input') if options['MODEL'] == 'normal': network, encoder = avletters_convae.create_model(l_input, options) if options['MODEL'] == 'batchnorm': network, encoder = avletters_convae_bn.create_model(l_input, options) if options['MODEL'] == 'dropout': network, encoder = avletters_convae_drop.create_model(l_input, options) if options['MODEL'] == 'bn+dropout': network, encoder = avletters_convae_bndrop.create_model(l_input, options) print('AE Network architecture: {}'.format(options['MODEL'])) print_network(network) recon = las.layers.get_output(network, deterministic=False) all_params = las.layers.get_all_params(network, trainable=True) cost = T.mean(squared_error(recon, target_var)) updates = adadelta(cost, all_params, lr) # updates = las.updates.apply_nesterov_momentum(updates, all_params, momentum=0.90) use_max_constraint = False print('apply max norm constraint: {}'.format(use_max_constraint)) if use_max_constraint: MAX_NORM = 4 for param in las.layers.get_all_params(network, regularizable=True): if param.ndim > 1: # only apply to dimensions larger than 1, exclude biases # updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean()) updates[param] = norm_constraint(param, MAX_NORM) train = theano.function([input_var, target_var], recon, updates=updates, allow_input_downcast=True) train_cost_fn = theano.function([input_var, target_var], cost, allow_input_downcast=True) eval_recon = las.layers.get_output(network, deterministic=True) eval_cost = T.mean(las.objectives.squared_error(eval_recon, target_var)) eval_cost_fn = theano.function([input_var, target_var], eval_cost, allow_input_downcast=True) recon_fn = theano.function([input_var], eval_recon, allow_input_downcast=True) if terminate: exit() NUM_EPOCHS = options['NUM_EPOCHS'] EPOCH_SIZE = options['EPOCH_SIZE'] NO_STRIDES = options['NO_STRIDES'] VAL_NO_STRIDES = options['VAL_NO_STRIDES'] print('begin training for {} epochs...'.format(NUM_EPOCHS)) datagen = batch_iterator(X, 
X_out, 128) costs = [] val_costs = [] for epoch in range(NUM_EPOCHS): time_start = time.time() for i in range(EPOCH_SIZE): batch_X, batch_y = next(datagen) print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format( epoch + 1, i + 1, EPOCH_SIZE, len(batch_X), lr.get_value()) print(print_str, end='') sys.stdout.flush() batch_X = batch_X.reshape((-1, 1, 1200)) train(batch_X, batch_y) print('\r', end='') if terminate: break if terminate: break cost = batch_compute_cost(X, X_out, NO_STRIDES, train_cost_fn) val_cost = batch_compute_cost(X_val, X_val_out, VAL_NO_STRIDES, eval_cost_fn) costs.append(cost) val_costs.append(val_cost) print("Epoch {} train cost = {}, validation cost = {} ({:.1f}sec) " .format(epoch + 1, cost, val_cost, time.time() - time_start)) if epoch > 10: lr.set_value(lr.get_value() * lr_decay) X_val_recon = recon_fn(X_val) visualize_reconstruction(X_val_out[450:550], X_val_recon[450:550], shape=(30, 40), savefilename='avletters') plot_validation_cost(costs, val_costs, None, savefilename='valid_cost') conv2d1 = las.layers.get_all_layers(network)[2] visualize.plot_conv_weights(conv2d1, (15, 14)).savefig('conv2d1.png') print('saving encoder...') save_model(encoder, 'models/conv_encoder.dat') save_model(network, 'models/conv_ae.dat')
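# Hedged sketch: a standalone version of the optional max-norm branch above,
# assuming `cost` and `network` are already defined as in this script. Note that
# the constraint is applied to the update expression updates[param] (the new
# parameter value) rather than to `param` itself, which matches Lasagne's
# documented usage of norm_constraint.
import lasagne as las
from lasagne.updates import adadelta, norm_constraint

def build_updates_with_max_norm(cost, network, max_norm=4):
    params = las.layers.get_all_params(network, trainable=True)
    updates = adadelta(cost, params)
    for param in las.layers.get_all_params(network, regularizable=True):
        if param.ndim > 1:  # constrain weight matrices/filters, skip biases
            updates[param] = norm_constraint(updates[param], max_norm)
    return updates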
def create_nnet(input_dims, action_dims, observation_dims, value_dims, learning_rate, grad_clip=None, l1_weight=None, l2_weight=None, num_hidden_units=20, num_hidden_action_units=None, num_hidden_observ_units=None, num_hidden_value_units=None, batch_size=32, max_train_epochs=1, hidden_nonlinearity=nonlinearities.rectify, output_nonlinearity=None, update_method=updates.sgd): commonlayers = [] commonlayers.append(layers.InputLayer(shape=(None, input_dims))) commonlayers.append(DenseLayer(commonlayers[-1], num_hidden_units, nonlinearity=hidden_nonlinearity)) if num_hidden_action_units is None: actionlayers = [DenseLayer(commonlayers[-1], action_dims, nonlinearity=output_nonlinearity)] else: actionlayers = [DenseLayer(commonlayers[-1], num_hidden_action_units, nonlinearity=output_nonlinearity)] actionlayers.append(DenseLayer(actionlayers[-1], action_dims, nonlinearity=output_nonlinearity)) if num_hidden_observ_units is None: observlayers = [DenseLayer(commonlayers[-1], observation_dims, nonlinearity=output_nonlinearity)] else: observlayers = [DenseLayer(commonlayers[-1], num_hidden_observ_units, nonlinearity=output_nonlinearity)] observlayers.append(DenseLayer(observlayers[-1], observation_dims, nonlinearity=output_nonlinearity)) if num_hidden_value_units is None: dvaluelayers = [DenseLayer(commonlayers[-1], value_dims, nonlinearity=output_nonlinearity)] else: dvaluelayers = [DenseLayer(commonlayers[-1], num_hidden_value_units, nonlinearity=output_nonlinearity)] dvaluelayers.append(DenseLayer(dvaluelayers[-1], value_dims, nonlinearity=output_nonlinearity)) actvallayers = [layers.ConcatLayer([actionlayers[-1], dvaluelayers[-1]])] obsvallayers = [layers.ConcatLayer([observlayers[-1], dvaluelayers[-1]])] concatlayers = [layers.ConcatLayer([actionlayers[-1], observlayers[-1], dvaluelayers[-1]])] action_prediction = layers.get_output(actionlayers[-1]) dvalue_prediction = layers.get_output(dvaluelayers[-1]) actval_prediction = layers.get_output(actvallayers[-1]) obsval_prediction = layers.get_output(obsvallayers[-1]) concat_prediction = layers.get_output(concatlayers[-1]) input_var = commonlayers[0].input_var action_target = T.matrix(name="action_target", dtype=floatX) dvalue_target = T.matrix(name="value_target", dtype=floatX) actval_target = T.matrix(name="actval_target", dtype=floatX) obsval_target = T.matrix(name="obsval_target", dtype=floatX) concat_target = T.matrix(name="concat_target", dtype=floatX) action_loss = objectives.squared_error(action_prediction, action_target).mean() obsval_loss = objectives.squared_error(obsval_prediction, obsval_target).mean() dvalue_loss = objectives.squared_error(dvalue_prediction, dvalue_target).mean() actval_loss = objectives.squared_error(actval_prediction, actval_target).mean() concat_loss = objectives.squared_error(concat_prediction, concat_target).mean() if l1_weight is not None: action_l1penalty = regularize_layer_params(commonlayers + actionlayers, l1) * l1_weight obsval_l1penalty = regularize_layer_params(commonlayers + observlayers + dvaluelayers, l1) * l1_weight dvalue_l1penalty = regularize_layer_params(commonlayers + dvaluelayers, l1) * l1_weight actval_l1penalty = regularize_layer_params(commonlayers + actionlayers + dvaluelayers, l1) * l1_weight concat_l1penalty = regularize_layer_params(commonlayers + actionlayers + observlayers + dvaluelayers, l1) * l1_weight action_loss += action_l1penalty obsval_loss += obsval_l1penalty dvalue_loss += dvalue_l1penalty actval_loss += actval_l1penalty concat_loss += concat_l1penalty if l2_weight is not None: 
action_l2penalty = regularize_layer_params(commonlayers + actionlayers, l2) * l2_weight obsval_l2penalty = regularize_layer_params(commonlayers + observlayers + dvaluelayers, l2) * l2_weight dvalue_l2penalty = regularize_layer_params(commonlayers + dvaluelayers, l2) * l2_weight actval_l2penalty = regularize_layer_params(commonlayers + actionlayers + dvaluelayers, l2) * l2_weight concat_l2penalty = regularize_layer_params(commonlayers + actionlayers + observlayers + dvaluelayers, l2) * l2_weight action_loss += action_l2penalty obsval_loss += obsval_l2penalty dvalue_loss += dvalue_l2penalty actval_loss += actval_l2penalty concat_loss += concat_l2penalty action_params = layers.get_all_params(actionlayers[-1], trainable=True) obsval_params = layers.get_all_params(obsvallayers[-1], trainable=True) dvalue_params = layers.get_all_params(dvaluelayers[-1], trainable=True) actval_params = layers.get_all_params(actvallayers[-1], trainable=True) concat_params = layers.get_all_params(concatlayers[-1], trainable=True) if grad_clip is not None: action_grads = theano.grad(action_loss, action_params) obsval_grads = theano.grad(obsval_loss, obsval_params) dvalue_grads = theano.grad(dvalue_loss, dvalue_params) actval_grads = theano.grad(actval_loss, actval_params) concat_grads = theano.grad(concat_loss, concat_params) action_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in action_grads] obsval_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in obsval_grads] dvalue_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in dvalue_grads] actval_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in actval_grads] concat_grads = [updates.norm_constraint(grad, grad_clip, range(grad.ndim)) for grad in concat_grads] action_updates = update_method(action_grads, action_params, learning_rate) obsval_updates = update_method(obsval_grads, obsval_params, learning_rate) dvalue_updates = update_method(dvalue_grads, dvalue_params, learning_rate) actval_updates = update_method(actval_grads, actval_params, learning_rate) concat_updates = update_method(concat_grads, concat_params, learning_rate) else: action_updates = update_method(action_loss, action_params, learning_rate) obsval_updates = update_method(obsval_loss, obsval_params, learning_rate) dvalue_updates = update_method(dvalue_loss, dvalue_params, learning_rate) actval_updates = update_method(actval_loss, actval_params, learning_rate) concat_updates = update_method(concat_loss, concat_params, learning_rate) fit_action = theano.function([input_var, action_target], action_loss, updates=action_updates) fit_obsval = theano.function([input_var, obsval_target], obsval_loss, updates=obsval_updates) fit_dvalue = theano.function([input_var, dvalue_target], dvalue_loss, updates=dvalue_updates) fit_actval = theano.function([input_var, actval_target], actval_loss, updates=actval_updates) fit_concat = theano.function([input_var, concat_target], concat_loss, updates=concat_updates) predict_action = theano.function([input_var], action_prediction) predict_obsval = theano.function([input_var], obsval_prediction) predict_dvalue = theano.function([input_var], dvalue_prediction) predict_actval = theano.function([input_var], actval_prediction) predict_concat = theano.function([input_var], concat_prediction) nnet = Mock( fit_action=fit_action, fit_obsval=fit_obsval, fit_value=fit_dvalue, fit_actval=fit_actval, fit_both=fit_concat, predict_action=predict_action, 
predict_obsval=predict_obsval, predict_value=predict_dvalue, predict_actval=predict_actval, predict_both=predict_concat, ) return nnet
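# Hedged sketch: the per-gradient clipping used in create_nnet above, shown
# standalone. Each gradient is rescaled so that its L2 norm over all of its axes
# stays below grad_clip; lasagne.updates.total_norm_constraint would instead
# rescale all gradients jointly by their global norm. `loss`, `params`,
# `grad_clip` and `learning_rate` are assumed to exist as in create_nnet.
import theano
from lasagne import updates

grads = theano.grad(loss, params)
grads = [updates.norm_constraint(g, grad_clip, norm_axes=range(g.ndim))
         for g in grads]
# global-norm alternative:
# grads = updates.total_norm_constraint(grads, grad_clip)
sgd_updates = updates.sgd(grads, params, learning_rate)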
def main(): configure_theano() config_file = 'config/separate_train.ini' print('loading config file: {}'.format(config_file)) config = ConfigParser.ConfigParser() config.read(config_file) print('preprocessing dataset...') data = load_mat_file(config.get('data', 'images')) ae_pretrained = config.get('models', 'pretrained') ae_finetuned = config.get('models', 'finetuned') learning_rate = float(config.get('training', 'learning_rate')) decay_rate = float(config.get('training', 'decay_rate')) decay_start = int(config.get('training', 'decay_start')) lstm_units = int(config.get('training', 'lstm_units')) output_units = int(config.get('training', 'output_units')) do_finetune = config.getboolean('training', 'do_finetune') save_finetune = config.getboolean('training', 'save_finetune') load_finetune = config.getboolean('training', 'load_finetune') # 53 subjects, 70 utterances, 5 view angles # s[x]_v[y]_u[z].mp4 # resized, height, width = (26, 44) # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec', # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__'] print(data.keys()) X = data['dataMatrix'].astype('float32') # .reshape((-1, 26, 44), order='f').reshape((-1, 26 * 44)) y = data['targetsVec'].astype('int32') y = y.reshape((len(y),)) uniques = np.unique(y) print('number of classifications: {}'.format(len(uniques))) subjects = data['subjectsVec'].astype('int') subjects = subjects.reshape((len(subjects),)) video_lens = data['videoLengthVec'].astype('int') video_lens = video_lens.reshape((len(video_lens,))) train_subject_ids = read_data_split_file('data/train.txt') val_subject_ids = read_data_split_file('data/val.txt') test_subject_ids = read_data_split_file('data/test.txt') print('Train: {}'.format(train_subject_ids)) print('Validation: {}'.format(val_subject_ids)) print('Test: {}'.format(test_subject_ids)) train_X, train_y, train_vidlens, train_subjects, \ val_X, val_y, val_vidlens, val_subjects, \ test_X, test_y, test_vidlens, test_subjects = \ split_data(X, y, subjects, video_lens, train_subject_ids, val_subject_ids, test_subject_ids) assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X) assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y) assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens) assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects) train_X = normalize_input(train_X, centralize=True) test_X = normalize_input(test_X, centralize=True) if do_finetune: dbn = load_dbn(ae_pretrained) dbn.initialize() dbn.fit(train_X, train_X) recon = dbn.predict(test_X) visualize_reconstruction(reorder_data(test_X[800:864], (26, 44)), reorder_data(recon[800:864], (26, 44)), shape=(26, 44)) if save_finetune: pickle.dump(dbn, open(ae_finetuned, 'wb')) if load_finetune: print('loading pre-trained encoding layers...') dbn = pickle.load(open(ae_finetuned, 'rb')) dbn.initialize() # recon = dbn.predict(test_X) # visualize_reconstruction(reorder_data(test_X[800:864], (26, 44)), # reorder_data(recon[800:864], (26, 44)), # shape=(26, 44)) encoder = extract_encoder(dbn) train_X = encoder.predict(train_X) val_X = encoder.predict(val_X) test_X = encoder.predict(test_X) # train_X = concat_first_second_deltas(train_X, train_vidlens) # val_X = concat_first_second_deltas(val_X, val_vidlens) # test_X = concat_first_second_deltas(test_X, test_vidlens) # featurewise normalize train_X, mean, std = 
featurewise_normalize_sequence(train_X) val_X = (val_X - mean) / std test_X = (test_X - mean) / std # recon = dbn.predict(test_X) # visualize_reconstruction(test_X[550:650], recon[550:650], (26, 44)) # exit() # IMPT: the encoder was trained with fortan ordered images, so to visualize # convert all the images to C order using reshape_images_order() # output = dbn.predict(test_X) # test_X = reshape_images_order(test_X, (26, 44)) # output = reshape_images_order(output, (26, 44)) # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44)) inputs = T.tensor3('inputs', dtype='float32') mask = T.matrix('mask', dtype='uint8') targets = T.ivector('targets') lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate') lr_decay = np.array(decay_rate, dtype=theano.config.floatX) print('constructing lstm classifier...') network = lstm_classifier_baseline.create_model((None, None, 50), inputs, (None, None), mask, lstm_units, output_units) print_network(network) print('compiling model...') predictions = las.layers.get_output(network, deterministic=False) all_params = las.layers.get_all_params(network, trainable=True) cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets)) updates = adadelta(cost, all_params, learning_rate=lr) # updates = las.updates.apply_momentum(sgd(cost, all_params, learning_rate=lr), all_params, 0.1) use_max_constraint = False if use_max_constraint: MAX_NORM = 4 for param in las.layers.get_all_params(network, regularizable=True): if param.ndim > 1: # only apply to dimensions larger than 1, exclude biases updates[param] = norm_constraint(param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean()) train = theano.function( [inputs, targets, mask], cost, updates=updates, allow_input_downcast=True) compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True) test_predictions = las.layers.get_output(network, deterministic=True) test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets)) compute_test_cost = theano.function( [inputs, targets, mask], test_cost, allow_input_downcast=True) val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True) # We'll train the network with 10 epochs of 30 minibatches each print('begin training...') cost_train = [] cost_val = [] class_rate = [] NUM_EPOCHS = 30 EPOCH_SIZE = 120 BATCH_SIZE = 10 STRIP_SIZE = 3 MAX_LOSS = 0.2 VALIDATION_WINDOW = 10 val_window = circular_list(VALIDATION_WINDOW) train_strip = np.zeros((STRIP_SIZE,)) best_val = float('inf') best_conf = None best_cr = 0.0 datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE) val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens)) test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens)) # We'll use this "validation set" to periodically check progress X_val, y_val, mask_val, _ = next(val_datagen) X_test, y_test, mask_test, _ = next(test_datagen) def early_stop(cost_window): if len(cost_window) < 2: return False else: curr = cost_window[0] for idx, cost in enumerate(cost_window): if curr < cost or idx == 0: curr = cost else: return False return True for epoch in range(NUM_EPOCHS): time_start = time.time() for i in range(EPOCH_SIZE): X, y, m, _ = next(datagen) print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format( epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value())) print(print_str, end='') 
sys.stdout.flush() train(X, y, m) print('\r', end='') cost = compute_train_cost(X, y, m) val_cost = compute_test_cost(X_val, y_val, mask_val) cost_train.append(cost) cost_val.append(val_cost) train_strip[epoch % STRIP_SIZE] = cost val_window.push(val_cost) gl = 100 * (cost_val[-1] / np.min(cost_val) - 1) pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1) pq = gl / pk cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn) class_rate.append(cr) if val_cost < best_val: best_val = val_cost best_conf = val_conf best_cr = cr test_cr, test_conf = evaluate_model(X_test, y_test, mask_test, val_fn) print("Epoch {} train cost = {}, val cost = {}, " "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)" .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr, time.time() - time_start)) else: print("Epoch {} train cost = {}, val cost = {}, " "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)" .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start)) if epoch >= VALIDATION_WINDOW and early_stop(val_window): break # learning rate decay if epoch > decay_start: lr.set_value(lr.get_value() * lr_decay) phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10'] print('Final Model') print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr)) print('confusion matrix: ') plot_confusion_matrix(test_conf, phrases, fmt='grid') plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')
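# Hedged sketch: the gl/pk/pq quantities in the training loop above follow the
# usual generalization-loss / progress-quotient early-stopping criteria
# (cf. Prechelt, "Early Stopping -- But When?"). A standalone version with
# hypothetical helper names, mirroring the formulas used in the loop:
import numpy as np

def generalization_loss(val_costs):
    # percent by which the latest validation cost exceeds the best one so far
    return 100.0 * (val_costs[-1] / np.min(val_costs) - 1.0)

def training_progress(recent_train_costs):
    # how much the training cost still moves within the last strip of epochs
    strip = np.asarray(recent_train_costs)
    return 1000.0 * (strip.sum() / (len(strip) * strip.min()) - 1.0)

def progress_quotient(val_costs, recent_train_costs):
    return generalization_loss(val_costs) / training_progress(recent_train_costs)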
def train_setup(): # Prepare Theano variables for inputs and targets input_var = T.tensor4('inputs') target_var = T.ivector('targets') # Create neural network model (depending on first command line parameter) print("Building model and compiling functions...") print( " with input dimension {0},{1},{2}".format( config.image_height, \ config.image_width, \ config. image_channel ) ) network = cnn_archi( input_var, \ config.image_channel,\ config.image_height, config.image_width,\ config.output_length ) print('Number of parameters : {0}'.format(count_params(network))) if (config.init_model is not None): with np.load(config.init_model) as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))] set_all_param_values(network, param_values) # Create a loss expression for training, i.e., a scalar objective we want # to minimize (for our multi-class problem, it is the cross-entropy loss): prediction = lasagne.layers.get_output(network) ent_loss = categorical_crossentropy(prediction, target_var) ent_loss = ent_loss.mean() l1_regu = config.l1_regu * regularize_network_params(network, l1) l2_regu = config.l2_regu * regularize_network_params(network, l2) loss = ent_loss + l1_regu + l2_regu # We could add some weight decay as well here, see lasagne.regularization. # Create update expressions for training, i.e., how to modify the # parameters at each training step. Here, we'll use Stochastic Gradient # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more. params = get_all_params(network, trainable=True) #grads = T.grad( loss, params ) #scaled_grads = norm_constraint( grads, 5. ) updates = nesterov_momentum(loss, params, \ learning_rate=config.learning_rate, \ momentum=config.momentum ) #updates = rmsprop( loss , params, learning_rate = config.learning_rate ) for param in get_all_params(network, regularizable=True): norm_axis = None if param.ndim == 1: norm_axis = [0] updates[param] = norm_constraint( updates[param], \ 5. * compute_norms( param.get_value() ).mean(), norm_axes = norm_axis ) #Create a loss expression for validation/testing. The crucial difference # here is that we do a deterministic forward pass through the network, # disabling dropout layers. test_prediction = get_output(network, deterministic=True) test_classes = T.argmax(test_prediction, axis=1) test_loss = categorical_crossentropy(test_prediction, target_var) test_loss = test_loss.mean() # As a bonus, also create an expression for the classification accuracy: test_acc = T.eq(test_classes, target_var) # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: train_fn = theano.function([input_var,target_var], \ ent_loss,\ updates=updates, \ allow_input_downcast=True) # Compile a second function computing the validation loss and accuracy: val_fn = theano.function([input_var, target_var], \ [test_loss, test_prediction, test_acc], \ allow_input_downcast=True ) return network, train_fn, val_fn
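# Hedged sketch: the per-parameter constraint used in train_setup(), factored
# into a helper. Each update expression is rescaled so its norm stays within
# `scale` times the mean norm of the current parameter values; norm_axes=[0] is
# passed for 1-D parameters because norm_constraint cannot infer axes for
# vectors. `constrain_updates` is a hypothetical helper name.
from lasagne.layers import get_all_params
from lasagne.updates import norm_constraint
from lasagne.utils import compute_norms

def constrain_updates(updates, network, scale=5.0):
    for param in get_all_params(network, regularizable=True):
        norm_axes = [0] if param.ndim == 1 else None
        max_norm = scale * compute_norms(param.get_value()).mean()
        updates[param] = norm_constraint(updates[param], max_norm,
                                         norm_axes=norm_axes)
    return updates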
elif run_parameters.unsupervised_cost_fun == 'categorical_crossentropy':
    test_loss1 = objectives.categorical_crossentropy(test_reconstruction, input_var)
if supervised_cost_fun == 'squared_error':
    test_loss2 = objectives.squared_error(test_prediction, target_var)
elif supervised_cost_fun == 'categorical_crossentropy':
    test_loss2 = objectives.categorical_crossentropy(test_prediction, target_var)
test_loss = losses_ratio[0] * test_loss1.mean() + \
            losses_ratio[1] * test_loss2.mean() + \
            losses_ratio[2] * l2_penalties.mean() + \
            losses_ratio[3] * sparse_regularizer

# Compute gradient in case of gradient clipping
if run_parameters.clip_gradient[0] is not None:
    grad = T.grad(loss, params)
    if run_parameters.clip_gradient[0] == 0:  # soft clip: rescale by gradient norm
        grad = [updates.norm_constraint(g, run_parameters.clip_gradient[1], range(g.ndim))
                for g in grad]
    elif run_parameters.clip_gradient[0] == 1:  # hard clip: clamp element-wise
        grad = [T.clip(g, run_parameters.clip_gradient[0], run_parameters.clip_gradient[1])
                for g in grad]
    # `loss` is rebound to the list of (clipped) gradient expressions; Lasagne
    # update rules accept either a scalar loss or a gradient list.
    loss = grad

# Update function to train
# sgd_lr = run_parameters.update_lr
sgd_lr = theano.shared(utils.floatX(run_parameters.update_lr))
sgd_lr_decay = utils.floatX(1.0)
sgd_lr_decay_threshold = utils.floatX(1.0)
updates_function = updates.adam(loss, params, run_parameters.update_lr)

# Compile train function
train_fn = theano.function([input_var, target_var, labeled_var], loss,
                           updates=updates_function, allow_input_downcast=True,
                           on_unused_input='ignore')

# Compile test prediction function
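# Hedged sketch: the two clipping modes selected above written out side by side.
# Lasagne update rules accept a list of gradient expressions in place of a
# scalar loss, so the clipped gradients can be passed straight to adam. The
# element-wise variant is shown with the common symmetric bounds [-clip, clip]
# (the fragment above clamps between clip_gradient[0] and clip_gradient[1]).
# `loss`, `params`, `lr` and `clip` are assumed to be defined as in the
# surrounding code.
import theano.tensor as T
from lasagne import updates

grads = T.grad(loss, params)
# "soft" clip: rescale each gradient so its L2 norm is at most `clip`
grads_norm = [updates.norm_constraint(g, clip, norm_axes=range(g.ndim))
              for g in grads]
# "hard" clip: clamp each gradient element into [-clip, clip]
grads_clip = [T.clip(g, -clip, clip) for g in grads]
adam_updates = updates.adam(grads_norm, params, learning_rate=lr)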