def main():
    """Train the delta-net majority-vote LSTM on a subject-wise data split.

    Workflow:
      1. Read CLI options and the config file named by ``options['config']``.
         For the hyper-parameters below, a CLI option overrides the config.
      2. Load the image .mat file, split it into train/val/test by the
         subject ids listed in ``data/{train,val,test}.txt``, and preprocess
         (reorder, sequence-wise mean subtraction, normalization).
      3. Build the network on top of the pretrained encoder weights and
         compile the Theano train / cost / prediction functions.
      4. Train with adam, tracking the best validation cost, with early
         stopping decided by ``early_stop2``.
      5. Report, plot and optionally persist results / best parameters.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))

    # model parameters
    ae_pretrained = config.get('models', 'pretrained')
    input_dimension = config.getint('models', 'input_dimension')
    output_classes = config.getint('models', 'output_classes')
    lstm_size = config.getint('models', 'lstm_size')
    nonlinearity = options['nonlinearity'] if 'nonlinearity' in options \
        else config.get('models', 'nonlinearity')

    # Map the nonlinearity name onto the callable; any other value is
    # passed through untouched, exactly as before.
    if nonlinearity == 'sigmoid':
        nonlinearity = sigmoid
    elif nonlinearity == 'rectify':
        nonlinearity = rectify

    # capture training parameters
    # Every numeric CLI override is converted explicitly: the integer
    # overrides were already cast, but learning_rate / epochsize / batchsize
    # were used raw, which breaks '{:.4f}'.format(), range() and adam when
    # the option arrives as a string.
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    no_epochs = int(options['no_epochs']) \
        if 'no_epochs' in options else config.getint('training', 'no_epochs')
    weight_init = options['weight_init'] \
        if 'weight_init' in options else config.get('training', 'weight_init')
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options else config.getfloat('training', 'learning_rate')
    epochsize = int(options['epochsize']) \
        if 'epochsize' in options else config.getint('training', 'epochsize')
    batchsize = int(options['batchsize']) \
        if 'batchsize' in options else config.getint('training', 'batchsize')
    use_peepholes = options['use_peepholes'] \
        if 'use_peepholes' in options else config.getboolean('training', 'use_peepholes')

    # 'glorot' and any unrecognised name fall back to GlorotUniform.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')

    data_matrix = data['dataMatrix']
    targets_vec = data['targetsVec'].reshape((-1,))
    subjects_vec = data['subjectsVec'].reshape((-1,))
    vidlen_vec = data['videoLengthVec'].reshape((-1,))

    data_matrix = sequencewise_mean_image_subtraction(data_matrix, vidlen_vec)

    train_X, train_y, train_vidlens, train_subjects, \
        val_X, val_y, val_vidlens, val_subjects, \
        test_X, test_y, test_vidlens, test_subjects = split_seq_data(
            data_matrix, targets_vec, subjects_vec, vidlen_vec,
            train_subject_ids, val_subject_ids, test_subject_ids)

    # Shift the targets by one (in place), as the original code did.
    train_y += 1
    val_y += 1
    test_y += 1

    train_X = reorder_data(train_X, (30, 50))
    val_X = reorder_data(val_X, (30, 50))
    test_X = reorder_data(test_X, (30, 50))

    train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
    val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
    test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    weights, biases = load_dbn(ae_pretrained)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # IMPT: the encoder was trained with Fortran-ordered images, so to
    # visualize them convert all images to C order using
    # reshape_images_order() first.

    # symbolic inputs of the compiled functions
    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model_using_pretrained_encoder(
        weights, biases, (None, None, input_dimension), inputs,
        (None, None), mask, lstm_size, window, output_classes,
        weight_init_fn, use_peepholes, nonlinearity)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = las.updates.adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, window], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function(
        [inputs, mask, window], test_predictions, allow_input_downcast=True)

    # Train for up to `no_epochs` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    # reshape the targets for validation: the loss takes one target per
    # mask column, while evaluate_model2 is given the original vector
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, _ = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, WINDOW_SIZE)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # Progress statistics that are only printed below:
        # gl - validation cost relative to the best validation cost so far
        # pk - training cost over the last STRIP_SIZE epochs relative to its min
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: score the test set and snapshot params.
            best_val = val_cost
            val_cr = cr
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, WINDOW_SIZE, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    print('Final Model')
    print('test CR: {}, val CR: {}, val loss: {}'.format(test_cr, val_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

    if 'write_results' in options:
        with open(options['write_results'], mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, val_cr, best_val))

    if 'save_best' in options:
        print('Saving the best model so far...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('Model Saved!')
def main():
    """Train the baseline end-to-end LSTM on top of a finetuned encoder.

    Workflow:
      1. Read CLI options and the config file named by ``options['config']``.
      2. Load the pre-split train/val/test data from the image .mat file and
         normalize it.
      3. Load the pickled finetuned encoder, build the network and compile
         the Theano train / cost / prediction functions (adadelta updates on
         a categorical cross-entropy cost).
      4. Train for up to NUM_EPOCHS epochs, decaying the learning rate from
         epoch ``decay_start`` on and stopping early when the validation
         cost kept rising across the whole validation window.
      5. Report, plot and optionally save the best parameters.

    Raises:
        ValueError: if ``load_finetune`` is disabled in the config. The
            network cannot be built without the encoder; previously this
            surfaced late as a NameError on ``dbn``.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = config.getfloat('training', 'learning_rate')
    decay_rate = config.getfloat('training', 'decay_rate')
    decay_start = config.getint('training', 'decay_start')
    load_finetune = config.getboolean('training', 'load_finetune')
    lstm_units = config.getint('training', 'lstm_units')
    output_units = config.getint('training', 'output_units')

    if not load_finetune:
        # There is no other source for the encoder in this script, so fail
        # with an explicit message instead of a NameError further down.
        raise ValueError(
            "'load_finetune' must be enabled in the [training] section: "
            "the end to end model is built from the finetuned encoder")

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1  # +1 to handle the -1 introduced in lstm_gendata
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # The encoder used to be unpickled twice (once into an unused variable)
    # through open() calls that were never closed; load it once and close
    # the file deterministically.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point the 'finetuned' config entry at trusted model files.
    print('loading finetuned encoder: {}...'.format(ae_finetuned))
    with open(ae_finetuned, 'rb') as encoder_file:
        dbn = pickle.load(encoder_file)
    dbn.initialize()

    # IMPT: the encoder was trained with Fortran-ordered images, so to
    # visualize them convert all images to C order using
    # reshape_images_order() first.

    # symbolic inputs of the compiled functions
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate is a shared variable so it can be decayed in place
    # during training without recompiling.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network = baseline_end2end.create_model(dbn, (None, None, 1500), inputs,
                                            (None, None), mask, lstm_units, output_units)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)

    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 45
    BATCH_SIZE = 20
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the one before it.

        Windows holding fewer than two costs never trigger a stop.
        """
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # Progress statistics that are only printed below:
        # gl - validation cost relative to the best validation cost so far
        # pk - training cost over the last STRIP_SIZE epochs relative to its min
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: score the test set and snapshot params.
            best_val = val_cost
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(test_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

    if 'save_best' in options:
        print('Saving the best model so far...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
def main():
    """Train the majority-vote LSTM classifier on stream1 features plus deltas.

    Workflow:
      1. Read CLI options and the config file named by ``options['config']``.
         For the hyper-parameters below, a CLI option overrides the config.
      2. Load the stream1 .mat file, optionally normalize / mean-remove it,
         append first and second deltas, and split it into train/val/test by
         the subject-id files named in the config.
      3. Build the LSTM classifier and compile the Theano train / cost /
         prediction functions (adam updates on ``temporal_softmax_loss``).
      4. Train, tracking the best validation cost, with early stopping
         decided by ``early_stop2``.
      5. Print the confusion matrix and optionally write plots, results and
         the best parameters to the paths given on the command line.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))
    print('CLI options: {}'.format(options.items()))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('stream1', 'data'))
    stream1_dim = config.getint('stream1', 'input_dimensions')
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    weight_init = options['weight_init'] \
        if 'weight_init' in options else config.get('lstm_classifier', 'weight_init')
    use_peepholes = options['use_peepholes'] \
        if 'use_peepholes' in options else config.getboolean('lstm_classifier', 'use_peepholes')
    windowsize = config.getint('lstm_classifier', 'windowsize')

    # data preprocessing options
    meanremove = config.getboolean('stream1', 'meanremove')
    samplewisenormalize = config.getboolean('stream1', 'samplewisenormalize')
    featurewisenormalize = config.getboolean('stream1', 'featurewisenormalize')

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) \
        if 'num_epoch' in options else config.getint('training', 'num_epoch')
    # Cast the CLI override like the integer overrides above; it used to be
    # handed to adam unconverted.
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options else config.getfloat('training', 'learning_rate')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    # 'glorot' and any unrecognised name fall back to GlorotUniform.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_subject_ids = read_data_split_file(config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(config.get('training', 'test_subjects_file'))

    data_matrix = data['dataMatrix'].astype('float32')
    targets_vec = data['targetsVec'].reshape((-1, ))
    subjects_vec = data['subjectsVec'].reshape((-1, ))
    vidlen_vec = data['videoLengthVec'].reshape((-1, ))

    if samplewisenormalize:
        data_matrix = normalize_input(data_matrix)

    if meanremove:
        data_matrix = sequencewise_mean_image_subtraction(data_matrix, vidlen_vec)

    # The network input below is sized stream1_dim * 3 to match this call.
    data_matrix = concat_first_second_deltas(data_matrix, vidlen_vec, windowsize)

    train_dct, train_y, train_vidlens, train_subjects, \
        val_dct, val_y, val_vidlens, val_subjects, \
        test_dct, test_y, test_vidlens, test_subjects = split_seq_data(
            data_matrix, targets_vec, subjects_vec, vidlen_vec,
            train_subject_ids, val_subject_ids, test_subject_ids)

    if matlab_target_offset:
        # Shift the targets down by one (in place) when the config asks for it.
        train_y -= 1
        val_y -= 1
        test_y -= 1

    # featurewise normalize dct features
    if featurewisenormalize:
        # Statistics come from the training set only and are reused for the
        # validation and test sets.
        train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
        val_dct = (val_dct - dct_mean) / dct_std
        test_dct = (test_dct - dct_mean) / dct_std

    # symbolic inputs of the compiled functions
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = lstm_classifier_majority_vote.create_model(
        (None, None, stream1_dim * 3), inputs, (None, None), mask,
        lstm_size, output_classes, weight_init_fn, use_peepholes)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function([inputs, targets, mask], cost,
                            updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost,
                                         allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask], test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions,
                             allow_input_downcast=True)

    # Train for up to `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_dct, train_y, train_vidlens, batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_dct, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_dct, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)
    # Loop invariant: every training batch is generated to this length.
    max_train_vidlen = np.max(train_vidlens)

    # We'll use this "validation set" to periodically check progress.
    # Only the targets, mask and indices of the generator output are used;
    # the feature batch itself is rebuilt by gen_seq_batch_from_idx.
    _, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))

    _, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test, np.max(test_vidlens))

    # reshape the targets for validation: the loss takes one target per
    # mask column, while evaluate_model2 is given the original vector
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            _, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, max_train_vidlen)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate = {}'.format(
                epoch + 1, i + 1, epochsize, len(y), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # Progress statistics that are only printed below:
        # gl - validation cost relative to the best validation cost so far
        # pk - training cost over the last STRIP_SIZE epochs relative to its min
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(dct_val, y_val_evaluate, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: score the test set and snapshot params.
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(dct_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))

    # plot confusion matrix
    table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train, cost_val,
                             savefilename='{}.validloss.png'.format(prefix))
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        print('saving best model...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('best model saved to {}'.format(options['save_best']))
def main():
    """Train the delta-net majority-vote LSTM on pre-split train/val/test data.

    Workflow:
      1. Read CLI options and the config file named by ``options['config']``.
         For the hyper-parameters below, a CLI option overrides the config.
      2. Load the already split train/val/test arrays from the image .mat
         file and preprocess them (reorder, sequence-wise mean subtraction,
         normalization).
      3. Build the network on top of the pretrained encoder weights and
         compile the Theano train / cost / prediction functions.
      4. Train with adam, tracking the best validation cost, with early
         stopping decided by ``early_stop2``.
      5. Report and plot results; when ``save_best`` is given, save the best
         parameters, reload them from that same file and re-evaluate on the
         test set as a round-trip check.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))

    # model parameters
    ae_pretrained = config.get('models', 'pretrained')
    input_dimension = config.getint('models', 'input_dimension')
    output_classes = config.getint('models', 'output_classes')
    lstm_size = config.getint('models', 'lstm_size')
    nonlinearity = options['nonlinearity'] \
        if 'nonlinearity' in options else config.get('models', 'nonlinearity')

    # Map the nonlinearity name onto the callable; any other value is
    # passed through untouched, exactly as before.
    if nonlinearity == 'sigmoid':
        nonlinearity = sigmoid
    elif nonlinearity == 'rectify':
        nonlinearity = rectify

    # capture training parameters
    # Every numeric CLI override is converted explicitly: the integer
    # overrides were already cast, but learning_rate / epochsize / batchsize
    # were used raw, which breaks '{:.4f}'.format(), range() and adam when
    # the option arrives as a string.
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    no_epochs = int(options['no_epochs']) \
        if 'no_epochs' in options else config.getint('training', 'no_epochs')
    weight_init = options['weight_init'] \
        if 'weight_init' in options else config.get('training', 'weight_init')
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options else config.getfloat('training', 'learning_rate')
    epochsize = int(options['epochsize']) \
        if 'epochsize' in options else config.getint('training', 'epochsize')
    batchsize = int(options['batchsize']) \
        if 'batchsize' in options else config.getint('training', 'batchsize')
    use_peepholes = options['use_peepholes'] \
        if 'use_peepholes' in options else config.getboolean('training', 'use_peepholes')

    # 'glorot' and any unrecognised name fall back to GlorotUniform.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_y = data['trTargetsVec'].astype('int').reshape((-1, )) + 1  # +1 to handle the -1 introduced in lstm_gendata
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    train_X = reorder_data(train_X, (30, 50))
    val_X = reorder_data(val_X, (30, 50))
    test_X = reorder_data(test_X, (30, 50))

    train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
    val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
    test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    weights, biases = load_dbn(ae_pretrained)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # IMPT: the encoder was trained with Fortran-ordered images, so to
    # visualize them convert all images to C order using
    # reshape_images_order() first.

    # symbolic inputs of the compiled functions
    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model_using_pretrained_encoder(
        weights, biases, (None, None, input_dimension), inputs,
        (None, None), mask, lstm_size, window, output_classes,
        weight_init_fn, use_peepholes, nonlinearity)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = las.updates.adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function([inputs, targets, mask, window], cost,
                            updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, window], cost,
                                         allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask, window], test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, window], test_predictions,
                             allow_input_downcast=True)

    # Train for up to `no_epochs` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    # reshape the targets for validation: the loss takes one target per
    # mask column, while evaluate_model2 is given the original vector
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, _ = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, WINDOW_SIZE)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # Progress statistics that are only printed below:
        # gl - validation cost relative to the best validation cost so far
        # pk - training cost over the last STRIP_SIZE epochs relative to its min
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: score the test set and snapshot params.
            best_val = val_cost
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, WINDOW_SIZE, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(test_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

    if 'save_best' in options:
        print('Saving the best model so far...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])

        # Round-trip check: reload the file that was just written (this used
        # to read a hard-coded 'models/unimodal_with_val.pkl' regardless of
        # where the model had been saved) and re-score the test set.
        print('test reloading...')
        network = load_model_params(network, options['save_best'])
        test_predictions = las.layers.get_output(network, deterministic=True)
        val_fn = theano.function([inputs, mask, window], test_predictions,
                                 allow_input_downcast=True)
        # NOTE(review): the training loop scores with evaluate_model2 using
        # these same arguments; confirm evaluate_model is the intended
        # function here.
        test_cr, test_conf = evaluate_model(X_test, y_test, mask_test, WINDOW_SIZE, val_fn)
        print('classification rate: {}, validation loss: {}'.format(test_cr, best_val))
def main():
    """Train and evaluate the fused image / diff-image sequence model.

    Settings are read from the config file named by the ``config`` CLI
    option.  ``use_peepholes``, ``validation_window``, ``num_epoch`` and
    ``weight_init`` are taken from the CLI options when present there,
    otherwise from the config file.

    The data is split by subject id (``data/train.txt``, ``data/val.txt``,
    ``data/test.txt``), a model is built with ``adenet_v2_2`` (when
    ``use_blstm`` is set) or ``adenet_v2_4``, and trained with adam on
    ``temporal_softmax_loss``.  The test set is evaluated only on epochs in
    which the validation cost improves.  When the ``write_results`` option is
    given, the settings and cost curves are appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_pretrained_diff = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')
    lstm_size = config.getint('models', 'lstm_size')
    output_classes = config.getint('models', 'output_classes')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('models', 'use_peepholes')
    use_blstm = config.getboolean('models', 'use_blstm')
    delta_window = config.getint('models', 'delta_window')
    input_dimensions = config.getint('models', 'input_dimensions')

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options \
        else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options \
        else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    use_finetuning = config.getboolean('training', 'use_finetuning')
    learning_rate = config.getfloat('training', 'learning_rate')
    batchsize = config.getint('training', 'batchsize')
    epochsize = config.getint('training', 'epochsize')

    # Map the configured name to an initializer; unknown names fall back to
    # GlorotUniform, as before.
    weight_initializers = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = weight_initializers.get(
        weight_init, las.init.GlorotUniform)()

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y), ))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects), ))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens), ))

    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))
    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
        val_X, val_y, val_dct, val_X_diff, val_vidlens, val_subjects, \
        test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens,
                   train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity-check that the three splits together cover the whole data set.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + \
        test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + \
        test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    print('loading pretrained encoder: {}...'.format(ae_pretrained))
    ae = load_dbn(ae_pretrained)

    print('loading pretrained encoder: {}...'.format(ae_pretrained_diff))
    ae_diff = load_dbn(ae_pretrained_diff)

    # IMPT: the encoder was trained with Fortran-ordered images, so to
    # visualize, convert all the images to C order using reshape_images_order()

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # Both model variants take the same argument list.
    model_module = adenet_v2_2 if use_blstm else adenet_v2_4
    network, l_fuse = model_module.create_model(
        ae, ae_diff, (None, None, input_dimensions), inputs,
        (None, None), mask,
        (None, None, input_dimensions), inputs_diff,
        lstm_size, window, output_classes, fusiontype,
        weight_init_fn, use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, inputs_diff, window],
                             test_predictions, allow_input_downcast=True)

    # Train for up to `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0
    # Only bound when an epoch improves the validation cost; initialised so
    # the final report cannot hit a NameError (zero epochs, NaN costs).
    test_cr = None
    test_conf = None
    adascale_param = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=batchsize)
    integral_lens = compute_integral_len(train_vidlens)

    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val,
                                        np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test,
                                         np.max(test_vidlens))

    # Keep the per-sequence targets for evaluate_model2 and repeat them along
    # the mask's last axis for the cost function.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens,
                                            np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, delta_window)
            print('\r', end='')

        # Costs are measured on the last training batch of the epoch and on
        # the whole validation batch.
        train_cost = compute_train_cost(X, y, m, X_diff, delta_window)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val,
                                     delta_window)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: latest validation cost relative to the best one seen, in percent.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # NOTE(review): train_strip starts as zeros, so np.min(train_strip) is
        # 0 until STRIP_SIZE epochs have run and pk divides by zero.
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, _ = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                X_diff_val, delta_window, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, delta_window,
                                                 val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(
                val_window, best_val, validation_window):
            break

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    if test_conf is None:
        print('no epoch improved the validation cost; nothing to plot')
    else:
        plot_confusion_matrix(test_conf, phrases, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{}\n'.format(validation_window, weight_init,
                                              use_peepholes, use_blstm,
                                              use_finetuning))
            f.write('{}\n'.format(','.join([str(v) for v in cost_train])))
            f.write('{}\n'.format(','.join([str(v) for v in cost_val])))
            f.write('{}\n'.format(','.join([str(v) for v in class_rate])))
            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))
def main():
    """Train and evaluate the three-stream (image, DCT, diff-image) model.

    Settings are read from the config file named by the ``config`` CLI
    option.  The pre-split train/val/test arrays are taken from the two .mat
    files named in the ``data`` section, both finetuned encoders are
    unpickled, and an ``adenet_v3`` model is trained with adadelta on mean
    categorical cross-entropy.  From epoch ``decay_start`` onwards the shared
    learning rate is multiplied by ``decay_rate`` after every epoch.  The test
    set is evaluated only on epochs in which the validation cost improves.
    When a ``write_results`` option is given, a summary line is appended to
    that file.

    Raises:
        ValueError: if ``load_finetune`` or ``load_finetune_diff`` is disabled
            in the config, because the model needs both encoders.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_dct = dct_data['trDctFeatures'].astype('float32')
    val_dct = dct_data['valDctFeatures'].astype('float32')
    test_dct = dct_data['testDctFeatures'].astype('float32')

    train_X_diff = compute_diff_images(train_X, train_vidlens)
    val_X_diff = compute_diff_images(val_X, val_vidlens)
    test_X_diff = compute_diff_images(test_X, test_vidlens)

    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1, )) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    # featurewise normalize dct features; val/test reuse the training
    # mean and std
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only point the config at encoder files from a trusted source.
    ae = None
    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'rb') as encoder_file:
            ae = pickle.load(encoder_file)
        ae.initialize()

    ae_diff = None
    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        with open(ae_finetuned_diff, 'rb') as encoder_file:
            ae_diff = pickle.load(encoder_file)
        ae_diff.initialize()

    # Previously a disabled flag left the encoder name unbound and the model
    # construction below died with a NameError; fail with a clear message.
    if ae is None or ae_diff is None:
        raise ValueError(
            'both load_finetune and load_finetune_diff must be enabled in '
            'the [training] section: the model needs both encoders')

    # IMPT: the encoder was trained with Fortran-ordered images, so to
    # visualize, convert all the images to C order using reshape_images_order()

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # Shared variable so the learning rate can be decayed between epochs
    # without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network, l_fuse = adenet_v3.create_model(
        ae, ae_diff, (None, None, 1500), inputs,
        (None, None), mask,
        (None, None, 90), dct,
        (None, None, 1500), inputs_diff,
        250, window, 10, fusiontype)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    train = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, dct, inputs_diff, window],
                             test_predictions, allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 45
    BATCH_SIZE = 20
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0
    # Only bound when an epoch improves the validation cost; initialised so
    # the final report cannot hit a NameError (e.g. NaN costs).
    test_cr = None
    test_conf = None
    adascale_param = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=BATCH_SIZE)
    integral_lens = compute_integral_len(train_vidlens)

    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val,
                                        np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test,
                                      np.max(test_vidlens))
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test,
                                         np.max(test_vidlens))

    def early_stop(cost_window):
        """Return True when the windowed costs are strictly increasing."""
        if len(cost_window) < 2:
            return False
        previous = cost_window[0]
        for idx, current in enumerate(cost_window):
            if previous < current or idx == 0:
                previous = current
            else:
                return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens,
                                            np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')

        # Costs are measured on the last training batch of the epoch and on
        # the whole validation batch.
        train_cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val,
                                     X_diff_val, WINDOW_SIZE)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: latest validation cost relative to the best one seen, in percent.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # NOTE(review): train_strip starts as zeros, so np.min(train_strip) is
        # 0 until STRIP_SIZE epochs have run and pk divides by zero.
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, _ = evaluate_model(X_val, y_val, mask_val, dct_val, X_diff_val,
                               WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test,
                                                dct_test, X_diff_test,
                                                WINDOW_SIZE, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    if test_conf is None:
        print('no epoch improved the validation cost; nothing to plot')
    else:
        plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    # Tolerate the option being absent as well as present-but-empty.
    if 'write_results' in options and options['write_results']:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(fusiontype, test_cr, best_val))
def main():
    """Train and evaluate the majority-vote delta-feature LSTM model.

    Settings are read from the config file named by the ``config`` CLI
    option.  The data is split by subject id (``data/train.txt``,
    ``data/val.txt``, ``data/test.txt``), a model is built on top of the
    pretrained encoder with
    ``deltanet_majority_vote.create_model_using_pretrained_encoder`` and
    trained with adam on ``temporal_softmax_loss``.  The test set is
    evaluated only on epochs in which the validation cost improves.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    lstm_units = int(config.get('models', 'lstm_units'))
    output_classes = int(config.get('models', 'output_classes'))
    weight_init = config.get('models', 'weight_init')
    delta_window = config.getint('models', 'delta_window')
    nonlinearity = select_nonlinearity(config.get('models', 'nonlinearity'))

    # Map the configured name to an initializer; unknown names fall back to
    # GlorotUniform, as before.
    weight_initializers = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = weight_initializers.get(
        weight_init, las.init.GlorotUniform)()

    learning_rate = float(config.get('training', 'learning_rate'))
    no_epochs = config.getint('training', 'no_epochs')
    use_peepholes = config.getboolean('training', 'use_peepholes')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')
    validation_window = config.getint('training', 'validation_window')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y), ))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects), ))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens), ))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))
    train_X, train_y, train_vidlens, train_subjects, \
        val_X, val_y, val_vidlens, val_subjects, \
        test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens,
                   train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity-check that the three splits together cover the whole data set.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + \
        test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + \
        test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    weights, biases = load_dbn(ae_pretrained)

    # IMPT: the encoder was trained with Fortran-ordered images, so to
    # visualize, convert all the images to C order using reshape_images_order()

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # 1144 input features = 26 * 44 (the resized image size noted above).
    network = deltanet_majority_vote.create_model_using_pretrained_encoder(
        weights, biases, (None, None, 1144), inputs,
        (None, None), mask,
        lstm_units, window, output_classes, weight_init_fn,
        use_peepholes, nonlinearity)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate)

    train = theano.function([inputs, targets, mask, window],
                            cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask, window],
                                        test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, window], test_predictions,
                             allow_input_downcast=True)

    # Train for up to `no_epochs` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    # Only bound when an epoch improves the validation cost; initialised so
    # the final report cannot hit a NameError (zero epochs, NaN costs).
    test_cr = None
    test_conf = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    # Keep the per-sequence targets for evaluate_model2 and repeat them along
    # the mask's last axis for the cost function.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, _ = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, delta_window)
            print('\r', end='')

        # Costs are measured on the last training batch of the epoch and on
        # the whole validation batch.
        train_cost = compute_train_cost(X, y, m, delta_window)
        val_cost = compute_test_cost(X_val, y_val, mask_val, delta_window)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: latest validation cost relative to the best one seen, in percent.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # NOTE(review): train_strip starts as zeros, so np.min(train_strip) is
        # 0 until STRIP_SIZE epochs have run and pk divides by zero.
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, _ = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                delta_window, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 delta_window, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(
                val_window, best_val, validation_window):
            break

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(
        test_cr, best_val))
    print('confusion matrix: ')
    if test_conf is None:
        print('no epoch improved the validation cost; nothing to plot')
    else:
        plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)
def construct_lstm(input_size, lstm_size, output_size, train_data_gen,
                   val_data_gen):
    """Build a single-layer LSTM classifier, train it and report the results.

    The network is input -> masked LSTM -> last time step -> softmax dense
    layer.  It is trained with adadelta on mean categorical cross-entropy for
    at most 30 epochs of 26 minibatches, stopping early once the last few
    validation costs are strictly increasing.  The best validation result is
    printed and plotted at the end; nothing is returned.

    Args:
        input_size: number of features per time step.
        lstm_size: number of hidden/cell units in the LSTM layer.
        output_size: number of output classes.
        train_data_gen: iterator yielding training batches whose first three
            items are (inputs, targets, mask).
        val_data_gen: iterator whose first batch is used as the validation
            set; its first three items are (inputs, targets, mask).
    """
    # All gates have initializers for the input-to-gate and hidden
    # state-to-gate weight matrices, the cell-to-gate weight vector, the bias
    # vector, and the nonlinearity.  Gates keep the Gate class's default
    # nonlinearity.
    gate_parameters = Gate(W_in=las.init.Orthogonal(),
                           W_hid=las.init.Orthogonal(),
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(),
        W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    # By setting the first and second dimensions to None, we allow
    # arbitrary minibatch sizes with arbitrary sequence lengths.
    l_in = InputLayer(shape=(None, None, input_size), name='input')

    # Masks are expected to be matrices of shape (n_batch, n_time_steps);
    # both dimensions are variable, hence (None, None).
    l_mask = InputLayer(shape=(None, None), name='mask')

    l_lstm = LSTMLayer(
        l_in, lstm_size,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm1')

    # (A disabled bidirectional / stacked-LSTM variant with dropout used to
    # be kept here as an inert string literal; it was never executed.)

    # Only the final time step feeds the classifier: slice index -1 on axis 1.
    l_forward_slice = SliceLayer(l_lstm, -1, 1, name='slice')

    # Softmax layer with one unit per class.
    l_out = DenseLayer(l_forward_slice, num_units=output_size,
                       nonlinearity=las.nonlinearities.softmax, name='output')

    print_network(l_out)
    # draw_to_file(las.layers.get_all_layers(l_out), 'network.png')

    # One integer target per sequence.
    target_values = T.ivector('target_output')

    # lasagne.layers.get_output produces an expression for the net's output
    prediction = las.layers.get_output(l_out)
    # Our cost will be categorical cross entropy error
    cost = T.mean(
        las.objectives.categorical_crossentropy(prediction, target_values))

    # Retrieve all trainable parameters and compute adadelta updates
    all_params = las.layers.get_all_params(l_out, trainable=True)
    updates = adadelta(cost, all_params)

    # Theano functions for training and computing cost
    train = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        cost, allow_input_downcast=True)

    test_prediction = las.layers.get_output(l_out, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_prediction,
                                                target_values))
    compute_val_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function([l_in.input_var, l_mask.input_var],
                             test_prediction, allow_input_downcast=True)

    # We'll use this "validation set" to periodically check progress.
    # Training batches are unpacked as 4 items while this one was unpacked as
    # 3; taking the leading three accepts either batch layout.
    X_val, y_val, mask_val = next(val_data_gen)[:3]

    cost_train = []
    cost_val = []
    class_rate = []
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    NUM_EPOCHS = 30
    EPOCH_SIZE = 26
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))

    def early_stop(cost_window):
        """Return True when the windowed costs are strictly increasing."""
        if len(cost_window) < 2:
            return False
        previous = cost_window[0]
        for idx, current in enumerate(cost_window):
            if previous < current or idx == 0:
                previous = current
            else:
                return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for _ in range(EPOCH_SIZE):
            X, y, m = next(train_data_gen)[:3]
            train(X, y, m)

        # Costs are measured on the last training batch of the epoch and on
        # the validation batch.
        train_cost = compute_train_cost(X, y, m)
        val_cost = compute_val_cost(X_val, y_val, mask_val)
        cr, conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        class_rate.append(cr)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: latest validation cost relative to the best one seen, in percent.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # NOTE(review): train_strip starts as zeros, so np.min(train_strip) is
        # 0 until STRIP_SIZE epochs have run and pk divides by zero.
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            best_conf = conf

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

    # Class labels 'a'..'z' for the confusion-matrix plot.
    letters = list('abcdefghijklmnopqrstuvwxyz')

    print('Final Model')
    print('classification rate: {}'.format(best_cr))
    print('validation loss: {}'.format(best_val))
    print('confusion matrix: ')
    if best_conf is None:
        # Happens only if no epoch produced a validation cost below infinity.
        print('no epoch improved the validation cost; nothing to plot')
    else:
        plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)
def main():
    """Train the end-to-end baseline model and report its performance.

    Reads the config file named by the ``config`` CLI option, loads the
    dataset ``.mat`` file and the pickled fine-tuned encoder, builds the
    ``baseline_end2end`` network, and trains it with adadelta.

    Training runs for at most ``NUM_EPOCHS`` epochs of ``EPOCH_SIZE``
    minibatches. It stops early once the last ``VALIDATION_WINDOW``
    validation costs are strictly increasing. The learning rate is multiplied
    by ``decay_rate`` after every epoch from ``decay_start`` onwards. The test
    set is evaluated each time the validation cost improves, and those
    results are the ones reported at the end. If the ``save_best`` CLI option
    is present, the best parameters are restored and saved to that path.

    Raises:
        ValueError: if ``training.load_finetune`` is false; the network
            cannot be built without the fine-tuned encoder.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    load_finetune = config.getboolean('training', 'load_finetune')
    lstm_units = config.getint('training', 'lstm_units')
    output_units = config.getint('training', 'output_units')

    # The encoder is the only source of `dbn`; fail with a clear message
    # instead of a NameError when the model is constructed further down.
    if not load_finetune:
        raise ValueError(
            "config option training.load_finetune must be true: the "
            "end-to-end model is built on the fine-tuned encoder")

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1, )) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # Load the encoder once and close the file handle deterministically.
    # NOTE(review): pickle executes arbitrary code on load; the path comes
    # from the config file, so only point it at trusted model files.
    print('loading finetuned encoder: {}...'.format(ae_finetuned))
    with open(ae_finetuned, 'rb') as encoder_file:
        dbn = pickle.load(encoder_file)
    dbn.initialize()

    # IMPT: the encoder was trained with fortran ordered images, so to
    # visualize, convert all the images to C order using
    # reshape_images_order()

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network = baseline_end2end.create_model(dbn, (None, None, 1500), inputs,
                                            (None, None), mask,
                                            lstm_units, output_units)
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    train = theano.function([inputs, targets, mask], cost,
                            updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost,
                                         allow_input_downcast=True)
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask], test_cost,
                                        allow_input_downcast=True)
    val_fn = theano.function([inputs, mask], test_predictions,
                             allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 45
    BATCH_SIZE = 20
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)
    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        """Return True when the windowed costs are strictly increasing."""
        if len(cost_window) < 2:
            return False
        prev = None
        for idx, value in enumerate(cost_window):
            if idx > 0 and not prev < value:
                return False
            prev = value
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')

        # The epoch's training cost is measured on the last minibatch only.
        train_cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl / pq are the values printed as "GL loss" and "GQ" below.
        # train_strip starts as zeros, so until every slot has been written
        # its minimum may be 0 and pk / pq are not meaningful.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # New best validation cost: score the test set and snapshot the
            # parameters so they can be restored for saving.
            best_val = val_cost
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test,
                                                val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, test_cr, time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(
        test_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)

    if 'save_best' in options:
        print('Saving the best model so far...')
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
def main():
    """Train the two-stream (visual + audio) ``avnet`` model and report results.

    Reads the config file named by the ``config`` CLI option, loads the image
    and audio ``.mat`` files and the two pretrained encoders, builds one
    pretrained substream per modality, and fuses them with
    ``avnet.create_model``. The network is trained with adam on
    ``temporal_softmax_loss``; targets are repeated along the mask's last
    axis to match that loss.

    CLI options ``nonlinearity``, ``validation_window``, ``num_epoch``,
    ``weight_init``, ``learning_rate`` and ``use_peepholes`` override the
    config file when present. The test set is evaluated whenever the
    validation cost improves. If the ``write_results`` CLI option is present,
    a summary line and the per-epoch cost / classification-rate histories
    are appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    data_audio = load_mat_file(config.get('data', 'audio'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_diff_pretrained = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')
    lstm_size = config.getint('models', 'lstm_size')
    output_classes = config.getint('models', 'output_classes')
    nonlinearity = options['nonlinearity'] if 'nonlinearity' in options \
        else config.get('models', 'nonlinearity')
    # Any other value is passed through unchanged.
    if nonlinearity == 'sigmoid':
        nonlinearity = sigmoid
    elif nonlinearity == 'rectify':
        nonlinearity = rectify

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options \
        else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options \
        else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    # Cast like the other numeric overrides: a string from the command line
    # would break both adam() and the '{:.4f}' progress message below.
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('training', 'use_peepholes')
    input_dimension = config.getint('models', 'input_dimension')
    input_dimension2 = config.getint('models', 'input_dimension2')
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # Unknown names fall back to Glorot uniform initialisation.
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()
    else:
        weight_init_fn = las.init.GlorotUniform()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1, ))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1, ))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1, ))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_X_audio = data_audio['trData'].astype('float32')
    val_X_audio = data_audio['valData'].astype('float32')
    test_X_audio = data_audio['testData'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1, )) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1, )) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1, )) + 1

    train_X = reorder_data(train_X, (30, 50))
    val_X = reorder_data(val_X, (30, 50))
    test_X = reorder_data(test_X, (30, 50))

    visual_weights, visual_biases = load_dbn(ae_pretrained)
    audio_weights, audio_biases = load_dbn(ae_diff_pretrained)

    # IMPT: the encoder was trained with fortran ordered images, so to
    # visualize, convert all the images to C order using
    # reshape_images_order()

    window = T.iscalar('theta')
    visual_input = T.tensor3('visual_input', dtype='float32')
    audio_input = T.tensor3('audio_input', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    visual_net = avnet.create_pretrained_substream(
        visual_weights, visual_biases, (None, None, input_dimension),
        visual_input, (None, None), mask, 'visual', lstm_size, window,
        nonlinearity, weight_init_fn, use_peepholes)
    audio_net = avnet.create_pretrained_substream(
        audio_weights, audio_biases, (None, None, input_dimension2),
        audio_input, (None, None), mask, 'audio', lstm_size, window,
        nonlinearity, weight_init_fn, use_peepholes)
    network, l_fuse = avnet.create_model(
        [visual_net, audio_net], (None, None), mask, lstm_size,
        output_classes, fusiontype, weight_init_fn, use_peepholes)
    print_network(network)

    print('compiling model...')
    model_inputs = [visual_input, targets, mask, audio_input, window]
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)
    train = theano.function(model_inputs, cost, updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(model_inputs, cost,
                                         allow_input_downcast=True)
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(model_inputs, test_cost,
                                        allow_input_downcast=True)
    val_fn = theano.function([visual_input, mask, audio_input, window],
                             test_predictions, allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    EPOCH_SIZE = 90
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=BATCH_SIZE)
    integral_lens = compute_integral_len(train_vidlens)
    max_train_len = np.max(train_vidlens)  # loop invariant, computed once
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress.
    # The audio batch is built from the same indices as the visual batch.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_audio, idxs_val, val_vidlens,
                                        integral_lens_val,
                                        np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_audio, idxs_test,
                                         test_vidlens, integral_lens_test,
                                         np.max(test_vidlens))

    # Keep the per-sequence targets for evaluation; the loss function gets
    # the targets repeated along the mask's last axis.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1)).repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_audio, batch_idxs,
                                            train_vidlens, integral_lens,
                                            max_train_len)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, WINDOW_SIZE)
            print('\r', end='')

        # The epoch's training cost is measured on the last minibatch only.
        train_cost = compute_train_cost(X, y, m, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val,
                                     WINDOW_SIZE)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl / pq are the values printed as "GL loss" and "GQ" below.
        # train_strip starts as zeros, so until every slot has been written
        # its minimum may be 0 and pk / pq are not meaningful.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                       X_diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_tr = train_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, WINDOW_SIZE,
                                                 val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(
                val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                use_finetuning, 'yes', use_peepholes, 'adam', weight_init,
                'RELU', use_blstm, learning_rate, best_tr, best_val,
                best_cr * 100, test_cr * 100))
            # One comma-separated line per recorded history.
            for history in (cost_train, cost_val, class_rate):
                s = ','.join([str(v) for v in history])
                f.write('{}\n'.format(s))
def main():
    """Train the LSTM baseline classifier on DCT features and report results.

    Reads the config file named by the ``config`` CLI option, loads the image
    and DCT ``.mat`` files, and splits the samples into train / validation /
    test sets using the subject ids listed in ``data/train.txt``,
    ``data/val.txt`` and ``data/test.txt``. It then trains
    ``lstm_classifier_baseline`` with adam on the DCT features.

    The image data is only passed through the batch generators, whose masks
    and batch indices are reused for the DCT features; the network itself is
    fed DCT features only.

    CLI options ``dct_data``, ``no_coeff``, ``no_epochs``,
    ``validation_window``, ``epochsize`` and ``batchsize`` override the
    config file when present. If the ``write_results`` CLI option is present,
    the test classification rate and best validation loss are appended to
    that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(options['dct_data'] if 'dct_data' in options
                             else config.get('data', 'dct'))
    # Cast CLI overrides to int: a string value would turn `no_coeff * 3`
    # into string repetition and break range() / the batch generator.
    no_coeff = int(options['no_coeff']) if 'no_coeff' in options \
        else config.getint('models', 'no_coeff')
    no_epochs = int(options['no_epochs']) if 'no_epochs' in options \
        else config.getint('training', 'no_epochs')
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options \
        else config.getint('training', 'validation_window')
    epochsize = int(options['epochsize']) if 'epochsize' in options \
        else config.getint('training', 'epochsize')
    batchsize = int(options['batchsize']) if 'batchsize' in options \
        else config.getint('training', 'batchsize')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec',
    #  '__header__', 'targetsVec', '__globals__', 'iterVec', 'filenamesVec',
    #  'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32').reshape((-1,))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int').reshape((-1,))
    video_lens = data['videoLengthVec'].astype('int').reshape((-1,))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))

    train_X, train_y, train_dct, train_vidlens, train_subjects, \
        val_X, val_y, val_dct, val_vidlens, val_subjects, \
        test_X, test_y, test_dct, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, subjects, video_lens,
                   train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity check: the three splits must account for every sample.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + \
        test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + \
        test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # featurewise normalize dct features using the training-set statistics
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')

    print('constructing end to end model...')
    network = lstm_classifier_baseline.create_model(
        (None, None, no_coeff * 3), inputs, (None, None), mask, 250, 10)
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions,
                                                          targets))
    updates = adam(cost, all_params)
    train = theano.function([inputs, targets, mask], cost, updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost,
                                         allow_input_downcast=True)
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(
        test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask], test_cost,
                                        allow_input_downcast=True)
    val_fn = theano.function([inputs, mask], test_predictions,
                             allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)
    max_train_len = np.max(train_vidlens)  # loop invariant, computed once

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))

    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test,
                                      np.max(test_vidlens))

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            # Distinct names so the full-dataset X / y are not rebound.
            batch_X, batch_y, batch_mask, batch_idxs = next(datagen)
            batch_dct = gen_seq_batch_from_idx(train_dct, batch_idxs,
                                               train_vidlens, integral_lens,
                                               max_train_len)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(batch_X))
            print(print_str, end='')
            sys.stdout.flush()
            train(batch_dct, batch_y, batch_mask)
            print('\r', end='')

        # The epoch's training cost is measured on the last minibatch only.
        train_cost = compute_train_cost(batch_dct, batch_y, batch_mask)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl / pq are the values printed as "GL loss" and "GQ" below.
        # train_strip starts as zeros, so until every slot has been written
        # its minimum may be 0 and pk / pq are not meaningful.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(dct_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model(dct_test, y_test, mask_test,
                                                val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq,
                          cr, time.time() - time_start))

        if epoch >= validation_window and early_stop2(
                val_window, best_val, validation_window):
            break

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']
    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val,
                                                     test_cr))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{}\n'.format(test_cr, best_val))
def main():
    """Train the ``adenet_v2_1`` raw + diff image fusion model and report results.

    Reads the config file named by the ``config`` CLI option, loads the raw
    and diff image ``.mat`` files, and splits them into train / test parts
    with ``create_split_index``. The encoder is optionally fine-tuned, saved
    and/or loaded from pickle before the fused network is built. Training
    uses the configured update rule (``adadelta``, ``sgdm``, ``sgdnm`` or
    ``adam``).

    The held-out part of the split serves as the validation set. If the
    ``write_results`` CLI option is present, the hyper-parameters, the
    per-epoch histories and the best scores are appended to that file.

    Raises:
        ValueError: if the configured ``update_rule`` or ``model`` is not one
            this script knows how to build.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    diff_data = load_mat_file(config.get('data', 'diff'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')
    model = config.get('models', 'model')

    # capture training parameters
    update_rule = options['update_rule'] if 'update_rule' in options \
        else config.get('training', 'update_rule')
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    decay_rate = float(options['decay_rate']) if 'decay_rate' in options \
        else config.getfloat('training', 'decay_rate')
    decay_start = int(options['decay_start']) if 'decay_start' in options \
        else config.getint('training', 'decay_start')
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options \
        else config.getint('training', 'validation_window')
    t1 = int(options['t1']) if 't1' in options \
        else config.getint('training', 't1')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options \
        else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('training', 'use_peepholes')

    # Fail fast on settings this script cannot build, instead of hitting a
    # NameError on `updates` / `network` after the data has been processed.
    if update_rule not in ('adadelta', 'sgdm', 'sgdnm', 'adam'):
        raise ValueError('unsupported update_rule: {}'.format(update_rule))
    if model != 'adenet_v2_1':
        raise ValueError('unsupported model: {}'.format(model))

    use_momentum = update_rule in ('sgdm', 'sgdnm')
    # `momentum` is written to the results file for every update rule, so it
    # must be bound even when the rule does not use it.
    momentum = None
    if use_momentum:
        momentum = float(options['momentum']) if 'momentum' in options \
            else config.getfloat('training', 'momentum')
        momentum_schedule = options['momentum_schedule'] \
            if 'momentum_schedule' in options \
            else config.get('training', 'momentum_schedule')
        mm_schedule = [float(m) for m in momentum_schedule.split(',')]

    # Unknown names fall back to orthogonal initialisation.
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    elif weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    else:
        weight_init_fn = las.init.Orthogonal()

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    diff_data_matrix = diff_data['dataMatrix']

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data; the masks are computed once and reused for each array
    train_rows = indexes == True
    test_rows = indexes == False
    train_data = data_matrix[train_rows]
    train_targets = targets_vec[train_rows]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[test_rows]
    test_targets = targets_vec[test_rows]
    test_targets = test_targets.reshape((len(test_targets),))
    train_diff_data = diff_data_matrix[train_rows]
    test_diff_data = diff_data_matrix[test_rows]

    if do_finetune:
        print('fine-tuning...')
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_data, train_data)
        res = ae.predict(test_data)
        visualize_reconstruction(test_data[300:336], res[300:336])
        # Only an encoder fine-tuned in this run can be saved.
        if save_finetune:
            with open(ae_finetuned, 'wb') as encoder_file:
                pickle.dump(ae, encoder_file)

    # NOTE(review): pickle executes arbitrary code on load; the paths come
    # from the config file, so only point them at trusted model files.
    if load_finetune:
        print('loading pre-trained encoding layers...')
        with open(ae_finetuned, 'rb') as encoder_file:
            ae = pickle.load(encoder_file)
        ae.initialize()
    if load_finetune_diff:
        print('loading pre-trained diff image encoding layers...')
        with open(ae_finetuned_diff, 'rb') as encoder_file:
            diff_ae = pickle.load(encoder_file)
        diff_ae.initialize()

    # Hard-coded switch: the convolutional encoder path is disabled.
    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs_raw = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs_raw = T.tensor3('inputs_raw', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    window = T.iscalar('theta')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)
    if use_momentum:
        mm = theano.shared(np.array(momentum, dtype=theano.config.floatX),
                           name='momentum')

    print('constructing end to end model...')
    network, l_fuse = adenet_v2_1.create_model(
        ae, diff_ae, (None, None, 1200), inputs_raw, (None, None), mask,
        (None, None, 1200), inputs_diff, 250, window, 26, fusiontype,
        w_init_fn=weight_init_fn, use_peepholes=use_peepholes)
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions,
                                                          targets))
    if update_rule == 'adadelta':
        updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    elif update_rule == 'sgdm':
        updates = las.updates.sgd(cost, all_params, learning_rate=lr)
        updates = las.updates.apply_momentum(updates, all_params,
                                             momentum=mm)
    elif update_rule == 'sgdnm':
        updates = las.updates.sgd(cost, all_params, learning_rate=lr)
        updates = las.updates.apply_nesterov_momentum(updates, all_params,
                                                      momentum=mm)
    else:
        # 'adam' runs with its default hyper-parameters; `lr` is not used.
        updates = las.updates.adam(cost, all_params)

    model_inputs = [inputs_raw, targets, mask, inputs_diff, window]
    train = theano.function(model_inputs, cost, updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(model_inputs, cost,
                                         allow_input_downcast=True)
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(
        test_predictions, targets))
    compute_test_cost = theano.function(model_inputs, test_cost,
                                        allow_input_downcast=True)
    val_fn = theano.function([inputs_raw, mask, inputs_diff, window],
                             test_predictions, allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets,
                                    train_vidlen_vec, batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_data, test_targets,
                                        test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)
    max_train_len = np.max(train_vidlen_vec)  # loop invariant

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    diff_val = gen_seq_batch_from_idx(test_diff_data, idxs_val,
                                      test_vidlen_vec, integral_lens_val,
                                      np.max(test_vidlen_vec))

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            diff = gen_seq_batch_from_idx(train_diff_data, batch_idxs,
                                          train_vidlen_vec, integral_lens,
                                          max_train_len)
            if update_rule == 'adam':
                print_str = 'Epoch {} batch {}/{}: {} examples with {} using default params'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), update_rule)
            elif update_rule == 'adadelta':
                print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f} with {}'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X),
                    float(lr.get_value()), update_rule)
            else:
                print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}, ' \
                            'momentum = {:.4f} with {}'.format(
                                epoch + 1, i + 1, EPOCH_SIZE, len(X),
                                float(lr.get_value()),
                                float(mm.get_value()), update_rule)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, diff, WINDOW_SIZE)
            print('\r', end='')

        # The epoch's training cost is measured on the last minibatch only.
        train_cost = compute_train_cost(X, y, m, diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, diff_val,
                                     WINDOW_SIZE)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl / pq are printed as "generalization loss" and "GQ" below.
        # train_strip starts as zeros, so until every slot has been written
        # its minimum may be 0 and pk / pq are not meaningful.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) /
                     (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, diff_val,
                                      WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
        elif epoch >= t1 and use_momentum:
            # No improvement after epoch t1: decay the learning rate (not
            # below 0.001) and move to the next scheduled momentum, if any.
            lr.set_value(max(lr.get_value() * lr_decay, 0.001))
            if mm_schedule:
                mm.set_value(mm_schedule.pop(0))

        if epoch >= validation_window and early_stop2(
                val_window, best_val, validation_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
               'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
               'y', 'z']
    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr,
                                                                best_val))
    if fusiontype == 'adasum':
        adascale_param = las.layers.get_all_param_values(l_fuse,
                                                         scaling_param=True)
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    if not options['no_plot']:
        plot_confusion_matrix(best_conf, letters, fmt='latex')
        plot_validation_cost(cost_train, cost_val, class_rate,
                             'e2e_valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{},{},{},{},{}\n'.format(
                update_rule, learning_rate, decay_rate, momentum,
                decay_start, t1, validation_window, weight_init,
                use_peepholes))
            # One comma-separated line per recorded history.
            for history in (cost_train, cost_val, class_rate):
                s = ','.join([str(v) for v in history])
                f.write('{}\n'.format(s))
            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))
def main():
    """Train and evaluate the LSTM majority-vote classifier on DCT features.

    The experiment is described by the config file named in the ``config``
    CLI option (sections ``data``, ``models`` and ``training``).  Selected
    training parameters can be overridden from the command line.  The model
    is trained with adam for at most ``no_epochs`` epochs of ``epochsize``
    minibatches each, with early stopping on the validation cost.  The test
    classification rate is re-measured every time the validation cost
    improves, so the reported test figures belong to the best validation
    epoch.

    If the ``write_results`` CLI option is present, a summary row followed by
    the per-epoch training cost, validation cost and classification rate
    curves is appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    def cli_or_config(name, read_config, cast=None, section='training'):
        """Return the CLI override for *name* if present, else the config value."""
        if name in options:
            value = options[name]
            return cast(value) if cast is not None else value
        return read_config(section, name)

    print('Reading Config File: {}...'.format(config_file))
    for section in ('data', 'models', 'training'):
        print(config.items(section))
    print('CLI options: {}'.format(options.items()))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    no_coeff = config.getint('models', 'no_coeff')
    output_classes = config.getint('models', 'output_classes')
    lstm_size = config.getint('models', 'lstm_size')

    # capture training parameters; numeric CLI overrides are cast so that they
    # have the same type as the values read from the config file
    validation_window = cli_or_config('validation_window', config.getint, int)
    no_epochs = cli_or_config('no_epochs', config.getint, int)
    weight_init = cli_or_config('weight_init', config.get)
    learning_rate = cli_or_config('learning_rate', config.getfloat, float)
    epochsize = cli_or_config('epochsize', config.getint, int)
    batchsize = cli_or_config('batchsize', config.getint, int)
    use_peepholes = cli_or_config('use_peepholes', config.getboolean)
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # map the configured name to a lasagne initializer; any other name falls
    # back to Glorot uniform
    init_factories = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = init_factories.get(weight_init, las.init.GlorotUniform)()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_dct = dct_data['trDctFeatures'].astype('float32')
    val_dct = dct_data['valDctFeatures'].astype('float32')
    test_dct = dct_data['testDctFeatures'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    # featurewise normalize dct features; validation and test data reuse the
    # statistics returned for the training set
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # NOTE(review): the feature size is 3 * no_coeff -- presumably the DCT
    # coefficients plus two derivative sets; confirm against the data files
    network = lstm_classifier_majority_vote.create_model(
        (None, None, no_coeff * 3), inputs,
        (None, None), mask,
        lstm_size, output_classes, w_init=weight_init_fn)
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    # use the configured learning rate: it is the value logged to the results
    # file, so it must also be the value adam actually trains with
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs, targets, mask], cost, updates=updates,
        allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)
    val_fn = theano.function(
        [inputs, mask], test_predictions, allow_input_downcast=True)

    # train for at most no_epochs epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0
    # pre-bound so the final report cannot raise NameError when the
    # validation cost never improves (e.g. no_epochs == 0 or a NaN cost)
    test_cr = float('nan')
    test_conf = None

    # the image batches only drive the sampling (labels, masks and indexes);
    # the network itself is fed the DCT features gathered for the same indexes
    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress
    _, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))

    _, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test, np.max(test_vidlens))

    # keep the per-sequence targets for evaluation and repeat them along the
    # time axis for the temporal loss
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            _, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(y))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            print('\r', end='')

        # the training cost is measured on the last minibatch of the epoch
        train_cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: increase (in percent) of the current validation cost over the
        # lowest one seen so far
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: how far the mean of the last STRIP_SIZE training costs lies
        # above their minimum (scaled by 1000)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, _ = evaluate_model2(dct_val, y_val_evaluate, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_tr = train_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(dct_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    print('confusion matrix: ')
    if test_conf is not None:
        plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                use_finetuning, 'yes', use_peepholes, 'adam', weight_init, 'N/A',
                use_blstm, learning_rate, best_tr, best_val,
                best_cr*100, test_cr*100))
            for curve in (cost_train, cost_val, class_rate):
                f.write('{}\n'.format(','.join([str(v) for v in curve])))
def main():
    """Train and evaluate the two-stream (raw + diff image) adenet_v2_2 model.

    The experiment is described by the config file named in the ``config``
    CLI option (sections ``data``, ``models`` and ``training``).  Selected
    training parameters can be overridden from the command line.  The data
    is split by subject using the id lists in ``data/train.txt``,
    ``data/val.txt`` and ``data/test.txt``.  Training uses adam for at most
    ``num_epoch`` epochs of ``epochsize`` minibatches, with early stopping on
    the validation cost.  The test classification rate is re-measured every
    time the validation cost improves.

    If the ``write_results`` CLI option is present, a
    ``test_cr,best_cr,best_val`` row is appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    def cli_or_config(name, read_config, cast=None, section='training'):
        """Return the CLI override for *name* if present, else the config value."""
        if name in options:
            value = options[name]
            return cast(value) if cast is not None else value
        return read_config(section, name)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    for section in ('data', 'models', 'training'):
        print(config.items(section))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_pretrained_diff = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')

    # capture training parameters; the learning rate override is cast because
    # it is handed to adam and must be numeric, like the config value
    validation_window = cli_or_config('validation_window', config.getint, int)
    num_epoch = cli_or_config('num_epoch', config.getint, int)
    weight_init = cli_or_config('weight_init', config.get)
    learning_rate = cli_or_config('learning_rate', config.getfloat, float)
    use_peepholes = cli_or_config('use_peepholes', config.getboolean)
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')
    windowsize = config.getint('training', 'windowsize')

    # map the configured name to a lasagne initializer; any other name falls
    # back to Glorot uniform
    init_factories = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = init_factories.get(weight_init, las.init.GlorotUniform)()

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')

    data_matrix = data['dataMatrix']
    targets_vec = data['targetsVec'].reshape((-1,))
    subjects_vec = data['subjectsVec'].reshape((-1,))
    vidlen_vec = data['videoLengthVec'].reshape((-1,))

    data_matrix = reorder_data(data_matrix, (30, 50))

    # the per-split subject vectors are returned but not needed here
    train_X, train_y, train_vidlens, _, \
        val_X, val_y, val_vidlens, _, \
        test_X, test_y, test_vidlens, _ = split_seq_data(
            data_matrix, targets_vec, subjects_vec, vidlen_vec,
            train_subject_ids, val_subject_ids, test_subject_ids)

    # diff images are computed before the mean image is subtracted
    train_X_diff = compute_diff_images(train_X, train_vidlens)
    val_X_diff = compute_diff_images(val_X, val_vidlens)
    test_X_diff = compute_diff_images(test_X, test_vidlens)

    train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
    val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
    test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    ae = load_dbn(ae_pretrained)
    ae_diff = load_dbn(ae_pretrained_diff)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # NOTE(review): 1500 presumably is the flattened 30x50 image, 250 the LSTM
    # size and 10 the number of output classes -- confirm against
    # adenet_v2_2.create_model before moving them into the config file
    network, l_fuse = adenet_v2_2.create_model(
        ae, ae_diff,
        (None, None, 1500), inputs,
        (None, None), mask,
        (None, None, 1500), inputs_diff,
        250, window, 10, fusiontype,
        w_init_fn=weight_init_fn,
        use_peepholes=use_peepholes)
    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function(
        [inputs, mask, inputs_diff, window],
        test_predictions, allow_input_downcast=True)

    # train for at most num_epoch epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0
    # pre-bound so the final report cannot raise NameError when the
    # validation cost never improves (e.g. num_epoch == 0 or a NaN cost)
    test_cr = float('nan')
    test_conf = None
    adascale_param = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=batchsize)
    integral_lens = compute_integral_len(train_vidlens)

    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test, np.max(test_vidlens))

    # keep the per-sequence targets for evaluation and repeat them along the
    # time axis for the temporal loss
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs, train_vidlens,
                                            integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, windowsize)
            print('\r', end='')

        # the training cost is measured on the last minibatch of the epoch
        train_cost = compute_train_cost(X, y, m, X_diff, windowsize)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val, windowsize)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: increase (in percent) of the current validation cost over the
        # lowest one seen so far
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: how far the mean of the last STRIP_SIZE training costs lies
        # above their minimum (scaled by 1000)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, _ = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                X_diff_val, windowsize, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, windowsize, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    if test_conf is not None:
        plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))
def main():
    """Train and evaluate the three-stream adenet model.

    The experiment is described by the config file named in the ``config``
    CLI option (sections ``stream1``, ``stream2``, ``stream3``,
    ``lstm_classifier`` and ``training``).  Selected parameters can be
    overridden from the command line.  Targets, subject ids and video lengths
    are taken from stream 1 and shared by all streams.  Training uses adam for
    at most ``num_epoch`` epochs of ``epochsize`` minibatches, with early
    stopping on the validation cost.  Whenever the validation cost improves,
    the test classification rate is re-measured and the network parameters
    are remembered as the best model.

    Optional CLI outputs: ``save_plot`` (validation-cost plot and confusion
    matrix text file sharing that prefix), ``write_results`` (appends a
    ``test_cr,best_cr,best_val`` row) and ``save_best`` (stores the best
    model parameters).
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    def cli_or_config(name, read_config, cast=None, section='training'):
        """Return the CLI override for *name* if present, else the config value."""
        if name in options:
            value = options[name]
            return cast(value) if cast is not None else value
        return read_config(section, name)

    def read_imagesize(section):
        """Parse the comma separated ``imagesize`` option of *section*."""
        return tuple(
            [int(d) for d in config.get(section, 'imagesize').split(',')])

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    for section in ('stream1', 'stream2', 'stream3', 'lstm_classifier', 'training'):
        print(config.items(section))

    print('preprocessing dataset...')
    # stream 1
    s1_data = load_mat_file(config.get('stream1', 'data'))
    s1_imagesize = read_imagesize('stream1')
    s1 = config.get('stream1', 'model')
    s1_inputdim = config.getint('stream1', 'input_dimensions')
    s1_shape = config.get('stream1', 'shape')
    s1_nonlinearities = config.get('stream1', 'nonlinearities')

    # stream 2
    s2_data = load_mat_file(config.get('stream2', 'data'))
    s2_imagesize = read_imagesize('stream2')
    s2 = config.get('stream2', 'model')
    s2_inputdim = config.getint('stream2', 'input_dimensions')
    s2_shape = config.get('stream2', 'shape')
    s2_nonlinearities = config.get('stream2', 'nonlinearities')

    # stream 3
    s3_data = load_mat_file(config.get('stream3', 'data'))
    s3_imagesize = read_imagesize('stream3')
    s3 = config.get('stream3', 'model')
    s3_inputdim = config.getint('stream3', 'input_dimensions')
    s3_shape = config.get('stream3', 'shape')
    s3_nonlinearities = config.get('stream3', 'nonlinearities')

    # lstm classifier
    fusiontype = config.get('lstm_classifier', 'fusiontype')
    weight_init = cli_or_config('weight_init', config.get,
                                section='lstm_classifier')
    use_peepholes = cli_or_config('use_peepholes', config.getboolean,
                                  section='lstm_classifier')
    windowsize = config.getint('lstm_classifier', 'windowsize')
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')
    use_dropout = config.getboolean('lstm_classifier', 'use_dropout')

    # capture training parameters; the learning rate override is cast because
    # it is handed to adam and must be numeric, like the config value
    validation_window = cli_or_config('validation_window', config.getint, int)
    num_epoch = cli_or_config('num_epoch', config.getint, int)
    learning_rate = cli_or_config('learning_rate', config.getfloat, float)
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    # map the configured name to a lasagne initializer; any other name falls
    # back to Glorot uniform
    init_factories = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = init_factories.get(weight_init, las.init.GlorotUniform)()

    train_subject_ids = read_data_split_file(
        config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(
        config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(
        config.get('training', 'test_subjects_file'))

    s1_data_matrix = s1_data['dataMatrix'].astype('float32')
    s2_data_matrix = s2_data['dataMatrix'].astype('float32')
    s3_data_matrix = s3_data['dataMatrix'].astype('float32')
    targets_vec = s1_data['targetsVec'].reshape((-1, ))
    subjects_vec = s1_data['subjectsVec'].reshape((-1, ))
    vidlen_vec = s1_data['videoLengthVec'].reshape((-1, ))
    force_align_data = config.getboolean('stream1', 'force_align_data')

    if matlab_target_offset:
        targets_vec -= 1

    s1_data_matrix = presplit_dataprocessing(s1_data_matrix, vidlen_vec, config,
                                             'stream1', imagesize=s1_imagesize)
    s2_data_matrix = presplit_dataprocessing(s2_data_matrix, vidlen_vec, config,
                                             'stream2', imagesize=s2_imagesize)
    s3_data_matrix = presplit_dataprocessing(s3_data_matrix, vidlen_vec, config,
                                             'stream3', imagesize=s3_imagesize)

    if force_align_data:
        # NOTE(review): the stream 2/3 targets are passed without the matlab
        # offset applied to the stream 1 targets above -- confirm this is what
        # multistream_force_align expects
        s2_targets_vec = s2_data['targetsVec'].reshape((-1, ))
        s2_vidlen_vec = s2_data['videoLengthVec'].reshape((-1, ))
        s3_targets_vec = s3_data['targetsVec'].reshape((-1, ))
        s3_vidlen_vec = s3_data['videoLengthVec'].reshape((-1, ))
        orig_streams = [
            (s1_data_matrix, targets_vec, vidlen_vec),
            (s2_data_matrix, s2_targets_vec, s2_vidlen_vec),
            (s3_data_matrix, s3_targets_vec, s3_vidlen_vec),
        ]
        new_streams = multistream_force_align(orig_streams)
        # stream 1 provides the targets and lengths used from here on
        s1_data_matrix, targets_vec, vidlen_vec = new_streams[0]
        s2_data_matrix, _, _ = new_streams[1]
        s3_data_matrix, _, _ = new_streams[2]

    def split_by_subject(stream_matrix):
        """Split one stream into train/val/test using the shared metadata."""
        return split_seq_data(stream_matrix, targets_vec, subjects_vec, vidlen_vec,
                              train_subject_ids, val_subject_ids, test_subject_ids)

    # every stream is split with the same targets/lengths, so labels and
    # lengths are kept from stream 1 only; subject vectors are not needed
    s1_train_X, s1_train_y, s1_train_vidlens, _, \
        s1_val_X, s1_val_y, s1_val_vidlens, _, \
        s1_test_X, s1_test_y, s1_test_vidlens, _ = split_by_subject(s1_data_matrix)
    s2_train_X, _, _, _, s2_val_X, _, _, _, s2_test_X, _, _, _ = \
        split_by_subject(s2_data_matrix)
    s3_train_X, _, _, _, s3_val_X, _, _, _, s3_test_X, _, _, _ = \
        split_by_subject(s3_data_matrix)

    s1_train_X, s1_val_X, s1_test_X = postsplit_datapreprocessing(
        s1_train_X, s1_val_X, s1_test_X, config, 'stream1')
    s2_train_X, s2_val_X, s2_test_X = postsplit_datapreprocessing(
        s2_train_X, s2_val_X, s2_test_X, config, 'stream2')
    s3_train_X, s3_val_X, s3_test_X = postsplit_datapreprocessing(
        s3_train_X, s3_val_X, s3_test_X, config, 'stream3')

    ae1 = load_decoder(s1, s1_shape, s1_nonlinearities)
    ae2 = load_decoder(s2, s2_shape, s2_nonlinearities)
    ae3 = load_decoder(s3, s3_shape, s3_nonlinearities)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    window = T.iscalar('theta')
    inputs1 = T.tensor3('inputs1', dtype='float32')
    inputs2 = T.tensor3('inputs2', dtype='float32')
    inputs3 = T.tensor3('inputs3', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # both model modules are called with identical arguments; only the module
    # differs depending on use_dropout
    model_module = adenet_3stream_dropout if use_dropout else adenet_3stream
    network, _ = model_module.create_model(
        ae1, ae2, ae3,
        (None, None, s1_inputdim), inputs1,
        (None, None, s2_inputdim), inputs2,
        (None, None, s3_inputdim), inputs3,
        (None, None), mask,
        lstm_size, window, output_classes, fusiontype,
        w_init_fn=weight_init_fn,
        use_peepholes=use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs1, inputs2, inputs3, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs1, inputs2, inputs3, targets, mask, window],
        cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs1, inputs2, inputs3, targets, mask, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function(
        [inputs1, inputs2, inputs3, mask, window],
        test_predictions, allow_input_downcast=True)

    # train for at most num_epoch epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0
    # pre-bound so the final report and the save_best step cannot raise
    # NameError when the validation cost never improves
    # (e.g. num_epoch == 0 or a NaN cost)
    test_cr = float('nan')
    test_conf = None
    best_params = None

    # stream 1 drives the batch sampling; streams 2 and 3 are gathered for the
    # same indexes using the stream 1 video lengths
    datagen = gen_lstm_batch_random(s1_train_X, s1_train_y, s1_train_vidlens,
                                    batchsize=batchsize)
    integral_lens = compute_integral_len(s1_train_vidlens)

    val_datagen = gen_lstm_batch_random(s1_val_X, s1_val_y, s1_val_vidlens,
                                        batchsize=len(s1_val_vidlens))
    test_datagen = gen_lstm_batch_random(s1_test_X, s1_test_y, s1_test_vidlens,
                                         batchsize=len(s1_test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_s1_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(s1_val_vidlens)
    max_val_len = np.max(s1_val_vidlens)
    X_s2_val = gen_seq_batch_from_idx(s2_val_X, idxs_val, s1_val_vidlens,
                                      integral_lens_val, max_val_len)
    X_s3_val = gen_seq_batch_from_idx(s3_val_X, idxs_val, s1_val_vidlens,
                                      integral_lens_val, max_val_len)

    # we use the test set to check final classification rate
    X_s1_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(s1_test_vidlens)
    max_test_len = np.max(s1_test_vidlens)
    X_s2_test = gen_seq_batch_from_idx(s2_test_X, idxs_test, s1_test_vidlens,
                                       integral_lens_test, max_test_len)
    X_s3_test = gen_seq_batch_from_idx(s3_test_X, idxs_test, s1_test_vidlens,
                                       integral_lens_test, max_test_len)

    # keep the per-sequence targets for evaluation and repeat them along the
    # time axis for the temporal loss
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    max_train_len = np.max(s1_train_vidlens)
    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X_s1, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_s2 = gen_seq_batch_from_idx(s2_train_X, batch_idxs, s1_train_vidlens,
                                          integral_lens, max_train_len)
            X_s3 = gen_seq_batch_from_idx(s3_train_X, batch_idxs, s1_train_vidlens,
                                          integral_lens, max_train_len)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate = {}'.format(
                epoch + 1, i + 1, epochsize, len(X_s1), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X_s1, X_s2, X_s3, y, m, windowsize)
            print('\r', end='')

        # the training cost is measured on the last minibatch of the epoch
        train_cost = compute_train_cost(X_s1, X_s2, X_s3, y, m, windowsize)
        val_cost = compute_test_cost(X_s1_val, X_s2_val, X_s3_val,
                                     y_val, mask_val, windowsize)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: increase (in percent) of the current validation cost over the
        # lowest one seen so far
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: how far the mean of the last STRIP_SIZE training costs lies
        # above their minimum (scaled by 1000)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, _ = evaluate_model2(X_s1_val, X_s2_val, X_s3_val,
                                y_val_evaluate, mask_val, windowsize, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(X_s1_test, X_s2_test, X_s3_test,
                                                 y_test, mask_test, windowsize, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr, time.time() - time_start))
            # remember the parameters of the best validation epoch
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))

    # plot confusion matrix (empty when the test set was never evaluated)
    table_str = ''
    if test_conf is not None:
        table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train, cost_val,
                             savefilename='{}.validloss.png'.format(prefix))
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        if best_params is None:
            print('no best model was recorded, nothing saved')
        else:
            print('saving best model...')
            # restore the best-epoch parameters before writing them out
            las.layers.set_all_param_values(network, best_params)
            save_model_params(network, options['save_best'])
            print('best model saved to {}'.format(options['save_best']))
def main():
    """Train and evaluate an LSTM classifier on the 'stream1' data of a config file.

    Reads the config file named by the ``config`` CLI option (sections
    ``stream1``, ``lstm_classifier`` and ``training``), loads the .mat dataset,
    splits it with ``create_split_index``, builds the network
    (``deltanet_v1`` when ``has_encoder`` is false, otherwise
    ``deltanet_majority_vote`` on top of the loaded encoder), trains it with
    adam and prints/plots the result of the epoch with the lowest validation
    cost.

    The CLI options ``weight_init``, ``use_peepholes``, ``validation_window``,
    ``num_epoch`` and ``learning_rate`` override the config file when present.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('stream1', 'data'))
    has_encoder = config.getboolean('stream1', 'has_encoder')
    stream1_dim = config.getint('stream1', 'input_dimensions')
    imagesize = tuple(int(d) for d in config.get('stream1', 'imagesize').split(','))
    if has_encoder:
        stream1 = config.get('stream1', 'model')
        stream1_shape = config.get('stream1', 'shape')
        stream1_nonlinearities = config.get('stream1', 'nonlinearities')

    # lstm classifier
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')

    # lstm classifier configurations
    if 'weight_init' in options:
        weight_init = options['weight_init']
    else:
        weight_init = config.get('lstm_classifier', 'weight_init')
    # NOTE(review): a CLI-supplied use_peepholes is used as-is; if
    # parse_options returns strings, any non-empty value is truthy -- confirm.
    if 'use_peepholes' in options:
        use_peepholes = options['use_peepholes']
    else:
        use_peepholes = config.getboolean('lstm_classifier', 'use_peepholes')
    # Honour the configured value: the mere presence of the option used to
    # enable the BLSTM even when it was explicitly set to false.
    use_blstm = (config.has_option('lstm_classifier', 'use_blstm')
                 and config.getboolean('lstm_classifier', 'use_blstm'))
    windowsize = config.getint('lstm_classifier', 'windowsize')

    # capture training parameters
    if 'validation_window' in options:
        validation_window = int(options['validation_window'])
    else:
        validation_window = config.getint('training', 'validation_window')
    if 'num_epoch' in options:
        num_epoch = int(options['num_epoch'])
    else:
        num_epoch = config.getint('training', 'num_epoch')
    # Cast the CLI override like the other numeric options: the value is fed to
    # adam and to a '{:.4f}' format, both of which need a real number.
    if 'learning_rate' in options:
        learning_rate = float(options['learning_rate'])
    else:
        learning_rate = config.getfloat('training', 'learning_rate')

    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    # Unknown weight_init names fall back to Glorot uniform.
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()
    else:
        weight_init_fn = las.init.GlorotUniform()

    data_matrix = data['dataMatrix'].astype('float32')
    targets_vec = data['targetsVec'].reshape((-1,))
    subjects_vec = data['subjectsVec'].reshape((-1,))
    vidlen_vec = data['videoLengthVec'].reshape((-1,))
    iter_vec = data['iterVec'].reshape((-1,))

    data_matrix = presplit_dataprocessing(data_matrix, vidlen_vec, config, 'stream1',
                                          imagesize=imagesize)

    indexes = create_split_index(len(data_matrix), vidlen_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vidlen_vec, iter_vec)

    if matlab_target_offset:
        targets_vec -= 1

    # split the data; the explicit comparisons yield boolean masks whatever
    # dtype create_split_index returns
    train_rows = indexes == True
    test_rows = indexes == False
    train_data = data_matrix[train_rows]
    train_targets = targets_vec[train_rows]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[test_rows]
    test_targets = targets_vec[test_rows]
    test_targets = test_targets.reshape((len(test_targets),))

    train_data, test_data = postsplit_datapreprocessing(train_data, test_data, config, 'stream1')

    inputs = T.tensor3('inputs', dtype='float32')
    window = T.iscalar('theta')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    if not has_encoder:
        network = deltanet_v1.create_model((None, None, stream1_dim), inputs,
                                           (None, None), mask, window,
                                           lstm_size, output_classes, weight_init_fn,
                                           use_peepholes, use_blstm)
    else:
        ae1 = load_decoder(stream1, stream1_shape, stream1_nonlinearities)
        network = deltanet_majority_vote.create_model(ae1, (None, None, stream1_dim), inputs,
                                                      (None, None), mask,
                                                      lstm_size, window, output_classes,
                                                      weight_init_fn, use_peepholes)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'network.png', verbose=True)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = las.updates.adam(cost, all_params, learning_rate)

    train = theano.function(
        [inputs, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, window], test_predictions,
                             allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec,
                                    batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))

    # The held-out split is drawn once as a single batch and reused as the
    # "validation set" for every epoch.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)

    # Keep the per-sequence targets for evaluate_model2; the cost function
    # takes targets repeated along the mask's last axis.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, windowsize)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, windowsize)
        val_cost = compute_test_cost(X_val, y_val, mask_val, windowsize)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best
        # one so far; pk: how much the mean of the last STRIP_SIZE training
        # costs exceeds their minimum (x1000); pq is their ratio.
        # NOTE(review): train_strip starts as zeros, so pk is presumably not
        # finite until the strip has been filled -- confirm this is acceptable.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, windowsize, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, output_classnames, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate)
def main():
    """Train and evaluate the majority-vote LSTM classifier on DCT features.

    Reads the config file named by the ``config`` CLI option (sections
    ``data``, ``models`` and ``training``), loads the image and DCT .mat files
    with their predefined train/val/test splits, normalizes the DCT features
    with the training statistics, trains
    ``lstm_classifier_majority_vote`` with adam, and reports the test result
    of the epoch with the lowest validation cost.  When the ``write_results``
    CLI option is present, a summary line and the per-epoch cost/accuracy
    curves are appended to that file.

    The CLI options ``validation_window``, ``no_epochs``, ``weight_init``,
    ``learning_rate``, ``epochsize``, ``batchsize`` and ``use_peepholes``
    override the config file when present.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))
    print('CLI options: {}'.format(options.items()))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    no_coeff = config.getint('models', 'no_coeff')
    output_classes = config.getint('models', 'output_classes')
    lstm_size = config.getint('models', 'lstm_size')

    # capture training parameters; numeric CLI overrides are cast explicitly
    # so they behave like the values read from the config file
    if 'validation_window' in options:
        validation_window = int(options['validation_window'])
    else:
        validation_window = config.getint('training', 'validation_window')
    if 'no_epochs' in options:
        no_epochs = int(options['no_epochs'])
    else:
        no_epochs = config.getint('training', 'no_epochs')
    if 'weight_init' in options:
        weight_init = options['weight_init']
    else:
        weight_init = config.get('training', 'weight_init')
    if 'learning_rate' in options:
        learning_rate = float(options['learning_rate'])
    else:
        learning_rate = config.getfloat('training', 'learning_rate')
    if 'epochsize' in options:
        epochsize = int(options['epochsize'])
    else:
        epochsize = config.getint('training', 'epochsize')
    if 'batchsize' in options:
        batchsize = int(options['batchsize'])
    else:
        batchsize = config.getint('training', 'batchsize')
    # use_peepholes is only recorded in the results file by this script.
    if 'use_peepholes' in options:
        use_peepholes = options['use_peepholes']
    else:
        use_peepholes = config.getboolean('training', 'use_peepholes')
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # Unknown weight_init names fall back to Glorot uniform.
    if weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()
    else:
        weight_init_fn = las.init.GlorotUniform()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_dct = dct_data['trDctFeatures'].astype('float32')
    val_dct = dct_data['valDctFeatures'].astype('float32')
    test_dct = dct_data['testDctFeatures'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    # featurewise normalize dct features; val/test reuse the training statistics
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = lstm_classifier_majority_vote.create_model(
        (None, None, no_coeff * 3), inputs, (None, None), mask,
        lstm_size, output_classes, w_init=weight_init_fn)
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    # Pass the configured learning rate: it is reported in the results file,
    # so the optimizer must actually use it instead of its built-in default.
    updates = adam(cost, all_params, learning_rate)

    train = theano.function([inputs, targets, mask], cost,
                            updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost,
                                         allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask], test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions,
                             allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0

    # The image generators are only used for their targets, masks and batch
    # indexes; the network input is the DCT batch gathered with those indexes.
    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # The validation split is drawn once as a single batch and reused.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))

    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test, np.max(test_vidlens))

    # Keep the per-sequence targets for evaluate_model2; the cost function
    # takes targets repeated along the mask's last axis.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            _, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(y))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best
        # one so far; pk: how much the mean of the last STRIP_SIZE training
        # costs exceeds their minimum (x1000); pq is their ratio.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(dct_val, y_val_evaluate, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            # Record the training cost of the best epoch; previously best_tr
            # was never updated and 'inf' was written to the results file.
            best_tr = cost
            best_cr = cr
            # The test set is only evaluated when validation improves.
            test_cr, test_conf = evaluate_model2(dct_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            # summary line, followed by one line per recorded curve
            f.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                use_finetuning, 'yes', use_peepholes, 'adam', weight_init, 'N/A',
                use_blstm, learning_rate, best_tr, best_val,
                best_cr * 100, test_cr * 100))
            for curve in (cost_train, cost_val, class_rate):
                f.write('{}\n'.format(','.join([str(v) for v in curve])))
def main():
    """Train and evaluate the baseline LSTM classifier on DCT features.

    Reads the config file named by the ``config`` CLI option (sections
    ``data``, ``models`` and ``training``), loads the image and DCT .mat
    files, splits them with ``create_split_index``, normalizes the DCT
    features with the training statistics, trains
    ``lstm_classifier_baseline`` with adam and prints/plots the result of the
    epoch with the lowest validation cost.

    The CLI options ``dct_data``, ``no_coeff``, ``no_epochs``,
    ``validation_window``, ``epochsize`` and ``batchsize`` override the config
    file when present.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    if 'dct_data' in options:
        dct_path = options['dct_data']
    else:
        dct_path = config.get('data', 'dct')
    dct_data = load_mat_file(dct_path)

    # Integer CLI overrides are cast explicitly: they feed range(), the input
    # dimension (no_coeff * 3) and the validation window size, all of which
    # need real ints just like the values read with config.getint().
    if 'no_coeff' in options:
        no_coeff = int(options['no_coeff'])
    else:
        no_coeff = config.getint('models', 'no_coeff')
    if 'no_epochs' in options:
        no_epochs = int(options['no_epochs'])
    else:
        no_epochs = config.getint('training', 'no_epochs')
    if 'validation_window' in options:
        validation_window = int(options['validation_window'])
    else:
        validation_window = config.getint('training', 'validation_window')
    if 'epochsize' in options:
        epochsize = int(options['epochsize'])
    else:
        epochsize = config.getint('training', 'epochsize')
    if 'batchsize' in options:
        batchsize = int(options['batchsize'])
    else:
        batchsize = config.getint('training', 'batchsize')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures'].astype('float32')

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)

    # sanity checks on the expected dataset layout
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data; the explicit comparisons yield boolean masks whatever
    # dtype create_split_index returns
    train_rows = indexes == True
    test_rows = indexes == False
    train_data = data_matrix[train_rows]
    train_targets = targets_vec[train_rows]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[test_rows]
    test_targets = targets_vec[test_rows]
    test_targets = test_targets.reshape((len(test_targets),))

    # split the dct features; the test part reuses the training statistics
    train_dct = dct_feats[train_rows].astype(np.float32)
    test_dct = dct_feats[test_rows].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')

    print('constructing end to end model...')
    network = lstm_classifier_baseline.create_model(
        (None, None, no_coeff * 3), inputs, (None, None), mask, 250, 26)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'network.png', verbose=True)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adam(cost, all_params)

    train = theano.function([inputs, targets, mask], cost,
                            updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost,
                                         allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask], test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions,
                             allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    # The image generators provide targets, masks and batch indexes; the
    # network input is the DCT batch gathered with those indexes.
    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec,
                                    batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # The held-out split is drawn once as a single batch and reused as the
    # "validation set" for every epoch.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlen_vec,
                                     integral_lens_val, np.max(test_vidlen_vec))

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlen_vec,
                                       integral_lens, np.max(train_vidlen_vec))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best
        # one so far; pk: how much the mean of the last STRIP_SIZE training
        # costs exceeds their minimum (x1000); pq is their ratio.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(dct_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    letters = list('abcdefghijklmnopqrstuvwxyz')

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate)
def main():
    """Train and evaluate the baseline LSTM classifier on DCT features.

    Reads the config file named by the ``config`` CLI option (sections
    ``data``, ``models`` and ``training``), loads the image and DCT .mat
    files, splits them with ``create_split_index``, normalizes the DCT
    features with the training statistics, trains
    ``lstm_classifier_baseline`` with adam and prints/plots the result of the
    epoch with the lowest validation cost.

    The CLI options ``dct_data``, ``no_coeff``, ``no_epochs``,
    ``validation_window``, ``epochsize`` and ``batchsize`` override the config
    file when present.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    for section in ('data', 'models', 'training'):
        print(config.items(section))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(options['dct_data'] if 'dct_data' in options
                             else config.get('data', 'dct'))

    # Integer CLI overrides are cast explicitly: they feed range(), the input
    # dimension (no_coeff * 3) and the validation window size, all of which
    # need real ints just like the values read with config.getint().
    no_coeff = int(options['no_coeff']) if 'no_coeff' in options \
        else config.getint('models', 'no_coeff')
    no_epochs = int(options['no_epochs']) if 'no_epochs' in options \
        else config.getint('training', 'no_epochs')
    validation_window = int(options['validation_window']) if 'validation_window' in options \
        else config.getint('training', 'validation_window')
    epochsize = int(options['epochsize']) if 'epochsize' in options \
        else config.getint('training', 'epochsize')
    batchsize = int(options['batchsize']) if 'batchsize' in options \
        else config.getint('training', 'batchsize')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures'].astype('float32')

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)

    # sanity checks on the expected dataset layout
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data; the explicit comparisons yield boolean masks whatever
    # dtype create_split_index returns
    is_train = indexes == True
    is_test = indexes == False
    train_data = data_matrix[is_train]
    train_targets = targets_vec[is_train]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[is_test]
    test_targets = targets_vec[is_test]
    test_targets = test_targets.reshape((len(test_targets),))

    # split the dct features; the test part reuses the training statistics
    train_dct = dct_feats[is_train].astype(np.float32)
    test_dct = dct_feats[is_test].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')

    print('constructing end to end model...')
    network = lstm_classifier_baseline.create_model((None, None, no_coeff * 3), inputs,
                                                    (None, None), mask, 250, 26)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'network.png', verbose=True)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adam(cost, all_params)

    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost,
                                         allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    # The image generators provide targets, masks and batch indexes; the
    # network input is the DCT batch gathered with those indexes.
    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec,
                                    batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # The held-out split is drawn once as a single batch and reused as the
    # "validation set" for every epoch.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlen_vec,
                                     integral_lens_val, np.max(test_vidlen_vec))

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlen_vec,
                                       integral_lens, np.max(train_vidlen_vec))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(d, y, m)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(d, y, m)
        val_cost = compute_test_cost(dct_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best
        # one so far; pk: how much the mean of the last STRIP_SIZE training
        # costs exceeds their minimum (x1000); pq is their ratio.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(dct_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
               'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate)
def main():
    """Train and evaluate the ``adenet_v3`` fusion model.

    Reads the config file named by the ``config`` CLI option (sections
    ``data``, ``models`` and ``training``), loads the image and DCT .mat files
    with their predefined train/val/test splits, computes difference images,
    loads the two pickled finetuned encoders, builds the network with
    ``adenet_v3.create_model`` and trains it with adadelta using a decaying
    learning rate.  The test set is evaluated whenever the validation cost
    improves.  When the ``write_results`` CLI option is set, a summary line is
    appended to that file.

    Raises:
        ValueError: if either ``load_finetune`` or ``load_finetune_diff`` is
            disabled in the config; both encoders are passed to
            ``adenet_v3.create_model``.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_dct = dct_data['trDctFeatures'].astype('float32')
    val_dct = dct_data['valDctFeatures'].astype('float32')
    test_dct = dct_data['testDctFeatures'].astype('float32')
    train_X_diff = compute_diff_images(train_X, train_vidlens)
    val_X_diff = compute_diff_images(val_X, val_vidlens)
    test_X_diff = compute_diff_images(test_X, test_vidlens)
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    # featurewise normalize dct features; val/test reuse the training statistics
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    val_dct = (val_dct - dct_mean) / dct_std
    test_dct = (test_dct - dct_mean) / dct_std

    # NOTE: pickle.load executes arbitrary code from the file; only point the
    # config at encoder files from a trusted source.
    ae = None
    ae_diff = None
    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'rb') as encoder_file:
            ae = pickle.load(encoder_file)
        ae.initialize()
    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        with open(ae_finetuned_diff, 'rb') as encoder_file:
            ae_diff = pickle.load(encoder_file)
        ae_diff.initialize()
    if ae is None or ae_diff is None:
        # Both encoders are handed to create_model below; fail with a clear
        # message instead of a NameError on an unbound variable.
        raise ValueError('both load_finetune and load_finetune_diff must be '
                         'enabled in the [training] section')

    # IMPT: the encoder was trained with fortran ordered images, so to
    # visualize, convert all the images to C order using reshape_images_order()

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate lives in a shared variable so it can be decayed
    # between epochs without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network, l_fuse = adenet_v3.create_model(ae, ae_diff, (None, None, 1500), inputs,
                                             (None, None), mask,
                                             (None, None, 90), dct,
                                             (None, None, 1500), inputs_diff,
                                             250, window, 10, fusiontype)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)

    train = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, dct, inputs_diff, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, dct, inputs_diff, window], test_predictions,
                             allow_input_downcast=True)

    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 45
    BATCH_SIZE = 20
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    integral_lens = compute_integral_len(train_vidlens)

    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # The validation split is drawn once as a single batch and reused; the DCT
    # and difference-image batches are gathered with the same indexes.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    dct_val = gen_seq_batch_from_idx(val_dct, idxs_val, val_vidlens,
                                     integral_lens_val, np.max(val_vidlens))
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    dct_test = gen_seq_batch_from_idx(test_dct, idxs_test, test_vidlens,
                                      integral_lens_test, np.max(test_vidlens))
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test, np.max(test_vidlens))

    def early_stop(cost_window):
        """Return True when every cost in the window exceeds the one before it."""
        if len(cost_window) < 2:
            return False
        curr = cost_window[0]
        for idx, window_cost in enumerate(cost_window):
            if curr < window_cost or idx == 0:
                curr = window_cost
            else:
                return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs,
                                       train_vidlens, integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')

        # The training cost is measured on the last minibatch of the epoch.
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: percentage by which the current validation cost exceeds the best
        # one so far; pk: how much the mean of the last STRIP_SIZE training
        # costs exceeds their minimum (x1000); pq is their ratio.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, X_diff_val,
                                      WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
            # The test set is only evaluated when validation improves.
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test,
                                                dct_test, X_diff_test, WINDOW_SIZE, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    # Guard the lookup: indexing directly raised KeyError when the option was
    # not supplied on the command line.
    if 'write_results' in options and options['write_results']:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(fusiontype, test_cr, best_val))
def main():
    """Train and evaluate the delta-net majority-vote LSTM classifier.

    Reads the config file named by the ``config`` CLI option, loads the image
    data, splits it by subject into train / validation / test sets, builds the
    network on top of a pretrained encoder and trains it with adam.

    After every epoch the validation cost is computed; whenever it improves on
    the best value seen so far, the model is also scored on the test set.
    Training stops early when ``early_stop2`` says so (only consulted once
    ``validation_window`` epochs have elapsed).  Finally the test confusion
    matrix and the cost curves are plotted.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))

    # model parameters
    ae_pretrained = config.get('models', 'pretrained')
    lstm_units = config.getint('models', 'lstm_units')
    output_classes = config.getint('models', 'output_classes')
    weight_init = config.get('models', 'weight_init')
    delta_window = config.getint('models', 'delta_window')
    nonlinearity = select_nonlinearity(config.get('models', 'nonlinearity'))

    # Map the configured name to an initializer; unknown names fall back to
    # Glorot uniform, exactly as the default did before.
    weight_initializers = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = weight_initializers.get(
        weight_init, las.init.GlorotUniform)()

    # training parameters
    learning_rate = config.getfloat('training', 'learning_rate')
    no_epochs = config.getint('training', 'no_epochs')
    use_peepholes = config.getboolean('training', 'use_peepholes')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')
    validation_window = config.getint('training', 'validation_window')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))

    train_X, train_y, train_vidlens, train_subjects, \
        val_X, val_y, val_vidlens, val_subjects, \
        test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens,
                   train_subject_ids, val_subject_ids, test_subject_ids)

    # sanity check: the three splits must partition the whole dataset
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # IMPT: the encoder was trained with fortran ordered images, so to
    # visualize, convert all the images to C order using
    # reshape_images_order()
    weights, biases = load_dbn(ae_pretrained)

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model_using_pretrained_encoder(
        weights, biases, (None, None, 1144), inputs, (None, None), mask,
        lstm_units, window, output_classes, weight_init_fn, use_peepholes,
        nonlinearity)
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate)

    train = theano.function(
        [inputs, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, window], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, window], test_cost, allow_input_downcast=True)
    val_fn = theano.function(
        [inputs, mask, window], test_predictions, allow_input_downcast=True)

    # Train for `no_epochs` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    # Bound up front so the final report cannot hit a NameError when no epoch
    # ever improves on best_val (no_epochs == 0, or NaN validation costs).
    test_cr = None
    test_conf = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    # validation / test generators return the whole split as a single batch
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(val_datagen)

    # Use this test set to check final classification performance
    X_test, y_test, mask_test, _ = next(test_datagen)

    # The cost function wants one target per time step, the evaluator wants
    # one target per sequence: keep both forms of the validation targets.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(no_epochs):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, _ = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, delta_window)
            print('\r', end='')

        # the training cost is measured on the last minibatch of the epoch
        cost = compute_train_cost(X, y, m, delta_window)
        val_cost = compute_test_cost(X_val, y_val, mask_val, delta_window)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best
        # one so far (percent); pk: training progress over the last
        # STRIP_SIZE epochs; pq: their quotient.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, delta_window, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, delta_window, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    if test_conf is None:
        print('no epoch improved the validation loss; nothing to report')
        return

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(test_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)
def main():
    """Train and evaluate the two-stream (raw + diff image) fusion LSTM.

    Reads the config file named by the ``config`` CLI option (a few training
    parameters can be overridden from the CLI), loads the image and DCT data,
    splits them by subject, builds the fusion network on top of two pretrained
    encoders and trains it with adam.

    After every epoch the validation cost is computed; whenever it improves on
    the best value seen so far, the model is also scored on the test set.
    Training stops early when ``early_stop2`` says so (only consulted once
    ``validation_window`` epochs have elapsed).  The final results are printed,
    plotted and, when the ``write_results`` option is present, appended to
    that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))

    # model parameters
    ae_pretrained = config.get('models', 'pretrained')
    ae_pretrained_diff = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')
    lstm_size = config.getint('models', 'lstm_size')
    output_classes = config.getint('models', 'output_classes')
    # NOTE(review): a CLI-supplied value is used as-is (no bool conversion),
    # unlike the config value -- confirm parse_options() yields a bool.
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('models', 'use_peepholes')
    use_blstm = config.getboolean('models', 'use_blstm')
    delta_window = config.getint('models', 'delta_window')
    input_dimensions = config.getint('models', 'input_dimensions')

    # capture training parameters
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options \
        else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    use_finetuning = config.getboolean('training', 'use_finetuning')
    learning_rate = config.getfloat('training', 'learning_rate')
    batchsize = config.getint('training', 'batchsize')
    epochsize = config.getint('training', 'epochsize')

    # Map the configured name to an initializer; unknown names fall back to
    # Glorot uniform, exactly as the default did before.
    weight_initializers = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = weight_initializers.get(
        weight_init, las.init.GlorotUniform)()

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    train_subject_ids = read_data_split_file('data/train_30_10_12.txt')
    val_subject_ids = read_data_split_file('data/val_30_10_12.txt')
    test_subject_ids = read_data_split_file('data/test_30_10_12.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))

    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
        val_X, val_y, val_dct, val_X_diff, val_vidlens, val_subjects, \
        test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens,
                   train_subject_ids, val_subject_ids, test_subject_ids)

    # sanity check: the three splits must partition the whole dataset
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    # (previously summed val_vidlens here by mistake, so the subject split
    # itself was never checked)
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # shift the targets down by one (in place)
    train_y -= 1
    val_y -= 1
    test_y -= 1

    # IMPT: the encoders were trained with fortran ordered images, so to
    # visualize, convert all the images to C order using
    # reshape_images_order()
    print('loading pretrained encoder: {}...'.format(ae_pretrained))
    ae = load_dbn(ae_pretrained)

    print('loading pretrained encoder: {}...'.format(ae_pretrained_diff))
    ae_diff = load_dbn(ae_pretrained_diff)

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    # both variants take the same arguments; only the module differs
    model_module = adenet_v2_2 if use_blstm else adenet_v2_4
    network, l_fuse = model_module.create_model(
        ae, ae_diff, (None, None, input_dimensions), inputs,
        (None, None), mask,
        (None, None, input_dimensions), inputs_diff,
        lstm_size, window, output_classes, fusiontype,
        weight_init_fn, use_peepholes)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function(
        [inputs, mask, inputs_diff, window],
        test_predictions, allow_input_downcast=True)

    # Train for `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0
    # Bound up front so the final report cannot hit a NameError when no epoch
    # ever improves on best_val (num_epoch == 0, or NaN validation costs).
    test_cr = None
    test_conf = None
    adascale_param = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    integral_lens = compute_integral_len(train_vidlens)

    # validation / test generators return the whole split as a single batch
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test, np.max(test_vidlens))

    # The cost function wants one target per time step, the evaluator wants
    # one target per sequence: keep both forms of the validation targets.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, delta_window)
            print('\r', end='')

        # the training cost is measured on the last minibatch of the epoch
        cost = compute_train_cost(X, y, m, X_diff, delta_window)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val, delta_window)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best
        # one so far (percent); pk: training progress over the last
        # STRIP_SIZE epochs; pq: their quotient.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, X_diff_val, delta_window, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, delta_window, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    if test_conf is None:
        print('no epoch improved the validation loss; nothing to report')
        return

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            # one header line, the three per-epoch curves, then the summary
            f.write('{},{},{},{},{}\n'.format(validation_window, weight_init, use_peepholes,
                                              use_blstm, use_finetuning))
            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))

            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))
def main():
    """Run one leave-one-subject-out fold of the three-stream LSTM model.

    The subject given by the ``test_subj`` CLI option is held out; subjects
    1..53 minus that one form the training set.  The held-out subject's data
    doubles as the "validation" set that is scored after every epoch.  The
    network is trained with adadelta, using a learning rate that is multiplied
    by ``decay_rate`` after each epoch from epoch number ``decay_start`` on
    (1-based), and training stops early when the validation cost has risen
    strictly over the whole validation window.

    When the ``results`` CLI option is set, one ``subject, rate, loss`` line
    is appended to that file.

    Raises:
        ValueError: if the config flags leave either encoder unloaded.
    """
    configure_theano()
    options = parse_options()
    config_file = 'config/leave_one_out.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    learning_rate = config.getfloat('training', 'learning_rate')
    decay_rate = config.getfloat('training', 'decay_rate')
    decay_start = config.getint('training', 'decay_start')
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    test_subject_ids = [options['test_subj']]
    # list(...) so that .remove() works whether range() returns a list or a
    # lazy range object
    train_subject_ids = list(range(1, 54))
    for subj in test_subject_ids:
        train_subject_ids.remove(subj)

    results_file = options['results'] if 'results' in options else None
    if results_file is not None:
        # Touch the file now so an unwritable path fails before the long
        # training run; it is reopened for the actual write at the end
        # instead of keeping a handle open throughout.
        with open(results_file, mode='a'):
            pass

    print(train_subject_ids)
    print(test_subject_ids)
    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
        test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens,
                   train_subject_ids, test_subject_ids)

    # sanity check: the two splits must partition the whole dataset
    assert train_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # featurewise normalize dct features, reusing the training statistics
    # for the held-out data
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    ae = None
    ae_diff = None

    if do_finetune:
        print('performing finetuning on pretrained encoder: {}'.format(
            ae_pretrained))
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_X, train_X)

    if save_finetune:
        print('saving finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'wb') as encoder_file:
            pickle.dump(ae, encoder_file)

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point the config at encoder files you trust.
    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        with open(ae_finetuned, 'rb') as encoder_file:
            ae = pickle.load(encoder_file)
        ae.initialize()

    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        with open(ae_finetuned_diff, 'rb') as encoder_file:
            ae_diff = pickle.load(encoder_file)
        ae_diff.initialize()

    if ae is None or ae_diff is None:
        # previously surfaced as a NameError at model construction
        raise ValueError(
            'no encoder loaded: enable do_finetune or load_finetune, and '
            'load_finetune_diff, in the [training] section')

    # IMPT: the encoder was trained with fortran ordered images, so to
    # visualize, convert all the images to C order using
    # reshape_images_order()

    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # shared variable so the rate can be decayed between epochs
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network = adenet_v5.create_model(ae, ae_diff, (None, None, 1144), inputs,
                                     (None, None), mask,
                                     (None, None, 90), dct,
                                     (None, None, 1144), inputs_diff,
                                     250, window, 10)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(
        predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    # optional max-norm constraint on the weight matrices (off by default)
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param,
                    MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function(
        [inputs, mask, dct, inputs_diff, window],
        test_predictions, allow_input_downcast=True)

    # Train for NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 10
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    # the held-out subject is returned as one batch and used for validation
    val_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                        batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlens)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlens,
                                     integral_lens_val, np.max(test_vidlens))
    X_diff_val = gen_seq_batch_from_idx(test_X_diff, idxs_val, test_vidlens,
                                        integral_lens_val, np.max(test_vidlens))

    def early_stop(cost_window):
        """Return True when the windowed costs are strictly increasing."""
        if len(cost_window) < 2:
            return False
        costs = list(cost_window)
        return all(prev < cur for prev, cur in zip(costs, costs[1:]))

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            d = gen_seq_batch_from_idx(train_dct, batch_idxs,
                                       train_vidlens, integral_lens, np.max(train_vidlens))
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens, np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')

        # the training cost is measured on the last minibatch of the epoch
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best
        # one so far (percent); pk: training progress over the last
        # STRIP_SIZE epochs; pq: their quotient.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate,
                         savefilename='valid_cost')

    if results_file is not None:
        print('writing to results file: {}...'.format(options['results']))
        with open(results_file, mode='a') as f:
            f.write('{}, {}, {}\n'.format(test_subject_ids[0], best_cr, best_val))
def construct_lstm(input_size, lstm_size, output_size, train_data_gen, val_data_gen,
                   num_epochs=30, epoch_size=26, validation_window=4):
    """Build, train and evaluate a single-layer LSTM sequence classifier.

    The network is input -> LSTM -> last time step -> softmax.  It is trained
    with adadelta on categorical cross-entropy; after each epoch the
    validation cost and classification rate are computed, and training stops
    early once the validation cost has risen strictly over the whole
    validation window.  The best validation results, the confusion matrix and
    the cost curves are printed / plotted at the end; nothing is returned.

    Args:
        input_size: number of features per time step.
        lstm_size: number of hidden/cell units in the LSTM layer.
        output_size: number of output classes.
        train_data_gen: generator; each ``next()`` must yield a 4-tuple whose
            first three items are the inputs, targets and mask of a batch.
        val_data_gen: generator; its first ``next()`` must yield a 3-tuple
            (inputs, targets, mask) used as the fixed validation set.
            NOTE(review): this is one item fewer than ``train_data_gen``
            yields -- confirm the caller passes a matching generator.
        num_epochs: maximum number of training epochs.
        epoch_size: minibatches per epoch; must be at least 1.
        validation_window: number of recent validation costs inspected by the
            early-stopping test, and the first epoch index at which it runs.
    """
    # All gates have initializers for the input-to-gate and hidden
    # state-to-gate weight matrices, the cell-to-gate weight vector, the bias
    # vector, and the nonlinearity.  Gates keep the Gate class default
    # nonlinearity.
    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    # By setting the first and second dimensions to None, we allow
    # arbitrary minibatch sizes with arbitrary sequence lengths.
    l_in = InputLayer(shape=(None, None, input_size), name='input')
    # Masks are matrices of shape (n_batch, n_time_steps); both dimensions
    # are variable, hence (None, None).
    l_mask = InputLayer(shape=(None, None), name='mask')

    l_lstm = LSTMLayer(
        l_in, lstm_size,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm1')

    # The LSTM emits one output per time step.  Since we are only interested
    # in the final prediction, slice out the last step (index -1 on axis 1)
    # and feed it to the next layer.
    l_forward_slice = SliceLayer(l_lstm, -1, 1, name='slice')

    # Softmax over the classes for the whole sequence.
    l_out = DenseLayer(
        l_forward_slice, num_units=output_size,
        nonlinearity=las.nonlinearities.softmax, name='output')

    print_network(l_out)

    # Symbolic variable for the target network output.
    # It is a vector because there's only 1 target value per sequence.
    target_values = T.ivector('target_output')

    # lasagne.layers.get_output produces an expression for the output of the net
    prediction = las.layers.get_output(l_out)

    # Our cost will be categorical cross entropy error
    cost = T.mean(las.objectives.categorical_crossentropy(prediction, target_values))

    # Retrieve all parameters from the network
    all_params = las.layers.get_all_params(l_out, trainable=True)

    # Compute adadelta updates for training
    updates = adadelta(cost, all_params)

    # Theano functions for training and computing cost
    train = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        cost, allow_input_downcast=True)

    # deterministic=True gives the inference-time graph
    test_prediction = las.layers.get_output(l_out, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_prediction, target_values))
    compute_val_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function(
        [l_in.input_var, l_mask.input_var],
        test_prediction, allow_input_downcast=True)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val = next(val_data_gen)

    cost_train = []
    cost_val = []
    class_rate = []
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))

    def early_stop(cost_window):
        """Return True when the windowed costs are strictly increasing."""
        if len(cost_window) < 2:
            return False
        costs = list(cost_window)
        return all(prev < cur for prev, cur in zip(costs, costs[1:]))

    for epoch in range(num_epochs):
        time_start = time.time()
        for _batch in range(epoch_size):
            X, y, m, _ = next(train_data_gen)
            train(X, y, m)

        # the training cost is measured on the last minibatch of the epoch
        train_cost = compute_train_cost(X, y, m)
        val_cost = compute_val_cost(X_val, y_val, mask_val)
        cr, conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        class_rate.append(cr)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best
        # one so far (percent); pk: training progress over the last
        # STRIP_SIZE epochs; pq: their quotient.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            best_conf = conf

        if epoch >= validation_window and early_stop(val_window):
            break

    letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
               'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

    print('Final Model')
    print('classification rate: {}'.format(best_cr))
    print('validation loss: {}'.format(best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)
def main():
    """Train and evaluate the two-stream LSTM classifier described by a config file.

    Reads the CLI options and the INI config they point to, loads the two
    input streams, splits them by subject into train/val/test, builds the
    ``adenet_v2_nodelta`` model on top of two pre-trained encoders and trains
    it with adam.  After every epoch the validation cost is computed; whenever
    it improves, the test set is evaluated and the network parameters are
    snapshotted.  Training stops after ``num_epoch`` epochs or when
    ``early_stop2`` fires.

    Optional CLI keys handled here: ``weight_init``, ``use_peepholes``,
    ``validation_window``, ``num_epoch``, ``learning_rate``, ``save_plot``,
    ``write_results`` and ``save_best``.

    Raises:
        RuntimeError: if no epoch ever improved the validation cost, so there
            is no best model to report or save.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('stream2'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('preprocessing dataset...')

    # stream 1
    s1_data = load_mat_file(config.get('stream1', 'data'))
    s1_imagesize = tuple([int(d) for d in config.get('stream1', 'imagesize').split(',')])
    s1 = config.get('stream1', 'model')
    s1_inputdim = config.getint('stream1', 'input_dimensions')
    s1_shape = config.get('stream1', 'shape')
    s1_nonlinearities = config.get('stream1', 'nonlinearities')

    # stream 2
    s2_data = load_mat_file(config.get('stream2', 'data'))
    s2_imagesize = tuple([int(d) for d in config.get('stream2', 'imagesize').split(',')])
    s2 = config.get('stream2', 'model')
    s2_inputdim = config.getint('stream2', 'input_dimensions')
    s2_shape = config.get('stream2', 'shape')
    s2_nonlinearities = config.get('stream2', 'nonlinearities')

    # lstm classifier
    fusiontype = config.get('lstm_classifier', 'fusiontype')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('lstm_classifier', 'weight_init')
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('lstm_classifier', 'use_peepholes')
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')

    # capture training parameters (CLI values override the config file)
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options \
        else config.getint('training', 'num_epoch')
    # Cast the CLI value like the other numeric overrides so the optimiser
    # always receives a float, never the raw option value.
    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    # Glorot-uniform is the fallback for unrecognised weight_init names.
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    elif weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_subject_ids = read_data_split_file(config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(config.get('training', 'test_subjects_file'))

    s1_data_matrix = s1_data['dataMatrix'].astype('float32')
    s2_data_matrix = s2_data['dataMatrix'].astype('float32')
    # Targets, subjects and video lengths are taken from stream 1 and reused
    # for stream 2.
    targets_vec = s1_data['targetsVec'].reshape((-1,))
    subjects_vec = s1_data['subjectsVec'].reshape((-1,))
    vidlen_vec = s1_data['videoLengthVec'].reshape((-1,))

    if matlab_target_offset:
        targets_vec -= 1

    s1_data_matrix = presplit_dataprocessing(s1_data_matrix, vidlen_vec, config, 'stream1',
                                             imagesize=s1_imagesize)
    s2_data_matrix = presplit_dataprocessing(s2_data_matrix, vidlen_vec, config, 'stream2',
                                             imagesize=s2_imagesize)

    s1_train_X, s1_train_y, s1_train_vidlens, s1_train_subjects, \
        s1_val_X, s1_val_y, s1_val_vidlens, s1_val_subjects, \
        s1_test_X, s1_test_y, s1_test_vidlens, s1_test_subjects = split_seq_data(
            s1_data_matrix, targets_vec, subjects_vec, vidlen_vec,
            train_subject_ids, val_subject_ids, test_subject_ids)

    s2_train_X, s2_train_y, s2_train_vidlens, s2_train_subjects, \
        s2_val_X, s2_val_y, s2_val_vidlens, s2_val_subjects, \
        s2_test_X, s2_test_y, s2_test_vidlens, s2_test_subjects = split_seq_data(
            s2_data_matrix, targets_vec, subjects_vec, vidlen_vec,
            train_subject_ids, val_subject_ids, test_subject_ids)

    s1_train_X, s1_val_X, s1_test_X = postsplit_datapreprocessing(
        s1_train_X, s1_val_X, s1_test_X, config, 'stream1')
    s2_train_X, s2_val_X, s2_test_X = postsplit_datapreprocessing(
        s2_train_X, s2_val_X, s2_test_X, config, 'stream2')

    ae1 = load_decoder(s1, s1_shape, s1_nonlinearities)
    ae2 = load_decoder(s2, s2_shape, s2_nonlinearities)

    # IMPT: the encoder was trained with Fortran ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    inputs1 = T.tensor3('inputs1', dtype='float32')
    inputs2 = T.tensor3('inputs2', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network, l_fuse = adenet_v2_nodelta.create_model(
        ae1, ae2, (None, None, s1_inputdim), inputs1,
        (None, None), mask,
        (None, None, s2_inputdim), inputs2,
        lstm_size, output_classes, fusiontype,
        w_init_fn=weight_init_fn, use_peepholes=use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [inputs1, targets, mask, inputs2],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs1, targets, mask, inputs2],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs1, targets, mask, inputs2], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs1, mask, inputs2], test_predictions,
                             allow_input_downcast=True)

    # Train for up to `num_epoch` epochs of `epochsize` minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0
    # Filled in the first time the validation cost improves.
    test_cr = None
    test_conf = None
    best_params = None

    datagen = gen_lstm_batch_random(s1_train_X, s1_train_y, s1_train_vidlens, batchsize=batchsize)
    integral_lens = compute_integral_len(s1_train_vidlens)

    val_datagen = gen_lstm_batch_random(s1_val_X, s1_val_y, s1_val_vidlens,
                                        batchsize=len(s1_val_vidlens))
    test_datagen = gen_lstm_batch_random(s1_test_X, s1_test_y, s1_test_vidlens,
                                         batchsize=len(s1_test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(s1_val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(s2_val_X, idxs_val, s1_val_vidlens,
                                        integral_lens_val, np.max(s1_val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(s1_test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(s2_test_X, idxs_test, s1_test_vidlens,
                                         integral_lens_test, np.max(s1_test_vidlens))

    # reshape the targets for validation: one target column per mask column
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            # Stream-2 batch built from the same indexes as the stream-1 batch.
            X_diff = gen_seq_batch_from_idx(s2_train_X, batch_idxs,
                                            s1_train_vidlens, integral_lens,
                                            np.max(s1_train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff)
            print('\r', end='')

        # Per-epoch costs: training cost is measured on the last minibatch.
        cost = compute_train_cost(X, y, m, X_diff)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best so far.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: training progress over the last STRIP_SIZE epochs.  The strip
        # starts as zeros, so its minimum is 0 until every slot is filled.
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, X_diff_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, X_diff_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    if best_params is None:
        # Happens when num_epoch is 0 or the validation cost never compared
        # below infinity (e.g. it was NaN); fail with a clear message.
        raise RuntimeError('no epoch improved the validation loss; '
                           'there is no best model to report')

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))

    # plot confusion matrix
    table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train, cost_val, savefilename='{}.validloss.png'.format(prefix))
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        print('saving best model...')
        # Restore the snapshot taken at the best validation epoch before saving.
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('best model saved to {}'.format(options['save_best']))
def main():
    """Train an LSTM letter classifier on delta features of encoded mouth ROIs.

    Loads ``data/allData_mouthROIs.mat``, splits it into train/test with
    ``create_split_index``, resizes and normalises the images, encodes them
    with ``compile_encoder()`` and keeps the last 100 columns of the
    first/second delta features.  A masked LSTM (250 units) followed by a
    26-way softmax is trained with adadelta; the held-out split doubles as
    the validation set.  Prints per-epoch statistics and finally plots the
    confusion matrix of the best epoch and the cost curves.
    """
    configure_theano()
    print('preprocessing dataset...')
    data = load_mat_file('data/allData_mouthROIs.mat')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # Sanity checks on the expected dataset layout.
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets), ))

    # indexes for a particular letter
    # idx = [i for i, elem in enumerate(test_targets) if elem == 20]

    # resize the input data to 40 x 30
    train_data_resized = resize_images(train_data).astype(np.float32)

    # normalize the inputs [0 - 1]
    train_data_resized = normalize_input(train_data_resized, centralize=True)

    test_data_resized = resize_images(test_data).astype(np.float32)
    test_data_resized = normalize_input(test_data_resized, centralize=True)

    print('compute delta features and featurewise normalize...')
    encode_fn = compile_encoder()
    deltafeatures = concat_first_second_deltas(
        encode_fn(train_data_resized), train_vidlen_vec)[:, -100:]
    deltafeatures_val = concat_first_second_deltas(
        encode_fn(test_data_resized), test_vidlen_vec)[:, -100:]
    # The validation features are scaled with the training mean/std.
    deltafeatures, mean, std = featurewise_normalize_sequence(deltafeatures)
    deltafeatures_val = (deltafeatures_val - mean) / std
    print('train delta features: {}'.format(deltafeatures.shape))
    print('validation delta features: {}'.format(deltafeatures_val.shape))
    gate_p, cell_p = generate_lstm_parameters()

    # create lstm
    input_var = T.tensor3('input', dtype='float32')
    mask_var = T.matrix('mask', dtype='uint8')
    target_var = T.ivector('target')
    lr = theano.shared(np.array(0.7, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(0.80, dtype=theano.config.floatX)

    l_input = InputLayer((None, None, 100), input_var, name='input')
    l_mask = InputLayer((None, None), mask_var, name='mask')
    l_lstm = LSTMLayer(
        l_input, 250,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_p, forgetgate=gate_p,
        cell=cell_p, outgate=gate_p,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm')
    l_forward_slice1 = SliceLayer(l_lstm, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    network = DenseLayer(l_forward_slice1, num_units=26,
                         nonlinearity=las.nonlinearities.softmax, name='output')

    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(
        las.objectives.categorical_crossentropy(predictions, target_var))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    train = theano.function([input_var, target_var, mask_var],
                            cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([input_var, target_var, mask_var],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, target_var))
    compute_test_cost = theano.function([input_var, target_var, mask_var],
                                        test_cost, allow_input_downcast=True)

    val_fn = theano.function([input_var, mask_var], test_predictions,
                             allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 40
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    # create train and eval loop
    data_gen = gen_lstm_batch_random(deltafeatures, train_targets, train_vidlen_vec,
                                     batchsize=BATCH_SIZE)
    data_gen_val = gen_lstm_batch_random(deltafeatures_val, test_targets, test_vidlen_vec,
                                         batchsize=len(test_vidlen_vec))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(data_gen_val)

    def early_stop(cost_window):
        """Return True when the costs in the window are strictly increasing."""
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(data_gen)
            train(X, y, m)

        # Per-epoch costs: training cost is measured on the last minibatch.
        cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best so far.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: training progress over the last STRIP_SIZE epochs.  The strip
        # starts as zeros, so its minimum is 0 until every slot is filled.
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > 8:
            lr.set_value(lr.get_value() * lr_decay)

    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)
def main():
    """Train the end-to-end image + DCT fusion network described by a config file.

    Reads the CLI options and the INI config they point to, loads the image
    and DCT feature matrices, splits them with ``create_split_index``, loads
    the fine-tuned encoder and builds either ``adenet_v2`` (``use_blstm``) or
    ``adenet_v2_3``.  The network is trained with the configured update rule
    (``adadelta``, ``sgdm``, ``sgdnm`` or ``adam``) with learning-rate decay
    and early stopping, using the held-out split as the validation set.

    Optional CLI keys handled here: ``update_rule``, ``learning_rate``,
    ``decay_rate``, ``decay_start``, ``validation_window``, ``t1``,
    ``num_epoch``, ``weight_init``, ``use_peepholes``, ``momentum``,
    ``momentum_schedule``, ``no_plot`` and ``write_results``.

    Raises:
        ValueError: if ``update_rule`` is not one of the supported names.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    fusiontype = config.get('models', 'fusiontype')

    # capture training parameters (CLI values override the config file)
    update_rule = options['update_rule'] if 'update_rule' in options \
        else config.get('training', 'update_rule')
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options else config.getfloat('training', 'learning_rate')
    decay_rate = float(options['decay_rate']) \
        if 'decay_rate' in options else config.getfloat('training', 'decay_rate')
    decay_start = int(options['decay_start']) \
        if 'decay_start' in options else config.getint('training', 'decay_start')
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    t1 = int(options['t1']) if 't1' in options else config.getint('training', 't1')
    num_epoch = int(options['num_epoch']) \
        if 'num_epoch' in options else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] \
        if 'weight_init' in options else config.get('training', 'weight_init')
    use_peepholes = options['use_peepholes'] \
        if 'use_peepholes' in options else config.getboolean('training', 'use_peepholes')
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    uses_momentum = update_rule == 'sgdm' or update_rule == 'sgdnm'
    if uses_momentum:
        momentum = float(options['momentum']) \
            if 'momentum' in options else config.getfloat('training', 'momentum')
        momentum_schedule = options['momentum_schedule'] \
            if 'momentum_schedule' in options else config.get('training', 'momentum_schedule')
        mm_schedule = [float(m) for m in momentum_schedule.split(',')]

    # Orthogonal is the fallback for unrecognised weight_init names.
    weight_init_fn = las.init.Orthogonal()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    elif weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures'].astype('float32')

    print('samplewise normalize images...')
    data_matrix = normalize_input(data_matrix, True)
    # mean remove
    # dct_feats = dct_feats[:, 0:30]
    # dct_feats = sequencewise_mean_image_subtraction(dct_feats, vid_len_vec.reshape((-1,)))

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # Sanity checks on the expected dataset layout.
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets), ))

    # split the dct features; the test split is scaled with the training mean/std
    train_dct = dct_feats[indexes == True].astype(np.float32)
    test_dct = dct_feats[indexes == False].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    finetune = False
    if finetune:
        print('fine-tuning...')
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_data, train_data)
        res = dbn.predict(test_data)
        # print(res.shape)
        visualize_reconstruction(test_data[300:336], res[300:336])

    save = False
    if save:
        # Context manager so the pickle is flushed and the handle closed.
        with open(ae_finetuned, 'wb') as f:
            pickle.dump(dbn, f)

    load = True
    if load:
        print('loading pre-trained encoding layers...')
        # NOTE: unpickling runs code from the file; `finetuned` in the config
        # must only point at trusted model files.
        with open(ae_finetuned, 'rb') as f:
            dbn = pickle.load(f)
        dbn.initialize()
        # recon = dbn.predict(test_data)
        # visualize_reconstruction(test_data[300:364], recon[300:364])
        # exit()

    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs = T.tensor3('inputs', dtype='float32')
    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    # targets = T.ivector('targets')
    targets = T.imatrix('targets')
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)
    if uses_momentum:
        mm = theano.shared(np.array(momentum, dtype=theano.config.floatX), name='momentum')

    print('constructing end to end model...')
    if use_blstm:
        network, l_fuse = adenet_v2.create_model(
            dbn, (None, None, 1200), inputs,
            (None, None), mask,
            (None, None, 90), dct,
            250, window, 26, fusiontype,
            w_init_fn=weight_init_fn, use_peepholes=use_peepholes)
    else:
        network, l_fuse = adenet_v2_3.create_model(
            dbn, (None, None, 1200), inputs,
            (None, None), mask,
            (None, None, 90), dct,
            250, window, 26, fusiontype,
            w_init_fn=weight_init_fn, use_peepholes=use_peepholes)

    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    # cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    cost = temporal_softmax_loss(predictions, targets, mask)
    if update_rule == 'adadelta':
        updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    elif update_rule == 'sgdm':
        updates = las.updates.sgd(cost, all_params, learning_rate=lr)
        updates = las.updates.apply_momentum(updates, all_params, momentum=mm)
    elif update_rule == 'sgdnm':
        updates = las.updates.sgd(cost, all_params, learning_rate=lr)
        updates = las.updates.apply_nesterov_momentum(updates, all_params, momentum=mm)
    elif update_rule == 'adam':
        # adam deliberately runs with its default hyper-parameters.
        updates = las.updates.adam(cost, all_params)
    else:
        # Previously an unknown rule surfaced as a NameError on `updates`.
        raise ValueError('unsupported update_rule: {!r} (expected adadelta, sgdm, '
                         'sgdnm or adam)'.format(update_rule))

    train = theano.function([inputs, targets, mask, dct, window],
                            cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask, dct, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    # test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function([inputs, targets, mask, dct, window],
                                        test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, dct, window], test_predictions,
                             allow_input_downcast=True)

    # Train for up to `num_epoch` epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlen_vec,
                                     integral_lens_val, np.max(test_vidlen_vec))

    # reshape the targets for validation: one target column per mask column
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            # DCT batch built from the same indexes as the image batch.
            d = gen_seq_batch_from_idx(train_dct, batch_idxs,
                                       train_vidlen_vec, integral_lens,
                                       np.max(train_vidlen_vec))
            if update_rule == 'adam':
                print_str = 'Epoch {} batch {}/{}: {} examples with {} using default params'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), update_rule)
            if update_rule == 'adadelta':
                print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f} with {}'.format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()), update_rule)
            if uses_momentum:
                print_str = ('Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}, '
                             'momentum = {:.4f} with {}').format(
                    epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()),
                    float(mm.get_value()), update_rule)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, WINDOW_SIZE)
            print('\r', end='')

        # Per-epoch costs: training cost is measured on the last minibatch.
        cost = compute_train_cost(X, y, m, d, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best so far.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: training progress over the last STRIP_SIZE epochs.  The strip
        # starts as zeros, so its minimum is 0 until every slot is filled.
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, dct_val,
                                       WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
        else:
            # No improvement: for momentum SGD, shrink the learning rate
            # (floored at 0.001) and advance the momentum schedule.
            if epoch >= t1 and uses_momentum:
                lr.set_value(max(lr.get_value() * lr_decay, 0.001))
                if mm_schedule:
                    mm.set_value(mm_schedule.pop(0))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

        # learning rate decay
        if epoch + 1 >= decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    letters = list('abcdefghijklmnopqrstuvwxyz')

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    if fusiontype == 'adasum':
        adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    # A missing 'no_plot' key means "plot", matching how the other optional
    # CLI keys are treated, instead of raising KeyError.
    if not options.get('no_plot', False):
        plot_confusion_matrix(best_conf, letters, fmt='latex')
        plot_validation_cost(cost_train, cost_val, class_rate, 'e2e_valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{}\n'.format(validation_window, weight_init,
                                              use_peepholes, use_blstm, use_finetuning))
            # One CSV line each for train cost, validation cost and class rate.
            for series in (cost_train, cost_val, class_rate):
                f.write('{}\n'.format(','.join([str(v) for v in series])))
            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))
def train_deltanet(save=True):
    """Train the deltanet letter classifier on resized mouth-ROI images.

    Loads ``data/allData_mouthROIs.mat``, splits it into train/test with
    ``create_split_index``, resizes and normalises the images, builds
    ``deltanet.create_model`` on top of the fine-tuned DBN and trains it with
    adadelta.  The held-out split doubles as the validation set.  Prints
    per-epoch statistics and finally plots the confusion matrix of the best
    epoch and the cost curves.

    Args:
        save: when true (the default), the encoder extracted from the network
            is pickled to ``models/end2end_encoder.dat`` each time the
            validation loss improves and the classification rate is above
            0.55.  When false, nothing is written to disk.
    """
    configure_theano()
    print('preprocessing dataset...')
    data = load_mat_file('data/allData_mouthROIs.mat')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)
    # Sanity checks on the expected dataset layout.
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    train_data = data_matrix[indexes == True]
    train_targets = targets_vec[indexes == True]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[indexes == False]
    test_targets = targets_vec[indexes == False]
    test_targets = test_targets.reshape((len(test_targets), ))

    # indexes for a particular letter
    # idx = [i for i, elem in enumerate(test_targets) if elem == 20]

    # resize the input data to 40 x 30
    train_data_resized = resize_images(train_data).astype(np.float32)

    # normalize the inputs [0 - 1]
    train_data_resized = normalize_input(train_data_resized, centralize=True)

    test_data_resized = resize_images(test_data).astype(np.float32)
    test_data_resized = normalize_input(test_data_resized, centralize=True)

    input_var = T.tensor3('input', dtype='float32')
    mask_var = T.matrix('mask', dtype='uint8')
    window_var = T.iscalar('window')
    target_var = T.ivector('target')
    lr = theano.shared(np.array(1.0, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(0.90, dtype=theano.config.floatX)

    dbn = load_finetuned_dbn('models/avletters_ae_finetune.dat')
    network = deltanet.create_model(dbn, (None, None, 1200), input_var,
                                    (None, None), mask_var, 250, window_var)

    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(
        las.objectives.categorical_crossentropy(predictions, target_var))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    train = theano.function([input_var, target_var, mask_var, window_var],
                            cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [input_var, target_var, mask_var, window_var],
        cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, target_var))
    compute_test_cost = theano.function(
        [input_var, target_var, mask_var, window_var],
        test_cost, allow_input_downcast=True)

    val_fn = theano.function([input_var, mask_var, window_var], test_predictions,
                             allow_input_downcast=True)

    # Train for up to NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 40
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    # create train and eval loop
    data_gen = gen_lstm_batch_random(train_data_resized, train_targets, train_vidlen_vec,
                                     batchsize=BATCH_SIZE)
    data_gen_val = gen_lstm_batch_random(test_data_resized, test_targets, test_vidlen_vec,
                                         batchsize=len(test_vidlen_vec))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, _ = next(data_gen_val)

    def early_stop(cost_window):
        """Return True when the costs in the window are strictly increasing."""
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
            return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, _ = next(data_gen)
            train(X, y, m, WINDOW_SIZE)

        # Per-epoch costs: training cost is measured on the last minibatch.
        cost = compute_train_cost(X, y, m, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: relative increase of the current validation cost over the best so far.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # pk: training progress over the last STRIP_SIZE epochs.  The strip
        # starts as zeros, so its minimum is 0 until every slot is filled.
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model1(X_val, y_val, mask_val, WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            # Honour the `save` flag: it used to be ignored and the encoder
            # was written unconditionally.
            if save and best_cr > 0.55:
                print('saving a good encoder...')
                encoder = extract_encoder(network, (None, 1200), 2, 6)
                # Context manager so the pickle is flushed and the handle closed.
                with open('models/end2end_encoder.dat', 'wb') as f:
                    pickle.dump(encoder, f)

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > 12:
            lr.set_value(lr.get_value() * lr_decay)

    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate)
def main():
    """Train and evaluate the three-stream model described by config/trimodal.ini.

    The script loads the image and DCT feature matrices, derives diff images,
    splits the data by subject id into train and test partitions, optionally
    fine-tunes / loads the pre-trained encoders, builds the network through
    ``adenet_v5.create_model`` and runs the training loop with early stopping
    and learning-rate decay.  The best validation result is printed and
    plotted at the end.

    Raises:
        ValueError: if the configuration neither fine-tunes nor loads the
            encoders that the model needs.
    """
    configure_theano()
    config_file = 'config/trimodal.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    use_adascale = config.getboolean('models', 'use_adascale')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    dct_feats = dct_data['dctFeatures'].astype('float32')
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    # X = reorder_data(X, (26, 44), 'f', 'c')
    # print('performing sequencewise mean image removal...')
    # X = sequencewise_mean_image_subtraction(X, video_lens)
    # visualize_images(X[550:650], (26, 44))
    X_diff = compute_diff_images(X, video_lens)

    # mean remove dct features
    dct_feats = sequencewise_mean_image_subtraction(dct_feats, video_lens)

    train_subject_ids = read_data_split_file('data/train_val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print(train_subject_ids)
    print(test_subject_ids)
    train_X, train_y, train_dct, train_X_diff, train_vidlens, train_subjects, \
        test_X, test_y, test_dct, test_X_diff, test_vidlens, test_subjects = \
        split_data(X, y, dct_feats, X_diff, subjects, video_lens,
                   train_subject_ids, test_subject_ids)

    # sanity check: the two partitions together must cover the whole dataset
    assert train_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    train_X = normalize_input(train_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    # featurewise normalize dct features (test set reuses the training statistics)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    # Encoders stay None until one of the config switches below provides them,
    # so a misconfiguration is reported explicitly instead of as a NameError.
    ae = None
    ae_diff = None

    if do_finetune:
        print('performing finetuning on pretrained encoder: {}'.format(ae_pretrained))
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_X, train_X)
        # Saving only makes sense right after fine-tuning; there is no encoder
        # to save otherwise.
        if save_finetune:
            print('saving finetuned encoder: {}...'.format(ae_finetuned))
            with open(ae_finetuned, 'wb') as f:
                pickle.dump(ae, f)

    if load_finetune:
        print('loading finetuned encoder: {}...'.format(ae_finetuned))
        # NOTE: pickle.load executes arbitrary code; only load trusted model files.
        with open(ae_finetuned, 'rb') as f:
            ae = pickle.load(f)
        ae.initialize()

    if load_finetune_diff:
        print('loading finetuned encoder: {}...'.format(ae_finetuned_diff))
        with open(ae_finetuned_diff, 'rb') as f:
            ae_diff = pickle.load(f)
        ae_diff.initialize()

    if ae is None or ae_diff is None:
        raise ValueError(
            'missing encoder(s): enable do_finetune or load_finetune, and '
            'load_finetune_diff, in the [training] section of {}'.format(config_file))

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # symbolic inputs of the computation graph
    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # shared variable so the learning rate can be decayed between epochs
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    # earlier variant, kept for reference:
    # network = create_end_to_end_model(dbn, (None, None, 1144), inputs,
    #                                   (None, None), mask, 250, window)
    network, adascale = adenet_v5.create_model(ae, ae_diff, (None, None, 1144), inputs,
                                               (None, None), mask,
                                               (None, None, 90), dct,
                                               (None, None, 1144), inputs_diff,
                                               250, window, 10, use_adascale)
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = adagrad(cost, all_params, learning_rate=lr)

    # optional max-norm constraint, disabled by default
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        cost, allow_input_downcast=True)

    # deterministic=True is requested for the evaluation graph
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask, dct, inputs_diff, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function([inputs, mask, dct, inputs_diff, window],
                             test_predictions, allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    adascale_param = None  # bound up front so the final report cannot hit a NameError

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                        batchsize=len(test_vidlens))
    integral_lens = compute_integral_len(train_vidlens)
    max_train_len = np.max(train_vidlens)  # loop invariant, hoisted out of the batch loop

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlens)
    max_val_len = np.max(test_vidlens)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlens,
                                     integral_lens_val, max_val_len)
    X_diff_val = gen_seq_batch_from_idx(test_X_diff, idxs_val, test_vidlens,
                                        integral_lens_val, max_val_len)

    def early_stop(cost_window):
        """Return True when the costs in the window are strictly increasing."""
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            # gather the DCT / diff-image sequences matching the sampled videos
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlens,
                                       integral_lens, max_train_len)
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs, train_vidlens,
                                            integral_lens, max_train_len)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, X_diff, WINDOW_SIZE)
            print('\r', end='')  # rewind so the next progress line overwrites this one

        # the epoch's training cost is measured on its last minibatch
        cost = compute_train_cost(X, y, m, d, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: generalization loss, pk: training progress over the strip, pq: their ratio.
        # NOTE(review): train_strip starts as zeros, so pk/pq look non-finite
        # or meaningless until the strip has been filled - confirm this is acceptable.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, X_diff_val,
                                      WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            if use_adascale:
                adascale_param = las.layers.get_all_param_values(adascale, scaling_param=True)

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10']

    print('Final Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    if use_adascale:
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate, savefilename='valid_cost')
def main():
    """Train and evaluate the two-stream (visual + audio) model built by ``avnet``.

    Settings come from the config file named by the ``config`` CLI option;
    several of them can be overridden on the command line.  The script loads
    the pre-split train/val/test matrices for both streams, builds one
    pretrained sub-stream per modality, fuses them, trains with adam and
    early stopping on the validation cost, and evaluates on the test set
    every time the validation cost improves.  If the ``write_results`` option
    is present, a summary line and the cost / classification-rate histories
    are appended to that file.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    data_audio = load_mat_file(config.get('data', 'audio'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_diff_pretrained = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')
    lstm_size = config.getint('models', 'lstm_size')
    output_classes = config.getint('models', 'output_classes')
    nonlinearity = options['nonlinearity'] if 'nonlinearity' in options \
        else config.get('models', 'nonlinearity')

    # map the configured name onto the actual nonlinearity function
    if nonlinearity == 'sigmoid':
        nonlinearity = sigmoid
    elif nonlinearity == 'rectify':
        nonlinearity = rectify

    # capture training parameters (CLI options take precedence over the config file)
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) if 'num_epoch' in options \
        else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] if 'weight_init' in options \
        else config.get('training', 'weight_init')
    # Convert explicitly, like the integer options above: the value is later
    # formatted with '{:.4f}' and handed to the optimizer, so it must be a float.
    learning_rate = float(options['learning_rate']) if 'learning_rate' in options \
        else config.getfloat('training', 'learning_rate')
    # NOTE(review): a CLI-supplied use_peepholes is passed through unconverted -
    # confirm parse_options yields a real bool rather than a string.
    use_peepholes = options['use_peepholes'] if 'use_peepholes' in options \
        else config.getboolean('training', 'use_peepholes')
    input_dimension = config.getint('models', 'input_dimension')
    input_dimension2 = config.getint('models', 'input_dimension2')
    use_blstm = config.getboolean('training', 'use_blstm')
    use_finetuning = config.getboolean('training', 'use_finetuning')

    # Glorot uniform is the fallback for unrecognised weight_init names
    weight_init_fn = las.init.GlorotUniform()
    if weight_init == 'glorot':
        weight_init_fn = las.init.GlorotUniform()
    elif weight_init == 'norm':
        weight_init_fn = las.init.Normal(0.1)
    elif weight_init == 'uniform':
        weight_init_fn = las.init.Uniform()
    elif weight_init == 'ortho':
        weight_init_fn = las.init.Orthogonal()

    train_vidlens = data['trVideoLengthVec'].astype('int').reshape((-1,))
    val_vidlens = data['valVideoLengthVec'].astype('int').reshape((-1,))
    test_vidlens = data['testVideoLengthVec'].astype('int').reshape((-1,))
    train_X = data['trData'].astype('float32')
    val_X = data['valData'].astype('float32')
    test_X = data['testData'].astype('float32')
    train_X_audio = data_audio['trData'].astype('float32')
    val_X_audio = data_audio['valData'].astype('float32')
    test_X_audio = data_audio['testData'].astype('float32')
    # +1 to handle the -1 introduced in lstm_gendata
    train_y = data['trTargetsVec'].astype('int').reshape((-1,)) + 1
    val_y = data['valTargetsVec'].astype('int').reshape((-1,)) + 1
    test_y = data['testTargetsVec'].astype('int').reshape((-1,)) + 1

    train_X = reorder_data(train_X, (30, 50))
    val_X = reorder_data(val_X, (30, 50))
    test_X = reorder_data(test_X, (30, 50))

    visual_weights, visual_biases = load_dbn(ae_pretrained)
    audio_weights, audio_biases = load_dbn(ae_diff_pretrained)

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()
    # output = dbn.predict(test_X)
    # test_X = reshape_images_order(test_X, (26, 44))
    # output = reshape_images_order(output, (26, 44))
    # visualize_reconstruction(test_X[:36, :], output[:36, :], shape=(26, 44))

    # symbolic inputs of the computation graph
    window = T.iscalar('theta')
    visual_input = T.tensor3('visual_input', dtype='float32')
    audio_input = T.tensor3('audio_input', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    visual_net = avnet.create_pretrained_substream(visual_weights, visual_biases,
                                                   (None, None, input_dimension), visual_input,
                                                   (None, None), mask, 'visual',
                                                   lstm_size, window, nonlinearity,
                                                   weight_init_fn, use_peepholes)
    audio_net = avnet.create_pretrained_substream(audio_weights, audio_biases,
                                                  (None, None, input_dimension2), audio_input,
                                                  (None, None), mask, 'audio',
                                                  lstm_size, window, nonlinearity,
                                                  weight_init_fn, use_peepholes)
    network, l_fuse = avnet.create_model([visual_net, audio_net], (None, None), mask,
                                         lstm_size, output_classes, fusiontype,
                                         weight_init_fn, use_peepholes)
    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function(
        [visual_input, targets, mask, audio_input, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [visual_input, targets, mask, audio_input, window],
        cost, allow_input_downcast=True)

    # deterministic=True is requested for the evaluation graph
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [visual_input, targets, mask, audio_input, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function([visual_input, mask, audio_input, window],
                             test_predictions, allow_input_downcast=True)

    # Train for at most num_epoch epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    EPOCH_SIZE = 90
    BATCH_SIZE = 10
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_tr = float('inf')
    best_cr = 0.0
    # Bound up front: these are only assigned when the validation cost improves
    # but are read unconditionally in the final report.
    test_cr = 0.0
    test_conf = None
    adascale_param = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    integral_lens = compute_integral_len(train_vidlens)
    max_train_len = np.max(train_vidlens)  # loop invariant, hoisted out of the batch loop
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_audio, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_audio, idxs_test, test_vidlens,
                                         integral_lens_test, np.max(test_vidlens))

    # reshape the targets for validation: the loss gets one target per time
    # step, while evaluate_model2 keeps the original per-sequence targets
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_audio, batch_idxs, train_vidlens,
                                            integral_lens, max_train_len)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, WINDOW_SIZE)
            print('\r', end='')  # rewind so the next progress line overwrites this one

        # the epoch's training cost is measured on its last minibatch
        cost = compute_train_cost(X, y, m, X_diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: generalization loss, pk: training progress over the strip, pq: their ratio.
        # NOTE(review): train_strip starts as zeros, so pk/pq look non-finite
        # or meaningless until the strip has been filled - confirm this is acceptable.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, X_diff_val,
                                       WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_tr = cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(l_fuse, scaling_param=True)
            # only touch the test set when the validation cost has improved
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, X_diff_test,
                                                 WINDOW_SIZE, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    # test_conf stays None when no epoch ever improved the validation cost
    if test_conf is not None:
        plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                use_finetuning, 'yes', use_peepholes, 'adam', weight_init, 'RELU',
                use_blstm, learning_rate, best_tr, best_val, best_cr*100, test_cr*100))

            # one comma-separated line per history: train cost, val cost, class rate
            s = ','.join([str(v) for v in cost_train])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in cost_val])
            f.write('{}\n'.format(s))

            s = ','.join([str(v) for v in class_rate])
            f.write('{}\n'.format(s))
def main():
    """Train and evaluate the ``adenet_v3`` model (raw images, DCT, diff images).

    Settings come from the config file named by the ``config`` CLI option.
    The script splits the data into train and test partitions with
    ``create_split_index``, optionally fine-tunes / loads the encoders,
    builds the network, and trains with adadelta, learning-rate decay and
    early stopping.  The best validation result is printed and plotted, and
    appended to the file named by the ``write_results`` option when given.

    Raises:
        ValueError: if the configuration neither fine-tunes nor loads the
            encoders that the model needs.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    dct_data = load_mat_file(config.get('data', 'dct'))
    diff_data = load_mat_file(config.get('data', 'diff'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    ae_finetuned_diff = config.get('models', 'finetuned_diff')
    fusiontype = config.get('models', 'fusiontype')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')
    load_finetune_diff = config.getboolean('training', 'load_finetune_diff')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix']
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']
    dct_feats = dct_data['dctFeatures']
    diff_data_matrix = diff_data['dataMatrix']

    # samplewise normalize
    # print('sameplewise mean normalize...')
    # data_matrix = normalize_input(data_matrix)
    # diff_data_matrix = normalize_input(diff_data_matrix)
    # diff_data_matrix = compute_diff_images(data_matrix, vid_len_vec.reshape((-1,))).astype('float32')

    # mean remove
    # dct_feats = dct_feats[:, 0:30]
    # dct_feats = sequencewise_mean_image_subtraction(dct_feats, vid_len_vec.reshape((-1,)))

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)

    # sanity checks on the expected dataset layout
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # The explicit comparisons are kept on purpose: they yield boolean masks
    # whatever dtype create_split_index returns.  Computed once and reused.
    train_rows = indexes == True  # noqa: E712
    test_rows = indexes == False  # noqa: E712

    # split the data
    train_data = data_matrix[train_rows]
    train_targets = targets_vec[train_rows]
    train_targets = train_targets.reshape((len(train_targets), ))
    test_data = data_matrix[test_rows]
    test_targets = targets_vec[test_rows]
    test_targets = test_targets.reshape((len(test_targets), ))
    train_diff_data = diff_data_matrix[train_rows]
    test_diff_data = diff_data_matrix[test_rows]

    # split the dct features + featurewise mean normalize
    # (test set reuses the training statistics)
    train_dct = dct_feats[train_rows].astype(np.float32)
    test_dct = dct_feats[test_rows].astype(np.float32)
    train_dct, dct_mean, dct_std = featurewise_normalize_sequence(train_dct)
    test_dct = (test_dct - dct_mean) / dct_std

    # Encoders stay None until one of the config switches below provides them,
    # so a misconfiguration is reported explicitly instead of as a NameError.
    ae = None
    diff_ae = None

    if do_finetune:
        print('fine-tuning...')
        ae = load_dbn(ae_pretrained)
        ae.initialize()
        ae.fit(train_data, train_data)
        res = ae.predict(test_data)
        # print(res.shape)
        visualize_reconstruction(test_data[300:336], res[300:336])
        # Saving only makes sense right after fine-tuning; there is no encoder
        # to save otherwise.
        if save_finetune:
            with open(ae_finetuned, 'wb') as f:
                pickle.dump(ae, f)

    if load_finetune:
        print('loading pre-trained encoding layers...')
        # NOTE: pickle.load executes arbitrary code; only load trusted model files.
        with open(ae_finetuned, 'rb') as f:
            ae = pickle.load(f)
        ae.initialize()

    if load_finetune_diff:
        print('loading pre-trained diff image encoding layers...')
        with open(ae_finetuned_diff, 'rb') as f:
            diff_ae = pickle.load(f)
        diff_ae.initialize()

    if ae is None or diff_ae is None:
        raise ValueError(
            'missing encoder(s): enable do_finetune or load_finetune, and '
            'load_finetune_diff, in the [training] section of {}'.format(config_file))

    # optional convolutional autoencoder front end, disabled by default
    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs_raw = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs_raw = T.tensor3('inputs_raw', dtype='float32')

    # remaining symbolic inputs of the computation graph
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    window = T.iscalar('theta')
    dct = T.tensor3('dct', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # shared variable so the learning rate can be decayed between epochs
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    # earlier variants, kept for reference:
    # network = adenet_v1.create_model(dbn, (None, None, 1200), inputs, (None, None), mask,
    #                                  (None, None, 90), dct, 250, window)
    # network = deltanet.create_model(dbn, (None, None, 1200), inputs, (None, None), mask,
    #                                 250, window)
    # network = adenet_v2.create_model(dbn, (None, None, 1200), inputs, (None, None), mask,
    #                                  (None, None, 90), dct, 250, window)
    # network = adenet_v2.create_model(ae, (None, None, 1200), inputs_raw, (None, None), mask,
    #                                  (None, None, 90), dct, 250, window)
    network, l_fuse = adenet_v3.create_model(ae, diff_ae, (None, None, 1200), inputs_raw,
                                             (None, None), mask,
                                             (None, None, 90), dct,
                                             (None, None, 1200), inputs_diff,
                                             250, window, 26, fusiontype)
    print_network(network)
    draw_to_file(las.layers.get_all_layers(network), 'adenet_v3.png')

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    # optional max-norm constraint, disabled by default
    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        cost, allow_input_downcast=True)

    # deterministic=True is requested for the evaluation graph
    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs_raw, targets, mask, dct, inputs_diff, window],
        test_cost, allow_input_downcast=True)
    val_fn = theano.function([inputs_raw, mask, dct, inputs_diff, window],
                             test_predictions, allow_input_downcast=True)

    # Train for at most NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 25
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    WINDOW_SIZE = 9
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0
    adascale_param = None  # bound up front so the final report cannot hit a NameError

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))
    integral_lens = compute_integral_len(train_vidlen_vec)
    max_train_len = np.max(train_vidlen_vec)  # loop invariant, hoisted out of the batch loop

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(test_vidlen_vec)
    max_val_len = np.max(test_vidlen_vec)
    dct_val = gen_seq_batch_from_idx(test_dct, idxs_val, test_vidlen_vec,
                                     integral_lens_val, max_val_len)
    diff_val = gen_seq_batch_from_idx(test_diff_data, idxs_val, test_vidlen_vec,
                                      integral_lens_val, max_val_len)

    def early_stop(cost_window):
        """Return True when the costs in the window are strictly increasing."""
        if len(cost_window) < 2:
            return False
        else:
            curr = cost_window[0]
            for idx, cost in enumerate(cost_window):
                if curr < cost or idx == 0:
                    curr = cost
                else:
                    return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            # gather the DCT / diff-image sequences matching the sampled videos
            d = gen_seq_batch_from_idx(train_dct, batch_idxs, train_vidlen_vec,
                                       integral_lens, max_train_len)
            diff = gen_seq_batch_from_idx(train_diff_data, batch_idxs, train_vidlen_vec,
                                          integral_lens, max_train_len)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, d, diff, WINDOW_SIZE)
            print('\r', end='')  # rewind so the next progress line overwrites this one

        # the epoch's training cost is measured on its last minibatch
        cost = compute_train_cost(X, y, m, d, diff, WINDOW_SIZE)
        val_cost = compute_test_cost(X_val, y_val, mask_val, dct_val, diff_val, WINDOW_SIZE)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        # gl: generalization loss, pk: training progress over the strip, pq: their ratio.
        # NOTE(review): train_strip starts as zeros, so pk/pq look non-finite
        # or meaningless until the strip has been filled - confirm this is acceptable.
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, dct_val, diff_val,
                                      WINDOW_SIZE, val_fn)
        class_rate.append(cr)

        print(
            "Epoch {} train cost = {}, validation cost = {}, "
            "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
            .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                    time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch >= decay_start - 1:
            lr.set_value(lr.get_value() * lr_decay)

    letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(
        best_cr, best_val))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='latex')
    plot_validation_cost(cost_train, cost_val, class_rate, 'e2e_valid_cost')

    # .get() tolerates the option being absent (or empty) instead of raising KeyError
    results_file = options.get('write_results')
    if results_file:
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(fusiontype, best_cr, best_val))
def main():
    """Train the baseline LSTM classifier on autoencoder-encoded frames.

    Steps, in order:
      1. read ``config/separate_train.ini``;
      2. load the .mat dataset and split it by subject id into
         train / validation / test;
      3. optionally fine-tune, save and/or load the autoencoder (``dbn``);
      4. encode every split with the encoder extracted from ``dbn`` and
         normalise features with the training-set statistics;
      5. build and compile the LSTM classifier (adadelta);
      6. train with per-epoch validation, early stopping and learning-rate
         decay, then print/plot the final results.

    Raises:
        ValueError: if the config enables neither ``do_finetune`` nor
            ``load_finetune`` (no autoencoder would exist), or enables
            ``save_finetune`` without ``do_finetune``.
    """
    configure_theano()
    config_file = 'config/separate_train.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    lstm_units = int(config.get('training', 'lstm_units'))
    output_units = int(config.get('training', 'output_units'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y),))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects),))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens),))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))

    train_X, train_y, train_vidlens, train_subjects, \
        val_X, val_y, val_vidlens, val_subjects, \
        test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens,
                   train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity check: the three splits must partition the dataset exactly.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    # Every split goes through the same normalize_input() call. The
    # validation split used to be skipped, so the encoder received
    # differently preprocessed frames for the split that drives model
    # selection and early stopping.
    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    dbn = None
    if do_finetune:
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_X, train_X)
        recon = dbn.predict(test_X)
        visualize_reconstruction(reorder_data(test_X[800:864], (26, 44)),
                                 reorder_data(recon[800:864], (26, 44)),
                                 shape=(26, 44))

    if save_finetune:
        if dbn is None:
            raise ValueError('save_finetune is enabled but do_finetune is not: '
                             'there is no fine-tuned autoencoder to save')
        # Context manager so the file is flushed and closed deterministically.
        with open(ae_finetuned, 'wb') as model_file:
            pickle.dump(dbn, model_file)

    if load_finetune:
        print('loading pre-trained encoding layers...')
        # NOTE: pickle.load can execute arbitrary code; only point the
        # 'finetuned' config entry at model files you trust.
        with open(ae_finetuned, 'rb') as model_file:
            dbn = pickle.load(model_file)
        dbn.initialize()

    if dbn is None:
        raise ValueError('no autoencoder available: enable do_finetune or '
                         'load_finetune in {}'.format(config_file))

    encoder = extract_encoder(dbn)
    train_X = encoder.predict(train_X)
    val_X = encoder.predict(val_X)
    test_X = encoder.predict(test_X)

    # train_X = concat_first_second_deltas(train_X, train_vidlens)
    # val_X = concat_first_second_deltas(val_X, val_vidlens)
    # test_X = concat_first_second_deltas(test_X, test_vidlens)

    # featurewise normalize: statistics come from the training split only
    # and are re-used for validation and test.
    train_X, mean, std = featurewise_normalize_sequence(train_X)
    val_X = (val_X - mean) / std
    test_X = (test_X - mean) / std

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate lives in a shared variable so it can be decayed
    # between epochs without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing lstm classifier...')
    network = lstm_classifier_baseline.create_model((None, None, 50), inputs,
                                                    (None, None), mask,
                                                    lstm_units, output_units)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.apply_momentum(sgd(cost, all_params, learning_rate=lr), all_params, 0.1)

    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Training schedule: NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 10
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens, batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens, batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress.
    # The batch size equals the number of videos, so one draw is taken
    # from each generator and re-used every epoch.
    X_val, y_val, mask_val, _ = next(val_datagen)
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        """Return True when the window's costs are strictly increasing (in iteration order)."""
        if len(cost_window) < 2:
            return False
        curr = cost_window[0]
        for idx, window_cost in enumerate(cost_window):
            if curr < window_cost or idx == 0:
                curr = window_cost
            else:
                return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            batch_X, batch_y, batch_mask, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(batch_X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(batch_X, batch_y, batch_mask)
            print('\r', end='')

        # The epoch's training cost is measured on its last minibatch only.
        train_cost = compute_train_cost(batch_X, batch_y, batch_mask)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best seen so far (percent);
        # pk: training progress over the last STRIP_SIZE epochs;
        # pq: their quotient, printed as "GQ".
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            # The test set is only scored when validation cost improves.
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p{}'.format(idx) for idx in range(1, 11)]

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')
def main():
    """Train the baseline LSTM classifier on autoencoder-encoded frames.

    Steps, in order:
      1. read ``config/separate_train.ini``;
      2. load the .mat dataset and split it by subject id into
         train / validation / test;
      3. optionally fine-tune, save and/or load the autoencoder (``dbn``);
      4. encode every split with the encoder extracted from ``dbn`` and
         normalise features with the training-set statistics;
      5. build and compile the LSTM classifier (adadelta);
      6. train with per-epoch validation, early stopping and learning-rate
         decay, then print/plot the final results.

    Raises:
        ValueError: if the config enables neither ``do_finetune`` nor
            ``load_finetune`` (no autoencoder would exist), or enables
            ``save_finetune`` without ``do_finetune``.
    """
    configure_theano()
    config_file = 'config/separate_train.ini'
    print('loading config file: {}'.format(config_file))
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    lstm_units = int(config.get('training', 'lstm_units'))
    output_units = int(config.get('training', 'output_units'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')

    # 53 subjects, 70 utterances, 5 view angles
    # s[x]_v[y]_u[z].mp4
    # resized, height, width = (26, 44)
    # ['dataMatrix', 'targetH', 'targetsPerVideoVec', 'videoLengthVec', '__header__', 'targetsVec',
    # '__globals__', 'iterVec', 'filenamesVec', 'dataMatrixCells', 'subjectsVec', 'targetW', '__version__']
    print(data.keys())
    X = data['dataMatrix'].astype('float32')
    y = data['targetsVec'].astype('int32')
    y = y.reshape((len(y), ))
    uniques = np.unique(y)
    print('number of classifications: {}'.format(len(uniques)))
    subjects = data['subjectsVec'].astype('int')
    subjects = subjects.reshape((len(subjects), ))
    video_lens = data['videoLengthVec'].astype('int')
    video_lens = video_lens.reshape((len(video_lens), ))

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')
    print('Train: {}'.format(train_subject_ids))
    print('Validation: {}'.format(val_subject_ids))
    print('Test: {}'.format(test_subject_ids))

    train_X, train_y, train_vidlens, train_subjects, \
        val_X, val_y, val_vidlens, val_subjects, \
        test_X, test_y, test_vidlens, test_subjects = \
        split_data(X, y, subjects, video_lens,
                   train_subject_ids, val_subject_ids, test_subject_ids)

    # Sanity check: the three splits must partition the dataset exactly.
    assert train_X.shape[0] + val_X.shape[0] + test_X.shape[0] == len(X)
    assert train_y.shape[0] + val_y.shape[0] + test_y.shape[0] == len(y)
    assert train_vidlens.shape[0] + val_vidlens.shape[0] + test_vidlens.shape[0] == len(video_lens)
    assert train_subjects.shape[0] + val_subjects.shape[0] + test_subjects.shape[0] == len(subjects)

    # Every split goes through the same normalize_input() call. The
    # validation split used to be skipped, so the encoder received
    # differently preprocessed frames for the split that drives model
    # selection and early stopping.
    train_X = normalize_input(train_X, centralize=True)
    val_X = normalize_input(val_X, centralize=True)
    test_X = normalize_input(test_X, centralize=True)

    dbn = None
    if do_finetune:
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_X, train_X)
        recon = dbn.predict(test_X)
        visualize_reconstruction(reorder_data(test_X[800:864], (26, 44)),
                                 reorder_data(recon[800:864], (26, 44)),
                                 shape=(26, 44))

    if save_finetune:
        if dbn is None:
            raise ValueError('save_finetune is enabled but do_finetune is not: '
                             'there is no fine-tuned autoencoder to save')
        # Context manager so the file is flushed and closed deterministically.
        with open(ae_finetuned, 'wb') as model_file:
            pickle.dump(dbn, model_file)

    if load_finetune:
        print('loading pre-trained encoding layers...')
        # NOTE: pickle.load can execute arbitrary code; only point the
        # 'finetuned' config entry at model files you trust.
        with open(ae_finetuned, 'rb') as model_file:
            dbn = pickle.load(model_file)
        dbn.initialize()

    if dbn is None:
        raise ValueError('no autoencoder available: enable do_finetune or '
                         'load_finetune in {}'.format(config_file))

    encoder = extract_encoder(dbn)
    train_X = encoder.predict(train_X)
    val_X = encoder.predict(val_X)
    test_X = encoder.predict(test_X)

    # train_X = concat_first_second_deltas(train_X, train_vidlens)
    # val_X = concat_first_second_deltas(val_X, val_vidlens)
    # test_X = concat_first_second_deltas(test_X, test_vidlens)

    # featurewise normalize: statistics come from the training split only
    # and are re-used for validation and test.
    train_X, mean, std = featurewise_normalize_sequence(train_X)
    val_X = (val_X - mean) / std
    test_X = (test_X - mean) / std

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()

    inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate lives in a shared variable so it can be decayed
    # between epochs without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX),
                       name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing lstm classifier...')
    network = lstm_classifier_baseline.create_model(
        (None, None, 50), inputs, (None, None), mask, lstm_units, output_units)

    print_network(network)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.apply_momentum(sgd(cost, all_params, learning_rate=lr), all_params, 0.1)

    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function([inputs, targets, mask],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask],
                                         cost,
                                         allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(
        las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function([inputs, targets, mask],
                                        test_cost,
                                        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask],
                             test_predictions,
                             allow_input_downcast=True)

    # Training schedule: NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 120
    BATCH_SIZE = 10
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 10
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress.
    # The batch size equals the number of videos, so one draw is taken
    # from each generator and re-used every epoch.
    X_val, y_val, mask_val, _ = next(val_datagen)
    X_test, y_test, mask_test, _ = next(test_datagen)

    def early_stop(cost_window):
        """Return True when the window's costs are strictly increasing (in iteration order)."""
        if len(cost_window) < 2:
            return False
        curr = cost_window[0]
        for idx, window_cost in enumerate(cost_window):
            if curr < window_cost or idx == 0:
                curr = window_cost
            else:
                return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            batch_X, batch_y, batch_mask, _ = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(batch_X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(batch_X, batch_y, batch_mask)
            print('\r', end='')

        # The epoch's training cost is measured on its last minibatch only.
        train_cost = compute_train_cost(batch_X, batch_y, batch_mask)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best seen so far (percent);
        # pk: training progress over the last STRIP_SIZE epochs;
        # pq: their quotient, printed as "GQ".
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr
            # The test set is only scored when validation cost improves.
            test_cr, test_conf = evaluate_model(X_test, y_test, mask_test, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > decay_start:
            lr.set_value(lr.get_value() * lr_decay)

    phrases = ['p{}'.format(idx) for idx in range(1, 11)]

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, phrases, fmt='grid')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')
def main():
    """Train the two-stream (raw + difference image) fusion network.

    Reads the config file named by the CLI options, splits the dataset by
    subject, derives difference images and mean-subtracted frames for each
    split, builds the ``adenet_v2_2`` model on top of the two pretrained
    autoencoders and trains it with adam. The validation split drives
    early stopping; the test split is scored whenever validation cost
    improves. Results are optionally appended to the file given by the
    ``write_results`` option.
    """
    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))

    print('Reading Config File: {}...'.format(config_file))
    print(config.items('data'))
    print(config.items('models'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_pretrained_diff = config.get('models', 'pretrained_diff')
    fusiontype = config.get('models', 'fusiontype')

    # capture training parameters: a CLI option, when present, overrides
    # the value from the config file.
    validation_window = int(options['validation_window']) \
        if 'validation_window' in options else config.getint('training', 'validation_window')
    num_epoch = int(options['num_epoch']) \
        if 'num_epoch' in options else config.getint('training', 'num_epoch')
    weight_init = options['weight_init'] \
        if 'weight_init' in options else config.get('training', 'weight_init')
    # Cast like the integer overrides above so that a learning rate that
    # reaches us as text still ends up numeric (float() of a float is a
    # no-op, so an already-parsed value is unaffected).
    learning_rate = float(options['learning_rate']) \
        if 'learning_rate' in options else config.getfloat('training', 'learning_rate')
    use_peepholes = options['use_peepholes'] \
        if 'use_peepholes' in options else config.getboolean('training', 'use_peepholes')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')
    windowsize = config.getint('training', 'windowsize')

    # Map the configured name to an initialiser; unknown names fall back
    # to Glorot-uniform.
    weight_initialisers = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = weight_initialisers.get(weight_init, las.init.GlorotUniform)()

    train_subject_ids = read_data_split_file('data/train.txt')
    val_subject_ids = read_data_split_file('data/val.txt')
    test_subject_ids = read_data_split_file('data/test.txt')

    data_matrix = data['dataMatrix']
    targets_vec = data['targetsVec'].reshape((-1, ))
    subjects_vec = data['subjectsVec'].reshape((-1, ))
    vidlen_vec = data['videoLengthVec'].reshape((-1, ))

    data_matrix = reorder_data(data_matrix, (30, 50))

    train_X, train_y, train_vidlens, train_subjects, \
        val_X, val_y, val_vidlens, val_subjects, \
        test_X, test_y, test_vidlens, test_subjects = split_seq_data(
            data_matrix, targets_vec, subjects_vec, vidlen_vec,
            train_subject_ids, val_subject_ids, test_subject_ids)

    # Difference images are computed from the frames *before* the
    # per-sequence mean image is subtracted from them.
    train_X_diff = compute_diff_images(train_X, train_vidlens)
    val_X_diff = compute_diff_images(val_X, val_vidlens)
    test_X_diff = compute_diff_images(test_X, test_vidlens)

    train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
    val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
    test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    ae = load_dbn(ae_pretrained)
    ae_diff = load_dbn(ae_pretrained_diff)

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()

    window = T.iscalar('theta')
    inputs = T.tensor3('inputs', dtype='float32')
    inputs_diff = T.tensor3('inputs_diff', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network, l_fuse = adenet_v2_2.create_model(
        ae, ae_diff, (None, None, 1500), inputs, (None, None), mask,
        (None, None, 1500), inputs_diff, 250, window, 10, fusiontype,
        w_init_fn=weight_init_fn, use_peepholes=use_peepholes)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png')
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)
    updates = adam(cost, all_params, learning_rate=learning_rate)

    train = theano.function([inputs, targets, mask, inputs_diff, window],
                            cost,
                            updates=updates,
                            allow_input_downcast=True)
    compute_train_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        cost,
        allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs, targets, mask, inputs_diff, window],
        test_cost,
        allow_input_downcast=True)

    val_fn = theano.function([inputs, mask, inputs_diff, window],
                             test_predictions,
                             allow_input_downcast=True)

    # Training schedule: num_epoch epochs of epochsize minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE, ))
    best_val = float('inf')
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens,
                                    batchsize=batchsize)
    integral_lens = compute_integral_len(train_vidlens)

    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)
    integral_lens_val = compute_integral_len(val_vidlens)
    X_diff_val = gen_seq_batch_from_idx(val_X_diff, idxs_val, val_vidlens,
                                        integral_lens_val, np.max(val_vidlens))

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)
    integral_lens_test = compute_integral_len(test_vidlens)
    X_diff_test = gen_seq_batch_from_idx(test_X_diff, idxs_test, test_vidlens,
                                         integral_lens_test, np.max(test_vidlens))

    # reshape the targets for validation: the loss takes one target per
    # mask column, while evaluate_model2 is given the original vector.
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            X_diff = gen_seq_batch_from_idx(train_X_diff, batch_idxs,
                                            train_vidlens, integral_lens,
                                            np.max(train_vidlens))
            print_str = 'Epoch {} batch {}/{}: {} examples using adam'.format(
                epoch + 1, i + 1, epochsize, len(X))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, X_diff, windowsize)
            print('\r', end='')

        # The epoch's training cost is measured on its last minibatch only.
        train_cost = compute_train_cost(X, y, m, X_diff, windowsize)
        val_cost = compute_test_cost(X_val, y_val, mask_val, X_diff_val, windowsize)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best seen so far (percent);
        # pk: training progress over the last STRIP_SIZE epochs;
        # pq: their quotient, printed as "GQ".
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val,
                                       X_diff_val, windowsize, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            best_val = val_cost
            best_cr = cr
            if fusiontype == 'adasum':
                adascale_param = las.layers.get_all_param_values(
                    l_fuse, scaling_param=True)
            # The test set is only scored when validation cost improves.
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test,
                                                 X_diff_test, windowsize, val_fn)
            print(
                "Epoch {} train cost = {}, val cost = {}, "
                "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                        test_cr, time.time() - time_start))
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)".
                  format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                         time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

    numbers = [str(digit) for digit in range(10)]

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))
    if fusiontype == 'adasum':
        print("final scaling params: {}".format(adascale_param))
    print('confusion matrix: ')
    plot_confusion_matrix(test_conf, numbers, fmt='latex')
    plot_validation_cost(cost_train, cost_val, savefilename='valid_cost')

    if 'write_results' in options:
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))
def main():
    """Train the baseline end-to-end (autoencoder + LSTM) letter classifier.

    Reads ``config/normal.ini``, splits the dataset into train and test
    portions using the index produced by ``create_split_index``,
    optionally fine-tunes / saves / loads the autoencoder (``dbn``),
    builds ``baseline_end2end`` on top of it and trains with adadelta.
    The test portion doubles as the validation set for model selection,
    early stopping and the final confusion matrix.

    Raises:
        ValueError: if the config enables neither ``do_finetune`` nor
            ``load_finetune`` (no autoencoder would exist), or enables
            ``save_finetune`` without ``do_finetune``.
    """
    configure_theano()
    config_file = 'config/normal.ini'
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    print('loading config file: {}'.format(config_file))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('data', 'images'))
    ae_pretrained = config.get('models', 'pretrained')
    ae_finetuned = config.get('models', 'finetuned')
    learning_rate = float(config.get('training', 'learning_rate'))
    decay_rate = float(config.get('training', 'decay_rate'))
    decay_start = int(config.get('training', 'decay_start'))
    do_finetune = config.getboolean('training', 'do_finetune')
    save_finetune = config.getboolean('training', 'save_finetune')
    load_finetune = config.getboolean('training', 'load_finetune')

    # create the necessary variable mappings
    data_matrix = data['dataMatrix'].astype('float32')
    data_matrix_len = data_matrix.shape[0]
    targets_vec = data['targetsVec']
    vid_len_vec = data['videoLengthVec']
    iter_vec = data['iterVec']

    indexes = create_split_index(data_matrix_len, vid_len_vec, iter_vec)
    train_vidlen_vec, test_vidlen_vec = split_videolen(vid_len_vec, iter_vec)

    # Dataset-specific sanity checks: expected number of train / test
    # videos, and one row of data per video frame.
    assert len(train_vidlen_vec) == 520
    assert len(test_vidlen_vec) == 260
    assert np.sum(vid_len_vec) == data_matrix_len

    # split the data
    # The explicit comparisons are kept (rather than `indexes` / `~indexes`)
    # because the dtype returned by create_split_index is not visible here;
    # comparing always yields boolean masks.
    train_rows = indexes == True  # noqa: E712
    test_rows = indexes == False  # noqa: E712
    train_data = data_matrix[train_rows]
    train_targets = targets_vec[train_rows]
    train_targets = train_targets.reshape((len(train_targets),))
    test_data = data_matrix[test_rows]
    test_targets = targets_vec[test_rows]
    test_targets = test_targets.reshape((len(test_targets),))

    dbn = None
    if do_finetune:
        print('fine-tuning...')
        dbn = load_dbn(ae_pretrained)
        dbn.initialize()
        dbn.fit(train_data, train_data)
        res = dbn.predict(test_data)
        visualize_reconstruction(test_data[300:336], res[300:336])

    if save_finetune:
        if dbn is None:
            raise ValueError('save_finetune is enabled but do_finetune is not: '
                             'there is no fine-tuned autoencoder to save')
        # Context manager so the file is flushed and closed deterministically.
        with open(ae_finetuned, 'wb') as model_file:
            pickle.dump(dbn, model_file)

    if load_finetune:
        print('loading pre-trained encoding layers...')
        # NOTE: pickle.load can execute arbitrary code; only point the
        # 'finetuned' config entry at model files you trust.
        with open(ae_finetuned, 'rb') as model_file:
            dbn = pickle.load(model_file)
        dbn.initialize()

    if dbn is None:
        raise ValueError('no autoencoder available: enable do_finetune or '
                         'load_finetune in {}'.format(config_file))

    # Hard-wired switch: when enabled, the input variable is taken from a
    # saved convolutional encoder instead of a fresh tensor.
    load_convae = False
    if load_convae:
        print('loading pre-trained convolutional autoencoder...')
        encoder = load_model('models/conv_encoder_norm.dat')
        inputs = las.layers.get_all_layers(encoder)[0].input_var
    else:
        inputs = T.tensor3('inputs', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.ivector('targets')
    # The learning rate lives in a shared variable so it can be decayed
    # between epochs without recompiling the training function.
    lr = theano.shared(np.array(learning_rate, dtype=theano.config.floatX), name='learning_rate')
    lr_decay = np.array(decay_rate, dtype=theano.config.floatX)

    print('constructing end to end model...')
    network = baseline_end2end.create_model(dbn, (None, None, 1200), inputs,
                                            (None, None), mask, 250)

    print_network(network)
    # draw_to_file(las.layers.get_all_layers(network), 'network.png', verbose=True)
    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = T.mean(las.objectives.categorical_crossentropy(predictions, targets))
    updates = las.updates.adadelta(cost, all_params, learning_rate=lr)
    # updates = las.updates.adam(cost, all_params, learning_rate=lr)

    use_max_constraint = False
    if use_max_constraint:
        MAX_NORM = 4
        for param in las.layers.get_all_params(network, regularizable=True):
            if param.ndim > 1:  # only apply to dimensions larger than 1, exclude biases
                updates[param] = norm_constraint(
                    param, MAX_NORM * las.utils.compute_norms(param.get_value()).mean())

    train = theano.function(
        [inputs, targets, mask],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs, targets, mask], cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = T.mean(las.objectives.categorical_crossentropy(test_predictions, targets))
    compute_test_cost = theano.function(
        [inputs, targets, mask], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs, mask], test_predictions, allow_input_downcast=True)

    # Training schedule: NUM_EPOCHS epochs of EPOCH_SIZE minibatches each.
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    NUM_EPOCHS = 30
    EPOCH_SIZE = 20
    BATCH_SIZE = 26
    STRIP_SIZE = 3
    VALIDATION_WINDOW = 4
    val_window = circular_list(VALIDATION_WINDOW)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_conf = None
    best_cr = 0.0

    datagen = gen_lstm_batch_random(train_data, train_targets, train_vidlen_vec,
                                    batchsize=BATCH_SIZE)
    val_datagen = gen_lstm_batch_random(test_data, test_targets, test_vidlen_vec,
                                        batchsize=len(test_vidlen_vec))

    # We'll use this "validation set" to periodically check progress.
    # The batch size equals the number of test videos, so a single draw
    # is taken and re-used every epoch.
    X_val, y_val, mask_val, idxs_val = next(val_datagen)

    def early_stop(cost_window):
        """Return True when the window's costs are strictly increasing (in iteration order)."""
        if len(cost_window) < 2:
            return False
        curr = cost_window[0]
        for idx, window_cost in enumerate(cost_window):
            if curr < window_cost or idx == 0:
                curr = window_cost
            else:
                return False
        return True

    for epoch in range(NUM_EPOCHS):
        time_start = time.time()
        for i in range(EPOCH_SIZE):
            X, y, m, batch_idxs = next(datagen)
            print_str = 'Epoch {} batch {}/{}: {} examples at learning rate = {:.4f}'.format(
                epoch + 1, i + 1, EPOCH_SIZE, len(X), float(lr.get_value()))
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m)
            print('\r', end='')

        # The epoch's training cost is measured on its last minibatch only.
        train_cost = compute_train_cost(X, y, m)
        val_cost = compute_test_cost(X_val, y_val, mask_val)
        cost_train.append(train_cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = train_cost
        val_window.push(val_cost)

        # gl: validation cost relative to the best seen so far (percent);
        # pk: training progress over the last STRIP_SIZE epochs;
        # pq: their quotient, printed as "GQ".
        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
        pq = gl / pk

        cr, val_conf = evaluate_model(X_val, y_val, mask_val, val_fn)
        class_rate.append(cr)

        print("Epoch {} train cost = {}, validation cost = {}, "
              "generalization loss = {:.3f}, GQ = {:.3f}, classification rate = {:.3f} ({:.1f}sec)"
              .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                      time.time() - time_start))

        if val_cost < best_val:
            best_val = val_cost
            best_conf = val_conf
            best_cr = cr

        if epoch >= VALIDATION_WINDOW and early_stop(val_window):
            break

        # learning rate decay
        if epoch > decay_start:  # 20, 8
            lr.set_value(lr.get_value() * lr_decay)

    letters = list('abcdefghijklmnopqrstuvwxyz')

    print('Best Model')
    print('classification rate: {}, validation loss: {}'.format(best_cr, best_val))
    print('confusion matrix: ')
    plot_confusion_matrix(best_conf, letters, fmt='grid')
    plot_validation_cost(cost_train, cost_val, class_rate, 'e2e_valid_cost')
def main():
    """Train and evaluate the single-stream LSTM classifier end to end.

    The run is driven by a config file (path taken from the ``config`` CLI
    option) with ``stream1``, ``lstm_classifier`` and ``training`` sections.
    A handful of settings (``weight_init``, ``use_peepholes``,
    ``validation_window``, ``num_epoch``, ``learning_rate``) can be
    overridden from the command line.

    Steps, in order:

    1. Load the ``.mat`` dataset and split it into train/val/test by the
       subject ids listed in the configured split files.
    2. Apply the optional preprocessing steps enabled in the config
       (reordering, mean removal, diff images, normalisation).
    3. Build the network with ``deltanet_majority_vote.create_model`` on top
       of the pretrained ``stream1`` model and compile the Theano functions.
    4. Train for up to ``num_epoch`` epochs of ``epochsize`` minibatches,
       remembering the parameters of the epoch with the lowest validation
       cost and stopping early when ``early_stop2`` says so.
    5. Print the final report and, depending on the CLI options
       ``save_plot``, ``write_results`` and ``save_best``, write the plot,
       confusion matrix, result line and best model parameters to disk.
    """

    def _cli_bool(value):
        # CLI values presumably arrive as strings (the integer options are
        # cast with int() below), so 'False'/'0'/'no'/'off' must not be
        # treated as truthy.  Anything else keeps its normal truth value.
        if isinstance(value, str) and value.strip().lower() in ('0', 'false', 'no', 'off'):
            return False
        return bool(value)

    configure_theano()
    options = parse_options()
    config_file = options['config']
    config = ConfigParser.ConfigParser()
    config.read(config_file)

    print('CLI options: {}'.format(options.items()))
    print('Reading Config File: {}...'.format(config_file))
    print(config.items('stream1'))
    print(config.items('lstm_classifier'))
    print(config.items('training'))

    print('preprocessing dataset...')
    data = load_mat_file(config.get('stream1', 'data'))
    stream1 = config.get('stream1', 'model')
    imagesize = tuple([int(d) for d in config.get('stream1', 'imagesize').split(',')])
    stream1_dim = config.getint('stream1', 'input_dimensions')
    stream1_shape = config.get('stream1', 'shape')
    stream1_nonlinearities = config.get('stream1', 'nonlinearities')

    # lstm classifier
    output_classes = config.getint('lstm_classifier', 'output_classes')
    output_classnames = config.get('lstm_classifier', 'output_classnames').split(',')
    lstm_size = config.getint('lstm_classifier', 'lstm_size')
    matlab_target_offset = config.getboolean('lstm_classifier', 'matlab_target_offset')

    # data preprocessing options
    reorderdata = config.getboolean('stream1', 'reorderdata')
    diffimage = config.getboolean('stream1', 'diffimage')
    meanremove = config.getboolean('stream1', 'meanremove')
    samplewisenormalize = config.getboolean('stream1', 'samplewisenormalize')
    featurewisenormalize = config.getboolean('stream1', 'featurewisenormalize')

    # lstm classifier configurations (CLI value wins over the config file)
    if 'weight_init' in options:
        weight_init = options['weight_init']
    else:
        weight_init = config.get('lstm_classifier', 'weight_init')
    if 'use_peepholes' in options:
        use_peepholes = _cli_bool(options['use_peepholes'])
    else:
        use_peepholes = config.getboolean('lstm_classifier', 'use_peepholes')
    windowsize = config.getint('lstm_classifier', 'windowsize')

    # capture training parameters (CLI value wins over the config file)
    if 'validation_window' in options:
        validation_window = int(options['validation_window'])
    else:
        validation_window = config.getint('training', 'validation_window')
    if 'num_epoch' in options:
        num_epoch = int(options['num_epoch'])
    else:
        num_epoch = config.getint('training', 'num_epoch')
    if 'learning_rate' in options:
        # cast like the other numeric overrides so downstream code always
        # sees a float, never the raw CLI string
        learning_rate = float(options['learning_rate'])
    else:
        learning_rate = config.getfloat('training', 'learning_rate')
    epochsize = config.getint('training', 'epochsize')
    batchsize = config.getint('training', 'batchsize')

    # Map the configured name to an initializer; unknown names fall back to
    # Glorot uniform.
    weight_init_factories = {
        'glorot': las.init.GlorotUniform,
        'norm': lambda: las.init.Normal(0.1),
        'uniform': las.init.Uniform,
        'ortho': las.init.Orthogonal,
    }
    weight_init_fn = weight_init_factories.get(weight_init, las.init.GlorotUniform)()

    train_subject_ids = read_data_split_file(config.get('training', 'train_subjects_file'))
    val_subject_ids = read_data_split_file(config.get('training', 'val_subjects_file'))
    test_subject_ids = read_data_split_file(config.get('training', 'test_subjects_file'))

    data_matrix = data['dataMatrix'].astype('float32')
    targets_vec = data['targetsVec'].reshape((-1,))
    subjects_vec = data['subjectsVec'].reshape((-1,))
    vidlen_vec = data['videoLengthVec'].reshape((-1,))

    if reorderdata:
        data_matrix = reorder_data(data_matrix, (imagesize[0], imagesize[1]))

    train_X, train_y, train_vidlens, train_subjects, \
        val_X, val_y, val_vidlens, val_subjects, \
        test_X, test_y, test_vidlens, test_subjects = split_seq_data(
            data_matrix, targets_vec, subjects_vec, vidlen_vec,
            train_subject_ids, val_subject_ids, test_subject_ids)

    if matlab_target_offset:
        # shift the targets down by one (flag name suggests the stored
        # labels are 1-based MATLAB indices)
        train_y -= 1
        val_y -= 1
        test_y -= 1

    if meanremove:
        train_X = sequencewise_mean_image_subtraction(train_X, train_vidlens)
        val_X = sequencewise_mean_image_subtraction(val_X, val_vidlens)
        test_X = sequencewise_mean_image_subtraction(test_X, test_vidlens)

    if diffimage:
        train_X = compute_diff_images(train_X, train_vidlens)
        val_X = compute_diff_images(val_X, val_vidlens)
        test_X = compute_diff_images(test_X, test_vidlens)

    if samplewisenormalize:
        train_X = normalize_input(train_X)
        val_X = normalize_input(val_X)
        test_X = normalize_input(test_X)

    if featurewisenormalize:
        # statistics come from the training set only and are reused for
        # the validation and test sets
        train_X, mean, std = featurewise_normalize_sequence(train_X)
        val_X = (val_X - mean) / std
        test_X = (test_X - mean) / std

    ae1 = load_decoder(stream1, stream1_shape, stream1_nonlinearities)

    # IMPT: the encoder was trained with fortan ordered images, so to visualize
    # convert all the images to C order using reshape_images_order()

    window = T.iscalar('theta')
    inputs1 = T.tensor3('inputs1', dtype='float32')
    mask = T.matrix('mask', dtype='uint8')
    targets = T.imatrix('targets')

    print('constructing end to end model...')
    network = deltanet_majority_vote.create_model(ae1, (None, None, stream1_dim), inputs1,
                                                  (None, None), mask,
                                                  lstm_size, window, output_classes,
                                                  weight_init_fn, use_peepholes)
    print_network(network)

    print('compiling model...')
    predictions = las.layers.get_output(network, deterministic=False)
    all_params = las.layers.get_all_params(network, trainable=True)
    cost = temporal_softmax_loss(predictions, targets, mask)

    # Adam with per-layer learning rates: fc1-fc3 get their own shared
    # variables, every other parameter uses the default rate.
    default_learning_rate = theano.shared(las.utils.floatX(learning_rate), 'default_lr')
    lr_config = {
        'fc1': theano.shared(las.utils.floatX(0.001)),
        'fc2': theano.shared(las.utils.floatX(0.001)),
        'fc3': theano.shared(las.utils.floatX(0.001))
    }
    lr_map = custom.updates.generate_lr_map(all_params, lr_config, default_learning_rate)
    updates = custom.updates.adam_vlr(cost, all_params, lr_map)

    train = theano.function(
        [inputs1, targets, mask, window],
        cost, updates=updates, allow_input_downcast=True)
    compute_train_cost = theano.function([inputs1, targets, mask, window],
                                         cost, allow_input_downcast=True)

    test_predictions = las.layers.get_output(network, deterministic=True)
    test_cost = temporal_softmax_loss(test_predictions, targets, mask)
    compute_test_cost = theano.function(
        [inputs1, targets, mask, window], test_cost, allow_input_downcast=True)

    val_fn = theano.function([inputs1, mask, window], test_predictions,
                             allow_input_downcast=True)

    # Train for at most num_epoch epochs of epochsize minibatches each
    print('begin training...')
    cost_train = []
    cost_val = []
    class_rate = []
    STRIP_SIZE = 3
    val_window = circular_list(validation_window)
    train_strip = np.zeros((STRIP_SIZE,))
    best_val = float('inf')
    best_cr = 0.0
    # Stays None until some epoch improves on best_val; checked after the loop.
    best_params = None

    datagen = gen_lstm_batch_random(train_X, train_y, train_vidlens, batchsize=batchsize)
    val_datagen = gen_lstm_batch_random(val_X, val_y, val_vidlens,
                                        batchsize=len(val_vidlens))
    test_datagen = gen_lstm_batch_random(test_X, test_y, test_vidlens,
                                         batchsize=len(test_vidlens))

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val, idxs_val = next(val_datagen)

    # we use the test set to check final classification rate
    X_test, y_test, mask_test, idxs_test = next(test_datagen)

    # reshape the targets for validation: one target per time step for the
    # loss, while the per-sequence targets are kept for evaluate_model2
    y_val_evaluate = y_val
    y_val = y_val.reshape((-1, 1)).repeat(mask_val.shape[-1], axis=-1)

    for epoch in range(num_epoch):
        time_start = time.time()
        for i in range(epochsize):
            X, y, m, batch_idxs = next(datagen)
            # repeat targets based on max sequence len
            y = y.reshape((-1, 1))
            y = y.repeat(m.shape[-1], axis=-1)
            print_str = 'Epoch {} batch {}/{}: {} examples using adam with learning rate = {}'.format(
                epoch + 1, i + 1, epochsize, len(X), learning_rate)
            print(print_str, end='')
            sys.stdout.flush()
            train(X, y, m, windowsize)
            print('\r', end='')

        # the training cost is measured on the last minibatch of the epoch
        cost = compute_train_cost(X, y, m, windowsize)
        val_cost = compute_test_cost(X_val, y_val, mask_val, windowsize)
        cost_train.append(cost)
        cost_val.append(val_cost)
        train_strip[epoch % STRIP_SIZE] = cost
        val_window.push(val_cost)

        gl = 100 * (cost_val[-1] / np.min(cost_val) - 1)
        # train_strip starts out as zeros, so its minimum is 0 until the
        # strip has been filled once; the resulting divide-by-zero is
        # expected (pk -> inf, pq -> 0) and only feeds the log line, so
        # keep the arithmetic and just silence the warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            pk = 1000 * (np.sum(train_strip) / (STRIP_SIZE * np.min(train_strip)) - 1)
            pq = gl / pk

        cr, val_conf = evaluate_model2(X_val, y_val_evaluate, mask_val, windowsize, val_fn)
        class_rate.append(cr)

        if val_cost < best_val:
            # new best validation cost: score the test set and snapshot
            # the parameters
            best_val = val_cost
            best_cr = cr
            test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, windowsize, val_fn)
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f}, Test CR= {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr, test_cr,
                          time.time() - time_start))
            best_params = las.layers.get_all_param_values(network)
        else:
            print("Epoch {} train cost = {}, val cost = {}, "
                  "GL loss = {:.3f}, GQ = {:.3f}, CR = {:.3f} ({:.1f}sec)"
                  .format(epoch + 1, cost_train[-1], cost_val[-1], gl, pq, cr,
                          time.time() - time_start))

        if epoch >= validation_window and early_stop2(val_window, best_val, validation_window):
            break

        # Show that learning rates are changed by exploding learning rates for encoder layers
        # The training loss should increase dramatically and learning should diverge
        if epoch + 1 == 4:
            print('explode fc1,fc2,fc3 learning rates to 100.0')
            lr_config['fc1'].set_value(100.0)
            lr_config['fc2'].set_value(100.0)
            lr_config['fc3'].set_value(100.0)

    if best_params is None:
        # No epoch improved on the initial best_val (num_epoch == 0, or the
        # validation cost was never a comparable number such as NaN).  Fall
        # back to the current weights so the report below and the optional
        # save still work instead of failing on unbound names.
        test_cr, test_conf = evaluate_model2(X_test, y_test, mask_test, windowsize, val_fn)
        best_params = las.layers.get_all_param_values(network)

    print('Final Model')
    print('CR: {}, val loss: {}, Test CR: {}'.format(best_cr, best_val, test_cr))

    # plot confusion matrix
    table_str = plot_confusion_matrix(test_conf, output_classnames, fmt='pipe')
    print('confusion matrix: ')
    print(table_str)

    if 'save_plot' in options:
        prefix = options['save_plot']
        plot_validation_cost(cost_train, cost_val, savefilename='{}.validloss.png'.format(prefix))
        # appended, so repeated runs accumulate in the same file
        with open('{}.confmat.txt'.format(prefix), mode='a') as f:
            f.write(table_str)
            f.write('\n\n')

    if 'write_results' in options:
        print('writing results to {}'.format(options['write_results']))
        results_file = options['write_results']
        with open(results_file, mode='a') as f:
            f.write('{},{},{}\n'.format(test_cr, best_cr, best_val))

    if 'save_best' in options:
        print('saving best model...')
        # restore the snapshot taken at the best validation cost before saving
        las.layers.set_all_param_values(network, best_params)
        save_model_params(network, options['save_best'])
        print('best model saved to {}'.format(options['save_best']))