from __future__ import print_function

import os
import pickle
import time

import theano
import theano.tensor as T
import lasagne

# Helper functions (load_meeting_data, build_mlp, build_custom_mlp,
# build_cnn, iterate_minibatches) are defined elsewhere in this module.


def main(image_root_dir, labels_dir, output_dir, model='mlp', num_epochs=10,
         batch_size=100, sample_rate=0, meeting_base='ES2014a'):
    launch_time = time.strftime('%m%d_%H%M')
    output_dir = os.path.join(output_dir, launch_time)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    file_identifier = meeting_base + '_' + model + '_' + str(num_epochs)
    log_file = os.path.join(output_dir, file_identifier + '.log')
    program_start = time.time()

    # Read in data.
    print('Loading data...')
    if sample_rate < 1:
        sample_rate = None  # slice data by this factor; None keeps everything
    global height
    global width
    height, width, X_train, y_train, X_val, y_val, X_test, y_test = \
        load_meeting_data(image_root_dir, labels_dir, meeting_base, sample_rate)

    output = meeting_base + '\n\n' + time.strftime('%Y-%m-%d %H:%M:%S\n')

    # Prepare Theano variables for inputs and targets.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create the neural network model (depending on the model parameter).
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var)
    elif model.startswith('custom_mlp:'):
        # Use a distinct name for the hidden-layer width so it does not
        # clobber the global image width set above.
        depth, hidden_width, drop_in, drop_hid = \
            model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(hidden_width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model)
        return
    output += model + '\n' + str(num_epochs) + '\n'
    with open(log_file, 'w+') as outf:
        outf.write(output)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
print("Starting training...") # We iterate over epochs: for epoch in range(num_epochs): print('Starting epoch {} of {}'.format(epoch+1, num_epochs)) # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() i = 0 for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True): print(i) i += 1 inputs, targets = batch train_err += train_fn(inputs, targets) train_batches += 1 print 'Done training' # And a full pass over the validation data: val_err = 0 val_acc = 0 val_batches = 0 j = 0 print('Starting validation...') for batch in iterate_minibatches(X_val, y_val, batch_size, shuffle=False): print(j) j += 1 inputs, targets = batch err, acc = val_fn(inputs, targets) val_err += err val_acc += acc val_batches += 1 print('Done validating') # Then we print the results for this epoch: epoch_output = "Epoch {} of {} took {:.3f}s\n".format( epoch + 1, num_epochs, time.time() - start_time) epoch_output += " training loss:\t\t{:.6f}\n".format(train_err / train_batches) epoch_output += " validation loss:\t\t{:.6f}\n".format(val_err / val_batches) epoch_output += " validation accuracy:\t\t{:.2f} %\n".format(val_acc / val_batches * 100) print(epoch_output) with open(log_file, 'a+') as outf: outf.write(epoch_output) # After training, we compute and print the test error: test_err = 0 test_acc = 0 test_batches = 0 for batch in iterate_minibatches(X_test, y_test, batch_size, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) test_err += err test_acc += acc test_batches += 1 final_output = "Final results:\n" final_output += " test loss:\t\t\t{:.6f}\n".format(test_err / test_batches) final_output += " test accuracy:\t\t{:.2f} %\n".format( test_acc / test_batches * 100) print(final_output) with open(log_file, 'a+') as outf: outf.write(final_output + '\n\n\n\n------------------\n') # output += final_output + '\n\n\n\n--------------------------\n' # with open(os.path.join(output_dir, 'log.txt'), 'a+') as outf: # outf.write(output) outfile = os.path.join(output_dir, file_identifier + '.pickle') with open(outfile, 'wb') as outf: pickle.dump(network, outf) final_output = 'Wrote model to {}\n'.format(outfile) final_output += 'Took {} seconds'.format(time.time() - program_start) print(final_output) with open(log_file, 'a+') as outf: outf.write(final_output + '\n\n\n\n------------------\n') ipdb.set_trace()