def main(train_file, logit_folder, val_file, savename, num_epochs=500,
         margin=25, base=0.01, mb_size=50, momentum=0.9, temp=1,
         loss_type='VPPD', preproc=True, hw=0.1, synsets=None,
         modelFile='./myModel.pkl'):
    """Train the network on soft and hard targets and save it to disk.

    The network built by ``build_cnn`` yields a (soft, hard) pair of
    predictions.  Both are fed to ``losses`` together with the soft and hard
    targets, and the result is minimised with Nesterov momentum.  After every
    epoch the deterministic hard prediction is scored against the validation
    labels and the model is written to ``modelFile``.

    Args:
        train_file: Passed to ``hd.get_traindata`` to obtain training
            addresses and labels.
        logit_folder: Passed to ``distillation_generator`` as the source of
            the soft targets.
        val_file: Passed to ``hd.get_valdata``.
        savename: Forwarded to every ``save_errors`` call.
        num_epochs: Number of passes over the training data.
        margin, base: Forwarded to ``get_learning_rate(epoch, margin, base)``.
        mb_size: Forwarded to both data generators.
        momentum: Momentum for ``lasagne.updates.nesterov_momentum``.
        temp: Temperature handed to ``build_cnn``, ``losses`` and the
            training generator.
        loss_type, hw: Forwarded to ``losses``.
        preproc: Forwarded to the training generator only; validation always
            uses ``preproc=False``.
        synsets: Forwarded to ``hd.get_traindata`` and the training generator.
        modelFile: Destination passed to ``save_model``.
    """
    print('Using temperature: %f' % (temp, ))
    print('Loss type: %s' % (loss_type, ))
    print('Model file: %s' % (modelFile, ))
    print("Loading data...")
    tr_addresses, tr_labels = hd.get_traindata(train_file, synsets)
    vl_addresses, vl_labels = hd.get_valdata(val_file)

    # Variables
    input_var = T.tensor4('inputs')
    soft_target = T.fmatrix('soft_target')
    hard_target = T.ivector('hard_target')
    learning_rate = T.fscalar('learning_rate')
    im_shape = (227, 227)
    # Running statistics are handed to save_errors every this many batches.
    save_every = 257

    print("Building model and compiling functions...")
    network = build_cnn(im_shape, temp, input_var=input_var)

    # Losses and updates
    soft_prediction, hard_prediction = lasagne.layers.get_output(
        network, deterministic=False)
    _, test_prediction = lasagne.layers.get_output(network,
                                                   deterministic=True)
    loss = losses(soft_prediction, hard_prediction, soft_target, hard_target,
                  temp, hw, loss_type)
    params = lasagne.layers.get_all_params(network)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=learning_rate,
                                                momentum=momentum)
    # Training accuracy compares against the arg-max of the soft target;
    # validation accuracy compares against the integer labels.
    train_acc = T.mean(T.eq(T.argmax(soft_prediction, axis=1),
                            T.argmax(soft_target, axis=1)),
                       dtype=theano.config.floatX)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), hard_target),
                      dtype=theano.config.floatX)

    # Theano functions
    train_fn = theano.function(
        [input_var, soft_target, hard_target, learning_rate],
        [loss, train_acc], updates=updates)
    val_fn = theano.function([input_var, hard_target], test_acc)

    print("Starting training...")
    # start_time drives the cumulative h:m:s progress clock; each epoch keeps
    # its own start so the per-epoch summary reports that epoch's duration.
    start_time = time.time()
    for epoch in range(num_epochs):
        epoch_start = time.time()
        # Kept under a separate name so the symbolic `learning_rate` above is
        # not rebound to a plain number.
        lr = get_learning_rate(epoch, margin, base)

        # Full pass over the training data.
        train_err = 0
        train_batches = 0
        running_error = []
        running_acc = []
        trdlg = distillation_generator(tr_addresses, logit_folder, im_shape,
                                       mb_size, temp=temp, preproc=preproc,
                                       shuffle=True, synsets=synsets)
        for inputs, soft, hard in hd.threaded_gen(trdlg, num_cached=500):
            local_train_err, acc = train_fn(inputs, soft, hard, lr)
            train_err += local_train_err
            running_error.append(local_train_err)
            running_acc.append(acc)
            h, m, s = theTime(start_time)
            train_batches += 1
            if train_batches % save_every == 0:
                save_errors(savename, running_error, err_type='error')
                save_errors(savename, running_acc, err_type='acc')
                running_error = []
                running_acc = []
            sys.stdout.write(
                'Time: %d:%02d:%02d Minibatch: %i Training Error: %f\r' %
                (h, m, s, train_batches, train_err / train_batches))
            sys.stdout.flush()
        # Terminate the '\r' progress line.  A bare `print` only does this as
        # a Python 2 statement; write() behaves the same everywhere.
        sys.stdout.write('\n')

        # Full pass over the validation data.
        val_acc = 0.
        val_batches = 0
        vldlg = hd.data_and_label_generator(vl_addresses, vl_labels, im_shape,
                                            mb_size, shuffle=False,
                                            preproc=False)
        for inputs, targets in hd.threaded_gen(vldlg, num_cached=50):
            val_acc += val_fn(inputs, targets)
            val_batches += 1
            sys.stdout.write('Minibatch: %i Validation Accuracy: %f\r' %
                             (val_batches, val_acc / val_batches * 100))
            sys.stdout.flush()
        # max(..., 1) keeps the summary from dividing by zero when a
        # generator yielded no batches.
        mean_val_acc = val_acc / max(val_batches, 1)
        mean_train_err = train_err / max(train_batches, 1)
        running_val_acc = [mean_val_acc]
        save_errors(savename, running_val_acc, err_type='val_acc')
        sys.stdout.write('\n')

        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - epoch_start))
        print(" train loss:\t\t{:.6f}".format(mean_train_err))
        print(" valid acc:\t\t{:.6f}".format(mean_val_acc * 100.))
        # Save after every epoch so an interrupted run keeps its progress.
        save_model(network, modelFile)
def main(train_file, logit_folder, val_file, savename, num_epochs=500,
         margin=25, base=0.01, mb_size=50, momentum=0.9, synsets=None,
         preproc=False, loss_type='crossentropy', bridgecoeff=1.):
    """Train the network on soft targets with an added regularization term.

    The objective is ``losses(prediction, soft_target, loss_type)`` plus
    ``bridgecoeff * regularization(prediction, t)`` with ``t = 10``.  It is
    minimised with Nesterov momentum, and every parameter named ``'W'`` is
    additionally max-norm constrained after each step.  After every epoch the
    deterministic prediction is scored against the validation labels.

    Args:
        train_file: Passed to ``hd.get_traindata``.
        logit_folder: Passed to ``hd.data_target_generator`` as the source of
            the soft targets.
        val_file: Passed to ``hd.get_valdata``.
        savename: Forwarded to every ``save_errors`` call.
        num_epochs: Number of passes over the training data.
        margin, base: Forwarded to ``get_learning_rate(epoch, margin, base)``.
        mb_size: Forwarded to both data generators.
        momentum: Momentum for ``lasagne.updates.nesterov_momentum``.
        synsets: Forwarded to ``hd.get_traindata`` and the training generator.
        preproc: Forwarded to the training generator.
        loss_type: Forwarded to ``losses``.
        bridgecoeff: Weight of the ``regularization`` term in the loss.
    """
    print("Loading data...")
    print('Loss type: %s' % (loss_type,))
    print('Bridge coeff: %s' % (bridgecoeff,))
    print('Save name: %s' % (savename,))
    tr_addresses, tr_labels = hd.get_traindata(train_file, synsets)
    vl_addresses, vl_labels = hd.get_valdata(val_file)

    # Variables
    input_var = T.tensor4('inputs')
    soft_target = T.fmatrix('soft_target')
    hard_target = T.ivector('hard_target')
    learning_rate = T.fscalar('learning_rate')
    im_shape = (227, 227)
    max_norm = 3.87
    t = 10.
    # Running statistics are handed to save_errors every this many batches.
    save_every = 257

    print("Building model and compiling functions...")
    network = build_cnn(im_shape, input_var=input_var)

    # Losses and updates
    prediction = lasagne.layers.get_output(network, deterministic=False)
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    loss = losses(prediction, soft_target, loss_type)
    loss += bridgecoeff * regularization(prediction, t)
    train_acc = T.mean(T.eq(T.argmax(prediction, axis=1),
                            T.argmax(soft_target, axis=1)),
                       dtype=theano.config.floatX)
    params = lasagne.layers.get_all_params(network)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=learning_rate,
                                                momentum=momentum)
    # The constraint has to wrap the *update expression* of each weight.
    # Rebinding the loop variable (as the code used to do) leaves `params`
    # and `updates` untouched, so no constraint was ever applied.
    for param in params:
        if param.name == 'W':
            updates[param] = lasagne.updates.norm_constraint(
                updates[param], max_norm, epsilon=1e-3)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), hard_target),
                      dtype=theano.config.floatX)

    # Theano functions
    train_fn = theano.function([input_var, soft_target, learning_rate],
                               [loss, train_acc], updates=updates)
    val_fn = theano.function([input_var, hard_target], test_acc)

    print("Starting training...")
    # start_time drives the cumulative h:m:s progress clock; each epoch keeps
    # its own start so the per-epoch summary reports that epoch's duration.
    start_time = time.time()
    for epoch in range(num_epochs):
        epoch_start = time.time()
        # Kept under a separate name so the symbolic `learning_rate` above is
        # not rebound to a plain number.
        lr = get_learning_rate(epoch, margin, base)

        # Full pass over the training data.
        train_err = 0
        train_batches = 0
        running_error = []
        running_acc = []
        trdlg = hd.data_target_generator(tr_addresses, logit_folder, im_shape,
                                         mb_size, preproc=preproc,
                                         shuffle=True, synsets=synsets)
        for inputs, soft in hd.threaded_gen(trdlg, num_cached=500):
            local_train_err, acc = train_fn(inputs, soft, lr)
            train_err += local_train_err
            running_error.append(local_train_err)
            running_acc.append(acc)
            h, m, s = theTime(start_time)
            train_batches += 1
            if train_batches % save_every == 0:
                save_errors(savename, running_error, err_type='error')
                save_errors(savename, running_acc, err_type='acc')
                running_error = []
                running_acc = []
            sys.stdout.write(
                'Time: %d:%02d:%02d Minibatch: %i Training Error: %f\r' %
                (h, m, s, train_batches, train_err / train_batches))
            sys.stdout.flush()
        # Terminate the '\r' progress line.  A bare `print` only does this as
        # a Python 2 statement; write() behaves the same everywhere.
        sys.stdout.write('\n')

        # Full pass over the validation data.
        val_acc = 0.
        val_batches = 0
        vldlg = hd.data_and_label_generator(vl_addresses, vl_labels, im_shape,
                                            mb_size)
        for inputs, targets in hd.threaded_gen(vldlg, num_cached=50):
            val_acc += val_fn(inputs, targets)
            val_batches += 1
            sys.stdout.write('Minibatch: %i Validation Accuracy: %f\r' %
                             (val_batches, val_acc / val_batches * 100))
            sys.stdout.flush()
        # max(..., 1) keeps the summary from dividing by zero when a
        # generator yielded no batches.
        mean_val_acc = val_acc / max(val_batches, 1)
        mean_train_err = train_err / max(train_batches, 1)
        running_val_acc = [mean_val_acc]
        save_errors(savename, running_val_acc, err_type='val_acc')
        sys.stdout.write('\n')

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - epoch_start))
        print(" train loss:\t\t{:.6f}".format(mean_train_err))
        print(" valid acc:\t\t{:.6f}".format(mean_val_acc * 100.))
def main(train_file, val_file, savename, synmap_file, num_epochs=500,
         alpha=0.1, margin=25, base=0.01, mb_size=50, momentum=0.9,
         synsets=None):
    """Train the network on hard labels with a regularized cross-entropy.

    Labels from both splits are remapped through the synset map loaded from
    ``synmap_file``.  The objective is the mean categorical cross-entropy plus
    the mean of ``regularization(prediction, alpha / N)``, where ``N`` is the
    number of training examples.  It is minimised with
    ``clipped_nesterov_momentum`` using ``max_grad = 1``.  After every epoch
    the deterministic prediction is scored against the validation labels.

    Args:
        train_file: Passed to ``hd.get_traindata``.
        val_file: Passed to ``hd.get_valdata``.
        savename: Forwarded to every ``save_errors`` call.
        synmap_file: Passed to ``hd.get_synmap``.
        num_epochs: Number of passes over the training data.
        alpha: Regularization strength; divided by ``N`` before use.
        margin, base: Forwarded to ``get_learning_rate(epoch, margin, base)``.
        mb_size: Forwarded to both data generators.
        momentum: Momentum for ``clipped_nesterov_momentum``.
        synsets: Forwarded to ``hd.get_traindata``.

    Raises:
        ValueError: If the training set is empty.
        SystemExit: If a minibatch loss evaluates to NaN.
    """
    print("Loading data...")
    print('Alpha: %f' % (alpha,))
    print('Save name: %s' % (savename,))
    tr_addresses, tr_labels = hd.get_traindata(train_file, synsets)
    vl_addresses, vl_labels = hd.get_valdata(val_file)
    synmap = hd.get_synmap(synmap_file)
    tr_labels = hd.map_labels(tr_labels, synmap)
    vl_labels = hd.map_labels(vl_labels, synmap)
    N = len(tr_addresses)
    print('Num training examples: %i' % (N,))
    if N == 0:
        raise ValueError('No training examples found in %r' % (train_file,))
    # float() avoids Python 2 integer division when alpha is given as an int.
    alpha_per_example = float(alpha) / N
    print('Alpha/N: %e' % (alpha_per_example,))

    # Variables
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    learning_rate = T.fscalar('learning_rate')
    im_shape = (227, 227)
    max_grad = 1.
    # Running statistics are handed to save_errors every this many batches.
    save_every = 257

    print("Building model and compiling functions...")
    network = build_cnn(im_shape, input_var=input_var)

    # Losses and updates
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean() + regularization(prediction, alpha_per_example).mean()
    # NOTE(review): `deterministic` is treated as a parameter tag here; it
    # presumably filters nothing -- confirm `trainable=True` was not intended.
    params = lasagne.layers.get_all_params(network, deterministic=False)
    updates = clipped_nesterov_momentum(loss, params, learning_rate, max_grad,
                                        momentum=momentum)

    # Validation and testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_var),
                       dtype=theano.config.floatX)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Theano functions
    train_fn = theano.function([input_var, target_var, learning_rate],
                               [loss, train_acc], updates=updates)
    val_fn = theano.function([input_var, target_var], test_acc)

    print("Starting training...")
    # start_time drives the cumulative h:m:s progress clock; each epoch keeps
    # its own start so the per-epoch summary reports that epoch's duration.
    start_time = time.time()
    for epoch in range(num_epochs):
        epoch_start = time.time()
        # Kept under a separate name so the symbolic `learning_rate` above is
        # not rebound to a plain number.
        lr = get_learning_rate(epoch, margin, base)

        # Full pass over the training data.
        train_err = 0
        train_batches = 0
        running_error = []
        running_acc = []
        trdlg = hd.data_and_label_generator(tr_addresses, tr_labels, im_shape,
                                            mb_size, shuffle=True,
                                            preproc=True)
        for inputs, targets in threaded_gen(trdlg, num_cached=500):
            local_train_err, local_train_acc = train_fn(inputs, targets, lr)
            train_err += local_train_err
            train_batches += 1
            if np.isnan(local_train_err):
                # Abort with a message and a non-zero status instead of
                # exiting silently as if the run had succeeded.
                sys.exit('Training loss is NaN (epoch %d, minibatch %d); '
                         'aborting.' % (epoch + 1, train_batches))
            running_error.append(local_train_err)
            running_acc.append(local_train_acc)
            if train_batches % save_every == 0:
                save_errors(savename, running_error, err_type='error')
                save_errors(savename, running_acc, err_type='acc')
                running_error = []
                running_acc = []
            h, m, s = theTime(start_time)
            sys.stdout.write(
                'Time: %d:%02d:%02d Minibatch: %i Training Error: %f\r' %
                (h, m, s, train_batches, train_err / train_batches))
            sys.stdout.flush()
        # Terminate the '\r' progress line.  A bare `print` only does this as
        # a Python 2 statement; write() behaves the same everywhere.
        sys.stdout.write('\n')

        # Full pass over the validation data.
        val_acc = 0.
        val_batches = 0
        vldlg = hd.data_and_label_generator(vl_addresses, vl_labels, im_shape,
                                            mb_size, shuffle=False,
                                            preproc=False)
        for inputs, targets in threaded_gen(vldlg, num_cached=50):
            val_acc += val_fn(inputs, targets)
            val_batches += 1
            sys.stdout.write('Minibatch: %i Validation Accuracy: %f\r' %
                             (val_batches, val_acc / val_batches * 100))
            sys.stdout.flush()
        # max(..., 1) keeps the summary from dividing by zero when a
        # generator yielded no batches.
        mean_val_acc = val_acc / max(val_batches, 1)
        mean_train_err = train_err / max(train_batches, 1)
        running_val_acc = [mean_val_acc]
        save_errors(savename, running_val_acc, err_type='val_acc')
        sys.stdout.write('\n')

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - epoch_start))
        print(" train loss:\t\t{:.6f}".format(mean_train_err))
        print(" valid acc:\t\t{:.6f}".format(mean_val_acc * 100.))