def train_test(depth, growth_rate, dropout, augment, validate, epochs, eta,
               save_weights, save_errors, batchsize=64):
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne
    import densenet as densenet  # or "import densenet" for slower version
    import cifar10
    import progress

    # (os, sys, time and the helpers Logger, create_result_dirs, draw_to_file,
    #  load_dataset, normalize, pad_data, balanced_subsample, train_test_split,
    #  generate_in_background are assumed to be defined/imported at module level)
    seed = 42
    draw_flag = True
    np.random.seed(seed)

    # Logging operations
    output_path = ('/results/' + os.getcwd().split('/')[-1] + '/' +
                   os.path.basename(__file__).split('.')[0] + '/' +
                   time.strftime("%d-%m-%Y_") + time.strftime("%H:%M:%S") + '/')
    pyscript_name = os.path.basename(__file__)
    create_result_dirs(output_path, pyscript_name)
    sys.stdout = Logger(output_path)
    print('seed: ', seed)

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth,
                                      growth_rate=growth_rate, dropout=dropout)
    if draw_flag:
        draw_to_file(network, output_path, list_flag=False)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    val_size = 5000
    num_labeled_data = 100
    # X, y, X_test, y_test = cifar10.load_dataset(
    #     path=os.path.join(os.path.dirname(__file__), 'data'))
    dataset = 'MNIST-test'
    X_test, y_test = load_dataset(dataset)
    dataset = 'MNIST-train'
    X, y = load_dataset(dataset)
    X, X_test = normalize(X, X_test, '[-1, -1]')
    X, X_test = pad_data(X, X_test, 32)
    num_data = X.shape[0]

    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X[-5000:], y[-5000:]
        X_train, y_train = X[:-5000], y[:-5000]
    # X_train, X_val, y_train, y_val = balanced_subsample(X_train, y_train, subsample_size=10000)
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=val_size, random_state=seed, stratify=y)

    semi_supervised_flag = False
    semi_supervised_idx = None
    if num_labeled_data < num_data:
        semi_supervised_flag = True
        semi_supervised_idx = np.int32(np.zeros(num_data))
        # X_unlabeled, X_labeled, y_unlabeled, y_labeled, indices = balanced_subsample(
        #     X_train, y_train, subsample_size=num_labeled_data, shuffle_flag=False)
        _, _, _, _, indices = balanced_subsample(
            X_train, y_train, subsample_size=num_labeled_data,
            shuffle_flag=False)
        semi_supervised_idx[indices] = 1
        X = np.concatenate((X_train, X_val), axis=0)
        y = np.concatenate((y_train, y_val), axis=0)
        X = X[indices]
        y = y[indices]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction_clean = lasagne.layers.get_output(network, deterministic=True)
    # note: The Keras implementation clips predictions for the categorical
    #       cross-entropy. This doesn't seem to have a positive effect here.
    # prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var).mean()
    # loss_squared = lambda_clean * lasagne.objectives.squared_error(prediction, prediction_clean).mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere.
    #       However, 1e-4 seems to work better than 5e-5, so we use 1e-4.
    # note: Torch includes biases in L2 decay. This seems to be important! So
    #       we decay all 'trainable' parameters, not just 'regularizable' ones.
    l2_loss = 6e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
        loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_loss = lasagne.objectives.categorical_crossentropy(
        prediction_clean, target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(
        prediction_clean, target_var).mean(dtype=theano.config.floatX)
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    # Finally, launch the training loop.
    print("Starting training...")
    if save_errors:
        errors = []
    for epoch in range(epochs):
        # shrink learning rate at 50% and 75% into training
        if epoch == (epochs // 2) or epoch == (epochs * 3 // 4):
            eta.set_value(eta.get_value() * lasagne.utils.floatX(0.1))
        # if (epoch == (120)) or (epoch == (210)) or (epoch == (270)):
        #     eta.set_value(eta.get_value() * lasagne.utils.floatX(0.2))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        # clean_loss = 0
        train_batches = len(X) // batchsize
        batches = cifar10.iterate_minibatches(X, y, batchsize, shuffle=True)
        if augment:
            batches = cifar10.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            tr_loss = train_fn(inputs, targets)
            train_loss += tr_loss
            # clean_loss += cl_loss

        # And possibly a full pass over the validation data:
        if validate:
            val_loss = 0
            val_err = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in cifar10.iterate_minibatches(
                    X_val, y_val, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                val_loss += loss
                val_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print("  training loss:\t%.6f" % train_loss)
        # print("  clean loss:   \t%.6f" % clean_loss)
        print("  L2 loss:      \t%.6f" % l2_loss)
        if save_errors:
            errors.extend([train_loss, l2_loss])
        if validate:
            val_loss /= val_batches
            val_err /= val_batches
            print("  validation loss:\t%.6f" % val_loss)
            print("  validation error:\t%.2f%%" % (val_err * 100))
            if save_errors:
                errors.extend([val_loss, val_err])

        test_loss = 0
        test_err = 0
        test_batches = len(X_test) // batchsize
        for inputs, targets in cifar10.iterate_minibatches(
                X_test, y_test, batchsize, shuffle=False):
            loss, err = test_fn(inputs, targets)
            test_loss += loss
            test_err += err
        print("  test loss:\t\t%.6f" % (test_loss / test_batches))
        print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in cifar10.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # Optionally, we dump the learning curves to a file
    if save_errors:
        errors = np.asarray(errors).reshape(epochs, -1)
        np.savez(save_errors, errors=errors)
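

# Hedged usage sketch for the semi-supervised MNIST variant above. The real
# command-line entry point (an argparse wrapper) is assumed to live elsewhere
# in this repo; the hyperparameter values below are illustrative guesses and
# not the settings behind any reported result.
def _example_run_mnist_densenet():
    train_test(depth=40, growth_rate=12, dropout=0,
               augment=False, validate=True, epochs=100, eta=0.1,
               save_weights='mnist_densenet.npz',
               save_errors='mnist_densenet_errors.npz')

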
def train_test(depth, growth_rate, dropout, augment, validate, epochs, eta,
               save_weights, save_errors, batchsize=64):
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne
    import densenet_fast as densenet  # or "import densenet" for slower version
    import cifar10
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth,
                                      growth_rate=growth_rate, dropout=dropout)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = cifar10.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    # note: The Keras implementation clips predictions for the categorical
    #       cross-entropy. This doesn't seem to have a positive effect here.
    # prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var).mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere.
    #       However, 1e-4 seems to work better than 5e-5, so we use 1e-4.
    # note: Torch includes biases in L2 decay. This seems to be important! So
    #       we decay all 'trainable' parameters, not just 'regularizable' ones.
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
        loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction,
                                                           target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    # Finally, launch the training loop.
    print("Starting training...")
    if save_errors:
        errors = []
    for epoch in range(epochs):
        # shrink learning rate at 50% and 75% into training
        if epoch == (epochs // 2) or epoch == (epochs * 3 // 4):
            eta.set_value(eta.get_value() * lasagne.utils.floatX(0.1))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = cifar10.iterate_minibatches(X_train, y_train, batchsize,
                                              shuffle=True)
        if augment:
            batches = cifar10.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_loss = 0
            val_err = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in cifar10.iterate_minibatches(
                    X_val, y_val, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                val_loss += loss
                val_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print("  training loss:\t%.6f" % train_loss)
        print("  L2 loss:      \t%.6f" % l2_loss)
        if save_errors:
            errors.extend([train_loss, l2_loss])
        if validate:
            val_loss /= val_batches
            val_err /= val_batches
            print("  validation loss:\t%.6f" % val_loss)
            print("  validation error:\t%.2f%%" % (val_err * 100))
            if save_errors:
                errors.extend([val_loss, val_err])

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in cifar10.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # Optionally, we dump the learning curves to a file
    if save_errors:
        errors = np.asarray(errors).reshape(epochs, -1)
        np.savez(save_errors, errors=errors)
def train_test(depth, growth_rate, dropout, augment, validate, epochs, eta,
               save_weights, save_errors, resume, nonlinearity_name,
               use_cifar10, batchsize):
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne
    import densenet_fast as densenet  # or "import densenet" for slower version
    if use_cifar10 is True:
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth,
                                      classes=num_classes,
                                      growth_rate=growth_rate, dropout=dropout,
                                      nonlinearity_name=nonlinearity_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    # note: The Keras implementation clips predictions for the categorical
    #       cross-entropy. This doesn't seem to have a positive effect here.
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var).mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere.
    #       However, 1e-4 seems to work better than 5e-5, so we use 1e-4.
    # note: Torch includes biases in L2 decay. This seems to be important! So
    #       we decay all 'trainable' parameters, not just 'regularizable' ones.
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
        loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction,
                                                           target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    start_epoch = 0
    if save_errors:
        errors = []
    if resume is True:
        errors = list(np.load(save_errors)['errors'].reshape(-1))
        # look for the most recent per-epoch checkpoint and restore it
        for i in range(epochs - 1, -1, -1):
            try:
                with np.load(save_weights + '_' + str(i) + '.npz') as f:
                    param_values = [f['arr_%d' % j]
                                    for j in range(len(f.files))]
                lasagne.layers.set_all_param_values(network, param_values)
                start_epoch = i + 1
                print(i, start_epoch)
                break
            except:
                pass
        if start_epoch == 0:
            assert False, "could not resume"

    # Finally, launch the training loop.
    print("Starting training...")
    orig_lr = eta.get_value()
    for epoch in range(start_epoch, epochs):
        # shrink learning rate at 50% and 75% into training
        if epochs // 2 <= epoch < epochs * 3 // 4:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.1))
        elif epoch >= epochs * 3 // 4:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.01))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize,
                                              shuffle=True)
        if augment:
            batches = dataset.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_loss = 0
            val_err = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in dataset.iterate_minibatches(
                    X_val, y_val, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                val_loss += loss
                val_err += err
        else:
            test_loss = 0
            test_err = 0
            test_batches = len(X_test) // batchsize
            for inputs, targets in dataset.iterate_minibatches(
                    X_test, y_test, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                test_loss += loss
                test_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print("  training loss:\t%.6f" % train_loss)
        print("  L2 loss:      \t%.6f" % l2_loss)
        if save_errors:
            errors.extend([train_loss, l2_loss])
        if validate:
            val_loss /= val_batches
            val_err /= val_batches
            print("  validation loss:\t%.6f" % val_loss)
            print("  validation error:\t%.2f%%" % (val_err * 100))
            if save_errors:
                errors.extend([val_loss, val_err])
        else:
            test_loss /= test_batches
            test_err /= test_batches
            print("  test loss:\t%.6f" % test_loss)
            print("  test error:\t%.2f%%" % (test_err * 100))
            if save_errors:
                errors.extend([test_loss, test_err])

        if epoch % 1 == 0:
            # Optionally, we dump the network weights to a file
            if save_weights:
                np.savez(save_weights + '_' + str(epoch),
                         *lasagne.layers.get_all_param_values(network))
            # Optionally, we dump the learning curves to a file
            if save_errors:
                np.savez(save_errors,
                         errors=np.asarray(errors).reshape(-1, 4))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # Optionally, we dump the learning curves to a file
    if save_errors:
        np.savez(save_errors, errors=np.asarray(errors).reshape(epochs, -1))
def train_test(epochs, eta, save_weights, save_errors, resume, init_name,
               nonlinearity_name, use_cifar10, batchsize=128):
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne
    if use_cifar10 is True:
        print('Using CIFAR-10')
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    if nonlinearity_name == 'relu':
        f = lasagne.nonlinearities.rectify
    elif nonlinearity_name == 'elu':
        f = lasagne.nonlinearities.elu
    elif nonlinearity_name == 'gelu':
        def gelu(x):
            return 0.5 * x * (1 + T.tanh(T.sqrt(2 / np.pi) *
                                         (x + 0.044715 * T.pow(x, 3))))
        f = gelu

    network = build_vgg(input_var, num_classes, f, init_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    # if validate == 'test': X_val, y_val = X_test, y_test
    # elif validate:
    #     X_val, y_val = X_train[-5000:], y_train[-5000:]
    #     X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var).mean()
    l2_loss = 5e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'regularizable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    # updates = lasagne.updates.nesterov_momentum(
    #     loss + l2_loss, params, learning_rate=eta)
    updates = lasagne.updates.adam(
        loss + l2_loss, params, learning_rate=eta)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction,
                                                           target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    start_epoch = 0
    if save_errors:
        errors = []
    if resume is True:
        errors = list(np.load(save_errors)['errors'].reshape(-1))
        # look for the most recent per-epoch checkpoint and restore it
        for i in range(epochs - 1, -1, -1):
            try:
                with np.load(save_weights + '_' + str(i) + '.npz') as f:
                    param_values = [f['arr_%d' % j]
                                    for j in range(len(f.files))]
                lasagne.layers.set_all_param_values(network, param_values)
                start_epoch = i + 1
                print('Restored!', i, start_epoch)
                break
            except:
                pass
        if start_epoch == 0:
            assert False, "could not resume"

    # Finally, launch the training loop.
    print("Starting training...")
    orig_lr = eta.get_value()
    for epoch in range(start_epoch, epochs):
        # eta.set_value(lasagne.utils.floatX(orig_lr * max(0.1 ** (epoch//25), 1e-7)))
        # restoration friendly code
        # drop at half and then at three fourths through training
        if 100 <= epoch < 125:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.1))
        elif epoch >= 125:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.01))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize,
                                              shuffle=True)
        # augmentation is mandatory!
        batches = dataset.augment_minibatches(batches)
        batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        # if validate:
        #     val_loss = 0
        #     val_err = 0
        #     val_batches = len(X_val) // batchsize
        #     for inputs, targets in dataset.iterate_minibatches(X_val, y_val, batchsize, shuffle=False):
        #         loss, err = test_fn(inputs, targets)
        #         val_loss += loss
        #         val_err += err
        # else:
        test_loss = 0
        test_err = 0
        test_batches = len(X_test) // batchsize
        for inputs, targets in dataset.iterate_minibatches(
                X_test, y_test, batchsize, shuffle=False):
            loss, err = test_fn(inputs, targets)
            test_loss += loss
            test_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print("  CE loss:\t%.6f" % train_loss)
        print("  L2 loss:\t%.6f" % l2_loss)
        print("  Loss:   \t%.6f" % (train_loss + l2_loss))
        if save_errors:
            errors.extend([train_loss, l2_loss])
        # if validate:
        #     val_loss /= val_batches
        #     val_err /= val_batches
        #     print("  validation loss:\t%.6f" % val_loss)
        #     print("  validation error:\t%.2f%%" % (val_err * 100))
        #     if save_errors:
        #         errors.extend([val_loss, val_err])
        # else:
        test_loss /= test_batches
        test_err /= test_batches
        print("  test loss:\t%.6f" % test_loss)
        print("  test error:\t%.2f%%" % (test_err * 100))
        if save_errors:
            errors.extend([test_loss, test_err])

        if epoch % 25 == 0 and epoch > 100:
            # Optionally, we dump the network weights to a file
            if save_weights:
                np.savez(save_weights + '_' + str(epoch),
                         *lasagne.layers.get_all_param_values(network))
            # Optionally, we dump the learning curves to a file
            if save_errors:
                np.savez(save_errors,
                         errors=np.asarray(errors).reshape(epoch + 1, -1))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # We dump the network weights to a file
    np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # and the learning curves to a file
    np.savez(save_errors, errors=np.asarray(errors).reshape(epochs, -1))
def train_test(depth, growth_rate, dropout, augment, validate, epochs, eta,
               save_weights, save_errors, resume, nonlinearity_name,
               use_cifar10, batchsize=64):
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne
    import densenet_fast_custom as densenet  # or "import densenet" for slower version
    if use_cifar10 is True:
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth,
                                      classes=num_classes,
                                      growth_rate=growth_rate, dropout=dropout,
                                      nonlinearity_name=nonlinearity_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
        loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    # updates = lasagne.updates.adam(
    #     loss + l2_loss, params, learning_rate=eta)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)

    update_var_prediction = lasagne.layers.get_output(
        network, deterministic=True,
        batch_norm_update_averages=True, batch_norm_use_averages=False)
    loss_var_update = lasagne.objectives.categorical_crossentropy(
        update_var_prediction, target_var)
    loss_var_update = loss_var_update.mean()
    update_var_fn = theano.function([input_var, target_var], loss_var_update)

    test_loss = test_loss.mean()
    test_acc = lasagne.objectives.categorical_accuracy(test_prediction,
                                                       target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    l2_fn = theano.function([], l2_loss)

    with np.load("./wider_07_100.npz") as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Finally, launch the training loop.
    print("Starting training...")
    if save_errors:
        errors = []

    val_err = 0
    val_acc = 0
    val_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        err, acc = test_fn(inputs, targets)
        val_err += err
        val_acc += acc
    if validate or True:  # HACK: validate on test set, for debugging
        print("  validation loss:\t%.6f" % (val_err / val_batches))
        print("  validation error:\t%.2f%%" % (
            100 - val_acc / val_batches * 100))

    for epoch in range(5):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize,
                                              shuffle=True)
        if augment:
            batches = dataset.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            train_err += update_var_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_err = 0
            val_acc = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in dataset.iterate_minibatches(
                    X_val, y_val, batchsize, shuffle=False):
                err, acc = test_fn(inputs, targets)
                val_err += err
                val_acc += acc
        else:  # HACK: validate on test set, for debugging
            val_err = 0
            val_acc = 0
            val_batches = len(X_test) // batchsize
            for inputs, targets in dataset.iterate_minibatches(
                    X_test, y_test, batchsize, shuffle=False):
                err, acc = test_fn(inputs, targets)
                val_err += err
                val_acc += acc

        # Then we print the results for this epoch:
        print("  training loss:\t%.6f" % (train_err / train_batches))
        l2_err = l2_fn()
        print("  L2 loss:      \t%.6f" % l2_err)
        if save_errors:
            errors.extend([train_err / train_batches, l2_err])
        if validate or True:  # HACK: validate on test set, for debugging
            print("  validation loss:\t%.6f" % (val_err / val_batches))
            print("  validation error:\t%.2f%%" % (
                100 - val_acc / val_batches * 100))
            if save_errors:
                errors.extend([val_err / val_batches,
                               100 - val_acc / val_batches * 100])
        if save_weights and epoch % 20 == 0:
            np.savez(save_weights,
                     *lasagne.layers.get_all_param_values(network))
            print('Saved')

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        err, acc = test_fn(inputs, targets)
        test_err += err
        test_acc += acc
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_err / test_batches))
    print("  test error:\t\t%.2f%%" % (
        100 - test_acc / test_batches * 100))