Or you can try: USE_EXTRA=1 python svhn2.py to also train on the much
larger set that includes "extra" data.  With this extra data, this gets to
96.40% test accuracy after 12 epochs.  273 seconds per epoch on a
GeForce GTX 680 GPU.
'''

batch_size = 128
nb_classes = 11
nb_epoch = 12

if "USE_EXTRA" not in os.environ:
    # standard split is 73,257 train / 26,032 test
    (X_train, y_train), (X_test, y_test) = svhn2.load_data()
else:
    # svhn2 extra split has an additional 531,131 (!) examples
    (X_train, y_train), (X_extra, y_extra), (X_test, y_test) = svhn2.load_data(
        sets=['train', 'extra', 'test'])
    X_train = np.concatenate([X_train, X_extra])
    y_train = np.concatenate([y_train, y_extra])

# print shape of data while model is building
print("{1} train samples, {2} channel{0}, {3}x{4}".format(
    "" if X_train.shape[1] == 1 else "s", *X_train.shape))
print("{1} test samples, {2} channel{0}, {3}x{4}".format(
    "" if X_test.shape[1] == 1 else "s", *X_test.shape))

# input image dimensions
_, img_channels, img_rows, img_cols = X_train.shape

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
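# NOTE: `svhn2` is not a stock keras.datasets module.  The helper below is a
# minimal, hypothetical sketch of what `svhn2.load_data()` is assumed to
# return for the calls above: channels-first uint8 arrays built from the
# cropped-digit .mat files (train_32x32.mat, test_32x32.mat, extra_32x32.mat)
# from http://ufldl.stanford.edu/housenumbers/.  The name `load_svhn_split`
# and the `path` argument are illustrative, not part of the actual module.
from scipy.io import loadmat

def load_svhn_split(path):
    """Load one SVHN .mat split and return (X, y) with X shaped (N, 3, 32, 32)."""
    mat = loadmat(path)
    # SVHN stores images as (32, 32, 3, N); move the sample axis first and the
    # channel axis second to match the channels-first layout used above.
    X = np.transpose(mat['X'], (3, 2, 0, 1))
    y = mat['y'].reshape(-1)  # labels are 1..10, with 10 meaning digit "0"
    return X, y

# Hypothetical usage mirroring svhn2.load_data(sets=['train', 'extra', 'test']):
# (X_train, y_train) = load_svhn_split('train_32x32.mat')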
def train(d):
    #
    # Log important data about how we were invoked.
    #
    L.getLogger("entry").info("INVOCATION: " + " ".join(sys.argv))
    L.getLogger("entry").info("HOSTNAME: " + socket.gethostname())
    L.getLogger("entry").info("PWD: " + os.getcwd())

    summary = "\n"
    summary += "Environment:\n"
    summary += summarizeEnvvar("THEANO_FLAGS") + "\n"
    summary += "\n"
    summary += "Software Versions:\n"
    summary += "Theano: " + T.__version__ + "\n"
    summary += "Keras: " + keras.__version__ + "\n"
    summary += "\n"
    summary += "Arguments:\n"
    summary += "Path to Datasets: " + str(d.datadir) + "\n"
    summary += "Path to Workspace: " + str(d.workdir) + "\n"
    summary += "Model: " + str(d.model) + "\n"
    summary += "Dataset: " + str(d.dataset) + "\n"
    summary += "Number of Epochs: " + str(d.num_epochs) + "\n"
    summary += "Batch Size: " + str(d.batch_size) + "\n"
    summary += "Number of Start Filters: " + str(d.start_filter) + "\n"
    summary += "Number of Blocks/Stage: " + str(d.num_blocks) + "\n"
    summary += "Optimizer: " + str(d.optimizer) + "\n"
    summary += "Learning Rate: " + str(d.lr) + "\n"
    summary += "Learning Rate Decay: " + str(d.decay) + "\n"
    summary += "Learning Rate Schedule: " + str(d.schedule) + "\n"
    summary += "Clipping Norm: " + str(d.clipnorm) + "\n"
    summary += "Clipping Value: " + str(d.clipval) + "\n"
    summary += "Dropout Probability: " + str(d.dropout) + "\n"
    if d.optimizer in ["adam"]:
        summary += "Beta 1: " + str(d.beta1) + "\n"
        summary += "Beta 2: " + str(d.beta2) + "\n"
    else:
        summary += "Momentum: " + str(d.momentum) + "\n"
    L.getLogger("entry").info(summary[:-1])

    #
    # Load dataset
    #
    L.getLogger("entry").info("Loading dataset {:s} ...".format(d.dataset))
    np.random.seed(d.seed % 2**32)
    if d.dataset == 'cifar10':
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        nb_classes = 10
        n_train = 45000
    elif d.dataset == 'cifar100':
        (X_train, y_train), (X_test, y_test) = cifar100.load_data()
        nb_classes = 100
        n_train = 45000
    elif d.dataset == 'svhn':
        (X_train, y_train), (X_test, y_test) = svhn2.load_data()
        nb_classes = 10
        # Make classes 0 - 9 instead of 1 - 10
        y_train = y_train - 1
        y_test = y_test - 1
        n_train = 65000

    #
    # Compute and Shuffle Training/Validation/Test Split
    #
    shuf_inds = np.arange(len(y_train))
    np.random.seed(0xDEADBEEF)
    np.random.shuffle(shuf_inds)
    train_inds = shuf_inds[:n_train]
    val_inds = shuf_inds[n_train:]

    X_train = X_train.astype('float32') / 255.0
    X_test = X_test.astype('float32') / 255.0

    X_train_split = X_train[train_inds]
    X_val_split = X_train[val_inds]
    y_train_split = y_train[train_inds]
    y_val_split = y_train[val_inds]

    pixel_mean = np.mean(X_train_split, axis=0)

    X_train = X_train_split.astype(np.float32) - pixel_mean
    X_val = X_val_split.astype(np.float32) - pixel_mean
    X_test = X_test.astype(np.float32) - pixel_mean

    Y_train = to_categorical(y_train_split, nb_classes)
    Y_val = to_categorical(y_val_split, nb_classes)
    Y_test = to_categorical(y_test, nb_classes)

    if d.no_validation:
        X_train = np.concatenate([X_train, X_val], axis=0)
        Y_train = np.concatenate([Y_train, Y_val], axis=0)

    L.getLogger("entry").info("Training set shape: " + str(X_train.shape))
    L.getLogger("entry").info("Validation set shape: " + str(X_val.shape))
    L.getLogger("entry").info("Test set shape: " + str(X_test.shape))
    L.getLogger("entry").info("Loaded dataset {:s}.".format(d.dataset))

    #
    # Initial Entry or Resume?
    #
    initialEpoch = 0
    chkptFilename = os.path.join(d.workdir, "chkpts", "ModelChkpt.hdf5")
    isResuming = os.path.isfile(chkptFilename)
    if isResuming:
        # Reload Model and Optimizer
        L.getLogger("entry").info("Reloading a model from " + chkptFilename + " ...")
        np.random.seed(d.seed % 2**32)
        model = KM.load_model(chkptFilename, custom_objects={
            "ComplexConv2D": ComplexConv2D,
            "ComplexBatchNormalization": ComplexBN,
            "GetReal": GetReal,
            "GetImag": GetImag
        })
        L.getLogger("entry").info("... reloading complete.")

        with H.File(chkptFilename, "r") as f:
            initialEpoch = int(f["initialEpoch"][...])
        L.getLogger("entry").info(
            "Training will restart at epoch {:5d}.".format(initialEpoch + 1))
        L.getLogger("entry").info("Compilation Started.")
    else:
        # Model
        L.getLogger("entry").info("Creating new model from scratch.")
        np.random.seed(d.seed % 2**32)
        model = getResnetModel(d)

        # Optimizer
        if d.optimizer in ["sgd", "nag"]:
            opt = SGD(lr=d.lr,
                      momentum=d.momentum,
                      decay=d.decay,
                      nesterov=(d.optimizer == "nag"),
                      clipnorm=d.clipnorm)
        elif d.optimizer == "rmsprop":
            opt = RMSprop(lr=d.lr, decay=d.decay, clipnorm=d.clipnorm)
        elif d.optimizer == "adam":
            opt = Adam(lr=d.lr,
                       beta_1=d.beta1,
                       beta_2=d.beta2,
                       decay=d.decay,
                       clipnorm=d.clipnorm)
        else:
            raise ValueError("Unknown optimizer " + d.optimizer)

        # Compile the model with that optimizer.
        L.getLogger("entry").info("Compilation Started.")
        model.compile(opt, 'categorical_crossentropy', metrics=['accuracy'])

    #
    # Precompile several backend functions
    #
    if d.summary:
        model.summary()
    L.getLogger("entry").info("# of Parameters: {:10d}".format(model.count_params()))
    L.getLogger("entry").info("Compiling Train Function...")
    t = -time.time()
    model._make_train_function()
    t += time.time()
    L.getLogger("entry").info(" {:10.3f}s".format(t))
    L.getLogger("entry").info("Compiling Predict Function...")
    t = -time.time()
    model._make_predict_function()
    t += time.time()
    L.getLogger("entry").info(" {:10.3f}s".format(t))
    L.getLogger("entry").info("Compiling Test Function...")
    t = -time.time()
    model._make_test_function()
    t += time.time()
    L.getLogger("entry").info(" {:10.3f}s".format(t))
    L.getLogger("entry").info("Compilation Ended.")

    #
    # Create Callbacks
    #
    newLineCb = PrintNewlineAfterEpochCallback()
    lrSchedCb = LearningRateScheduler(schedule)
    testErrCb = TestErrorCallback((X_test, Y_test))
    saveLastCb = SaveLastModel(d.workdir, period=10)
    saveBestCb = SaveBestModel(d.workdir)
    trainValHistCb = TrainValHistory()

    callbacks = []
    callbacks += [newLineCb]
    if d.schedule == "default":
        callbacks += [lrSchedCb]
    callbacks += [testErrCb]
    callbacks += [saveLastCb]
    callbacks += [saveBestCb]
    callbacks += [trainValHistCb]

    #
    # Create training data generator
    #
    datagen = ImageDataGenerator(height_shift_range=0.125,
                                 width_shift_range=0.125,
                                 horizontal_flip=True)

    #
    # Enter training loop.
    #
    L.getLogger("entry").info("**********************************************")
    if isResuming:
        L.getLogger("entry").info(
            "*** Reentering Training Loop @ Epoch {:5d} ***".format(initialEpoch + 1))
    else:
        L.getLogger("entry").info(
            "*** Entering Training Loop @ First Epoch ***")
    L.getLogger("entry").info("**********************************************")

    model.fit_generator(generator=datagen.flow(X_train, Y_train,
                                               batch_size=d.batch_size),
                        steps_per_epoch=(len(X_train) + d.batch_size - 1) // d.batch_size,
                        epochs=d.num_epochs,
                        verbose=1,
                        callbacks=callbacks,
                        validation_data=(X_val, Y_val),
                        initial_epoch=initialEpoch)

    #
    # Dump histories.
    #
    np.savetxt(os.path.join(d.workdir, 'test_loss.txt'),
               np.asarray(testErrCb.loss_history))
    np.savetxt(os.path.join(d.workdir, 'test_acc.txt'),
               np.asarray(testErrCb.acc_history))
    np.savetxt(os.path.join(d.workdir, 'train_loss.txt'),
               np.asarray(trainValHistCb.train_loss))
    np.savetxt(os.path.join(d.workdir, 'train_acc.txt'),
               np.asarray(trainValHistCb.train_acc))
    np.savetxt(os.path.join(d.workdir, 'val_loss.txt'),
               np.asarray(trainValHistCb.val_loss))
    np.savetxt(os.path.join(d.workdir, 'val_acc.txt'),
               np.asarray(trainValHistCb.val_acc))
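# NOTE: `schedule`, `TestErrorCallback`, and `TrainValHistory` are defined
# elsewhere in the repository and are only referenced above.  The definitions
# below are minimal, hypothetical sketches showing just the interface train()
# relies on: a schedule callable for LearningRateScheduler, and callbacks whose
# list attributes (loss_history/acc_history, train_/val_ loss/acc) are what the
# np.savetxt calls dump.  The step values in schedule() are an assumption, not
# the repository's actual learning-rate policy.
from keras.callbacks import Callback

def schedule(epoch):
    """Hypothetical stepped learning-rate schedule for LearningRateScheduler."""
    if epoch < 80:
        return 0.1
    elif epoch < 120:
        return 0.01
    else:
        return 0.001

class TrainValHistory(Callback):
    """Accumulate per-epoch train/val loss and accuracy for later dumping."""
    def __init__(self):
        super(TrainValHistory, self).__init__()
        self.train_loss, self.train_acc = [], []
        self.val_loss, self.val_acc = [], []

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.train_loss.append(logs.get('loss'))
        self.train_acc.append(logs.get('acc'))
        self.val_loss.append(logs.get('val_loss'))
        self.val_acc.append(logs.get('val_acc'))

class TestErrorCallback(Callback):
    """Evaluate on the held-out test set after every epoch."""
    def __init__(self, test_data):
        super(TestErrorCallback, self).__init__()
        self.test_data = test_data
        self.loss_history, self.acc_history = [], []

    def on_epoch_end(self, epoch, logs=None):
        x, y = self.test_data
        loss, acc = self.model.evaluate(x, y, verbose=0)
        self.loss_history.append(loss)
        self.acc_history.append(acc)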
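# NOTE: train(d) reads its whole configuration via attribute access on `d`
# (d.lr, d.optimizer, d.workdir, ...).  In the repository `d` comes from an
# argparse parser defined elsewhere; the snippet below is only a hypothetical
# sketch of a minimal namespace carrying the attributes train() actually
# touches, with illustrative placeholder values.
from argparse import Namespace

d = Namespace(
    datadir='data', workdir='work', model='complex', dataset='cifar10',
    num_epochs=200, batch_size=64, start_filter=11, num_blocks=10,
    optimizer='sgd', lr=0.01, decay=0.0, schedule='default',
    clipnorm=1.0, clipval=1.0, dropout=0.0,
    beta1=0.9, beta2=0.999, momentum=0.9,
    seed=1234, no_validation=False, summary=False)

# train(d)  # would run the training loop defined above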