def bootstrap(hparams):
    """Trains `samples` bootstrap replicates of the last layer(s) and stores
    their predicted probabilities on the validation set."""
    output_dir = util.create_run_dir('outputs/last_layer/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)
    dataset_train, dataset_val = input_data(hparams)
    epochs = hparams['epochs']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    samples = hparams['samples']
    p_dropout = hparams['p_dropout']
    nb_last_layers = hparams['nb_last_layers']
    model = build_last_layer(p_dropout=p_dropout,
                             num_last_layers=nb_last_layers)
    model_path = 'saved_models/{}/{}_last_layer_{}.h5'.format(
        hparams['dataset'], hparams['dataset'].split('-')[0], nb_last_layers)
    model.compile(optimizer=tf.keras.optimizers.SGD(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    name_in = os.path.join(output_dir, 'p_in.h5')
    file_in = h5py.File(name_in, 'a')
    # predict() with steps yields full batches only, so the number of stored
    # examples is truncated to a multiple of the batch size.
    shape_in = ((NUM_TEST_EXAMPLES // batch_size) * batch_size,
                N_CLASS, samples)
    proba_in = file_in.create_dataset('proba', shape_in, compression='gzip')
    for i in np.arange(samples):
        # Resample the training set with replacement for each replicate.
        dataset_train, dataset_val = input_data(hparams, bootstrap=True)
        model.load_weights(model_path, by_name=True)
        model.fit(dataset_train,
                  epochs=epochs,
                  steps_per_epoch=NUM_TRAINING_EXAMPLES // batch_size,
                  verbose=1,
                  validation_data=dataset_val,
                  validation_steps=NUM_TEST_EXAMPLES // batch_size)
        print('End of bootstrap {}'.format(i))
        # Store the predicted probabilities for this replicate.
        proba_in[:, :, i] = model.predict(
            dataset_val, steps=NUM_TEST_EXAMPLES // batch_size)
    file_in.close()
    del model
    gc.collect()
    print('End of sampling - bootstrap.')
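# Example invocation of bootstrap(), as a minimal sketch. The dataset key
# follows the 'cifar10-first-10' convention used elsewhere in this code; the
# remaining values are illustrative assumptions, not defaults from this file.
#
#   hparams = {'dataset': 'cifar10-first-10', 'epochs': 5, 'lr': 0.01,
#              'batch_size': 128, 'samples': 10, 'p_dropout': 0.5,
#              'nb_last_layers': 1}
#   bootstrap(hparams)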
def dropout(hparams):
    """MC-dropout sampling on the last layer(s) of a pretrained network."""
    output_dir = util.create_run_dir('outputs/last_layer/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)
    (features_train_in, y_train_in), (features_val_in, y_val_in), \
        features_val_out = input_data(hparams)
    n_class = y_train_in.shape[1]
    epochs = hparams['epochs']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    samples = hparams['samples']
    p_dropout = hparams['p_dropout']
    model = build_last_layer(features_train_in, n_class, p_dropout=p_dropout)
    model_path = 'saved_models/{}/{}.h5'.format(
        hparams['dataset'], hparams['dataset'].split('-')[0])
    model.compile(optimizer=keras.optimizers.SGD(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    name_in = os.path.join(output_dir, 'p_in.h5')
    file_in = h5py.File(name_in, 'a')
    shape_in = (features_val_in.shape[0], n_class, samples)
    proba_in = file_in.create_dataset('proba', shape_in, compression='gzip')
    if features_val_out is not None:
        name_out = os.path.join(output_dir, 'p_out.h5')
        file_out = h5py.File(name_out, 'a')
        shape_out = (features_val_out.shape[0], n_class, samples)
        proba_out = file_out.create_dataset('proba', shape_out,
                                            compression='gzip')
    model.load_weights(model_path, by_name=True)
    model.fit(features_train_in, y_train_in,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(features_val_in, y_val_in))
    # Sanity check.
    score = model.evaluate(features_val_in, y_val_in, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    print('End of training')
    for i in np.arange(samples):
        # Each forward pass draws a new dropout mask, hence a new sample.
        proba_in[:, :, i] = model.predict(features_val_in)
        if features_val_out is not None:
            proba_out[:, :, i] = model.predict(features_val_out)
    file_in.close()
    if features_val_out is not None:
        file_out.close()
    print('End of sampling - dropout.')
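# Sketch of how the sampled probabilities written above can be read back and
# summarized. The run-directory argument is hypothetical; the 'p_in.h5' name
# and the (n_examples, n_class, samples) layout follow the code above, and
# h5py/os/np are assumed to be the module-level imports already used here.
def summarize_samples(run_dir):
    """Returns the MC predictive mean and predictive entropy per example."""
    with h5py.File(os.path.join(run_dir, 'p_in.h5'), 'r') as f:
        proba = f['proba'][...]  # shape: (n_examples, n_class, samples)
    p_mean = proba.mean(axis=-1)  # average over the dropout samples
    # Entropy of the averaged distribution, a standard uncertainty score.
    entropy = -np.sum(p_mean * np.log(p_mean + 1e-12), axis=1)
    return p_mean, entropy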
def sgd_sgld(hparams):
    """Trains the last layer(s) with SGD and SGLD, drawing one predictive
    sample per epoch via the Prediction callback."""
    output_dir = util.create_run_dir('outputs/last_layer/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)
    (features_train_in, y_train_in), (features_val_in, y_val_in), \
        features_val_out = input_data(hparams)
    n_class = y_train_in.shape[1]
    samples = hparams['samples']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    params = {
        'optimizer': None,
        'samples': samples,
        'output_dir': output_dir,
        'n_class': n_class
    }

    class Prediction(keras.callbacks.Callback):
        """Stores the validation predictions at the end of every epoch."""

        def __init__(self, params, features_val_in, features_val_out):
            super(Prediction, self).__init__()
            self.index = 0
            # True when no out-of-distribution validation set is provided.
            self.out_of_dist = features_val_out is None
            name_in = os.path.join(params['output_dir'],
                                   'p_{}_in.h5'.format(params['optimizer']))
            self.file_in = h5py.File(name_in, 'a')
            shape_in = (features_val_in.shape[0], params['n_class'],
                        params['samples'])
            self.proba_in = self.file_in.create_dataset('proba', shape_in,
                                                        compression='gzip')
            self.features_val_in = features_val_in
            if not self.out_of_dist:
                name_out = os.path.join(
                    params['output_dir'],
                    'p_{}_out.h5'.format(params['optimizer']))
                self.file_out = h5py.File(name_out, 'a')
                shape_out = (features_val_out.shape[0], params['n_class'],
                             params['samples'])
                self.proba_out = self.file_out.create_dataset(
                    'proba', shape_out, compression='gzip')
                self.features_val_out = features_val_out

        def on_epoch_end(self, epoch, logs=None):
            self.proba_in[:, :, self.index] = self.model.predict(
                self.features_val_in)
            if not self.out_of_dist:
                self.proba_out[:, :, self.index] = self.model.predict(
                    self.features_val_out)
            self.index += 1

        def on_train_end(self, logs=None):
            self.file_in.close()
            if not self.out_of_dist:
                self.file_out.close()

    model = build_last_layer(features_train_in, n_class)
    model_path = 'saved_models/{}/{}.h5'.format(
        hparams['dataset'], hparams['dataset'].split('-')[0])
    for opt, optimizer in zip(['sgd', 'sgld'],
                              [keras.optimizers.SGD(lr=lr),
                               sgld.SGLD(features_train_in.shape[0], lr=lr)]):
        model.load_weights(model_path, by_name=True)
        params['optimizer'] = opt
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        mc = Prediction(params, features_val_in, features_val_out)
        # One epoch per sample: the callback records a prediction each epoch.
        hist = model.fit(features_train_in, y_train_in,
                         batch_size=batch_size,
                         epochs=samples,
                         verbose=1,
                         validation_data=(features_val_in, y_val_in),
                         callbacks=[mc])
        print('End of sampling using {}'.format(opt))
    return hist
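# For reference, the update rule the sgld.SGLD optimizer is assumed to
# implement (Welling & Teh, 2011): an SGD step on the log-posterior plus
# Gaussian noise whose variance equals the learning rate, so the iterates
# approximately sample from the posterior. A minimal numpy illustration,
# not the optimizer's actual code:
#
#   def sgld_step(theta, grad_log_post, lr, rng=np.random):
#       noise = rng.normal(scale=np.sqrt(lr), size=theta.shape)
#       return theta + 0.5 * lr * grad_log_post + noise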
def dropout(hparams):
    """MC-dropout sampling on a full network trained with data augmentation."""
    n_class = hparams['n_class']
    output_dir = util.create_run_dir('outputs/full_network/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)
    (features_train_in, y_train_in), (features_val_in, y_val_in), \
        features_val_out, index = input_data(hparams)
    np.save(os.path.join(output_dir, 'index.npy'), index)
    epochs = hparams['epochs']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    samples = hparams['samples']
    p_dropout = hparams['p_dropout']
    model = build_model(n_class, p_dropout=p_dropout)
    if hparams['dataset'] in ['cifar10-first-10', 'cifar100-first-100']:
        model_path = 'saved_models/cifar-full-network/{}vgg.h5'.format(
            hparams['dataset'].split('-')[0])
    else:
        model_path = 'saved_models/{}/{}.h5'.format(
            hparams['dataset'], hparams['dataset'].split('-')[0])
    model.compile(optimizer=keras.optimizers.SGD(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # Data augmentation.
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by dataset std
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # randomly rotate images (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift horizontally (fraction of width)
        height_shift_range=0.1,  # randomly shift vertically (fraction of height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # do not flip images vertically
    # Compute quantities needed for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(features_train_in)
    name_in = os.path.join(output_dir, 'p_in.h5')
    file_in = h5py.File(name_in, 'a')
    shape_in = (features_val_in.shape[0], n_class, samples)
    proba_in = file_in.create_dataset('proba', shape_in, compression='gzip')
    if features_val_out is not None:
        name_out = os.path.join(output_dir, 'p_out.h5')
        file_out = h5py.File(name_out, 'a')
        shape_out = (features_val_out.shape[0], n_class, samples)
        proba_out = file_out.create_dataset('proba', shape_out,
                                            compression='gzip')
    model.load_weights(model_path)
    model.fit_generator(
        datagen.flow(features_train_in, y_train_in, batch_size=batch_size),
        steps_per_epoch=features_train_in.shape[0] // batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=(features_val_in, y_val_in))
    # Sanity check.
    score = model.evaluate(features_val_in, y_val_in, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    print('End of training')
    for i in np.arange(samples):
        # Each forward pass draws a new dropout mask, hence a new sample.
        proba_in[:, :, i] = model.predict(features_val_in)
        if features_val_out is not None:
            proba_out[:, :, i] = model.predict(features_val_out)
    file_in.close()
    if features_val_out is not None:
        file_out.close()
    print('End of sampling - dropout.')
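# Note: for the sampling loop above to return distinct probabilities, dropout
# must remain active at prediction time. A common way to do this in Keras
# (presumably handled inside build_model, which is not shown here) is to call
# the Dropout layer in training mode:
#
#   x = keras.layers.Dropout(p_dropout)(x, training=True)
#
# With inference-mode dropout, every model.predict call would be identical
# and the `samples` slices would all coincide.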
def sgd_sgld(hparams):
    """Trains a full network with SGD and SGLD under data augmentation,
    drawing one predictive sample per epoch via the Prediction callback."""
    n_class = hparams['n_class']
    output_dir = util.create_run_dir('outputs/full_network/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)
    (features_train_in, y_train_in), (features_val_in, y_val_in), \
        features_val_out, index = input_data(hparams)
    np.save(os.path.join(output_dir, 'index.npy'), index)
    samples = hparams['samples']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    params = {
        'optimizer': None,
        'samples': samples,
        'output_dir': output_dir,
        'n_class': n_class
    }

    class Prediction(keras.callbacks.Callback):
        """Stores the validation predictions at the end of every epoch."""

        def __init__(self, params, features_val_in, features_val_out):
            super(Prediction, self).__init__()
            self.index = 0
            # True when no out-of-distribution validation set is provided.
            self.out_of_dist = features_val_out is None
            name_in = os.path.join(params['output_dir'],
                                   'p_{}_in.h5'.format(params['optimizer']))
            self.file_in = h5py.File(name_in, 'a')
            shape_in = (features_val_in.shape[0], params['n_class'],
                        params['samples'])
            self.proba_in = self.file_in.create_dataset('proba', shape_in,
                                                        compression='gzip')
            self.features_val_in = features_val_in
            if not self.out_of_dist:
                name_out = os.path.join(
                    params['output_dir'],
                    'p_{}_out.h5'.format(params['optimizer']))
                self.file_out = h5py.File(name_out, 'a')
                shape_out = (features_val_out.shape[0], params['n_class'],
                             params['samples'])
                self.proba_out = self.file_out.create_dataset(
                    'proba', shape_out, compression='gzip')
                self.features_val_out = features_val_out

        def on_epoch_end(self, epoch, logs=None):
            self.proba_in[:, :, self.index] = self.model.predict(
                self.features_val_in)
            if not self.out_of_dist:
                self.proba_out[:, :, self.index] = self.model.predict(
                    self.features_val_out)
            self.index += 1

        def on_train_end(self, logs=None):
            self.file_in.close()
            if not self.out_of_dist:
                self.file_out.close()

    model = build_model(n_class)
    if hparams['dataset'] in ['cifar10-first-10', 'cifar100-first-100']:
        model_path = 'saved_models/cifar-full-network/{}vgg.h5'.format(
            hparams['dataset'].split('-')[0])
    else:
        model_path = 'saved_models/{}/{}.h5'.format(
            hparams['dataset'], hparams['dataset'].split('-')[0])
    # Data augmentation.
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by dataset std
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # randomly rotate images (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift horizontally (fraction of width)
        height_shift_range=0.1,  # randomly shift vertically (fraction of height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # do not flip images vertically
    # Compute quantities needed for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(features_train_in)
    for opt, optimizer in zip(['sgd', 'sgld'],
                              [keras.optimizers.SGD(lr=lr),
                               sgld.SGLD(features_train_in.shape[0], lr=lr)]):
        model.load_weights(model_path)
        params['optimizer'] = opt
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        mc = Prediction(params, features_val_in, features_val_out)
        # One epoch per sample: the callback records a prediction each epoch.
        hist = model.fit_generator(
            datagen.flow(features_train_in, y_train_in,
                         batch_size=batch_size),
            steps_per_epoch=features_train_in.shape[0] // batch_size,
            epochs=samples,
            verbose=1,
            validation_data=(features_val_in, y_val_in),
            callbacks=[mc])
        print('End of sampling using {}'.format(opt))
    return hist
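# Since the Prediction callback stores one snapshot per epoch, the first SGLD
# samples are still close to the pretrained initialization. A common
# post-processing step (an assumption, not something this file does) is to
# discard a burn-in prefix before averaging:
#
#   with h5py.File(os.path.join(output_dir, 'p_sgld_in.h5'), 'r') as f:
#       proba = f['proba'][..., burn_in:]  # drop the first `burn_in` epochs
#   p_mean = proba.mean(axis=-1)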
def sgd_sgld(hparams):
    """SGD/SGLD sampling for the last layer(s), using tf.data pipelines."""
    output_dir = util.create_run_dir('outputs/last_layer/', hparams)
    util.write_to_csv(os.path.join(output_dir, 'hparams.csv'), hparams)
    dataset_train, dataset_val = input_data(hparams)
    n_class = N_CLASS
    samples = hparams['samples']
    lr = hparams['lr']
    batch_size = hparams['batch_size']
    nb_last_layers = hparams['nb_last_layers']
    params = {
        'optimizer': None,
        'samples': samples,
        'batch_size': batch_size,
        'output_dir': output_dir,
        'n_class': n_class
    }

    class Prediction(keras.callbacks.Callback):
        """Stores the validation predictions at the end of every epoch."""

        def __init__(self, params, dataset_val):
            super(Prediction, self).__init__()
            self.index = 0
            name_in = os.path.join(params['output_dir'],
                                   'p_{}_in.h5'.format(params['optimizer']))
            self.file_in = h5py.File(name_in, 'a')
            self.batch_size = params['batch_size']
            # predict() with steps yields full batches only, so the number of
            # stored examples is truncated to a multiple of the batch size.
            shape_in = ((NUM_TEST_EXAMPLES // self.batch_size) *
                        self.batch_size,
                        params['n_class'], params['samples'])
            self.proba_in = self.file_in.create_dataset('proba', shape_in,
                                                        compression='gzip')
            self.dataset_val = dataset_val

        def on_epoch_end(self, epoch, logs=None):
            nb_steps = NUM_TEST_EXAMPLES // self.batch_size
            self.proba_in[:, :, self.index] = self.model.predict(
                self.dataset_val, steps=nb_steps)
            self.index += 1

        def on_train_end(self, logs=None):
            self.file_in.close()

    model = build_last_layer(num_last_layers=nb_last_layers)
    model_path = 'saved_models/{}/{}_last_layer_{}.h5'.format(
        hparams['dataset'], hparams['dataset'].split('-')[0], nb_last_layers)
    for opt, optimizer in zip(['sgd', 'sgld'],
                              [tf.keras.optimizers.SGD(lr=lr),
                               tf_sgld.SGLD(NUM_TRAINING_EXAMPLES, lr=lr)]):
        model.load_weights(model_path, by_name=True)
        params['optimizer'] = opt
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        mc = Prediction(params, dataset_val)
        # One epoch per sample: the callback records a prediction each epoch.
        hist = model.fit(dataset_train,
                         epochs=samples,
                         steps_per_epoch=NUM_TRAINING_EXAMPLES // batch_size,
                         verbose=1,
                         validation_data=dataset_val,
                         validation_steps=NUM_TEST_EXAMPLES // batch_size,
                         callbacks=[mc])
        print('End of sampling using {}'.format(opt))
    del model
    gc.collect()
    return hist
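# Example invocation of this tf.data variant, as a minimal sketch. The
# dataset key reuses the naming convention seen above; the values are
# illustrative assumptions, not defaults from this file.
#
#   hparams = {'dataset': 'cifar100-first-100', 'samples': 20, 'lr': 0.001,
#              'batch_size': 256, 'nb_last_layers': 1}
#   hist = sgd_sgld(hparams)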