def train_cifar10():
    """Train a wide residual network classifier on CIFAR-10 and save its weights.

    The data comes from ``load_cifar10``. Training batches are produced by an
    ``ImageDataGenerator`` configured with horizontal flips and height/width
    shift ranges of 4 using reflect filling. The network is built with
    ``n=16, k=8`` and trained for 200 epochs with ``SGDTorch``; the learning
    rate starts at 0.1 and is multiplied by 0.2 at epochs 60, 120 and 160.
    The weights are written to ``cifar10_WRN_16-8.h5`` (relative path).
    """

    def step_decay(epoch):
        # 0.1 scaled by 0.2 once for every milestone (60, 120, 160) reached.
        if epoch >= 160:
            power = 3
        elif epoch >= 120:
            power = 2
        elif epoch >= 60:
            power = 1
        else:
            power = 0
        return 0.1 * (0.2 ** power)

    (x_train, y_train), (x_test, y_test) = load_cifar10()

    augmenter = ImageDataGenerator(
        horizontal_flip=True,
        height_shift_range=4,
        width_shift_range=4,
        fill_mode='reflect',
    )
    augmenter.fit(x_train)

    n, k = 16, 8
    mdl = create_wide_residual_network(x_train.shape[1:], 10, n, k)
    optimizer = SGDTorch(lr=.1, momentum=0.9, nesterov=True)
    mdl.compile(optimizer, 'categorical_crossentropy', ['acc'])

    batch_size = 128
    train_flow = augmenter.flow(x_train, to_categorical(y_train),
                                batch_size=batch_size)
    # The test images go through the same generator's standardize() step.
    val_data = (augmenter.standardize(x_test), to_categorical(y_test))

    mdl.fit_generator(
        generator=train_flow,
        epochs=200,
        validation_data=val_data,
        callbacks=[LearningRateScheduler(step_decay)],
    )
    mdl.save_weights('cifar10_WRN_{}-{}.h5'.format(n, k))
def test_resnet_transtransformer_1c():
    """Run ``test_model_loading`` ten times on 'hits-1-c' and log scores/timings.

    A single wide residual network (input ``[21, 21, 1]``, one output per
    transformation of ``TransTransformer``) is created once and passed to
    every run. The per-run scores and wall-clock durations are handed to
    ``print_scores_times_to_file`` together with a header line, targeting
    ``<PROJECT_PATH>/tests/aux_results/test_models_tf1-tf2.txt``.
    """
    n_runs = 10
    transformer = transformations.TransTransformer()
    n, k = (10, 4)
    mdl_resnet = create_wide_residual_network(
        [21, 21, 1], transformer.n_transforms, n, k)

    run_options = dict(
        dataset_name='hits-1-c',
        tf_version='tf1',
        transformer_name='transtransformed',
        model_name='resnet',
        epochs=2,
    )

    scores_list, delta_times_list = [], []
    for _ in range(n_runs):
        tic = time.time()
        run_scores = test_model_loading(
            transformer, mdl_resnet, load_hits1c, **run_options)
        delta_times_list.append(time.time() - tic)
        scores_list.append(run_scores)

    header = ('Data_transformer_tf1_models_tf1_resnet_transtransformer_1c\n NRUNS: %i'
              % n_runs)
    file_path = os.path.join(PROJECT_PATH, 'tests', 'aux_results',
                             'test_models_tf1-tf2.txt')
    print_scores_times_to_file(file_path, header, scores_list, delta_times_list)
    del mdl_resnet
def transformation_cifar10_vs_tinyimagenet(
        tinyimagenet_dir='/home/izikgo/Imagenet_resize/Imagenet_resize/'):
    """Score CIFAR-10 test images against Tiny ImageNet images with the double-head WRN.

    Rebuilds the two-output network (class head + transformation head),
    loads ``cifar10_WRN_doublehead-transformations_16-8.h5`` and, for every
    transformation ``t``, records the transformation head's probability for
    ``t`` on inputs transformed with ``t``. The per-sample score is the mean
    of those probabilities. CIFAR-10 test images are labelled 1 and Tiny
    ImageNet images 0; scores and labels are passed to
    ``save_roc_pr_curve_data`` with the output name
    ``cifar10-vs-tinyimagenet_transformations.npz``.

    Args:
        tinyimagenet_dir: directory handed to ``load_tinyimagenet``. Defaults
            to the path that used to be hard-coded here, so existing
            zero-argument calls behave as before.
    """
    _, (x_test, _) = load_cifar10()
    x_test_out = load_tinyimagenet(tinyimagenet_dir)

    transformer = Transformer(8, 8)
    n = 16
    k = 8
    base_mdl = create_wide_residual_network(x_test.shape[1:], 10, n, k)

    # Second head: softmax over transformations, attached to the layer at
    # index -3 of the base model (same construction as used for training).
    transformations_cls_out = Activation('softmax')(
        dense(transformer.n_transforms)(base_mdl.get_layer(index=-3).output))

    mdl = Model(base_mdl.input, [base_mdl.output, transformations_cls_out])
    mdl.load_weights('cifar10_WRN_doublehead-transformations_{}-{}.h5'.format(n, k))

    # Only the transformation head is needed for scoring.
    scores_mdl = Model(mdl.input, mdl.output[1])

    x_test_all = np.concatenate((x_test, x_test_out))
    n_samples = len(x_test_all)
    preds = np.zeros((n_samples, transformer.n_transforms))
    for t in range(transformer.n_transforms):
        transformed = transformer.transform_batch(x_test_all, [t] * n_samples)
        preds[:, t] = scores_mdl.predict(transformed, batch_size=128)[:, t]

    labels = np.concatenate((np.ones(len(x_test)), np.zeros(len(x_test_out))))
    scores = preds.mean(axis=-1)

    save_roc_pr_curve_data(scores, labels, 'cifar10-vs-tinyimagenet_transformations.npz')
def train_cifar10_transformations():
    """Train the double-head WRN (class + transformation) on CIFAR-10.

    Every training image receives one randomly drawn transformation; the
    network is trained to predict both the CIFAR-10 class and the index of
    the applied transformation. The same loss ('categorical_crossentropy')
    and metric ('acc') are passed to ``compile`` for both outputs. Weights
    are saved to ``cifar10_WRN_doublehead-transformations_16-8.h5``.
    """
    (x_train, y_train), _ = load_cifar10()
    transformer = Transformer(8, 8)

    def data_gen(x, y, batch_size):
        # Endless generator: reshuffles on every pass and yields
        # (images, [class one-hot, transformation one-hot]).
        n_samples = len(x)
        while True:
            shuffled = np.random.permutation(n_samples)
            for offset in range(0, n_samples, batch_size):
                picked = shuffled[offset:offset + batch_size]
                images = x[picked]
                class_ids = y[picked].flatten()

                # Data is moved to channels-last around transform_batch and
                # moved back afterwards when the backend is channels-first.
                channels_first = K.image_data_format() == 'channels_first'
                if channels_first:
                    images = np.transpose(images, (0, 2, 3, 1))

                t_ids = np.random.randint(0, transformer.n_transforms,
                                          size=len(images))
                images = transformer.transform_batch(images, t_ids)

                if channels_first:
                    images = np.transpose(images, (0, 3, 1, 2))

                targets = [
                    to_categorical(class_ids, num_classes=10),
                    to_categorical(t_ids, num_classes=transformer.n_transforms),
                ]
                yield (images, targets)

    def step_decay(epoch):
        # 0.1 scaled by 0.2 once for every milestone (60, 120, 160) reached.
        if epoch >= 160:
            power = 3
        elif epoch >= 120:
            power = 2
        elif epoch >= 60:
            power = 1
        else:
            power = 0
        return 0.1 * (0.2 ** power)

    n, k = 16, 8
    base_mdl = create_wide_residual_network(x_train.shape[1:], 10, n, k)

    # Transformation head: softmax on top of the base model's layer at index -3.
    head_input = base_mdl.get_layer(index=-3).output
    transformations_cls_out = Activation('softmax')(
        dense(transformer.n_transforms)(head_input))

    mdl = Model(base_mdl.input, [base_mdl.output, transformations_cls_out])
    mdl.compile(SGDTorch(lr=.1, momentum=0.9, nesterov=True),
                'categorical_crossentropy', ['acc'])

    batch_size = 128
    mdl.fit_generator(
        generator=data_gen(x_train, y_train, batch_size=batch_size),
        steps_per_epoch=len(x_train) // batch_size,
        epochs=200,
        callbacks=[LearningRateScheduler(step_decay)],
    )
    mdl.save_weights('cifar10_WRN_doublehead-transformations_{}-{}.h5'.format(n, k))
def _transformations_experiment(dataset_load_fn, dataset_name, single_class_ind, gpu_q):
    """Run the one-class transformations experiment for a single normal class.

    A wide residual network is trained to recognise which transformation was
    applied to images of class ``single_class_ind``. For every transformation
    a Dirichlet distribution is fitted to the network's softmax outputs on
    the (transformed) training images of that class, and each test image is
    scored by the Dirichlet log-likelihood-style term of its own softmax
    output, averaged over all transformations. Scores, labels and the model
    weights are written below ``RESULTS_DIR/<dataset_name>/``.

    Args:
        dataset_load_fn: zero-argument callable returning
            ``((x_train, y_train), (x_test, y_test))``.
        dataset_name: dataset identifier; 'cats-vs-dogs' selects the larger
            transformer/network configuration. Also used in output paths.
        single_class_ind: label treated as the normal (inlier) class.
        gpu_q: queue of GPU identifiers. One identifier is taken for the
            duration of the experiment and is always put back, even when the
            experiment raises, so a failed run no longer removes a GPU from
            the pool.
    """

    def calc_approx_alpha_sum(observations):
        # Closed-form approximation of the Dirichlet precision (sum of
        # alphas), used only to initialise the fixed-point iteration below.
        N = len(observations)
        f = np.mean(observations, axis=0)

        return (N * (len(f) - 1) * (-psi(1))) / (
            N * np.sum(f * np.log(f)) - np.sum(f * np.sum(np.log(observations), axis=0)))

    def inv_psi(y, iters=5):
        # Numerical inverse of the digamma function.
        # initial estimate
        cond = y >= -2.22
        x = cond * (np.exp(y) + 0.5) + (1 - cond) * -1 / (y - psi(1))

        # Newton steps on psi(x) = y; polygamma(1, .) is the derivative of psi.
        for _ in range(iters):
            x = x - (psi(x) - y) / polygamma(1, x)
        return x

    def fixed_point_dirichlet_mle(alpha_init, log_p_hat, max_iter=1000):
        # Iterates alpha <- inv_psi(psi(sum(alpha)) + log_p_hat) until the
        # update is smaller than 1e-9 in Euclidean norm or max_iter is hit.
        alpha_new = alpha_old = alpha_init
        for _ in range(max_iter):
            alpha_new = inv_psi(psi(np.sum(alpha_old)) + log_p_hat)
            if np.sqrt(np.sum((alpha_old - alpha_new) ** 2)) < 1e-9:
                break
            alpha_old = alpha_new
        return alpha_new

    def dirichlet_normality_score(alpha, p):
        return np.sum((alpha - 1) * np.log(p), axis=-1)

    gpu_to_use = gpu_q.get()
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu_to_use

        (x_train, y_train), (x_test, y_test) = dataset_load_fn()

        if dataset_name in ['cats-vs-dogs']:
            transformer = Transformer(16, 16)
            n, k = (16, 8)
        else:
            transformer = Transformer(8, 8)
            n, k = (10, 4)
        mdl = create_wide_residual_network(x_train.shape[1:], transformer.n_transforms, n, k)
        mdl.compile('adam', 'categorical_crossentropy', ['acc'])

        # Every inlier image is repeated once per transformation; the tiled
        # index vector says which transformation each copy receives.
        x_train_task = x_train[y_train.flatten() == single_class_ind]
        transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task))
        x_train_task_transformed = transformer.transform_batch(
            np.repeat(x_train_task, transformer.n_transforms, axis=0), transformations_inds)
        batch_size = 128

        mdl.fit(x=x_train_task_transformed, y=to_categorical(transformations_inds),
                batch_size=batch_size, epochs=int(np.ceil(200/transformer.n_transforms))
                )

        #################################################################################################
        # simplified normality score
        #################################################################################################
        # preds = np.zeros((len(x_test), transformer.n_transforms))
        # for t in range(transformer.n_transforms):
        #     preds[:, t] = mdl.predict(transformer.transform_batch(x_test, [t] * len(x_test)),
        #                               batch_size=batch_size)[:, t]
        #
        # labels = y_test.flatten() == single_class_ind
        # scores = preds.mean(axis=-1)
        #################################################################################################

        scores = np.zeros((len(x_test),))
        observed_data = x_train_task
        for t_ind in range(transformer.n_transforms):
            observed_dirichlet = mdl.predict(
                transformer.transform_batch(observed_data, [t_ind] * len(observed_data)),
                batch_size=1024)
            log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

            alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
            alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

            mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

            x_test_p = mdl.predict(
                transformer.transform_batch(x_test, [t_ind] * len(x_test)),
                batch_size=1024)
            scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

        scores /= transformer.n_transforms
        labels = y_test.flatten() == single_class_ind

        # One timestamp for both artefacts so the results file and the
        # weights file of a run always carry the same suffix.
        class_name = get_class_name_from_index(single_class_ind, dataset_name)
        timestamp = datetime.now().strftime('%Y-%m-%d-%H%M')

        res_file_name = '{}_transformations_{}_{}.npz'.format(dataset_name, class_name, timestamp)
        res_file_path = os.path.join(RESULTS_DIR, dataset_name, res_file_name)
        save_roc_pr_curve_data(scores, labels, res_file_path)

        mdl_weights_name = '{}_transformations_{}_{}_weights.h5'.format(
            dataset_name, class_name, timestamp)
        mdl_weights_path = os.path.join(RESULTS_DIR, dataset_name, mdl_weights_name)
        mdl.save_weights(mdl_weights_path)
    finally:
        # Hand the GPU back no matter how the experiment ended.
        gpu_q.put(gpu_to_use)
def build_network(num_outputs, architecture, classification=False, no_softmax=False, name=None):
    """ Constructs a CNN.

    # Arguments:

    - num_outputs: number of final output units.

    - architecture: name of the architecture. See ARCHITECTURES for a list of possible values and README.md for descriptions.
                    A trailing '-selu' selects SELU instead of ReLU activations and is stripped before the lookup.

    - classification: If `True`, the final layer will have a softmax activation, otherwise no activation at all.

    - no_softmax: Usually, the last layer will have a softmax activation if `classification` is True. However, if `no_softmax` is set
                  to True as well, the last layer will not have any activation.

    - name: The name of the network.

    # Returns:
        keras.models.Model

    # Raises:
        ValueError if `architecture` is not a known architecture name (including unknown 'rn*' names).
    """

    if architecture.lower().endswith('-selu'):
        activation = 'selu'
        architecture = architecture[:-5]
    else:
        activation = 'relu'

    # Softmax is applied to the final layer only for classifiers that did not opt out.
    wants_softmax = classification and (not no_softmax)
    final_activation = 'softmax' if wants_softmax else None

    # CIFAR-100 architectures

    if architecture == 'resnet-32':

        return cifar_resnet.SmallResNet(
            5, filters=[16, 32, 64], activation=activation,
            include_top=classification, top_activation=None if no_softmax else 'softmax',
            classes=num_outputs, name=name)

    elif architecture == 'resnet-110':

        return cifar_resnet.SmallResNet(
            18, filters=[16, 32, 64], activation=activation,
            include_top=classification, top_activation=None if no_softmax else 'softmax',
            classes=num_outputs, name=name)

    elif architecture == 'resnet-110-fc':

        return cifar_resnet.SmallResNet(
            18, filters=[16, 32, 64], activation=activation,
            include_top=True, top_activation=final_activation,
            classes=num_outputs, name=name)

    elif architecture == 'resnet-110-wfc':

        return cifar_resnet.SmallResNet(
            18, filters=[32, 64, 128], activation=activation,
            include_top=True, top_activation=final_activation,
            classes=num_outputs, name=name)

    elif architecture == 'wrn-28-10':

        return wrn.create_wide_residual_network(
            (32, 32, 3), nb_classes=num_outputs, N=4, k=10, verbose=0,
            final_activation=final_activation, name=name)

    elif architecture == 'densenet-100-12':

        return densenet.DenseNet(growth_rate=12, depth=100, nb_dense_block=3, bottleneck=False, nb_filter=16,
                                 reduction=0.0, classes=num_outputs, activation=final_activation, name=name)

    elif architecture == 'densenet-100-24':

        return densenet.DenseNet(growth_rate=24, depth=100, nb_dense_block=3, bottleneck=False, nb_filter=16,
                                 reduction=0.0, classes=num_outputs, activation=final_activation, name=name)

    elif architecture == 'densenet-bc-190-40':

        return densenet.DenseNet(growth_rate=40, depth=190, nb_dense_block=3, bottleneck=True, nb_filter=-1,
                                 reduction=0.5, classes=num_outputs, activation=final_activation, name=name)

    elif architecture == 'pyramidnet-272-200':

        return cifar_pyramidnet.PyramidNet(
            272, 200, bottleneck=True, activation=activation,
            classes=num_outputs, top_activation=final_activation, name=name)

    elif architecture == 'pyramidnet-110-270':

        return cifar_pyramidnet.PyramidNet(
            110, 270, bottleneck=False, activation=activation,
            classes=num_outputs, top_activation=final_activation, name=name)

    elif architecture == 'simple':

        return plainnet.PlainNet(
            num_outputs, activation=activation, final_activation=final_activation, name=name)

    # ImageNet architectures

    elif architecture in ('resnet-50', 'resnet-101', 'resnet-152'):

        if architecture == 'resnet-101':
            factory = keras_applications.resnet.ResNet101
        elif architecture == 'resnet-152':
            factory = keras_applications.resnet.ResNet152
        else:
            # ResNet50 has been available from the beginning, while the other two were added in keras-applications 1.0.7.
            # Thus, we use the initial implementation of ResNet50 for compatibility's sake.
            factory = keras.applications.ResNet50

        rn = factory(include_top=False, weights=None)
        # Depending on the Keras version, the ResNet50 model may or may not contain a final average pooling layer.
        rn_out = rn.layers[-2].output if isinstance(
            rn.layers[-1], keras.layers.AveragePooling2D) else rn.layers[-1].output
        x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn_out)
        x = keras.layers.Dense(
            num_outputs, activation=final_activation,
            name='prob' if classification else 'embedding')(x)
        return keras.models.Model(rn.inputs, x, name=name)

    elif architecture.startswith('rn'):

        factory_names = {
            'rn18': 'ResNet18',
            'rn34': 'ResNet34',
            'rn50': 'ResNet50',
            'rn101': 'ResNet101',
            'rn152': 'ResNet152',
            'rn200': 'ResNet200'
        }
        # Reject unknown 'rn*' names with the same error as any other unknown
        # architecture instead of leaking a bare KeyError from the lookup.
        if architecture not in factory_names:
            raise ValueError(
                'Unknown network architecture: {}'.format(architecture))

        import keras_resnet.models
        factory = getattr(keras_resnet.models, factory_names[architecture])

        input_ = keras.layers.Input((3, None, None)) if K.image_data_format(
        ) == 'channels_first' else keras.layers.Input((None, None, 3))
        rn = factory(input_, include_top=wants_softmax,
                     classes=num_outputs, freeze_bn=False, name=name)
        if not wants_softmax:
            # This head is only built when no softmax is wanted (embedding
            # network, or classifier with no_softmax=True), so it must stay
            # linear. The softmax case is covered by include_top above.
            x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn.outputs[-1])
            x = keras.layers.Dense(
                num_outputs, name='prob' if classification else 'embedding',
                activation=None)(x)
            rn = keras.models.Model(input_, x, name=name)
        return rn

    elif architecture == 'nasnet-a':

        nasnet = keras.applications.NASNetLarge(include_top=False, input_shape=(224, 224, 3), weights=None,
                                                pooling='avg')
        x = keras.layers.Dense(num_outputs, activation=final_activation,
                               name='prob' if classification else 'embedding')(nasnet.output)
        return keras.models.Model(nasnet.inputs, x, name=name)

    else:

        raise ValueError(
            'Unknown network architecture: {}'.format(architecture))
num_classes=num_classes) if model_name == 'densenet': print(model_name) model = densenet.DenseNet(nb_classes=num_classes, img_dim=input_shape, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, dropout_rate=0, weight_decay=1e-4) if model_name == 'vgg': model = vgg(input_shape=input_shape, num_classes=num_classes) if model_name == 'wrn': model = create_wide_residual_network(input_dim=input_shape, nb_classes=num_classes, N=2, k=8) if optimizer == 'Adam': opt = Adam(lr=init_lr) elif optimizer == 'SGD': opt = SGD(lr=init_lr, momentum=0.9) elif optimizer == 'RMSprop': opt = RMSprop(lr=init_lr) elif optimizer == 'Adagrad': opt = Adagrad(lr=init_lr) model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy', 'top_k_categorical_accuracy']) # model.summary()
(x_train, y_train), (x_val, y_val), (x_test, y_test) = load_hits(n_samples_by_class=10000, test_size=0.20, val_size=0.10, return_val=True) print(x_train.shape) print(x_val.shape) print(x_test.shape) transformer = TransTransformer(8, 8) n, k = (10, 4) mdl = create_wide_residual_network(input_shape=x_train.shape[1:], num_classes=transformer.n_transforms, depth=n, widen_factor=k) mdl.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc']) print(mdl.summary()) # get inliers of specific class x_train_task = x_train[y_train.flatten() == single_class_ind] print(x_train_task.shape) x_val_task = x_val[y_val.flatten() == single_class_ind] print(x_val_task.shape) transformations_inds_train = np.tile(np.arange(transformer.n_transforms),
def test_model_original(transformer, loader, dataset_name='hits-4-c', single_class_ind=1):
    """Train and evaluate the transformation classifier with the original pipeline.

    Loads train/val/test splits through ``loader(return_val=True)``, pickles
    them, trains a wide residual network to recognise the transformations of
    ``transformer`` on the inliers (label ``single_class_ind``), and computes
    four anomaly scores for the test set: the Dirichlet score, the trace of
    the averaged prediction matrix, and the negated ``get_entropy`` and
    ``get_xH`` values. Each score is stored with ``save_results_file`` and
    the model weights are saved under
    ``<PROJECT_PATH>/tests/aux_results/<dataset_name>/``.

    Returns:
        A 4-tuple with ``get_roc_auc`` of the Dirichlet, simple, entropy and
        xH scores, in that order.
    """
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    save_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    for directory in (results_dir, save_dir, os.path.join(results_dir, dataset_name)):
        utils.check_path(directory)

    # load-save data
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = loader(return_val=True)
    normal_data = ((x_train, y_train), (x_val, y_val), (x_test, y_test))
    pickle_path = os.path.join(save_dir, 'normal_data_%s_tf1_original.pkl' % dataset_name)
    utils.save_pickle(normal_data, pickle_path)

    # create model
    n_transforms = transformer.n_transforms
    n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:], n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # get inliers of specific class
    inlier_mask = y_train.flatten() == single_class_ind
    x_train_task = x_train[inlier_mask]
    print(x_train_task.shape)

    # transform inliers: each image is repeated once per transformation
    transformations_inds = np.tile(np.arange(n_transforms), len(x_train_task))
    repeated_inliers = np.repeat(x_train_task, n_transforms, axis=0)
    x_train_task_transformed = transformer.transform_batch(
        repeated_inliers, transformations_inds)
    print(x_train_task_transformed.shape)

    # train model
    batch_size = 128
    n_epochs = int(np.ceil(200 / n_transforms))
    mdl.fit(x=x_train_task_transformed,
            y=to_categorical(transformations_inds),
            batch_size=batch_size,
            epochs=n_epochs)

    n_test = len(x_test)
    scores = np.zeros((n_test, ))
    matrix_evals = np.zeros((n_test, n_transforms, n_transforms))
    observed_data = x_train_task
    for t_ind in range(n_transforms):
        # Fit a Dirichlet to the predictions on inliers transformed with t_ind.
        train_batch = transformer.transform_batch(
            observed_data, [t_ind] * len(observed_data))
        observed_dirichlet = mdl.predict(train_batch, batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        # Score the test set under that Dirichlet and keep the raw predictions.
        test_batch = transformer.transform_batch(x_test, [t_ind] * n_test)
        x_test_p = mdl.predict(test_batch, batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= n_transforms
    matrix_evals /= n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    named_scores = (
        ('transformations', scores),
        ('transformations-simple', scores_simple),
        ('transformations-entropy', scores_entropy),
        ('transformations-xH', scores_xH),
    )
    for experiment_name, experiment_scores in named_scores:
        save_results_file(results_dir, dataset_name, single_class_ind,
                          scores=experiment_scores, labels=labels,
                          experiment_name=experiment_name)

    class_name = get_class_name_from_index(single_class_ind, dataset_name)
    mdl_weights_name = '{}_tf1_original_{}_weights.h5'.format(dataset_name, class_name)
    mdl_weights_path = os.path.join(results_dir, dataset_name, mdl_weights_name)
    mdl.save_weights(mdl_weights_path)

    # Reference timings/results recorded by the original author:
    #   Time test_model_original(transformer, load_hits4c, dataset_name='hits-4-c') 00:06:58.37
    #   (0.9917134999999999, 0.9350055, 0.9872614999999999, 0.94142025)
    #   (0.9938067500000001, 0.9923547500000001, 0.9931685, 0.992637375)
    #   (0.9912172499999999, 0.9883357499999998, 0.9909070000000001, 0.9886706249999999)
    #   #train only
    #   Time test_model_original(transformer, load_hits4c, dataset_name='hits-4-c', tf_version='tf1') 00:03:48.29
    return tuple(get_roc_auc(experiment_scores, labels)
                 for _, experiment_scores in named_scores)
def _kernal_plus_transformations_experiment(dataset_load_fn, dataset_name, single_class_ind, gpu_q):
    """Run the one-class experiment with the kernel-augmented transformer.

    Trains a wide residual network to recognise the transformations of a
    ``PlusKernelTransformer`` on the inliers (label ``single_class_ind``) and
    stores four anomaly scores for the test set (Dirichlet, trace of the
    averaged prediction matrix, negated ``get_entropy`` and negated
    ``get_xH``) via ``save_results_file``. The model weights are saved below
    ``RESULTS_DIR/<dataset_name>/``.

    Args:
        dataset_load_fn: zero-argument callable returning
            ``((x_train, y_train), (x_test, y_test))``.
        dataset_name: dataset identifier, used in output names.
        single_class_ind: label treated as the normal (inlier) class.
        gpu_q: currently unused; GPU selection through the queue is
            commented out below.

    Raises:
        NotImplementedError: for ``dataset_name == 'cats-vs-dogs'``. No
            transformer is configured for that dataset; previously the
            function loaded the data and then crashed on ``transformer``
            being ``None``.
    """
    # gpu_to_use = gpu_q.get()
    # os.environ["CUDA_VISIBLE_DEVICES"] = gpu_to_use

    # Fail fast, before the dataset is loaded.
    if dataset_name in ['cats-vs-dogs']:
        raise NotImplementedError(
            "No kernel-plus transformer is configured for dataset '{}'".format(dataset_name))

    (x_train, y_train), (x_test, y_test) = dataset_load_fn()

    transformer = PlusKernelTransformer(translation_x=8, translation_y=8, rotations=1,
                                        flips=1, gauss=1, log=1)
    n, k = (10, 4)

    mdl = create_wide_residual_network(x_train.shape[1:], transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # get inliers of specific class
    x_train_task = x_train[y_train.flatten() == single_class_ind]
    # [0_i, ..., (N_transforms-1)_i, ..., ..., 0_N_samples, ...,
    # (N_transforms-1)_N_samples] shape: (N_transforms*N_samples,)
    transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(
        np.repeat(x_train_task, transformer.n_transforms, axis=0), transformations_inds)
    batch_size = 128

    mdl.fit(
        x=x_train_task_transformed, y=to_categorical(transformations_inds),
        batch_size=batch_size,
        epochs=2  # int(np.ceil(200/transformer.n_transforms))
    )

    scores = np.zeros((len(x_test), ))
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        # Fit a Dirichlet to the predictions on inliers transformed with t_ind.
        observed_dirichlet = mdl.predict(
            transformer.transform_batch(observed_data, [t_ind] * len(observed_data)),
            batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        # Score the test set under that Dirichlet and keep the raw predictions.
        x_test_p = mdl.predict(
            transformer.transform_batch(x_test, [t_ind] * len(x_test)),
            batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    save_results_file(dataset_name, single_class_ind, scores=scores,
                      labels=labels, experiment_name='kernel-plus-transformations')
    save_results_file(dataset_name, single_class_ind, scores=scores_simple,
                      labels=labels, experiment_name='kernel-plus-transformations-simple')
    save_results_file(dataset_name, single_class_ind, scores=scores_entropy,
                      labels=labels, experiment_name='kernel-plus-transformations-entropy')
    save_results_file(dataset_name, single_class_ind, scores=scores_xH,
                      labels=labels, experiment_name='kernel-plus-transformations-xH')

    mdl_weights_name = '{}_kernel-plus-transformations_{}_{}_weights.h5'.format(
        dataset_name,
        get_class_name_from_index(single_class_ind, dataset_name),
        datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
    mdl_weights_path = os.path.join(RESULTS_DIR, dataset_name, mdl_weights_name)
    mdl.save_weights(mdl_weights_path)
def build_network(num_outputs, architecture, classification = False, name = None):
    """ Constructs a CNN.

    # Arguments:

    - num_outputs: number of final output units.

    - architecture: name of the architecture. See ARCHITECTURES for a list of possible values and README.md for descriptions.
                    A trailing '-selu' selects SELU instead of ReLU activations and is stripped before the lookup.

    - classification: If `True`, the final layer will have a softmax activation, otherwise no activation at all.

    - name: The name of the network.

    # Returns:
        keras.models.Model

    # Raises:
        ValueError if `architecture` is not a known architecture name (including unknown 'rn*' names).
    """

    if architecture.lower().endswith('-selu'):
        activation = 'selu'
        architecture = architecture[:-5]
    else:
        activation = 'relu'

    final_activation = 'softmax' if classification else None
    # Name of the last Dense layer in the branches that build it here.
    final_name = 'prob' if classification else 'embedding'

    # CIFAR-100 architectures

    if architecture == 'resnet-32':

        # Without a classification top, the last filter count is set to the
        # requested number of outputs.
        return cifar_resnet.SmallResNet(5, filters = [16, 32, 64] if classification else [32, 64, num_outputs],
                                        activation = activation, include_top = classification,
                                        classes = num_outputs, name = name)

    elif architecture == 'resnet-110':

        return cifar_resnet.SmallResNet(18, filters = [16, 32, 64] if classification else [32, 64, num_outputs],
                                        activation = activation, include_top = classification,
                                        classes = num_outputs, name = name)

    elif architecture == 'resnet-110-fc':

        return cifar_resnet.SmallResNet(18, filters = [32, 64, 128], activation = activation,
                                        include_top = True, top_activation = final_activation,
                                        classes = num_outputs, name = name)

    elif architecture == 'wrn-28-10':

        return wrn.create_wide_residual_network((32, 32, 3), nb_classes = num_outputs, N = 4, k = 10, verbose = 0,
                                                final_activation = final_activation, name = name)

    elif architecture == 'densenet-100-12':

        return densenet.DenseNet(growth_rate = 12, depth = 100, bottleneck = False,
                                 classes = num_outputs, activation = final_activation, name = name)

    elif architecture == 'pyramidnet-272-200':

        return cifar_pyramidnet.PyramidNet(272, 200, bottleneck = True, activation = activation,
                                           classes = num_outputs, top_activation = final_activation, name = name)

    elif architecture == 'pyramidnet-110-270':

        return cifar_pyramidnet.PyramidNet(110, 270, bottleneck = False, activation = activation,
                                           classes = num_outputs, top_activation = final_activation, name = name)

    elif architecture == 'simple':

        return plainnet.PlainNet(num_outputs, activation = activation,
                                 final_activation = final_activation, name = name)

    # ImageNet architectures

    elif architecture == 'resnet-50':

        rn50 = keras.applications.ResNet50(include_top=False, weights=None)
        # Skip a trailing AveragePooling2D layer if this Keras version adds one.
        rn50_out = rn50.layers[-2].output if isinstance(rn50.layers[-1], keras.layers.AveragePooling2D) else rn50.layers[-1].output
        x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn50_out)
        x = keras.layers.Dense(num_outputs, activation = final_activation, name = final_name)(x)
        return keras.models.Model(rn50.inputs, x, name=name)

    elif architecture.startswith('rn'):

        factory_names = {
            'rn18'  : 'ResNet18',
            'rn34'  : 'ResNet34',
            'rn50'  : 'ResNet50',
            'rn101' : 'ResNet101',
            'rn152' : 'ResNet152',
            'rn200' : 'ResNet200'
        }
        # Reject unknown 'rn*' names with the same error as any other unknown
        # architecture instead of leaking a bare KeyError from the lookup.
        if architecture not in factory_names:
            raise ValueError('Unknown network architecture: {}'.format(architecture))

        import keras_resnet.models
        factory = getattr(keras_resnet.models, factory_names[architecture])

        input_ = keras.layers.Input((3, None, None)) if K.image_data_format() == 'channels_first' else keras.layers.Input((None, None, 3))
        rn = factory(input_, include_top = classification, classes = num_outputs, freeze_bn = False, name = name)
        if not classification:
            # Embedding network: pool the last feature map and add a linear layer.
            x = keras.layers.GlobalAvgPool2D(name = 'avg_pool')(rn.outputs[-1])
            x = keras.layers.Dense(num_outputs, name = 'embedding')(x)
            rn = keras.models.Model(input_, x, name = name)
        return rn

    elif architecture == 'nasnet-a':

        nasnet = keras.applications.NASNetLarge(include_top=False, input_shape=(224,224,3), weights=None, pooling='avg')
        x = keras.layers.Dense(num_outputs, activation = final_activation, name = final_name)(nasnet.output)
        return keras.models.Model(nasnet.inputs, x, name=name)

    else:

        raise ValueError('Unknown network architecture: {}'.format(architecture))