Example #1
def train_cifar10():
    (x_train, y_train), (x_test, y_test) = load_cifar10()

    idg = ImageDataGenerator(
        horizontal_flip=True,
        height_shift_range=4,
        width_shift_range=4,
        fill_mode='reflect'
    )

    idg.fit(x_train)

    n = 16
    k = 8
    mdl = create_wide_residual_network(x_train.shape[1:], 10, n, k)
    mdl.compile(SGDTorch(lr=.1, momentum=0.9, nesterov=True), 'categorical_crossentropy', ['acc'])

    lr_cb = LearningRateScheduler(lambda e: 0.1 * 0.2 ** (3 if e >= 160 else 2 if e >= 120 else 1 if e >= 60 else 0))

    batch_size = 128
    mdl.fit_generator(
        generator=idg.flow(x_train, to_categorical(y_train), batch_size=batch_size),
        epochs=200,
        validation_data=(idg.standardize(x_test), to_categorical(y_test)),
        callbacks=[lr_cb]
    )
    mdl.save_weights('cifar10_WRN_{}-{}.h5'.format(n, k))
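
The scheduler above is a step decay: the base rate 0.1 is multiplied by 0.2 once at each of epochs 60, 120 and 160. A minimal equivalent sketch (the helper name is hypothetical):

import numpy as np

def wrn_step_lr(epoch, base_lr=0.1, decay=0.2, milestones=(60, 120, 160)):
    # Multiply base_lr by decay once for every milestone already reached.
    drops = sum(epoch >= m for m in milestones)
    return base_lr * decay ** drops

assert wrn_step_lr(0) == 0.1
assert abs(wrn_step_lr(160) - 0.1 * 0.2 ** 3) < 1e-12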
Example #2
def test_resnet_transtransformer_1c():
    n_runs = 10
    transformer = transformations.TransTransformer()
    n, k = (10, 4)
    mdl_resnet = create_wide_residual_network([21, 21, 1],
                                              transformer.n_transforms, n, k)
    scores_list = []
    delta_times_list = []
    for i in range(n_runs):
        start_time = time.time()
        scores = test_model_loading(transformer,
                                    mdl_resnet,
                                    load_hits1c,
                                    dataset_name='hits-1-c',
                                    tf_version='tf1',
                                    transformer_name='transtransformed',
                                    model_name='resnet',
                                    epochs=2)
        end_time = time.time()
        delta_times_list.append(end_time - start_time)
        scores_list.append(scores)
    file_path = os.path.join(PROJECT_PATH, 'tests', 'aux_results',
                             'test_models_tf1-tf2.txt')
    print_scores_times_to_file(
        file_path,
        'Data_transformer_tf1_models_tf1_resnet_transtransformer_1c\n NRUNS: %i'
        % n_runs, scores_list, delta_times_list)
    del mdl_resnet
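
Each run above is timed with paired time.time() calls; scores_list and delta_times_list collect the two per-run values. The same pattern as a reusable sketch (all names hypothetical):

import time

def time_runs(fn, n_runs=10):
    # Call fn() n_runs times; return the results and per-run wall-clock seconds.
    results, deltas = [], []
    for _ in range(n_runs):
        start = time.time()
        results.append(fn())
        deltas.append(time.time() - start)
    return results, deltas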
Example #3
def transformation_cifar10_vs_tinyimagenet():
    _, (x_test, y_test) = load_cifar10()
    x_test_out = load_tinyimagenet('/home/izikgo/Imagenet_resize/Imagenet_resize/')

    transformer = Transformer(8, 8)
    n = 16
    k = 8
    base_mdl = create_wide_residual_network(x_test.shape[1:], 10, n, k)

    transformations_cls_out = Activation('softmax')(Dense(transformer.n_transforms)(base_mdl.get_layer(index=-3).output))

    mdl = Model(base_mdl.input, [base_mdl.output, transformations_cls_out])
    mdl.load_weights('cifar10_WRN_doublehead-transformations_{}-{}.h5'.format(n, k))

    scores_mdl = Model(mdl.input, mdl.output[1])
    x_test_all = np.concatenate((x_test, x_test_out))
    preds = np.zeros((len(x_test_all), transformer.n_transforms))
    for t in range(transformer.n_transforms):
        preds[:, t] = scores_mdl.predict(transformer.transform_batch(x_test_all, [t] * len(x_test_all)),
                                  batch_size=128)[:, t]

    labels = np.concatenate((np.ones(len(x_test)), np.zeros(len(x_test_out))))
    scores = preds.mean(axis=-1)

    save_roc_pr_curve_data(scores, labels, 'cifar10-vs-tinyimagenet_transformations.npz')
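
The loop above computes the "simplified normality score": for each transformation t, the softmax probability the model assigns to t on inputs transformed by t, averaged over all transformations. A self-contained sketch of that scoring step, assuming the model/transformer interfaces used above:

import numpy as np

def simplified_normality_scores(mdl, transformer, x, batch_size=128):
    # Mean p(t | T_t(x)) over transformations t; higher = more in-distribution.
    preds = np.zeros((len(x), transformer.n_transforms))
    for t in range(transformer.n_transforms):
        x_t = transformer.transform_batch(x, [t] * len(x))
        preds[:, t] = mdl.predict(x_t, batch_size=batch_size)[:, t]
    return preds.mean(axis=-1)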
Example #4
def train_cifar10_transformations():
    (x_train, y_train), _ = load_cifar10()

    transformer = Transformer(8, 8)

    def data_gen(x, y, batch_size):
        while True:
            ind_permutation = np.random.permutation(len(x))
            for b_start_ind in range(0, len(x), batch_size):
                batch_inds = ind_permutation[b_start_ind:b_start_ind + batch_size]
                x_batch = x[batch_inds]
                y_batch = y[batch_inds].flatten()

                if K.image_data_format() == 'channels_first':
                    x_batch = np.transpose(x_batch, (0, 2, 3, 1))

                y_t_batch = np.random.randint(0, transformer.n_transforms, size=len(x_batch))

                x_batch = transformer.transform_batch(x_batch, y_t_batch)

                if K.image_data_format() == 'channels_first':
                    x_batch = np.transpose(x_batch, (0, 3, 1, 2))

                yield (x_batch, [to_categorical(y_batch, num_classes=10), to_categorical(y_t_batch, num_classes=transformer.n_transforms)])

    n = 16
    k = 8
    base_mdl = create_wide_residual_network(x_train.shape[1:], 10, n, k)

    transformations_cls_out = Activation('softmax')(Dense(transformer.n_transforms)(base_mdl.get_layer(index=-3).output))

    mdl = Model(base_mdl.input, [base_mdl.output, transformations_cls_out])

    mdl.compile(SGDTorch(lr=.1, momentum=0.9, nesterov=True), 'categorical_crossentropy', ['acc'])

    lr_cb = LearningRateScheduler(lambda e: 0.1 * 0.2 ** (3 if e >= 160 else 2 if e >= 120 else 1 if e >= 60 else 0))

    batch_size = 128
    mdl.fit_generator(
        generator=data_gen(x_train, y_train, batch_size=batch_size),
        steps_per_epoch=len(x_train) // batch_size,
        epochs=200,
        callbacks=[lr_cb]
    )
    mdl.save_weights('cifar10_WRN_doublehead-transformations_{}-{}.h5'.format(n, k))
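Example #5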
def _transformations_experiment(dataset_load_fn, dataset_name, single_class_ind, gpu_q):
    gpu_to_use = gpu_q.get()
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_to_use

    (x_train, y_train), (x_test, y_test) = dataset_load_fn()

    if dataset_name in ['cats-vs-dogs']:
        transformer = Transformer(16, 16)
        n, k = (16, 8)
    else:
        transformer = Transformer(8, 8)
        n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:], transformer.n_transforms, n, k)
    mdl.compile('adam',
                'categorical_crossentropy',
                ['acc'])

    x_train_task = x_train[y_train.flatten() == single_class_ind]
    transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(np.repeat(x_train_task, transformer.n_transforms, axis=0),
                                                           transformations_inds)
    batch_size = 128

    mdl.fit(x=x_train_task_transformed, y=to_categorical(transformations_inds),
            batch_size=batch_size, epochs=int(np.ceil(200/transformer.n_transforms))
            )

    #################################################################################################
    # simplified normality score
    #################################################################################################
    # preds = np.zeros((len(x_test), transformer.n_transforms))
    # for t in range(transformer.n_transforms):
    #     preds[:, t] = mdl.predict(transformer.transform_batch(x_test, [t] * len(x_test)),
    #                               batch_size=batch_size)[:, t]
    #
    # labels = y_test.flatten() == single_class_ind
    # scores = preds.mean(axis=-1)
    #################################################################################################

    def calc_approx_alpha_sum(observations):
        # Closed-form initialization of the Dirichlet precision sum(alpha)
        # from the sample mean f of the observed probability vectors.
        N = len(observations)
        f = np.mean(observations, axis=0)

        return (N * (len(f) - 1) * (-psi(1))) / (
                N * np.sum(f * np.log(f)) - np.sum(f * np.sum(np.log(observations), axis=0)))

    def inv_psi(y, iters=5):
        # Newton iterations for the inverse digamma; initial estimate
        # exp(y) + 0.5 for y >= -2.22, else -1 / (y - psi(1))
        cond = y >= -2.22
        x = cond * (np.exp(y) + 0.5) + (1 - cond) * -1 / (y - psi(1))

        for _ in range(iters):
            x = x - (psi(x) - y) / polygamma(1, x)
        return x

    def fixed_point_dirichlet_mle(alpha_init, log_p_hat, max_iter=1000):
        # Fixed-point iteration for the Dirichlet MLE:
        # psi(alpha_k) = psi(sum(alpha)) + E[log p_k]
        alpha_new = alpha_old = alpha_init
        for _ in range(max_iter):
            alpha_new = inv_psi(psi(np.sum(alpha_old)) + log_p_hat)
            if np.sqrt(np.sum((alpha_old - alpha_new) ** 2)) < 1e-9:
                break
            alpha_old = alpha_new
        return alpha_new

    def dirichlet_normality_score(alpha, p):
        # Dirichlet log-likelihood of p up to terms constant in p:
        # sum_k (alpha_k - 1) * log p_k
        return np.sum((alpha - 1) * np.log(p), axis=-1)

    scores = np.zeros((len(x_test),))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        observed_dirichlet = mdl.predict(transformer.transform_batch(observed_data, [t_ind] * len(observed_data)),
                                         batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        x_test_p = mdl.predict(transformer.transform_batch(x_test, [t_ind] * len(x_test)),
                               batch_size=1024)
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    labels = y_test.flatten() == single_class_ind

    res_file_name = '{}_transformations_{}_{}.npz'.format(dataset_name,
                                                 get_class_name_from_index(single_class_ind, dataset_name),
                                                 datetime.now().strftime('%Y-%m-%d-%H%M'))
    res_file_path = os.path.join(RESULTS_DIR, dataset_name, res_file_name)
    save_roc_pr_curve_data(scores, labels, res_file_path)

    mdl_weights_name = '{}_transformations_{}_{}_weights.h5'.format(dataset_name,
                                                           get_class_name_from_index(single_class_ind, dataset_name),
                                                           datetime.now().strftime('%Y-%m-%d-%H%M'))
    mdl_weights_path = os.path.join(RESULTS_DIR, dataset_name, mdl_weights_name)
    mdl.save_weights(mdl_weights_path)

    gpu_q.put(gpu_to_use)
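
calc_approx_alpha_sum, inv_psi and fixed_point_dirichlet_mle implement a fixed-point Dirichlet MLE over the model's softmax outputs. A toy sanity check, assuming those helpers are lifted to module scope (they are nested above; psi and polygamma come from scipy.special, as in the source):

import numpy as np

rng = np.random.RandomState(0)
p = rng.dirichlet([5.0, 2.0, 1.0], size=1000)        # simulated softmax outputs
log_p_hat = np.log(p).mean(axis=0)
alpha_0 = p.mean(axis=0) * calc_approx_alpha_sum(p)  # moment-based initialization
alpha_mle = fixed_point_dirichlet_mle(alpha_0, log_p_hat)  # approx. [5, 2, 1]
scores = dirichlet_normality_score(alpha_mle, p)     # higher = more typical sample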
Example #6
def build_network(num_outputs,
                  architecture,
                  classification=False,
                  no_softmax=False,
                  name=None):
    """ Constructs a CNN.
    
    # Arguments:
    
    - num_outputs: number of final output units.
    
    - architecture: name of the architecture. See ARCHITECTURES for a list of possible values and README.md for descriptions.
    
    - classification: If `True`, the final layer will have a softmax activation, otherwise no activation at all.

    - no_softmax: Usually, the last layer will have a softmax activation if `classification` is True. However, if `no_softmax` is set
                  to True as well, the last layer will not have any activation.
    
    - name: The name of the network.
    
    # Returns:
        keras.models.Model
    """

    if architecture.lower().endswith('-selu'):
        activation = 'selu'
        architecture = architecture[:-5]
    else:
        activation = 'relu'

    # CIFAR-100 architectures

    if architecture == 'resnet-32':

        return cifar_resnet.SmallResNet(
            5,
            filters=[16, 32, 64],
            activation=activation,
            include_top=classification,
            top_activation=None if no_softmax else 'softmax',
            classes=num_outputs,
            name=name)

    elif architecture == 'resnet-110':

        return cifar_resnet.SmallResNet(
            18,
            filters=[16, 32, 64],
            activation=activation,
            include_top=classification,
            top_activation=None if no_softmax else 'softmax',
            classes=num_outputs,
            name=name)

    elif architecture == 'resnet-110-fc':

        return cifar_resnet.SmallResNet(
            18,
            filters=[16, 32, 64],
            activation=activation,
            include_top=True,
            top_activation='softmax' if classification and
            (not no_softmax) else None,
            classes=num_outputs,
            name=name)

    elif architecture == 'resnet-110-wfc':

        return cifar_resnet.SmallResNet(
            18,
            filters=[32, 64, 128],
            activation=activation,
            include_top=True,
            top_activation='softmax' if classification and
            (not no_softmax) else None,
            classes=num_outputs,
            name=name)

    elif architecture == 'wrn-28-10':

        return wrn.create_wide_residual_network(
            (32, 32, 3),
            nb_classes=num_outputs,
            N=4,
            k=10,
            verbose=0,
            final_activation='softmax' if classification and
            (not no_softmax) else None,
            name=name)

    elif architecture == 'densenet-100-12':

        return densenet.DenseNet(growth_rate=12,
                                 depth=100,
                                 nb_dense_block=3,
                                 bottleneck=False,
                                 nb_filter=16,
                                 reduction=0.0,
                                 classes=num_outputs,
                                 activation='softmax' if classification and
                                 (not no_softmax) else None,
                                 name=name)

    elif architecture == 'densenet-100-24':

        return densenet.DenseNet(growth_rate=24,
                                 depth=100,
                                 nb_dense_block=3,
                                 bottleneck=False,
                                 nb_filter=16,
                                 reduction=0.0,
                                 classes=num_outputs,
                                 activation='softmax' if classification and
                                 (not no_softmax) else None,
                                 name=name)

    elif architecture == 'densenet-bc-190-40':

        return densenet.DenseNet(growth_rate=40,
                                 depth=190,
                                 nb_dense_block=3,
                                 bottleneck=True,
                                 nb_filter=-1,
                                 reduction=0.5,
                                 classes=num_outputs,
                                 activation='softmax' if classification and
                                 (not no_softmax) else None,
                                 name=name)

    elif architecture == 'pyramidnet-272-200':

        return cifar_pyramidnet.PyramidNet(
            272,
            200,
            bottleneck=True,
            activation=activation,
            classes=num_outputs,
            top_activation='softmax' if classification and
            (not no_softmax) else None,
            name=name)

    elif architecture == 'pyramidnet-110-270':

        return cifar_pyramidnet.PyramidNet(
            110,
            270,
            bottleneck=False,
            activation=activation,
            classes=num_outputs,
            top_activation='softmax' if classification and
            (not no_softmax) else None,
            name=name)

    elif architecture == 'simple':

        return plainnet.PlainNet(
            num_outputs,
            activation=activation,
            final_activation='softmax' if classification and
            (not no_softmax) else None,
            name=name)

    # ImageNet architectures

    elif architecture in ('resnet-50', 'resnet-101', 'resnet-152'):

        if architecture == 'resnet-101':
            factory = keras_applications.resnet.ResNet101
        elif architecture == 'resnet-152':
            factory = keras_applications.resnet.ResNet152
        else:
            # ResNet50 has been available from the beginning, while the other two were added in keras-applications 1.0.7.
            # Thus, we use the initial implementation of ResNet50 for compatibility's sake.
            factory = keras.applications.ResNet50
        rn = factory(include_top=False, weights=None)
        # Depending on the Keras version, the ResNet50 model may or may not contain a final average pooling layer.
        rn_out = rn.layers[-2].output if isinstance(
            rn.layers[-1],
            keras.layers.AveragePooling2D) else rn.layers[-1].output
        x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn_out)
        x = keras.layers.Dense(
            num_outputs,
            activation='softmax' if classification and
            (not no_softmax) else None,
            name='prob' if classification else 'embedding')(x)
        return keras.models.Model(rn.inputs, x, name=name)

    elif architecture.startswith('rn'):

        import keras_resnet.models
        factories = {
            'rn18': keras_resnet.models.ResNet18,
            'rn34': keras_resnet.models.ResNet34,
            'rn50': keras_resnet.models.ResNet50,
            'rn101': keras_resnet.models.ResNet101,
            'rn152': keras_resnet.models.ResNet152,
            'rn200': keras_resnet.models.ResNet200
        }
        input_ = keras.layers.Input((3, None, None)) if K.image_data_format(
        ) == 'channels_first' else keras.layers.Input((None, None, 3))
        rn = factories[architecture](input_,
                                     include_top=classification
                                     and (not no_softmax),
                                     classes=num_outputs,
                                     freeze_bn=False,
                                     name=name)
        if (not classification) or no_softmax:
            x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn.outputs[-1])
            x = keras.layers.Dense(
                num_outputs,
                name='prob' if classification else 'embedding',
                activation=None if no_softmax else 'softmax')(x)
            rn = keras.models.Model(input_, x, name=name)
        return rn

    elif architecture == 'nasnet-a':

        nasnet = keras.applications.NASNetLarge(include_top=False,
                                                input_shape=(224, 224, 3),
                                                weights=None,
                                                pooling='avg')
        x = keras.layers.Dense(num_outputs,
                               activation='softmax' if classification and
                               (not no_softmax) else None,
                               name='prob' if classification else 'embedding')(
                                   nasnet.output)
        return keras.models.Model(nasnet.inputs, x, name=name)

    else:

        raise ValueError(
            'Unknown network architecture: {}'.format(architecture))
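
A minimal usage sketch of build_network (architecture strings taken from the dispatch above; the '-selu' suffix swaps ReLU for SELU before dispatch):

clf = build_network(100, 'resnet-110', classification=True, name='cifar100_clf')
emb = build_network(128, 'wrn-28-10')  # no softmax: linear embedding head
selu_clf = build_network(100, 'pyramidnet-272-200-selu', classification=True)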
Example #7
                      num_classes=num_classes)
if model_name == 'densenet':
    print(model_name)
    model = densenet.DenseNet(nb_classes=num_classes,
                              img_dim=input_shape,
                              depth=40,
                              nb_dense_block=3,
                              growth_rate=12,
                              nb_filter=16,
                              dropout_rate=0,
                              weight_decay=1e-4)
if model_name == 'vgg':
    model = vgg(input_shape=input_shape, num_classes=num_classes)
if model_name == 'wrn':
    model = create_wide_residual_network(input_dim=input_shape,
                                         nb_classes=num_classes,
                                         N=2,
                                         k=8)

if optimizer == 'Adam':
    opt = Adam(lr=init_lr)
elif optimizer == 'SGD':
    opt = SGD(lr=init_lr, momentum=0.9)
elif optimizer == 'RMSprop':
    opt = RMSprop(lr=init_lr)
elif optimizer == 'Adagrad':
    opt = Adagrad(lr=init_lr)
else:
    # guard against an unbound opt when an unknown name is passed
    raise ValueError('Unknown optimizer: {}'.format(optimizer))

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy', 'top_k_categorical_accuracy'])
# model.summary()
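Example #8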
    (x_train, y_train), (x_val,
                         y_val), (x_test,
                                  y_test) = load_hits(n_samples_by_class=10000,
                                                      test_size=0.20,
                                                      val_size=0.10,
                                                      return_val=True)
    print(x_train.shape)
    print(x_val.shape)
    print(x_test.shape)

    transformer = TransTransformer(8, 8)
    n, k = (10, 4)

    mdl = create_wide_residual_network(input_shape=x_train.shape[1:],
                                       num_classes=transformer.n_transforms,
                                       depth=n,
                                       widen_factor=k)
    mdl.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['acc'])

    print(mdl.summary())

    # get inliers of specific class
    x_train_task = x_train[y_train.flatten() == single_class_ind]
    print(x_train_task.shape)

    x_val_task = x_val[y_val.flatten() == single_class_ind]
    print(x_val_task.shape)

    transformations_inds_train = np.tile(np.arange(transformer.n_transforms),
                                         len(x_train_task))  # truncated in the listing; completed as in Example #9
Example #9
def test_model_original(transformer,
                        loader,
                        dataset_name='hits-4-c',
                        single_class_ind=1):
    results_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_results')
    save_dir = os.path.join(PROJECT_PATH, 'tests', 'aux_data')
    utils.check_path(results_dir)
    utils.check_path(save_dir)
    utils.check_path(os.path.join(results_dir, dataset_name))

    # load-save data
    (x_train, y_train), (x_val, y_val), (x_test,
                                         y_test) = loader(return_val=True)
    normal_data = (x_train, y_train), (x_val, y_val), (x_test, y_test)
    utils.save_pickle(
        normal_data,
        os.path.join(save_dir,
                     'normal_data_%s_tf1_original.pkl' % dataset_name))
    # create model
    n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:],
                                       transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])
    # get inliers of specific class
    # get inliers
    x_train_task = x_train[y_train.flatten() == single_class_ind]
    print(x_train_task.shape)
    # transform inliers
    transformations_inds = np.tile(np.arange(transformer.n_transforms),
                                   len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(
        np.repeat(x_train_task, transformer.n_transforms, axis=0),
        transformations_inds)
    print(x_train_task_transformed.shape)
    # train model
    batch_size = 128
    mdl.fit(x=x_train_task_transformed,
            y=to_categorical(transformations_inds),
            batch_size=batch_size,
            epochs=int(np.ceil(200 / transformer.n_transforms)))
    scores = np.zeros((len(x_test), ))
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        observed_dirichlet = mdl.predict(transformer.transform_batch(
            observed_data, [t_ind] * len(observed_data)),
                                         batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        x_test_p = mdl.predict(transformer.transform_batch(
            x_test, [t_ind] * len(x_test)),
                               batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    save_results_file(results_dir,
                      dataset_name,
                      single_class_ind,
                      scores=scores,
                      labels=labels,
                      experiment_name='transformations')
    save_results_file(results_dir,
                      dataset_name,
                      single_class_ind,
                      scores=scores_simple,
                      labels=labels,
                      experiment_name='transformations-simple')
    save_results_file(results_dir,
                      dataset_name,
                      single_class_ind,
                      scores=scores_entropy,
                      labels=labels,
                      experiment_name='transformations-entropy')
    save_results_file(results_dir,
                      dataset_name,
                      single_class_ind,
                      scores=scores_xH,
                      labels=labels,
                      experiment_name='transformations-xH')
    mdl_weights_name = '{}_tf1_original_{}_weights.h5'.format(
        dataset_name, get_class_name_from_index(single_class_ind,
                                                dataset_name))
    mdl_weights_path = os.path.join(results_dir, dataset_name,
                                    mdl_weights_name)
    mdl.save_weights(mdl_weights_path)
    """
  Time test_model_original(transformer, load_hits4c, dataset_name='hits-4-c') 00:06:58.37
  (0.9917134999999999, 0.9350055, 0.9872614999999999, 0.94142025)
  (0.9938067500000001, 0.9923547500000001, 0.9931685, 0.992637375)
  (0.9912172499999999, 0.9883357499999998, 0.9909070000000001, 0.9886706249999999)
  #train only Time test_model_original(transformer, load_hits4c, dataset_name='hits-4-c', tf_version='tf1') 00:03:48.29
  """
    return get_roc_auc(scores, labels), get_roc_auc(scores_simple, labels), \
           get_roc_auc(scores_entropy, labels), get_roc_auc(scores_xH, labels)
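
Here matrix_evals[n, j, t] stores p(predicted transform j | transform t applied to x_n), so the np.trace above sums the diagonal p(t | T_t(x)), recovering the simplified score. get_entropy and get_xH are project helpers; a hypothetical sketch of the entropy variant, consistent with those shapes (negated upstream so that higher means more normal):

import numpy as np

def entropy_of_evals(matrix_evals, eps=1e-12):
    # Entropy of each predicted-transform distribution (axis 1),
    # averaged over the applied transforms (last axis).
    h = -np.sum(matrix_evals * np.log(matrix_evals + eps), axis=1)
    return h.mean(axis=-1)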
Example #10
def _kernal_plus_transformations_experiment(dataset_load_fn, dataset_name,
                                            single_class_ind, gpu_q):
    # gpu_to_use = gpu_q.get()
    # os.environ["CUDA_VISIBLE_DEVICES"] = gpu_to_use

    (x_train, y_train), (x_test, y_test) = dataset_load_fn()

    if dataset_name in ['cats-vs-dogs']:
        # No PlusKernelTransformer is configured for this dataset; fail fast
        # instead of dereferencing transformer = None further down.
        raise ValueError('No transformer defined for {}'.format(dataset_name))
    else:
        transformer = PlusKernelTransformer(translation_x=8,
                                            translation_y=8,
                                            rotations=1,
                                            flips=1,
                                            gauss=1,
                                            log=1)
        n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:],
                                       transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # get inliers of specific class
    x_train_task = x_train[y_train.flatten() == single_class_ind]
    # transformation labels [0, ..., N_transforms-1] tiled once per sample,
    # aligned with the np.repeat below; shape: (N_transforms * N_samples,)
    # (see the shape check after this example)
    transformations_inds = np.tile(np.arange(transformer.n_transforms),
                                   len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(
        np.repeat(x_train_task, transformer.n_transforms, axis=0),
        transformations_inds)
    batch_size = 128

    mdl.fit(
        x=x_train_task_transformed,
        y=to_categorical(transformations_inds),
        batch_size=batch_size,
        epochs=2  #int(np.ceil(200/transformer.n_transforms))
    )

    scores = np.zeros((len(x_test), ))
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        observed_dirichlet = mdl.predict(transformer.transform_batch(
            observed_data, [t_ind] * len(observed_data)),
                                         batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)

        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx

        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)

        x_test_p = mdl.predict(transformer.transform_batch(
            x_test, [t_ind] * len(x_test)),
                               batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = -1 * get_entropy(matrix_evals)
    scores_xH = -1 * get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    save_results_file(dataset_name,
                      single_class_ind,
                      scores=scores,
                      labels=labels,
                      experiment_name='kernel-plus-transformations')
    save_results_file(dataset_name,
                      single_class_ind,
                      scores=scores_simple,
                      labels=labels,
                      experiment_name='kernel-plus-transformations-simple')
    save_results_file(dataset_name,
                      single_class_ind,
                      scores=scores_entropy,
                      labels=labels,
                      experiment_name='kernel-plus-transformations-entropy')
    save_results_file(dataset_name,
                      single_class_ind,
                      scores=scores_xH,
                      labels=labels,
                      experiment_name='kernel-plus-transformations-xH')

    mdl_weights_name = '{}_kernel-plus-transformations_{}_{}_weights.h5'.format(
        dataset_name, get_class_name_from_index(single_class_ind,
                                                dataset_name),
        datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
    mdl_weights_path = os.path.join(RESULTS_DIR, dataset_name,
                                    mdl_weights_name)
    mdl.save_weights(mdl_weights_path)
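
The np.tile / np.repeat pairing above aligns each repeated sample with its transformation label. A toy shape check (values hypothetical):

import numpy as np

x = np.zeros((3, 21, 21, 1))                    # 3 inlier images
T = 4                                           # transformer.n_transforms
labels = np.tile(np.arange(T), len(x))          # 0,1,2,3, 0,1,2,3, 0,1,2,3
x_rep = np.repeat(x, T, axis=0)                 # each image repeated T times, in order
assert len(x_rep) == len(labels) == len(x) * T  # rows i*T..i*T+T-1 belong to image i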
Example #11
def build_network(num_outputs, architecture, classification = False, name = None):
    """ Constructs a CNN.
    
    # Arguments:
    
    - num_outputs: number of final output units.
    
    - architecture: name of the architecture. See ARCHITECTURES for a list of possible values and README.md for descriptions.
    
    - classification: If `True`, the final layer will have a softmax activation, otherwise no activation at all.
    
    - name: The name of the network.
    
    # Returns:
        keras.models.Model
    """
    
    if architecture.lower().endswith('-selu'):
        activation = 'selu'
        architecture = architecture[:-5]
    else:
        activation = 'relu'
    
    # CIFAR-100 architectures
    
    if architecture == 'resnet-32':
        
        return cifar_resnet.SmallResNet(5, filters = [16, 32, 64] if classification else [32, 64, num_outputs], activation = activation,
                                        include_top = classification, classes = num_outputs, name = name)
        
    elif architecture == 'resnet-110':
        
        return cifar_resnet.SmallResNet(18, filters = [16, 32, 64] if classification else [32, 64, num_outputs], activation = activation,
                                        include_top = classification, classes = num_outputs, name = name)
    
    elif architecture == 'resnet-110-fc':
        
        return cifar_resnet.SmallResNet(18, filters = [32, 64, 128], activation = activation,
                                        include_top = True, top_activation = 'softmax' if classification else None,
                                        classes = num_outputs, name = name)
    
    elif architecture == 'wrn-28-10':
        
        return wrn.create_wide_residual_network((32, 32, 3), nb_classes = num_outputs, N = 4, k = 10, verbose = 0,
                                                final_activation = 'softmax' if classification else None, name = name)
        
    elif architecture == 'densenet-100-12':
        
        return densenet.DenseNet(growth_rate = 12, depth = 100, bottleneck = False,
                                 classes = num_outputs, activation = 'softmax' if classification else None, name = name)
    
    elif architecture == 'pyramidnet-272-200':
        
        return cifar_pyramidnet.PyramidNet(272, 200, bottleneck = True, activation = activation,
                                           classes = num_outputs, top_activation = 'softmax' if classification else None, name = name)
    
    elif architecture == 'pyramidnet-110-270':
        
        return cifar_pyramidnet.PyramidNet(110, 270, bottleneck = False, activation = activation,
                                           classes = num_outputs, top_activation = 'softmax' if classification else None, name = name)
        
    elif architecture == 'simple':
        
        return plainnet.PlainNet(num_outputs,
                                 activation = activation,
                                 final_activation = 'softmax' if classification else None,
                                 name = name)
    
    # ImageNet architectures
    
    elif architecture == 'resnet-50':
        
        rn50 = keras.applications.ResNet50(include_top=False, weights=None)
        rn50_out = rn50.layers[-2].output if isinstance(rn50.layers[-1], keras.layers.AveragePooling2D) else rn50.layers[-1].output
        x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn50_out)
        x = keras.layers.Dense(num_outputs, activation = 'softmax' if classification else None, name = 'prob' if classification else 'embedding')(x)
        return keras.models.Model(rn50.inputs, x, name=name)
    
    elif architecture.startswith('rn'):

        import keras_resnet.models
        factories = {
            'rn18'  : keras_resnet.models.ResNet18,
            'rn34'  : keras_resnet.models.ResNet34,
            'rn50'  : keras_resnet.models.ResNet50,
            'rn101' : keras_resnet.models.ResNet101,
            'rn152' : keras_resnet.models.ResNet152,
            'rn200' : keras_resnet.models.ResNet200
        }
        input_ = keras.layers.Input((3, None, None)) if K.image_data_format() == 'channels_first' else keras.layers.Input((None, None, 3))
        rn = factories[architecture](input_, include_top = classification, classes = num_outputs, freeze_bn = False, name = name)
        if not classification:
            x = keras.layers.GlobalAvgPool2D(name = 'avg_pool')(rn.outputs[-1])
            x = keras.layers.Dense(num_outputs, name = 'embedding')(x)
            rn = keras.models.Model(input_, x, name = name)
        return rn
    
    elif architecture == 'nasnet-a':
        
        nasnet = keras.applications.NASNetLarge(include_top=False, input_shape=(224,224,3), weights=None, pooling='avg')
        x = keras.layers.Dense(num_outputs, activation = 'softmax' if classification else None, name = 'prob' if classification else 'embedding')(nasnet.output)
        return keras.models.Model(nasnet.inputs, x, name=name)
    
    else:
        
        raise ValueError('Unknown network architecture: {}'.format(architecture))
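
As with Example #6, a brief usage sketch of this earlier variant (architecture names from the dispatch above):

clf = build_network(100, 'wrn-28-10', classification=True)  # softmax classifier head
emb = build_network(128, 'resnet-110-selu')                 # SELU trunk, no classification top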