Code example #1
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    X = np.load(imdb_data_folder + "X.npy")
    X_test = np.load(imdb_data_folder + "X_test_50.npy")
    y = np.load(imdb_data_folder + "y.npy")
    y_test = np.load(imdb_data_folder + "y_test_50.npy")

    K = pickle.load(open(imdb_data_folder + "K_LSTM_full.p", "rb"))

    save_kernel(K, FLAGS)
    save_data(X, y, X_test, y_test, FLAGS)
Code example #2
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    print(partial_kernel_n_proc)

    number_completed = 0

    files = find_partial_kernel_filenames(FLAGS)
    for f in files:
        cnt = int(f.split("_")[-2])
        if cnt > partial_kernel_n_proc:
            number_completed = cnt - partial_kernel_n_proc
        if f == kernel_filename(FLAGS):
            number_completed = partial_kernel_n_proc
            #break
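    # Poll for partial-kernel files written by the worker processes; once all
    # partial_kernel_n_proc pieces have been summed, save their average as the full kernel.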
    while number_completed < partial_kernel_n_proc:
        files = find_partial_kernel_filenames(FLAGS)
        print(files)
        if len(files) > 1:
            for i, f in enumerate(files):
                print(f)
                if i == 0:
                    cov = load_kernel_by_filename(f)
                    if number_completed == 0:
                        number_completed += 1
                else:
                    cov += load_kernel_by_filename(f)
                    number_completed += 1
            if number_completed >= partial_kernel_n_proc:
                save_kernel(cov / number_completed, FLAGS)
            else:
                save_kernel_partial(cov, FLAGS,
                                    partial_kernel_n_proc + number_completed)
            for f in files:
                os.remove(f)
        sleep(100)
Code example #3
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError("Initialization distributions other than Gaussian are not implemented for computing pac bayes bounds!")

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    if n_gpus>0:
        os.environ["CUDA_VISIBLE_DEVICES"]=str((rank)%n_gpus)
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    #tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''GET DATA'''
    from utils import load_data,load_model,load_kernel
    train_images,flat_train_images,ys,test_images,test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    #print("ys", ys)
    #process data into the right format for the GP
    #test on a smaller sample of the test set, because otherwise the GP would run out of memory
    test_images = test_images[:test_function_size]
    test_ys = test_ys[:test_function_size]
    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0,len(data.shape)-1], np.arange(1, len(data.shape)-1)])
    print(data.shape,tp_order)
    flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW # this is because the cnn GP kernels assume this
    flat_test_images = np.array([test_image.flatten() for test_image in flat_data])
    Xtrain = flat_train_images
    Xtest = flat_test_images
    Xfull =  np.concatenate([flat_train_images,flat_test_images])
    ys2 = [[y] for y in ys]
    if test_fun_override is not None:
        ys2test = [[float(x)] for x in test_fun_override]
    else:
        ys2test = [[y] for y in test_ys]
    ysfull = ys2 + ys2test
    Yfull = np.array(ysfull)
    Ytrain = np.array(ys2)
    Ytest = np.array(ys2test)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)

    # if loss is not "mse":
    #     raise NotImplementedError("Haven't implemented logQ estimate for CE loss yet")

    from utils import load_posterior_params
    mean,cov = load_posterior_params(FLAGS)

    #compute logQ: the EP-approximated posterior probability of the test-set labels
    if using_EP:
        from GP_prob.nngp_mse_heaviside_posterior import nngp_mse_heaviside_posteror_logp
        logQ = nngp_mse_heaviside_posteror_logp(Xtest,Ytest,mean,cov)
    else:
        raise NotImplementedError("Only EP estimation of logQ is implemented")

    if rank == 0:
        print(logQ)
        useful_flags = ["dataset","boolfun_comp","boolfun","test_fun_override", "test_function_size", "network", "m","label_corruption","confusion", "number_layers", "sigmaw", "sigmab", "binarized", "pooling", "intermediate_pooling", "whitening", "training", "n_gpus", "kernel_mult", "normalize_kernel", "logPGPEP", "errors"]
        with open(results_folder+prefix+"logQs.txt","a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("logQ")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(logQ))
            file.write("\n")
Code example #4
FLAGS['number_layers'] = 1
FLAGS['pooling'] = "none"
FLAGS['intermediate_pooling'] = "0000"
FLAGS['sigmaw'] = 10.0
FLAGS['sigmab'] = 10.0
FLAGS['network'] = "fc"
FLAGS['prefix'] = "test"
FLAGS['whitening'] = False
FLAGS['centering'] = False
FLAGS['random_labels'] = True
FLAGS['training'] = True
FLAGS['no_training'] = False

from utils import preprocess_flags
FLAGS = preprocess_flags(FLAGS)
globals().update(FLAGS)

from utils import load_data,load_model,load_kernel
train_images,flat_train_images,ys,test_images,test_ys = load_data(FLAGS)
input_dim = train_images.shape[1]
num_channels = train_images.shape[-1]
# tp_order = np.concatenate([[0,len(train_images.shape)-1], np.arange(1, len(train_images.shape)-1)])
# train_images = tf.constant(train_images)

test_images = test_images[:500]
test_ys = test_ys[:500]
#%%

# Note: `filename` and `load_data_by_filename` are not defined in this snippet (they come from the surrounding source).
train_images,flat_data,ys,test_images,test_ys = load_data_by_filename(filename)
Code example #5
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)
    print("poolin", pooling)

    print("Generating architecture", network, number_layers)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    import os
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str((rank) % n_gpus)

    from tensorflow import keras
    #import keras
    import keras_applications
    keras_applications._KERAS_BACKEND = keras.backend
    keras_applications._KERAS_LAYERS = keras.layers
    keras_applications._KERAS_MODELS = keras.models
    keras_applications._KERAS_UTILS = keras.utils
    import warnings

    num_classes = 2
    #TODO: make code compatible with non-binary

    # %%

    if dataset == "cifar":
        image_size = 32
        number_channels = 3
    elif dataset == "imagenet":
        image_size = 256
        number_channels = 3
    elif dataset == "mnist":
        image_size = 28
        number_channels = 1
    elif dataset == "mnist-fashion":
        image_size = 28
        number_channels = 1
    elif dataset == "KMNIST":
        image_size = 28
        number_channels = 1
    elif dataset == "EMNIST":
        image_size = 28
        number_channels = 1
    elif dataset == "boolean":
        if boolean_input_dim is not None:
            input_dim = boolean_input_dim
        else:
            input_dim = 7
    elif dataset == "calabiyau":
        input_dim = 180
    elif dataset == "ion":
        input_dim = 34
    else:
        raise NotImplementedError

    if not (dataset == "boolean" or dataset == "calabiyau"
            or dataset == "ion"):
        image_height = image_size
        image_width = image_size
        input_dim = image_height * image_width * number_channels
    set_session = tf.compat.v1.keras.backend.set_session

    from utils import cauchy_init_wrapper, shifted_init_wrapper

    if init_dist == "gaussian":
        bias_initializer = keras.initializers.RandomNormal(stddev=sigmab)
        # weight_initializer = keras.initializers.RandomNormal(stddev=sigmaw/np.sqrt(input_dim))
        weight_initializer = keras.initializers.VarianceScaling(
            scale=sigmaw**2, mode='fan_in', distribution='normal', seed=None)
        if use_shifted_init:
            bias_initializer_last_layer = shifted_init_wrapper(
                sigmab, shifted_init_shift)
        else:
            bias_initializer_last_layer = bias_initializer
    elif init_dist == "cauchy":
        bias_initializer = cauchy_init_wrapper(sigmab)
        weight_initializer = _wrapper(sigmaw)  # NOTE: this identifier appears truncated in the source (likely a Cauchy weight-initializer wrapper)
        bias_initializer_last_layer = bias_initializer
    elif init_dist == "uniform":
        bias_initializer = keras.initializers.RandomUniform(
            minval=-np.sqrt(3 * sigmab), maxval=np.sqrt(3 * sigmab), seed=None)
        weight_initializer = keras.initializers.VarianceScaling(
            scale=sigmaw, mode='fan_in', distribution='uniform', seed=None)
        bias_initializer_last_layer = bias_initializer
    else:
        raise NotImplementedError
    # bias_initializer = keras.initializers.Zeros()
    # weight_initializer = keras.initializers.glorot_uniform()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    # config.gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    # (nothing gets printed in Jupyter, only if you run it standalone)
    sess = tf.compat.v1.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras

    activations_dict = {"relu": tf.nn.relu, "tanh": tf.nn.tanh}
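    # Build the architecture layer by layer. For "cnn" this interleaves Conv2D blocks with
    # optional intermediate pooling, then adds a global pooling layer, Flatten, and a
    # single-unit linear readout; "fc", "resnet", and keras-applications models are handled below.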

    if network == "cnn":
        if intermediate_pooling_type == "avg":
            intermediate_pooling_layer = [
                keras.layers.AvgPool2D(pool_size=2, padding='same')
            ]
        elif intermediate_pooling_type == "max":
            intermediate_pooling_layer = [
                keras.layers.MaxPool2D(pool_size=2, padding='same')
            ]
        else:
            intermediate_pooling_layer = []

        if pooling == "avg":
            pooling_layer = [keras.layers.GlobalAveragePooling2D()]
        elif pooling == "max":
            pooling_layer = [keras.layers.GlobalMaxPooling2D()]
        else:
            pooling_layer = []
        model = keras.Sequential(
            sum([
                [keras.layers.Conv2D(input_shape=(image_height,image_width,number_channels) if index==0 else (None,), \
                    filters=num_filters, \
                    kernel_size=filter_size, \
                    padding=padding, \
                    strides=strides, \
                    activation=activations_dict[activation],
                data_format='channels_last',
                kernel_initializer=weight_initializer,
                bias_initializer=bias_initializer,)] +
                 (intermediate_pooling_layer if have_pooling else [])
                # note: the outer `padding` and `strides` lists are consumed by zip() here before being shadowed by the per-layer loop variables
                for index,(filter_size,padding,strides,have_pooling,activation) in enumerate(zip(filter_sizes,padding,strides,pooling_in_layer,activations))
            ],[])
            + pooling_layer
            + [ keras.layers.Flatten() ]
            + [
                keras.layers.Dense(
                    1,  # activation=tf.nn.sigmoid,
                    kernel_initializer=weight_initializer,
                    bias_initializer=bias_initializer_last_layer,
                )
            ])
        # ] + [keras.layers.Lambda(lambda x:x+shifted_init_shift)])

    elif network == "fc":
        model = keras.Sequential(
            ([
                keras.layers.Dense(
                    layer_width,
                    activation=activation,
                    input_shape=(input_dim, ) if index == 0 else (None, ),  #)
                    kernel_initializer=weight_initializer,
                    bias_initializer=bias_initializer)
                for index, (layer_width, activation) in enumerate(
                    zip(layer_widths, activations))  #range(number_layers)
            ] if number_layers > 0 else [])
            # + [keras.layers.Lambda(lambda x: x-1/np.sqrt(2*np.pi))]
            + [
                keras.layers.Dense(
                    1,
                    input_shape=(input_dim, ) if number_layers == 0 else
                    (None, ),  #activation=tf.nn.sigmoid,
                    kernel_initializer=weight_initializer,
                    bias_initializer=bias_initializer_last_layer,
                )
            ])
        # ])+[keras.layers.Lambda(lambda x:x+shifted_init_shift)])

    elif network == "resnet":
        #from keras_contrib.applications.resnet import ResNet
        #import sys
        #sys.path += keras_contrib.__path__ + [keras_contrib.__path__[0]+"/applications/"]
        from .resnet import ResNet
        import keras

        n_blocks = 3
        DEPTH = number_layers
        if DEPTH % 6 != 2:
            raise ValueError('DEPTH must be 6n + 2:', DEPTH)
        # block_depth = (DEPTH - 2) // 6
        # resnet_n_plain = resnet_n % 100
        block_depth = (DEPTH - 2) // (n_blocks * 2)
        model = ResNet(
            input_shape=(image_height, image_width, number_channels),
            classes=1,
            block='basic',
            repetitions=[block_depth] * n_blocks,
            transition_strides=[(1, 1), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2),
                                (2, 2)][:n_blocks],
            initial_filters=64,
            initial_strides=(1, 1),
            initial_kernel_size=(3, 3),
            initial_pooling=None,
            #initial_pooling='max',
            #final_pooling=None,
            final_pooling=pooling if pooling is not None else "none",
            activation=None)
        # activation='sigmoid')
    else:
        model = keras.models.Sequential()
        if network == "vgg19":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras.applications.vgg19.VGG19(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "vgg16":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras.applications.vgg16.VGG16(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "resnet50":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            # model1 = keras.applications.resnet.ResNet50(include_top=False, weights=None, input_tensor=None, input_shape=(image_height,image_width,number_channels), pooling=pooling, classes=num_classes)
            model1 = keras_applications.resnet.ResNet50(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "resnet101":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.resnet.ResNet101(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "resnet152":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.resnet.ResNet152(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "resnetv2_50":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.resnet_v2.ResNet50V2(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "resnetv2_101":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.resnet_v2.ResNet101V2(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "resnetv2_152":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.resnet_v2.ResNet152V2(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "inception_resnet_v2":
            image_height, image_width, number_channels = max(
                image_height, 75), max(image_width,
                                       75), max(number_channels, 3)
            model1 = keras_applications.inception_resnet_v2.InceptionResNetV2(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "inception_v3":
            image_height, image_width, number_channels = max(
                image_height, 75), max(image_width,
                                       75), max(number_channels, 3)
            model1 = keras.applications.inception_v3.InceptionV3(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "resnext50":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.resnext.ResNeXt50(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "resnext101":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.resnext.ResNeXt101(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "densenet121":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.densenet.DenseNet121(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "densenet169":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.densenet.DenseNet169(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "densenet201":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras_applications.densenet.DenseNet201(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        elif network == "mobilenetv2":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras.applications.mobilenet_v2.MobileNetV2(
                input_shape=(image_height, image_width, number_channels),
                alpha=1.0,
                include_top=False,
                weights=None,
                input_tensor=None,
                pooling=pooling,
                classes=num_classes)

        elif network == "nasnet":
            image_height, image_width, number_channels = max(
                image_height, 32), max(image_width,
                                       32), max(number_channels, 3)
            model1 = keras.applications.nasnet.NASNetLarge(
                input_shape=(image_height, image_width, number_channels),
                include_top=False,
                weights=None,
                input_tensor=None,
                pooling=pooling,
                classes=num_classes)

        elif network == "xception":
            image_height, image_width, number_channels = max(
                image_height, 71), max(image_width,
                                       71), max(number_channels, 3)
            model1 = keras.applications.xception.Xception(
                include_top=False,
                weights=None,
                input_tensor=None,
                input_shape=(image_height, image_width, number_channels),
                pooling=pooling,
                classes=num_classes)

        model.add(model1)
        print(model1.output_shape)
        model.add(keras.layers.Flatten())
        model.add(
            keras.layers.Dense(
                1,
                kernel_initializer=weight_initializer,
                bias_initializer=bias_initializer_last_layer,
            ))

    print("Number of parameters: ", model.count_params())
    print(model.summary())

    json_string = model.to_json()
    '''SAVE ARCHITECTURE'''
    save_arch(json_string, FLAGS)
Code example #6
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError(
            "Initialization distributions other than Gaussian are not implemented for computing pac bayes bounds!"
        )

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str((rank) % n_gpus)
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    #tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras
    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, _, _ = load_data(FLAGS)
    X = flat_train_images
    ys2 = [[y] for y in ys]
    Y = np.array(ys2)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)

    if using_NTK:
        FLAGS["use_empirical_NTK"] = True
        theta = load_kernel(FLAGS)
        print(theta)
        #if using NTK, the above gets the NTK kernel, but we also need the non-NTK one to compute the bound!
        FLAGS["use_empirical_NTK"] = False
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        K = kernel_mult * K_pre
        if theta.shape[0] >= m:  # the kernel may have been computed for GP_train (train+test), so keep only the m training points
            theta = theta[:m, :m]
        if K.shape[0] >= m:
            K = K[:m, :m]
    else:
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        K = kernel_mult * K_pre
        if K.shape[0] >= m:  # the kernel may have been computed for GP_train (train+test), so keep only the m training points
            K = K[:m, :m]

    #finding log marginal likelihood of data
    if using_EP:
        from GP_prob.GP_prob_gpy2 import GP_prob
        logPU = GP_prob(K, X, Y, method="EP", using_exactPB=using_exactPB)
    elif using_Laplace:
        from GP_prob.GP_prob_gpy2 import GP_prob
        # from GP_prob.GP_prob_numpy import GP_prob
        logPU = GP_prob(K, X, Y, method="Laplace", using_exactPB=using_exactPB)
        # logPU = GP_prob(K,np.squeeze(Y))
    elif using_Laplace2:
        # from GP_prob.GP_prob_gpy import GP_prob
        from GP_prob.GP_prob_numpy import GP_prob  # this gives different results because it uses a cruder Laplace implementation (a more naive Newton method to find the posterior maximum)
        # logPU = GP_prob(K,X,Y,method="Laplace")
        logPU = GP_prob(K, np.squeeze(Y))
    elif using_MC:
        from GP_prob.GP_prob_MC import GP_prob
        logPU = GP_prob(K, X, Y, FLAGS)
    elif using_regression:
        from GP_prob.GP_prob_regression import GP_prob
        # logPU = GP_prob(K,X,Y,sigma_noise=np.sqrt(total_samples/2))
        logPU = GP_prob(K, X, Y, sigma_noise=1.0)
    elif using_NTK:
        # from GP_prob.GP_prob_regression import GP_prob
        # logPU = GP_prob(K,X,Y,sigma_noise=np.sqrt(total_samples/2))
        # logPU = GP_prob(K,X,Y,sigma_noise=1.0, posterior="ntk")
        from GP_prob.GP_prob_ntk import GP_prob
        logPU = GP_prob(K, theta, X, Y, t=1e2)

    if rank == 0:
        print(logPU)
        #compute PAC-Bayes bound
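        # As coded below: bound = (-logPU + 2*ln(total_samples) + 1 - ln(delta)) / total_samples,
        # then mapped through 1 - exp(-bound) to express it as an error-rate bound.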
        delta = 2**-10
        bound = (-logPU + 2 * np.log(total_samples) + 1 -
                 np.log(delta)) / total_samples
        bound = 1 - np.exp(-bound)
        print("pre-confusion-correction bound: ", bound)
        rho = confusion / (1.0 + confusion)
        bound = (bound - 0.5 * rho) / (
            1 - rho
        )  #to correct for the confusion changing the training data distribution (in training set, but not in test set)!
        print("Bound: ", bound)
        print("Accuracy bound: ", 1 - bound)
        useful_flags = [
            "dataset", "boolfun_comp", "boolfun", "network", "m",
            "label_corruption", "confusion", "number_layers", "sigmaw",
            "sigmab", "binarized", "pooling", "intermediate_pooling",
            "whitening", "training", "n_gpus", "kernel_mult",
            "normalize_kernel"
        ]
        with open(results_folder + prefix + "bounds.txt", "a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("bound")
            file.write("\t")
            file.write("logP")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(bound))
            file.write("\t")
            file.write("{}".format(logPU))
            file.write("\n")
Code example #7
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError(
            "Initialization distributions other than Gaussian are not implemented for computing pac bayes bounds!"
        )

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str((rank) % n_gpus)
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    #tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras
    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, test_images, test_ys = load_data(
        FLAGS)
    print("max val", train_images.max())
    #print("ys", ys)
    #process data into the right format for the GP
    #test on a smaller sample of the test set, because otherwise the GP would run out of memory
    test_images = test_images[:test_function_size]
    test_ys = test_ys[:test_function_size]
    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0, len(data.shape) - 1],
                               np.arange(1,
                                         len(data.shape) - 1)])
    print(data.shape, tp_order)
    flat_data = np.transpose(
        data, tp_order
    )  # NHWC -> NCHW # this is because the cnn GP kernels assume this
    flat_test_images = np.array(
        [test_image.flatten() for test_image in flat_data])
    Xtrain = flat_train_images
    Xtest = flat_test_images
    Xfull = np.concatenate([flat_train_images, flat_test_images])
    ys2 = [[y] for y in ys]
    # if test_fun_override is not None:
    #     ys2test = [[float(x)] for x in test_fun_override]
    # else:
    ys2test = [[y] for y in test_ys]
    ysfull = ys2 + ys2test
    Yfull = np.array(ysfull)
    Ytrain = np.array(ys2)
    Ytest = np.array(ys2test)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)

    # if loss is not "mse":
    #     raise NotImplementedError("Haven't implemented logQ estimate for CE loss yet")

    if using_NTK:
        raise NotImplementedError(
            "Haven't implemented logQ estimate for NTK yet")
        # FLAGS["use_empirical_NTK"] = True
        # theta = load_kernel(FLAGS)
        # print(theta)
        # #if using NTK, the above gets the NTK kernel, but we also need the non-NTK one to compute the bound!
        # FLAGS["use_empirical_NTK"] = False
        # K_pre = load_kernel(FLAGS)
        # print(K_pre)
        # if normalize_kernel:
        #     K_pre = K_pre/K_pre.max()
        # K = kernel_mult*K_pre
        # if theta.shape[0] >= m: #must have compute kernel for GP_train
        #     theta = theta[:m,:m]
        # if K.shape[0] >= m: #must have compute kernel for GP_train
        #     K = K[:m,:m]
    else:
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        Kfull = kernel_mult * K_pre

    #compute the NNGP posterior (mean, covariance) over the test inputs
    if loss == "mse":
        from GP_prob.nngp_mse_heaviside_posterior import nngp_mse_heaviside_posteror_params
        mean, cov = nngp_mse_heaviside_posteror_params(Xtrain, Ytrain, Xtest,
                                                       Kfull)
    else:
        raise NotImplementedError("Only mse loss implemented")

    if rank == 0:
        from utils import save_posterior_params
        save_posterior_params(mean, cov, FLAGS)
Code example #8
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    num_tasks = number_samples

    num_gpus = n_gpus
    print("num_gpus", num_gpus)

    num_tasks_per_job = num_tasks // size
    tasks = list(
        range(rank * num_tasks_per_job, (rank + 1) * num_tasks_per_job))

    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    if num_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % num_gpus)

    config = tf.compat.v1.ConfigProto()
    if num_gpus > 0:
        config.gpu_options.allow_growth = True

    tf.compat.v1.enable_eager_execution(config=config)
    ##the code below is necessary for keras not to use all memory
    set_session = tf.compat.v1.keras.backend.set_session

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras
    '''LOAD DATA & ARCHITECTURE'''

    from utils import load_data, load_model, load_kernel, entropy
    data, flat_data, _, _, _ = load_data(FLAGS)
    data = tf.constant(data)
    input_dim = data.shape[1]
    num_channels = data.shape[-1]

    model = load_model(FLAGS)

    #K = load_kernel(FLAGS)
    #from GP_prob.GP_prob_gpy import GP_prob
    #def calculate_logPU(preds):
    #    logPU = GP_prob(K,flat_data,preds )
    #    return logPU

    print("Doing task %d of %d" % (rank, size))
    import time
    start_time = time.time()

    index_fun_probs = []
    fun_probs = {}

    if FLAGS["pooling"] is None:
        pooling_flag = "none"
    else:
        pooling_flag = FLAGS["pooling"]
    outfilename = results_folder + "index_funs_probs_" + str(
        rank) + "_" + FLAGS["prefix"] + "_" + str(
            shifted_init_shift
        ) + "_" + FLAGS["dataset"] + "_" + FLAGS["network"] + "_" + str(
            FLAGS["number_layers"]
        ) + "_" + pooling_flag + "_" + FLAGS["intermediate_pooling"] + ".txt"

    from initialization import get_all_layers, is_normalization_layer, reset_weights, simple_reset_weights
    if network not in ["cnn", "fc"]:
        layers = get_all_layers(model)
        are_norm = [
            is_normalization_layer(l) for l in layers for w in l.get_weights()
        ]
        initial_weights = model.get_weights()

    local_index = 0
    '''SAMPLING LOOP'''
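    # For each assigned sample: re-draw the network weights, threshold the outputs on `data`
    # to get a Boolean function string, and append (index, function, entropy, number of 1s)
    # to the per-rank output file.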
    for index in tasks:
        outfile = open(outfilename, "a")
        print(index)
        if local_index > 0:
            if network in ["cnn", "fc"]:
                simple_reset_weights(model, sigmaw, sigmab)
            else:
                reset_weights(model, initial_weights, are_norm, sigmaw, sigmab,
                              truncated_init_dist)
        #model = load_model(FLAGS) # this resets the weights (makes sense as the json string only has architecture)

        #save weights?
        #model.save_weights("sampled_nets/"+str(index)+"_"+json_string_filename+".h5")

        #predictions = tf.keras.backend.eval(model(data)) > 0
        predictions = model.predict(data) > 0
        fstring = "".join([str(int(x[0])) for x in predictions])
        n1s = len([x for x in fstring if x == "1"])
        ent = entropy(fstring)

        #if fstring not in fun_probs:
        #    fun_probs[fstring] = calculate_logPU(predictions)
        #index_fun_probs.append((index,ent,fstring,fun_probs[fstring]))
        #index_fun_probs.append((index,ent,fstring))
        outfile.write(
            str(index) + "\t" + fstring + "\t" + str(ent) + "\t" + str(n1s) +
            "\n")
        outfile.close()
        #keras.backend.clear_session()
        local_index += 1

    print("--- %s seconds ---" % (time.time() - start_time))

    index_fun_probs = comm.gather(index_fun_probs, root=0)
Code example #9
def main(_):
    MAX_TRAIN_EPOCHS=5000

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)
    if doing_regression:
        assert loss == "mse"
    global threshold

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    num_tasks_per_job = number_inits//size
    tasks = list(range(int(rank*num_tasks_per_job),int((rank+1)*num_tasks_per_job)))

    if rank < number_inits%size:
        tasks.append(size*num_tasks_per_job+rank)

    import os
    if n_gpus>0:
        os.environ["CUDA_VISIBLE_DEVICES"]=str(rank%n_gpus)

    from tensorflow import keras

    '''LOAD DATA & ARCHITECTURE'''

    from utils import load_data,load_model,load_kernel
    train_images,flat_train_images,ys,test_images,test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    #print("ys", ys)
    #process data into the right format for the GP
    #test on a smaller sample of the test set, because otherwise the GP would run out of memory
    test_images = test_images[:1000]
    test_ys = test_ys[:1000]
    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0,len(data.shape)-1], np.arange(1, len(data.shape)-1)])
    print(data.shape,tp_order)
    flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW # this is because the cnn GP kernels assume this
    flat_test_images = np.array([test_image.flatten() for test_image in flat_data])
    Xfull =  np.concatenate([flat_train_images,flat_test_images])
    ys2 = [[y] for y in ys]
    ysfull = ys2 + [[y] for y in test_ys]
    Yfull = np.array(ysfull)
    Y = np.array(ys2)


    FLAGS["use_empirical_NTK"] = True
    theta_full = load_kernel(FLAGS)
    #print(theta_full)
    FLAGS["use_empirical_NTK"] = False
    K_pre = load_kernel(FLAGS)
    print(K_pre)
    if normalize_kernel:
        K_pre = K_pre/K_pre.max()
    Kfull = kernel_mult*K_pre

    input_dim = train_images.shape[1]
    num_channels = train_images.shape[-1]
    print(train_images.shape, ys.shape)

    n=X.shape[0]
    K_train = Kfull[:n,:n]
    K_test = Kfull[n:,n:]
    K_train_test = Kfull[:n,n:]
    theta_train = theta_full[:n,:n]
    theta_test = theta_full[n:,n:]
    theta_train_test = theta_full[:n,n:]
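    # NTK_posterior (not defined in this snippet; it comes from the surrounding source) returns
    # the posterior mean and covariance over the test inputs after training for time t.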
    mu,Sigma = NTK_posterior(K_train,K_test,K_train_test,theta_train,theta_test,theta_train_test,X,Y,t=training_time)

    sample_weights = None
    if gamma != 1.0:
        sample_weights = np.ones(len(ys))
        if not oversampling2:
            sample_weights[m:] = gamma
        else:
            raise NotImplementedError("Gamma not equal to 1.0 with oversampling2 not implemented")

    model = load_model(FLAGS)

    set_session = tf.compat.v1.keras.backend.set_session

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''TRAINING LOOP'''
    #things to keep track off
    #functions = []
    test_accs = 0
    test_accs_squared = 0
    test_sensitivities = 0
    test_specificities = 0
    train_accs = 0
    train_accs_squared = 0
    funs_filename = results_folder+prefix+"_"+str(rank)+"_nn_train_functions.txt"

    if loss=="mse":
        likelihood = "gaussian"
    elif loss=="ce":
        likelihood = "bernoulli"
    print("Training GP with "+likelihood+" likelihood")

    model.compile("sgd", loss="mse")

    from initialization import get_all_layers, is_normalization_layer, reset_weights, simple_reset_weights
    if network not in ["cnn", "fc"]:
        layers = get_all_layers(model)
        are_norm = [is_normalization_layer(l) for l in layers for w in l.get_weights()]
        initial_weights = model.get_weights()

    K_train_train = Kfull[:len(X),:len(X)]
    X_train_test = Kfull[:len(X),len(X):len(Xfull)-len(X)]
    # predictor = nt.predict.gradient_descent_mse(g_dd, y_train, g_td)

    '''MAIN LOOP'''
    local_index = 0

    from math import ceil
    samples_per_chunk_base=min(len(tasks),10000)
    num_chunks = len(tasks)//samples_per_chunk_base
    remainder = len(tasks)%samples_per_chunk_base
    if remainder > 0:
        num_chunks += 1
    for chunki in range(num_chunks):
        print(chunki)
        if chunki == num_chunks-1 and remainder>0:
            samples_per_chunk = remainder
        else:
            samples_per_chunk = samples_per_chunk_base
        funs_file = open(funs_filename,"a")
        #
        ##if the labels are to be generated by a neural network in parallel
        if nn_random_labels or nn_random_regression_outputs:
            if network in ["cnn", "fc"]:
                simple_reset_weights(model, sigmaw, sigmab)
            else:
                reset_weights(model, initial_weights, are_norm, sigmaw, sigmab, truncated_init_dist)
            if nn_random_labels:
                ys = model.predict(train_images)[:,0]>0
                if training:
                    test_ys = model.predict(test_images)[:,0]>0
            else:
                ys = model.predict(train_images)[:,0]
                if training:
                    test_ys = model.predict(test_images)[:,0]
        ##

        local_index+=1

        #preds = model.predict(flat_test_images)[0]
        #dimensions of output of posterior_samples is (number of input points)x(dimension of output Y)x(number of samples)
        #preds = model.posterior_samples(flat_test_images,size=samples_per_chunk)[:,0,:].T
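        # Draw samples_per_chunk function samples directly from the Gaussian posterior N(mu, Sigma) computed above.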
        print(mu.shape)
        preds = np.random.multivariate_normal(mu,Sigma,size=samples_per_chunk)
        print(preds.shape)
        #preds = np.array([pred[0] for pred in preds])
        if not doing_regression:
            th = 0.5
            train_loss, train_acc = 0, 1.0*samples_per_chunk
            test_loss, test_acc = np.sum(cross_entropy_loss(test_ys,preds))/len(test_ys), np.sum((preds>th)==test_ys)/len(test_ys)
        else:
            train_acc = train_loss = 0
            test_acc = test_loss = np.sum(cross_entropy_loss(test_ys,preds))/len(test_ys)

        #for th in np.linspace(0,1,1000):
        if loss=="mse":
            #NOTE: sensitivity and specificity are not implemented for MSE loss
            test_sensitivity = -1
            test_specificity = -1
        else:
            print("threshold", threshold)
            #TODO: this is ugly, I should just add a flag that allows to say whether we are doing threshold selection or not!!
            if threshold != -1:
                for th in np.linspace(0,1,1000):
                    test_specificity = np.sum(((sigmoid(preds)>th)==test_ys[:100])*(test_ys[:100]==0))/np.sum(test_ys[:100]==0)
                    if test_specificity>0.99:
                        num_0s = np.sum(test_ys==0)
                        if num_0s > 0:
                            test_specificity = np.sum(((sigmoid(preds)>th)==test_ys)*(test_ys==0))/(num_0s)
                        else:
                            test_specificity = -1
                        if test_specificity>0.99:
                            num_1s = np.sum(test_ys==1)
                            if num_1s > 0:
                                test_sensitivity = np.sum(((sigmoid(preds)>th)==test_ys)*(test_ys==1))/(num_1s)
                            else:
                                test_sensitivity = -1
                            break
            else:
                # for th in np.linspace(0,1,5): # low number of thresholds as I'm not exploring unbalanced datasets right now
                #     test_specificity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys) if x==0])/(len([x for x in test_ys if x==0]))
                #     if test_specificity>0.99:
                #         test_sensitivity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys) if x==1])/(len([x for x in test_ys if x==1]))
                #         break
                test_specificity = -1
                test_sensitivity = -1

        print("Training accuracy", train_acc/samples_per_chunk)
        print('Test accuracy:', test_acc/samples_per_chunk)
        if threshold != -1:
            print('Test sensitivity:', test_sensitivity/samples_per_chunk)
            print('Test specificity:', test_specificity/samples_per_chunk)
        if not ignore_non_fit or train_acc == 1.0:
            print("printing function to file", funs_filename)
            functions = preds[:,:test_function_size]>0.5
            functions=functions.astype(int)
            print(functions.shape)
            functions = [''.join([str(int(x)) for x in function])+"\r\n" for function in functions]
            funs_file.writelines(functions)
            funs_file.close()
            #functions.append(function)
            test_accs += test_acc
            test_accs_squared += test_acc**2
            test_sensitivities += test_sensitivity
            test_specificities += test_specificity
            train_accs += train_acc
            train_accs_squared += train_acc**2

    test_accs_recv = comm.reduce(test_accs, root=0)
    test_accs_squared_recv = comm.reduce(test_accs_squared, root=0)
    test_sensitivities_recv = comm.reduce(test_sensitivities, root=0)
    test_specificities_recv = comm.reduce(test_specificities, root=0)
    train_accs_recv = comm.reduce(train_accs, root=0)
    train_accs_squared_recv = comm.reduce(train_accs_squared, root=0)

    '''PROCESS COLLECTIVE DATA'''
    if rank == 0:
        test_acc = test_accs_recv/number_inits
        test_sensitivity = test_sensitivities_recv/number_inits
        test_specificity = test_specificities_recv/number_inits
        train_acc = train_accs_recv/number_inits
        print('Mean train accuracy:', train_acc)
        print('Mean test accuracy:', test_acc)
        if threshold != -1:
            print('Mean test sensitivity:', test_sensitivity)
            print('Mean test specificity:', test_specificity)
        test_acc = test_accs_recv/number_inits
        train_acc = train_accs_recv/number_inits
        train_acc_std = train_accs_squared_recv/number_inits - train_acc**2  # note: this is the variance, not the standard deviation
        test_acc_std = test_accs_squared_recv/number_inits - test_acc**2  # note: this is the variance, not the standard deviation

        useful_train_flags = ["dataset", "m", "network", "pooling", "ignore_non_fit", "test_function_size", "number_layers", "sigmaw", "sigmab", "init_dist","use_shifted_init","shifted_init_shift","whitening", "centering", "oversampling", "oversampling2", "channel_normalization", "training", "binarized", "confusion","filter_sizes", "gamma", "intermediate_pooling", "label_corruption", "threshold", "n_gpus", "n_samples_repeats", "layer_widths", "number_inits", "padding"]
        with open(results_folder+prefix+"nn_training_results.txt","a") as file:
            file.write("#")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(key))
            file.write("\t".join(["train_acc", "test_error", "test_acc","test_sensitivity","test_specificity","train_acc_std","test_acc_std"]))
            file.write("\n")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(FLAGS[key]))
            file.write("{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\n".format(train_acc, 1-test_acc,test_acc,\
                test_sensitivity,test_specificity,\
                train_acc_std,test_acc_std))
Code example #10
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError(
            "Initialization distributions other than Gaussian are not implemented for computing kernels!"
        )

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str((rank) % n_gpus)
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    set_session = keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    config.allow_soft_placement = True  # so that it uses any other existing and supported devices, if the requested GPU:0 isn't found
    sess = tf.compat.v1.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras

    train_images, flat_train_images, _, test_images, _ = load_data(FLAGS)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    #print("image_size", image_size)
    X = train_images
    flat_X = flat_train_images
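    # When the kernel is needed for GP training/posterior computations it must also cover the
    # test points, so (up to) 1000 flattened test images are appended to the training inputs.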
    if compute_for_GP_train:
        test_images = test_images[:1000]
        data = test_images
        tp_order = np.concatenate([[0, len(data.shape) - 1],
                                   np.arange(1,
                                             len(data.shape) - 1)])
        print(data.shape, tp_order)
        flat_data = np.transpose(
            data, tp_order
        )  # NHWC -> NCHW # this is because the cnn GP kernels assume this
        flat_test_images = np.array(
            [test_image.flatten() for test_image in flat_data])
        Xfull = np.concatenate([flat_train_images, flat_test_images])
        flat_X = Xfull
        X = np.concatenate([train_images, test_images])

    print("compute kernel", network, dataset)

    # COMPUTE KERNEL
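    # Three ways to obtain K: the empirical NTK of a concrete model, a Monte Carlo ("empirical")
    # estimate of the NNGP kernel obtained by sampling randomly initialized networks, or the
    # analytic NNGP kernel for cnn / resnet / fc architectures (computed on rank 0 only).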
    if use_empirical_NTK:
        from nngp_kernel.empirical_ntk import empirical_NTK
        print(ceil(int(X.shape[0]) * n_samples_repeats))
        from tensorflow.keras.models import model_from_json
        model = load_model(FLAGS)
        K = empirical_NTK(model, X)  #,sess=sess)
    elif use_empirical_K:
        from nngp_kernel.empirical_kernel import empirical_K
        print("n_samples_repeats", n_samples_repeats)
        print(ceil(int(X.shape[0]) * n_samples_repeats))
        arch_json_string = load_model_json(FLAGS)
        K = empirical_K(
            arch_json_string,
            X,
            ceil(int(X.shape[0]) * n_samples_repeats),
            sigmaw=sigmaw,
            sigmab=sigmab,
            n_gpus=n_gpus,
            empirical_kernel_batch_size=empirical_kernel_batch_size,
            sess=sess,
            truncated_init_dist=truncated_init_dist,
            data_parallelism=False,
            store_partial_kernel=store_partial_kernel,
            partial_kernel_n_proc=partial_kernel_n_proc,
            partial_kernel_index=partial_kernel_index)
    if rank == 0:
        if not (use_empirical_K or use_empirical_NTK):
            if network == "cnn":
                from nngp_kernel.cnn_kernel import kernel_matrix
                K = kernel_matrix(flat_X,
                                  image_size=image_size,
                                  number_channels=number_channels,
                                  filter_sizes=filter_sizes,
                                  padding=padding,
                                  strides=strides,
                                  sigmaw=sigmaw,
                                  sigmab=sigmab,
                                  n_gpus=n_gpus)

            elif network == "resnet":
                from nngp_kernel.resnet_kernel import kernel_matrix
                K = kernel_matrix(flat_X,
                                  depth=number_layers,
                                  image_size=image_size,
                                  number_channels=number_channels,
                                  n_blocks=3,
                                  sigmaw=sigmaw,
                                  sigmab=sigmab,
                                  n_gpus=n_gpus)

            elif network == "fc":
                from nngp_kernel.fc_kernel import kernel_matrix
                K = kernel_matrix(flat_X,
                                  number_layers=number_layers,
                                  sigmaw=sigmaw,
                                  sigmab=sigmab,
                                  n_gpus=n_gpus)

        print(K)
        '''SAVE KERNEL'''
        if store_partial_kernel:
            save_kernel_partial(K, FLAGS, partial_kernel_index)
        else:
            save_kernel(K, FLAGS)
Code example #11
File: compute_CSR.py Project: guillefix/nn-pacbayes
def main(_):

    FLAGS = tf.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    # total_samples = m

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    # num_inits_per_task = 1
    #num_tasks = int(sys.argv[1])
    num_tasks = number_samples

    #from tensorflow.python.client import device_lib
    #
    #def get_available_gpus():
    #    local_device_protos = device_lib.list_local_devices()
    #    return [x.name for x in local_device_protos if x.device_type == 'GPU']
    #
    #num_gpus = len(get_available_gpus())
    num_gpus = n_gpus

    num_tasks_per_job = num_tasks // size
    tasks = list(
        range(rank * num_tasks_per_job, (rank + 1) * num_tasks_per_job))

    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    #config = tf.ConfigProto(device_count={'GPU': rank%num_gpus})
    config = tf.ConfigProto()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % num_gpus)
    config.gpu_options.allow_growth = True
    tf.enable_eager_execution(config=config)

    from utils import load_data, load_model, load_kernel
    data, flat_data, _, _, _ = load_data(FLAGS)
    data = tf.constant(data)
    model = load_model(FLAGS)
    K = load_kernel(FLAGS)

    def lass(model, x, r=0.01):
        pred = tf.sign(model(x))
        alpha = 0.5
        #alpha=0.25
        #beta=0.2
        deltax = tf.zeros(x.shape)
        xtilde = x + deltax
        max_iters = 20
        iterr = 0
        while iterr < max_iters:
            with tf.GradientTape() as g:
                g.watch(xtilde)
                y = model(xtilde)
            grads = g.gradient(y, xtilde)
            delta = alpha * tf.sign(
                -pred * grads)  #+ beta*tf.random.normal(x.shape)
            deltax += delta
            deltax = tf.clip_by_value(deltax, -r, r)
            # deltax -= tf.to_float(tf.math.abs(deltax) >= r) * tf.clip_by_value(deltax,-r,r)
            xtilde = x + deltax
            # print(grads)

            if tf.sign(model(xtilde)).numpy()[0] != pred.numpy()[0]:
                return True
            iterr += 1
        return False

    def crit_sample_ratio(
        model,
        xs,
        r=0.01
    ):  # is 0.3 fine for a 0-1 scaling, when they say 0-255 what do they mean? Hmm
        crit_samples = 0
        for i in range(int(xs.shape[0])):
            #print(i)
            # print(xs[i:i+1,:,:,:])
            if lass(model, xs[i:i + 1, :, :, :], r):
                crit_samples += 1
        return 1.0 * crit_samples / int(xs.shape[0])

    #%%

    print("Beginning job %d of %d" % (rank, size))
    import time
    start_time = time.time()
    crit_sample_ratios = []
    #probs = []
    for index in tasks:
        print(index)
        model.load_weights("./sampled_nets/" + str(index) + "_" +
                           json_string_filename + ".h5")
        csr = crit_sample_ratio(model, data, r=0.03)
        crit_sample_ratios.append((index, csr))
        with open(
                results_folder + "CSRs_" + FLAGS["prefix"] + "_" +
                FLAGS["dataset"] + "_" + FLAGS["network"] + "_" +
                str(FLAGS["number_layers"]) + "_" + FLAGS["pooling"] + "_" +
                FLAGS["intermediate_pooling"] + ".txt", "a") as f:
            f.write(str(index) + "\t" + str(csr) + "\n")
        #print(csr)
    print("--- %s seconds ---" % (time.time() - start_time))
    print("Finishing job %d of %d" % (rank, size))

    csr_data = comm.gather(crit_sample_ratios, root=0)

    #tf.keras.initializers.glorot_uniform

    if rank == 0:
        csr_data = sum(csr_data, [])
        pickle.dump(
            csr_data,
            open(
                results_folder + "CSRs_" + FLAGS["prefix"] + "_" +
                FLAGS["dataset"] + "_" + FLAGS["network"] + "_" +
                str(FLAGS["number_layers"]) + "_" + FLAGS["pooling"] + "_" +
                FLAGS["intermediate_pooling"] + ".p", "wb"))
Code example #12
File: NN_train.py  Project: guillefix/nn-pacbayes
def main(_):
    MAX_TRAIN_EPOCHS=20000

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)
    if doing_regression:
        assert loss == "mse"
    global threshold

    if using_mpi:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()
    else:
        rank=0
        size=1
    num_tasks_per_job = number_inits//size
    tasks = list(range(int(rank*num_tasks_per_job),int((rank+1)*num_tasks_per_job)))

    if rank < number_inits%size:
        tasks.append(size*num_tasks_per_job+rank)

    import os
    print(rank)
    if n_gpus>0:
        os.environ["CUDA_VISIBLE_DEVICES"]=str(rank%n_gpus)

    from tensorflow import keras

    def binary_accuracy_for_mse(y_true,y_pred):
        if zero_one:
            return keras.backend.mean(tf.cast(tf.equal(tf.cast(y_pred>0.5,tf.float32),y_true), tf.float32))
        else:
            return keras.backend.mean(tf.cast(tf.equal(tf.math.sign(y_pred),y_true), tf.float32))

    print(tf.__version__)
    if loss=="mse":
        callbacks = [EarlyStoppingByAccuracy(monitor='val_binary_accuracy_for_mse', value=acc_threshold, verbose=0, wait_epochs=epochs_after_fit)]
        if doing_regression:
            callbacks = [EarlyStoppingByLoss(monitor='val_loss', value=1e-2, verbose=0, wait_epochs=epochs_after_fit)]
    else:
        #if tf.__version__[:3] == "2.1":
        if tf.__version__[0] == "2":
            print("hi im tf 2")
            callbacks = [EarlyStoppingByAccuracy(monitor='val_accuracy', value=acc_threshold, verbose=0, wait_epochs=epochs_after_fit)]
        else:
            callbacks = [EarlyStoppingByAccuracy(monitor='val_acc', value=acc_threshold, verbose=0, wait_epochs=epochs_after_fit)]

    # callbacks += [EarlyStopping(monitor='val_loss', patience=2, verbose=0),
    #               ModelCheckpoint(kfold_weights_path, monitor='val_loss', save_best_only=True, verbose=0),
    #              ]

    '''LOAD DATA & ARCHITECTURE'''

    from utils import load_data,load_model,load_kernel
    train_images,_,ys,test_images,test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    print("ys", ys)
    input_dim = train_images.shape[1]
    num_channels = train_images.shape[-1]
    print(train_images.shape, ys.shape)

    sample_weights = None
    if gamma != 1.0:
        sample_weights = np.ones(len(ys))
        if not oversampling2:
            sample_weights[m:] = gamma
        else:
            raise NotImplementedError("Gamma not equal to 1.0 with oversampling2 not implemented")

    model = load_model(FLAGS)

    set_session = tf.compat.v1.keras.backend.set_session

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''TRAINING LOOP'''
    #things to keep track of
    #functions = []
    test_accs = 0
    test_accs_squared = 0
    test_sensitivities = 0
    test_specificities = 0
    train_accs = 0
    train_accs_squared = 0
    weightss = None
    biasess = None
    weightss_squared = None
    biasess_squared = None
    weights_norms = 0
    biases_norms = 0
    weights_norms_squared = 0
    biases_norms_squared = 0
    iterss = 0
    funs_filename = results_folder+prefix+"_"+str(rank)+"_nn_train_functions.txt"

    print("Training NN with",loss,"and optimizer",optimizer)
    if optimizer == "langevin":
        optim = tfp.optimizer.StochasticGradientLangevinDynamics(learning_rate=0.01)
    elif optimizer == "sgd":
        #optim = keras.optimizers.SGD(lr=learning_rate)
        optim = keras.optimizers.SGD(lr=0.001,momentum=0.9,decay=1e-6)
    elif optimizer == "adam":
        optim = keras.optimizers.Adam(lr=learning_rate)
    else:
        optim = optimizer

    def get_metrics():
        if doing_regression:
            #return [keras.losses.mean_squared_error]
            return []
        elif loss=="mse":
            return [binary_accuracy_for_mse]
        else:
            return ['accuracy']

    print(loss)
    model.compile(optim,
                  loss=binary_crossentropy_from_logits if loss=="ce" else loss,
                  metrics=get_metrics())
                  #metrics=['accuracy',sensitivity])
                  #metrics=['accuracy',tf.keras.metrics.SensitivityAtSpecificity(0.99),\
                            #tf.keras.metrics.FalsePositives()])

    from initialization import get_all_layers, is_normalization_layer, reset_weights, simple_reset_weights
    if network not in ["cnn", "fc"]:
        layers = get_all_layers(model)
        are_norm = [is_normalization_layer(l) for l in layers for w in l.get_weights()]
        initial_weights = model.get_weights()

    local_index = 0
    for init in tasks:
        funs_file = open(funs_filename,"a")
        #print(init)
        #
        #TODO: move to a different file, as this is repeated in GP_train..
        ##if the labels are to be generated by a neural network in parallel
        if nn_random_labels or nn_random_regression_outputs:
            if local_index>0:
                if network in ["cnn", "fc"]:
                    simple_reset_weights(model, sigmaw, sigmab)
                else:
                    reset_weights(model, initial_weights, are_norm, sigmaw, sigmab, truncated_init_dist)
            if nn_random_labels:
                ys = model.predict(train_images)[:,0]>0
                if training:
                    test_ys = model.predict(test_images)[:,0]>0
            else:
                ys = model.predict(train_images)[:,0]
                if training:
                    test_ys = model.predict(test_images)[:,0]
        ##
        if local_index>0 or nn_random_labels or nn_random_regression_outputs:
            if network in ["cnn", "fc"]:
                simple_reset_weights(model, sigmaw, sigmab)
            else:
                reset_weights(model, initial_weights, are_norm, sigmaw, sigmab)

        local_index+=1

        ##this reinitializes the net
        #model = load_model(FLAGS)
        #model.compile(optim,
        #              loss=binary_crossentropy_from_logits if loss=="ce" else loss,
        #              metrics=get_metrics())

        weights, biases = get_weights(model), get_biases(model)
        weights_norm, biases_norm = measure_sigmas(model)
        #print(weights_norm,biases_norm)

        #batch_size = min(batch_size, m)
        if train_one_epoch:
            model.fit(train_images.astype(np.float32), ys.astype(np.float32), verbose=1,\
                sample_weight=sample_weights, validation_data=(train_images.astype(np.float32), ys.astype(np.float32)), epochs=1, batch_size=min(m,batch_size))
            sys.stdout.flush()
        else:
            model.fit(train_images.astype(np.float32), ys.astype(np.float32), verbose=1,\
                sample_weight=sample_weights, validation_data=(train_images.astype(np.float32), ys.astype(np.float32)), epochs=MAX_TRAIN_EPOCHS,callbacks=callbacks, batch_size=min(m,batch_size))
            sys.stdout.flush()

        '''GET DATA: weights, and errors'''
        weights, biases = get_rescaled_weights(model)
        weights_norm, biases_norm = measure_sigmas(model) #TODO: make sure it works with archs with norm layers etc
        #print(weights_norm,biases_norm)

        if not doing_regression: # classification
            train_loss, train_acc = model.evaluate(train_images.astype(np.float32), ys.astype(np.float32), verbose=0)
            test_loss, test_acc = model.evaluate(test_images.astype(np.float32), test_ys.astype(np.float32), verbose=0)
        else:
            train_acc = train_loss = model.evaluate(train_images.astype(np.float32), ys, verbose=0)
            test_acc = test_loss = model.evaluate(test_images.astype(np.float32), test_ys, verbose=0)
        preds = model.predict(test_images)[:,0]
        # print(preds)
        # print(preds.shape)
        # test_false_positive_rate = test_fps/(len([x for x in test_ys if x==1]))
        def sigmoid(x):
            return np.exp(x)/(1+np.exp(x))

        #for th in np.linspace(0,1,1000):
        if loss=="mse":
            #NOTE: sensitivity and specificity are not implemented for MSE loss
            test_sensitivity = -1
            test_specificity = -1
        else:
            #print("threshold", threshold)
            #TODO: this is ugly; add a flag indicating whether threshold selection is being performed
            if threshold != -1:
                for th in np.linspace(0,1,1000):
                    test_specificity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys[:100]) if x==0])/(len([x for x in test_ys[:100] if x==0]))
                    if test_specificity>0.99:
                        num_0s = len([x for x in test_ys if x==0])
                        if num_0s > 0:
                            test_specificity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys) if x==0])/(num_0s)
                        else:
                            test_specificity = -1
                        if test_specificity>0.99:
                            num_1s = len([x for x in test_ys if x==1])
                            if num_1s > 0:
                                test_sensitivity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys) if x==1])/(num_1s)
                            else:
                                test_sensitivity = -1
                            break
            else:
                # for th in np.linspace(0,1,5): # low number of thresholds as I'm not exploring unbalanced datasets right now
                #     test_specificity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys) if x==0])/(len([x for x in test_ys if x==0]))
                #     if test_specificity>0.99:
                #         test_sensitivity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys) if x==1])/(len([x for x in test_ys if x==1]))
                #         break
                test_specificity = -1
                test_sensitivity = -1
        #print("Training accuracy", train_acc)
        #print('Test accuracy:', test_acc)
        #print('Test sensitivity:', test_sensitivity)
        #print('Test specificity:', test_specificity)

        if not ignore_non_fit or train_acc >= acc_threshold:
            #print("printing function to file", funs_filename)
            function = (model.predict(test_images[:test_function_size].astype(np.float32), verbose=0))[:,0]
            if loss=="mse" and zero_one:
                function = function>0.5
            else:
                function = function>0
            function=function.astype(int)
            function = ''.join([str(int(i)) for i in function])
            funs_file.write(function+"\r\n")
            funs_file.close()
            #functions.append(function)
            test_accs += test_acc
            test_accs_squared += test_acc**2
            test_sensitivities += test_sensitivity
            test_specificities += test_specificity
            train_accs += train_acc
            train_accs_squared += train_acc**2
            if weightss is None:
                weightss = weights
                biasess = biases
                weightss_squared = weights**2
                biasess_squared = biases**2
            else:
                weightss += weights
                biasess += biases
                weightss_squared += weights**2
                biasess_squared += biases**2
            weights_norms += weights_norm
            weights_norms_squared += weights_norm**2
            biases_norms += biases_norm
            biases_norms_squared += biases_norm**2
            iterss += model.history.epoch[-1]
        #keras.backend.clear_session()
        gc.collect()

    #print("Print functions to file")
    #with open(,"a") as file:
    #    file.write("\r\n".join(functions))
    #    file.write("\r\n")

    # functions = comm.gather(functions, root=0)
    if rank == 0:
        #test_accs_recv = np.empty([size,1],dtype=np.float32)
        #test_accs_squared_recv = np.empty([size,1],dtype=np.float32)
        #test_sensitivities_recv = np.empty([size,1],dtype=np.float32)
        #test_specificities_recv = np.empty([size,1],dtype=np.float32)
        #train_accs_recv = np.empty([size,1],dtype=np.float32)
        #train_accs_squared_recv = np.empty([size,1],dtype=np.float32)

        weights_shape = weightss.flatten().shape[0]
        biases_shape = biasess.flatten().shape[0]
        weightss_recv = np.zeros(weights_shape, dtype=np.float32)
        biasess_recv = np.zeros(biases_shape, dtype=np.float32)
        weightss_squared_recv = np.zeros(weights_shape, dtype=np.float32)
        biasess_squared_recv = np.zeros(biases_shape, dtype=np.float32)
        #weights_norms_recv = np.empty([size,1],dtype=np.float32)
        #weights_norms_squared_recv = np.empty([size,1],dtype=np.float32)
        #biases_norms_recv = np.empty([size,1],dtype=np.float32)
        #biases_norms_squared_recv = np.empty([size,1],dtype=np.float32)
        #iterss_recv = np.empty([size,1],dtype='i')

    else:
        #test_accs_recv = None
        #test_accs_squared_recv = None
        #test_sensitivities_recv = None
        #test_specificities_recv = None
        #train_accs_recv = None
        #train_accs_squared_recv = None

        weightss_recv = None
        weightss_squared_recv = None
        biasess_recv = None
        biasess_squared_recv = None
        #weights_norms_recv = None
        #weights_norms_squared_recv = None
        #biases_norms_recv = None
        #biases_norms_squared_recv = None
        #iterss_recv = None

    if using_mpi:
        test_accs_recv = comm.reduce(test_accs, root=0)
        test_accs_squared_recv = comm.reduce(test_accs_squared, root=0)
        test_sensitivities_recv = comm.reduce(test_sensitivities, root=0)
        test_specificities_recv = comm.reduce(test_specificities, root=0)
        train_accs_recv = comm.reduce(train_accs, root=0)
        train_accs_squared_recv = comm.reduce(train_accs_squared, root=0)

        comm.Reduce(weightss.flatten(), weightss_recv, root=0)
        comm.Reduce(biasess.flatten(), biasess_recv, root=0)
        comm.Reduce(weightss_squared.flatten(), weightss_squared_recv, root=0)
        comm.Reduce(biasess_squared.flatten(), biasess_squared_recv, root=0)
        weights_norms_recv = comm.reduce(weights_norms, root=0)
        weights_norms_squared_recv = comm.reduce(weights_norms_squared, root=0)
        biases_norms_recv = comm.reduce(biases_norms, root=0)
        biases_norms_squared_recv = comm.reduce(biases_norms_squared, root=0)
        iterss_recv = comm.reduce(iterss, root=0)
    else:
        test_accs_recv = test_accs
        test_accs_squared_recv = test_accs_squared
        test_sensitivities_recv = test_sensitivities
        test_specificities_recv = test_specificities
        train_accs_recv = train_accs
        train_accs_squared_recv = train_accs_squared

        weightss_recv=weightss.flatten()
        biasess_recv=biasess.flatten()
        weightss_squared_recv=weightss_squared.flatten()
        biasess_squared_recv=biasess_squared.flatten()
        weights_norms_recv = weights_norms
        weights_norms_squared_recv = weights_norms_squared
        biases_norms_recv = biases_norms
        biases_norms_squared_recv = biases_norms_squared
        iterss_recv = iterss

    '''PROCESS COLLECTIVE DATA'''
    if rank == 0:
        #weightss = np.stack(sum(weightss,[]))
        #weights_norms = sum(weights_norms,[])
        #biasess = np.stack(sum(biasess,[]))
        weights_mean = np.mean(weightss_recv)/number_inits #average over dimension indexing which weight it is (we've already reduced over the number_inits dimension)
        biases_mean = np.mean(biasess_recv)/number_inits
        #note: despite the "_std" names these are variances, E[w^2] - E[w]^2
        weights_std = np.mean(weightss_squared_recv)/number_inits - weights_mean**2
        biases_std = np.mean(biasess_squared_recv)/number_inits - biases_mean**2
        weights_norm_mean = weights_norms_recv/number_inits
        weights_norm_std = weights_norms_squared_recv/number_inits - weights_norm_mean**2
        biases_norm_mean = biases_norms_recv/number_inits
        biases_norm_std = biases_norms_squared_recv/number_inits - biases_norm_mean**2

        # functions = sum(functions,[])
        test_acc = test_accs_recv/number_inits
        test_sensitivity = test_sensitivities_recv/number_inits
        test_specificity = test_specificities_recv/number_inits
        train_acc = train_accs_recv/number_inits
        print('Mean test accuracy:', test_acc)
        print('Mean test sensitivity:', test_sensitivity)
        print('Mean test specificity:', test_specificity)
        print('Mean train accuracy:', train_acc)
        test_acc = test_accs_recv/number_inits
        train_acc = train_accs_recv/number_inits
        train_acc_std = train_accs_squared_recv/number_inits - train_acc**2
        test_acc_std = test_accs_squared_recv/number_inits - test_acc**2
        mean_iters = 1.0*iterss_recv/number_inits

        useful_train_flags = ["dataset", "m", "network", "loss", "optimizer", "pooling", "epochs_after_fit", "ignore_non_fit", "test_function_size", "batch_size", "number_layers", "sigmaw", "sigmab", "init_dist","use_shifted_init","shifted_init_shift","whitening", "centering", "oversampling", "oversampling2", "channel_normalization", "training", "binarized", "confusion","filter_sizes", "gamma", "intermediate_pooling", "label_corruption", "threshold", "n_gpus", "n_samples_repeats", "layer_widths", "number_inits", "padding"]
        with open(results_folder+prefix+"nn_training_results.txt","a") as file:
            file.write("#")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(key))
            file.write("\t".join(["train_acc", "test_error", "test_acc","test_sensitivity","test_specificity","weights_std","biases_std","weights_mean", "biases_mean", "weights_norm_mean","weights_norm_std","biases_norm_mean","biases_norm_std","mean_iters","train_acc_std","test_acc_std"]))
            file.write("\n")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(FLAGS[key]))
            file.write("{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:d}\t{:.4f}\t{:.4f}\n".format(train_acc, 1-test_acc,test_acc,\
                test_sensitivity,test_specificity,weights_std,biases_std,\
                weights_mean,biases_mean,weights_norm_mean,weights_norm_std,biases_norm_mean,biases_norm_std,int(mean_iters),train_acc_std,test_acc_std)) #normalized to sqrt(input_dim)
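The EarlyStoppingByAccuracy and EarlyStoppingByLoss callbacks used in the example above are defined elsewhere in the repository and are not shown here. A minimal sketch of the behaviour assumed by NN_train.py (stop once the monitored metric reaches `value`, after waiting `wait_epochs` further epochs); the class name and exact semantics below are assumptions, not the repository's implementation:

from tensorflow import keras

class EarlyStoppingByAccuracySketch(keras.callbacks.Callback):
    """Hypothetical stand-in for the EarlyStoppingByAccuracy callback."""
    def __init__(self, monitor="val_accuracy", value=1.0, verbose=0, wait_epochs=0):
        super().__init__()
        self.monitor, self.value = monitor, value
        self.verbose, self.wait_epochs = verbose, wait_epochs
        self._waited = 0

    def on_epoch_end(self, epoch, logs=None):
        current = (logs or {}).get(self.monitor)
        if current is None:
            return
        if current >= self.value:
            if self._waited >= self.wait_epochs:
                if self.verbose:
                    print("Stopping: %s reached %.4f" % (self.monitor, current))
                self.model.stop_training = True
            else:
                self._waited += 1
        else:
            self._waited = 0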
Code example #13
File: sample_GP.py  Project: guillefix/nn-pacbayes
def main(_):

    FLAGS = tf.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    os.environ["CUDA_VISIBLE_DEVICES"] = str((rank + 1) % n_gpus)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    #tf.enable_eager_execution(config=config)
    set_session = keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras

    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, _, _ = load_data(FLAGS)
    X = flat_train_images
    ys2 = [[y] for y in ys]
    Y = np.array(ys2)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    num_tasks = 100
    cupy_samples = 1e5

    num_tasks_per_job = num_tasks // size
    tasks = list(
        range(int(rank * num_tasks_per_job), int(
            (rank + 1) * num_tasks_per_job)))

    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    print("compute probability and bound", network, dataset)

    K = load_kernel(FLAGS)
    import cupy as cp
    # import numpy as cp

    Y = cp.array(Y)

    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    freq = 0
    for i in tasks:
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        exact_samples = cp.random.multivariate_normal(
            cp.zeros(m), K, int(cupy_samples), dtype=np.float32) > 0

        fits_data = cp.prod(~(exact_samples[:, :m] ^ (Y.T == 1)), 1)

        indices = cp.where(fits_data)[0]
        freq += len(indices)

    freqs = comm.gather(freq, root=0)

    if rank == 0:
        total_freq = sum(freqs)  # sum the per-rank counts gathered above
        prob = total_freq / (num_tasks * cupy_samples)
        logPU = np.log(prob)
        log10PU = np.log10(prob)
        print(log10PU)
        #compute PAC-Bayes bound
        delta = 2**-10
        bound = (-logPU + 2 * np.log(total_samples) + 1 -
                 np.log(delta)) / total_samples
        bound = 1 - np.exp(-bound)
        print("pre-confusion-correction bound: ", bound)
        rho = confusion / (1.0 + confusion)
        bound = (bound - 0.5 * rho) / (
            1 - rho
        )  #to correct for the confusion changing the training data distribution (in training set, but not in test set)!
        print("Bound: ", bound)
        print("Accuracy bound: ", 1 - bound)
        useful_flags = [
            "dataset", "network", "m", "label_corruption", "confusion",
            "number_layers", "sigmaw", "sigmab", "binarized", "pooling",
            "intermediate_pooling", "whitening", "centering",
            "channel_normalization", "training", "n_gpus"
        ]
        with open(results_folder + prefix + "bounds.txt", "a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("bound")
            file.write("\t")
            file.write("log10PU")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(bound))
            file.write("\t")
            file.write("{}".format(log10PU))
            file.write("\n")
Code example #14
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    print(FLAGS)
    globals().update(FLAGS)
    global m, total_samples, num_classes

    print("Generating input samples", dataset, m)

    from math import ceil

    if dataset == "cifar":
        image_size = 32
        number_channels = 3
    elif dataset == "imagenet":
        image_size = 256
        number_channels = 3
    elif dataset == "mnist":
        image_size = 28
        number_channels = 1
    elif dataset == "mnist-fashion":
        image_size = 28
        number_channels = 1
    elif dataset == "KMNIST":
        image_size = 28
        number_channels = 1
    elif dataset == "EMNIST":
        image_size = 28
        number_channels = 1
    elif dataset == "boolean":
        if boolean_input_dim is not None:
            input_dim = boolean_input_dim
        else:
            input_dim = 7
        image_size = None
    elif dataset == "ion":
        input_dim = 34
        image_size = None
    elif dataset == "calabiyau":
        input_dim = 180
        image_size = None
    else:
        raise NotImplementedError

    if network in [
            "cnn", "fc", "inception_resnet_v2", "inception_v3", "xception"
    ]:
        if network not in ["cnn", "fc"]:
            if network == "xception":
                image_size = max(image_size, 71)
            else:
                image_size = max(image_size, 75)
    else:
        image_size = max(image_size, 32)

    if dataset is not "boolean" or dataset is not "calabiyau" or dataset is not "ion":
        image_width = image_height = image_size

    #image datasets
    aliases = {
        "cifar": "CIFAR10",
        "mnist": "MNIST",
        "mnist-fashion": "FashionMNIST",
        "imagenet": "ImageNet"
    }
    if dataset in [
            "cifar", "mnist", "mnist-fashion", "KMNIST", "EMNIST", "imagenet"
    ]:
        if dataset in aliases:
            dataset_attr = aliases[dataset]
        else:
            dataset_attr = dataset
        dataset_constructor = getattr(torchvision.datasets, dataset_attr)
        transformation = transforms.Compose([transforms.ToPILImage()] +
                                            ([transforms.Resize(image_size)] if
                                             image_size is not None else []) +
                                            [transforms.ToTensor()])
        extra_kwargs = {}
        if dataset == "EMNIST":
            extra_kwargs = {"split": "byclass"}
        d1 = dataset_constructor("./datasets",
                                 download=True,
                                 transform=transformation,
                                 train=True,
                                 **extra_kwargs)
        d2 = dataset_constructor("./datasets",
                                 download=True,
                                 transform=transformation,
                                 train=False,
                                 **extra_kwargs)
        num_classes = len(d1.classes)
        #mm = int(ceil(d.data.shape[0]*5/6))
        full_data = np.concatenate([d1.data, d2.data])
        full_targets = np.concatenate([d1.targets, d2.targets])
        if out_of_sample_test_error:
            #if extended_test_set:
            #    (train_images,train_labels),(test_images,test_labels) = (data[:mm], targets[:mm]),(data[mm:],targets[mm:])
            #else:
            (train_images,
             train_labels), (test_images,
                             test_labels) = (d1.data, d1.targets), (d2.data,
                                                                    d2.targets)
        else:
            (train_images,
             train_labels), (test_images,
                             test_labels) = (d1.data,
                                             d1.targets), (full_data,
                                                           full_targets)
        if dataset == "cifar":
            train_images = torch.Tensor(train_images)
            test_images = torch.Tensor(test_images)
        print(train_images.min(), train_images.max())

    #TODO: add custom datasets

    #non-image-like datasets:
    else:
        if dataset == "boolean":
            assert network == "fc"
            num_classes = 2
            if centering:
                inputs = np.array(
                    [[float(l) * 2.0 - 1 for l in format(i, "0{}b".format(input_dim))]
                     for i in range(0, 2**input_dim)])
            else:
                #the all-zeros input causes problems when computing the kernel matrix when sigmab==0;
                #the commented-out variant below skips it in that case
                #if sigmab==0:
                #    inputs = np.array([[float(l) for l in format(i, "0{}b".format(input_dim))] for i in range(1, 2**input_dim)])
                #else:
                inputs = np.array([[float(l) for l in format(i, "0{}b".format(input_dim))]
                                   for i in range(0, 2**input_dim)])

            if boolfun is not "none":
                fun = boolfun
            else:
                if boolean_input_dim is not None:
                    raise NotImplementedError(
                        "Using boolean_input_dim without specifying an explicit boolfun of matching size is not supported"
                    )
                if boolfun_comp != "none":
                    # open("boolfun_comps.txt","w").write("\n".join(list(funs.keys())))
                    funs = pickle.load(open("funs_per_complexity.p", "rb"))
                    fun = np.random.choice(funs[boolfun_comp])
                    print("complexity", boolfun_comp)
                else:
                    funs = pickle.load(open("funs_per_complexity.p", "rb"))
                    comp = np.random.choice(list(funs.keys()))
                    print("complexity", comp)
                    fun = np.random.choice(funs[comp])
                    # funs = {}
                    # with open("LZ_freq_1e6_7_40_40_1_relu.txt","r") as f:
                    #     for line in f.readlines():
                    #         fun,comp,freq = line.strip().split("\t")
                    #         if comp not in funs:
                    #             funs[comp] = [fun]
                    #         else:
                    #             funs[comp].append(fun)
                    # pickle.dump(funs,open("funs_per_complexity.p","wb"))

            print("fun", fun)

            #if sigmab==0 and not centering:
            #    #labels=np.array([[int(xx)*2.0-1] for xx in list(fun)[1:]]) #start from 1 because we ignored the 0th input
            #    labels=np.array([[int(xx)] for xx in list(fun)[1:]]) #start from 1 because we ignored the 0th input
            #else:
            #labels=np.array([[int(xx)*2.0-1] for xx in list(fun)[0:]])
            labels = np.array([[int(xx)] for xx in list(fun)[0:]])
        elif dataset == "calabiyau":
            assert network == "fc"
            num_classes = 2
            data = np.load("datasets/calabiyau.npz")
            inputs, labels = data["inputs"], data["targets"]
            if whitening:
                inputs = inputs - inputs.mean(0)
        elif dataset == "ion":
            assert network == "fc"
            num_classes = 2
            data = pd.read_csv('datasets/ionosphere.csv')
        else:
            raise NotImplementedError

    global threshold
    if threshold == -1:
        threshold = ceil(num_classes / 2)

    # print(train_images.shape)
    ##get random training sample##
    # and perform some more processing

    # np.random.seed(42069)
    '''GET TRAINING SAMPLE INDICES'''
    '''AND DO PRE-PROCESSING if it's an image dataset'''
    #for datasets that are not images, like the boolean one
    if dataset == "boolean" or dataset == "calabiyau":
        if not random_training_set:
            raise NotImplementedError

        if booltrain_set is not None:
            indices = [i for i, x in enumerate(booltrain_set) if x == "1"]
            assert len(indices) == m
        elif oversampling:
            probs = list(
                map(
                    lambda y: threshold / (num_classes * len(inputs))
                    if y >= threshold else (num_classes - threshold) /
                    (num_classes * len(inputs)), labels))
            probs = np.array(probs)
            probs /= np.sum(probs)
            indices = np.random.choice(range(len(inputs)),
                                       size=int(total_samples),
                                       replace=False,
                                       p=probs)
        elif oversampling2:
            indices = np.random.choice(range(len(inputs)),
                                       size=int(total_samples),
                                       replace=False)
            indices = sum([[i]*(num_classes-threshold) for i in indices if labels[i]<threshold] \
                + [[i]*threshold for i in indices if labels[i]>=threshold],[])
            #print("Indices: ", indices)

            m *= int(
                (2 * (num_classes - threshold) * threshold / (num_classes)))
        else:
            indices = np.random.choice(range(int(len(inputs))),
                                       size=int(total_samples),
                                       replace=False)
        # print(indices)
        print(
            "train_set", "".join([("1" if i in indices else "0")
                                  for i in range(int(len(inputs)))]))
        if out_of_sample_test_error:
            test_indices = np.array(
                [i for i in range(len(inputs)) if i not in indices])
        else:
            test_indices = np.array(range(len(inputs)))
        train_inputs = inputs[indices, :].astype(np.float32)
        train_labels = labels[indices]
        if training:
            test_inputs = inputs[test_indices, :]
            flat_test_images = test_inputs
            test_labels = labels[test_indices]

        flat_train_images = train_inputs
    elif dataset == "ion":
        np.random.seed(seed=708)
        data = data.reindex(np.random.permutation(data.index))
        data = data.reset_index(drop=True)
        data = data.to_numpy()
        number_of_test_examples = 351 - m
        X_train_full = data[:-number_of_test_examples, :-1].astype(float)
        X_test_full = data[-number_of_test_examples:, :-1].astype(float)
        y_train_full = data[:-number_of_test_examples, -1].astype(float)
        y_test_full = data[-number_of_test_examples:, -1].astype(float)
        np.random.seed()
        n = data.shape[0] - number_of_test_examples
        train_inputs = X_train_full.reshape(n, 34).astype('float32')
        flat_train_images = train_inputs
        train_labels = y_train_full[:n].reshape(n, 1)
        n = number_of_test_examples
        test_inputs = X_test_full.reshape(n, 34).astype('float32')
        flat_test_images = test_inputs
        test_labels = y_test_full.reshape(n, 1)

    #for image datasets
    else:
        #data processing functions assume the images have values in range [0,255]
        #global train_images_obs
        #train_images_obs=train_images
        max1 = torch.max(train_images).item()
        max2 = torch.max(test_images).item()
        print("maxs", max1, max2)
        max_val = max(max1, max2)
        #train_images  = train_images.numpy().astype(np.float32)*255.0/max_val
        #test_images = test_images.numpy().astype(np.float32)*255.0/max_val
        train_images = train_images.numpy().astype(np.uint8)
        test_images = test_images.numpy().astype(np.uint8)

        #GET TRAINING SAMPLE INDICES
        if random_training_set:
            if oversampling:
                probs = list(
                    map(
                        lambda x: threshold / (num_classes)
                        if x >= threshold else (num_classes - threshold) /
                        (num_classes), train_labels))
                probs = np.array(probs)
                probs /= np.sum(probs)
                indices = np.random.choice(range(len(train_images)),
                                           size=int(total_samples),
                                           replace=False,
                                           p=probs)
            elif oversampling2:
                indices = np.random.choice(range(len(train_images)),
                                           size=int(total_samples),
                                           replace=False)
                indices = sum([[i]*(num_classes-threshold) for i in indices if train_labels[i]<threshold] \
                    + [[i]*threshold for i in indices if train_labels[i]>=threshold],[])

                m *= int((2 * (num_classes - threshold) * threshold /
                          (num_classes)))
            else:
                indices = np.random.choice(range(len(train_images)),
                                           size=int(total_samples),
                                           replace=False)
        else:
            indices = np.arange(int(total_samples))
        # print(indices)

        #if network == "nasnet":
        #    train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC
        #    train_images = keras_applications.nasnet.preprocess_input(train_images, backend=tf.keras.backend)
        #    if training:
        #        test_images = keras_applications.nasnet.preprocess_input(test_images, backend=tf.keras.backend)
        #        train_labels = np.take(train_labels,indices)
        #        print(len([x for x in train_labels if x<threshold])/len(train_images))

        #elif network == "vgg19":
        #    train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC
        #    train_images = keras_applications.vgg19.preprocess_input(train_images, backend=tf.keras.backend)
        #    if training:
        #        test_images = keras_applications.vgg19.preprocess_input(test_images, backend=tf.keras.backend)
        #        train_labels = np.take(train_labels,indices)
        #        print(len([x for x in train_labels if x<threshold])/len(train_images))

        #elif network == "vgg16":
        #    train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC
        #    train_images = keras_applications.vgg16.preprocess_input(train_images, backend=tf.keras.backend)
        #    if training:
        #        test_images = keras_applications.vgg16.preprocess_input(test_images, backend=tf.keras.backend)
        #        train_labels = np.take(train_labels,indices)
        #        print(len([x for x in train_labels if x<threshold])/len(train_images))

        #elif network == "resnet50" or network == "resnet101" or network == "renset152":
        ## elif network == "resnet101" or network == "renset152":
        #    train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC
        #    train_images = keras_applications.resnet.preprocess_input(train_images, backend=tf.keras.backend)
        #    # train_images = train_images/255.0
        #    # import matplotlib.pyplot as plt
        #    # # print(train_images)
        #    # plt.imshow(train_images[0])
        #    if training:
        #        test_images = keras_applications.resnet.preprocess_input(test_images, backend=tf.keras.backend)
        #        train_labels = np.take(train_labels,indices)
        #        # test_images = test_images/255.0
        #        print(len([x for x in train_labels if x<threshold])/len(train_images))

        #elif network in ["resnet_v2_50","resnetv2_101", "resnetv2_152"]:
        #    train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC
        #    train_images = keras_applications.resnet_v2.preprocess_input(train_images, backend=tf.keras.backend)
        #    if training:
        #        test_images = keras_applications.resnet_v2.preprocess_input(test_images, backend=tf.keras.backend)
        #        train_labels = np.take(train_labels,indices)
        #        print(len([x for x in train_labels if x<threshold])/len(train_images))

        #else:
        if True:
            train_images = train_images[indices]
            if training:
                test_images = test_images
                train_labels = np.take(train_labels, indices)
                print(
                    len([x for x in train_labels if x < threshold]) /
                    len(train_images))

            ##adding channel dimensions for image datasets without them
            if dataset in ["mnist", "mnist-fashion", "KMNIST", "EMNIST"]:
                train_images = np.expand_dims(train_images,
                                              -1).astype(np.uint8)
                test_images = np.expand_dims(test_images, -1).astype(np.uint8)
                ## for non-flexible architectures, transform the data
                if network not in ["cnn", "fc"]:
                    train_images = np.tile(train_images, (1, 1, 1, 3))
                    test_images = np.tile(test_images, (1, 1, 1, 3))
                    #print(train_images.dtype)
                    # plt.imshow(train_images[0])
                    # plt.show()
                    #print(train_images.shape)
            if network in ["cnn", "fc"]:
                #normalize the images pixels to be in [0,1]
                train_images = train_images.astype(np.float32) / 255.0
                if training:
                    test_images = test_images.astype(np.float32) / 255.0
            else:
                #note that the transformation to PIL and back to Tensor normalizes the image pixels to be in [0,1]
                assert train_images.dtype == "uint8"  #otherwise ToPILImage wants the input to be NCHW. wtff
                train_images = np.stack(
                    [d1.transform(image) for image in train_images])
                train_images = np.transpose(
                    train_images, (0, 2, 3, 1)
                )  # the pytorch transform returns NCHW, so convert back to NHWC
                if unnormalized_images:
                    train_images = train_images * 255.0
                if training:
                    test_images = np.stack(
                        [d1.transform(image) for image in test_images])
                    test_images = np.transpose(test_images, (0, 2, 3, 1))
                    if unnormalized_images:
                        test_images = test_images * 255.0
                print(train_images.shape)
                print("max after transforming", train_images.max())

            #check correct dimensions
            if network != "fc":
                image_size = train_images.shape[1]
                assert train_images.shape[1] == train_images.shape[2]
                number_channels = train_images.shape[-1]

            flat_train_images = np.array(
                [train_image.flatten() for train_image in train_images])
            if training:
                flat_test_images = np.array(
                    [test_image.flatten() for test_image in test_images])

            if channel_normalization:
                #flatten to compute SVD matrix
                print("channel normalizing")
                x = train_images
                flat_x = flat_train_images

                #normalize each channel (3 colors for e.g.)
                x_mean = np.mean(x, axis=(0, 1, 2))
                x_std = np.std(x, axis=(0, 1, 2))
                x = (x - x_mean) / x_std
                train_images = x

                #test images
                if training:
                    test_images = (test_images - x_mean) / x_std

                #flatten again after normalizing (flatten the normalized images, not the old flat copies)
                flat_train_images = np.array([
                    train_image.flatten() for train_image in train_images
                ])
                if training:
                    flat_test_images = np.array([
                        test_image.flatten() for test_image in test_images
                    ])

            if centering:
                #flatten to compute SVD matrix
                print("centering")
                x = train_images
                flat_x = flat_train_images
                flat_x -= flat_x.mean(axis=0)

                train_images = flat_x.reshape(
                    (x.shape[0], x.shape[1], x.shape[2], x.shape[3]))

                #test images
                if training:
                    flat_test_images -= flat_test_images.mean(axis=0)
                    test_images = flat_test_images.reshape(
                        (test_images.shape[0], test_images.shape[1],
                         test_images.shape[2], test_images.shape[3]))

            #WHITENING using training_images
            if whitening:
                #flatten to compute SVD matrix
                print("ZCA whitening")
                x = train_images
                flat_x = flat_train_images
                flat_x -= flat_x.mean(axis=0)
                sigma = np.matmul(flat_x.T, flat_x) / flat_x.shape[0]
                u, s, _ = np.linalg.svd(sigma)
                zca_epsilon = 1e-10  # avoid division by 0
                d = np.diag(1. / np.sqrt(s + zca_epsilon))
                Q = np.matmul(np.matmul(u, d), u.T)
                flat_x = np.matmul(flat_x, Q.T)
                flat_train_images = flat_x

                #normalize each channel (3 colors for e.g.)
                #to do this we reshape the tensor to NHWC form
                #train_images = flat_x.reshape((x.shape[0], x.shape[3], x.shape[1],x.shape[2]))
                train_images = flat_x.reshape(
                    (x.shape[0], x.shape[1], x.shape[2], x.shape[3]))

                #test images
                if training:
                    flat_test_images -= flat_test_images.mean(axis=0)
                    flat_test_images = np.matmul(flat_test_images, Q.T)
                    test_images = flat_test_images.reshape(
                        (test_images.shape[0], test_images.shape[1],
                         test_images.shape[2], test_images.shape[3]))
                    #test_images = flat_test_images.reshape((test_images.shape[0], test_images.shape[3], test_images.shape[1],test_images.shape[2]))
                    #test_images =  np.transpose(test_images, tp_order)

        #flattened images, as the kernel function takes flattened vectors (row major for NCHW images)
        tp_order = np.concatenate([[0, len(train_images.shape) - 1],
                                   np.arange(1,
                                             len(train_images.shape) - 1)])
        if n_gpus > 0:
            flat_train_images = np.transpose(train_images,
                                             tp_order)  # NHWC -> NCHW
            flat_train_images = np.array(
                [train_image.flatten() for train_image in flat_train_images])
            if training:
                flat_test_images = np.transpose(test_images,
                                                tp_order)  # NHWC -> NCHW
                flat_test_images = np.array(
                    [test_image.flatten() for test_image in flat_test_images])

    if network == "fc":
        train_images = flat_train_images
        if training:
            test_images = flat_test_images

    #corrupting images, and adding confusion data
    def binarize(label, threshold, method="threshold"):
        if method == "threshold":
            return label >= threshold
        elif method == "oddeven":
            return (label + 1) % 2

    # %%
    def process_labels(label,
                       label_corruption,
                       threshold,
                       zero_one=False,
                       binarized=True,
                       binarization_method="threshold"):
        if binarized:
            if zero_one:
                if np.random.rand() < label_corruption:
                    return np.random.choice([0, 1])
                else:
                    return float(
                        binarize(label, threshold, binarization_method))
            else:
                if np.random.rand() < label_corruption:
                    return np.random.choice([-1.0, 1.0])
                else:
                    return float(
                        binarize(label, threshold,
                                 binarization_method)) * 2.0 - 1
        else:
            if np.random.rand() < label_corruption:
                return np.random.choice(range(num_classes))
            else:
                return float(label)
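    # Worked example with hypothetical values: for num_classes = 10 and threshold = 5,
    # process_labels(7, label_corruption=0.0, threshold=5, zero_one=True) returns 1.0
    # (7 >= 5 binarizes to True), while process_labels(3, 0.0, 5, zero_one=False)
    # returns -1.0 (False is mapped to -1 in the +/-1 encoding).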

    #if the labels are to be generated by a neural network:
    if doing_regression:
        from utils import load_model
        model = load_model(FLAGS)
        ys = model.predict(train_images)[:, 0]
        if training:
            test_ys = model.predict(test_images)[:, 0]
    else:
        if nn_random_labels:
            from utils import load_model
            model = load_model(FLAGS)
            # data = tf.constant(train_images)
            train_labels = model.predict(
                train_images)[:,
                              0] > 0  #, batch_size=data.shape[0], steps=1) > 0
            # print("generated function", "".join([str(int(y)) for y in train_labels]))
            if training:
                # data = tf.constant(test_images)
                test_labels = model.predict(
                    test_images
                )[:, 0] > 0  #, batch_size=data.shape[0], steps=1) > 0
            if binarized:
                threshold = 1
                num_classes = 2

        if random_labels:
            print("zero_one", zero_one)
            ys = [
                process_labels(label,
                               label_corruption,
                               threshold,
                               zero_one=True,
                               binarized=binarized,
                               binarization_method=binarization_method)
                for label in train_labels[:m]
            ] + [
                process_labels(label,
                               1.0,
                               threshold,
                               zero_one=True,
                               binarized=binarized,
                               binarization_method=binarization_method)
                for label in train_labels[m:]
            ]
        else:  #confusion/attack labels
            ys = [
                process_labels(label,
                               label_corruption,
                               threshold,
                               zero_one=True,
                               binarized=binarized,
                               binarization_method=binarization_method)
                for label in train_labels[:m]
            ] + [
                float(not binarize(
                    label, threshold, binarization_method=binarization_method))
                for label in train_labels[m:]
            ]

        if training:
            test_ys = np.array([
                process_labels(label,
                               label_corruption,
                               threshold,
                               zero_one=True,
                               binarized=binarized,
                               binarization_method=binarization_method)
                for label in test_labels
            ])
    '''SAVING DATA SAMPLES'''
    if training:
        save_data(train_images, ys, test_images, test_ys, FLAGS)
    else:
        test_images = test_ys = []
        save_data(train_images, ys, test_images, test_ys, FLAGS)
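The ZCA whitening block in the example above (centre the flattened images, take the SVD of their covariance, rotate by u diag(1/sqrt(s)) u^T) can be read as a single reusable transform. A minimal NumPy sketch with a hypothetical helper name, returning the whitening matrix so the test set can be transformed consistently:

import numpy as np

def zca_whiten(flat_images, epsilon=1e-10):
    """Centre the flattened images and apply the ZCA whitening rotation."""
    flat = flat_images - flat_images.mean(axis=0)
    sigma = flat.T @ flat / flat.shape[0]  # covariance of the centred images
    u, s, _ = np.linalg.svd(sigma)
    Q = u @ np.diag(1.0 / np.sqrt(s + epsilon)) @ u.T  # ZCA whitening matrix
    return flat @ Q.T, Q

# usage sketch:
# flat_train_white, Q = zca_whiten(flat_train_images)
# flat_test_white = (flat_test_images - flat_test_images.mean(axis=0)) @ Q.T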