def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    X = np.load(imdb_data_folder + "X.npy")
    X_test = np.load(imdb_data_folder + "X_test_50.npy")
    y = np.load(imdb_data_folder + "y.npy")
    y_test = np.load(imdb_data_folder + "y_test_50.npy")
    K = pickle.load(open(imdb_data_folder + "K_LSTM_full.p", "rb"))

    save_kernel(K, FLAGS)
    save_data(X, y, X_test, y_test, FLAGS)
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    print(partial_kernel_n_proc)

    # Work out how many partial kernels have already been folded into a running sum.
    number_completed = 0
    files = find_partial_kernel_filenames(FLAGS)
    for f in files:
        cnt = int(f.split("_")[-2])
        if cnt > partial_kernel_n_proc:
            number_completed = cnt - partial_kernel_n_proc
        if f == kernel_filename(FLAGS):
            number_completed = partial_kernel_n_proc
            # break

    # Poll for new partial kernels, accumulate them, and save either the averaged
    # full kernel (once all processes have reported) or an updated partial sum.
    while number_completed < partial_kernel_n_proc:
        files = find_partial_kernel_filenames(FLAGS)
        print(files)
        if len(files) > 1:
            for i, f in enumerate(files):
                print(f)
                if i == 0:
                    cov = load_kernel_by_filename(f)
                    if number_completed == 0:
                        number_completed += 1
                else:
                    cov += load_kernel_by_filename(f)
                    number_completed += 1
            if number_completed >= partial_kernel_n_proc:
                save_kernel(cov / number_completed, FLAGS)
            else:
                save_kernel_partial(cov, FLAGS, partial_kernel_n_proc + number_completed)
            for f in files:
                os.remove(f)
        sleep(100)
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError("Initialization distributions other than Gaussian "
                                  "are not implemented for computing PAC-Bayes bounds!")

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    # tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, test_images, test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    # print("ys", ys)

    # Process the data into the format the GP code expects.  Evaluate on a smaller
    # test sample, because otherwise the GP would run out of memory.
    test_images = test_images[:test_function_size]
    test_ys = test_ys[:test_function_size]

    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0, len(data.shape) - 1], np.arange(1, len(data.shape) - 1)])
    print(data.shape, tp_order)
    flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW, because the CNN GP kernels assume this
    flat_test_images = np.array([test_image.flatten() for test_image in flat_data])

    Xtrain = flat_train_images
    Xtest = flat_test_images
    Xfull = np.concatenate([flat_train_images, flat_test_images])
    ys2 = [[y] for y in ys]
    if test_fun_override is not None:
        ys2test = [[float(x)] for x in test_fun_override]
    else:
        ys2test = [[y] for y in test_ys]
    ysfull = ys2 + ys2test
    Yfull = np.array(ysfull)
    Ytrain = np.array(ys2)
    Ytest = np.array(ys2test)

    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)
    # if loss is not "mse":
    #     raise NotImplementedError("Haven't implemented logQ estimate for CE loss yet")

    from utils import load_posterior_params
    mean, cov = load_posterior_params(FLAGS)

    # Find the log marginal likelihood of the data.
    if using_EP:
        from GP_prob.nngp_mse_heaviside_posterior import nngp_mse_heaviside_posteror_logp
        logQ = nngp_mse_heaviside_posteror_logp(Xtest, Ytest, mean, cov)
    else:
        raise NotImplementedError("Only EP estimation of logQ is implemented")

    if rank == 0:
        print(logQ)
        useful_flags = ["dataset", "boolfun_comp", "boolfun", "test_fun_override",
                        "test_function_size", "network", "m", "label_corruption",
                        "confusion", "number_layers", "sigmaw", "sigmab", "binarized",
                        "pooling", "intermediate_pooling", "whitening", "training",
                        "n_gpus", "kernel_mult", "normalize_kernel", "logPGPEP", "errors"]
        with open(results_folder + prefix + "logQs.txt", "a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("logQ")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(logQ))
            file.write("\n")
FLAGS['number_layers'] = 1
FLAGS['pooling'] = "none"
FLAGS['intermediate_pooling'] = "0000"
FLAGS['sigmaw'] = 10.0
FLAGS['sigmab'] = 10.0
FLAGS['network'] = "fc"
FLAGS['prefix'] = "test"
FLAGS['whitening'] = False
FLAGS['centering'] = False
FLAGS['random_labels'] = True
FLAGS['training'] = True
FLAGS['no_training'] = False

from utils import preprocess_flags
FLAGS = preprocess_flags(FLAGS)
globals().update(FLAGS)

from utils import load_data, load_model, load_kernel
train_images, flat_train_images, ys, test_images, test_ys = load_data(FLAGS)
input_dim = train_images.shape[1]
num_channels = train_images.shape[-1]
# tp_order = np.concatenate([[0, len(train_images.shape) - 1], np.arange(1, len(train_images.shape) - 1)])
# train_images = tf.constant(train_images)
test_images = test_images[:500]
test_ys = test_ys[:500]

#%%
train_images, flat_data, ys, test_images, test_ys = load_data_by_filename(filename)
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    print("pooling", pooling)
    print("Generating architecture", network, number_layers)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    import os
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    from tensorflow import keras
    # import keras
    import keras_applications
    keras_applications._KERAS_BACKEND = keras.backend
    keras_applications._KERAS_LAYERS = keras.layers
    keras_applications._KERAS_MODELS = keras.models
    keras_applications._KERAS_UTILS = keras.utils
    import warnings

    num_classes = 2  # TODO: make code compatible with non-binary classification

    # %%
    # Input dimensions for each dataset: (image_size, number_channels) for image data.
    image_datasets = {
        "cifar": (32, 3),
        "imagenet": (256, 3),
        "mnist": (28, 1),
        "mnist-fashion": (28, 1),
        "KMNIST": (28, 1),
        "EMNIST": (28, 1),
    }
    if dataset in image_datasets:
        image_size, number_channels = image_datasets[dataset]
    elif dataset == "boolean":
        input_dim = boolean_input_dim if boolean_input_dim is not None else 7
    elif dataset == "calabiyau":
        input_dim = 180
    elif dataset == "ion":
        input_dim = 34
    else:
        raise NotImplementedError

    if dataset not in ("boolean", "calabiyau", "ion"):
        image_height = image_size
        image_width = image_size
        input_dim = image_height * image_width * number_channels

    set_session = tf.compat.v1.keras.backend.set_session

    from utils import cauchy_init_wrapper, shifted_init_wrapper
    if init_dist == "gaussian":
        bias_initializer = keras.initializers.RandomNormal(stddev=sigmab)
        # weight_initializer = keras.initializers.RandomNormal(stddev=sigmaw/np.sqrt(input_dim))
        weight_initializer = keras.initializers.VarianceScaling(
            scale=sigmaw**2, mode='fan_in', distribution='normal', seed=None)
        if use_shifted_init:
            bias_initializer_last_layer = shifted_init_wrapper(sigmab, shifted_init_shift)
        else:
            bias_initializer_last_layer = bias_initializer
    elif init_dist == "cauchy":
        bias_initializer = cauchy_init_wrapper(sigmab)
        weight_initializer = cauchy_init_wrapper(sigmaw)
        bias_initializer_last_layer = bias_initializer
    elif init_dist == "uniform":
        bias_initializer = keras.initializers.RandomUniform(
            minval=-np.sqrt(3 * sigmab), maxval=np.sqrt(3 * sigmab), seed=None)
        weight_initializer = keras.initializers.VarianceScaling(
            scale=sigmaw, mode='fan_in', distribution='uniform', seed=None)
        bias_initializer_last_layer = bias_initializer
    else:
        raise NotImplementedError
    # bias_initializer = keras.initializers.Zeros()
    # weight_initializer = keras.initializers.glorot_uniform()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # log device placement (on which device the operation ran)
    # config.gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    activations_dict = {"relu": tf.nn.relu, "tanh": tf.nn.tanh}

    if network == "cnn":
        if intermediate_pooling_type == "avg":
            intermediate_pooling_layer = [keras.layers.AvgPool2D(pool_size=2, padding='same')]
        elif intermediate_pooling_type == "max":
            intermediate_pooling_layer = [keras.layers.MaxPool2D(pool_size=2, padding='same')]
        else:
            intermediate_pooling_layer = []
        if pooling == "avg":
            pooling_layer = [keras.layers.GlobalAveragePooling2D()]
        elif pooling == "max":
            pooling_layer = [keras.layers.GlobalMaxPooling2D()]
        else:
            pooling_layer = []
        model = keras.Sequential(
            sum([
                [keras.layers.Conv2D(
                    input_shape=(image_height, image_width, number_channels) if index == 0 else (None,),
                    filters=num_filters,
                    kernel_size=filter_size,
                    padding=pad,
                    strides=stride,
                    activation=activations_dict[activation],
                    data_format='channels_last',
                    kernel_initializer=weight_initializer,
                    bias_initializer=bias_initializer)]
                + (intermediate_pooling_layer if have_pooling else [])
                for index, (filter_size, pad, stride, have_pooling, activation)
                in enumerate(zip(filter_sizes, padding, strides, pooling_in_layer, activations))
            ], [])
            + pooling_layer
            + [keras.layers.Flatten()]
            + [
                # keras.layers.Dense(1, activation=tf.nn.sigmoid)
                keras.layers.Dense(1,  # activation=tf.nn.sigmoid,
                                   kernel_initializer=weight_initializer,
                                   bias_initializer=bias_initializer_last_layer),
            ])
        # ] + [keras.layers.Lambda(lambda x: x + shifted_init_shift)])
    elif network == "fc":
        model = keras.Sequential(
            ([
                keras.layers.Dense(
                    layer_width,
                    activation=activation,
                    input_shape=(input_dim,) if index == 0 else (None,),
                    kernel_initializer=weight_initializer,
                    bias_initializer=bias_initializer)
                for index, (layer_width, activation) in enumerate(zip(layer_widths, activations))
            ] if number_layers > 0 else [])
            # + [keras.layers.Lambda(lambda x: x - 1/np.sqrt(2*np.pi))]
            + [
                keras.layers.Dense(
                    1,
                    input_shape=(input_dim,) if number_layers == 0 else (None,),
                    # activation=tf.nn.sigmoid,
                    kernel_initializer=weight_initializer,
                    bias_initializer=bias_initializer_last_layer,
                )
            ])
        # ]) + [keras.layers.Lambda(lambda x: x + shifted_init_shift)])
    elif network == "resnet":
        # from keras_contrib.applications.resnet import ResNet
        from .resnet import ResNet
        import keras
        n_blocks = 3
        DEPTH = number_layers
        if DEPTH % 6 != 2:
            raise ValueError('DEPTH must be 6n + 2:', DEPTH)
        # block_depth = (DEPTH - 2) // 6
        block_depth = (DEPTH - 2) // (n_blocks * 2)
        model = ResNet(
            input_shape=(image_height, image_width, number_channels),
            classes=1,
            block='basic',
            repetitions=[block_depth] * n_blocks,
            transition_strides=[(1, 1), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2)][:n_blocks],
            initial_filters=64,
            initial_strides=(1, 1),
            initial_kernel_size=(3, 3),
            initial_pooling=None,  # initial_pooling='max',
            final_pooling=pooling if pooling is not None else "none",
            activation=None)
        # activation='sigmoid')
    else:
        model = keras.models.Sequential()
        # Off-the-shelf architectures (include_top=False, random weights).  Each branch
        # selects the constructor and the minimum spatial size that architecture accepts.
        if network == "vgg19":
            constructor, min_size = keras.applications.vgg19.VGG19, 32
        elif network == "vgg16":
            constructor, min_size = keras.applications.vgg16.VGG16, 32
        elif network == "resnet50":
            constructor, min_size = keras_applications.resnet.ResNet50, 32
        elif network == "resnet101":
            constructor, min_size = keras_applications.resnet.ResNet101, 32
        elif network == "resnet152":
            constructor, min_size = keras_applications.resnet.ResNet152, 32
        elif network == "resnetv2_50":
            constructor, min_size = keras_applications.resnet_v2.ResNet50V2, 32
        elif network == "resnetv2_101":
            constructor, min_size = keras_applications.resnet_v2.ResNet101V2, 32
        elif network == "resnetv2_152":
            constructor, min_size = keras_applications.resnet_v2.ResNet152V2, 32
        elif network == "inception_resnet_v2":
            constructor, min_size = keras_applications.inception_resnet_v2.InceptionResNetV2, 75
        elif network == "inception_v3":
            constructor, min_size = keras.applications.inception_v3.InceptionV3, 75
        elif network == "resnext50":
            constructor, min_size = keras_applications.resnext.ResNeXt50, 32
        elif network == "resnext101":
            constructor, min_size = keras_applications.resnext.ResNeXt101, 32
        elif network == "densenet121":
            constructor, min_size = keras_applications.densenet.DenseNet121, 32
        elif network == "densenet169":
            constructor, min_size = keras_applications.densenet.DenseNet169, 32
        elif network == "densenet201":
            constructor, min_size = keras_applications.densenet.DenseNet201, 32
        elif network == "mobilenetv2":
            constructor, min_size = keras.applications.mobilenet_v2.MobileNetV2, 32  # alpha left at its default (1.0)
        elif network == "nasnet":
            constructor, min_size = keras.applications.nasnet.NASNetLarge, 32
        elif network == "xception":
            constructor, min_size = keras.applications.xception.Xception, 71
        else:
            raise NotImplementedError(network)
        image_height = max(image_height, min_size)
        image_width = max(image_width, min_size)
        number_channels = max(number_channels, 3)
        model1 = constructor(
            include_top=False,
            weights=None,
            input_tensor=None,
            input_shape=(image_height, image_width, number_channels),
            pooling=pooling,
            classes=num_classes)
        model.add(model1)
        print(model1.output_shape)
        model.add(keras.layers.Flatten())
        model.add(keras.layers.Dense(
            1,
            kernel_initializer=weight_initializer,
            bias_initializer=bias_initializer_last_layer,
        ))

    print("Number of parameters: ", model.count_params())
    print(model.summary())

    json_string = model.to_json()

    '''SAVE ARCHITECTURE'''
    save_arch(json_string, FLAGS)
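# Hedged usage sketch (not called anywhere in the pipeline).  It assumes that
# save_arch() in utils simply writes the JSON string returned by model.to_json()
# to disk; the file path below is hypothetical.  Under that assumption, the saved
# architecture can be rebuilt (topology only, fresh random weights) with the
# standard Keras round-trip:
def _example_rebuild_arch_from_json(json_path="arch.json"):
    import tensorflow as tf
    with open(json_path) as f:
        json_string = f.read()
    model = tf.keras.models.model_from_json(json_string)  # same topology, new random weights
    model.summary()
    return model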
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError("Initialization distributions other than Gaussian "
                                  "are not implemented for computing PAC-Bayes bounds!")

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    # tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, _, _ = load_data(FLAGS)
    X = flat_train_images
    ys2 = [[y] for y in ys]
    Y = np.array(ys2)

    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)

    if using_NTK:
        FLAGS["use_empirical_NTK"] = True
        theta = load_kernel(FLAGS)
        print(theta)
        # When using the NTK, the above loads the NTK kernel, but we also need the
        # non-NTK (NNGP) kernel to compute the bound.
        FLAGS["use_empirical_NTK"] = False
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        K = kernel_mult * K_pre
        if theta.shape[0] >= m:  # the kernel may have been computed for GP_train (train + test points)
            theta = theta[:m, :m]
        if K.shape[0] >= m:
            K = K[:m, :m]
    else:
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        K = kernel_mult * K_pre
        if K.shape[0] >= m:  # the kernel may have been computed for GP_train (train + test points)
            K = K[:m, :m]

    # Find the log marginal likelihood of the data.
    if using_EP:
        from GP_prob.GP_prob_gpy2 import GP_prob
        logPU = GP_prob(K, X, Y, method="EP", using_exactPB=using_exactPB)
    elif using_Laplace:
        from GP_prob.GP_prob_gpy2 import GP_prob
        # from GP_prob.GP_prob_numpy import GP_prob
        logPU = GP_prob(K, X, Y, method="Laplace", using_exactPB=using_exactPB)
        # logPU = GP_prob(K, np.squeeze(Y))
    elif using_Laplace2:
        # from GP_prob.GP_prob_gpy import GP_prob
        # This gives different results because it uses a cruder implementation of
        # Laplace (a more naive Newton method to find the maximum of the posterior).
        from GP_prob.GP_prob_numpy import GP_prob
        # logPU = GP_prob(K, X, Y, method="Laplace")
        logPU = GP_prob(K, np.squeeze(Y))
    elif using_MC:
        from GP_prob.GP_prob_MC import GP_prob
        logPU = GP_prob(K, X, Y, FLAGS)
    elif using_regression:
        from GP_prob.GP_prob_regression import GP_prob
        # logPU = GP_prob(K, X, Y, sigma_noise=np.sqrt(total_samples/2))
        logPU = GP_prob(K, X, Y, sigma_noise=1.0)
    elif using_NTK:
        # from GP_prob.GP_prob_regression import GP_prob
        # logPU = GP_prob(K, X, Y, sigma_noise=1.0, posterior="ntk")
        from GP_prob.GP_prob_ntk import GP_prob
        logPU = GP_prob(K, theta, X, Y, t=1e2)

    if rank == 0:
        print(logPU)

        # Compute the PAC-Bayes bound.
        delta = 2**-10
        bound = (-logPU + 2 * np.log(total_samples) + 1 - np.log(delta)) / total_samples
        bound = 1 - np.exp(-bound)
        print("pre-confusion-correction bound: ", bound)
        # Correct for the confusion changing the training data distribution
        # (it is present in the training set, but not in the test set).
        rho = confusion / (1.0 + confusion)
        bound = (bound - 0.5 * rho) / (1 - rho)
        print("Bound: ", bound)
        print("Accuracy bound: ", 1 - bound)

        useful_flags = ["dataset", "boolfun_comp", "boolfun", "network", "m",
                        "label_corruption", "confusion", "number_layers", "sigmaw",
                        "sigmab", "binarized", "pooling", "intermediate_pooling",
                        "whitening", "training", "n_gpus", "kernel_mult",
                        "normalize_kernel"]
        with open(results_folder + prefix + "bounds.txt", "a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("bound")
            file.write("\t")
            file.write("logP")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(bound))
            file.write("\t")
            file.write("{}".format(logPU))
            file.write("\n")
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError("Initialization distributions other than Gaussian "
                                  "are not implemented for computing PAC-Bayes bounds!")

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    # tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, test_images, test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    # print("ys", ys)

    # Process the data into the format the GP code expects.  Evaluate on a smaller
    # test sample, because otherwise the GP would run out of memory.
    test_images = test_images[:test_function_size]
    test_ys = test_ys[:test_function_size]

    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0, len(data.shape) - 1], np.arange(1, len(data.shape) - 1)])
    print(data.shape, tp_order)
    flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW, because the CNN GP kernels assume this
    flat_test_images = np.array([test_image.flatten() for test_image in flat_data])

    Xtrain = flat_train_images
    Xtest = flat_test_images
    Xfull = np.concatenate([flat_train_images, flat_test_images])
    ys2 = [[y] for y in ys]
    # if test_fun_override is not None:
    #     ys2test = [[float(x)] for x in test_fun_override]
    # else:
    ys2test = [[y] for y in test_ys]
    ysfull = ys2 + ys2test
    Yfull = np.array(ysfull)
    Ytrain = np.array(ys2)
    Ytest = np.array(ys2test)

    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)
    # if loss is not "mse":
    #     raise NotImplementedError("Haven't implemented logQ estimate for CE loss yet")

    if using_NTK:
        raise NotImplementedError("Haven't implemented logQ estimate for NTK yet")
    else:
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        Kfull = kernel_mult * K_pre

    # Compute the posterior parameters over the test points.
    if loss == "mse":
        from GP_prob.nngp_mse_heaviside_posterior import nngp_mse_heaviside_posteror_params
        mean, cov = nngp_mse_heaviside_posteror_params(Xtrain, Ytrain, Xtest, Kfull)
    else:
        raise NotImplementedError("Only mse loss implemented")

    if rank == 0:
        from utils import save_posterior_params
        save_posterior_params(mean, cov, FLAGS)
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    num_tasks = number_samples
    num_gpus = n_gpus
    print("num_gpus", num_gpus)

    # Split the sampling tasks evenly across MPI ranks.
    num_tasks_per_job = num_tasks // size
    tasks = list(range(rank * num_tasks_per_job, (rank + 1) * num_tasks_per_job))
    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    if num_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % num_gpus)
    config = tf.compat.v1.ConfigProto()
    if num_gpus > 0:
        config.gpu_options.allow_growth = True
    tf.compat.v1.enable_eager_execution(config=config)

    # The code below is necessary so that Keras does not grab all GPU memory.
    set_session = tf.compat.v1.keras.backend.set_session
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''LOAD DATA & ARCHITECTURE'''
    from utils import load_data, load_model, load_kernel, entropy
    data, flat_data, _, _, _ = load_data(FLAGS)
    data = tf.constant(data)
    input_dim = data.shape[1]
    num_channels = data.shape[-1]
    model = load_model(FLAGS)
    # K = load_kernel(FLAGS)
    # from GP_prob.GP_prob_gpy import GP_prob
    # def calculate_logPU(preds):
    #     return GP_prob(K, flat_data, preds)

    print("Doing task %d of %d" % (rank, size))
    import time
    start_time = time.time()

    index_fun_probs = []
    fun_probs = {}
    if FLAGS["pooling"] is None:
        pooling_flag = "none"
    else:
        pooling_flag = FLAGS["pooling"]
    outfilename = (results_folder + "index_funs_probs_" + str(rank) + "_"
                   + FLAGS["prefix"] + "_" + str(shifted_init_shift) + "_"
                   + FLAGS["dataset"] + "_" + FLAGS["network"] + "_"
                   + str(FLAGS["number_layers"]) + "_" + pooling_flag + "_"
                   + FLAGS["intermediate_pooling"] + ".txt")

    from initialization import get_all_layers, is_normalization_layer, reset_weights, simple_reset_weights
    if network not in ["cnn", "fc"]:
        layers = get_all_layers(model)
        are_norm = [is_normalization_layer(l) for l in layers for w in l.get_weights()]
        initial_weights = model.get_weights()

    local_index = 0
    '''SAMPLING LOOP'''
    for index in tasks:
        outfile = open(outfilename, "a")
        print(index)
        if local_index > 0:
            # Re-sample the weights (the first iteration uses the freshly loaded model).
            if network in ["cnn", "fc"]:
                simple_reset_weights(model, sigmaw, sigmab)
            else:
                reset_weights(model, initial_weights, are_norm, sigmaw, sigmab, truncated_init_dist)
            # model = load_model(FLAGS)  # this also resets the weights (the JSON string only has the architecture)
        # save weights?
        # model.save_weights("sampled_nets/" + str(index) + "_" + json_string_filename + ".h5")
        # predictions = tf.keras.backend.eval(model(data)) > 0
        predictions = model.predict(data) > 0
        fstring = "".join([str(int(x[0])) for x in predictions])
        n1s = len([x for x in fstring if x == "1"])
        ent = entropy(fstring)
        # if fstring not in fun_probs:
        #     fun_probs[fstring] = calculate_logPU(predictions)
        # index_fun_probs.append((index, ent, fstring, fun_probs[fstring]))
        # index_fun_probs.append((index, ent, fstring))
        outfile.write(str(index) + "\t" + fstring + "\t" + str(ent) + "\t" + str(n1s) + "\n")
        outfile.close()
        # keras.backend.clear_session()
        local_index += 1

    print("--- %s seconds ---" % (time.time() - start_time))
    index_fun_probs = comm.gather(index_fun_probs, root=0)
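# Hedged sketch of the entropy statistic written to the output file above.
# `entropy` is imported from utils; it is assumed here (not verified) to be the
# Shannon entropy of the 0/1 frequency of the sampled function string.  A minimal
# version under that assumption, for illustration only:
def _example_bit_entropy(fstring):
    import numpy as np
    p = fstring.count("1") / len(fstring)
    if p in (0.0, 1.0):
        return 0.0
    return -p * np.log2(p) - (1 - p) * np.log2(1 - p)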
def main(_):
    MAX_TRAIN_EPOCHS = 5000
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if doing_regression:
        assert loss == "mse"
    global threshold

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    num_tasks_per_job = number_inits // size
    tasks = list(range(int(rank * num_tasks_per_job), int((rank + 1) * num_tasks_per_job)))
    if rank < number_inits % size:
        tasks.append(size * num_tasks_per_job + rank)

    import os
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    from tensorflow import keras

    '''LOAD DATA & ARCHITECTURE'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, test_images, test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    # print("ys", ys)

    # Process the data into the format the GP code expects.  Evaluate on a smaller
    # test sample, because otherwise the GP would run out of memory.
    test_images = test_images[:1000]
    test_ys = test_ys[:1000]

    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0, len(data.shape) - 1], np.arange(1, len(data.shape) - 1)])
    print(data.shape, tp_order)
    flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW, because the CNN GP kernels assume this
    flat_test_images = np.array([test_image.flatten() for test_image in flat_data])
    Xfull = np.concatenate([flat_train_images, flat_test_images])
    ys2 = [[y] for y in ys]
    ysfull = ys2 + [[y] for y in test_ys]
    Yfull = np.array(ysfull)
    Y = np.array(ys2)

    # Load the empirical NTK and the NNGP kernel (both over train + test points).
    FLAGS["use_empirical_NTK"] = True
    theta_full = load_kernel(FLAGS)
    # print(theta_full)
    FLAGS["use_empirical_NTK"] = False
    K_pre = load_kernel(FLAGS)
    print(K_pre)
    if normalize_kernel:
        K_pre = K_pre / K_pre.max()
    Kfull = kernel_mult * K_pre

    input_dim = train_images.shape[1]
    num_channels = train_images.shape[-1]
    print(train_images.shape, ys.shape)

    # Split the full kernels into train/train, test/test and train/test blocks.
    n = X.shape[0]
    K_train = Kfull[:n, :n]
    K_test = Kfull[n:, n:]
    K_train_test = Kfull[:n, n:]
    theta_train = theta_full[:n, :n]
    theta_test = theta_full[n:, n:]
    theta_train_test = theta_full[:n, n:]
    mu, Sigma = NTK_posterior(K_train, K_test, K_train_test,
                              theta_train, theta_test, theta_train_test,
                              X, Y, t=training_time)

    sample_weights = None
    if gamma != 1.0:
        sample_weights = np.ones(len(ys))
        if not oversampling2:
            sample_weights[m:] = gamma
        else:
            raise NotImplementedError("Gamma not equal to 1.0 with oversampling2 not implemented")

    model = load_model(FLAGS)

    set_session = tf.compat.v1.keras.backend.set_session
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''TRAINING LOOP'''
    # Running sums used to compute means and variances over samples.
    # functions = []
    test_accs = 0
    test_accs_squared = 0
    test_sensitivities = 0
    test_specificities = 0
    train_accs = 0
    train_accs_squared = 0

    funs_filename = results_folder + prefix + "_" + str(rank) + "_nn_train_functions.txt"

    if loss == "mse":
        likelihood = "gaussian"
    elif loss == "ce":
        likelihood = "bernoulli"
    print("Training GP with " + likelihood + " likelihood")

    model.compile("sgd", loss="mse")

    from initialization import get_all_layers, is_normalization_layer, reset_weights, simple_reset_weights
    if network not in ["cnn", "fc"]:
        layers = get_all_layers(model)
        are_norm = [is_normalization_layer(l) for l in layers for w in l.get_weights()]
        initial_weights = model.get_weights()

    K_train_train = Kfull[:len(X), :len(X)]
    X_train_test = Kfull[:len(X), len(X):len(Xfull) - len(X)]
    # predictor = nt.predict.gradient_descent_mse(g_dd, y_train, g_td)

    '''MAIN LOOP'''
    local_index = 0
    from math import ceil
    samples_per_chunk_base = min(len(tasks), 10000)
    num_chunks = len(tasks) // samples_per_chunk_base
    remainder = len(tasks) % samples_per_chunk_base
    if remainder > 0:
        num_chunks += 1
    for chunki in range(num_chunks):
        print(chunki)
        if chunki == num_chunks - 1 and remainder > 0:
            samples_per_chunk = remainder
        else:
            samples_per_chunk = samples_per_chunk_base
        funs_file = open(funs_filename, "a")

        # If the labels are to be generated by a neural network in parallel:
        if nn_random_labels or nn_random_regression_outputs:
            if network in ["cnn", "fc"]:
                simple_reset_weights(model, sigmaw, sigmab)
            else:
                reset_weights(model, initial_weights, are_norm, sigmaw, sigmab, truncated_init_dist)
            if nn_random_labels:
                ys = model.predict(train_images)[:, 0] > 0
                if training:
                    test_ys = model.predict(test_images)[:, 0] > 0
            else:
                ys = model.predict(train_images)[:, 0]
                if training:
                    test_ys = model.predict(test_images)[:, 0]

        local_index += 1

        # preds = model.predict(flat_test_images)[0]
        # The output of posterior_samples would have dimensions
        # (number of input points) x (dimension of output Y) x (number of samples):
        # preds = model.posterior_samples(flat_test_images, size=samples_per_chunk)[:, 0, :].T
        print(mu.shape)
        preds = np.random.multivariate_normal(mu, Sigma, size=samples_per_chunk)
        print(preds.shape)
        # preds = np.array([pred[0] for pred in preds])

        if not doing_regression:
            th = 0.5
            train_loss, train_acc = 0, 1.0 * samples_per_chunk
            test_loss = np.sum(cross_entropy_loss(test_ys, preds)) / len(test_ys)
            test_acc = np.sum((preds > th) == test_ys) / len(test_ys)
        else:
            train_acc = train_loss = 0
            test_acc = test_loss = np.sum(cross_entropy_loss(test_ys, preds)) / len(test_ys)

        if loss == "mse":
            # NOTE: sensitivity and specificity are not implemented for MSE loss.
            test_sensitivity = -1
            test_specificity = -1
        else:
            print("threshold", threshold)
            # TODO: this is ugly; add a flag that says whether we are doing threshold selection or not.
            if threshold != -1:
                for th in np.linspace(0, 1, 1000):
                    test_specificity = np.sum(((sigmoid(preds) > th) == test_ys[:100]) * (test_ys[:100] == 0)) / np.sum(test_ys[:100] == 0)
                    if test_specificity > 0.99:
                        num_0s = np.sum(test_ys == 0)
                        if num_0s > 0:
                            test_specificity = np.sum(((sigmoid(preds) > th) == test_ys) * (test_ys == 0)) / num_0s
                        else:
                            test_specificity = -1
                        if test_specificity > 0.99:
                            num_1s = np.sum(test_ys == 1)
                            if num_1s > 0:
                                test_sensitivity = np.sum(((sigmoid(preds) > th) == test_ys) * (test_ys == 1)) / num_1s
                            else:
                                test_sensitivity = -1
                            break
            else:
                # A low number of thresholds was used here before, as unbalanced
                # datasets are not being explored right now.
                test_specificity = -1
                test_sensitivity = -1

        print("Training accuracy", train_acc / samples_per_chunk)
        print('Test accuracy:', test_acc / samples_per_chunk)
        if threshold != -1:
            print('Test sensitivity:', test_sensitivity / samples_per_chunk)
            print('Test specificity:', test_specificity / samples_per_chunk)

        if not ignore_non_fit or train_acc == 1.0:
            print("printing function to file", funs_filename)
            functions = preds[:, :test_function_size] > 0.5
            functions = functions.astype(int)
            print(functions.shape)
            functions = [''.join([str(int(x)) for x in function]) + "\r\n" for function in functions]
            funs_file.writelines(functions)
            funs_file.close()
            # functions.append(function)

        test_accs += test_acc
        test_accs_squared += test_acc**2
        test_sensitivities += test_sensitivity
        test_specificities += test_specificity
        train_accs += train_acc
        train_accs_squared += train_acc**2

    test_accs_recv = comm.reduce(test_accs, root=0)
    test_accs_squared_recv = comm.reduce(test_accs_squared, root=0)
    test_sensitivities_recv = comm.reduce(test_sensitivities, root=0)
    test_specificities_recv = comm.reduce(test_specificities, root=0)
    train_accs_recv = comm.reduce(train_accs, root=0)
    train_accs_squared_recv = comm.reduce(train_accs_squared, root=0)

    '''PROCESS COLLECTIVE DATA'''
    if rank == 0:
        test_acc = test_accs_recv / number_inits
        test_sensitivity = test_sensitivities_recv / number_inits
        test_specificity = test_specificities_recv / number_inits
        train_acc = train_accs_recv / number_inits
        print('Mean train accuracy:', train_acc)
        print('Mean test accuracy:', test_acc)
        if threshold != -1:
            print('Mean test sensitivity:', test_sensitivity)
            print('Mean test specificity:', test_specificity)

        train_acc_std = train_accs_squared_recv / number_inits - train_acc**2
        test_acc_std = test_accs_squared_recv / number_inits - test_acc**2

        useful_train_flags = ["dataset", "m", "network", "pooling", "ignore_non_fit",
                              "test_function_size", "number_layers", "sigmaw", "sigmab",
                              "init_dist", "use_shifted_init", "shifted_init_shift",
                              "whitening", "centering", "oversampling", "oversampling2",
                              "channel_normalization", "training", "binarized",
                              "confusion", "filter_sizes", "gamma", "intermediate_pooling",
                              "label_corruption", "threshold", "n_gpus",
                              "n_samples_repeats", "layer_widths", "number_inits", "padding"]
        with open(results_folder + prefix + "nn_training_results.txt", "a") as file:
            file.write("#")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(key))
            file.write("\t".join(["train_acc", "test_error", "test_acc", "test_sensitivity",
                                  "test_specificity", "train_acc_std", "test_acc_std"]))
            file.write("\n")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(FLAGS[key]))
            file.write("{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\n".format(
                train_acc, 1 - test_acc, test_acc,
                test_sensitivity, test_specificity,
                train_acc_std, test_acc_std))
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError("Initialization distributions other than Gaussian "
                                  "are not implemented for computing kernels!")

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session = keras.backend.set_session
    config.log_device_placement = False  # log device placement (on which device the operation ran)
    config.allow_soft_placement = True  # use any other supported device if the requested GPU:0 isn't found
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    train_images, flat_train_images, _, test_images, _ = load_data(FLAGS)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    # print("image_size", image_size)
    X = train_images
    flat_X = flat_train_images
    if compute_for_GP_train:
        # Also include (a subset of) the test inputs, so the same kernel can be
        # reused for GP training/prediction.
        test_images = test_images[:1000]
        data = test_images
        tp_order = np.concatenate([[0, len(data.shape) - 1], np.arange(1, len(data.shape) - 1)])
        print(data.shape, tp_order)
        flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW, because the CNN GP kernels assume this
        flat_test_images = np.array([test_image.flatten() for test_image in flat_data])
        Xfull = np.concatenate([flat_train_images, flat_test_images])
        flat_X = Xfull
        X = np.concatenate([train_images, test_images])

    print("compute kernel", network, dataset)

    # COMPUTE KERNEL
    if use_empirical_NTK:
        from nngp_kernel.empirical_ntk import empirical_NTK
        print(ceil(int(X.shape[0]) * n_samples_repeats))
        from tensorflow.keras.models import model_from_json
        model = load_model(FLAGS)
        K = empirical_NTK(model, X)  # , sess=sess)
    elif use_empirical_K:
        from nngp_kernel.empirical_kernel import empirical_K
        print("n_samples_repeats", n_samples_repeats)
        print(ceil(int(X.shape[0]) * n_samples_repeats))
        arch_json_string = load_model_json(FLAGS)
        K = empirical_K(arch_json_string, X,
                        ceil(int(X.shape[0]) * n_samples_repeats),
                        sigmaw=sigmaw,
                        sigmab=sigmab,
                        n_gpus=n_gpus,
                        empirical_kernel_batch_size=empirical_kernel_batch_size,
                        sess=sess,
                        truncated_init_dist=truncated_init_dist,
                        data_parallelism=False,
                        store_partial_kernel=store_partial_kernel,
                        partial_kernel_n_proc=partial_kernel_n_proc,
                        partial_kernel_index=partial_kernel_index)

    if rank == 0:
        if not (use_empirical_K or use_empirical_NTK):
            if network == "cnn":
                from nngp_kernel.cnn_kernel import kernel_matrix
                K = kernel_matrix(flat_X,
                                  image_size=image_size,
                                  number_channels=number_channels,
                                  filter_sizes=filter_sizes,
                                  padding=padding,
                                  strides=strides,
                                  sigmaw=sigmaw,
                                  sigmab=sigmab,
                                  n_gpus=n_gpus)
            elif network == "resnet":
                from nngp_kernel.resnet_kernel import kernel_matrix
                K = kernel_matrix(flat_X,
                                  depth=number_layers,
                                  image_size=image_size,
                                  number_channels=number_channels,
                                  n_blocks=3,
                                  sigmaw=sigmaw,
                                  sigmab=sigmab,
                                  n_gpus=n_gpus)
            elif network == "fc":
                from nngp_kernel.fc_kernel import kernel_matrix
                K = kernel_matrix(flat_X,
                                  number_layers=number_layers,
                                  sigmaw=sigmaw,
                                  sigmab=sigmab,
                                  n_gpus=n_gpus)
        print(K)

        '''SAVE KERNEL'''
        if store_partial_kernel:
            save_kernel_partial(K, FLAGS, partial_kernel_index)
        else:
            save_kernel(K, FLAGS)
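# Hedged sketch of what nngp_kernel.empirical_ntk.empirical_NTK is assumed to
# compute (the real implementation lives in that module and is not shown here):
# the empirical NTK Theta(x, x') = J(x) J(x')^T, where J is the Jacobian of the
# scalar network output with respect to all trainable parameters.  Requires eager
# execution and is memory-hungry for large models; illustration only.
def _example_empirical_ntk(model, X):
    import numpy as np
    import tensorflow as tf
    X = tf.convert_to_tensor(X, dtype=tf.float32)
    with tf.GradientTape() as tape:
        out = model(X)[:, 0]  # scalar output per input
    jacs = tape.jacobian(out, model.trainable_variables)  # one block per variable
    J = np.concatenate([tf.reshape(j, (X.shape[0], -1)).numpy() for j in jacs], axis=1)
    return J @ J.T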
def main(_):
    FLAGS = tf.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)
    # total_samples = m

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    # num_inits_per_task = 1
    # num_tasks = int(sys.argv[1])
    num_tasks = number_samples
    num_gpus = n_gpus

    num_tasks_per_job = num_tasks // size
    tasks = list(range(rank * num_tasks_per_job, (rank + 1) * num_tasks_per_job))
    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    # config = tf.ConfigProto(device_count={'GPU': rank % num_gpus})
    config = tf.ConfigProto()
    if num_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % num_gpus)
    config.gpu_options.allow_growth = True
    tf.enable_eager_execution(config=config)

    from utils import load_data, load_model, load_kernel
    data, flat_data, _, _, _ = load_data(FLAGS)
    data = tf.constant(data)
    model = load_model(FLAGS)
    K = load_kernel(FLAGS)

    def lass(model, x, r=0.01):
        """LASS test: return True if a small signed-gradient perturbation
        (clipped to +-r per pixel) flips the sign of the model output."""
        pred = tf.sign(model(x))
        alpha = 0.5
        # alpha = 0.25
        # beta = 0.2
        deltax = tf.zeros(x.shape)
        xtilde = x + deltax
        max_iters = 20
        iterr = 0
        while iterr < max_iters:
            with tf.GradientTape() as g:
                g.watch(xtilde)
                y = model(xtilde)
            grads = g.gradient(y, xtilde)
            delta = alpha * tf.sign(-pred * grads)  # + beta*tf.random.normal(x.shape)
            deltax += delta
            deltax = tf.clip_by_value(deltax, -r, r)
            # deltax -= tf.to_float(tf.math.abs(deltax) >= r) * tf.clip_by_value(deltax, -r, r)
            xtilde = x + deltax
            # print(grads)
            if tf.sign(model(xtilde)).numpy()[0] != pred.numpy()[0]:
                return True
            iterr += 1
        return False

    def crit_sample_ratio(model, xs, r=0.01):
        """Fraction of inputs within r of a decision boundary (critical samples)."""
        # (Is 0.3 fine for a 0-1 scaling?  When they say 0-255, what do they mean?)
        crit_samples = 0
        for i in range(int(xs.shape[0])):
            # print(i)
            # print(xs[i:i+1, :, :, :])
            if lass(model, xs[i:i + 1, :, :, :], r):
                crit_samples += 1
        return 1.0 * crit_samples / int(xs.shape[0])

    # %%
    print("Beginning job %d of %d" % (rank, size))
    import time
    start_time = time.time()

    crit_sample_ratios = []
    # probs = []
    for index in tasks:
        print(index)
        model.load_weights("./sampled_nets/" + str(index) + "_" + json_string_filename + ".h5")
        csr = crit_sample_ratio(model, data, r=0.03)
        crit_sample_ratios.append((index, csr))
        with open(results_folder + "CSRs_" + FLAGS["prefix"] + "_" + FLAGS["dataset"] + "_"
                  + FLAGS["network"] + "_" + str(FLAGS["number_layers"]) + "_"
                  + FLAGS["pooling"] + "_" + FLAGS["intermediate_pooling"] + ".txt", "a") as f:
            f.write(str(index) + "\t" + str(csr) + "\n")
        # print(csr)

    print("--- %s seconds ---" % (time.time() - start_time))
    print("Finishing job %d of %d" % (rank, size))

    csr_data = comm.gather(crit_sample_ratios, root=0)
    # tf.keras.initializers.glorot_uniform
    if rank == 0:
        csr_data = sum(csr_data, [])
        pickle.dump(csr_data,
                    open(results_folder + "CSRs_" + FLAGS["prefix"] + "_" + FLAGS["dataset"] + "_"
                         + FLAGS["network"] + "_" + str(FLAGS["number_layers"]) + "_"
                         + FLAGS["pooling"] + "_" + FLAGS["intermediate_pooling"] + ".p", "wb"))
def main(_):
    MAX_TRAIN_EPOCHS = 20000
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if doing_regression:
        assert loss == "mse"
    global threshold

    if using_mpi:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()
    else:
        rank = 0
        size = 1

    num_tasks_per_job = number_inits // size
    tasks = list(range(int(rank * num_tasks_per_job), int((rank + 1) * num_tasks_per_job)))
    if rank < number_inits % size:
        tasks.append(size * num_tasks_per_job + rank)

    import os
    print(rank)
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    from tensorflow import keras

    def binary_accuracy_for_mse(y_true, y_pred):
        if zero_one:
            return keras.backend.mean(tf.cast(tf.equal(tf.cast(y_pred > 0.5, tf.float32), y_true), tf.float32))
        else:
            return keras.backend.mean(tf.cast(tf.equal(tf.math.sign(y_pred), y_true), tf.float32))

    print(tf.__version__)

    # Early-stopping callbacks: stop once the monitored accuracy (or loss, for
    # regression) reaches the target value, then wait `epochs_after_fit` more epochs.
    if loss == "mse":
        callbacks = [EarlyStoppingByAccuracy(monitor='val_binary_accuracy_for_mse',
                                             value=acc_threshold, verbose=0,
                                             wait_epochs=epochs_after_fit)]
        if doing_regression:
            callbacks = [EarlyStoppingByLoss(monitor='val_loss', value=1e-2, verbose=0,
                                             wait_epochs=epochs_after_fit)]
    else:
        # if tf.__version__[:3] == "2.1":
        if tf.__version__[0] == "2":
            print("hi im tf 2")
            callbacks = [EarlyStoppingByAccuracy(monitor='val_accuracy', value=acc_threshold,
                                                 verbose=0, wait_epochs=epochs_after_fit)]
        else:
            callbacks = [EarlyStoppingByAccuracy(monitor='val_acc', value=acc_threshold,
                                                 verbose=0, wait_epochs=epochs_after_fit)]
    # callbacks += [EarlyStopping(monitor='val_loss', patience=2, verbose=0),
    #               ModelCheckpoint(kfold_weights_path, monitor='val_loss', save_best_only=True, verbose=0)]

    '''LOAD DATA & ARCHITECTURE'''
    from utils import load_data, load_model, load_kernel
    train_images, _, ys, test_images, test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    print("ys", ys)
    input_dim = train_images.shape[1]
    num_channels = train_images.shape[-1]
    print(train_images.shape, ys.shape)

    sample_weights = None
    if gamma != 1.0:
        sample_weights = np.ones(len(ys))
        if not oversampling2:
            sample_weights[m:] = gamma
        else:
            raise NotImplementedError("Gamma not equal to 1.0 with oversampling2 not implemented")

    model = load_model(FLAGS)

    set_session = tf.compat.v1.keras.backend.set_session
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''TRAINING LOOP'''
    # Running sums (and sums of squares) used to compute means and variances over inits.
    test_accs = 0
    test_accs_squared = 0
    test_sensitivities = 0
    test_specificities = 0
    train_accs = 0
    train_accs_squared = 0
    weightss = None
    biasess = None
    weightss_squared = None
    biasess_squared = None
    weights_norms = 0
    biases_norms = 0
    weights_norms_squared = 0
    biases_norms_squared = 0
    iterss = 0

    funs_filename = results_folder + prefix + "_" + str(rank) + "_nn_train_functions.txt"

    print("Training NN with", loss, "and optimizer", optimizer)
    if optimizer == "langevin":
        optim = tfp.optimizer.StochasticGradientLangevinDynamics(learning_rate=0.01)
    elif optimizer == "sgd":
        # optim = keras.optimizers.SGD(lr=learning_rate)
        optim = keras.optimizers.SGD(lr=0.001, momentum=0.9, decay=1e-6)
    elif optimizer == "adam":
        optim = keras.optimizers.Adam(lr=learning_rate)
    else:
        optim = optimizer

    def get_metrics():
        if doing_regression:
            # return [keras.losses.mean_squared_error]
            return []
        elif loss == "mse":
            return [binary_accuracy_for_mse]
        else:
            return ['accuracy']

    print(loss)
    model.compile(optim,
                  loss=binary_crossentropy_from_logits if loss == "ce" else loss,
                  metrics=get_metrics())
    # metrics=['accuracy', sensitivity])
    # metrics=['accuracy', tf.keras.metrics.SensitivityAtSpecificity(0.99), tf.keras.metrics.FalsePositives()])

    from initialization import get_all_layers, is_normalization_layer, reset_weights, simple_reset_weights
    if network not in ["cnn", "fc"]:
        layers = get_all_layers(model)
        are_norm = [is_normalization_layer(l) for l in layers for w in l.get_weights()]
        initial_weights = model.get_weights()

    local_index = 0
    for init in tasks:
        funs_file = open(funs_filename, "a")
        # print(init)

        # TODO: move to a different file, as this is repeated in GP_train.
        # If the labels are to be generated by a neural network in parallel:
        if nn_random_labels or nn_random_regression_outputs:
            if local_index > 0:
                if network in ["cnn", "fc"]:
                    simple_reset_weights(model, sigmaw, sigmab)
                else:
                    reset_weights(model, initial_weights, are_norm, sigmaw, sigmab, truncated_init_dist)
            if nn_random_labels:
                ys = model.predict(train_images)[:, 0] > 0
                if training:
                    test_ys = model.predict(test_images)[:, 0] > 0
            else:
                ys = model.predict(train_images)[:, 0]
                if training:
                    test_ys = model.predict(test_images)[:, 0]

        # Reinitialize the network (and always reinitialize if the labels were just
        # generated by the network above).
        if local_index > 0 or nn_random_labels or nn_random_regression_outputs:
            if network in ["cnn", "fc"]:
                simple_reset_weights(model, sigmaw, sigmab)
            else:
                reset_weights(model, initial_weights, are_norm, sigmaw, sigmab)
        local_index += 1

        # This would also reinitialize the net:
        # model = load_model(FLAGS)
        # model.compile(optim,
        #               loss=binary_crossentropy_from_logits if loss == "ce" else loss,
        #               metrics=get_metrics())

        weights, biases = get_weights(model), get_biases(model)
        weights_norm, biases_norm = measure_sigmas(model)
        # print(weights_norm, biases_norm)

        # batch_size = min(batch_size, m)
        if train_one_epoch:
            model.fit(train_images.astype(np.float32), ys.astype(np.float32), verbose=1,
                      sample_weight=sample_weights,
                      validation_data=(train_images.astype(np.float32), ys.astype(np.float32)),
                      epochs=1, batch_size=min(m, batch_size))
            sys.stdout.flush()
        else:
            model.fit(train_images.astype(np.float32), ys.astype(np.float32), verbose=1,
                      sample_weight=sample_weights,
                      validation_data=(train_images.astype(np.float32), ys.astype(np.float32)),
                      epochs=MAX_TRAIN_EPOCHS, callbacks=callbacks, batch_size=min(m, batch_size))
            sys.stdout.flush()

        '''GET DATA: weights, and errors'''
        weights, biases = get_rescaled_weights(model)
        weights_norm, biases_norm = measure_sigmas(model)  # TODO: make sure it works with archs with norm layers etc.
        # print(weights_norm, biases_norm)

        if not doing_regression:  # classification
            train_loss, train_acc = model.evaluate(train_images.astype(np.float32), ys.astype(np.float32), verbose=0)
            test_loss, test_acc = model.evaluate(test_images.astype(np.float32), test_ys.astype(np.float32), verbose=0)
        else:
            train_acc = train_loss = model.evaluate(train_images.astype(np.float32), ys, verbose=0)
            test_acc = test_loss = model.evaluate(test_images.astype(np.float32), test_ys, verbose=0)

        preds = model.predict(test_images)[:, 0]
        # print(preds)
        # print(preds.shape)
        # test_false_positive_rate = test_fps / (len([x for x in test_ys if x == 1]))

        def sigmoid(x):
            return np.exp(x) / (1 + np.exp(x))

        if loss == "mse":
            # NOTE: sensitivity and specificity are not implemented for MSE loss.
            test_sensitivity = -1
            test_specificity = -1
        else:
            # print("threshold", threshold)
            # TODO: this is ugly; add a flag that says whether we are doing threshold selection or not.
            if threshold != -1:
                for th in np.linspace(0, 1, 1000):
                    test_specificity = sum([(sigmoid(preds[i]) > th) == x for i, x in enumerate(test_ys[:100]) if x == 0]) / (len([x for x in test_ys[:100] if x == 0]))
                    if test_specificity > 0.99:
                        num_0s = len([x for x in test_ys if x == 0])
                        if num_0s > 0:
                            test_specificity = sum([(sigmoid(preds[i]) > th) == x for i, x in enumerate(test_ys) if x == 0]) / num_0s
                        else:
                            test_specificity = -1
                        if test_specificity > 0.99:
                            num_1s = len([x for x in test_ys if x == 1])
                            if num_1s > 0:
                                test_sensitivity = sum([(sigmoid(preds[i]) > th) == x for i, x in enumerate(test_ys) if x == 1]) / num_1s
                            else:
                                test_sensitivity = -1
                            break
            else:
                # A low number of thresholds was used here before, as unbalanced
                # datasets are not being explored right now.
                test_specificity = -1
                test_sensitivity = -1

        # print("Training accuracy", train_acc)
        # print('Test accuracy:', test_acc)
        # print('Test sensitivity:', test_sensitivity)
        # print('Test specificity:', test_specificity)

        if not ignore_non_fit or train_acc >= acc_threshold:
            # print("printing function to file", funs_filename)
            function = (model.predict(test_images[:test_function_size].astype(np.float32), verbose=0))[:, 0]
            if loss == "mse" and zero_one:
                function = function > 0.5
            else:
                function = function > 0
            function = function.astype(int)
            function = ''.join([str(int(i)) for i in function])
            funs_file.write(function + "\r\n")
            funs_file.close()
            # functions.append(function)

        test_accs += test_acc
        test_accs_squared += test_acc**2
        test_sensitivities += test_sensitivity
        test_specificities += test_specificity
        train_accs += train_acc
        train_accs_squared += train_acc**2

        if weightss is None:
            weightss = weights
            biasess = biases
            weightss_squared = weights**2
            biasess_squared = biases**2
        else:
            weightss += weights
            biasess += biases
            weightss_squared += weights**2
            biasess_squared += biases**2

        weights_norms += weights_norm
        weights_norms_squared += weights_norm**2
        biases_norms += biases_norm
        biases_norms_squared += biases_norm**2

        iterss += model.history.epoch[-1]
        # keras.backend.clear_session()
        gc.collect()

    # functions = comm.gather(functions, root=0)

    # Allocate receive buffers on the root rank for the flattened weight statistics.
    if rank == 0:
        weights_shape = weightss.flatten().shape[0]
        biases_shape = biasess.flatten().shape[0]
        weightss_recv = np.zeros(weights_shape, dtype=np.float32)
        biasess_recv = np.zeros(biases_shape, dtype=np.float32)
        weightss_squared_recv = np.zeros(weights_shape, dtype=np.float32)
        biasess_squared_recv = np.zeros(biases_shape, dtype=np.float32)
    else:
        weightss_recv = None
        weightss_squared_recv = None
        biasess_recv = None
        biasess_squared_recv = None

    if using_mpi:
        test_accs_recv = comm.reduce(test_accs, root=0)
        test_accs_squared_recv = comm.reduce(test_accs_squared, root=0)
        test_sensitivities_recv = comm.reduce(test_sensitivities, root=0)
        test_specificities_recv = comm.reduce(test_specificities, root=0)
        train_accs_recv = comm.reduce(train_accs, root=0)
        train_accs_squared_recv = comm.reduce(train_accs_squared, root=0)
        comm.Reduce(weightss.flatten(), weightss_recv, root=0)
        comm.Reduce(biasess.flatten(), biasess_recv, root=0)
        comm.Reduce(weightss_squared.flatten(), weightss_squared_recv, root=0)
        comm.Reduce(biasess_squared.flatten(), biasess_squared_recv, root=0)
        weights_norms_recv = comm.reduce(weights_norms, root=0)
        weights_norms_squared_recv = comm.reduce(weights_norms_squared, root=0)
        biases_norms_recv = comm.reduce(biases_norms, root=0)
        biases_norms_squared_recv = comm.reduce(biases_norms_squared, root=0)
        iterss_recv = comm.reduce(iterss, root=0)
    else:
        test_accs_recv = test_accs
        test_accs_squared_recv = test_accs_squared
        test_sensitivities_recv = test_sensitivities
        test_specificities_recv = test_specificities
        train_accs_recv = train_accs
        train_accs_squared_recv = train_accs_squared
        weightss_recv = weightss.flatten()
        biasess_recv = biasess.flatten()
        weightss_squared_recv = weightss_squared.flatten()
        biasess_squared_recv = biasess_squared.flatten()
        weights_norms_recv = weights_norms
        weights_norms_squared_recv = weights_norms_squared
        biases_norms_recv = biases_norms
        biases_norms_squared_recv = biases_norms_squared
        iterss_recv = iterss

    '''PROCESS COLLECTIVE DATA'''
    if rank == 0:
        # Average over the dimension indexing which weight it is (we have already
        # reduced over the number_inits dimension).
        weights_mean = np.mean(weightss_recv) / number_inits
        biases_mean = np.mean(biasess_recv) / number_inits
        weights_std = np.mean(weightss_squared_recv) / number_inits - weights_mean**2
        biases_std = np.mean(biasess_squared_recv) / number_inits - biases_mean**2
        weights_norm_mean = weights_norms_recv / number_inits
        weights_norm_std = weights_norms_squared_recv / number_inits - weights_norm_mean**2
        biases_norm_mean = biases_norms_recv / number_inits
        biases_norm_std = biases_norms_squared_recv / number_inits - biases_norm_mean**2

        test_acc = test_accs_recv / number_inits
        test_sensitivity = test_sensitivities_recv / number_inits
        test_specificity = test_specificities_recv / number_inits
        train_acc = train_accs_recv / number_inits
        print('Mean test accuracy:', test_acc)
        print('Mean test sensitivity:', test_sensitivity)
        print('Mean test specificity:', test_specificity)
        print('Mean train accuracy:', train_acc)

        train_acc_std = train_accs_squared_recv / number_inits - train_acc**2
        test_acc_std = test_accs_squared_recv / number_inits - test_acc**2
        mean_iters = 1.0 * iterss_recv / number_inits

        useful_train_flags = ["dataset", "m", "network", "loss", "optimizer", "pooling",
                              "epochs_after_fit", "ignore_non_fit", "test_function_size",
                              "batch_size", "number_layers", "sigmaw", "sigmab",
                              "init_dist", "use_shifted_init", "shifted_init_shift",
                              "whitening", "centering", "oversampling", "oversampling2",
                              "channel_normalization", "training", "binarized",
                              "confusion", "filter_sizes", "gamma", "intermediate_pooling",
                              "label_corruption", "threshold", "n_gpus",
                              "n_samples_repeats", "layer_widths", "number_inits", "padding"]
        with open(results_folder + prefix + "nn_training_results.txt", "a") as file:
            file.write("#")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(key))
            file.write("\t".join(["train_acc", "test_error", "test_acc", "test_sensitivity",
                                  "test_specificity", "weights_std", "biases_std",
                                  "weights_mean", "biases_mean", "weights_norm_mean",
                                  "weights_norm_std", "biases_norm_mean", "biases_norm_std",
                                  "mean_iters", "train_acc_std", "test_acc_std"]))
            file.write("\n")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(FLAGS[key]))
            file.write("{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t"
                       "{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:d}\t{:.4f}\t{:.4f}\n".format(
                           train_acc, 1 - test_acc, test_acc,
                           test_sensitivity, test_specificity, weights_std, biases_std,
                           weights_mean, biases_mean, weights_norm_mean, weights_norm_std,
                           biases_norm_mean, biases_norm_std, int(mean_iters),
                           train_acc_std, test_acc_std))
        # weights_norm is normalized to sqrt(input_dim)
def main(_):
    FLAGS = tf.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    os.environ["CUDA_VISIBLE_DEVICES"] = str((rank + 1) % n_gpus)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #tf.enable_eager_execution(config=config)
    set_session = keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, _, _ = load_data(FLAGS)
    X = flat_train_images
    ys2 = [[y] for y in ys]
    Y = np.array(ys2)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    # split the Monte Carlo sampling tasks evenly across the MPI ranks
    num_tasks = 100
    cupy_samples = 1e5
    num_tasks_per_job = num_tasks // size
    tasks = list(range(int(rank * num_tasks_per_job), int((rank + 1) * num_tasks_per_job)))
    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    print("compute probability and bound", network, dataset)
    K = load_kernel(FLAGS)

    import cupy as cp
    # import numpy as cp
    Y = cp.array(Y)
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()
    freq = 0
    for i in tasks:
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        # sample functions from the GP prior, threshold at 0, and count how many reproduce the training labels
        exact_samples = cp.random.multivariate_normal(cp.zeros(m), K, int(cupy_samples), dtype=np.float32) > 0
        fits_data = cp.prod(~(exact_samples[:, :m] ^ (Y.T == 1)), 1)
        indices = cp.where(fits_data)[0]
        freq += len(indices)

    # gather each rank's hit count on rank 0 and combine them into a single frequency
    freqs = comm.gather(freq, root=0)

    if rank == 0:
        freq = sum(freqs)
        prob = freq / (num_tasks * cupy_samples)
        logPU = np.log(prob)
        log10PU = np.log10(prob)
        print(log10PU)

        #compute PAC-Bayes bound
        delta = 2**-10
        bound = (-logPU + 2 * np.log(total_samples) + 1 - np.log(delta)) / total_samples
        bound = 1 - np.exp(-bound)
        print("pre-confusion-correction bound: ", bound)
        rho = confusion / (1.0 + confusion)
        bound = (bound - 0.5 * rho) / (1 - rho)  #to correct for the confusion changing the training data distribution (in training set, but not in test set)!
        print("Bound: ", bound)
        print("Accuracy bound: ", 1 - bound)

        useful_flags = ["dataset", "network", "m", "label_corruption", "confusion",
                        "number_layers", "sigmaw", "sigmab", "binarized", "pooling",
                        "intermediate_pooling", "whitening", "centering",
                        "channel_normalization", "training", "n_gpus"]
        with open(results_folder + prefix + "bounds.txt", "a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("bound")
            file.write("\t")
            file.write("log10PU")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(bound))
            file.write("\t")
            file.write("{}".format(log10PU))
            file.write("\n")
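# A minimal, numpy-only restatement of the PAC-Bayes bound evaluated in the script above,
# kept as a separate helper so the formula is easy to read in isolation. The argument names
# and the default delta are illustrative; the expressions themselves copy the ones used
# above (the realisable PAC-Bayes bound followed by the confusion correction).
def _example_pac_bayes_bound(log_PU, total_samples, delta=2**-10, confusion=0.0):
    import numpy as np
    # bound on expected error from the log prior probability of the training labels
    bound = (-log_PU + 2 * np.log(total_samples) + 1 - np.log(delta)) / total_samples
    bound = 1 - np.exp(-bound)
    # correct for the fraction of confusion data changing the training distribution
    rho = confusion / (1.0 + confusion)
    return (bound - 0.5 * rho) / (1 - rho)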
def main(_): FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict() FLAGS = preprocess_flags(FLAGS) print(FLAGS) globals().update(FLAGS) global m, total_samples, num_classes print("Generating input samples", dataset, m) from math import ceil if dataset == "cifar": image_size = 32 number_channels = 3 elif dataset == "imagenet": image_size = 256 number_channels = 3 elif dataset == "mnist": image_size = 28 number_channels = 1 elif dataset == "mnist-fashion": image_size = 28 number_channels = 1 elif dataset == "KMNIST": image_size = 28 number_channels = 1 elif dataset == "EMNIST": image_size = 28 number_channels = 1 elif dataset == "boolean": if boolean_input_dim is not None: input_dim = boolean_input_dim else: input_dim = 7 image_size = None elif dataset == "ion": input_dim = 34 image_size = None elif dataset == "calabiyau": input_dim = 180 image_size = None else: raise NotImplementedError if network in [ "cnn", "fc", "inception_resnet_v2", "inception_v3", "xception" ]: if network not in ["cnn", "fc"]: if network == "xception": image_size = max(image_size, 71) else: image_size = max(image_size, 75) else: image_size = max(image_size, 32) if dataset is not "boolean" or dataset is not "calabiyau" or dataset is not "ion": image_width = image_height = image_size #image datasets aliases = { "cifar": "CIFAR10", "mnist": "MNIST", "mnist-fashion": "FashionMNIST", "imagenet": "ImageNet" } if dataset in [ "cifar", "mnist", "mnist-fashion", "KMNIST", "EMNIST", "imagenet" ]: if dataset in aliases: dataset_attr = aliases[dataset] else: dataset_attr = dataset dataset_constructor = getattr(torchvision.datasets, dataset_attr) transformation = transforms.Compose([transforms.ToPILImage()] + ([transforms.Resize(image_size)] if image_size is not None else []) + [transforms.ToTensor()]) extra_kwargs = {} if dataset == "EMNIST": extra_kwargs = {"split": "byclass"} d1 = dataset_constructor("./datasets", download=True, transform=transformation, train=True, **extra_kwargs) d2 = dataset_constructor("./datasets", download=True, transform=transformation, train=False, **extra_kwargs) num_classes = len(d1.classes) #mm = int(ceil(d.data.shape[0]*5/6)) full_data = np.concatenate([d1.data, d2.data]) full_targets = np.concatenate([d1.targets, d2.targets]) if out_of_sample_test_error: #if extended_test_set: # (train_images,train_labels),(test_images,test_labels) = (data[:mm], targets[:mm]),(data[mm:],targets[mm:]) #else: (train_images, train_labels), (test_images, test_labels) = (d1.data, d1.targets), (d2.data, d2.targets) else: (train_images, train_labels), (test_images, test_labels) = (d1.data, d1.targets), (full_data, full_targets) if dataset == "cifar": train_images = torch.Tensor(train_images) test_images = torch.Tensor(test_images) print(train_images.min(), train_images.max()) #TODO: add custom datasets #non-image-like datasets: else: if dataset == "boolean": assert network == "fc" num_classes = 2 if centering: inputs = np.array( [[float(l) * 2.0 - 1 for l in "{0:07b}".format(i)] for i in range(0, 2**input_dim)]) else: #we ignore the 0 input, because it casues problems when computing the kernel matrix :P when sigmab==0 though #if sigmab==0: # inputs = np.array([[float(l) for l in "{0:07b}".format(i)] for i in range(1,2**7)]) #else: inputs = np.array([[float(l) for l in "{0:07b}".format(i)] for i in range(0, 2**input_dim)]) if boolfun is not "none": fun = boolfun else: if boolean_input_dim is not None: raise NotImplementedError( "It is not supported to use boolean_input_dim and not specify a explicit boolfun (which should 
have the same size)" ) if boolfun_comp is not "none": # open("boolfun_comps.txt","w").write("\n".join(list(funs.keys()))) funs = pickle.load(open("funs_per_complexity.p", "rb")) fun = np.random.choice(funs[boolfun_comp]) print("complexity", boolfun_comp) else: funs = pickle.load(open("funs_per_complexity.p", "rb")) comp = np.random.choice(list(funs.keys())) print("complexity", comp) fun = np.random.choice(funs[comp]) # funs = {} # with open("LZ_freq_1e6_7_40_40_1_relu.txt","r") as f: # for line in f.readlines(): # fun,comp,freq = line.strip().split("\t") # if comp not in funs: # funs[comp] = [fun] # else: # funs[comp].append(fun) # pickle.dump(funs,open("funs_per_complexity.p","wb")) print("fun", fun) #if sigmab==0 and not centering: # #labels=np.array([[int(xx)*2.0-1] for xx in list(fun)[1:]]) #start from 1 because we ignored the 0th input # labels=np.array([[int(xx)] for xx in list(fun)[1:]]) #start from 1 because we ignored the 0th input #else: #labels=np.array([[int(xx)*2.0-1] for xx in list(fun)[0:]]) labels = np.array([[int(xx)] for xx in list(fun)[0:]]) elif dataset == "calabiyau": assert network == "fc" num_classes = 2 #we ignore the 0 input, because it casues problems when computing the kernel matrix :P data = np.load("datasets/calabiyau.npz") inputs, labels = data["inputs"], data["targets"] if whitening: inputs = inputs - inputs.mean(0) elif dataset == "ion": assert network == "fc" num_classes = 2 #we ignore the 0 input, because it casues problems when computing the kernel matrix :P #data = np.load("datasets/calabiyau.npz") #inputs, labels = data["inputs"], data["targets"] #inputs = inputs - inputs.mean(0) data = pd.read_csv('datasets/ionosphere.csv') else: raise NotImplementedError global threshold if threshold == -1: threshold = ceil(num_classes / 2) # print(train_images.shape) ##get random training sample## # and perform some more processing # np.random.seed(42069) '''GET TRAINING SAMPLE INDICES''' '''AND DO PRE-PROCESSING if it's an image dataset''' #for datasets that are not images, like the boolean one if dataset == "boolean" or dataset == "calabiyau": if not random_training_set: raise NotImplementedError if booltrain_set is not None: indices = [i for i, x in enumerate(booltrain_set) if x == "1"] assert len(indices) == m elif oversampling: probs = list( map( lambda x: threshold / (num_classes * len(inputs)) if x >= threshold else (num_classes - threshold) / (num_classes * len(inputs)), inputs)) probs = np.array(probs) probs /= np.sum(probs) indices = np.random.choice(range(len(inputs)), size=int(total_samples), replace=False, p=probs) elif oversampling2: indices = np.random.choice(range(len(inputs)), size=int(total_samples), replace=False) indices = sum([[i]*(num_classes-threshold) for i in indices if train_labels[i]<threshold] \ + [[i]*threshold for i in indices if train_labels[i]>=threshold],[]) #print("Indices: ", indices) m *= int( (2 * (num_classes - threshold) * threshold / (num_classes))) else: indices = np.random.choice(range(int(len(inputs))), size=int(total_samples), replace=False) # print(indices) print( "train_set", "".join([("1" if i in indices else "0") for i in range(int(len(inputs)))])) if out_of_sample_test_error: test_indices = np.array( [i for i in range(len(inputs)) if i not in indices]) else: test_indices = np.array(range(len(inputs))) train_inputs = inputs[indices, :].astype(np.float32) train_labels = labels[indices] if training: test_inputs = inputs[test_indices, :] flat_test_images = test_inputs test_labels = labels[test_indices] flat_train_images = 
train_inputs elif dataset == "ion": np.random.seed(seed=708) data = data.reindex(np.random.permutation(data.index)) data = data.reset_index(drop=True) data = data.to_numpy() number_of_test_examples = 351 - m X_train_full = data[:-number_of_test_examples, :-1].astype(float) X_test_full = data[-number_of_test_examples:, :-1].astype(float) y_train_full = data[:-number_of_test_examples, -1].astype(float) y_test_full = data[-number_of_test_examples:, -1].astype(float) np.random.seed() n = data.shape[0] - number_of_test_examples train_inputs = X_train_full.reshape(n, 34).astype('float32') flat_train_images = train_inputs train_labels = y_train_full[:n].reshape(n, 1) n = number_of_test_examples test_inputs = X_test_full.reshape(n, 34).astype('float32') flat_test_images = test_inputs test_labels = y_test_full.reshape(n, 1) #for image datasets else: #data processing functions assume the images have values in range [0,255] #global train_images_obs #train_images_obs=train_images max1 = torch.max(train_images).item() max2 = torch.max(test_images).item() print("maxs", max1, max2) max_val = max(max1, max2) #train_images = train_images.numpy().astype(np.float32)*255.0/max_val #test_images = test_images.numpy().astype(np.float32)*255.0/max_val train_images = train_images.numpy().astype(np.uint8) test_images = test_images.numpy().astype(np.uint8) #GET TRAINIG SAMPLE INDICES if random_training_set: if oversampling: probs = list( map( lambda x: threshold / (num_classes) if x >= threshold else (num_classes - threshold) / (num_classes), train_labels)) probs = np.array(probs) probs /= np.sum(probs) indices = np.random.choice(range(len(train_images)), size=int(total_samples), replace=False, p=probs) elif oversampling2: indices = np.random.choice(range(len(train_images)), size=int(total_samples), replace=False) indices = sum([[i]*(num_classes-threshold) for i in indices if train_labels[i]<threshold] \ + [[i]*threshold for i in indices if train_labels[i]>=threshold],[]) m *= int((2 * (num_classes - threshold) * threshold / (num_classes))) else: indices = np.random.choice(range(len(train_images)), size=int(total_samples), replace=False) else: indices = np.arange(int(total_samples)) # print(indices) #if network == "nasnet": # train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC # train_images = keras_applications.nasnet.preprocess_input(train_images, backend=tf.keras.backend) # if training: # test_images = keras_applications.nasnet.preprocess_input(test_images, backend=tf.keras.backend) # train_labels = np.take(train_labels,indices) # print(len([x for x in train_labels if x<threshold])/len(train_images)) #elif network == "vgg19": # train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC # train_images = keras_applications.vgg19.preprocess_input(train_images, backend=tf.keras.backend) # if training: # test_images = keras_applications.vgg19.preprocess_input(test_images, backend=tf.keras.backend) # train_labels = np.take(train_labels,indices) # print(len([x for x in train_labels if x<threshold])/len(train_images)) #elif network == "vgg16": # train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC # train_images = keras_applications.vgg16.preprocess_input(train_images, backend=tf.keras.backend) # if training: # test_images = keras_applications.vgg16.preprocess_input(test_images, backend=tf.keras.backend) # train_labels = np.take(train_labels,indices) # print(len([x for x in train_labels if x<threshold])/len(train_images)) #elif network == "resnet50" or network == "resnet101" 
or network == "renset152": ## elif network == "resnet101" or network == "renset152": # train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC # train_images = keras_applications.resnet.preprocess_input(train_images, backend=tf.keras.backend) # # train_images = train_images/255.0 # # import matplotlib.pyplot as plt # # # print(train_images) # # plt.imshow(train_images[0]) # if training: # test_images = keras_applications.resnet.preprocess_input(test_images, backend=tf.keras.backend) # train_labels = np.take(train_labels,indices) # # test_images = test_images/255.0 # print(len([x for x in train_labels if x<threshold])/len(train_images)) #elif network in ["resnet_v2_50","resnetv2_101", "resnetv2_152"]: # train_images = (train_images[indices,:,:,:]).astype(np.float32) #NHWC # train_images = keras_applications.resnet_v2.preprocess_input(train_images, backend=tf.keras.backend) # if training: # test_images = keras_applications.resnet_v2.preprocess_input(test_images, backend=tf.keras.backend) # train_labels = np.take(train_labels,indices) # print(len([x for x in train_labels if x<threshold])/len(train_images)) #else: if True: train_images = train_images[indices] if training: test_images = test_images train_labels = np.take(train_labels, indices) print( len([x for x in train_labels if x < threshold]) / len(train_images)) ##adding channel dimenions for image datasets without them if dataset in ["mnist", "mnist-fashion", "KMNIST", "EMNIST"]: train_images = np.expand_dims(train_images, -1).astype(np.uint8) test_images = np.expand_dims(test_images, -1).astype(np.uint8) ## for non-flexible architectures, transform the data if network not in ["cnn", "fc"]: train_images = np.tile(train_images, (1, 1, 1, 3)) test_images = np.tile(test_images, (1, 1, 1, 3)) #print(train_images.dtype) # plt.imshow(train_images[0]) # plt.show() #print(train_images.shape) if network in ["cnn", "fc"]: #normalize the images pixels to be in [0,1] train_images = train_images.astype(np.float32) / 255.0 if training: test_images = test_images.astype(np.float32) / 255.0 else: #note that the transformation to PIL and back to Tensor normalizes the image pixels to be in [0,1] assert train_images.dtype == "uint8" #otherwise ToPILImage wants the input to be NCHW. wtff train_images = np.stack( [d1.transform(image) for image in train_images]) train_images = np.transpose( train_images, (0, 2, 3, 1) ) # this is because the pytorch transform changes it to NCHW for some reason :P if unnormalized_images: train_images = train_images * 255.0 if training: test_images = np.stack( [d1.transform(image) for image in test_images]) test_images = np.transpose(test_images, (0, 2, 3, 1)) if unnormalized_images: test_images = test_images * 255.0 print(train_images.shape) print("max after transforming", train_images.max()) #check correct dimensions if network != "fc": image_size = train_images.shape[1] assert train_images.shape[1] == train_images.shape[2] number_channels = train_images.shape[-1] flat_train_images = np.array( [train_image.flatten() for train_image in train_images]) if training: flat_test_images = np.array( [test_image.flatten() for test_image in test_images]) if channel_normalization: #flatten to compute SVD matrix print("channel normalizing") x = train_images flat_x = flat_train_images #normalize each channel (3 colors for e.g.) 
x_mean = np.mean(x, axis=(0, 1, 2)) x_std = np.std(x, axis=(0, 1, 2)) x = (x - x_mean) / x_std train_images = x #test images if training: test_images = (test_images - x_mean) / x_std #flatten again after normalizing flat_train_images = np.array([ train_image.flatten() for train_image in flat_train_images ]) if training: flat_test_images = np.array([ test_image.flatten() for test_image in flat_test_images ]) if centering: #flatten to compute SVD matrix print("centering") x = train_images flat_x = flat_train_images flat_x -= flat_x.mean(axis=0) x = flat_x.reshape( (x.shape[0], x.shape[1], x.shape[2], x.shape[3])) #test images if training: flat_test_images -= flat_test_images.mean(axis=0) test_images = flat_test_images.reshape( (test_images.shape[0], test_images.shape[1], test_images.shape[2], test_images.shape[3])) #WHITENING using training_images if whitening: #flatten to compute SVD matrix print("ZCA whitening") x = train_images flat_x = flat_train_images flat_x -= flat_x.mean(axis=0) sigma = np.matmul(flat_x.T, flat_x) / flat_x.shape[0] u, s, _ = np.linalg.svd(sigma) zca_epsilon = 1e-10 # avoid division by 0 d = np.diag(1. / np.sqrt(s + zca_epsilon)) Q = np.matmul(np.matmul(u, d), u.T) flat_x = np.matmul(flat_x, Q.T) flat_train_images = flat_x #normalize each channel (3 colors for e.g.) #to do this we reshape the tensor to NHWC form #train_images = flat_x.reshape((x.shape[0], x.shape[3], x.shape[1],x.shape[2])) train_images = flat_x.reshape( (x.shape[0], x.shape[1], x.shape[2], x.shape[3])) #test images if training: flat_test_images -= flat_test_images.mean(axis=0) flat_test_images = np.matmul(flat_test_images, Q.T) test_images = flat_test_images.reshape( (test_images.shape[0], test_images.shape[1], test_images.shape[2], test_images.shape[3])) #test_images = flat_test_images.reshape((test_images.shape[0], test_images.shape[3], test_images.shape[1],test_images.shape[2])) #test_images = np.transpose(test_images, tp_order) #flattened images, as the kernel function takes flattened vectors (row major for NCHW images) tp_order = np.concatenate([[0, len(train_images.shape) - 1], np.arange(1, len(train_images.shape) - 1)]) if n_gpus > 0: flat_train_images = np.transpose(train_images, tp_order) # NHWC -> NCHW flat_train_images = np.array( [train_image.flatten() for train_image in flat_train_images]) if training: flat_test_images = np.transpose(test_images, tp_order) # NHWC -> NCHW flat_test_images = np.array( [test_image.flatten() for test_image in flat_test_images]) if network == "fc": train_images = flat_train_images if training: test_images = flat_test_images #corrupting images, and adding confusion data def binarize(label, threshold, method="threshold"): if method == "threshold": return label >= threshold elif method == "oddeven": return (label + 1) % 2 # %% def process_labels(label, label_corruption, threshold, zero_one=False, binarized=True, binarization_method="threshold"): if binarized: if zero_one: if np.random.rand() < label_corruption: return np.random.choice([0, 1]) else: return float( binarize(label, threshold, binarization_method)) else: if np.random.rand() < label_corruption: return np.random.choice([-1.0, 1.0]) else: return float( binarize(label, threshold, binarization_method)) * 2.0 - 1 else: if np.random.rand() < label_corruption: return np.random.choice(range(num_classes)) else: return float(label) #if the labels are to be generated by a neural network: if doing_regression: from utils import load_model model = load_model(FLAGS) ys = model.predict(train_images)[:, 0] if 
training: test_ys = model.predict(test_images)[:, 0] else: if nn_random_labels: from utils import load_model model = load_model(FLAGS) # data = tf.constant(train_images) train_labels = model.predict( train_images)[:, 0] > 0 #, batch_size=data.shape[0], steps=1) > 0 # print("generated function", "".join([str(int(y)) for y in train_labels])) if training: # data = tf.constant(test_images) test_labels = model.predict( test_images )[:, 0] > 0 #, batch_size=data.shape[0], steps=1) > 0 if binarized: threshold = 1 num_classes = 2 if random_labels: print("zero_one", zero_one) ys = [ process_labels(label, label_corruption, threshold, zero_one=True, binarized=binarized, binarization_method=binarization_method) for label in train_labels[:m] ] + [ process_labels(label, 1.0, threshold, zero_one=True, binarized=binarized, binarization_method=binarization_method) for label in train_labels[m:] ] else: #confusion/attack labels ys = [ process_labels(label, label_corruption, threshold, zero_one=True, binarized=binarized, binarization_method=binarization_method) for label in train_labels[:m] ] + [ float(not binarize( label, threshold, binarization_method=binarization_method)) for label in train_labels[m:] ] if training: test_ys = np.array([ process_labels(label, label_corruption, threshold, zero_one=True, binarized=binarized, binarization_method=binarization_method) for label in test_labels ]) '''SAVING DATA SAMPLES''' if training: save_data(train_images, ys, test_images, test_ys, FLAGS) else: test_images = test_ys = [] save_data(train_images, ys, test_images, test_ys, FLAGS)
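# The ZCA whitening branch in the data-generation script above fits the whitening matrix on
# the flattened training images and reuses it for the test images. The helper below isolates
# that transform as a minimal sketch; the argument names are illustrative and, mirroring the
# script, each split is centred with its own mean before projection.
def _example_zca_whiten(flat_train_images, flat_test_images=None, zca_epsilon=1e-10):
    import numpy as np
    x = flat_train_images - flat_train_images.mean(axis=0)
    sigma = np.matmul(x.T, x) / x.shape[0]           # feature covariance of the training set
    u, s, _ = np.linalg.svd(sigma)
    d = np.diag(1.0 / np.sqrt(s + zca_epsilon))      # rescale each principal direction
    Q = np.matmul(np.matmul(u, d), u.T)              # ZCA whitening matrix
    whitened_train = np.matmul(x, Q.T)
    whitened_test = None
    if flat_test_images is not None:
        x_test = flat_test_images - flat_test_images.mean(axis=0)
        whitened_test = np.matmul(x_test, Q.T)
    return whitened_train, whitened_test, Q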