def test_bernoulli_equiv_var_exps():
    """Compare variational expectations of SoftMax(2) and Bernoulli.

    The two-class SoftMax is fed a second latent function with zero mean and
    variance ``exp(-10)``; its class-0 column is expected to match a Bernoulli
    likelihood with a logistic inverse link to a relative tolerance of 1e-3.
    """
    session = gpflow.get_default_session()
    latent, Y, feed = _prepare(dimF=2, dimY=1)
    num_rows = tf.shape(latent)[0]

    # Variance of latent 0 is exp(second column); latent 1 gets exp(-10).
    log_var = tf.stack(
        [latent[:, 1], -10.0 + tf.zeros(num_rows, dtype=latent.dtype)],
        axis=1)
    Fvar = tf.exp(log_var)
    # Mean of latent 0 is the first column; latent 1 is fixed at zero.
    F = tf.stack(
        [latent[:, 0], tf.zeros(num_rows, dtype=latent.dtype)], axis=1)

    # SoftMax takes the class *label*, hence 1 - Y.
    Ylabel = 1 - Y

    def logistic_link(x):
        return 1.0 / (1.0 + tf.exp(-x))

    softmax_lik = gpflow.likelihoods.SoftMax(2)
    softmax_lik.num_monte_carlo_points = int(1e7)
    bernoulli_lik = gpflow.likelihoods.Bernoulli(invlink=logistic_link)
    bernoulli_lik.num_gauss_hermite_points = 50

    softmax_op = softmax_lik.variational_expectations(F, Fvar, Ylabel)
    bernoulli_op = bernoulli_lik.variational_expectations(
        F[:, :1], Fvar[:, :1], Y)
    ls_ve, lb_ve = session.run([softmax_op, bernoulli_op], feed_dict=feed)

    assert_allclose(ls_ve[:, 0, None], lb_ve, rtol=1e-3)
def __init__(self, X, Y, num_induced_points=None, controller=None,
             reward=None, m_init=None, S_init=None, name=None, debug=False):
    """Set up the dynamics model, session, controller, reward and start state.

    Args:
        X: Model inputs; the first ``Y.shape[1]`` columns are treated as the
            state, the remaining columns as the control.
        Y: Model targets; ``Y.shape[1]`` defines the state dimension.
        num_induced_points: If truthy, a sparse ``SMGPR`` model with this many
            inducing points is built; otherwise a full ``MGPR`` is used.
        controller: The policy object. Required.
        reward: Reward object; defaults to ``Reward()``.
        m_init: Initial state mean for rollouts.
        S_init: Initial state covariance for rollouts. If either ``m_init``
            or ``S_init`` is ``None``, both are replaced by defaults (first
            state in ``X`` and ``0.1 * I``).
        name: Currently unused (the superclass call that would consume it is
            commented out).
        debug: If true, wrap the session in the TensorFlow CLI debugger.

    Raises:
        ValueError: If ``controller`` is ``None``.
    """
    # super(PILCO, self).__init__(name)

    # Fail fast: previously this case only printed a message and left
    # ``self.controller`` undefined, deferring the failure to first use.
    if controller is None:
        raise ValueError("controller cannot be None")

    if not num_induced_points:
        self.mgpr = MGPR(X, Y)
    else:
        self.mgpr = SMGPR(X, Y, num_induced_points)
    self.state_dim = Y.shape[1]
    self.control_dim = X.shape[1] - Y.shape[1]

    self.sess = gpflow.get_default_session()
    if debug:
        self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)
    # self.sess.run(tf.global_variables_initializer())

    self.controller = controller
    self.reward = Reward() if reward is None else reward

    if m_init is None or S_init is None:
        # No complete initial state supplied: start the rollouts from the
        # first state in the dataset with a small isotropic variance.
        self.m_init = X[0:1, 0:self.state_dim]
        self.S_init = np.diag(np.ones(self.state_dim) * 0.1)
    else:
        self.m_init = m_init
        self.S_init = S_init
def test_bernoulli_equiv_pred_mean_var():
    """Compare predictive mean/variance of SoftMax(2) and Bernoulli.

    The second SoftMax latent has zero mean and variance ``exp(-10)``; the
    class-0 predictive mean and variance are expected to match a Bernoulli
    likelihood with a logistic inverse link (rtol 1e-3).
    """
    session = gpflow.get_default_session()
    latent, Y, feed = _prepare(dimF=2, dimY=1)
    num_rows = tf.shape(latent)[0]

    # Variance of latent 0 is exp(second column); latent 1 gets exp(-10).
    log_var = tf.stack(
        [latent[:, 1], -10.0 + tf.zeros(num_rows, dtype=latent.dtype)],
        axis=1)
    Fvar = tf.exp(log_var)
    # Mean of latent 0 is the first column; latent 1 is fixed at zero.
    F = tf.stack(
        [latent[:, 0], tf.zeros(num_rows, dtype=latent.dtype)], axis=1)

    # Label tensor in SoftMax convention; not consumed by the predictive
    # quantities evaluated in this test.
    Ylabel = 1 - Y

    def logistic_link(x):
        return 1.0 / (1.0 + tf.exp(-x))

    softmax_lik = gpflow.likelihoods.SoftMax(2)
    softmax_lik.num_monte_carlo_points = int(1e7)
    bernoulli_lik = gpflow.likelihoods.Bernoulli(invlink=logistic_link)
    bernoulli_lik.num_gauss_hermite_points = 50

    softmax_op = softmax_lik.predict_mean_and_var(F, Fvar)
    bernoulli_op = bernoulli_lik.predict_mean_and_var(F[:, :1], Fvar[:, :1])
    (ls_pm, ls_pv), (lb_pm, lb_pv) = session.run(
        [softmax_op, bernoulli_op], feed_dict=feed)

    assert_allclose(ls_pm[:, 0, None], lb_pm, rtol=1e-3)
    assert_allclose(ls_pv[:, 0, None], lb_pv, rtol=1e-3)
def test_bernoulli_equiv_cond_mean_var():
    """Compare conditional mean/variance and logp of SoftMax(2) and Bernoulli.

    With the second SoftMax latent fixed at zero, the class-0 conditional
    mean, conditional variance and the log-probability are expected to equal
    those of a Bernoulli likelihood with a logistic inverse link.
    """
    session = gpflow.get_default_session()
    latent, Y, feed = _prepare(dimF=2, dimY=1)
    num_rows = tf.shape(latent)[0]

    # Built for parity with the other equivalence tests; the conditional
    # quantities below do not take a variance argument.
    log_var = tf.stack(
        [latent[:, 1], -10.0 + tf.zeros(num_rows, dtype=latent.dtype)],
        axis=1)
    Fvar = tf.exp(log_var)
    # Mean of latent 0 is the first column; latent 1 is fixed at zero.
    F = tf.stack(
        [latent[:, 0], tf.zeros(num_rows, dtype=latent.dtype)], axis=1)

    # SoftMax takes the class *label*, hence 1 - Y.
    Ylabel = 1 - Y

    def logistic_link(x):
        return 1.0 / (1.0 + tf.exp(-x))

    softmax_lik = gpflow.likelihoods.SoftMax(2)
    softmax_lik.num_monte_carlo_points = int(1e7)
    bernoulli_lik = gpflow.likelihoods.Bernoulli(invlink=logistic_link)
    bernoulli_lik.num_gauss_hermite_points = 50

    F_first = F[:, :1]
    fetches = [
        softmax_lik.conditional_mean(F),
        bernoulli_lik.conditional_mean(F_first),
        softmax_lik.conditional_variance(F),
        bernoulli_lik.conditional_variance(F_first),
        softmax_lik.logp(F, Ylabel),
        bernoulli_lik.logp(F_first, Y),
    ]
    ls_cm, lb_cm, ls_cv, lb_cv, ls_lp, lb_lp = session.run(
        fetches, feed_dict=feed)

    # Only the first SoftMax column is comparable with the Bernoulli output.
    assert_allclose(ls_cm[:, :1], lb_cm)
    assert_allclose(ls_cv[:, :1], lb_cv)
    assert_allclose(ls_lp, lb_lp)
def myKL2(self):
    """Return the layer KL and a reference KL computed with ``gauss_kl``.

    Builds a two-kernel ``Fully_Coupled_Layers`` object with a random mean
    ``mu_M`` and random lower-triangular ``S_M`` factor, evaluates its
    ``KL()``, and evaluates ``gauss_kl`` for the same mean/factor against the
    block-diagonal prior covariance ``diag(Kmm1, Kmm1, Kmm2)``.

    Returns:
        Tuple ``(kl, reference_kl)`` of evaluated values.
    """
    X = np.array([[1., 2., 3.], [1., 2.1, 3.], [1.1, 2., 3.], [1., 2., 3.1]])
    Y = np.array([[1.], [2.], [.2], [3.]])
    Z = np.array([[1., 2., 3.], [1.3, 2.2, 3.1]])
    A = np.tril(np.random.rand(6, 6))  # "cholesky" of S_M
    B = np.random.rand(6, 1)  # mu_M

    all_kernels = [
        kernels.RBF(3),
        kernels.RBF(2, lengthscales=3., variance=2.),
    ]
    all_Zs, all_mfs = init_linear(X, Z, all_kernels)
    mylayers = Fully_Coupled_Layers(X, Y, Z, all_kernels, all_mfs, all_Zs,
                                    mu_M=B, S_M=A)
    kl = mylayers.KL()
    session = get_default_session()
    kl = session.run(kl)

    # Jittered prior covariances of the inducing points of each layer.
    Kmm1 = all_kernels[0].compute_K_symm(
        all_Zs[0]) + np.eye(Z.shape[0]) * settings.jitter
    Kmm2 = all_kernels[1].compute_K_symm(
        all_Zs[1]) + np.eye(all_Zs[1].shape[0]) * settings.jitter
    K_big = scipy.linalg.block_diag(Kmm1, Kmm1, Kmm2)
    tfKL = gauss_kl(tf.constant(B), tf.constant(A[np.newaxis]),
                    K=tf.constant(K_big))

    # The context manager closes the temporary session; it was previously
    # left open.
    with tf.Session() as sess:
        reference_kl = sess.run(tfKL)
    return kl, reference_kl
def optimize(self, num_iter=2000, num_restarts=30, opt_messages=False,
             print_result=False):
    """Run the optimizer tensor ``num_iter`` times, then anchor the model.

    The optimizer tensor is created lazily on first use and cached on
    ``self.optimizer_tensor``. After the loop the trained values are anchored
    back into ``self.model`` and the model is printed.

    NOTE: ``num_restarts``, ``opt_messages`` and ``print_result`` are not
    read by this body; the model is printed unconditionally.
    """
    # TODO multiple restarts from the samples of the prior distribution
    # https://github.com/GPflow/GPflow/issues/797
    if self.optimizer_tensor is None:
        self.optimizer_tensor = self.opt.make_optimize_tensor(self.model)

    session = gpflow.get_default_session()
    step = 0
    while step < num_iter:
        session.run(self.optimizer_tensor)
        step += 1

    self.model.anchor(session)
    print(self.model)
def kernel_matrix(X, depth=5, image_size=28, number_channels=1, n_blocks=3,
                  sigmaw=1.0, sigmab=1.0, n_gpus=1):
    """Compute the ResNet kernel matrix of ``X`` with ``compute_big_K``.

    A ``dkern.resnet.ResnetKernel`` is built on the CPU device with
    ``n_blocks`` blocks of ``(depth - 2) // (n_blocks * 2)`` layers each, then
    evaluated on ``X`` in GPflow's default session (chunk argument 400).

    The nonlinearity is ``ExReLU`` for ``depth < 100`` and ``ExErf`` otherwise.
    """
    layers_per_block = (depth - 2) // (n_blocks * 2)
    nonlinearity = dkern.ExReLU() if depth < 100 else dkern.ExErf()
    all_strides = [1, 2, 2, 2, 2, 2, 2]

    with tf.device("cpu:0"):
        kern = dkern.resnet.ResnetKernel(
            input_shape=[number_channels, image_size, image_size],
            block_sizes=[layers_per_block] * n_blocks,
            block_strides=all_strides[:n_blocks],
            # Alternative once considered by the author:
            # scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)**2
            var_weight=sigmaw ** 2,
            var_bias=sigmab ** 2,
            kernel_size=3,
            conv_stride=1,
            recurse_kern=nonlinearity,
            data_format='NCHW',
        )

    session = gpflow.get_default_session()
    # K += 1e-6 * np.eye(len(X))  (jitter left disabled, as in the original)
    return compute_big_K(session, kern, 400, X, n_gpus=n_gpus)
def fgsm_cleverhans(K_inv_Y, kernel, X, Xt, Yt, epsilon=0.3, norm_type=np.inf,
                    output_images=True, max_output=128,
                    output_path='/scratch/etv21/conv_gp_data/MNIST_data/cleverhans_fgsm/',
                    adv_file_output='cleverhans_fgsm_eps={}_norm_{}'):
    """Generate FGSM adversarial examples with cleverhans for a kernel predictor.

    The attacked model computes logits as ``kernel.K(x, X) @ K_inv_Y``. Test
    inputs are attacked one at a time (``batch_size`` is fixed to 1), the
    results are stacked into an array with 28*28 columns, saved as ``.npy``
    under ``output_path`` and returned.

    Args:
        K_inv_Y: Array fed into the ``K_inv_Y`` placeholder.
        kernel: Object exposing ``K(a, b)`` that builds the kernel op.
        X: Training inputs fed to the ``X_train`` placeholder.
        Xt: Test inputs; rows are attacked individually.
        Yt: Test targets; only ``Yt.shape[0]`` is used (number of examples).
        epsilon: Passed to cleverhans as ``eps``.
        norm_type: Passed to cleverhans as ``ord``.
        output_images: If true, also write PNG images of adversarial inputs.
        max_output: Images are written for example indices below
            ``max_output`` and for indices in ``[1280, 1280 + max_output)``;
            ``None`` writes all of them.
        output_path: Directory for the ``.npy`` file and the image folder.
        adv_file_output: File-name template, formatted with
            ``(epsilon, norm_type)`` for the ``.npy`` file.

    Returns:
        Array of adversarial examples, one row of 28*28 values per input.

    Raises:
        ValueError: If ``Yt`` has no rows (nothing to concatenate).
    """
    batch_size = 1

    # Create output directories (including parents) if they don't exist.
    os.makedirs(output_path, exist_ok=True)
    # NOTE: the template placeholders are deliberately left unformatted here,
    # matching the directory name the original code produced.
    output_images_dir = os.path.join(
        output_path, '{}_images'.format(adv_file_output))
    if output_images:
        os.makedirs(output_images_dir, exist_ok=True)

    sess = gpflow.get_default_session()
    fgsm_params = {'eps': epsilon, 'ord': norm_type,
                   'clip_min': np.float64(0.0), 'clip_max': np.float64(1.0)}

    # Placeholders
    K_inv_Y_ph = tf.placeholder(settings.float_type, K_inv_Y.shape, 'K_inv_Y')
    X2_ph = tf.placeholder(settings.float_type, X.shape, 'X_train')

    # Callable that returns logits
    def predict_callable(xt):
        Kxtx_op = kernel.K(xt, X2_ph)
        return tf.matmul(Kxtx_op, K_inv_Y_ph)

    # Convert callable to model
    model = cleverhans.model.CallableModelWrapper(predict_callable, 'logits')

    # Define attack part of graph
    fgsm = FastGradientMethod(model, sess=sess, dtypestr='float64')
    x = tf.placeholder(settings.float_type, shape=(None, Xt.shape[1]))
    adv_x_op = fgsm.generate(x, **fgsm_params)

    adv_batches = []
    batch_num = 0
    for k in tqdm.trange(0, Yt.shape[0], batch_size):
        end = min(k + batch_size, Yt.shape[0])
        feed_dict = {K_inv_Y_ph: K_inv_Y, x: Xt[k:end, :], X2_ph: X}
        adv_x = sess.run(adv_x_op, feed_dict=feed_dict)
        # Collect batches and concatenate once at the end instead of
        # re-copying the whole array with np.append on every iteration.
        adv_batches.append(adv_x.reshape(batch_size, 28 * 28))

        first_index = batch_num * batch_size
        write_images = output_images and (
            max_output is None
            or max_output > first_index
            or 1280 <= first_index < 1280 + max_output)
        if write_images:
            for c in range(batch_size):
                adv_img = (adv_x[c] * 255).astype(int).reshape(28, 28)
                image_file = os.path.join(
                    output_images_dir,
                    'gp_attack_{}_noisy.png'.format(first_index + c))
                # NOTE(review): scipy.misc.toimage is absent from recent
                # SciPy releases; kept because no replacement is imported.
                scipy.misc.toimage(adv_img, cmin=0, cmax=255).save(image_file)
        batch_num += 1

    adv_examples = np.concatenate(adv_batches, axis=0)
    np.save(
        os.path.join(output_path,
                     (adv_file_output + '.npy').format(epsilon, norm_type)),
        adv_examples, allow_pickle=False)
    return adv_examples
def get_Full_and_MF_ELBOs(self, white):
    """Evaluate the ELBO and KL of ``my_FullDGP`` and ``sal0dgp`` on toy data.

    Both models receive the same data, kernels and fixed samples. The full
    model gets a block-diagonal ``S_M`` factor (2x2 blocks) and stacked mean
    ``mu_M``; the ``sal0dgp`` model gets the same blocks and means per layer
    through ``set_qsqrt``.

    Args:
        white: Forwarded as ``whitened_prior`` / ``white`` to the two models.

    Returns:
        ``(ELBO_diag, ELBO_sal, mydgp.get_KL(), Saldgp.get_KL())``.
    """
    X = np.array([[1., 2., 3.], [1., 2.1, 3.], [1.1, 2., 3.], [1., 2., 3.1]])
    Y = np.array([[1.], [2.], [1.2], [3.]])
    Z = np.array([[1., 2., 3.], [1.3, 2.2, 3.1]])

    # Unit lower-triangular factor: identity plus one sub-diagonal entry in
    # each of the four 2x2 diagonal blocks.
    sm_sqrt2 = np.eye(8)
    sm_sqrt2[[1, 3, 5, 7], [0, 2, 4, 6]] = [0.5, 0.95, 0.1, 0.25]
    mu_M = np.array([1., 2., 3., 4., 5., 6., 3., 3.]).reshape(-1, 1)

    kernels = [
        gpflow.kernels.RBF(3),
        gpflow.kernels.RBF(2, lengthscales=4.0),
        gpflow.kernels.RBF(1, lengthscales=2.0),
    ]

    # Fixed per-layer samples shared by both models.
    zs = [
        [[0.1, 0.5], [-0.3, 0.2], [1., -1.3], [2., 0.]],
        [[.1], [.2], [.2], [0.1]],
        [[1.], [.5], [.2], [0.5]],
    ]

    mydgp = my_FullDGP(X, Y, Z, kernels, Gaussian(), mu_M=mu_M,
                       S_M=sm_sqrt2, whitened_prior=white)
    elbo_tensor = mydgp._build_likelihood(zs=zs)
    ELBO_diag = gpflow.get_default_session().run(elbo_tensor)

    z1, z2, z3 = zs
    Saldgp = sal0dgp(X, Y, Z, kernels, Gaussian(), white=white)
    myqsqrt = np.array([[[[1., 0.], [0.5, 1.]], [[1., 0.], [0.95, 1.]]],
                        [[[1., 0.], [0.1, 1.]]], [[[1., 0.], [0.25, 1.]]]])
    myqmu = [[[1., 3.], [2., 4.]], [[5.], [6.]], [[3.], [3.]]]
    Saldgp.set_qsqrt(myqsqrt, myqmu)
    ELBO_sal = Saldgp.my_ELBO(z1, z2, z3)

    return ELBO_diag, ELBO_sal, mydgp.get_KL(), Saldgp.get_KL()
def test_convolutional_patch_features():
    """
    Predictive variance of convolutional kernel must be unchanged when using
    inducing points, and inducing patches where all patches of the inducing
    points are used.

    Both variances are evaluated at the inducing inputs themselves with a
    jitter level of 1e-14 and must lie strictly between 0 and 1e-13.
    :return:
    """
    low_jitter = gpflow.settings.get_settings()
    low_jitter.numerics.jitter_level = 1e-14
    with gpflow.settings.temp_settings(low_jitter):
        M = 10
        image_size = [4, 4]
        patch_size = [2, 2]

        base_kern = gpflow.kernels.SquaredExponential(4)
        kern = gpflow.kernels.Convolutional(base_kern, image_size, patch_size)

        # Evaluate with inducing points
        Zpoints = np.random.randn(M, np.prod(image_size))
        points = gpflow.features.InducingPoints(Zpoints)
        _, points_var = gpflow.conditionals.conditional(
            tf.identity(Zpoints), points, kern, np.zeros((M, 1)),
            full_output_cov=True, q_sqrt=None, white=False)[:2]

        # Evaluate with inducing patches
        Zpatches = kern.compute_patches(Zpoints).reshape(
            M * kern.num_patches, np.prod(patch_size))
        patches = gpflow.features.InducingPatch(Zpatches)
        _, patches_var = gpflow.conditionals.conditional(
            tf.identity(Zpoints), patches, kern,
            np.zeros((len(patches), 1)),
            full_output_cov=True, q_sqrt=None, white=False)[:2]

        session = gpflow.get_default_session()
        points_var_eval = session.run(points_var)
        patches_var_eval = session.run(patches_var)

        for variance in (points_var_eval, patches_var_eval):
            assert np.all(variance > 0.0)
            assert np.all(variance < 1e-13)
def kernel_matrix(X, X2=None, image_size=28, number_channels=1,
                  filter_sizes=None, padding=None, strides=None,
                  sigmaw=1.0, sigmab=1.0, n_gpus=1):
    """Compute the deep convolutional kernel matrix between ``X`` and ``X2``.

    A ``dkern.DeepKernel`` without residual connections is built on the CPU
    device and evaluated with ``compute_big_K`` (chunk argument 400) in
    GPflow's default session, which is closed afterwards.

    Args:
        X: First set of inputs.
        X2: Optional second set of inputs, forwarded to ``compute_big_K``.
        image_size: Side length of the (square) input images.
        number_channels: Number of image channels.
        filter_sizes: Per-layer filter sizes; defaults to
            ``[[5, 5], [2, 2], [5, 5], [2, 2]]``.
        padding: Per-layer padding modes; defaults to
            ``["VALID", "SAME", "VALID", "SAME"]``.
        strides: Per-layer strides; defaults to four ``[1, 1]`` entries.
        sigmaw: Weight standard deviation (squared into ``var_weight``).
        sigmab: Bias standard deviation (squared into ``var_bias``).
        n_gpus: Number of GPUs; selects NCHW layout when positive, NHWC
            otherwise.

    Returns:
        The kernel matrix returned by ``compute_big_K``.
    """
    # Fresh default lists per call instead of shared mutable defaults.
    if filter_sizes is None:
        filter_sizes = [[5, 5], [2, 2], [5, 5], [2, 2]]
    if padding is None:
        padding = ["VALID", "SAME", "VALID", "SAME"]
    if strides is None:
        strides = [[1, 1] for _ in range(4)]

    use_gpu_layout = n_gpus > 0
    if use_gpu_layout:
        input_shape = [number_channels, image_size, image_size]
    else:
        input_shape = [image_size, image_size, number_channels]

    with tf.device("cpu:0"):
        kern = dkern.DeepKernel(
            input_shape,
            filter_sizes=filter_sizes,
            recurse_kern=dkern.ExReLU(multiply_by_sqrt2=False),
            var_weight=sigmaw ** 2,
            var_bias=sigmab ** 2,
            padding=padding,
            strides=strides,
            # NOTE(review): the original author remarks that the inputs need
            # not be transposed because dkern apparently does so itself;
            # not verifiable from this function.
            data_format=("NCHW" if use_gpu_layout else "NHWC"),
            skip_freq=-1,  # no residual connections
        )

    sess = gpflow.get_default_session()
    try:
        K = compute_big_K(sess, kern, 400, X, X2, n_gpus=n_gpus)
    finally:
        # Release the session even when the kernel computation fails.
        sess.close()
    return K
def full_SM(self):
    """Collect propagated variances of ``Full_DGP`` under a dense ``S_M``.

    For each of the four data points a joint covariance ``S_M`` over both
    layers is assembled from a random lower-triangular factor (seed 2) and
    the second-layer prior; its Cholesky factor is given to a fresh
    ``Full_DGP``. The entry ``var[1, k]`` of the propagated variance is
    stored in row ``k`` of the result.

    Returns:
        Array of shape (4, 2, 2).
    """
    X = np.array([[1., 2.], [1., 2.1], [1.3, 2.], [1.2, 2.4]])
    Y = X.copy()
    Z = X.copy()
    kernels = [
        gpflow.kernels.RBF(2),
        gpflow.kernels.RBF(2, lengthscales=3., variance=2.),
    ]

    np.random.seed(2)
    A = np.tril(np.random.rand(8, 8))
    S11 = np.matmul(A, A.T)
    mu_M = np.array([[
        1., 2., 3., 4., 5., 6., 3., 3., 1., 2., 3., 4., 5., 6., 3., 3.
    ]]).T
    identity2 = np.eye(2)

    out = np.zeros((4, 2, 2))
    for k in range(4):
        # K_MM^{-1} K_Mn for point k, replicated for both output dimensions.
        first_layer_inv = np.linalg.inv(kernels[0].compute_K_symm(X))
        cross_cov = kernels[0].compute_K(X, [X[k]])
        KMn = np.kron(identity2, np.matmul(first_layer_inv, cross_cov))

        K2MM = np.kron(identity2, kernels[1].compute_K_symm(X))
        B = np.linalg.cholesky(K2MM)
        S12 = np.matmul(A, B.T)

        temp1 = np.matmul(S12.T, KMn)
        temp2 = np.matmul(np.matmul(KMn.T, S11), KMn)
        correction = np.matmul(np.matmul(temp1, np.linalg.inv(temp2)),
                               temp1.T)
        S22 = correction + K2MM + 0.0000001 * np.eye(K2MM.shape[0])

        S_M = np.block([[S11, S12], [S12.T, S22]])
        S_M_sqrt = np.linalg.cholesky(S_M)

        mydgp = Full_DGP(X, Y, Z, kernels, Gaussian(),
                         S_M=S_M_sqrt, mu_M=mu_M)
        var_tensor = mydgp.propagate(X)[5]
        session = gpflow.get_default_session()
        var = np.array(session.run(var_tensor))
        out[k] = var[1, k]
    return out
def save_kernels(kern, N_train, N_vali, n_gpus, gram_file, Kxvx_file,
                 Kxtx_file, n_max=400, Kv_diag_file=None, Kt_diag_file=None):
    """Compute and save MNIST kernel matrices that are not on disk yet.

    Training inputs are the first ``N_train`` rows; validation inputs are
    the remaining training rows followed by the original validation rows,
    cut to ``N_vali``. Each target file that already exists is skipped.
    The diagonal files are optional and handled only when a path is given.
    """
    X, _, Xv, _, Xt, _ = mnist_1hot_all()
    Xv = np.concatenate([X[N_train:, :], Xv], axis=0)[:N_vali, :]
    X = X[:N_train]

    sess = gpflow.get_default_session()

    def full_kernel(inputs, inputs2=None):
        # Gram matrix when inputs2 is None, cross-kernel otherwise.
        if inputs2 is None:
            return compute_big_K(sess, kern, n_max=n_max, X=inputs,
                                 n_gpus=n_gpus)
        return compute_big_K(sess, kern, n_max=n_max, X=inputs, X2=inputs2,
                             n_gpus=n_gpus)

    def diag_kernel(inputs):
        return compute_big_Kdiag(sess, kern, n_max=n_max * n_max, X=inputs,
                                 n_gpus=n_gpus)

    # (target file, skip message, compute message, thunk), in original order.
    jobs = [
        (gram_file, "Skipping Kxx", "Computing Kxx",
         lambda: full_kernel(X)),
        (Kxvx_file, "Skipping Kxvx", "Computing Kxvx",
         lambda: full_kernel(Xv, X)),
        (Kxtx_file, "Skipping Kxtx", "Computing Kxtx",
         lambda: full_kernel(Xt, X)),
    ]
    if Kv_diag_file is not None:
        jobs.append((Kv_diag_file, "Skipping Kv_diag", "Computing Kv_diag",
                     lambda: diag_kernel(Xv)))
    if Kt_diag_file is not None:
        jobs.append((Kt_diag_file, "Skipping Kt_diag", "Computing Kt_diag",
                     lambda: diag_kernel(Xt)))

    for target, skip_msg, compute_msg, compute in jobs:
        if os.path.isfile(target):
            print(skip_msg)
            continue
        print(compute_msg)
        np.save(target, compute(), allow_pickle=False)
def diag_SM(self):
    """Propagate the same samples through ``sal0dgp`` and ``Full_DGP``.

    ``Full_DGP`` receives a block-diagonal ``S_M`` factor (2x2 blocks) and a
    stacked mean; ``sal0dgp`` receives the same blocks and means per layer
    through ``set_qsqrt``. Both are propagated on the toy inputs with
    identical fixed samples.

    Returns:
        ``(sal_f, diag_f)``: the ``sal0dgp`` layer outputs concatenated along
        axis 1, and the evaluated first output of ``Full_DGP.propagate``.
    """
    X = np.array([[1., 2., 3.], [1., 2.1, 3.], [1.1, 2., 3.], [1., 2., 3.1]])
    Y = np.array([[1.], [2.], [.2], [3.]])
    Z = np.array([[1., 2., 3.], [1.3, 2.2, 3.1]])
    kernels = [
        gpflow.kernels.RBF(3),
        gpflow.kernels.RBF(2, lengthscales=4.0),
        gpflow.kernels.RBF(1, lengthscales=2.0),
    ]

    # Unit lower-triangular factor: identity plus one sub-diagonal entry in
    # each of the four 2x2 diagonal blocks.
    sm_sqrt2 = np.eye(8)
    sm_sqrt2[[1, 3, 5, 7], [0, 2, 4, 6]] = [0.5, 0.95, 0.1, 0.25]
    mu_M = np.array([1., 2., 3., 4., 5., 6., 3., 3.]).reshape(-1, 1)

    # Fixed per-layer samples shared by both models.
    zs = [
        [[0.1, 0.5], [-0.3, 0.2], [1., -1.3], [2., 0.]],
        [[.1], [.2], [.2], [0.1]],
        [[1.], [.5], [.2], [0.5]],
    ]

    mydgp = Full_DGP(X, Y, Z, kernels, Gaussian(), S_M=sm_sqrt2, mu_M=mu_M)
    diag_tensor = mydgp.propagate(X, zs=zs)[0]
    diag_f = gpflow.get_default_session().run(diag_tensor)

    z1, z2, z3 = zs
    Saldgp = sal0dgp(X, Y, Z, kernels, Gaussian())
    myqsqrt = np.array([[[[1., 0.], [0.5, 1.]], [[1., 0.], [0.95, 1.]]],
                        [[[1., 0.], [0.1, 1.]]], [[[1., 0.], [0.25, 1.]]]])
    myqmu = [[[1., 3.], [2., 4.]], [[5.], [6.]], [[3.], [3.]]]
    Saldgp.set_qsqrt(myqsqrt, myqmu)

    layer_outputs, _, _ = Saldgp.my_propagate(X, z1, z2, z3)
    # Join the first entry of each of the three layers column-wise.
    sal_f = np.concatenate(
        [layer_outputs[0][0], layer_outputs[1][0], layer_outputs[2][0]],
        axis=1)
    return sal_f, diag_f
def fgsm(K_inv_Y, kernel, X, Xt, Yt, seed=20, epsilon=0.3, clip_min=0.0,
         clip_max=1.0, norm_type='inf', batch_size=1, output_images=False,
         max_output=None,
         output_path='/scratch/etv21/conv_gp_data/MNIST_data/reverse/',
         adv_file_output='gp_adversarial_examples_eps={}_norm_{}'):
    """Generate fast-gradient adversarial examples for a kernel predictor.

    Predictions are ``kernel.K(x, X) @ K_inv_Y``. The loss is the sum of
    ``log(softmax(prediction)) * (1 - Yt)``; each input is moved by
    ``epsilon`` times a norm-dependent perturbation derived from the loss
    gradient, then clipped to ``[clip_min, clip_max]``. Results are stacked
    into an array with 28*28 columns, saved as ``.npy`` and returned.

    Args:
        K_inv_Y: Array fed into the ``K_inv_Y`` placeholder.
        kernel: Object exposing ``K(a, b)`` that builds the kernel op.
        X: Training inputs fed to the ``X_train`` placeholder.
        Xt: Test inputs, consumed ``batch_size`` rows at a time. The input
            placeholder has a fixed first dimension of ``batch_size``.
        Yt: Test targets (presumably one-hot — TODO confirm).
        seed: Not read by this function.
        epsilon: Step size of the perturbation.
        clip_min: Lower clipping bound for the adversarial inputs.
        clip_max: Upper clipping bound for the adversarial inputs.
        norm_type: ``'inf'`` / ``np.inf``, ``1`` or ``2``.
        batch_size: Number of test rows per session run.
        output_images: If true, also write PNG images of adversarial inputs.
        max_output: Images are written for example indices below
            ``max_output`` and for indices in ``[1280, 1280 + max_output)``;
            ``None`` writes all of them.
        output_path: Directory for the ``.npy`` file and the image folder.
        adv_file_output: File-name template, formatted with
            ``(epsilon, norm_type)`` for the ``.npy`` file.

    Returns:
        Array of adversarial examples, one row of 28*28 values per input.

    Raises:
        ValueError: If ``norm_type`` is unsupported, or if ``Yt`` has no rows.
    """
    # The default is the string 'inf', which never compared equal to np.inf;
    # normalise it so the default actually selects the L-inf branch.
    if isinstance(norm_type, str) and norm_type.lower() == 'inf':
        norm = np.inf
    else:
        norm = norm_type
    if norm not in (np.inf, 1, 2):
        raise ValueError(
            "Unsupported norm_type {!r}; expected 'inf', np.inf, 1 or 2"
            .format(norm_type))

    # Create output directories (including parents) if they don't exist.
    os.makedirs(output_path, exist_ok=True)
    # NOTE: the template placeholders are deliberately left unformatted here,
    # matching the directory name the original code produced.
    output_images_dir = os.path.join(
        output_path, '{}_images'.format(adv_file_output))
    if output_images:
        os.makedirs(output_images_dir, exist_ok=True)

    sess = gpflow.get_default_session()

    with tf.device("gpu:0"):
        print("Xt's shape: {} X's shape: {}".format(Xt.shape, X.shape))
        print("K_inv_Y's shape: {}".format(K_inv_Y.shape))
        print("Yt's shape: {}".format(Yt.shape))

        K_inv_Y_ph = tf.placeholder(settings.float_type, K_inv_Y.shape,
                                    'K_inv_Y')
        Yt_ph = tf.placeholder(settings.float_type, [None, Yt.shape[1]],
                               'Y_test')
        X_ph = tf.placeholder(settings.float_type,
                              [batch_size, Xt.shape[1]], 'X_test')
        X2_ph = tf.placeholder(settings.float_type, X.shape, 'X_train')

        Kxtx_op = kernel.K(X_ph, X2_ph)
        predict_op = tf.matmul(Kxtx_op, K_inv_Y_ph)

        # Loss: log-softmax of the predictions weighted by (1 - Yt) and
        # summed over the batch. If Yt is one-hot this keeps the log
        # probabilities of every class except the true one. The constant
        # 1/batch_size factor is omitted.
        # Alternatives tried by the original author:
        #   -tf.losses.mean_squared_error(predict_op, -Yt_ph)
        #   tf.losses.mean_squared_error(tf.nn.softmax(predict_op), Yt_ph)
        Ytarget = 1 - Yt_ph
        tmp_softmax = tf.nn.softmax(predict_op)
        loss = tf.reduce_sum(
            tf.math.multiply(tf.math.log(tmp_softmax), Ytarget))

        grad = tf.gradients(
            loss, X_ph, stop_gradients=[K_inv_Y_ph, Yt_ph, X2_ph])[0]

        # Fail loudly on NaN/infinite gradients.
        finite_check = tf.debugging.assert_all_finite(
            grad, 'grad is not well-defined')
        with tf.control_dependencies([finite_check]):
            grad = tf.identity(grad)

        eps = tf.constant(epsilon, dtype=settings.float_type)

        if norm == np.inf:
            # L-inf: take the sign of the gradient, but only for elements
            # whose magnitude exceeds a small threshold.
            abs_grad = tf.stop_gradient(tf.math.abs(grad))
            const_bound = tf.constant(0.0001, dtype=grad.dtype)
            mask = tf.math.greater(abs_grad, const_bound)
            large_enough_grad = tf.multiply(grad, tf.cast(mask, grad.dtype))
            optimal_perturbation = tf.sign(large_enough_grad)
        elif norm == 1:
            # L1: put the whole budget on the largest-magnitude element(s),
            # split evenly between ties.
            abs_grad = tf.stop_gradient(tf.math.abs(grad))
            max_abs_grad = tf.reduce_max(abs_grad, axis=1, keepdims=True)
            # Cast to the gradient dtype; tf.to_float would yield float32
            # and clash with a float64 gradient.
            tied_for_max = tf.cast(tf.equal(abs_grad, max_abs_grad),
                                   grad.dtype)
            num_ties = tf.reduce_sum(tied_for_max, axis=1, keepdims=True)
            sign_grad = tf.math.sign(grad)
            optimal_perturbation = sign_grad * tied_for_max / num_ties
        else:
            # L2: normalise the gradient, guarding against a zero norm.
            const_not_zero = tf.constant(1e-12, dtype=grad.dtype)
            square = tf.maximum(
                const_not_zero,
                tf.reduce_sum(tf.square(grad), axis=1, keepdims=True))
            optimal_perturbation = grad / tf.sqrt(square)

        scaled_perturbation = tf.math.scalar_mul(eps, optimal_perturbation)
        X_adv_op = X_ph + scaled_perturbation
        X_adv_op = tf.clip_by_value(X_adv_op, clip_min, clip_max)

    adv_batches = []
    batch_num = 0
    for k in tqdm.trange(0, Yt.shape[0], batch_size):
        end = min(k + batch_size, Yt.shape[0])
        feed_dict = {K_inv_Y_ph: K_inv_Y, Yt_ph: Yt[k:end, :],
                     X_ph: Xt[k:end, :], X2_ph: X}
        adv_example_batch = sess.run(X_adv_op, feed_dict=feed_dict)
        # Collect batches and concatenate once at the end instead of
        # re-copying the whole array with np.append on every iteration.
        adv_batches.append(adv_example_batch.reshape(batch_size, 28 * 28))

        first_index = batch_num * batch_size
        write_images = output_images and (
            max_output is None
            or max_output > first_index
            or 1280 <= first_index < 1280 + max_output)
        if write_images:
            for c in range(batch_size):
                adv_img = (adv_example_batch[c] * 255).astype(int)
                adv_img = adv_img.reshape(28, 28)
                image_file = os.path.join(
                    output_images_dir,
                    'gp_attack_{}_noisy.png'.format(first_index + c))
                # NOTE(review): scipy.misc.toimage is absent from recent
                # SciPy releases; kept because no replacement is imported.
                scipy.misc.toimage(adv_img, cmin=0, cmax=255).save(image_file)
        batch_num += 1

    adv_examples = np.concatenate(adv_batches, axis=0)
    np.save(
        os.path.join(output_path,
                     (adv_file_output + '.npy').format(epsilon, norm_type)),
        adv_examples, allow_pickle=False)
    return adv_examples
def predict_on_noisy_inputs(self, m, s):
    """Evaluate ``self.M``, ``self.S`` and ``self.V`` for the inputs ``m``, ``s``.

    ``m`` and ``s`` are fed to the ``self.m`` and ``self.s`` graph inputs and
    the three output tensors are run in GPflow's default session.

    Returns:
        Tuple ``(M, S, V)`` of evaluated values.
    """
    # iK, beta = self.calculate_factorizations()
    # return self.predict_given_factorizations(m, s, iK, beta)
    session = gpflow.get_default_session()
    fetches = [self.M, self.S, self.V]
    feed = {self.m: m, self.s: s}
    M, S, V = session.run(fetches, feed)
    return M, S, V
# kern
# Script fragment: take the first N_train MNIST examples, compute the kernel
# matrix with the previously built `kern`, then load a cached kernel matrix
# from disk and pass it to GP_prob with +/-1 labels (digit >= 5 -> +1).
N_train = 10000
N_vali = 1000
X, Y, Xv, _, Xt, _ = mnist_1hot_all()
# Xv = np.concatenate([X[N_train:, :], Xv], axis=0)[:N_vali, :]
X = X[:N_train]
Y = Y[:N_train]
#
# Y.shape
#
# ys = [int((np.argmax(labels)>5))*2.0-1 for labels in Y]
# sess.close()
sess = gpflow.get_default_session()
K = compute_big_K(sess, kern, 20, X)
# print(K)
# return K
# np.sum(np.isnan(K))
from GP_prob import GP_prob
import pickle

# pickle.dump(K,open("resnet_K_1000.p","wb"))
# NOTE: the cached matrix replaces the K computed above. pickle.load must
# only be used on trusted files. The context manager closes the file handle,
# which was previously left open.
with open("resnet_K_1000.p", "rb") as kernel_file:
    K = pickle.load(kernel_file)
ys2 = [int(np.argmax(y) >= 5) * 2.0 - 1 for y in Y]
ys = np.array([[int(np.argmax(y) >= 5)] for y in Y])
logPU = GP_prob(K, ys2, 1e-9, cpu=False)
def execute_tensor(tensor):
    """Run ``tensor`` in GPflow's default session and return the result."""
    return gpflow.get_default_session().run(tensor)