def dec(x, start_res, end_res, scope='Decoder'):
    """Decode ``x`` into an RGB tensor whose spatial size is ``end_res`` (NHWC).

    The input goes through a dense layer, is reshaped to ``[-1, 4, 4, fn(4)]``
    and is then passed through ``block_up`` once for every resolution
    8, 16, ... that does not exceed ``end_res``. When the final resolution is
    larger than ``start_res`` the RGB projection of the previous stage
    (upscaled) and that of the final stage are combined by ``ops.lerp_clip``
    with a non-trainable scalar variable; otherwise only the final stage is
    projected to RGB.

    ``fn``, ``rname`` and ``block_up`` are not defined in this block; they are
    resolved from the surrounding scope.

    Args:
        x: input tensor handed to ``ops.dense``.
        start_res: resolution at or below which no blending is performed.
        end_res: expected height and width of the returned tensor.
        scope: name of the variable scope (opened with ``AUTO_REUSE``).

    Returns:
        The RGB tensor; its dimensions 1 and 2 are asserted to equal
        ``end_res``.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        net = ops.dense('fc1', x, fn(4) * 4 * 4, 'NHWC')
        net = tf.reshape(net, [-1, 4, 4, fn(4)])

        # Grow the feature map stage by stage, remembering the input of the
        # last stage so it can be blended with that stage's output below.
        below = None
        next_res = 8
        while next_res <= end_res:
            below = net
            net = block_up(net, fn(next_res), 3, rname(next_res))
            next_res *= 2
        top_res = next_res // 2
        top_name = rname(top_res)

        if top_res <= start_res:
            out = ops.to_rgb('rgb_' + top_name, net, "NHWC")
        else:
            # Scalar blend factor, registered in the "lerp" collection.
            alpha = tf.get_variable(
                top_name + '_t',
                shape=[],
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, "lerp"],
                dtype=tf.float32,
                initializer=tf.zeros_initializer(),
                trainable=False)
            coarse = ops.to_rgb('rgb_' + rname(top_res // 2), below, 'NHWC')
            coarse = ops.upscale2d(coarse, 'NHWC')
            fine = ops.to_rgb('rgb_' + top_name, net, 'NHWC')
            out = ops.lerp_clip(coarse, fine, alpha)

        out_shape = utils.int_shape(out)
        assert end_res == out_shape[1]
        assert end_res == out_shape[2]
        return out
def pixel_norm(x, data_format, epsilon=1e-8):
    """Scale ``x`` by the reciprocal root-mean-square along its channel axis.

    Args:
        x: tensor of rank 2 or higher; for rank other than 2 the layout is
            taken from ``data_format``.
        data_format: ``'NHWC'`` selects axis 3; anything else selects axis 1.
            Rank-2 inputs always use axis 1.
        epsilon: constant added to the mean square before the reciprocal
            square root.

    Returns:
        ``x * rsqrt(mean(x ** 2, axis, keepdims=True) + epsilon)``.
    """
    with tf.variable_scope('PixelNorm'):
        rank = len(utils.int_shape(x))
        channel_axis = 3 if (rank != 2 and data_format == 'NHWC') else 1
        mean_square = tf.reduce_mean(
            tf.square(x), axis=channel_axis, keepdims=True)
        return x * tf.rsqrt(mean_square + epsilon)
def squeeze(self, x):
    """Reshape ``x`` from ``[N, d, n, 2]`` to ``[N, d/f1, n/f2, 2*f1*f2]``.

    The factors ``f1`` and ``f2`` are read from ``self``; the divisions are
    integer (floor) divisions.
    """
    # x.shape = [N, d, n, 2]
    batch, rows, cols, _ = int_shape(x)
    target_shape = [
        batch,
        rows // self.f1,
        cols // self.f2,
        2 * self.f1 * self.f2,
    ]
    return tf.reshape(x, target_shape)
def squash(x, data_format, epsilon=1e-8):
    """Rescale ``x`` along its channel axis by ``|x|^2 / (1 + |x|^2) / |x|``.

    Args:
        x: tensor of rank 2 or higher; for rank other than 2 the layout is
            taken from ``data_format``.
        data_format: ``'NHWC'`` selects axis 3; anything else selects axis 1.
            Rank-2 inputs always use axis 1.
        epsilon: constant added to the squared norm before the reciprocal
            square root.

    Returns:
        ``x`` multiplied by the scalar factor computed per position.
    """
    with tf.variable_scope('Squash'):
        rank = len(utils.int_shape(x))
        channel_axis = 3 if (rank != 2 and data_format == 'NHWC') else 1
        sq_norm = tf.reduce_sum(
            tf.square(x), axis=channel_axis, keepdims=True)
        shrink = sq_norm / (1 + sq_norm)
        inv_norm = tf.rsqrt(sq_norm + epsilon)
        return x * (shrink * inv_norm)
def generator(x, last_layer_resolution, cfg, is_training=True,
              scope='Generator'):
    """Build the generator graph up to ``last_layer_resolution``.

    A 4x4 stem (pixel norm, dense, reshape, conv, each followed by leaky ReLU
    and pixel norm where shown below) is followed by one ``gblock`` per
    resolution 8, 16, ... not exceeding ``last_layer_resolution``. If the
    final resolution is above ``cfg.starting_resolution`` the upscaled RGB
    projection of the previous stage and the RGB projection of the final
    stage are combined by ``ops.lerp_clip`` with a non-trainable scalar
    variable; otherwise only the final stage is projected to RGB.

    Args:
        x: input tensor (fed to ``ops.pixel_norm`` and ``ops.dense``).
        last_layer_resolution: resolution of the last block to build.
        cfg: configuration object; this function reads ``data_format``,
            ``resolution_to_filt_num`` and ``starting_resolution``.
        is_training: not used inside this function.
        scope: name of the variable scope (opened with ``AUTO_REUSE``).

    Returns:
        The RGB output tensor; its spatial dimensions are asserted to equal
        the final resolution.
    """
    def rname(resolution):
        return '{0}x{0}'.format(resolution)

    data_format = cfg.data_format
    is_nhwc = data_format == 'NHWC'

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        with tf.variable_scope("4x4"):
            base_filters = cfg.resolution_to_filt_num[4]
            net = ops.pixel_norm(x, data_format)
            net = ops.dense('dense', net, 4 * 4 * base_filters, data_format)
            if is_nhwc:
                base_shape = [-1, 4, 4, base_filters]
            else:
                base_shape = [-1, base_filters, 4, 4]
            net = tf.reshape(net, base_shape)
            net = ops.leaky_relu(net)
            net = ops.pixel_norm(net, data_format)
            net = ops.conv2d('conv', net, base_filters, 3, data_format)
            net = ops.leaky_relu(net)
            net = ops.pixel_norm(net, data_format)

        # Stack one block per resolution, keeping the input of the last block
        # around for the blending step.
        below = None
        next_res = 8
        while next_res <= last_layer_resolution:
            below = net
            net = gblock(rname(next_res), net,
                         cfg.resolution_to_filt_num[next_res], data_format)
            next_res *= 2
        top_res = next_res // 2
        top_name = rname(top_res)

        if top_res <= cfg.starting_resolution:
            out = ops.to_rgb('to_rgb_' + top_name, net, data_format)
        else:
            # Scalar blend factor, registered in the "lerp" collection.
            alpha = tf.get_variable(
                top_name + '_t',
                shape=[],
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, "lerp"],
                dtype=tf.float32,
                initializer=tf.zeros_initializer(),
                trainable=False)
            coarse = ops.to_rgb('to_rgb_' + rname(top_res // 2), below,
                                data_format)
            coarse = ops.upscale2d(coarse, data_format)
            fine = ops.to_rgb('to_rgb_' + top_name, net, data_format)
            out = ops.lerp_clip(coarse, fine, alpha)

        out_shape = utils.int_shape(out)
        assert top_res == out_shape[1 if is_nhwc else 3]
        assert top_res == out_shape[2]
        return out
def discriminator(x, resolution, cfg, is_training=True,
                  scope='Discriminator'):
    """Build the discriminator graph for images of size ``resolution``.

    When ``resolution`` exceeds ``cfg.starting_resolution`` the input is fed
    through two paths that are combined by ``ops.lerp_clip`` with a
    non-trainable scalar variable: a downscaled image converted with
    ``from_rgb``, and the full-size image converted with ``from_rgb`` and
    passed through one ``dblock``. Otherwise the image is only converted with
    ``from_rgb``. The result then goes through one ``dblock`` per remaining
    resolution down to 4 (with a minibatch-stddev layer inserted at 4) and
    two dense layers.

    Args:
        x: image tensor whose spatial dimensions must equal ``resolution``.
        resolution: input resolution.
        cfg: configuration object; this function reads ``data_format``
            (must be ``'NCHW'`` or ``'NHWC'``), ``resolution_to_filt_num``
            and ``starting_resolution``.
        is_training: not used inside this function.
        scope: name of the variable scope (opened with ``AUTO_REUSE``).

    Returns:
        Output of the final dense layer, which has a single unit.
    """
    data_format = cfg.data_format
    assert data_format in ('NCHW', 'NHWC')

    def rname(resolution):
        return '{0}x{0}'.format(resolution)

    def fmap(resolution):
        return cfg.resolution_to_filt_num[resolution]

    in_shape = utils.int_shape(x)
    assert resolution == in_shape[1 if data_format == 'NHWC' else 3]
    assert resolution == in_shape[2]

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if resolution <= cfg.starting_resolution:
            net = ops.from_rgb('from_rgb_' + rname(resolution), x,
                               fmap(resolution), data_format)
        else:
            half = resolution // 2
            # Low-resolution path: shrink the image, then convert it.
            coarse = ops.downscale2d(x, data_format)
            coarse = ops.from_rgb('from_rgb_' + rname(half), coarse,
                                  fmap(half), data_format)
            # Full-resolution path: convert, then run one block.
            fine = ops.from_rgb('from_rgb_' + rname(resolution), x,
                                fmap(half), data_format)
            # Scalar blend factor, registered in the "lerp" collection.
            alpha = tf.get_variable(
                rname(resolution) + '_t',
                shape=[],
                dtype=tf.float32,
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, "lerp"],
                initializer=tf.zeros_initializer(),
                trainable=False)
            fine = dblock(rname(resolution), fine,
                          [fmap(resolution), fmap(half)], data_format)
            net = ops.lerp_clip(coarse, fine, alpha)
            resolution = half

        while resolution >= 4:
            if resolution == 4:
                net = ops.minibatch_stddev_layer(net, data_format)
            block_filters = [fmap(resolution), fmap(resolution // 2)]
            net = dblock(rname(resolution), net, block_filters, data_format)
            resolution = resolution // 2

        net = ops.dense('2x2', net, fmap(resolution), data_format)
        net = ops.leaky_relu(net)
        return ops.dense('output', net, 1, data_format)
def apply_bias(x, data_format):
    """Add a per-channel ``'bias'`` variable (zero-initialised) to ``x``.

    Args:
        x: tensor of rank 2 or 4 (asserted).
        data_format: for rank-4 inputs, ``'NHWC'`` puts channels on axis 3;
            anything else puts them on axis 1. Rank-2 inputs use axis 1.

    Returns:
        ``x`` plus the bias, cast to ``x.dtype`` and reshaped so it lines up
        with the channel axis.
    """
    shape = utils.int_shape(x)
    rank = len(shape)
    assert rank in (2, 4)

    channels_last = data_format == 'NHWC'
    channel_axis = 3 if (rank != 2 and channels_last) else 1

    bias = tf.get_variable(
        'bias',
        shape=[shape[channel_axis]],
        initializer=tf.initializers.zeros())
    bias = tf.cast(bias, x.dtype)

    if len(x.shape) == 2:
        return x + bias
    broadcast_shape = [1, 1, 1, -1] if channels_last else [1, -1, 1, 1]
    return x + tf.reshape(bias, broadcast_shape)
def _pixelcnn_bin_probs(x, pixelcnn_out):
    """Build the per-sub-pixel probability tensor over 256 intensity bins.

    Args:
        x: image tensor with a fully defined static shape, e.g. (B,32,32,3).
            The graph built here hard-codes 3 channels.
        pixelcnn_out: predicted distribution, e.g. (B,32,32,100); its last
            dimension is split into ``nr_mix = last_dim / 10`` mixture logits
            followed by ``3 * 3 * nr_mix`` values that are reshaped into
            means, log-scales and coefficients (presumably the PixelCNN++
            mixture-of-logistics layout -- confirm against the model).

    Returns:
        A tensor of shape (B, H, W, 3, 256): the mixture axis is summed out of
        the product of the per-bin differences and the softmax weights.
    """
    xs = int_shape(x)  # true image (i.e. labels) to regress to
    ls = int_shape(pixelcnn_out)  # predicted distribution
    # here and below: unpacking the params of the mixture of logistics
    nr_mix = int(ls[-1] / 10)
    per_channel = [xs[0], xs[1], xs[2], 1, nr_mix]
    per_bin = (xs[0], xs[1], xs[2], 3, nr_mix, 1)

    logit_probs = pixelcnn_out[:, :, :, :nr_mix]
    params = tf.reshape(pixelcnn_out[:, :, :, nr_mix:], xs + [nr_mix * 3])
    means = params[:, :, :, :, :nr_mix]
    log_scales = tf.maximum(params[:, :, :, :, nr_mix:2 * nr_mix], -7.)
    coeffs = tf.nn.tanh(params[:, :, :, :, 2 * nr_mix:3 * nr_mix])

    # here and below: getting the means and adjusting them based on
    # preceding sub-pixels
    x_ = tf.reshape(x, xs + [1]) + tf.zeros(xs + [nr_mix])
    m2 = tf.reshape(
        means[:, :, :, 1, :] + coeffs[:, :, :, 0, :] * x_[:, :, :, 0, :],
        per_channel)
    m3 = tf.reshape(
        means[:, :, :, 2, :] + coeffs[:, :, :, 1, :] * x_[:, :, :, 0, :]
        + coeffs[:, :, :, 2, :] * x_[:, :, :, 1, :],
        per_channel)
    means = tf.concat(
        [tf.reshape(means[:, :, :, 0, :], per_channel), m2, m3], 3)

    # B is batch size (1)
    # H and W are height and width (32)
    # C is channels (3)
    # M is number of mixtures (10)
    # shapes are (B, H, W, C, M)
    # 255 evaluation points give 256 bins once the 0/1 end caps are added.
    eval_pts = tf.constant(
        np.linspace(-1 + 1. / 256, 1 - 1. / 256, 255, dtype=np.float32))
    eval_pts = tf.reshape(eval_pts, (1, 1, 1, 1, 1, -1))
    eval_pts = tf.tile(eval_pts, (xs[0], xs[1], xs[2], 3, nr_mix, 1))
    log_scales = tf.reshape(log_scales, per_bin)
    scales = tf.exp(log_scales)
    scales = tf.tile(scales, (1, 1, 1, 1, 1, 255))
    means = tf.reshape(means, per_bin)
    means = tf.tile(means, (1, 1, 1, 1, 1, 255))

    evals = tf.sigmoid((eval_pts - means) / scales)
    eval_upper = tf.concat([evals, tf.ones(per_bin)], axis=5)
    eval_lower = tf.concat([tf.zeros(per_bin), evals], axis=5)
    eval_diffs = eval_upper - eval_lower

    probs_tiled = tf.nn.softmax(
        tf.tile(
            tf.reshape(logit_probs, (xs[0], xs[1], xs[2], 1, nr_mix, 1)),
            (1, 1, 1, 3, 1, 256)),
        axis=4)
    return tf.reduce_sum(eval_diffs * probs_tiled, axis=4)


def make_pixeldefend(sess, x, pixelcnn_out):
    """Return a ``pixeldefend(input_image, eps=16)`` purification function.

    Args:
        sess: session used to evaluate the probability tensor.
        x: image tensor fed with a single-image batch at every step; its
            static shape must be fully defined, e.g. (1,32,32,3).
        pixelcnn_out: predicted distribution for ``x``, e.g. (1,32,32,100).

    Returns:
        A function that takes an image with elements in [0, 255] and returns a
        purified image on the same scale.
    """
    probs = _pixelcnn_bin_probs(x, pixelcnn_out)
    xs = int_shape(x)
    # Take the image size from the graph instead of hard-coding it; the feed
    # has to match this shape anyway.
    height, width = xs[1], xs[2]

    # input image has elements in [0, 255]
    # epsilon is 0-255
    def pixeldefend(input_image, eps=16):
        """Greedily replace each sub-pixel by its most likely nearby value.

        For every sub-pixel, in row, column, channel order, the probabilities
        are re-evaluated on the partially purified image and the value within
        ``eps`` of the input intensity (clipped to [0, 255]) with the highest
        probability is written back.

        Raises:
            ValueError: if ``eps`` is negative.
        """
        if eps < 0:
            raise ValueError('eps must be non-negative, got %r' % (eps,))

        # Window centres are taken from the input on its own scale. Inverting
        # the [-1, 1] rescale and flooring can come out one too low because
        # of floating-point round-off.
        centres = np.floor(np.asarray(input_image, dtype=np.float64))
        purified = 2.0 * np.copy(input_image) / 255.0 - 1.0  # rescale to [-1, 1]
        for yi in range(height):
            for xi in range(width):
                # we have to do this one channel at a time, due to
                # channel-wise dependencies
                for ki in range(3):
                    p = sess.run(probs, {x: [purified]})
                    sub = p[0, yi, xi, ki]
                    curr_val = centres[yi, xi, ki]
                    lo = int(max(curr_val - eps, 0))
                    hi = int(min(curr_val + eps, 255) + 1)
                    # argmax returns the first maximum, i.e. the lowest
                    # intensity among ties.
                    best_idx = lo + int(np.argmax(sub[lo:hi]))
                    purified[yi, xi, ki] = 2.0 * best_idx / 255.0 - 1.0
        return 255.0 * ((purified + 1.0) / 2.0)

    return pixeldefend
def make_pixeldefend(sess, x, pixelcnn_out):
    """Build the probability graph and return a purification function.

    The graph turns ``pixelcnn_out`` into, for every sub-pixel of ``x``, a
    distribution over 256 intensity bins. The last dimension of
    ``pixelcnn_out`` is split into ``nr_mix = last_dim / 10`` mixture logits
    plus means, log-scales and coefficients for 3 channels (presumably the
    PixelCNN++ mixture-of-logistics layout -- confirm against the model).

    Args:
        sess: session used to evaluate the probability tensor.
        x: image tensor with a fully defined static shape, e.g. (1,32,32,3);
            it is fed with a single-image batch at every purification step.
        pixelcnn_out: predicted distribution for ``x``, e.g. (1,32,32,100).

    Returns:
        ``pixeldefend(input_image, eps=16)``, which maps an image with
        elements in [0, 255] to a purified image on the same scale.
    """
    xs = int_shape(x)  # true image (i.e. labels) to regress to, e.g. (B,32,32,3)
    ls = int_shape(pixelcnn_out)  # predicted distribution, e.g. (B,32,32,100)
    # here and below: unpacking the params of the mixture of logistics
    nr_mix = int(ls[-1] / 10)
    n_batch, n_rows, n_cols = xs[0], xs[1], xs[2]

    logit_probs = pixelcnn_out[:, :, :, :nr_mix]
    mix_params = tf.reshape(pixelcnn_out[:, :, :, nr_mix:], xs + [nr_mix * 3])
    means = mix_params[:, :, :, :, :nr_mix]
    log_scales = tf.maximum(mix_params[:, :, :, :, nr_mix:2 * nr_mix], -7.)
    coeffs = tf.nn.tanh(mix_params[:, :, :, :, 2 * nr_mix:3 * nr_mix])

    # here and below: getting the means and adjusting them based on
    # preceding sub-pixels
    x_ = tf.reshape(x, xs + [1]) + tf.zeros(xs + [nr_mix])
    m2 = tf.reshape(
        means[:, :, :, 1, :] + coeffs[:, :, :, 0, :] * x_[:, :, :, 0, :],
        [n_batch, n_rows, n_cols, 1, nr_mix])
    m3 = tf.reshape(
        means[:, :, :, 2, :] + coeffs[:, :, :, 1, :] * x_[:, :, :, 0, :]
        + coeffs[:, :, :, 2, :] * x_[:, :, :, 1, :],
        [n_batch, n_rows, n_cols, 1, nr_mix])
    means = tf.concat([
        tf.reshape(means[:, :, :, 0, :], [n_batch, n_rows, n_cols, 1, nr_mix]),
        m2,
        m3,
    ], 3)

    # B is batch size (1)
    # H and W are height and width (32)
    # C is channels (3)
    # M is number of mixtures (10)
    # shapes are (B, H, W, C, M)
    eval_pts = tf.constant(
        np.linspace(-1 + 1. / 256, 1 - 1. / 256, 255, dtype=np.float32))
    eval_pts = tf.reshape(eval_pts, (1, 1, 1, 1, 1, -1))
    eval_pts = tf.tile(eval_pts, (n_batch, n_rows, n_cols, 3, nr_mix, 1))
    log_scales = tf.reshape(
        log_scales, (n_batch, n_rows, n_cols, 3, nr_mix, 1))
    scales = tf.exp(log_scales)
    scales = tf.tile(scales, (1, 1, 1, 1, 1, 255))
    means = tf.reshape(means, (n_batch, n_rows, n_cols, 3, nr_mix, 1))
    means = tf.tile(means, (1, 1, 1, 1, 1, 255))

    # Prepending 0 and appending 1 to the 255 sigmoid values and subtracting
    # gives 256 per-bin differences along the last axis.
    evals = tf.sigmoid((eval_pts - means) / scales)
    eval_upper = tf.concat(
        [evals, tf.ones((n_batch, n_rows, n_cols, 3, nr_mix, 1))], axis=5)
    eval_lower = tf.concat(
        [tf.zeros((n_batch, n_rows, n_cols, 3, nr_mix, 1)), evals], axis=5)
    eval_diffs = eval_upper - eval_lower

    probs_tiled = tf.nn.softmax(
        tf.tile(
            tf.reshape(logit_probs, (n_batch, n_rows, n_cols, 1, nr_mix, 1)),
            (1, 1, 1, 3, 1, 256)),
        axis=4)
    # Sum out the mixture axis: result is (B, H, W, 3, 256).
    probs = tf.reduce_sum(eval_diffs * probs_tiled, axis=4)

    # input image has elements in [0, 255]
    # epsilon is 0-255
    def pixeldefend(input_image, eps=16):
        """Purify ``input_image`` one sub-pixel at a time.

        Sub-pixels are visited in row, column, channel order. At each visit
        the probabilities are re-evaluated on the partially purified image,
        and the intensity within ``eps`` of the input value (clipped to
        [0, 255]) that has the highest probability is written back.

        Raises:
            ValueError: if ``eps`` is negative.
        """
        if eps < 0:
            raise ValueError('eps must be non-negative, got %r' % (eps,))

        # The search window is centred on the input intensity itself.
        # Recovering it by undoing the [-1, 1] rescale and flooring can be
        # off by one because of floating-point round-off.
        window_centres = np.floor(np.asarray(input_image, dtype=np.float64))
        purified = 2.0 * np.copy(input_image) / 255.0 - 1.0  # rescale to [-1, 1]

        # Loop bounds come from the graph's static shape, which the fed image
        # has to match, rather than from a fixed 32x32 size.
        for yi in range(n_rows):
            for xi in range(n_cols):
                # we have to do this one channel at a time, due to
                # channel-wise dependencies
                for ki in range(3):
                    bin_probs = sess.run(probs, {x: [purified]})[0, yi, xi, ki]
                    centre = window_centres[yi, xi, ki]
                    first = int(max(centre - eps, 0))
                    stop = int(min(centre + eps, 255) + 1)
                    # np.argmax keeps the first maximum, so ties resolve to
                    # the lowest intensity.
                    best_idx = first + int(np.argmax(bin_probs[first:stop]))
                    purified[yi, xi, ki] = 2.0 * best_idx / 255.0 - 1.0
        return 255.0 * ((purified + 1.0) / 2.0)

    return pixeldefend
def sample(mean, std):
    """Return ``mean + noise * std`` with standard-normal ``noise``.

    The noise tensor has shape ``[shape[0], shape[1]]`` where ``shape`` is the
    static shape of ``mean``.
    """
    mean_shape = utils.int_shape(mean)
    with tf.variable_scope('Sample'):
        noise = tf.random_normal([mean_shape[0], mean_shape[1]])
        scaled_noise = tf.multiply(noise, std)
        return mean + scaled_noise
def unsqueeze(self, x):
    """Reshape ``x`` from ``[N, d/f1, n/f2, 2*f1*f2]`` back to ``[N, d, n, 2]``.

    The factors ``f1`` and ``f2`` are read from ``self``.
    """
    # x.shape = [N, d/f1, n/f2, 2*f1*f2]
    batch, rows, cols, _ = int_shape(x)
    target_shape = [batch, rows * self.f1, cols * self.f2, 2]
    return tf.reshape(x, target_shape)