def generator(batch_size, latent_size, args, reuse=False):
    """Adds generator nodes to the graph.

    From noise, applies deconv2d until image is scaled up to match the dataset.

    Args:
        batch_size: Integer, number of images to generate.
        latent_size: Integer, dimensionality of the noise vector z.
        args: Argparse struct (kept for interface parity; not read here).
        reuse: Boolean, whether to reuse variables.

    Returns:
        Tensor of shape [batch_size, 64*64*3]: flattened generated images in
        tanh range [-1, 1].
    """
    output_dim = 64 * 64 * 3
    with arg_scope([hem.dense, hem.deconv2d],
                   reuse=reuse,
                   use_batch_norm=True,
                   activation=tf.nn.relu):
        z = tf.random_normal([batch_size, latent_size])
        y = hem.dense(z, latent_size, 4 * 4 * 4 * latent_size, name='fc1')
        # un-flatten to NCHW: a 4x4 spatial map with 4*latent_size channels
        y = tf.reshape(y, [-1, 4 * latent_size, 4, 4])
        y = hem.deconv2d(y, 4 * latent_size, 2 * latent_size, 5, 2, name='dc1')
        y = hem.deconv2d(y, 2 * latent_size, latent_size, 5, 2, name='dc2')
        # floor division keeps this an int without the float round-trip of
        # the original int(latent_size / 2); identical for non-negative sizes
        y = hem.deconv2d(y, latent_size, latent_size // 2, 5, 2, name='dc3')
        # final layer: no batch norm, tanh squashes output into [-1, 1]
        y = hem.deconv2d(y, latent_size // 2, 3, 5, 2, name='dc4',
                         activation=tf.tanh, use_batch_norm=False)
        y = tf.reshape(y, [-1, output_dim])
    return y
def g_mean_provided2(self, x, y_bar, args, reuse=False):
    """Encoder/decoder generator with U-Net style skip connections.

    Args:
        self: Owning model object (not otherwise used in this method).
        x: Tensor, input batch in NCHW layout; the layer-size comments below
            assume 3x65x65 RGB — TODO confirm with the caller.
        y_bar: Tensor, mean conditioning value. NOTE(review): currently
            unused — the concat that would inject it is commented out below.
            Either restore that concat or drop the parameter.
        args: Argparse struct; only args.batch_size is read here.
        reuse: Boolean, whether to reuse variables in the scopes.

    Returns:
        Tensor, single-channel prediction cropped to 29x29 (NCHW), with no
        final activation applied.
    """
    with tf.variable_scope('encoder', reuse=reuse), \
         arg_scope([hem.conv2d],
                   reuse=reuse,
                   filter_size=5,
                   stride=2,
                   padding='VALID',
                   init=tf.contrib.layers.xavier_initializer,
                   activation=tf.nn.relu):
        # 65x65x3
        # append a constant all-ones channel so the filters see a bias plane
        x = tf.concat([x, tf.ones((args.batch_size, 1, 65, 65))], axis=1)
        e1 = hem.conv2d(x, 4, 64, name='e1')      # 31x31x64
        # e1 = tf.concat([e1, tf.ones((args.batch_size, 1, 31, 31)) * y_bar], axis=1)
        e2 = hem.conv2d(e1, 64, 128, name='e2')   # 14x14x128
        e3 = hem.conv2d(e2, 128, 256, name='e3')  # 5x5x256
        e4 = hem.conv2d(e3, 256, 512, name='e4')  # 1x1x512
    with tf.variable_scope('decoder', reuse=reuse), \
         arg_scope([hem.deconv2d, hem.conv2d],
                   reuse=reuse,
                   filter_size=5,
                   stride=2,
                   init=tf.contrib.layers.xavier_initializer,
                   padding='VALID',
                   activation=lambda x: hem.lrelu(x, leak=0.2)):
        # 1x1x512
        y_hat = hem.deconv2d(e4, 512, 256, output_shape=(args.batch_size, 256, 5, 5), name='d1')  # 5x5x256
        # skip connection from the matching encoder scale (channel axis)
        y_hat = tf.concat([y_hat, e3], axis=1)  # 5x5x512
        y_hat = hem.deconv2d(y_hat, 512, 128, output_shape=(args.batch_size, 128, 14, 14), name='d2')  # 14x14x128
        y_hat = tf.concat([y_hat, e2], axis=1)  # 14x14x256
        y_hat = hem.deconv2d(y_hat, 256, 64, output_shape=(args.batch_size, 64, 31, 31), name='d3')  # 31x31x64
        y_hat = tf.concat([y_hat, e1], axis=1)  # 31x31x128
        # 1x1 conv collapses the channels to a single map, no activation
        y_hat = hem.conv2d(y_hat, 128, 1, stride=1, filter_size=1, padding='SAME', activation=None, name='d4')  # 31x31x1
        # crop the 31x31 map down to the 29x29 target size
        y_hat = hem.crop_to_bounding_box(y_hat, 0, 0, 29, 29)  # 29x29x1
        # y_hat = tf.maximum(y_hat, tf.zeros_like(y_hat))
        return y_hat
def _decoder(x, args, reuse=False):
    """Adds decoder nodes to the graph.

    Args:
        x: Tensor, the latent variable tensor from the encoder.
        args: Argparse structure.
        reuse: Boolean, whether to reuse pinned variables (in multi-GPU environment).

    Returns:
        Tensor, a single-channel image representing the predicted depth map.
    """
    encoder_maps = tf.get_collection('conv_layers')

    def fuse(t, i):
        # skip connection: stack the matching encoder feature map on the
        # channel axis (NCHW) when skip layers are enabled
        return tf.concat((t, encoder_maps[i]), axis=1) if args.skip_layers else t

    # with skip connections active, each fused layer doubles the channel
    # count feeding the next op
    m = 2 if args.skip_layers else 1
    with arg_scope([hem.dense, hem.conv2d, hem.deconv2d],
                   reuse=reuse,
                   use_batch_norm=args.batch_norm,
                   dropout=args.dropout,
                   activation=tf.nn.relu):
        # TODO add better support for skip layers in layer ops
        # input = 4x4x32
        x = fuse(hem.conv2d(x, 32, 96, 1, name='d_c1'), 4)              # 4x4x96 + e_c5
        x = fuse(hem.conv2d(x, 96 * m, 256, 1, name='d_c2'), 3)         # 4x4x256 + e_c4
        x = fuse(hem.deconv2d(x, 256 * m, 256, 5, 2, name='d_dc1'), 2)  # 8x8x256 + e_c3
        x = fuse(hem.deconv2d(x, 256 * m, 128, 5, 2, name='d_dc2'), 1)  # 16x16x128 + e_c2
        x = fuse(hem.deconv2d(x, 128 * m, 64, 5, 2, name='d_dc3'), 0)   # 32x32x64 + e_c1
        # final layer: tanh output, dropout and batch-norm disabled
        x = hem.deconv2d(x, 64 * m, 1, 5, 2, name='d_dc4',
                         activation=tf.nn.tanh, dropout=0, use_batch_norm=False)
        # output = 64x64x1
    return x
def decoder(x, args, reuse=False):
    """Adds decoder nodes to the graph.

    Args:
        x: Tensor, encoded image representation.
        args: Argparse struct; args.latent_size is the input width.
        reuse: Boolean, whether to reuse variables.

    Returns:
        Tensor, a 3-channel image in tanh range [-1, 1] (NCHW).
    """
    with arg_scope([hem.dense, hem.conv2d, hem.deconv2d],
                   reuse=reuse,
                   activation=tf.nn.relu):
        net = hem.dense(x, args.latent_size, 32 * 4 * 4, name='d1')
        # un-flatten to a 4x4 spatial map with 32 channels (NCHW)
        net = tf.reshape(net, [-1, 32, 4, 4])
        net = hem.conv2d(net, 32, 96, 1, name='c1')
        net = hem.conv2d(net, 96, 256, 1, name='c2')
        # three stride-2 deconvs double the spatial extent each time
        for n_in, n_out, layer_name in ((256, 256, 'dc1'),
                                        (256, 128, 'dc2'),
                                        (128, 64, 'dc3')):
            net = hem.deconv2d(net, n_in, n_out, 5, 2, name=layer_name)
        # final layer maps to RGB with tanh output
        net = hem.deconv2d(net, 64, 3, 5, 2, name='dc4', activation=tf.nn.tanh)
    return net
def generator(z, x, reuse=False):
    """Given noise z (256x256x1) and an RGB map x (256x256x3), build a
    generator that emits a depth map y (256x256x1).

    Args:
        z: Tensor, single-channel noise map (NCHW).
        x: Tensor, RGB input map (NCHW).
        reuse: Boolean, whether to reuse variables.

    Returns:
        Tensor, single-channel output in tanh range [-1, 1].
    """
    down_layers = ((4, 64, 'g1'), (64, 128, 'g2'),
                   (128, 256, 'g3'), (256, 512, 'g4'))
    up_layers = ((512, 256, 'g5'), (256, 128, 'g6'), (128, 64, 'g7'))
    with tf.variable_scope('generator', reuse=reuse), \
         arg_scope([hem.conv2d, hem.deconv2d],
                   reuse=reuse,
                   filter_size=5,
                   stride=2,
                   padding='VALID',
                   init=lambda: tf.random_normal_initializer(mean=0, stddev=0.02),
                   activation=lambda t: hem.lrelu(t, leak=0.2)):
        # stack noise channel onto the RGB channels (NCHW axis 1)
        y = tf.concat([x, z], axis=1)
        for n_in, n_out, layer_name in down_layers:
            y = hem.conv2d(y, n_in, n_out, name=layer_name)
        for n_in, n_out, layer_name in up_layers:
            y = hem.deconv2d(y, n_in, n_out, name=layer_name)
        # final layer collapses to one channel with tanh output
        y = hem.deconv2d(y, 64, 1, name='g8', activation=tf.tanh)
    return y
def generatorE2(x, args, reuse=False):
    """U-Net style encoder/decoder generator with a uniform-noise input channel.

    Args:
        x: Tensor, input batch in NCHW; the layer comments assume 3x64x64 RGB.
        args: Argparse struct; only args.batch_size is read here.
        reuse: Boolean, whether to reuse variables in the scopes.

    Returns:
        Tensor, single-channel 32x32 map (NCHW) squashed by tanh to [-1, 1].
    """
    with tf.variable_scope('encoder', reuse=reuse), \
         arg_scope([hem.conv2d],
                   reuse=reuse,
                   stride=2,
                   padding='SAME',
                   filter_size=5,
                   init=tf.contrib.layers.xavier_initializer,
                   activation=tf.nn.relu):
        # 64x64x3
        # extra uniform-noise channel gives the generator a source of randomness
        noise = tf.random_uniform([args.batch_size, 1, 64, 64], minval=-1.0, maxval=1.0)
        x = tf.concat([x, noise], axis=1)  # 64x64x4
        # NOTE(review): the declared input-channel count 7 disagrees with the
        # 64x64x4 concat above (3 RGB + 1 noise); sibling generators in this
        # file pass 4 here. Confirm whether x actually arrives with 6 channels
        # upstream or whether this should be 4.
        e1 = hem.conv2d(x, 7, 64, name='e1')      # 32x32x64
        e2 = hem.conv2d(e1, 64, 128, name='e2')   # 16x16x128
        e3 = hem.conv2d(e2, 128, 256, name='e3')  # 8x8x256
        e4 = hem.conv2d(e3, 256, 512, name='e4')  # 4x4x512
        # 4x4 VALID conv collapses the map to a 1x1 latent code
        e5 = hem.conv2d(e4, 512, 1024, filter_size=4, padding='VALID', name='e5')  # 1x1x1024
    with tf.variable_scope('decoder', reuse=reuse), \
         arg_scope([hem.deconv2d, hem.conv2d],
                   reuse=reuse,
                   stride=2,
                   init=tf.contrib.layers.xavier_initializer,
                   padding='SAME',
                   filter_size=5,
                   activation=lambda x: hem.lrelu(x, leak=0.2)):
        # 1x1x1024
        y = hem.deconv2d(e5, 1024, 512, output_shape=(args.batch_size, 512, 4, 4),
                         filter_size=4, padding='VALID', name='d1')  # 4x4x512
        # skip connections: fuse the matching encoder scale on the channel axis
        y = tf.concat([y, e4], axis=1)  # 4x4x1024
        y = hem.deconv2d(y, 1024, 256, output_shape=(args.batch_size, 256, 8, 8), name='d2')  # 8x8x256
        y = tf.concat([y, e3], axis=1)  # 8x8x512
        y = hem.deconv2d(y, 512, 128, output_shape=(args.batch_size, 128, 16, 16), name='d3')  # 16x16x128
        y = tf.concat([y, e2], axis=1)  # 16x16x256
        y = hem.deconv2d(y, 256, 64, output_shape=(args.batch_size, 64, 32, 32), name='d4')  # 32x32x64
        y = tf.concat([y, e1], axis=1)  # 32x32x128
        # 1x1 conv, stride 1, SAME padding preserves the 32x32 extent
        # (the original comment claimed 31x31x1)
        y = hem.conv2d(y, 128, 1, stride=1, filter_size=1, activation=tf.nn.tanh, name='d5')  # 32x32x1
        return y
def decoder(x_rep, args, channel_output=3, reuse=False):
    """Decode a latent representation up to a full-resolution image.

    Args:
        x_rep: Tensor, latent representation with 384 channels (NCHW).
        args: Argparse struct; only args.batch_size is read here.
        channel_output: Integer, number of channels in the decoded image.
        reuse: Boolean, whether to reuse variables.

    Returns:
        Tensor of shape (batch_size, channel_output, 256, 256) in tanh
        range [-1, 1].
    """
    # (in_channels, out_channels, output spatial size, layer name)
    plan = ((384, 192, 5, 'd1'),
            (192, 48, 13, 'd2'),
            (48, 24, 29, 'd3'),
            (24, 12, 61, 'd4'),
            (12, 6, 126, 'd5'))
    with arg_scope([hem.conv2d, hem.deconv2d],
                   reuse=reuse,
                   filter_size=5,
                   stride=2,
                   padding='VALID',
                   init=hem.xavier_initializer,
                   use_batch_norm=True,
                   activation=lambda t: hem.lrelu(t, leak=0.2)):
        x_hat = x_rep
        for n_in, n_out, size, layer_name in plan:
            x_hat = hem.deconv2d(x_hat, n_in, n_out,
                                 output_shape=(args.batch_size, n_out, size, size),
                                 name=layer_name)
        # final layer maps to the requested channel count with tanh output
        x_hat = hem.deconv2d(x_hat, 6, channel_output, activation=tf.tanh,
                             output_shape=(args.batch_size, channel_output, 256, 256),
                             name='d6')
    return x_hat
def g_baseline(self, x, args, reuse=False):
    """Baseline encoder/decoder generator with a configurable noise injection.

    args.noise_layer selects where (if anywhere) a uniform-noise map is
    concatenated before a layer: 'x', 'e1'..'e3' (encoder), 'e4' / 'e4-512'
    (bottleneck, 1 or 512 noise channels), or 'd2'..'d4' (decoder). Exactly
    one injection point is active per run; every other branch builds the
    plain, noise-free layer.

    Args:
        self: Owning model object (not otherwise used in this method).
        x: Tensor, input batch in NCHW; the layer comments assume 3x65x65 RGB.
        args: Argparse struct; reads batch_size, e_bn and noise_layer.
        reuse: Boolean, whether to reuse variables in the scopes.

    Returns:
        Tensor, single-channel prediction cropped to 29x29 (NCHW), with no
        final activation applied.
    """
    with tf.variable_scope('encoder', reuse=reuse), \
         arg_scope([hem.conv2d],
                   reuse=reuse,
                   filter_size=5,
                   stride=2,
                   padding='VALID',
                   use_batch_norm=args.e_bn,
                   init=tf.contrib.layers.xavier_initializer,
                   activation=tf.nn.relu):
        # 65x65x3
        # each noisy branch declares one extra input channel for the noise map
        if args.noise_layer == 'x':
            noise = tf.random_uniform([args.batch_size, 1, 65, 65], minval=0, maxval=1)
            e1 = hem.conv2d(tf.concat([x, noise], axis=1), 4, 64, name='e1')  # 31x31x64
        else:
            e1 = hem.conv2d(x, 3, 64, name='e1')
        if args.noise_layer == 'e1':
            noise = tf.random_uniform([args.batch_size, 1, 31, 31], minval=0, maxval=1)
            e2 = hem.conv2d(tf.concat([e1, noise], axis=1), 65, 128, name='e2')  # 14x14x128
        else:
            e2 = hem.conv2d(e1, 64, 128, name='e2')  # 14x14x128
        if args.noise_layer == 'e2':
            noise = tf.random_uniform([args.batch_size, 1, 14, 14], minval=0, maxval=1)
            e3 = hem.conv2d(tf.concat([e2, noise], axis=1), 129, 256, name='e3')  # 5x5x256
        else:
            e3 = hem.conv2d(e2, 128, 256, name='e3')  # 5x5x256
        if args.noise_layer == 'e3':
            noise = tf.random_uniform([args.batch_size, 1, 5, 5], minval=0, maxval=1)
            e4 = hem.conv2d(tf.concat([e3, noise], axis=1), 257, 512, name='e4')  # 1x1x512
        else:
            e4 = hem.conv2d(e3, 256, 512, name='e4')  # 1x1x512
    with tf.variable_scope('decoder', reuse=reuse), \
         arg_scope([hem.deconv2d, hem.conv2d],
                   reuse=reuse,
                   filter_size=5,
                   stride=2,
                   init=tf.contrib.layers.xavier_initializer,
                   padding='VALID',
                   activation=lambda x: hem.lrelu(x, leak=0.2)):
        # 1x1x512
        # TODO: noise could be of size 512, instead of 1
        if args.noise_layer == 'e4':
            noise = tf.random_uniform([args.batch_size, 1, 1, 1], minval=0, maxval=1)
            y_hat = hem.deconv2d(tf.concat([e4, noise], axis=1), 513, 256, output_shape=(args.batch_size, 256, 5, 5), name='d1')  # 5x5x256
        elif args.noise_layer == 'e4-512':
            # wide variant: as many noise channels as bottleneck features
            noise = tf.random_uniform([args.batch_size, 512, 1, 1], minval=0, maxval=1)
            y_hat = hem.deconv2d(tf.concat([e4, noise], axis=1), 1024, 256, output_shape=(args.batch_size, 256, 5, 5), name='d1')  # 5x5x256
        else:
            y_hat = hem.deconv2d(e4, 512, 256, output_shape=(args.batch_size, 256, 5, 5), name='d1')  # 5x5x256
        # skip connections fuse the matching encoder scale on the channel axis
        y_hat = tf.concat([y_hat, e3], axis=1)  # 5x5x512
        if args.noise_layer == 'd2':
            noise = tf.random_uniform([args.batch_size, 1, 5, 5], minval=0, maxval=1)
            y_hat = hem.deconv2d(tf.concat([y_hat, noise], axis=1), 513, 128, output_shape=(args.batch_size, 128, 14, 14), name='d2')  # 14x14x128
        else:
            y_hat = hem.deconv2d(y_hat, 512, 128, output_shape=(args.batch_size, 128, 14, 14), name='d2')  # 14x14x128
        y_hat = tf.concat([y_hat, e2], axis=1)  # 14x14x256
        if args.noise_layer == 'd3':
            noise = tf.random_uniform([args.batch_size, 1, 14, 14], minval=0, maxval=1)
            y_hat = hem.deconv2d(tf.concat([y_hat, noise], axis=1), 257, 64, output_shape=(args.batch_size, 64, 31, 31), name='d3')  # 31x31x64
        else:
            y_hat = hem.deconv2d(y_hat, 256, 64, output_shape=(args.batch_size, 64, 31, 31), name='d3')  # 31x31x64
        y_hat = tf.concat([y_hat, e1], axis=1)  # 31x31x128
        if args.noise_layer == 'd4':
            noise = tf.random_uniform([args.batch_size, 1, 31, 31], minval=0, maxval=1)
            y_hat = hem.conv2d(tf.concat([y_hat, noise], axis=1), 129, 1, stride=1, filter_size=1, padding='SAME', activation=None, name='d4')  # 31x31x1
        else:
            # 1x1 conv collapses channels to a single map, no activation
            y_hat = hem.conv2d(y_hat, 128, 1, stride=1, filter_size=1, padding='SAME', activation=None, name='d4')  # 31x31x1
        # crop the 31x31 map down to the 29x29 target size
        y_hat = hem.crop_to_bounding_box(y_hat, 0, 0, 29, 29)  # 29x29x1
        # y_hat = tf.maximum(y_hat, tf.zeros_like(y_hat))
        return y_hat
def generator(x, args, reuse=False): """Adds generator nodes to the graph using input `x`. This is an encoder-decoder architecture. Args: x: Tensor, the input image batch (256x256 RGB images) args: Argparse struct. reuse: Bool, whether to reuse variables. Returns: Tensor, the generator's output, the estimated depth map (256x256 grayscale). """ # encoder with arg_scope([hem.conv2d], reuse=reuse, use_batch_norm=args.batch_norm_gen, filter_size=4, stride=2, init=lambda: tf.random_normal_initializer(mean=0, stddev=0.02), activation=lambda x: hem.lrelu(x, leak=0.2)): with tf.variable_scope('enocder', reuse=reuse): if 'input' in args.noise: noise = tf.random_uniform([args.batch_size, 1, 256, 256], minval=-1.0, maxval=1.0) e1 = hem.conv2d(tf.concat([x, noise], axis=1), 4, 64, name='1', use_batch_norm=False) # 128x128x64 else: e1 = hem.conv2d(x, 3, 64, name='1', use_batch_norm=False) e2 = hem.conv2d(e1, 64, 128, name='2') # 64x64x128 e3 = hem.conv2d(e2, 128, 256, name='3') # 32x32x256 e4 = hem.conv2d(e3, 256, 512, name='4') # 16x16x512 e5 = hem.conv2d(e4, 512, 512, name='5') # 8x8x512 e6 = hem.conv2d(e5, 512, 512, name='6') # 4x4x512 e7 = hem.conv2d(e6, 512, 512, name='7') # 2x2x512 e8 = hem.conv2d(e7, 512, 512, name='8') # 1x1x512 # decoder with arg_scope([hem.deconv2d, hem.conv2d], reuse=reuse, use_batch_norm=True, filter_size=4, stride=2, init=lambda: tf.random_normal_initializer(mean=0, stddev=0.02), activation=lambda x: hem.lrelu(x, leak=0)): # TODO figure out cleaner way to add skip layers with tf.variable_scope('decoder', reuse=reuse): if 'latent' in args.noise: noise = tf.random_uniform([args.batch_size, 512, 1, 1], minval=-1.0, maxval=1.0) y = hem.deconv2d(tf.concat([e8, noise], axis=1), 1024, 512, name='1', dropout=args.dropout) # 2x2x512*2 else: y = hem.deconv2d(e8, 512, 512, name='1', dropout=args.dropout) # 2x2x512*2 y = tf.concat([y, e7], axis=1) y = hem.deconv2d(y, 1024, 512, name='2', dropout=args.dropout) # 4x4x512*2 y = tf.concat([y, e6], axis=1) y = 
hem.deconv2d(y, 1024, 512, name='3', dropout=args.dropout) # 8x8x512*2 y = tf.concat([y, e5], axis=1) y = hem.deconv2d(y, 1024, 512, name='4') # 16x16x512*2 y = tf.concat([y, e4], axis=1) y = hem.deconv2d(y, 1024, 256, name='5') # 32x32x256*2 y = tf.concat([y, e3], axis=1) y = hem.deconv2d(y, 512, 128, name='6') # 64x64x128*2 y = tf.concat([y, e2], axis=1) y = hem.deconv2d(y, 256, 64, name='7') # 128x128x64*2 y = tf.concat([y, e1], axis=1) if 'end' in args.noise: noise = tf.random_uniform([args.batch_size, 1, 128, 128], minval=-1.0, maxval=1.0) y = hem.deconv2d(tf.concat([y, noise], axis=1), 129, 1, name='8', activation=tf.nn.tanh) # 256x256x1 else: y = hem.deconv2d(y, 128, 1, name='8', activation=tf.nn.tanh) # 256x256x1 return y
def generator(x, args, reuse=False):
    """Adds generator nodes to the graph using input `x`.

    This is an encoder-decoder architecture with skip connections; when
    args.garch == 'large', extra stride-1 conv/deconv layers are inserted at
    each scale.

    Args:
        x: Tensor, the input image batch (65x65 RGB images, NCHW)
        args: Argparse struct; reads batch_size, batch_norm_gen, garch.
        reuse: Bool, whether to reuse variables.

    Returns:
        Tensor, the generator's output, the estimated depth map. NOTE(review):
        the layer math below produces 31x31, but the original docstring said
        33x33 — confirm which is intended.
    """
    # encoder
    with tf.variable_scope('encoder', reuse=reuse), \
         arg_scope([hem.conv2d],
                   reuse=reuse,
                   use_batch_norm=args.batch_norm_gen,
                   filter_size=5,
                   stride=2,
                   padding='VALID',
                   init=tf.contrib.layers.xavier_initializer,
                   # init=lambda: tf.random_normal_initializer(mean=0, stddev=0.02),
                   activation=tf.nn.relu):
        # activation=lambda x: hem.lrelu(x, leak=0.2)):
        # 65x65x3
        # uniform-noise channel gives the generator a source of randomness
        noise = tf.random_uniform([args.batch_size, 1, 65, 65], minval=-1.0, maxval=1.0)
        x = tf.concat([x, noise], axis=1)  # 65x65x4
        e1 = hem.conv2d(x, 4, 64, name='1', use_batch_norm=False)  # 31x31x64
        # 'large' variant: two extra stride-1 convs at each encoder scale
        if args.garch == 'large':
            e1 = hem.conv2d(e1, 64, 64, stride=1, padding='SAME', name='1b')  # 31x31x64
            e1 = hem.conv2d(e1, 64, 64, stride=1, padding='SAME', name='1c')  # 31x31x64
        e2 = hem.conv2d(e1, 64, 128, name='2')  # 14x14x128
        if args.garch == 'large':
            e2 = hem.conv2d(e2, 128, 128, stride=1, padding='SAME', name='2b')  # 14x14x128
            e2 = hem.conv2d(e2, 128, 128, stride=1, padding='SAME', name='2c')  # 14x14x128
        e3 = hem.conv2d(e2, 128, 256, name='3')  # 5x5x256
        if args.garch == 'large':
            e3 = hem.conv2d(e3, 256, 256, stride=1, padding='SAME', name='3b')  # 5x5x256
            e3 = hem.conv2d(e3, 256, 256, stride=1, padding='SAME', name='3c')  # 5x5x256
        e4 = hem.conv2d(e3, 256, 512, name='4')  # 1x1x512
    # decoder
    with tf.variable_scope('decoder', reuse=reuse), \
         arg_scope([hem.deconv2d, hem.conv2d],
                   reuse=reuse,
                   use_batch_norm=args.batch_norm_gen,
                   filter_size=5,
                   stride=2,
                   init=tf.contrib.layers.xavier_initializer,
                   # init=lambda: tf.random_normal_initializer(mean=0, stddev=0.02),
                   padding='VALID',
                   activation=lambda x: hem.lrelu(x, leak=0.2)):
        # 1x1x512
        # TODO figure out cleaner way to add skip layers
        y = hem.deconv2d(e4, 512, 256, output_shape=(args.batch_size, 256, 5, 5), name='1')  # 5x5x256
        # skip connections: fuse the matching encoder scale on the channel axis
        y = tf.concat([y, e3], axis=1)  # 5x5x512
        # 'large' variant: one extra stride-1 deconv after each skip fusion
        if args.garch == 'large':
            y = hem.deconv2d(y, 512, 512, output_shape=(args.batch_size, 512, 5, 5), stride=1, padding='SAME', name='1b')
        y = hem.deconv2d(y, 512, 128, output_shape=(args.batch_size, 128, 14, 14), name='2')  # 14x14x128
        y = tf.concat([y, e2], axis=1)  # 14x14x256
        if args.garch == 'large':
            y = hem.deconv2d(y, 256, 256, output_shape=(args.batch_size, 256, 14, 14), stride=1, padding='SAME', name='2b')
        y = hem.deconv2d(y, 256, 64, output_shape=(args.batch_size, 64, 31, 31), name='3')  # 31x31x64
        y = tf.concat([y, e1], axis=1)  # 31x31x128
        if args.garch == 'large':
            y = hem.deconv2d(y, 128, 128, output_shape=(args.batch_size, 128, 31, 31), stride=1, padding='SAME', name='3b')  # 31x31x128
        # stride-1 SAME conv collapses channels to the single-channel output
        y = hem.conv2d(y, 128, 1, stride=1, padding='SAME', activation=tf.nn.tanh, name='7')  # 31x31x1
    return y