def _residual_block(input_net,
                    num_outputs,
                    kernel_size,
                    stride=1,
                    padding_size=0,
                    activation_fn=tf.nn.relu,
                    normalizer_fn=None,
                    name='residual_block'):
  """Residual Block.

  Input Tensor X -> Conv1 -> IN -> ReLU -> Conv2 -> IN + X

  PyTorch Version:
  https://github.com/yunjey/StarGAN/blob/fbdb6a6ce2a4a92e1dc034faec765e0dbe4b8164/model.py#L7

  Args:
    input_net: Tensor as input.
    num_outputs: (int) Number of output channels for the convolution.
    kernel_size: (int) Size of the square kernel for the convolution.
    stride: (int) Stride for the convolution. Defaults to 1.
    padding_size: (int) Padding size for the convolution. Defaults to 0.
    activation_fn: Activation function.
    normalizer_fn: Normalization function.
    name: Name scope.

  Returns:
    Residual Tensor with the same shape as the input tensor.
  """
  with tf.variable_scope(name):
    with tf.contrib.framework.arg_scope(
        [tf.contrib.layers.conv2d],
        num_outputs=num_outputs,
        kernel_size=kernel_size,
        stride=stride,
        padding='VALID',
        normalizer_fn=normalizer_fn,
        activation_fn=None):
      res_block = ops.pad(input_net, padding_size)
      res_block = tf.contrib.layers.conv2d(inputs=res_block, scope='conv_0')
      res_block = activation_fn(res_block, name='activation_0')
      res_block = ops.pad(res_block, padding_size)
      res_block = tf.contrib.layers.conv2d(inputs=res_block, scope='conv_1')
      output_net = res_block + input_net
  return output_net
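# Hypothetical usage sketch (my addition, assuming the module-level `tf` and
# `ops` imports): a shape-preserving block as used in the StarGAN bottleneck.
# With kernel_size=3, stride=1 and padding_size=1, the 'VALID' convolutions
# keep (h, w) intact, so the skip connection `res_block + input_net` lines up.
def _sketch_residual_usage():
  features = tf.zeros([4, 32, 32, 256])
  out = _residual_block(
      features,
      num_outputs=256,
      kernel_size=3,
      padding_size=1,
      normalizer_fn=tf.contrib.layers.instance_norm)
  # pad(1) -> 34, then k=3 VALID conv -> 32: spatial shape is preserved.
  assert out.shape.as_list() == features.shape.as_list()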
def test_padding_with_tensor_of_invalid_shape(self):
  n = 2
  invalid_rank = 1
  h = 128
  w = 64
  c = 3
  pad = 3
  test_input_tensor = tf.random_uniform((n, invalid_rank, h, w, c))
  with self.assertRaises(ValueError):
    ops.pad(test_input_tensor, padding_size=pad)
def discriminator_output_source(input_net):
  """Output Layer for Source in the Discriminator.

  Determines whether the image is real or fake based on the extracted
  features. We follow the original paper design where the output is not a
  simple (batch_size) shape Tensor but rather a
  (batch_size, h / 64, w / 64, 1) shape Tensor. We will get the correct shape
  later when we piece things together.

  PyTorch Version:
  https://github.com/yunjey/StarGAN/blob/fbdb6a6ce2a4a92e1dc034faec765e0dbe4b8164/model.py#L79

  Args:
    input_net: Tensor of shape (batch_size, h / 64, w / 64, 2048) as features.

  Returns:
    Tensor of shape (batch_size, h / 64, w / 64, 1) as the score.
  """
  with tf.variable_scope('discriminator_output_source'):
    output_src = ops.pad(input_net, 1)
    output_src = tf.contrib.layers.conv2d(
        inputs=output_src,
        num_outputs=1,
        kernel_size=3,
        stride=1,
        padding='VALID',
        activation_fn=None,
        normalizer_fn=None,
        biases_initializer=None,
        scope='conv')
  return output_src
def test_padding_with_3D_tensor(self):
  h = 128
  w = 64
  c = 3
  pad = 3
  test_input_tensor = tf.random_uniform((h, w, c))
  test_output_tensor = ops.pad(test_input_tensor, padding_size=pad)
  with self.test_session() as sess:
    output = sess.run(test_output_tensor)
    self.assertTupleEqual((h + pad * 2, w + pad * 2, c), output.shape)
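# A possible companion test (my addition, assuming ops.pad pads only the two
# spatial dimensions of a rank-4 NHWC tensor, mirroring the rank-3 case above):
def test_padding_with_4D_tensor(self):
  n = 2
  h = 128
  w = 64
  c = 3
  pad = 3
  test_input_tensor = tf.random_uniform((n, h, w, c))
  test_output_tensor = ops.pad(test_input_tensor, padding_size=pad)
  with self.test_session() as sess:
    output = sess.run(test_output_tensor)
    # Batch and channel dimensions should be untouched.
    self.assertTupleEqual((n, h + pad * 2, w + pad * 2, c), output.shape)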
def generator_up_sample(input_net, num_outputs):
  """Up-sampling module in Generator.

  Up-sampling path for image generation in the Generator.

  PyTorch Version:
  https://github.com/yunjey/StarGAN/blob/fbdb6a6ce2a4a92e1dc034faec765e0dbe4b8164/model.py#L44

  Args:
    input_net: Tensor of shape (batch_size, h / 4, w / 4, 256).
    num_outputs: (int) Number of channels for the output tensor.

  Returns:
    Tensor of shape (batch_size, h, w, num_outputs).
  """
  with tf.variable_scope('generator_up_sample'):
    with tf.contrib.framework.arg_scope(
        [tf.contrib.layers.conv2d_transpose],
        kernel_size=4,
        stride=2,
        padding='VALID',
        normalizer_fn=tf.contrib.layers.instance_norm,
        activation_fn=tf.nn.relu):
      up_sample = tf.contrib.layers.conv2d_transpose(
          inputs=input_net, num_outputs=128, scope='deconv_0')
      up_sample = up_sample[:, 1:-1, 1:-1, :]
      up_sample = tf.contrib.layers.conv2d_transpose(
          inputs=up_sample, num_outputs=64, scope='deconv_1')
      up_sample = up_sample[:, 1:-1, 1:-1, :]

      output_net = ops.pad(up_sample, 3)
      output_net = tf.contrib.layers.conv2d(
          inputs=output_net,
          num_outputs=num_outputs,
          kernel_size=7,
          stride=1,
          padding='VALID',
          activation_fn=tf.nn.tanh,
          normalizer_fn=None,
          biases_initializer=None,
          scope='conv_0')
  return output_net
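# Why the `[:, 1:-1, 1:-1, :]` crop above: with padding='VALID', kernel_size=4
# and stride=2, conv2d_transpose maps h -> (h - 1) * 2 + 4 = 2 * h + 2, so
# slicing one pixel off each border restores an exact 2x up-sampling. A small
# static-shape check (my sketch, assuming the module-level `tf` import):
def _sketch_check_deconv_shape():
  x = tf.zeros([1, 32, 32, 256])
  y = tf.contrib.layers.conv2d_transpose(
      x, num_outputs=128, kernel_size=4, stride=2, padding='VALID')
  assert y.shape.as_list() == [1, 66, 66, 128]  # 2 * 32 + 2
  assert y[:, 1:-1, 1:-1, :].shape.as_list() == [1, 64, 64, 128]  # 2 * 32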
def discriminator_input_hidden(input_net, hidden_layer=6, init_num_outputs=64):
  """Input Layer + Hidden Layer in the Discriminator.

  Feature extraction pathway in the Discriminator.

  PyTorch Version:
  https://github.com/yunjey/StarGAN/blob/fbdb6a6ce2a4a92e1dc034faec765e0dbe4b8164/model.py#L68

  Args:
    input_net: Tensor of shape (batch_size, h, w, 3) as batch of images.
    hidden_layer: (int) Number of hidden layers. Defaults to 6 per the
      original implementation.
    init_num_outputs: (int) Number of hidden units in the first hidden layer.
      The number of hidden units doubles after each layer. Defaults to 64 per
      the original implementation.

  Returns:
    Tensor of shape (batch_size, h / 64, w / 64, 2048) as features.
  """
  num_outputs = init_num_outputs
  with tf.variable_scope('discriminator_input_hidden'):
    hidden = input_net
    for i in range(hidden_layer):
      hidden = ops.pad(hidden, 1)
      hidden = tf.contrib.layers.conv2d(
          inputs=hidden,
          num_outputs=num_outputs,
          kernel_size=4,
          stride=2,
          padding='VALID',
          activation_fn=None,
          normalizer_fn=None,
          scope='conv_{}'.format(i))
      hidden = tf.nn.leaky_relu(hidden, alpha=0.01)
      num_outputs = 2 * num_outputs
  return hidden
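# Hypothetical composition sketch (my addition): chaining the feature
# extractor into the source head defined earlier. For 128 x 128 images, six
# stride-2 layers give 128 / 2**6 = 2, i.e. (batch, 2, 2, 2048) features and
# a (batch, 2, 2, 1) real/fake score map.
def _sketch_discriminator_source(images):
  hidden = discriminator_input_hidden(images)
  return discriminator_output_source(hidden)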
def generator_down_sample(input_net, final_num_outputs=256):
  """Down-sampling module in Generator.

  Down-sampling pathway of the Generator architecture.

  PyTorch Version:
  https://github.com/yunjey/StarGAN/blob/fbdb6a6ce2a4a92e1dc034faec765e0dbe4b8164/model.py#L32

  Notes:
    We require dimension 1 and dimension 2 of the input_net to be fully
    defined for the correct down sampling.

  Args:
    input_net: Tensor of shape (batch_size, h, w, c + num_class).
    final_num_outputs: (int) Number of hidden units for the final layer.

  Returns:
    Tensor of shape (batch_size, h / 4, w / 4, 256).

  Raises:
    ValueError: If final_num_outputs is not divisible by 4, or input_net does
      not have a rank of 4, or dimension 1 and dimension 2 of input_net are
      not defined at graph construction time, or dimension 1 and dimension 2
      of input_net are not divisible by 4.
  """
  if final_num_outputs % 4 != 0:
    raise ValueError('Final number of outputs needs to be divisible by 4.')

  # Check the rank of input_net.
  input_net.shape.assert_has_rank(4)

  # Check dimension 1 is defined and divisible by 4.
  if input_net.shape[1]:
    if input_net.shape[1] % 4 != 0:
      raise ValueError(
          'Dimension 1 of the input should be divisible by 4, but is {} '
          'instead.'.format(input_net.shape[1]))
  else:
    raise ValueError('Dimension 1 of the input should be explicitly defined.')

  # Check dimension 2 is defined and divisible by 4.
  if input_net.shape[2]:
    if input_net.shape[2] % 4 != 0:
      raise ValueError(
          'Dimension 2 of the input should be divisible by 4, but is {} '
          'instead.'.format(input_net.shape[2]))
  else:
    raise ValueError('Dimension 2 of the input should be explicitly defined.')

  with tf.variable_scope('generator_down_sample'):
    with tf.contrib.framework.arg_scope(
        [tf.contrib.layers.conv2d],
        padding='VALID',
        biases_initializer=None,
        normalizer_fn=tf.contrib.layers.instance_norm,
        activation_fn=tf.nn.relu):
      down_sample = ops.pad(input_net, 3)
      down_sample = tf.contrib.layers.conv2d(
          inputs=down_sample,
          # Integer division keeps num_outputs an int under Python 3.
          num_outputs=final_num_outputs // 4,
          kernel_size=7,
          stride=1,
          scope='conv_0')

      down_sample = ops.pad(down_sample, 1)
      down_sample = tf.contrib.layers.conv2d(
          inputs=down_sample,
          num_outputs=final_num_outputs // 2,
          kernel_size=4,
          stride=2,
          scope='conv_1')

      down_sample = ops.pad(down_sample, 1)
      output_net = tf.contrib.layers.conv2d(
          inputs=down_sample,
          num_outputs=final_num_outputs,
          kernel_size=4,
          stride=2,
          scope='conv_2')
  return output_net
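# Hypothetical end-to-end sketch (my addition): composing the three generator
# modules. `images_with_labels` is assumed to be the input images concatenated
# with tiled domain labels along the channel axis, as in the StarGAN reference.
def _sketch_generator(images_with_labels, num_image_channels=3):
  net = generator_down_sample(images_with_labels)  # (b, h / 4, w / 4, 256)
  for i in range(6):  # Bottleneck: six shape-preserving residual blocks.
    net = _residual_block(
        net,
        num_outputs=256,
        kernel_size=3,
        padding_size=1,
        normalizer_fn=tf.contrib.layers.instance_norm,
        name='residual_block_{}'.format(i))
  return generator_up_sample(net, num_image_channels)  # (b, h, w, 3)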
def run(self, args):
  print(" [*] Running....")
  if not os.path.exists(args.output_path):
    os.makedirs(args.output_path)

  self.sess.run(tf.global_variables_initializer())
  self.sess.run(tf.local_variables_initializer())

  disp_batch = self.dataloader.disp
  left_batch = self.dataloader.left
  right_batch = self.dataloader.right
  image_name = self.dataloader.filename
  num_samples = count_text_lines(self.dataset)

  if self.mode == 'reprojection':
    print(' [*] Generating Reprojection error')
    warped = ops.generate_image_left(self.placeholders['right'],
                                     self.placeholders['disp'])
    # Negated photometric error (SSIM + L1 mix): higher means more confident.
    net_output = -(tf.reduce_sum(
        0.85 * ops.SSIM(warped, self.placeholders['left']) +
        0.15 * tf.abs(warped - self.placeholders['left']),
        -1,
        keepdims=True))
  elif self.mode == 'agreement':
    print(' [*] Generating Disparity Agreement')
    net_output = tf.py_func(ops.agreement, [self.placeholders['disp'], 2],
                            tf.float32)
  elif self.mode == 'uniqueness':
    print(' [*] Generating Uniqueness Constraint')
    net_output = tf.py_func(ops.uniqueness, [self.placeholders['disp']],
                            tf.float32)
  elif 'otb' in self.mode:
    print(' [*] OTB inference')
    self.saver = tf.train.Saver()
    if args.checkpoint_path:
      self.saver.restore(self.sess, args.checkpoint_path)
      print(" [*] Load model: SUCCESS")
    else:
      print(" [*] Load failed...neglected")
      print(" [*] End Testing...")
      raise ValueError('args.checkpoint_path is None')
    net_output = tf.nn.sigmoid(self.prediction)
    if self.mode == 'otb-online':
      print(' [*] Online Adaptation')
      self.optimizer = tf.train.GradientDescentOptimizer(
          self.learning_rate).minimize(
              self.loss, var_list=tf.global_variables())
  else:
    print(" [*] Unsupported testing mode!")
    raise ValueError('args.mode is not supported')

  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord)

  print(" [*] Start Testing...")
  bar = progressbar.ProgressBar(max_value=num_samples)
  for step in range(num_samples):
    batch_left, batch_right, batch_disp, filename = self.sess.run(
        [left_batch, right_batch, disp_batch, image_name])
    # Pad inputs to the fixed network resolution; remember the padding so the
    # prediction can be cropped back.
    val_disp, hpad, wpad = ops.pad(batch_disp, self.image_height,
                                   self.image_width)
    val_left, _, _ = ops.pad(batch_left, self.image_height, self.image_width)
    val_right, _, _ = ops.pad(batch_right, self.image_height,
                              self.image_width)
    if self.mode == 'otb-online':
      _, confidence = self.sess.run(
          [self.optimizer, net_output],
          feed_dict={
              self.placeholders['disp']: val_disp,
              self.placeholders['left']: val_left,
              self.placeholders['right']: val_right,
              self.learning_rate: self.initial_learning_rate
          })
    else:
      confidence = self.sess.run(
          net_output,
          feed_dict={
              self.placeholders['disp']: val_disp,
              self.placeholders['left']: val_left,
              self.placeholders['right']: val_right
          })
    confidence = ops.depad(confidence, hpad, wpad)

    outdir = args.output_path + '/' + filename
    if not os.path.exists(outdir):
      os.makedirs(outdir)

    confidence_file = outdir + '/' + self.mode + '.png'
    c = confidence[0]
    # Normalize to [0, 1] before saving as a 16-bit PNG.
    c = (c - np.min(c)) / (np.max(c) - np.min(c))
    cv2.imwrite(confidence_file, (c * (2**16 - 1)).astype('uint16'))

    if self.colors:
      color_file = outdir + '/' + self.mode + '-color.png'
      cv2.imwrite(
          color_file,
          cv2.applyColorMap(((1 - c) * (2**8 - 1)).astype('uint8'),
                            cv2.COLORMAP_WINTER))
    bar.update(step + 1)

  coord.request_stop()
  coord.join(threads)
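# The pad/depad pair above is assumed to zero-pad an NHWC numpy batch up to a
# fixed network resolution and crop the prediction back afterwards. A
# hypothetical sketch of that contract (my addition, not the repo's actual
# ops.pad / ops.depad):
import numpy as np

def _sketch_pad(batch, target_h, target_w):
  hpad = target_h - batch.shape[1]
  wpad = target_w - batch.shape[2]
  padded = np.pad(batch, ((0, 0), (0, hpad), (0, wpad), (0, 0)), 'constant')
  return padded, hpad, wpad

def _sketch_depad(batch, hpad, wpad):
  # Crop off the padding recorded by _sketch_pad.
  h = batch.shape[1] - hpad
  w = batch.shape[2] - wpad
  return batch[:, :h, :w, :]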
def test(self, args):
  print("[*] Testing....")
  if not os.path.exists(args.output_path):
    os.makedirs(args.output_path)

  self.sess.run(tf.global_variables_initializer())
  self.sess.run(tf.local_variables_initializer())

  if self.model == 'LGC':
    # LGC restores three sets of weights: the global network (ConfNet), the
    # local networks (CCNN / LFN), and the LGC fusion network itself.
    self.vars = tf.all_variables()
    self.vars_global = [k for k in self.vars if k.name.startswith('ConfNet')]
    self.vars_local = [
        k for k in self.vars
        if k.name.startswith('CCNN') or k.name.startswith('LFN')
    ]
    self.vars_lgc = [k for k in self.vars if k.name.startswith('LGC')]
    self.saver_global = tf.train.Saver(self.vars_global)
    self.saver_local = tf.train.Saver(self.vars_local)
    self.saver_LGC = tf.train.Saver(self.vars_lgc)
    if (args.checkpoint_path[0] and args.checkpoint_path[1] and
        args.checkpoint_path[2]):
      self.saver_global.restore(self.sess, args.checkpoint_path[0])
      self.saver_local.restore(self.sess, args.checkpoint_path[1])
      self.saver_LGC.restore(self.sess, args.checkpoint_path[2])
      print(" [*] Load model: SUCCESS")
    else:
      print(" [*] Load failed...neglected")
      print(" [*] End Testing...")
      raise ValueError('args.checkpoint_path[0] or args.checkpoint_path[1] '
                       'or args.checkpoint_path[2] is None')
  else:
    self.saver = tf.train.Saver()
    if args.checkpoint_path:
      self.saver.restore(self.sess, args.checkpoint_path[0])
      print(" [*] Load model: SUCCESS")
    else:
      print(" [*] Load failed...neglected")
      print(" [*] End Testing...")
      raise ValueError('args.checkpoint_path is None')

  disp_batch = self.dataloader.disp
  left_batch = self.dataloader.left
  line = self.dataloader.disp_filename
  num_samples = count_text_lines(self.dataset)

  if self.model == 'ConfNet':
    prediction = tf.nn.sigmoid(self.prediction)
  else:
    # Patch-based models shrink the output by `radius` on each side; pad the
    # prediction back to the input resolution.
    prediction = tf.pad(
        tf.nn.sigmoid(self.prediction),
        tf.constant([[0, 0], [self.radius, self.radius],
                     [self.radius, self.radius], [0, 0]]), "CONSTANT")

  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord)

  print(" [*] Start Testing...")
  for step in range(num_samples):
    batch_left, batch_disp, filename = self.sess.run(
        [left_batch, disp_batch, line])
    if self.model == 'ConfNet' or self.model == 'LGC':
      val_disp, hpad, wpad = ops.pad(batch_disp)
      val_left, _, _ = ops.pad(batch_left)

    print(" [*] Test image:" + filename)
    start = time.time()
    if self.model == 'ConfNet' or self.model == 'LGC':
      confidence = self.sess.run(
          prediction, feed_dict={self.left: val_left, self.disp: val_disp})
      confidence = ops.depad(confidence, hpad, wpad)
    else:
      confidence = self.sess.run(
          prediction,
          feed_dict={self.left: batch_left, self.disp: batch_disp})
    current = time.time()

    output_file = args.output_path + filename.strip().split('/')[-1]
    cv2.imwrite(output_file, (confidence[0] * 65535.0).astype('uint16'))
    print(" [*] Confidence prediction saved in:" + output_file)
    print(" [*] Running time:" + str(current - start) + "s")

  coord.request_stop()
  coord.join(threads)