def began_encoder(num_units, num_layers, output_dim, inputs, opts, is_training=False, reuse=False):
    layer_x = inputs
    layer_x = ops.conv2d(opts, layer_x, num_units, scope='hfirst_conv')
    for i in range(num_layers):
        if i % 3 < 2:
            # Two out of every three layers are stride-1 convolutions whose
            # filter multiplier `scale` grows with depth.
            if i != num_layers - 2:
                ii = i - int(i / 3)
                scale = (ii + 1 - int(ii / 2))
            else:
                ii = i - int(i / 3)
                scale = (ii - int((ii - 1) / 2))
            layer_x = ops.conv2d(opts, layer_x, num_units * scale, d_h=1, d_w=1,
                                 scope='_h{}_conv'.format(i))
            layer_x = tf.nn.relu(layer_x)
        else:
            # Every third layer halves the spatial resolution.
            if i != num_layers - 1:
                layer_x = ops.downsample(layer_x, scope='h{}_maxpool'.format(i),
                                         reuse=reuse)
    # Tensor should be [N, 8, 8, filters] at this point
    layer_x = ops.linear(opts, layer_x, output_dim, scope='out_lin')
    return layer_x
def began_encoder(opts, inputs, is_training=False, reuse=False):
    num_units = opts['e_num_filters']
    assert num_units == opts['g_num_filters'], \
        'BEGAN requires same number of filters in encoder and decoder'
    num_layers = opts['e_num_layers']
    layer_x = ops.conv2d(opts, inputs, num_units, scope='hfirst_conv')
    for i in range(num_layers):
        if i % 3 < 2:
            # Floor division keeps `scale` an integer filter multiplier under
            # Python 3 (plain `/` would silently produce a float filter count).
            if i != num_layers - 2:
                ii = i - i // 3
                scale = ii + 1 - ii // 2
            else:
                ii = i - i // 3
                scale = ii - (ii - 1) // 2
            layer_x = ops.conv2d(opts, layer_x, num_units * scale, d_h=1, d_w=1,
                                 scope='h%d_conv' % i)
            layer_x = tf.nn.elu(layer_x)
        else:
            if i != num_layers - 1:
                layer_x = ops.downsample(layer_x, scope='h%d_maxpool' % i,
                                         reuse=reuse)
    # Tensor should be [N, 8, 8, filters] at this point
    if opts['e_noise'] != 'gaussian':
        res = ops.linear(opts, layer_x, opts['zdim'], scope='hfinal_lin')
        return res
    else:
        # Gaussian encoders return the parameters of q(z|x) instead of a point.
        mean = ops.linear(opts, layer_x, opts['zdim'], scope='mean_lin')
        log_sigmas = ops.linear(opts, layer_x, opts['zdim'],
                                scope='log_sigmas_lin')
        return mean, log_sigmas
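# A hypothetical usage sketch for the opts-based began_encoder above. The key
# values here are illustrative assumptions, not the project's defaults, and
# `ops` must supply the conv2d/downsample/linear helpers the encoder calls.
def _began_encoder_usage_sketch():
    opts = {'e_num_filters': 64,    # assumed filter count
            'g_num_filters': 64,    # must match e_num_filters for BEGAN
            'e_num_layers': 6,      # two convs then a downsample, repeated
            'e_noise': 'gaussian',  # selects the (mean, log_sigmas) head
            'zdim': 64}             # assumed latent dimensionality
    images = tf.placeholder(tf.float32, [None, 32, 32, 3])
    # In 'gaussian' mode the encoder returns the parameters of q(z|x):
    mean, log_sigmas = began_encoder(opts, images, is_training=True)
    return mean, log_sigmas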
def test_upsample_bilinear_inverted_by_bilinear(self):
    test_input = tf.reshape(
        tf.constant(np.arange(0, 2 * 8 * 8 * 3) / (2 * 8 * 8 * 3),
                    dtype=tf.float32),
        [2, 8, 8, 3])
    up_x = upsample(test_input, "bilinear")
    down_x = downsample(up_x, "bilinear")
    # np.inf (rather than the invalid np.nan) disables summarization when
    # arrays are printed on assertion failure.
    np.set_printoptions(threshold=np.inf, suppress=True)
    self.assertAllClose(down_x, test_input, atol=.02)
def call(self, x, alpha, y=None):
    """
    :param x: image to analyze
    :param alpha: how much weight to give to the current resolution's output
        vs previous resolution
    :param y: optional conditioning labels for the projection/ACGAN heads
    :return: classification logit (low number for fake, high for real)
    """
    width = x.get_shape()[2]
    if width != self.res:
        x = downsample(x, 'nearest_neighbor', factor=width // self.res)
    input_lowres = downsample(x, method=self.resize_method)
    x = tf.nn.leaky_relu(self.fromRGB(x), alpha=.2)
    current_res = self.res
    for conv1, conv2 in self.conv_layers:
        if current_res == self.res // 2:
            # Fade in the newest resolution: blend its features with the
            # fromRGB projection of the downsampled input, weighted by alpha.
            x_lower = tf.nn.leaky_relu(self.fromRGB_lower(input_lowres), alpha=.2)
            x = x_lower + alpha * (x - x_lower)
        if current_res == self.end_shape[1] and self.do_minibatch_stddev:
            x = minibatch_stddev(x)
        x = tf.nn.leaky_relu(conv1(x), alpha=.2)
        x = tf.nn.leaky_relu(conv2(x), alpha=.2)
        if current_res != self.end_shape[1]:
            x = downsample(x, method=self.resize_method)
        current_res = current_res // 2
    x = tf.reshape(x, [-1, 512])
    logit = self.fc_layer(x)
    if y is not None and self.label_list is None:  # projection discriminator
        if self.embedding is None:
            raise ValueError("need y value when using cgan")
        conditional_dotprod = tf.reduce_sum(
            tf.multiply(y, self.embedding(x)), axis=1, keep_dims=True)
        tf.summary.scalar("conditional_dotprod",
                          tf.reduce_mean(conditional_dotprod))
        logit += conditional_dotprod
        return logit, None
    elif self.label_list is not None:  # acgan
        class_logits = {}
        for label in self.label_list:
            class_logits[label.name] = self.class_dense_map[label.name](x)
        return logit, class_logits
    return logit, None  # no conditional
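# minibatch_stddev() is called above but not defined here. Below is a minimal
# sketch of the ProGAN-style layer; the project's real version may group the
# batch differently, so treat this as an assumption-laden illustration.
def minibatch_stddev_sketch(x, eps=1e-8):
    # Per-location stddev across the batch, averaged to one scalar statistic.
    mean = tf.reduce_mean(x, axis=0, keepdims=True)
    stddev = tf.sqrt(tf.reduce_mean(tf.square(x - mean), axis=0, keepdims=True) + eps)
    avg_stddev = tf.reduce_mean(stddev)
    # Broadcast the scalar as one extra feature map so the discriminator can
    # detect low batch diversity (a mode-collapse signal).
    shape = tf.shape(x)
    feature_map = tf.fill([shape[0], shape[1], shape[2], 1], avg_stddev)
    return tf.concat([x, feature_map], axis=3)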
def test_downsample_avg(self):
    test_input_spatial = [[0., 0., 1., 1.],
                          [0., 0., 1., 1.],
                          [2., 2., 3., 3.],
                          [2., 2., 3., 3.]]
    test_input = tf.transpose(tf.constant([[test_input_spatial] * 3] * 2),
                              (0, 2, 3, 1))  # b, h, w, c
    # The input is constant 2x2 blocks, so nearest-neighbor and average
    # downsampling produce the same expected output here.
    x = downsample(test_input, method='nearest_neighbor')
    spatial_target = [[0., 1.],
                      [2., 3.]]
    target_array = tf.constant([[spatial_target] * 3] * 2)  # b, c, h, w
    target_array = tf.transpose(target_array, [0, 2, 3, 1])  # b, h, w, c
    self.assertAllEqual(x, target_array)
def test_downsample_bilinear(self):
    test_input_spatial = np.resize(np.arange(0, 16 * 16) / 256.,
                                   [16, 16]).tolist()
    test_input = tf.transpose(tf.constant([[test_input_spatial] * 3] * 2),
                              (0, 2, 3, 1))  # b, h, w, c
    x = downsample(test_input, "bilinear")
    # skimage.transform.resize result (a bit different than
    # tf.image.resize_bilinear)
    spatial_target = [
        [0.03320313, 0.04101563, 0.04882813, 0.05664063,
         0.06445313, 0.07226563, 0.08007813, 0.08789063],
        [0.15820313, 0.16601563, 0.17382813, 0.18164063,
         0.18945313, 0.19726563, 0.20507813, 0.21289063],
        [0.28320313, 0.29101563, 0.29882813, 0.30664063,
         0.31445313, 0.32226563, 0.33007813, 0.33789063],
        [0.40820312, 0.41601563, 0.42382813, 0.43164063,
         0.43945313, 0.44726563, 0.45507813, 0.46289063],
        [0.53320312, 0.54101562, 0.54882812, 0.55664063,
         0.56445313, 0.57226563, 0.58007813, 0.58789063],
        [0.65820312, 0.66601562, 0.67382813, 0.68164063,
         0.68945313, 0.69726563, 0.70507813, 0.71289063],
        [0.78320312, 0.79101562, 0.79882812, 0.80664062,
         0.81445312, 0.82226563, 0.83007813, 0.83789063],
        [0.90820312, 0.91601562, 0.92382812, 0.93164062,
         0.93945312, 0.94726563, 0.95507813, 0.96289063]]
    target_array = tf.constant([[spatial_target] * 3] * 2)  # b, c, h, w
    target_array = tf.transpose(target_array, [0, 2, 3, 1])  # b, h, w, c
    self.assertAllClose(x, target_array, atol=.02)
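# The tests above exercise a downsample(x, method) helper that is not shown.
# A minimal sketch consistent with their expectations follows; the name and
# fixed factor-2 behavior are assumptions, not the project's actual code.
# (Note: unsupervised_train below uses a different downsample(image, factor)
# helper from its own module.)
def downsample_sketch(x, method='nearest_neighbor'):
    height, width = x.get_shape().as_list()[1:3]
    size = [height // 2, width // 2]
    if method == 'bilinear':
        return tf.image.resize_bilinear(x, size)
    if method == 'nearest_neighbor':
        return tf.image.resize_nearest_neighbor(x, size)
    if method == 'avg':
        # 2x2 average pooling, equivalent to area-style downsampling.
        return tf.nn.avg_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
    raise ValueError('unknown downsample method: %s' % method)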
def unsupervised_train(batch):
    # FLOW_SCALE, LOSSES, flownet, compute_losses and create_border_mask come
    # from the surrounding module.
    normalization = [[104.920005, 110.1753, 114.785955]]
    channel_mean = tf.constant(normalization[0]) / 255.0
    im1, im2 = batch
    im1 = im1 / 255.0
    im2 = im2 / 255.0
    im_shape = tf.shape(im1)[1:3]
    im1_geo, im2_geo = im1, im2
    im1_photo, im2_photo = im1, im2
    loss_weights = {'ternary_weight': 1.0,
                    'smooth_2nd_weight': 3.0,
                    'fb_weight': 0.2,
                    'occ_weight': 12.4,
                    'photo_weight': 1.0}
    border_mask = create_border_mask(im1, 0.1)

    # Images for loss comparisons with values in [0, 1] (scale to original using * 255)
    im1_norm = im1_geo
    im2_norm = im2_geo
    # Images for neural network input with mean-zero values in [-1, 1]
    im1_photo = im1_photo - channel_mean
    im2_photo = im2_photo - channel_mean

    # Build the forward and backward flow pyramids.
    flows_fw, flows_bw = flownet(im1_photo, im2_photo, backward_flow=True)
    flows_fw = flows_fw[-1]
    flows_bw = flows_bw[-1]

    layer_weights = [12.7, 4.35, 3.9, 3.4, 1.1]
    layer_patch_distances = [3, 2, 2, 1, 1]
    im1_s = downsample(im1_norm, 4)
    im2_s = downsample(im2_norm, 4)
    mask_s = downsample(border_mask, 4)
    final_flow_scale = FLOW_SCALE
    final_flow_fw = tf.image.resize_bilinear(flows_fw[0], im_shape) \
        * final_flow_scale * 4
    final_flow_bw = tf.image.resize_bilinear(flows_bw[0], im_shape) \
        * final_flow_scale * 4

    combined_losses = dict()
    combined_loss = 0.0
    for loss in LOSSES:
        combined_losses[loss] = 0.0

    # Accumulate the weighted losses over the flow pyramid, coarse to fine.
    flow_enum = enumerate(zip(flows_fw, flows_bw))
    for i, flow_pair in flow_enum:
        layer_name = "loss" + str(i + 2)
        flow_scale = final_flow_scale / (2 ** i)
        with tf.variable_scope(layer_name):
            layer_weight = layer_weights[i]
            flow_fw_s, flow_bw_s = flow_pair
            mask_occlusion = 'fb'
            assert mask_occlusion in ['fb', 'disocc', '']
            losses = compute_losses(im1_s, im2_s,
                                    flow_fw_s * flow_scale,
                                    flow_bw_s * flow_scale,
                                    border_mask=mask_s,
                                    mask_occlusion=mask_occlusion,
                                    data_max_distance=layer_patch_distances[i])
            layer_loss = 0.0
            for loss in LOSSES:
                weight_name = loss + '_weight'
                layer_loss += loss_weights[weight_name] * losses[loss]
                combined_losses[loss] += layer_weight * losses[loss]
            combined_loss += layer_weight * layer_loss
        im1_s = downsample(im1_s, 2)
        im2_s = downsample(im2_s, 2)
        mask_s = downsample(mask_s, 2)

    regularization_loss = tf.losses.get_regularization_loss()
    final_loss = combined_loss + regularization_loss
    """
    warp_1 = image_warp(im1_photo, final_flow_bw)
    warp_1 = warp_1 + channel_mean
    warp_2 = image_warp(im2_photo, final_flow_fw)
    warp_2 = warp_2 + channel_mean
    dis_1, dis_2 = disbatch
    dis_1_warp = image_warp(dis_1, final_flow_bw)
    dis_2_warp = image_warp(dis_2, final_flow_fw)
    dis_diff_1 = dis_1_warp - dis_2
    dis_diff_2 = dis_2_warp - dis_1
    """
    return final_loss, final_flow_fw, final_flow_bw
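# unsupervised_train selects mask_occlusion='fb'. Below is a minimal sketch of
# that forward-backward consistency check in the UnFlow style; image_warp is
# the project's warping helper, and the alpha thresholds are assumed defaults
# rather than values read from this code.
def fb_occlusion_sketch(flow_fw, flow_bw, alpha1=0.01, alpha2=0.05):
    def length_sq(v):
        return tf.reduce_sum(tf.square(v), axis=3, keepdims=True)
    # Backward flow sampled at the locations the forward flow points to.
    flow_bw_warped = image_warp(flow_bw, flow_fw)
    flow_diff = flow_fw + flow_bw_warped  # ~0 where the two flows agree
    mag_sq = length_sq(flow_fw) + length_sq(flow_bw_warped)
    occ_thresh = alpha1 * mag_sq + alpha2
    # 1.0 marks pixels whose forward flow is not undone by the backward flow.
    return tf.cast(length_sq(flow_diff) > occ_thresh, tf.float32)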