Esempio n. 1
0
def began_encoder(num_units,
                  num_layers,
                  output_dim,
                  inputs,
                  opts,
                  is_training=False,
                  reuse=False):
    """Build a BEGAN-style convolutional encoder on top of ``inputs``.

    The network starts with one ``ops.conv2d`` layer of ``num_units`` filters.
    It then walks ``num_layers`` positions: positions with ``i % 3 == 2`` are
    ``ops.downsample`` steps (skipped when they are the very last position),
    all other positions are stride-1 convolutions followed by a ReLU. A final
    ``ops.linear`` layer projects to ``output_dim``.

    Args:
        num_units: base number of convolution filters.
        num_layers: number of positions (convolutions and downsamples) to walk.
        output_dim: size passed to the final ``ops.linear`` layer.
        inputs: tensor fed to the first convolution.
        opts: options object forwarded to every ``ops`` call.
        is_training: accepted for interface compatibility; not used here.
        reuse: forwarded to ``ops.downsample`` only.

    Returns:
        The output of the final ``ops.linear`` layer.
    """
    net = ops.conv2d(opts, inputs, num_units, scope='hfirst_conv')

    for idx in range(num_layers):
        if idx % 3 == 2:
            # Downsampling position; a trailing one is dropped.
            if idx != num_layers - 1:
                net = ops.downsample(net,
                                     scope='h{}_maxpool'.format(idx),
                                     reuse=reuse)
            continue

        # Index of this convolution when downsampling positions are ignored.
        conv_idx = idx - int(idx / 3)
        if idx == num_layers - 2:
            # int() truncates toward zero, which matters for conv_idx == 0.
            multiplier = conv_idx - int((conv_idx - 1) / 2)
        else:
            multiplier = conv_idx + 1 - int(conv_idx / 2)

        net = tf.nn.relu(
            ops.conv2d(opts,
                       net,
                       num_units * multiplier,
                       d_h=1,
                       d_w=1,
                       scope='_h{}_conv'.format(idx)))

    # Tensor should be [N, 8, 8, filters] at this point
    return ops.linear(opts, net, output_dim, scope='out_lin')
Esempio n. 2
0
def began_encoder(opts, inputs, is_training=False, reuse=False):
    """Build a BEGAN-style convolutional encoder configured through ``opts``.

    The network starts with one ``ops.conv2d`` layer of
    ``opts['e_num_filters']`` filters. It then walks ``opts['e_num_layers']``
    positions: positions with ``i % 3 == 2`` are ``ops.downsample`` steps
    (skipped when they are the very last position), all other positions are
    stride-1 convolutions followed by an ELU.

    Args:
        opts: dict-like options; reads ``'e_num_filters'``,
            ``'g_num_filters'``, ``'e_num_layers'``, ``'e_noise'`` and
            ``'zdim'`` and is forwarded to every ``ops`` call.
        inputs: tensor fed to the first convolution.
        is_training: accepted for interface compatibility; not used here.
        reuse: forwarded to ``ops.downsample`` only.

    Returns:
        When ``opts['e_noise'] != 'gaussian'``: the output of a single linear
        layer of size ``opts['zdim']``. Otherwise a tuple
        ``(mean, log_sigmas)`` of two such linear outputs.

    Raises:
        AssertionError: if encoder and decoder filter counts differ.
    """
    num_units = opts['e_num_filters']
    assert num_units == opts['g_num_filters'], \
        'BEGAN requires same number of filters in encoder and decoder'
    num_layers = opts['e_num_layers']
    layer_x = ops.conv2d(opts, inputs, num_units, scope='hfirst_conv')
    for i in range(num_layers):
        if i % 3 < 2:
            # Floor division keeps the filter multiplier an integer on
            # Python 3 as well; plain `/` would yield a float there and
            # produce a fractional number of filters.
            ii = i - i // 3
            if i != num_layers - 2:
                scale = ii + 1 - ii // 2
            else:
                scale = ii - (ii - 1) // 2
            layer_x = ops.conv2d(opts, layer_x, num_units * scale, d_h=1, d_w=1,
                                 scope='h%d_conv' % i)
            layer_x = tf.nn.elu(layer_x)
        else:
            # Downsampling position; a trailing one is dropped.
            if i != num_layers - 1:
                layer_x = ops.downsample(layer_x, scope='h%d_maxpool' % i,
                                         reuse=reuse)
    # Tensor should be [N, 8, 8, filters] at this point
    if opts['e_noise'] != 'gaussian':
        res = ops.linear(opts, layer_x, opts['zdim'], scope='hfinal_lin')
        return res
    else:
        mean = ops.linear(opts, layer_x, opts['zdim'], scope='mean_lin')
        log_sigmas = ops.linear(opts, layer_x,
                                opts['zdim'], scope='log_sigmas_lin')
        return mean, log_sigmas
Esempio n. 3
0
 def test_upsample_bilinear_inverted_by_bilinear(self):
     """Bilinear upsampling followed by bilinear downsampling should
     reproduce the input to within ``atol=.02``."""
     import sys

     num_values = 2 * 8 * 8 * 3
     # Divide by a float so the ramp lies in [0, 1) regardless of the
     # interpreter's integer-division rules.
     test_input = tf.reshape(
         tf.constant(np.arange(0, num_values) / float(num_values),
                     dtype=tf.float32), [2, 8, 8, 3])
     up_x = upsample(test_input, "bilinear")
     down_x = downsample(up_x, "bilinear")
     # Print full arrays on failure. NaN is rejected as a threshold by
     # current NumPy; sys.maxsize requests untruncated output.
     np.set_printoptions(threshold=sys.maxsize, suppress=True)
     self.assertAllClose(down_x, test_input, atol=.02)
Esempio n. 4
0
    def call(self, x, alpha, y=None):
        """
        Run the discriminator on an image batch.

        :param x: image to analyze
        :param alpha: how much weight to give to the current resolution's output vs previous resolution
        :param y: optional conditioning input; when given and ``self.label_list`` is None,
            its dot product with ``self.embedding(x)`` is added to the logit
        :return: tuple ``(logit, class_logits)``. ``logit`` is the classification logit
            (low number for fake, high for real). ``class_logits`` is a dict keyed by
            label name when ``self.label_list`` is set, otherwise None.
        """
        def lrelu(tensor):
            # Leaky ReLU with the fixed slope used throughout this network.
            return tf.nn.leaky_relu(tensor, alpha=.2)

        # Bring the input down to this network's working resolution if needed.
        width = x.get_shape()[2]
        if width != self.res:
            x = downsample(x, 'nearest_neighbor', factor=width // self.res)

        # Keep a half-resolution copy of the image for the fade-in branch.
        input_lowres = downsample(x, method=self.resize_method)
        x = lrelu(self.fromRGB(x))

        res = self.res
        for first_conv, second_conv in self.conv_layers:
            if res == self.res // 2:
                # Blend features of the lower-resolution input with the
                # features computed from the full-resolution path.
                lowres_features = lrelu(self.fromRGB_lower(input_lowres))
                x = lowres_features + alpha * (x - lowres_features)
            if res == self.end_shape[1] and self.do_minibatch_stddev:
                x = minibatch_stddev(x)
            x = lrelu(first_conv(x))
            x = lrelu(second_conv(x))
            if res != self.end_shape[1]:
                x = downsample(x, method=self.resize_method)
            res //= 2

        x = tf.reshape(x, [-1, 512])
        logit = self.fc_layer(x)

        if self.label_list is not None:  # acgan
            class_logits = {
                label.name: self.class_dense_map[label.name](x)
                for label in self.label_list
            }
            return logit, class_logits

        if y is None:
            return logit, None  # no conditional

        # proj discrim
        if self.embedding is None:
            raise ValueError("need y value when using cgan")
        conditional_dotprod = tf.reduce_sum(tf.multiply(y, self.embedding(x)),
                                            axis=1,
                                            keep_dims=True)
        tf.summary.scalar("conditional_dotprod",
                          tf.reduce_mean(conditional_dotprod))
        logit += conditional_dotprod
        return logit, None
Esempio n. 5
0
 def test_downsample_avg(self):
     """Downsampling a 4x4 image made of constant 2x2 tiles should give
     one value per tile.

     NOTE(review): despite the test name, the method exercised here is
     'nearest_neighbor'.
     """
     batch_size, channels = 2, 3
     to_nhwc = (0, 2, 3, 1)  # b, c, h, w -> b, h, w, c

     tiled_image = [[0., 0., 1., 1.], [0., 0., 1., 1.],
                    [2., 2., 3., 3.], [2., 2., 3., 3.]]
     images = tf.transpose(
         tf.constant([[tiled_image] * channels] * batch_size), to_nhwc)

     result = downsample(images, method='nearest_neighbor')

     expected_image = [[0., 1.], [2., 3.]]
     expected = tf.transpose(
         tf.constant([[expected_image] * channels] * batch_size), to_nhwc)
     self.assertAllEqual(result, expected)
Esempio n. 6
0
 def test_downsample_bilinear(self):
     """Bilinear downsampling of a 16x16 ramp should match the reference
     8x8 values to within ``atol=.02``."""
     batch_size, channels = 2, 3
     to_nhwc = [0, 2, 3, 1]  # b, c, h, w -> b, h, w, c

     # 16x16 ramp with values 0/256, 1/256, ..., 255/256 in row-major order.
     ramp = np.resize(np.arange(0, 16 * 16) / 256., [16, 16]).tolist()
     images = tf.transpose(
         tf.constant([[ramp] * channels] * batch_size), to_nhwc)

     result = downsample(images, "bilinear")

     # skimage.transform.resize result (a bit different than tf.image.resize_bilinear)
     expected_image = [
         [0.03320313, 0.04101563, 0.04882813, 0.05664063,
          0.06445313, 0.07226563, 0.08007813, 0.08789063],
         [0.15820313, 0.16601563, 0.17382813, 0.18164063,
          0.18945313, 0.19726563, 0.20507813, 0.21289063],
         [0.28320313, 0.29101563, 0.29882813, 0.30664063,
          0.31445313, 0.32226563, 0.33007813, 0.33789063],
         [0.40820312, 0.41601563, 0.42382813, 0.43164063,
          0.43945313, 0.44726563, 0.45507813, 0.46289063],
         [0.53320312, 0.54101562, 0.54882812, 0.55664063,
          0.56445313, 0.57226563, 0.58007813, 0.58789063],
         [0.65820312, 0.66601562, 0.67382813, 0.68164063,
          0.68945313, 0.69726563, 0.70507813, 0.71289063],
         [0.78320312, 0.79101562, 0.79882812, 0.80664062,
          0.81445312, 0.82226563, 0.83007813, 0.83789063],
         [0.90820312, 0.91601562, 0.92382812, 0.93164062,
          0.93945312, 0.94726563, 0.95507813, 0.96289063],
     ]
     expected = tf.transpose(
         tf.constant([[expected_image] * channels] * batch_size), to_nhwc)
     self.assertAllClose(result, expected, atol=.02)
Esempio n. 7
0
def unsupervised_train(batch):
    """Build the unsupervised training loss for a pair of images.

    Both images are divided by 255.0, a per-channel mean is subtracted to
    form the network input, and ``flownet`` is asked for forward and backward
    flows. The last entry of each returned flow collection is then iterated
    level by level: at each level ``compute_losses`` is evaluated on
    progressively downsampled images and the weighted results are summed.

    Args:
        batch: pair ``(im1, im2)`` of image tensors. They are divided by
            255.0 before use (presumably 0-255 valued -- confirm against the
            input pipeline).

    Returns:
        Tuple ``(final_loss, final_flow_fw, final_flow_bw)``:
        ``final_loss`` is the weighted multi-level loss plus
        ``tf.losses.get_regularization_loss()``; the two flows are the first
        forward/backward flow of the iterated collection, bilinearly resized
        to ``tf.shape(im1)[1:3]`` and multiplied by ``FLOW_SCALE * 4``.
    """
    channel_mean = tf.constant([104.920005, 110.1753, 114.785955]) / 255.0

    raw_im1, raw_im2 = batch
    # Images for loss comparisons with values in [0, 1] (scale to original using * 255)
    im1 = raw_im1 / 255.0
    im2 = raw_im2 / 255.0
    im_shape = tf.shape(im1)[1:3]

    loss_weights = {
        'ternary_weight': 1.0,
        'smooth_2nd_weight': 3.0,
        'fb_weight': 0.2,
        'occ_weight': 12.4,
        'photo_weight': 1.0,
    }

    border_mask = create_border_mask(im1, 0.1)

    # Images for neural network input: per-channel mean subtracted.
    im1_photo = im1 - channel_mean
    im2_photo = im2 - channel_mean

    # build
    flows_fw, flows_bw = flownet(im1_photo, im2_photo, backward_flow=True)
    flows_fw = flows_fw[-1]
    flows_bw = flows_bw[-1]

    # Per-level loss weight and patch distance, indexed by level.
    layer_weights = [12.7, 4.35, 3.9, 3.4, 1.1]
    layer_patch_distances = [3, 2, 2, 1, 1]

    # The first level is compared against inputs downsampled by 4.
    im1_s = downsample(im1, 4)
    im2_s = downsample(im2, 4)
    mask_s = downsample(border_mask, 4)

    final_flow_fw = tf.image.resize_bilinear(flows_fw[0], im_shape) * FLOW_SCALE * 4
    final_flow_bw = tf.image.resize_bilinear(flows_bw[0], im_shape) * FLOW_SCALE * 4

    mask_occlusion = 'fb'
    assert mask_occlusion in ['fb', 'disocc', '']

    # Per-loss totals are accumulated here but are not part of the return value.
    combined_losses = {name: 0.0 for name in LOSSES}
    combined_loss = 0.0

    for level, (flow_fw_s, flow_bw_s) in enumerate(zip(flows_fw, flows_bw)):
        flow_scale = FLOW_SCALE / (2 ** level)

        with tf.variable_scope("loss" + str(level + 2)):
            layer_weight = layer_weights[level]

            losses = compute_losses(im1_s, im2_s,
                                    flow_fw_s * flow_scale,
                                    flow_bw_s * flow_scale,
                                    border_mask=mask_s,
                                    mask_occlusion=mask_occlusion,
                                    data_max_distance=layer_patch_distances[level])

            layer_loss = 0.0
            for name in LOSSES:
                layer_loss += loss_weights[name + '_weight'] * losses[name]
                combined_losses[name] += layer_weight * losses[name]

            combined_loss += layer_weight * layer_loss

            # Halve the comparison images and mask for the next level.
            im1_s = downsample(im1_s, 2)
            im2_s = downsample(im2_s, 2)
            mask_s = downsample(mask_s, 2)

    regularization_loss = tf.losses.get_regularization_loss()
    final_loss = combined_loss + regularization_loss

    return final_loss, final_flow_fw, final_flow_bw