Example #1
def create_acvideo_discriminator(clips,
                                 actions,
                                 ndf=64,
                                 norm_layer='instance',
                                 use_noise=False,
                                 noise_sigma=None):
    norm_layer = ops.get_norm_layer(norm_layer)
    layers = []
    # Pad height and width by 1 on each side; the temporal axis is left
    # unpadded, so each VALID conv with temporal kernel 3 shortens the clip by 2 frames.
    paddings = [[0, 0], [0, 0], [1, 1], [1, 1], [0, 0]]

    # Rescale pixel values from [0, 1] to [-1, 1].
    clips = clips * 2 - 1
    # Stack each pair of consecutive frames along the channel axis.
    clip_pairs = tf.concat([clips[:-1], clips[1:]], axis=-1)
    # Broadcast the per-step actions over the spatial dims as extra channels.
    clip_pairs = tile_concat([clip_pairs, actions[..., None, None, :]],
                             axis=-1)
    # Switch from time-major to batch-major layout for the 3-D convs.
    clip_pairs = tf_utils.transpose_batch_time(clip_pairs)

    with tf.variable_scope("acvideo_layer_1"):
        h1 = noise(clip_pairs, use_noise, noise_sigma)
        h1 = conv3d(tf.pad(h1, paddings),
                    ndf,
                    kernel_size=(3, 4, 4),
                    strides=(1, 2, 2),
                    padding='VALID',
                    use_bias=False)
        h1 = lrelu(h1, 0.2)
        layers.append(h1)

    with tf.variable_scope("acvideo_layer_2"):
        h2 = noise(h1, use_noise, noise_sigma)
        h2 = conv3d(tf.pad(h2, paddings),
                    ndf * 2,
                    kernel_size=(3, 4, 4),
                    strides=(1, 2, 2),
                    padding='VALID',
                    use_bias=False)
        h2 = norm_layer(h2)
        h2 = lrelu(h2, 0.2)
        layers.append(h2)

    with tf.variable_scope("acvideo_layer_3"):
        h3 = noise(h2, use_noise, noise_sigma)
        h3 = conv3d(tf.pad(h3, paddings),
                    ndf * 4,
                    kernel_size=(3, 4, 4),
                    strides=(1, 2, 2),
                    padding='VALID',
                    use_bias=False)
        h3 = norm_layer(h3)
        h3 = lrelu(h3, 0.2)
        layers.append(h3)

    with tf.variable_scope("acvideo_layer_4"):
        logits = conv3d(tf.pad(h3, paddings),
                        1,
                        kernel_size=(3, 4, 4),
                        strides=(1, 2, 2),
                        padding='VALID',
                        use_bias=False)
        layers.append(logits)
    return nest.map_structure(tf_utils.transpose_batch_time, layers)
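A minimal usage sketch for the action-conditioned discriminator above, assuming TF 1.x graph mode and that the repo's helpers (ops, tf_utils, tile_concat, conv3d, lrelu, noise) are in scope; the placeholder names and shapes are hypothetical. The clip must be long enough that four unpadded temporal convs (kernel 3) leave at least one frame:

import tensorflow as tf

# Hypothetical shapes: 12-frame clips, batch of 4, 64x64 RGB, 5-dim actions.
clips = tf.placeholder(tf.float32, [12, 4, 64, 64, 3])   # [time, batch, H, W, C]
actions = tf.placeholder(tf.float32, [11, 4, 5])         # one action per transition
features = create_acvideo_discriminator(clips, actions, ndf=64)
logits = features[-1]  # time-major per-patch real/fake logits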
Example #2
def video_sn_discriminator(clips, ndf=64):
    # Switch from time-major to batch-major layout; the batch size must
    # be statically known for the reshape before the final dense layer.
    clips = tf_utils.transpose_batch_time(clips)
    batch_size = clips.shape[0].value
    layers = []
    # Pad time, height, and width by 1 on each side.
    paddings = [[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]

    # Spectrally normalized conv with explicit symmetric padding; for a
    # kernel of 3 and stride 1 this reproduces 'SAME' padding.
    def conv3d(inputs, *args, **kwargs):
        kwargs.setdefault('padding', 'VALID')
        kwargs.setdefault('use_spectral_norm', True)
        return ops.conv3d(tf.pad(inputs, paddings), *args, **kwargs)

    with tf.variable_scope("sn_conv0_0"):
        layers.append(lrelu(conv3d(clips, ndf, kernel_size=3, strides=1), 0.1))

    with tf.variable_scope("sn_conv0_1"):
        layers.append(
            lrelu(
                conv3d(layers[-1], ndf * 2, kernel_size=4, strides=(1, 2, 2)),
                0.1))

    with tf.variable_scope("sn_conv1_0"):
        layers.append(
            lrelu(conv3d(layers[-1], ndf * 2, kernel_size=3, strides=1), 0.1))

    with tf.variable_scope("sn_conv1_1"):
        layers.append(
            lrelu(
                conv3d(layers[-1], ndf * 4, kernel_size=4, strides=(1, 2, 2)),
                0.1))

    with tf.variable_scope("sn_conv2_0"):
        layers.append(
            lrelu(conv3d(layers[-1], ndf * 4, kernel_size=3, strides=1), 0.1))

    with tf.variable_scope("sn_conv2_1"):
        layers.append(
            lrelu(conv3d(layers[-1], ndf * 8, kernel_size=4, strides=2), 0.1))

    with tf.variable_scope("sn_conv3_0"):
        layers.append(
            lrelu(conv3d(layers[-1], ndf * 8, kernel_size=3, strides=1), 0.1))

    with tf.variable_scope("sn_fc4"):
        logits = dense(tf.reshape(layers[-1], [batch_size, -1]),
                       1,
                       use_spectral_norm=True)
        layers.append(logits)
    layers = nest.map_structure(tf_utils.transpose_batch_time, layers)
    return layers
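A quick usage sketch for the spectrally normalized variant, again with hypothetical shapes; the batch dimension must be static because of the reshape feeding the dense layer:

clips = tf.placeholder(tf.float32, [8, 4, 64, 64, 3])  # [time, batch, H, W, C]
outputs = video_sn_discriminator(clips, ndf=64)
logits = outputs[-1]  # a single real/fake logit per clip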
Example #3
def create_video_discriminator(clips, ndf=64, norm_layer='instance'):
    norm_layer = ops.get_norm_layer(norm_layer)
    layers = []
    # Pad height and width only; with unpadded temporal kernels of 4,
    # each layer shortens the clip by 3 frames.
    paddings = [[0, 0], [0, 0], [1, 1], [1, 1], [0, 0]]

    # Switch from time-major to batch-major layout for the 3-D convs.
    clips = tf_utils.transpose_batch_time(clips)

    with tf.variable_scope("video_layer_1"):
        h1 = conv3d(tf.pad(clips, paddings),
                    ndf,
                    kernel_size=4,
                    strides=(1, 2, 2),
                    padding='VALID')
        h1 = lrelu(h1, 0.2)
        layers.append(h1)

    with tf.variable_scope("video_layer_2"):
        h2 = conv3d(tf.pad(h1, paddings),
                    ndf * 2,
                    kernel_size=4,
                    strides=(1, 2, 2),
                    padding='VALID')
        h2 = norm_layer(h2)
        h2 = lrelu(h2, 0.2)
        layers.append(h2)

    with tf.variable_scope("video_layer_3"):
        h3 = conv3d(tf.pad(h2, paddings),
                    ndf * 4,
                    kernel_size=4,
                    strides=(1, 2, 2),
                    padding='VALID')
        h3 = norm_layer(h3)
        h3 = lrelu(h3, 0.2)
        layers.append(h3)

    with tf.variable_scope("video_layer_4"):
        # If the temporal extent has shrunk below the kernel size,
        # collapse the remaining frames in a single step.
        if h3.shape[1].value < 4:
            kernel_size = (h3.shape[1].value, 4, 4)
        else:
            kernel_size = 4
        logits = conv3d(h3,
                        1,
                        kernel_size=kernel_size,
                        strides=1,
                        padding='VALID')
        layers.append(logits)
    return nest.map_structure(tf_utils.transpose_batch_time, layers)
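As above, a hedged usage sketch with hypothetical shapes; with no temporal padding and kernel 4, each of the first three layers shortens the clip by 3 frames, so at least 10 frames are needed:

clips = tf.placeholder(tf.float32, [10, 4, 64, 64, 3])  # [time, batch, H, W, C]
features = create_video_discriminator(clips, ndf=64, norm_layer='instance')
logits = features[-1]  # per-patch real/fake logits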
Example #4
def where_axis1(cond, x, y):
    # tf.where selects along axis 0, so swapping the leading two axes
    # before and after makes the rank-1 condition select along axis 1.
    return transpose_batch_time(
        tf.where(cond, transpose_batch_time(x),
                 transpose_batch_time(y)))
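For illustration, a small hypothetical check of where_axis1: the rank-1 condition must have length x.shape[1], and matching slices are picked from x or y along the second axis:

cond = tf.constant([True, False, True])  # one flag per axis-1 slice
x = tf.zeros([2, 3, 4])
y = tf.ones([2, 3, 4])
mixed = where_axis1(cond, x, y)          # [2, 3, 4]; slices 0 and 2 come from x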