Exemple #1
0
def model(is_training, reuse, num_classes=5, dropout_keep_prob=0.5):
    """Assemble the classifier graph under the ``squeezenet`` variable scope.

    Layout: a stride-2 ``conv1`` followed by a max-pool, then four stages
    that each run a ``fire_module`` and a ``bottleneck_simple`` followed by
    batch norm with ReLU (two of the stages max-pool in between), and
    finally dropout -> 1x1 conv -> global average pool -> softmax.

    Args:
      is_training: forwarded to the layers through ``common_layer_args`` and
        to the explicit batch-norm calls.
      reuse: variable-reuse flag, forwarded the same way.
      num_classes: number of output channels of the final 1x1 convolution.
      dropout_keep_prob: keep probability; the dropout layer is given
        ``1 - dropout_keep_prob`` as its drop probability.

    Returns:
      Whatever ``end_points(is_training)`` returns.
    """
    shared = common_layer_args(is_training, reuse)
    stem_kwargs = make_args(batch_norm=True,
                            activation=prelu,
                            w_init=initz.he_normal(scale=1),
                            untie_biases=False,
                            **shared)
    fire_kwargs = make_args(w_init=initz.he_normal(scale=1),
                            untie_biases=False,
                            **shared)
    pool_kwargs = make_args(padding='SAME', **shared)
    inputs = input((None, crop_size[1], crop_size[0], 3), **shared)

    def _bn_relu(tensor, layer_name):
        # Batch norm with a fused ReLU, applied after every bottleneck.
        return batch_norm(tensor,
                          activation_fn=tf.nn.relu,
                          name=layer_name,
                          is_training=is_training,
                          reuse=reuse)

    # (first width, second width, fire name, optional pool name, bottleneck name)
    stages = (
        (16, 64, 'fire2', None, 'fire3'),
        (32, 128, 'fire4', 'maxpool4', 'fire5'),
        (48, 192, 'fire6', None, 'fire7'),
        (64, 256, 'fire8', 'maxpool8', 'fire9'),
    )

    with tf.variable_scope('squeezenet', values=[inputs]):
        net = conv2d(inputs, 96, stride=(2, 2), name='conv1', **stem_kwargs)
        net = max_pool(net, name='maxpool1', **pool_kwargs)

        for width_a, width_b, fire_name, pool_name, neck_name in stages:
            net = fire_module(net, width_a, width_b, name=fire_name,
                              **fire_kwargs)
            if pool_name is not None:
                net = max_pool(net, name=pool_name, **pool_kwargs)
            net = bottleneck_simple(net, width_a, width_b, name=neck_name,
                                    **fire_kwargs)
            net = _bn_relu(net, neck_name + '_bn')

        # Reversed avg and conv layers per 'Network in Network'
        net = dropout(net,
                      drop_p=1 - dropout_keep_prob,
                      name='dropout6',
                      **shared)
        net = conv2d(net,
                     num_classes,
                     filter_size=(1, 1),
                     name='conv10',
                     **fire_kwargs)
        logits = global_avg_pool(net, name='logits', **pool_kwargs)
        # Called for its effect on the graph; the return value is not used here.
        softmax(logits, name='predictions', **shared)
        return end_points(is_training)
Exemple #2
0
def generator(z_shape, is_training, reuse, batch_size=32):
    """Build the generator graph and return its intermediate tensors.

    ``z`` (obtained from ``get_z``) is projected by a fully connected layer,
    reshaped to ``[batch_size, 4, 4, 512]`` and then passed through five
    ``upsample2d`` layers whose requested output shapes double the spatial
    size each time, ending at ``[batch_size, 128, 128, 3]``.

    Args:
      z_shape: forwarded to ``get_z``.
      is_training: forwarded to the layers through ``common_layer_args``.
      reuse: variable-reuse flag, forwarded to ``get_z`` and the layers.
      batch_size: static batch dimension used for the reshape and for every
        requested ``upsample2d`` output shape.

    Returns:
      dict mapping layer names to tensors. The final entry is stored under
      the key ``'softmax'`` although it holds ``tf.nn.tanh`` of the last
      layer; the key is kept as-is because callers look it up by name.
    """
    common_args = common_layer_args(is_training, reuse)
    conv_args = make_args(batch_norm=True,
                          activation=lrelu,
                          w_init=initz.he_normal(scale=1),
                          untie_biases=False,
                          **common_args)
    # Same as conv_args but without batch norm; used for the output layer.
    conv_args_1st = make_args(batch_norm=None,
                              activation=lrelu,
                              w_init=initz.he_normal(scale=1),
                              untie_biases=False,
                              **common_args)
    fc_args = make_args(activation=lrelu,
                        w_init=initz.he_normal(scale=1),
                        **common_args)

    # project `z` and reshape
    # TODO think about phase again
    end_points = {}
    z = get_z(z_shape, reuse)
    end_points['z'] = z
    z_fc = fully_connected(z, 4 * 4 * 512, name="g_fc", **fc_args)
    end_points['g_fc'] = z_fc
    x = tf.reshape(z_fc, [batch_size, 4, 4, 512])
    end_points['g_reshaped'] = x

    # (spatial size, channels, layer kwargs) for g_deconv2d_1 .. g_deconv2d_5
    upsample_plan = (
        (8, 256, conv_args),
        (16, 128, conv_args),
        (32, 64, conv_args),
        (64, 32, conv_args),
        (128, 3, conv_args_1st),
    )
    for layer_idx, (size, channels, layer_args) in enumerate(upsample_plan):
        layer_name = "g_deconv2d_%d" % (layer_idx + 1)
        x = upsample2d(x, [batch_size, size, size, channels],
                       name=layer_name,
                       **layer_args)
        end_points[layer_name] = x

    end_points['softmax'] = tf.nn.tanh(x)
    return end_points
Exemple #3
0
def encoder(inputs, is_training, reuse, z_dim=512):
    """Build the encoder graph and return its intermediate tensors.

    The input goes through dropout, a 5x5 stride-2 convolution without batch
    norm, further convolutions (one of them stride-2) interleaved with
    dropout, two 1x1 convolutions and a global average pool. Two separate
    fully connected heads of width ``z_dim`` are computed from the pooled
    features; the second one is passed through ``tf.tanh``.

    Args:
      inputs: input tensor fed to the first dropout layer.
      is_training: forwarded to the layers through ``common_layer_args``.
      reuse: variable-reuse flag, forwarded the same way.
      z_dim: output width of both fully connected heads.

    Returns:
      dict mapping layer names to tensors, including ``'e_logits1'`` and
      ``'e_logits2'`` (the latter after tanh).
    """
    common_args = common_layer_args(is_training, reuse)
    conv_args = make_args(batch_norm=True,
                          activation=lrelu,
                          w_init=initz.he_normal(scale=1),
                          untie_biases=False,
                          **common_args)
    # Same as conv_args but without batch norm; used for the first conv only.
    conv_args_1st = make_args(batch_norm=None,
                              activation=lrelu,
                              w_init=initz.he_normal(scale=1),
                              untie_biases=False,
                              **common_args)
    logits_args = make_args(activation=None,
                            w_init=initz.he_normal(scale=1),
                            **common_args)

    end_points = {}
    x = inputs
    end_points['inputs'] = x

    x = dropout(x, drop_p=0.2, name="input_dropout1", **common_args)
    x = conv2d(x,
               96,
               filter_size=(5, 5),
               stride=(2, 2),
               name="e_conv1_1",
               **conv_args_1st)
    end_points['e_conv1_1'] = x
    x = conv2d(x, 96, name="e_conv1_2", **conv_args)
    end_points['e_conv1_2'] = x
    x = conv2d(x, 96, stride=(2, 2), name="e_conv1_3", **conv_args)
    end_points['e_conv1_3'] = x

    x = dropout(x, drop_p=0.2, name="dropout1", **common_args)
    x = conv2d(x, 192, name="e_conv2_1", **conv_args)
    end_points['e_conv2_1'] = x
    x = conv2d(x, 192, name="e_conv2_2", **conv_args)
    end_points['e_conv2_2'] = x

    x = dropout(x, drop_p=0.2, name="dropout2", **common_args)
    x = conv2d(x, 192, filter_size=(1, 1), name="e_conv4_1", **conv_args)
    end_points['e_conv4_1'] = x
    x = conv2d(x, 192, filter_size=(1, 1), name="e_conv4_2", **conv_args)
    end_points['e_conv4_2'] = x

    x = global_avg_pool(x, name="global_pool")
    end_points['global_pool'] = x

    # Two independent heads computed from the same pooled features.
    logits1 = fully_connected(x, z_dim, name="e_logits1", **logits_args)
    logits2 = fully_connected(x, z_dim, name="e_logits2", **logits_args)
    logits2 = tf.tanh(logits2, name='e_logits2_tanh')
    end_points['e_logits1'] = logits1
    end_points['e_logits2'] = logits2
    return end_points
Exemple #4
0
def discriminator(inputs, is_training, reuse, num_classes=1):
    """Build the discriminator graph and return its intermediate tensors.

    The input goes through dropout, a 5x5 stride-2 convolution without batch
    norm, further convolutions (one of them stride-2) interleaved with
    dropout, two 1x1 convolutions, a global average pool and a fully
    connected layer producing ``num_classes`` logits, followed by softmax.

    Args:
      inputs: input tensor fed to the first dropout layer.
      is_training: forwarded to the layers through ``common_layer_args``.
      reuse: variable-reuse flag, forwarded the same way.
      num_classes: width of the logits layer.

    Returns:
      dict mapping layer names to tensors, including ``'logits'`` and
      ``'predictions'``.
    """
    common_args = common_layer_args(is_training, reuse)
    conv_args = make_args(batch_norm=True,
                          activation=lrelu,
                          w_init=initz.he_normal(scale=1),
                          untie_biases=False,
                          **common_args)
    # Same as conv_args but without batch norm; used for the first conv only.
    conv_args_1st = make_args(batch_norm=None,
                              activation=lrelu,
                              w_init=initz.he_normal(scale=1),
                              untie_biases=False,
                              **common_args)
    logits_args = make_args(activation=None,
                            w_init=initz.he_normal(scale=1),
                            **common_args)

    end_points = {}
    x = inputs
    end_points['inputs'] = x

    x = dropout(x, drop_p=0.2, name="input_dropout1", **common_args)
    x = conv2d(x,
               96,
               filter_size=(5, 5),
               stride=(2, 2),
               name="d_conv1_1",
               **conv_args_1st)
    end_points['d_conv1_1'] = x
    x = conv2d(x, 96, name="d_conv1_2", **conv_args)
    end_points['d_conv1_2'] = x
    x = conv2d(x, 96, stride=(2, 2), name="d_conv1_3", **conv_args)
    end_points['d_conv1_3'] = x

    x = dropout(x, drop_p=0.2, name="dropout1", **common_args)
    x = conv2d(x, 192, name="d_conv2_1", **conv_args)
    end_points['d_conv2_1'] = x
    x = conv2d(x, 192, name="d_conv2_2", **conv_args)
    end_points['d_conv2_2'] = x

    x = dropout(x, drop_p=0.2, name="dropout2", **common_args)
    x = conv2d(x, 192, filter_size=(1, 1), name="d_conv4_1", **conv_args)
    end_points['d_conv4_1'] = x
    x = conv2d(x, 192, filter_size=(1, 1), name="d_conv4_2", **conv_args)
    end_points['d_conv4_2'] = x

    x = global_avg_pool(x, name="global_pool")
    end_points['global_pool'] = x
    logits = fully_connected(x, num_classes, name="d_logits", **logits_args)
    end_points['logits'] = logits
    end_points['predictions'] = softmax(logits,
                                        name='predictions',
                                        **common_args)
    return end_points
Exemple #5
0
def model(inputs,
          is_training,
          reuse,
          input_size=image_size[0],
          drop_p_conv=0.0,
          drop_p_trans=0.0,
          n_filters=64,
          n_layers=(1, 2, 2, 3),
          num_classes=5, **kwargs):
  """Build a classifier made of a conv stem followed by dense blocks.

  The stem is a 7x7 convolution, a max-pool, two more convolutions and a
  second max-pool. After that, every entry of ``n_layers`` except the last
  produces a ``dense_block`` followed by a ``trans_block``; the last entry
  produces a final ``dense_block``. A global average pool, a fully connected
  logits layer and softmax finish the network.

  Args:
    inputs: input tensor fed to the first convolution.
    is_training: forwarded to the layers through ``common_layer_args``.
    reuse: variable-reuse flag, forwarded the same way.
    input_size: accepted for interface compatibility; not used in the body.
    drop_p_conv: ``drop_p`` given to every dense block except the last.
    drop_p_trans: ``drop_p`` given to the transition blocks and to the last
      dense block.
    n_filters: initial filter count handed to the first dense block; each
      dense block returns the updated count.
    n_layers: sequence with one ``num_layers`` value per dense block. Any
      non-empty length is accepted; the default yields four dense blocks.
    num_classes: width of the logits layer.
    **kwargs: accepted and ignored.

  Returns:
    Whatever ``end_points(is_training)`` returns.

  Raises:
    ValueError: if ``n_layers`` is empty.
  """
  if not n_layers:
    raise ValueError("n_layers must contain at least one entry")

  common_args = common_layer_args(is_training, reuse)
  conv_args = make_args(
      batch_norm=True,
      activation=prelu,
      w_init=initz.he_normal(scale=1),
      untie_biases=True,
      **common_args)
  logit_args = make_args(activation=None, w_init=initz.he_normal(scale=1), **common_args)
  pool_args = make_args(padding='SAME', filter_size=(2, 2), stride=(2, 2), **common_args)

  x = conv2d(inputs, 48, filter_size=(7, 7), name="conv1", **conv_args)
  x = max_pool(x, name='pool1', **pool_args)
  x = conv2d(x, 64, name="conv2_1", **conv_args)
  x = conv2d(x, 64, name="conv2_2", **conv_args)
  x = max_pool(x, name='pool2', **pool_args)

  # 112
  last_idx = len(n_layers) - 1
  for block_idx in range(last_idx):
    x, n_filters = dense_block(
        x,
        n_filters,
        num_layers=n_layers[block_idx],
        drop_p=drop_p_conv,
        block_name='dense_' + str(block_idx),
        **conv_args)
    x = trans_block(
        x, n_filters, drop_p=drop_p_trans, block_name='trans_' + str(block_idx), **conv_args)

  # NOTE(review): the final dense block receives drop_p_trans while the other
  # dense blocks receive drop_p_conv; kept as in the original -- confirm this
  # is intended.
  x, n_filters = dense_block(
      x,
      n_filters,
      num_layers=n_layers[last_idx],
      drop_p=drop_p_trans,
      block_name='dense_' + str(last_idx),
      **conv_args)
  # 8
  x = global_avg_pool(x, name='avgpool_1a_8x8')
  logits = fully_connected(x, n_output=num_classes, name="logits", **logit_args)

  # Called for its effect on the graph; the return value is not used here.
  softmax(logits, name='predictions', **common_args)
  return end_points(is_training)
Exemple #6
0
def fully_connected(x,
                    n_output,
                    is_training,
                    reuse,
                    activation=None,
                    batch_norm=None,
                    batch_norm_args=None,
                    w_init=initz.he_normal(),
                    use_bias=True,
                    b_init=0.0,
                    w_regularizer=tf.nn.l2_loss,
                    outputs_collections=None,
                    trainable=True,
                    name='fc'):
    """Dense layer: ``x @ weights`` with optional bias, batch norm, activation.

    Inputs that are not 2-D are flattened with ``_flatten`` first.

    Args:
      x: input tensor; its shape must have more than one dimension.
      n_output: number of output units.
      is_training: forwarded to the batch-norm and activation callables.
      reuse: passed to ``tf.variable_scope`` and forwarded to the batch-norm
        and activation callables.
      activation: optional callable invoked as
        ``activation(output, is_training=..., reuse=..., trainable=...)``.
      batch_norm: ``None``/``False`` disables batch norm, ``True`` selects
        ``batch_norm_tf``, any other value is used as the batch-norm callable.
      batch_norm_args: optional extra keyword arguments for batch norm.
      w_init: weight initializer. When it is callable the variable shape is
        given explicitly as ``[n_input, n_output]``; otherwise ``shape=None``
        is passed to ``tf.get_variable``.
      use_bias: whether to create and add a ``biases`` variable.
      b_init: constant used to initialise the bias.
      w_regularizer: regularizer attached to the weights variable.
      outputs_collections: forwarded to ``_collect_named_outputs``.
      trainable: whether the created variables are trainable.
      name: variable scope name.

    Returns:
      The value returned by ``_collect_named_outputs`` for the layer output.
    """
    rank = len(helper.get_input_shape(x))
    assert rank > 1, "Input Tensor shape must be > 1-D"
    if len(x.get_shape()) != 2:
        x = _flatten(x)

    n_input = helper.get_input_shape(x)[1]

    with tf.variable_scope(name, reuse=reuse) as curr_scope:
        weight_shape = None
        if hasattr(w_init, '__call__'):
            weight_shape = [n_input, n_output]
        weights = tf.get_variable(name='weights',
                                  shape=weight_shape,
                                  initializer=w_init,
                                  regularizer=w_regularizer,
                                  trainable=trainable)
        result = tf.matmul(x, weights)

        if use_bias:
            biases = tf.get_variable(
                name='biases',
                shape=[n_output],
                initializer=tf.constant_initializer(b_init),
                trainable=trainable)
            result = tf.nn.bias_add(value=result, bias=biases)

        bn_disabled = batch_norm is None or batch_norm is False
        if not bn_disabled:
            # ``True`` means "use the default implementation".
            bn_fn = batch_norm_tf if isinstance(batch_norm, bool) else batch_norm
            bn_kwargs = batch_norm_args or {}
            result = bn_fn(result,
                           is_training=is_training,
                           reuse=reuse,
                           trainable=trainable,
                           **bn_kwargs)

        if activation:
            result = activation(result,
                                is_training=is_training,
                                reuse=reuse,
                                trainable=trainable)

        return _collect_named_outputs(outputs_collections,
                                      curr_scope.original_name_scope, name,
                                      result)
Exemple #7
0
def conv2d_same(inputs, n_output_channels, is_training, reuse, filter_size=(3, 3), stride=(1, 1), dilation_rate=1,
                activation=None, batch_norm=None, batch_norm_args=None, w_init=initz.he_normal(),
                use_bias=True, untie_biases=False, b_init=0.0, w_regularizer=tf.nn.l2_loss,
                outputs_collections=None, trainable=True, name='conv2d_same'):
    """Strided 2-D convolution with explicit 'SAME'-style zero padding.

    When every stride is 1 this simply calls ``conv2d`` with
    ``padding='SAME'``. Otherwise the input is zero-padded explicitly, with
    an amount that depends only on the kernel size and dilation rate, and
    ``conv2d`` is called with ``padding='VALID'``.

    With a stride greater than 1, plain 'SAME' padding depends on the input
    size and therefore gives a different result when the input height or
    width is even; padding explicitly makes the result equal to a stride-1
    'SAME' convolution followed by subsampling. The approach follows
    TF-Slim's ``resnet_utils.conv2d_same``.

    The padding is computed per spatial dimension from ``filter_size[0]``
    (height) and ``filter_size[1]`` (width), so non-square kernels are padded
    correctly. For square kernels and equal strides the behaviour is the same
    as before.

    Args:
      inputs: 4-D input tensor; the two middle dimensions are padded.
      n_output_channels: number of output channels.
      is_training: forwarded to ``conv2d``.
      reuse: forwarded to ``conv2d``.
      filter_size: ``(height, width)`` of the kernel.
      stride: ``(height, width)`` strides.
      dilation_rate: dilation rate used to compute the effective kernel size.
      activation, batch_norm, batch_norm_args, w_init, use_bias, untie_biases,
      b_init, w_regularizer, outputs_collections, trainable, name: forwarded
        unchanged to ``conv2d``.

    Returns:
      The value returned by ``conv2d``.
    """
    padding = 'SAME'
    if any(s != 1 for s in stride):
        spatial_paddings = []
        for kernel_size in (filter_size[0], filter_size[1]):
            kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation_rate - 1)
            pad_total = kernel_size_effective - 1
            pad_beg = pad_total // 2
            pad_end = pad_total - pad_beg
            spatial_paddings.append([pad_beg, pad_end])
        # Batch and channel dimensions are left untouched.
        inputs = tf.pad(inputs, [[0, 0]] + spatial_paddings + [[0, 0]])
        padding = 'VALID'

    return conv2d(inputs, n_output_channels, is_training, reuse, filter_size=filter_size, stride=stride,
                  dilation_rate=dilation_rate, padding=padding, activation=activation, batch_norm=batch_norm,
                  batch_norm_args=batch_norm_args, w_init=w_init, use_bias=use_bias, untie_biases=untie_biases,
                  b_init=b_init, w_regularizer=w_regularizer, outputs_collections=outputs_collections,
                  trainable=trainable, name=name)
Exemple #8
0
def conv2d(x,
           n_output_channels,
           is_training,
           reuse,
           filter_size=(3, 3),
           stride=(1, 1),
           dilation_rate=1,
           padding='SAME',
           activation=None,
           batch_norm=None,
           batch_norm_args=None,
           w_init=initz.he_normal(),
           use_bias=True,
           untie_biases=False,
           b_init=0.0,
           w_regularizer=tf.nn.l2_loss,
           outputs_collections=None,
           trainable=True,
           name='conv2d'):
    """2-D convolution layer with optional bias, batch norm and activation.

    Uses ``tf.nn.conv2d`` when ``dilation_rate`` is 1 and
    ``tf.nn.atrous_conv2d`` otherwise.

    Args:
      x: 4-D input tensor.
      n_output_channels: number of output channels.
      is_training: forwarded to the batch-norm and activation callables.
      reuse: passed to ``tf.variable_scope`` and forwarded to the batch-norm
        and activation callables.
      filter_size: ``(height, width)`` of the kernel.
      stride: ``(height, width)`` strides; must not exceed 1 when
        ``dilation_rate`` is not 1.
      dilation_rate: rate for the atrous convolution.
      padding: padding mode handed to the TensorFlow convolution op.
      activation: optional callable invoked as
        ``activation(output, is_training=..., reuse=..., trainable=...)``.
      batch_norm: ``None``/``False`` disables batch norm, ``True`` selects
        ``batch_norm_tf``, any other value is used as the batch-norm callable.
      batch_norm_args: optional extra keyword arguments for batch norm.
      w_init: weight initializer. When it is callable the kernel shape is
        given explicitly; otherwise ``shape=None`` is passed to
        ``tf.get_variable``.
      use_bias: whether to create and add a ``biases`` variable.
      untie_biases: if true the bias has the shape of the output without its
        batch dimension (one bias per output position); otherwise there is
        one bias per output channel.
      b_init: constant used to initialise the bias.
      w_regularizer: regularizer attached to the weights variable.
      outputs_collections: forwarded to ``_collect_named_outputs``.
      trainable: whether the created variables are trainable.
      name: variable scope name.

    Returns:
      The value returned by ``_collect_named_outputs`` for the layer output.

    Raises:
      AssertionError: if the input is not 4-D.
      ValueError: if ``dilation_rate`` is not 1 and any stride exceeds 1.
    """
    input_shape = helper.get_input_shape(x)
    assert len(input_shape) == 4, "Input Tensor shape must be 4-D"
    with tf.variable_scope(name, reuse=reuse) as curr_scope:
        shape = [
            filter_size[0], filter_size[1],
            x.get_shape()[-1], n_output_channels
        ] if hasattr(w_init, '__call__') else None
        W = tf.get_variable(name='weights',
                            shape=shape,
                            initializer=w_init,
                            regularizer=w_regularizer,
                            trainable=trainable)

        if dilation_rate == 1:
            output = tf.nn.conv2d(input=x,
                                  filter=W,
                                  strides=[1, stride[0], stride[1], 1],
                                  padding=padding)
        else:
            if any(s > 1 for s in stride):
                raise ValueError(
                    "Stride (%s) cannot be more than 1 if rate (%d) is not 1" %
                    (stride, dilation_rate))

            output = tf.nn.atrous_conv2d(value=x,
                                         filters=W,
                                         rate=dilation_rate,
                                         padding=padding)

        if use_bias:
            if untie_biases:
                b = tf.get_variable(
                    name='biases',
                    shape=output.get_shape()[1:],
                    initializer=tf.constant_initializer(b_init),
                    trainable=trainable)
                output = tf.add(output, b)
            else:
                b = tf.get_variable(
                    name='biases',
                    shape=[n_output_channels],
                    initializer=tf.constant_initializer(b_init),
                    trainable=trainable)
                output = tf.nn.bias_add(value=output, bias=b)

        # ``False`` must disable batch norm just like ``None``. Checking only
        # ``is not None`` would let ``False`` through the bool test below and
        # silently enable the default batch norm.
        if batch_norm is not None and batch_norm is not False:
            if isinstance(batch_norm, bool):
                batch_norm = batch_norm_tf
            batch_norm_args = batch_norm_args or {}
            output = batch_norm(output,
                                is_training=is_training,
                                reuse=reuse,
                                trainable=trainable,
                                **batch_norm_args)

        if activation:
            output = activation(output,
                                is_training=is_training,
                                reuse=reuse,
                                trainable=trainable)

        return _collect_named_outputs(outputs_collections,
                                      curr_scope.original_name_scope, output)
Exemple #9
0
def model(inputs, is_training, reuse, num_classes=21, batch_size=1):
    """Build a segmentation graph with skip connections from pool4 and pool3.

    A stack of 3x3 convolutions named ``vgg_19/convN/convN_M`` with five
    max-pools is followed by two conv "fc" layers with dropout and a 1x1
    scoring conv. The scores are upsampled to the size of ``pool4``, added to
    a 1x1 score of ``pool4``, upsampled to the size of ``pool3``, added to a
    1x1 score of ``pool3``, and finally upsampled to the input size.

    Args:
      inputs: input tensor; its static dimensions 1 and 2 are read to size
        the last upsampling layer.
      is_training: forwarded to the layers through ``common_layer_args``.
      reuse: variable-reuse flag, forwarded the same way.
      num_classes: number of score channels.
      batch_size: static batch dimension used in the requested output shapes
        of the ``upsample2d`` layers.

    Returns:
      Whatever ``end_points(is_training)`` returns.
    """
    common_args = common_layer_args(is_training, reuse)
    conv_args = make_args(batch_norm=True, activation=lrelu, w_init=initz.he_normal(
        scale=1), untie_biases=False, **common_args)
    # Used by the two intermediate upsampling layers: no bias, batch_norm=False.
    upsample_args = make_args(
        batch_norm=False, activation=lrelu, use_bias=False, **common_args)
    # Used by the final upsampling layer: no activation.
    logits_args = make_args(
        activation=None, **common_args)
    pool_args = make_args(padding='SAME', **common_args)

    # Convolutional backbone; pool3 and pool4 are reused below for the skips.
    conv1_1 = conv2d(inputs, 64, name="vgg_19/conv1/conv1_1", **conv_args)
    conv1_2 = conv2d(conv1_1, 64, name="vgg_19/conv1/conv1_2", **conv_args)
    pool1 = max_pool(conv1_2, stride=2, name='pool1', **pool_args)
    conv2_1 = conv2d(pool1, 128, name="vgg_19/conv2/conv2_1", **conv_args)
    conv2_2 = conv2d(conv2_1, 128, name="vgg_19/conv2/conv2_2", **conv_args)
    pool2 = max_pool(conv2_2, stride=2, name='pool2', **pool_args)
    conv3_1 = conv2d(pool2, 256, name="vgg_19/conv3/conv3_1", **conv_args)
    conv3_2 = conv2d(conv3_1, 256, name="vgg_19/conv3/conv3_2", **conv_args)
    conv3_3 = conv2d(conv3_2, 256, name="vgg_19/conv3/conv3_3", **conv_args)
    conv3_4 = conv2d(conv3_3, 256, name="vgg_19/conv3/conv3_4", **conv_args)
    pool3 = max_pool(conv3_4, stride=2, name='pool3', **pool_args)
    conv4_1 = conv2d(pool3, 512, name="vgg_19/conv4/conv4_1", **conv_args)
    conv4_2 = conv2d(conv4_1, 512, name="vgg_19/conv4/conv4_2", **conv_args)
    conv4_3 = conv2d(conv4_2, 512, name="vgg_19/conv4/conv4_3", **conv_args)
    conv4_4 = conv2d(conv4_3, 512, name="vgg_19/conv4/conv4_4", **conv_args)
    pool4 = max_pool(conv4_4, stride=2, name='pool4', **pool_args)
    conv5_1 = conv2d(pool4, 512, name="vgg_19/conv5/conv5_1", **conv_args)
    conv5_2 = conv2d(conv5_1, 512, name="vgg_19/conv5/conv5_2", **conv_args)
    conv5_3 = conv2d(conv5_2, 512, name="vgg_19/conv5/conv5_3", **conv_args)
    conv5_4 = conv2d(conv5_3, 512, name="vgg_19/conv5/conv5_4", **conv_args)
    pool5 = max_pool(conv5_4, stride=2, name='pool5', **pool_args)

    # "Fully connected" layers expressed as convolutions, each followed by
    # dropout, then a 1x1 conv producing per-class scores.
    fc6 = conv2d(pool5, 4096, filter_size=(7, 7),
                 name="vgg_19/fc6", **conv_args)
    fc6 = dropout(fc6, **common_args)
    fc7 = conv2d(fc6, 4096, filter_size=(1, 1), name="vgg_19/fc7", **conv_args)
    fc7 = dropout(fc7, **common_args)
    score_fr = conv2d(fc7, num_classes, filter_size=(1, 1),
                      name="score_fr", **conv_args)

    # NOTE(review): `pred` is not referenced again in this function; it only
    # adds an argmax op to the graph.
    pred = tf.argmax(score_fr, axis=3)
    # First skip: upsample the scores (stride 2) to pool4's spatial size and
    # add a 1x1 score of pool4.
    pool4_shape = pool4.get_shape().as_list()
    upscore2 = upsample2d(score_fr, [batch_size, pool4_shape[1], pool4_shape[2], num_classes], filter_size=(4, 4), stride=(2, 2),
                          name="deconv2d_1", w_init=initz.bilinear((4, 4, num_classes, num_classes)), **upsample_args)
    score_pool4 = conv2d(pool4, num_classes, filter_size=(1, 1),
                         name="score_pool4", **conv_args)
    fuse_pool4 = tf.add(upscore2, score_pool4)

    # Second skip: upsample (stride 2) to pool3's spatial size and add a 1x1
    # score of pool3.
    pool3_shape = pool3.get_shape().as_list()
    upscore4 = upsample2d(fuse_pool4, [batch_size, pool3_shape[1], pool3_shape[2], num_classes], filter_size=(4, 4), stride=(2, 2),
                          name="deconv2d_2", w_init=initz.bilinear((4, 4, num_classes, num_classes)), **upsample_args)
    score_pool3 = conv2d(pool3, num_classes, filter_size=(1, 1),
                         name="score_pool3", **conv_args)
    fuse_pool3 = tf.add(upscore4, score_pool3)
    # Final upsampling (stride 8) to the spatial size of the input.
    input_shape = inputs.get_shape().as_list()
    upscore32 = upsample2d(fuse_pool3, [batch_size, input_shape[1], input_shape[2], num_classes], filter_size=(16, 16), stride=(8, 8),
                           name="deconv2d_3", w_init=initz.bilinear((16, 16, num_classes, num_classes)), **logits_args)
    # Logits are flattened to one row of `num_classes` scores per position.
    logits = register_to_collections(tf.reshape(
        upscore32, shape=(-1, num_classes)), name='logits', **common_args)
    # Class-index map taken over the channel axis of the upsampled scores.
    pred_up = tf.argmax(upscore32, axis=3)
    pred_up = register_to_collections(
        pred_up, name='final_prediction_map', **common_args)
    predictions = softmax(logits, name='predictions', **common_args)
    return end_points(is_training)
Exemple #10
0
def discriminator(inputs, is_training, reuse, num_classes=11, batch_size=32):
    """Build the discriminator graph with an extra "generated" class.

    The convolutional trunk ends in a fully connected layer that produces
    ``num_classes`` logits; the last column is treated as the "generated"
    class and the remaining columns as the real classes.

    When ``is_training`` is true, extra tensors are derived from the logits.
    The code slices ``2 * batch_size`` rows and then splits them into a first
    half (stored under the ``D_on_data*`` / ``class_logits`` keys) and a
    second half (stored under the ``D_on_G*`` keys).
    NOTE(review): this presumably expects the caller to feed real and
    generated samples concatenated in that order -- confirm against the
    caller.

    Args:
      inputs: input tensor fed to the first dropout layer.
      is_training: forwarded to the layers through ``common_layer_args``;
        also selects whether the extra tensors are built.
      reuse: variable-reuse flag, forwarded to the layers.
      num_classes: width of the logits layer, including the generated class.
      batch_size: size of one half of the training batch.

    Returns:
      dict mapping names to tensors.
    """
    common_args = common_layer_args(is_training, reuse)
    conv_args = make_args(batch_norm=True,
                          activation=lrelu,
                          w_init=initz.he_normal(scale=1),
                          untie_biases=False,
                          **common_args)
    # Same as conv_args but without batch norm; used for the first conv only.
    conv_args_1st = make_args(batch_norm=None,
                              activation=lrelu,
                              w_init=initz.he_normal(scale=1),
                              untie_biases=False,
                              **common_args)
    logits_args = make_args(activation=None,
                            w_init=initz.he_normal(scale=1),
                            **common_args)

    end_points = {}
    x = inputs
    end_points['inputs'] = x
    x = dropout(x, drop_p=0.2, name="input_dropout1", **common_args)
    x = conv2d(x,
               96,
               filter_size=(5, 5),
               stride=(2, 2),
               name="d_conv1_1",
               **conv_args_1st)
    end_points['d_conv1_1'] = x
    x = conv2d(x, 96, name="d_conv1_2", **conv_args)
    end_points['d_conv1_2'] = x
    x = conv2d(x, 96, stride=(2, 2), name="d_conv1_3", **conv_args)
    end_points['d_conv1_3'] = x
    x = dropout(x, drop_p=0.2, name="dropout1", **common_args)
    x = conv2d(x, 192, name="d_conv2_1", **conv_args)
    end_points['d_conv2_1'] = x
    x = conv2d(x, 192, name="d_conv2_2", **conv_args)
    end_points['d_conv2_2'] = x
    x = conv2d(x, 192, stride=(2, 2), name="d_conv2_3", **conv_args)
    end_points['d_conv2_3'] = x
    x = dropout(x, drop_p=0.2, name="dropout2", **common_args)
    x = conv2d(x, 192, stride=(2, 2), name="d_conv3_1", **conv_args)
    end_points['d_conv3_1'] = x
    x = conv2d(x, 192, filter_size=(1, 1), name="d_conv4_1", **conv_args)
    end_points['d_conv4_1'] = x
    x = conv2d(x, 192, filter_size=(1, 1), name="d_conv4_2", **conv_args)
    end_points['d_conv4_2'] = x
    x = global_avg_pool(x, name="global_pool")
    end_points['global_pool'] = x
    logits = fully_connected(x, num_classes, name="d_logits", **logits_args)
    end_points['logits'] = logits
    end_points['predictions'] = softmax(logits,
                                        name='predictions',
                                        **common_args)

    if is_training:
        # Keep both sizes as plain ints: slice begin/size values must be
        # integers, and `(2 * batch_size) / 2` is a float under true division.
        half_batch = batch_size
        full_batch = 2 * batch_size

        generated_class_logits = tf.squeeze(
            tf.slice(logits, [0, num_classes - 1], [full_batch, 1]))
        end_points['generated_class_logits'] = generated_class_logits
        positive_class_logits = tf.slice(logits, [0, 0],
                                         [full_batch, num_classes - 1])
        end_points['positive_class_logits'] = positive_class_logits

        # Subtract the row-wise max before exponentiating so the
        # log-sum-exp below cannot overflow.
        max_ = tf.reduce_max(positive_class_logits, 1, keep_dims=True)
        safe_pos_class_logits = positive_class_logits - max_
        end_points['safe_pos_class_logits'] = safe_pos_class_logits

        # log-sum-exp over the real classes minus the generated-class logit.
        gan_logits = tf.log(
            tf.reduce_sum(tf.exp(safe_pos_class_logits),
                          1)) + tf.squeeze(max_) - generated_class_logits
        end_points['gan_logits'] = gan_logits
        assert len(gan_logits.get_shape()) == 1

        probs = tf.nn.sigmoid(gan_logits)
        end_points['probs'] = probs

        # First half of the rows.
        class_logits = tf.slice(logits, [0, 0], [half_batch, num_classes])
        end_points['class_logits'] = class_logits
        D_on_data = tf.slice(probs, [0], [half_batch])
        end_points['D_on_data'] = D_on_data
        D_on_data_logits = tf.slice(gan_logits, [0], [half_batch])
        end_points['D_on_data_logits'] = D_on_data_logits

        # Second half of the rows.
        D_on_G = tf.slice(probs, [half_batch], [half_batch])
        end_points['D_on_G'] = D_on_G
        D_on_G_logits = tf.slice(gan_logits, [half_batch], [half_batch])
        end_points['D_on_G_logits'] = D_on_G_logits

    return end_points
Exemple #11
0
def vgg_16(is_training, reuse,
           num_classes=1000,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           name='vgg_16'):
    """Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to conv2d layers.
          To use in classification mode, resize input to 224x224.

    The input placeholder is created inside this function from the
    module-level ``crop_size``; there is no ``inputs`` argument.

    Args:
      is_training: whether or not the model is being trained.
      reuse: variable-reuse flag forwarded to every layer.
      num_classes: number of predicted classes.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of the
        outputs. Useful to remove unnecessary dimensions for classification.
      name: Optional name for the variables.

    Returns:
      Whatever ``end_points(is_training)`` returns.
    """
    common_args = common_layer_args(is_training, reuse)
    conv_args = make_args(batch_norm=True, activation=prelu, w_init=initz.he_normal(
        scale=1), untie_biases=False, **common_args)
    logit_args = make_args(
        activation=None, w_init=initz.he_normal(scale=1), **common_args)
    pool_args = make_args(padding='SAME', **common_args)
    inputs = input((None, crop_size[1], crop_size[0], 3), **common_args)
    with tf.variable_scope(name, 'vgg_16', [inputs]):
        net = repeat(inputs, 2, conv2d,
                     64, filter_size=(3, 3), name='conv1', **conv_args)
        net = max_pool(net, name='pool1', **pool_args)
        net = repeat(net, 2, conv2d, 128, filter_size=(
            3, 3), name='conv2', **conv_args)
        net = max_pool(net, name='pool2', **pool_args)
        net = repeat(net, 3, conv2d, 256, filter_size=(
            3, 3), name='conv3', **conv_args)
        net = max_pool(net, name='pool3', **pool_args)
        net = repeat(net, 3, conv2d, 512, filter_size=(
            3, 3), name='conv4', **conv_args)
        net = max_pool(net, name='pool4', **pool_args)
        net = repeat(net, 3, conv2d, 512, filter_size=(
            3, 3), name='conv5', **conv_args)
        net = max_pool(net, name='pool5', **pool_args)
        # Use conv2d instead of fully_connected layers.
        net = conv2d(net, 4096, filter_size=(7, 7), name='fc6', **conv_args)
        # `is_training` is supplied through common_args; passing it again
        # explicitly would be a duplicate keyword argument.
        net = dropout(net, drop_p=1 - dropout_keep_prob,
                      name='dropout6', **common_args)
        net = conv2d(net, 4096, filter_size=(1, 1), name='fc7', **conv_args)
        net = dropout(net, drop_p=1 - dropout_keep_prob,
                      name='dropout7', **common_args)
        # `activation=None` is supplied through logit_args; passing it again
        # explicitly would be a duplicate keyword argument.
        logits = conv2d(net, num_classes, filter_size=(1, 1),
                        name='logits', **logit_args)
        if spatial_squeeze:
            logits = tf.squeeze(logits, [1, 2], name='logits/squeezed')
        # softmax only needs the shared layer arguments, not conv-style
        # activation / w_init settings.
        softmax(logits, name='predictions', **common_args)
        return end_points(is_training)
Exemple #12
0
def model(inputs,
          is_training,
          reuse,
          num_classes=5,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          name='alexnet_v2',
          **kwargs):
    """Build an AlexNet-v2 style classifier on top of `inputs`.

    Layout after http://arxiv.org/pdf/1404.5997v2.pdf: five convolutions
    interleaved with three max-pools, then two convolutional "fc" layers
    (`fc6`, `fc7`), each followed by dropout. The head is a global average
    pool, an `fc` layer with `num_classes` outputs and a softmax.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        is_training: whether or not the model is being trained; forwarded to
            `common_layer_args` and `end_points`.
        reuse: forwarded to `common_layer_args`.
        num_classes: number of output units of the `logits` layer.
        dropout_keep_prob: keep probability; both dropout layers receive
            `drop_p = 1 - dropout_keep_prob`.
        spatial_squeeze: accepted for signature compatibility; this body
            never reads it.
        name: variable scope name; 'alexnet_v2' is the default scope name
            used when `name` is None.
        **kwargs: accepted and ignored.

    Returns:
        Whatever `end_points(is_training)` returns.
    """
    shared = common_layer_args(is_training, reuse)
    conv_kw = make_args(
        batch_norm=True,
        activation=prelu,
        w_init=initz.he_normal(scale=1),
        untie_biases=False,
        **shared)
    logit_kw = make_args(
        activation=None, w_init=initz.he_normal(scale=1), **shared)
    # Not read by anything below; kept so the same helper calls are made.
    pred_kw = make_args(
        activation=prelu, w_init=initz.he_normal(scale=1), **shared)
    pool_kw = make_args(padding='SAME', **shared)

    with tf.variable_scope(name, 'alexnet_v2', [inputs]):
        # Convolutional feature extractor.
        features = conv2d(
            inputs, 64, filter_size=(11, 11), stride=(4, 4), name='conv1',
            **conv_kw)
        features = max_pool(features, stride=(2, 2), name='pool1', **pool_kw)
        features = conv2d(
            features, 192, filter_size=(5, 5), name='conv2', **conv_kw)
        features = max_pool(features, stride=(2, 2), name='pool2', **pool_kw)
        features = conv2d(features, 384, name='conv3', **conv_kw)
        features = conv2d(features, 384, name='conv4', **conv_kw)
        features = conv2d(features, 256, name='conv5', **conv_kw)
        features = max_pool(features, stride=(2, 2), name='pool5', **pool_kw)

        # "Fully connected" layers expressed as convolutions.
        features = conv2d(
            features, 4096, filter_size=(5, 5), name='fc6', **conv_kw)
        features = dropout(
            features, drop_p=1 - dropout_keep_prob, name='dropout6', **shared)
        features = conv2d(
            features, 4096, filter_size=(1, 1), name='fc7', **conv_kw)
        features = dropout(
            features, drop_p=1 - dropout_keep_prob, name='dropout7', **shared)

        # Classification head.
        pooled = global_avg_pool(features)
        logits = fc(pooled, num_classes, name='logits', **logit_kw)
        predictions = softmax(logits, name='predictions', **shared)
        return end_points(is_training)
Exemple #13
0
def resnet_v1(inputs,
              is_training,
              reuse,
              blocks,
              num_classes=None,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              name=None):
    """Assemble a ResNet from `blocks` and return the collected end points.

    The network is built inside `tf.variable_scope(name, 'resnet_v2', ...)`
    in this order:

    1. Optional root block: a 7x7, stride-2 `conv2d_same` with 64 filters
       followed by a max-pool (`include_root_block`).
    2. `resnet_utils.stack_blocks_dense` over `blocks`.
    3. A `postnorm` batch normalization with ReLU.
    4. Optional global average pooling over axes [1, 2], keeping the
       dimensions (`global_pool`).
    5. Optional 1x1 `logits` convolution followed by a softmax
       (`num_classes`).

    NOTE: despite the function name, the default variable scope name is
    'resnet_v2' and a post-normalization step is applied after the blocks.
    The scope name is left as is so existing variable names stay stable.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      is_training: whether is training or not; forwarded to the layer
        argument helpers, to `batch_norm` and to `end_points`.
      reuse: whether or not the network and its variables should be reused;
        passed to the variable scope and to the layers.
      blocks: block descriptions handed unchanged to
        `resnet_utils.stack_blocks_dense`.
      num_classes: Number of predicted classes. If None, neither the logits
        layer nor the softmax is added.
      global_pool: If True, average over the spatial axes after `postnorm`.
      output_stride: If None, nothing is requested from
        `stack_blocks_dense`. Otherwise the requested ratio of input to
        output spatial resolution; when the root block is included it must
        be a multiple of 4 and is divided by 4 before being passed on.
      include_root_block: If True, include the initial convolution followed
        by max-pooling; if False, `inputs` goes straight to the blocks.
      name: Optional variable scope name.

    Returns:
      The result of `end_points(is_training)`.

    Raises:
      ValueError: If the root block is included and `output_stride` is not a
        multiple of 4.
    """
    common_args = common_layer_args(is_training, reuse)
    conv_args = make_args(batch_norm=True,
                          activation=prelu,
                          w_init=initz.he_normal(scale=1),
                          untie_biases=False,
                          **common_args)
    logits_args = make_args(activation=None,
                            w_init=initz.he_normal(scale=1),
                            **common_args)
    pred_args = make_args(activation=prelu,
                          w_init=initz.he_normal(scale=1),
                          **common_args)
    pool_args = make_args(padding='SAME', **common_args)

    with tf.variable_scope(name, 'resnet_v2', [inputs], reuse=reuse):
        net = inputs
        if include_root_block:
            if output_stride is not None:
                if output_stride % 4 != 0:
                    raise ValueError(
                        'The output_stride needs to be a multiple of 4.')
                # Floor division keeps the stride an integer on Python 3;
                # the remainder check above guarantees the division is exact.
                output_stride //= 4
            # conv1 is built with the plain common arguments (no batch
            # normalization or activation), unlike the blocks below, which
            # receive `conv_args`.
            net = resnet_utils.conv2d_same(net,
                                           64,
                                           7,
                                           stride=2,
                                           name='conv1',
                                           **common_args)
            net = max_pool(net, name='pool1', **pool_args)
        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride,
                                              **conv_args)
        # Normalize and activate the output of the last block.
        # NOTE(review): other model definitions in this file pass
        # `activation_fn=` to batch_norm -- confirm which keyword is right.
        net = batch_norm(net,
                         activation=tf.nn.relu,
                         name='postnorm',
                         is_training=is_training,
                         reuse=reuse)
        if global_pool:
            # Global average pooling.
            net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
        if num_classes is not None:
            net = conv2d(net,
                         num_classes,
                         filter_size=(1, 1),
                         name='logits',
                         **logits_args)
            # The result is not used locally; presumably it is picked up
            # through `end_points` -- TODO confirm.
            predictions = softmax(net, name='predictions', **pred_args)

        return end_points(is_training)
Exemple #14
0
def _linear(x,
            n_output,
            reuse,
            trainable=True,
            w_init=initz.he_normal(),
            b_init=0.0,
            w_regularizer=tf.nn.l2_loss,
            name='fc',
            layer_norm=None,
            layer_norm_args=None,
            activation=None,
            outputs_collections=None,
            use_bias=True):
    """Adds a fully connected (linear) layer over one or more 2-D tensors.

    A weight variable `W` is created and multiplied with `x`. When `x` is a
    sequence of tensors they are concatenated along axis 1 first. The
    following steps are then applied in order:

    1. If `use_bias` is true, a bias variable `b` (constant-initialized with
       `b_init`) is added. This happens whether or not `layer_norm` is given.
    2. If `layer_norm` is not None, it is called on the result.
    3. If `activation` is truthy, it is called on the result.

    Args:
        x: A 2-D `Tensor`, or a sequence of 2-D `Tensor`s, each with a known
            second dimension, e.g. `[batch_size, depth]`.
        n_output: Integer or long, the number of output units in the layer.
        reuse: whether or not the layer and its variables should be reused;
            passed to the variable scope, `layer_norm` and `activation`.
        trainable: passed to `tf.get_variable` for both variables and to
            `layer_norm` and `activation`.
        w_init: An initializer for the weights. If it is callable the
            variable is created with shape `[n_input, n_output]`; otherwise
            no shape is passed (presumably the initializer is then a value
            that carries its own shape -- TODO confirm).
        b_init: Constant value used to initialize the biases.
        w_regularizer: Optional regularizer for the weights.
        name: Name of the variable scope; also passed to
            `_collect_named_outputs`.
        layer_norm: Optional normalization function, called as
            `layer_norm(output, reuse=..., trainable=..., **layer_norm_args)`.
        layer_norm_args: Optional dict of extra keyword arguments for
            `layer_norm`.
        activation: Optional activation function, called as
            `activation(output, reuse=..., trainable=...)`. Set to None to
            keep a linear output.
        outputs_collections: The collections to which the outputs are added.
        use_bias: Whether to add bias or not.

    Returns:
        The result of `_collect_named_outputs(outputs_collections, name,
        output)`, where `output` is the `[batch, n_output]` tensor.

    Raises:
        ValueError: if `n_output` is not an integer.
        ValueError: if any input is not 2-D.
        ValueError: if the second dimension of any input is unknown.
    """
    if not isinstance(n_output, six.integer_types):
        # Interpolate the value into the message; previously the format
        # string and the value were passed as two separate arguments.
        raise ValueError(
            'n_output should be int or long, got %s.' % (n_output,))

    if not helper.is_sequence(x):
        x = [x]

    # Total input width is the sum of the second dimensions of all inputs.
    n_input = 0
    shapes = [_x.get_shape() for _x in x]
    for shape in shapes:
        if shape.ndims != 2:
            raise ValueError("linear is expecting 2D arguments: %s" % shapes)
        if shape[1].value is None:
            raise ValueError(
                "linear expects shape[1] to be provided for shape %s, but saw %s"
                % (shape, shape[1]))
        n_input += shape[1].value

    with tf.variable_scope(name, reuse=reuse):
        # A shape is only supplied when the initializer is callable.
        shape = [n_input, n_output] if hasattr(w_init, '__call__') else None
        W = tf.get_variable(name='W',
                            shape=shape,
                            dtype=tf.float32,
                            initializer=w_init,
                            regularizer=w_regularizer,
                            trainable=trainable)
        if len(x) == 1:
            output = tf.matmul(x[0], W)
        else:
            # NOTE(review): `tf.concat_v2` is believed to exist only in older
            # TensorFlow releases -- confirm the targeted version before
            # upgrading.
            output = tf.matmul(tf.concat_v2(x, 1), W)

        if use_bias:
            b = tf.get_variable(
                name='b',
                shape=[n_output],
                dtype=tf.float32,
                initializer=tf.constant_initializer(b_init),
                trainable=trainable,
            )

            output = tf.nn.bias_add(value=output, bias=b)

        if layer_norm is not None:
            layer_norm_args = layer_norm_args or {}
            output = layer_norm(output,
                                reuse=reuse,
                                trainable=trainable,
                                **layer_norm_args)

        if activation:
            output = activation(output, reuse=reuse, trainable=trainable)

        return _collect_named_outputs(outputs_collections, name, output)
Exemple #15
0
def model(height,
          width,
          num_actions,
          is_training=False,
          reuse=None,
          name=None):
    """Build a dueling Q-network together with its training loss.

    A `state` placeholder of shape [None, 4, height, width] is created and
    transposed with perm [0, 2, 3, 1] before it enters a stack of six
    convolutions (with one max-pool after the second). Two heads follow:

    * value:     fc(512) -> fc(1)
    * advantage: fc(512) -> fc(num_actions)

    and are combined as `Qs = value + (advantage - mean(advantage, axis=1))`.

    The following tensors are registered through `register_to_collections`:
    `state`, `a` (argmax of `Qs`), `Q` (max of `Qs`), `Q_target` and
    `actions` (placeholders), `loss` (mean squared difference between
    `Q_target` and the Q value of the given action) and `summary_ops`.

    Args:
        height: height of the `state` placeholder.
        width: width of the `state` placeholder.
        num_actions: number of outputs of the advantage head and depth of the
            one-hot encoding of `actions`.
        is_training: forwarded to `common_layer_args` and `end_points`.
        reuse: forwarded to `common_layer_args`.
        name: variable scope name.
            NOTE(review): the default is None and no default scope name is
            supplied to `tf.variable_scope` -- confirm callers always pass a
            name.

    Returns:
        The result of `end_points(is_training)`.
    """
    common_args = common_layer_args(is_training, reuse)
    conv_args = make_args(batch_norm=True,
                          activation=prelu,
                          w_init=initz.he_normal(scale=1),
                          untie_biases=False,
                          **common_args)
    logits_args = make_args(activation=None,
                            w_init=initz.he_normal(scale=1),
                            **common_args)
    fc_args = make_args(activation=prelu,
                        w_init=initz.he_normal(scale=1),
                        **common_args)
    pool_args = make_args(padding='SAME', **common_args)
    # tf.summary.* iterates `collections` as a list of collection keys; a
    # bare string would be split into single-character keys.
    summary_collections = ['train']
    with tf.variable_scope(name):
        state = register_to_collections(
            tf.placeholder(shape=[None, 4, height, width],
                           dtype=tf.float32,
                           name='state'),
            name='state',
            **common_args)
        # Move the axis of size 4 to the last position for the convolutions.
        state_perm = tf.transpose(state, perm=[0, 2, 3, 1])
        summary_ops = [
            # Only the first of the four planes is logged, with a trailing
            # singleton axis added.
            tf.summary.image("states",
                             state[:, 0, :, :][..., tf.newaxis],
                             max_outputs=10,
                             collections=summary_collections)
        ]
        conv1_0 = conv2d(state_perm,
                         32,
                         filter_size=8,
                         stride=(1, 1),
                         name="conv1_0",
                         **conv_args)
        conv1_1 = conv2d(conv1_0,
                         64,
                         filter_size=8,
                         stride=(2, 2),
                         name="conv1_1",
                         **conv_args)
        pool = max_pool(conv1_1, filter_size=2, name="maxpool", **pool_args)
        conv2_0 = conv2d(pool,
                         128,
                         filter_size=4,
                         stride=2,
                         name="conv2_0",
                         **conv_args)
        conv2_1 = conv2d(conv2_0,
                         256,
                         filter_size=3,
                         stride=(2, 2),
                         name="conv2_1",
                         **conv_args)
        conv3_0 = conv2d(conv2_1,
                         256,
                         filter_size=4,
                         stride=1,
                         name="conv3_0",
                         **conv_args)
        conv3_1 = conv2d(conv3_0,
                         512,
                         filter_size=4,
                         stride=2,
                         name="conv3_1",
                         **conv_args)
        # Dueling: separate hidden layers for the value and advantage heads.
        value_hid = fc(conv3_1, 512, name="value_hid", **fc_args)
        adv_hid = fc(conv3_1, 512, name="adv_hid", **fc_args)

        value = fc(value_hid, 1, name="value", **logits_args)
        advantage = fc(adv_hid, num_actions, name="advantage", **logits_args)

        # Average dueling: subtract the mean advantage before adding value.
        Qs = value + (advantage -
                      tf.reduce_mean(advantage, axis=1, keep_dims=True))

        # Action with the highest Q value.
        a = register_to_collections(tf.argmax(Qs, 1), name='a', **common_args)
        # Q value belonging to that action.
        Q = register_to_collections(tf.reduce_max(Qs, 1),
                                    name='Q',
                                    **common_args)
        summary_ops.append(
            tf.summary.histogram("Q", Q, collections=summary_collections))
        # Training inputs: target Q values and the actions that were taken.
        Q_target = register_to_collections(
            tf.placeholder(shape=[None], dtype=tf.float32),
            name='Q_target',
            **common_args)
        actions = register_to_collections(
            tf.placeholder(shape=[None], dtype=tf.int32),
            name='actions',
            **common_args)
        actions_onehot = tf.one_hot(actions,
                                    num_actions,
                                    on_value=1.,
                                    off_value=0.,
                                    axis=1,
                                    dtype=tf.float32)

        # Select the Q value of the taken action via the one-hot mask.
        Q_tmp = tf.reduce_sum(tf.multiply(Qs, actions_onehot), axis=1)
        loss = register_to_collections(
            tf.reduce_mean(tf.square(Q_target - Q_tmp)),
            name='loss',
            **common_args)
        summary_ops.append(
            tf.summary.scalar("loss", loss, collections=summary_collections))
        register_to_collections(summary_ops, name='summary_ops', **common_args)
        return end_points(is_training)