Example #1
0
def resnet50(x, nums, is_training=True, reuse=False):
    """
    Resnet v2-50 backbone followed by a linear fully connected layer.
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - nums: int, number of outputs of the final fully connected layer
    - is_training: bool, forwarded to the ResNet and also used as the
      `trainable` flag of the fully connected layer
    - reuse: bool->True if test

    Outputs:
    - net: N x nums, output of the fully connected layer (no activation)
    - variables: tf variables collected under the 'resnet_v2_50' scope
      (the 'fc' layer is created outside of that variable scope)
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    # `values` must be passed by keyword: the second positional parameter of
    # tf.name_scope is `default_name`, so a positional [x] was silently ignored.
    with tf.name_scope("Resnet", values=[x]):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            # num_classes=None returns the pooled features instead of logits.
            net, _ = resnet_v2.resnet_v2_50(  # shape=(N, 1, 1, 2048)
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])  # shape=(N, 2048)
            net = slim.fully_connected(net,
                                       num_outputs=nums,
                                       activation_fn=None,
                                       trainable=is_training,
                                       reuse=reuse,
                                       scope='fc')
    variables = tf.contrib.framework.get_variables('resnet_v2_50')
    return net, variables
def Encoder_resnet(x, is_training=True, weight_decay=0.001, reuse=False):
    """
    Resnet v2-50 feature encoder.
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - is_training: bool, forwarded to the ResNet
    - weight_decay: float
    - reuse: bool->True if test

    Outputs:
    - net: N x C pooled feature vector with the two spatial axes squeezed
      out (C is 2048 for the standard ResNet-50 architecture)
    - variables: tf variables collected under the 'resnet_v2_50' scope
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    # `values` must be passed by keyword: the second positional parameter of
    # tf.name_scope is `default_name`, so a positional [x] was silently ignored.
    with tf.name_scope("Encoder_resnet", values=[x]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            # num_classes=None returns the pooled features instead of logits.
            net, _ = resnet_v2.resnet_v2_50(
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])
    variables = tf.contrib.framework.get_variables('resnet_v2_50')
    return net, variables
Example #3
0
def Encoder_resnet(x, is_training=True, weight_decay=0.001, reuse=False):
    """
    Resnet v2-50 feature encoder.
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - is_training: bool, forwarded to the ResNet
    - weight_decay: float
    - reuse: bool->True if test

    Outputs:
    - net: N x C pooled feature vector with the two spatial axes squeezed
      out (C is 2048 for the standard ResNet-50 architecture)
    - variables: tf variables collected under the 'resnet_v2_50' scope
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    # `values` must be passed by keyword: the second positional parameter of
    # tf.name_scope is `default_name`, so a positional [x] was silently ignored.
    with tf.name_scope("Encoder_resnet", values=[x]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            # num_classes=None returns the pooled features instead of logits.
            net, _ = resnet_v2.resnet_v2_50(
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])
    variables = tf.contrib.framework.get_variables('resnet_v2_50')
    return net, variables
Example #4
0
    def build_pretrained_graph(self,
                               images,
                               resnet_layer,
                               checkpoint,
                               is_training,
                               reuse=False):
        """See baseclass.

        Builds ResNet v2-50 on `images` and returns the activations of
        'resnet_v2_50/block<resnet_layer>', the restorable variables
        (anything whose name contains 'global_step' is filtered out) and an
        init function that restores them from `checkpoint`. The init
        function is only created when training without reuse; otherwise
        None is returned in its place.
        """
        init_fn = None
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            _, endpoints = resnet_v2.resnet_v2_50(
                images, is_training=is_training, reuse=reuse)
            block_name = 'resnet_v2_50/block%d' % resnet_layer
            resnet_output = endpoints[block_name]
            resnet_variables = [
                var for var in slim.get_variables_to_restore()
                if 'global_step' not in var.name
            ]
            if is_training and not reuse:
                init_saver = tf.train.Saver(resnet_variables)

                def init_fn(scaffold, sess):
                    # The scaffold argument is part of the callback
                    # signature but is not needed here.
                    del scaffold
                    init_saver.restore(sess, checkpoint)

        return resnet_output, resnet_variables, init_fn
Example #5
0
    def __init__(self, x, num_classes=1001, is_training=False):
        """Build the ResNet50-v2 graph on top of `x`.

        Args:
            x: The node in the tensorflow graph that feeds into the model.
            num_classes (int):
                Number of predicted classes for classification tasks.
                If 0 or None, the features before the logit layer are returned.
            is_training (bool): Whether batch_norm layers are in training mode.
        """
        super(ResNet50v2, self).__init__()

        self.x = x
        self.num_classes = num_classes

        # Populate the tensorflow graph; only the end points are kept.
        with slim.arg_scope(resnet_arg_scope()):
            _, raw_end_points = resnet_v2_50(x,
                                             num_classes=num_classes,
                                             is_training=is_training,
                                             reuse=None)

        self.end_points = _get_updated_endpoints(raw_end_points)
        self.variables_to_restore = slim.get_variables_to_restore(exclude=[])
Example #6
0
def test_network(img_path):
    """Run a 2-class ResNet v2-50 on one image and print its predictions.

    The weights are restored from "train.ckpt"; the image at `img_path` is
    resized to 224 x 224 x 3 and fed as a batch of one.
    """
    width, height, channels = 224, 224, 3

    x = tf.placeholder("float", shape=[None, 224, 224, 3], name='input')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, end_points = resnet_v2.resnet_v2_50(
            x, num_classes=2, is_training=False)
    predictions = end_points["predictions"]

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "train.ckpt")

        resized = transform.resize(io.imread(img_path),
                                   (width, height, channels))
        # Wrap the single image in a list to get a leading batch axis.
        data = np.asarray([resized], np.float32)

        predictions_val = predictions.eval(feed_dict={x: data})

        print(predictions_val)
Example #7
0
def tower_loss(scope):
    """Build the network selected by the global `net` and return its loss.

    Reads a batch with read_and_decode(), builds the chosen VGG / ResNet
    model, registers the loss through cal_loss() and sums every entry of the
    'losses' collection that belongs to `scope`. A scalar summary is written
    for each individual loss and for the total.

    Raises:
        Exception: if `net` is not one of the supported network names.
    """
    images, labels = read_and_decode()

    # Pick the arg-scope factory and the model constructor for `net`.
    if net == 'vgg_16':
        arg_scope_fn, model_fn = vgg.vgg_arg_scope, vgg.vgg_16
    elif net == 'vgg_19':
        arg_scope_fn, model_fn = vgg.vgg_arg_scope, vgg.vgg_19
    elif net == 'resnet_v1_101':
        arg_scope_fn, model_fn = (resnet_v1.resnet_arg_scope,
                                  resnet_v1.resnet_v1_101)
    elif net == 'resnet_v1_50':
        arg_scope_fn, model_fn = (resnet_v1.resnet_arg_scope,
                                  resnet_v1.resnet_v1_50)
    elif net == 'resnet_v2_50':
        arg_scope_fn, model_fn = (resnet_v2.resnet_arg_scope,
                                  resnet_v2.resnet_v2_50)
    else:
        raise Exception('No network matched with net %s.' % net)

    with slim.arg_scope(arg_scope_fn()):
        logits, end_points = model_fn(images, num_classes=FLAGS.num_classes)

    # Only the ResNet variants get their logits reshaped to 2-D.
    if net.startswith('resnet'):
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])

    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)
    _ = cal_loss(logits, labels)

    tower_losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(tower_losses, name='total_loss')
    for loss in tower_losses + [total_loss]:
        # Strip the per-tower prefix from the op name for the summary tag.
        summary_tag = re.sub('%s_[0-9]*/' % TOWER_NAME, '', loss.op.name)
        tf.summary.scalar(summary_tag, loss)
    return total_loss
Example #8
0
    def fprop(self, x):
        """Build (or reuse) ResNet50-v2 under self._var_scope and return its end points.

        The input is first passed through self._preprocessing_fn. The
        checkpoint weights are restored the first time a given variable scope
        is seen; later calls with the same scope skip the restore.
        """
        num_original_classes = 1001

        def var_to_ckpt_name(var):
            # Checkpoint names carry neither the scope prefix nor the ':0'
            # output suffix.
            without_scope = var.name.replace(self._var_scope+'/', '')
            return without_scope.replace(':0', '')

        with slim.arg_scope(resnet_arg_scope()):
            with tf.variable_scope(self._var_scope):
                x = self._preprocessing_fn(x)

                _, end_points = resnet_v2_50(
                    x,
                    num_classes=num_original_classes,
                    is_training=False,
                    reuse=tf.AUTO_REUSE)
                end_points = self._get_updated_endpoints(end_points)

        # Load weights for a particular scope only once
        if self._var_scope in self._scopes_loaded:
            return end_points

        scoped_variables = [
            var for var in slim.get_variables_to_restore(exclude=[])
            if var.name.split('/')[0] == self._var_scope
        ]
        variable_name_map = {}
        for var in scoped_variables:
            variable_name_map[var_to_ckpt_name(var)] = var

        saver = tf.train.Saver(var_list=variable_name_map)
        saver.restore(self._sess, self._get_latest_checkpoint_path())

        self._scopes_loaded.add(self._var_scope)

        return end_points
Example #9
0
 def __call__(self, image_batch):
     """Run the configured backbone on `image_batch` and return its features.

     Only the vgg16 and resnet101 backbones are handled.

     Raises:
         ValueError: if self.model is neither of the supported backbones.
             Previously this case fell through to the return statement and
             failed with an UnboundLocalError on `features`.
     """
     if self.model == vgg16:
         with slim.arg_scope(vgg.vgg_arg_scope()):
             features, _ = self.model(inputs=image_batch)
     elif self.model == resnet101:
         with slim.arg_scope(resnet.resnet_arg_scope()):
             # num_classes=None skips the classification layer.
             features, _ = self.model(inputs=image_batch, num_classes=None)
     else:
         raise ValueError('Unsupported model: %r' % (self.model,))
     return features
Example #10
0
def resnet_fm(input_ph):
    """Return the pooled ResNet v2-50 features of `input_ph`.

    The network is built in inference mode with variable reuse enabled and
    without a classification layer; axes 1 and 2 of its output are squeezed
    away.
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        pooled, _ = resnet_v2.resnet_v2_50(
            input_ph,
            num_classes=None,
            is_training=False,
            reuse=tf.AUTO_REUSE)
        return tf.squeeze(pooled, axis=[1, 2])
Example #11
0
def get_class_resnet(inputs, num_classes, is_training=False):
    """Build a ResNet v2-50 classifier inside the "classifier" variable scope.

    Variables are created through float32_variable_storage_getter and are
    reused across calls. Returns the (logits, end_points) pair produced by
    resnet_v2_50.
    """
    with tf.variable_scope("classifier",
                           custom_getter=float32_variable_storage_getter), \
            slim.arg_scope(resnet_v2.resnet_arg_scope()):
        return resnet_v2.resnet_v2_50(inputs,
                                      num_classes,
                                      reuse=tf.AUTO_REUSE,
                                      is_training=is_training)
Example #12
0
 def __init__(self, tensor, keep_prob=1.0, num_classes=1001, retrain_layer=None, weights_path='./weights/resnet_v2_101.ckpt'):
     """Build a ResNet v2-101 graph on top of the parent Model state.

     Args:
         tensor: input passed to the parent Model.
         keep_prob: passed to the parent Model.
         num_classes: number of outputs of the classification layer.
         retrain_layer: layers to retrain; None (the default) is treated as
             an empty list. A non-empty value puts the network in training
             mode.
         weights_path: checkpoint path passed to the parent Model.
     """
     # Use a fresh list per call instead of a shared mutable default.
     if retrain_layer is None:
         retrain_layer = []

     # Call the parent class
     Model.__init__(self, tensor, keep_prob, num_classes, retrain_layer, weights_path)

     # Create the Graph
     is_training = bool(retrain_layer)
     with slim.arg_scope(resnet_arg_scope()):
         self.final, self.endpoints = resnet_v2_101(
             self.tensor,
             num_classes=num_classes,
             is_training=is_training,
             global_pool=True # True: both height_out and width_out equal one
         )
Example #13
0
    def get_logits_prob(self, batch_input):
        """
        Prediction from the model on a single batch.
        :param batch_input: the input batch. Must be from size [?, 224, 224, 3]
        :return: the logits (as returned by the network, 1001 classes) and the
                 probabilities with class index 0 removed
        """

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(batch_input,
                                                        num_classes=1001,
                                                        is_training=False)
            probs = tf.squeeze(end_points["predictions"])
            # Drop class 0 along the LAST axis. A plain `probs[1:]` slices
            # axis 0, which is the class axis only for a batch of one; for
            # larger batches it discarded the first sample instead.
            probs = probs[..., 1:]
        return logits, probs
Example #14
0
def resnet_v2_50(inputs, is_training=True):
    """Hand-assembled ResNet v2-50 trunk that returns globally pooled features.

    Builds conv1, pool1 and four bottleneck blocks under the 'resnet_v2_50'
    variable scope, applies the final batch norm + ReLU and averages over
    axes 1 and 2 (kept as size-one dimensions). No classification layer is
    added.
    """
    # (scope, base_depth, num_units, stride) of each residual block.
    block_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v2.resnet_v2_block(scope,
                                  base_depth=depth,
                                  num_units=units,
                                  stride=stride)
        for scope, depth, units, stride in block_specs
    ]

    with slim.arg_scope(resnet_v2.resnet_arg_scope()), \
            tf.variable_scope('resnet_v2_50', 'resnet_v2', [inputs]), \
            slim.arg_scope([
                slim.conv2d, resnet_v2.bottleneck,
                resnet_utils.stack_blocks_dense
            ]), \
            slim.arg_scope([slim.batch_norm], is_training=is_training):
        # The first convolution has neither activation nor normalisation.
        with slim.arg_scope([slim.conv2d],
                            activation_fn=None,
                            normalizer_fn=None):
            net = resnet_utils.conv2d_same(
                inputs, 64, 7, stride=2, scope='conv1')
        net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks)
        # This is needed because the pre-activation variant does not have
        # batch normalization or activation functions in the residual unit
        # output.
        net = slim.batch_norm(net,
                              activation_fn=nn_ops.relu,
                              scope='postnorm')
        net = tf.reduce_mean(net, [1, 2], name='pool5', keepdims=True)

    return net
Example #15
0
 def extract_features_resnet50(self, im, scope_name, reuse=False):
     """Return flattened ResNet v2-50 outputs for the image batch `im`.

     The network is built under the name scope `scope_name` with global
     pooling and a 512-unit final layer. The output shape is printed before
     it is flattened.
     """
     feature_dim = 512
     with tf.name_scope(scope_name), \
             slim.arg_scope(resnet_v2.resnet_arg_scope()):
         features, _ = resnet_v2.resnet_v2_50(
             inputs=im,
             num_classes=feature_dim,
             global_pool=True,
             is_training=self.is_training,
             scope='resnet_v2_50',
             reuse=reuse)
     print('\nShape after Resnet_50\n')
     print(features.get_shape())
     return layers.flatten(features)
Example #16
0
    def __init__(self,
                 tensor,
                 keep_prob=1.0,
                 num_classes=1001,
                 retrain_layer=None,
                 weights_path='./weights/resnet_v2_101.ckpt'):
        """Build a ResNet v2-101 graph on top of the parent Model state.

        Args:
            tensor: input passed to the parent Model.
            keep_prob: passed to the parent Model.
            num_classes: number of outputs of the classification layer.
            retrain_layer: layers to retrain; None (the default) is treated
                as an empty list. A non-empty value puts the network in
                training mode.
            weights_path: checkpoint path passed to the parent Model.
        """
        # Use a fresh list per call instead of a shared mutable default.
        if retrain_layer is None:
            retrain_layer = []

        # Call the parent class
        Model.__init__(self, tensor, keep_prob, num_classes, retrain_layer,
                       weights_path)

        # TODO This implementation has a problem while validation (is still set to training)
        is_training = bool(retrain_layer)
        with slim.arg_scope(resnet_arg_scope()):
            self.final, self.endpoints = resnet_v2_101(self.tensor,
                                                       num_classes=num_classes,
                                                       is_training=is_training)
Example #17
0
File: encoder.py Project: footh/tgs
def build_resnet50_v2(img_input, l2_weight_decay=0.01, is_training=True, prefix=''):
    """
        Build the slim resnet50_v2 model without global pooling.

        Returns (conv1, block1, block2, block3, block4): four intermediate
        end points, looked up under '<prefix>resnet_v2_50/...', followed by
        the network output itself. They are meant to feed transposed
        convolution layers.
    """
    arg_scope = resnet_v2.resnet_arg_scope(weight_decay=l2_weight_decay)
    with slim.arg_scope(arg_scope):
        top_features, end_points = resnet_v2_50(
            img_input, is_training=is_training, global_pool=False)

    block3 = end_points[f'{prefix}resnet_v2_50/block3']
    block2 = end_points[f'{prefix}resnet_v2_50/block2']
    block1 = end_points[f'{prefix}resnet_v2_50/block1']
    conv1 = end_points[f'{prefix}resnet_v2_50/conv1']

    return conv1, block1, block2, block3, top_features
Example #18
0
    def __call__(self, inputs):
        """Rescale `inputs` and run them through ResNet v2-101.

        The input is mapped with x -> (x / 255 - 0.5) * 2 (assumes pixel
        values in [0, 255] - TODO confirm against callers). After the first
        call self.reuse is switched to True so later calls share variables.
        """
        centered = (inputs / 255.0) - 0.5
        scaled = centered * 2.0

        with tf.contrib.slim.arg_scope(resnet_arg_scope()):
            image_features, _ = resnet_v2_101(scaled,
                                              num_classes=self.num_classes,
                                              is_training=self.is_training,
                                              global_pool=self.global_pool,
                                              output_stride=self.output_stride,
                                              reuse=self.reuse,
                                              scope=self.scope)
            self.reuse = True

        return image_features
Example #19
0
def test_network(img_path, label_path):
    """Classify one JPEG with a 1001-class ResNet v2-50 and print its label.

    Weights are restored from "resnet_v2_50.ckpt". The image is decoded,
    resized to 224 x 224 and scaled with x -> x / 255 * 2 - 1 before being
    fed to the network. `label_path` is a UTF-8 text file with one label per
    line; the line at the predicted class index is printed.
    """
    x = tf.placeholder("float", shape=[None, 224, 224, 3], name='input')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(x, num_classes=1001, is_training=False)
    predictions = end_points["predictions"]
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, "resnet_v2_50.ckpt")

        # Same preprocessing ops as before, split into named steps.
        decoded = tf.image.decode_jpeg(tf.read_file(img_path), channels=3)
        imgfloat = tf.cast(decoded, dtype=tf.float32)
        resized = tf.image.resize_images(
            tf.expand_dims(imgfloat, 0), (224, 224), method=0)
        img = tf.subtract(tf.multiply(tf.div(resized, 255.0), 2), 1.0)

        predictions_val = predictions.eval(feed_dict={x: img.eval()})
        # The argmax over axis 3 treats the predictions as a rank-4 tensor.
        predicted_classes = np.argmax(predictions_val, axis=3)

        # Context manager so the label file is always closed.
        with open(label_path, encoding="utf-8") as label_file:
            labels = label_file.readlines()
        print(predicted_classes, labels[predicted_classes[0][0][0]])
Example #20
0
  def build_pretrained_graph(
      self, images, resnet_layer, checkpoint, is_training, reuse=False):
    """See baseclass.

    Builds ResNet v2-50 on `images` and returns the activations of
    'resnet_v2_50/block<resnet_layer>', the restorable variables (anything
    whose name contains 'global_step' is filtered out) and an init function
    that restores them from `checkpoint`. The init function is only created
    when training without reuse; otherwise None is returned in its place.
    """
    init_fn = None
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
      _, endpoints = resnet_v2.resnet_v2_50(
          images, is_training=is_training, reuse=reuse)
      block_name = 'resnet_v2_50/block%d' % resnet_layer
      resnet_output = endpoints[block_name]
      resnet_variables = [
          var for var in slim.get_variables_to_restore()
          if 'global_step' not in var.name]
      if is_training and not reuse:
        init_saver = tf.train.Saver(resnet_variables)

        def init_fn(scaffold, sess):
          # The scaffold argument is part of the callback signature but is
          # not needed here.
          del scaffold
          init_saver.restore(sess, checkpoint)

    return resnet_output, resnet_variables, init_fn
Example #21
0
    def fprop(self, x):
        """Exposes all the layers of the model.

        When `x` is the very tensor the model was constructed with, the end
        points stored on the instance are returned. For any other input the
        network is built again in inference mode with variable reuse.

        Args:
            x: Tensor which is input to the model.

        Returns:
            dict: A dictionary mapping layer names to the corresponding
                 node in the tensorflow graph.
        """
        if x is self.x:
            return self.end_points

        with slim.arg_scope(resnet_arg_scope()):
            _, fresh_end_points = resnet_v2_50(x,
                                               num_classes=self.num_classes,
                                               is_training=False,
                                               reuse=tf.AUTO_REUSE)

        return _get_updated_endpoints(fresh_end_points)
Example #22
0
def get_box_resnet(inputs, is_training=False):
    """Build the "box_net" head: ResNet v2-50 features -> attention -> 4 outputs.

    A 1x1 convolution over the un-pooled ResNet features is averaged over
    its channel axis to give a single-channel attention map, which is
    multiplied with the features. The result goes through a 3x3 convolution
    with 512 filters, is flattened and projected to 4 values.

    Returns:
        (box_out, attn): the 4-value output and the attention map.
    """
    with tf.variable_scope("box_net",
                           custom_getter=float32_variable_storage_getter), \
            slim.arg_scope(resnet_v2.resnet_arg_scope()):
        features, _ = resnet_v2.resnet_v2_50(inputs,
                                             num_classes=None,
                                             global_pool=False,
                                             reuse=tf.AUTO_REUSE,
                                             is_training=is_training)

        # Attention map: 1x1 conv, then mean over the channel axis.
        # (An earlier 64 -> 32 -> 1 channel conv attention stack with a
        # sigmoid output was left disabled in the original code.)
        attn = tf.layers.conv2d(
            features,
            2048, [1, 1],
            activation=None,
            name='attn',
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.1),
            reuse=tf.AUTO_REUSE)
        attn = tf.reduce_mean(attn, [3], name='attn_pool', keepdims=True)

        weighted = tf.multiply(attn, features)
        box_features = tf.layers.conv2d(weighted,
                                        512, [3, 3],
                                        padding='same',
                                        activation=None,
                                        name='box',
                                        reuse=tf.AUTO_REUSE)
        flat = tf.layers.flatten(box_features, name='box_flatten')
        box_out = tf.layers.dense(flat,
                                  4,
                                  activation=None,
                                  name='box_out',
                                  reuse=tf.AUTO_REUSE)

    return box_out, attn
Example #23
0
    def build_model(self):
        """Create placeholders, the classifier network, the loss and the train op.

        The network is chosen by self.config.model: "resnet" (ResNet v2-50),
        "resnet_101" (ResNet v2-101), "resnet_v1_50", or, for any other
        value, a small three-layer convolutional encoder followed by a dense
        classifier. Sets self.x, self.y, self.is_training, self.pred,
        self.loss, self.train_op and self.acc.
        """
        # Default parameters
        FILTER_SIZE = (5, 5)
        Z_DIM = 2048
        STRIDE = (2, 2)
        DEPTHS = [64, 128, 256, 256, 128, 32]
        N_CLASS = self.config.nclass

        def encoder(input, z_dim=Z_DIM, is_training=False):
            """Three strided conv+BN layers, then dense + BN + ReLU to z_dim."""
            net = conv2d_BN(input,
                            DEPTHS[0],
                            FILTER_SIZE,
                            is_training,
                            stride=STRIDE,
                            name='conv_1',
                            kernel_initializer=tf.truncated_normal_initializer(
                                stddev=0.01))
            net = conv2d_BN(net,
                            DEPTHS[1],
                            FILTER_SIZE,
                            is_training,
                            stride=STRIDE,
                            name='conv_2',
                            kernel_initializer=tf.truncated_normal_initializer(
                                stddev=0.01))
            net = conv2d_BN(net,
                            DEPTHS[2],
                            FILTER_SIZE,
                            is_training,
                            stride=STRIDE,
                            name='conv_3',
                            kernel_initializer=tf.truncated_normal_initializer(
                                stddev=0.01))
            z = tf.layers.dense(
                tf.layers.flatten(net),
                z_dim,
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=0.01))
            z = tf.nn.relu(tf.layers.batch_normalization(z,
                                                         training=is_training),
                           name='enc')
            return z

        self.x = tf.placeholder(tf.float32,
                                shape=[None] + self.config.input_shape,
                                name="input")
        self.y = tf.placeholder(tf.int32, shape=[None], name="label")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        # network architecture
        # Fall back to 0.997 when the config does not define bn_decay.
        batch_norm_decay = 0.997 if self.config.get(
            'bn_decay') is None else self.config.bn_decay
        output_stride = self.config.get('output_stride')
        if self.config.model == "resnet":
            with slim.arg_scope(
                    resnet_v2.resnet_arg_scope(
                        batch_norm_decay=batch_norm_decay)):
                net, end_points = resnet_v2.resnet_v2_50(
                    self.x,
                    N_CLASS,
                    is_training=self.is_training,
                    output_stride=output_stride)
                logits = tf.squeeze(end_points["resnet_v2_50/logits"],
                                    axis=[1, 2])
            # The op name must be given by keyword: the second positional
            # parameter of tf.nn.softmax is the axis, not the name.
            pred = tf.nn.softmax(logits, name="pred")
        elif self.config.model == "resnet_101":
            with slim.arg_scope(
                    resnet_v2.resnet_arg_scope(
                        batch_norm_decay=batch_norm_decay)):
                _, end_points = resnet_v2.resnet_v2_101(
                    self.x,
                    N_CLASS,
                    is_training=self.is_training,
                    output_stride=output_stride)
                logits = tf.squeeze(end_points["resnet_v2_101/logits"],
                                    axis=[1, 2])
            pred = tf.nn.softmax(logits)
        elif self.config.model == "resnet_v1_50":
            # NOTE(review): the v1 network is built under the resnet_v2 arg
            # scope - confirm that this is intended.
            with slim.arg_scope(
                    resnet_v2.resnet_arg_scope(
                        batch_norm_decay=batch_norm_decay)):
                _, end_points = resnet_v1.resnet_v1_50(
                    self.x,
                    N_CLASS,
                    is_training=self.is_training,
                    output_stride=output_stride)
                logits = tf.squeeze(end_points["resnet_v1_50/logits"],
                                    axis=[1, 2])
            pred = tf.nn.softmax(logits)
        else:
            z = encoder(self.x, z_dim=Z_DIM, is_training=self.is_training)
            logits = tf.layers.dense(
                z,
                N_CLASS,
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=0.01))
            pred = tf.nn.softmax(logits)

        self.pred = pred
        with tf.name_scope("loss"):

            self.loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(
                labels=self.y, logits=logits),
                                       name='cross_entropy')

            # Run the collected update ops (e.g. batch-norm statistics)
            # together with every optimisation step.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            print(update_ops)
            with tf.control_dependencies(update_ops):
                self.train_op = tf.train.AdamOptimizer(
                    self.config.learning_rate).minimize(
                        self.loss, global_step=self.global_step_tensor)
            correct_prediction = tf.equal(
                tf.argmax(pred, 1, output_type=tf.int32), self.y)
            self.acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Example #24
0
def main(argv=None):
    """Fine-tune ResNet v2-50 on the preprocessed dataset stored in INPUT_DATA.

    Training resumes from the newest checkpoint in SAVE_PATH when one exists;
    otherwise the variables returned by get_tuned_variable() are initialised
    from TRAINED_CKPT_FILE. Every 100 steps a checkpoint is written and the
    validation accuracy is printed; the test accuracy is printed at the end.

    Args:
        argv: unused.
    """
    # Load the preprocessed data.
    # allow_pickle=True is required by current NumPy releases when the file
    # stores an object array (six splits of different lengths). Pickle can
    # execute arbitrary code, so INPUT_DATA must be a trusted file.
    processed_data = np.load(INPUT_DATA, allow_pickle=True)
    training_images = processed_data[0]
    n_training_examples = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print('%d training, %d validation, %d testing' %
          (n_training_examples, len(validation_labels), len(testing_labels)))

    # Define the ResNet v2-50 inputs; resnet_v2.default_image_size = 224
    images = tf.placeholder(tf.float32, [None, 224, 224, 3],
                            name='input_image')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # Build the ResNet v2-50 model
    with slim.arg_scope(resnet.resnet_arg_scope()):
        logits, _ = resnet.resnet_v2_50(images, num_classes=N_CLASSES)

    with tf.variable_scope('squeeze_logits'):
        logits = tf.squeeze(logits, axis=[1, 2])

    trainable_var = get_trainable_variables()

    # Loss function
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES), logits)
    # Training
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss())
    # Train only the last layer
    # train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(tf.losses.get_total_loss(),
    #                                                               var_list=get_trainable_variables())

    # Accuracy
    with tf.variable_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    ckpt = tf.train.get_checkpoint_state(SAVE_PATH)
    if ckpt and ckpt.model_checkpoint_path:
        # Load the previously trained parameters and continue training
        variables_to_restore = slim.get_model_variables()
        print('continue training from %s' % ckpt)
        # The step counter is the suffix after the last '-' in the file name.
        step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        step = int(step)
        ckpt = ckpt.model_checkpoint_path
    else:
        # No earlier training checkpoint: transfer part of the pretrained
        # weights first
        ckpt = TRAINED_CKPT_FILE
        variables_to_restore = get_tuned_variable()
        print('loading tuned variables from %s' % TRAINED_CKPT_FILE)
        step = 0

    load_fn = slim.assign_from_checkpoint_fn(ckpt,
                                             variables_to_restore,
                                             ignore_missing_vars=True)

    # Open a session and train
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise all parameters
        init = tf.global_variables_initializer()
        sess.run(init)
        load_fn(sess)

        start = 0
        end = BATCH
        for i in range(step + 1, step + 1 + STEPS):
            start_time = time.time()
            # Run one training step (original note: "does not update all
            # parameters" - NOTE(review): train_step is built without a
            # var_list, confirm which variables are actually updated)
            sess.run(train_step,
                     feed_dict={
                         images: training_images[start:end],
                         labels: training_labels[start:end]
                     })
            duration = time.time() - start_time

            print('current train step duration %.3f' % duration)

            # Log output
            if i % 100 == 0:
                saver.save(sess, TRAIN_FILE, global_step=i)
                validation_accuracy = sess.run(evaluation_step,
                                               feed_dict={
                                                   images: validation_images,
                                                   labels: validation_labels
                                               })
                print('Step %d Validation accuracy = %.1f%%' %
                      (i, validation_accuracy * 100.0))

            # Advance the mini-batch window, wrapping around at the end of
            # the training set and clamping the last (short) batch.
            start = end
            if start == n_training_examples:
                start = 0

            end = start + BATCH
            if end > n_training_examples:
                end = n_training_examples

        # Evaluate the accuracy on the test set
        test_accuracy = sess.run(evaluation_step,
                                 feed_dict={
                                     images: testing_images,
                                     labels: testing_labels
                                 })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100.0))
Example #25
0
def main():
    """Fine-tune a ResNet v2-50 classifier and report its accuracy.

    Loads the pre-processed dataset from ``INPUT_DATA``, builds a ResNet v2-50
    backbone followed by a dropout + fully connected head with ``N_CLASSES``
    outputs, restores the variables returned by ``get_tuned_variables()`` from
    ``CKPT_FILE``, runs ``STEPS`` training steps on consecutive slices of
    ``BATCH`` examples, prints the validation accuracy every five steps (and
    on the last step) and finally prints the test accuracy.
    """
    # Load the pre-processed data: indices 0-5 hold the training, validation
    # and testing images/labels, in that order.
    # NOTE: allow_pickle=True unpickles the file content; only load files
    # from a trusted source.
    processed_data = np.load(INPUT_DATA, allow_pickle=True)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print(
        "%d training examples, %d validation examples and %d testing examples."
        % (n_training_example, len(validation_labels), len(testing_labels)))

    # Network inputs: `images` is the image batch, `labels` holds the integer
    # class id of each image.
    images = tf.placeholder(tf.float32, [None, 299, 299, 3],
                            name='Input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # ResNet v2-50 backbone without its own classification layer.
    # NOTE(review): is_training is left at its default here and in
    # slim.dropout below, so the evaluation runs presumably use training-mode
    # batch norm and dropout as well -- confirm this is intended.
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        features, _ = resnet_v2.resnet_v2_50(images, num_classes=None)

    with tf.variable_scope("Logits"):
        # Drop the two spatial axes of the pooled backbone output so that only
        # the batch and channel axes remain.
        net = tf.squeeze(features, axis=[1, 2])
        # Dropout in front of the classifier.
        net = slim.dropout(net, keep_prob=0.5, scope='dropout_scope')
        # Final classifier. slim.fully_connected applies ReLU by default,
        # which would clamp the logits at zero before the softmax loss, so
        # the activation is disabled explicitly.
        logits = slim.fully_connected(net,
                                      num_outputs=N_CLASSES,
                                      activation_fn=None,
                                      scope='fc')

    # Variables that are meant to be trained.
    # NOTE(review): this list is not passed to minimize() below, so the
    # optimizer updates every trainable variable -- confirm whether
    # var_list=trainable_variables was intended.
    trainable_variables = get_trainable_variables()
    # Cross-entropy loss (registered in the tf.losses collection).
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES),
                                    logits,
                                    weights=1.0)
    # Training op.
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss())
    # Accuracy: fraction of examples whose arg-max logit equals the label.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
    # Function that restores the pre-trained variables from the checkpoint.
    load_fn = slim.assign_from_checkpoint_fn(CKPT_FILE,
                                             get_tuned_variables(),
                                             ignore_missing_vars=True)

    # Saver for the fine-tuned model (saving is currently disabled below).
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise everything first; restored variables are overwritten by
        # load_fn right after.
        init = tf.global_variables_initializer()
        sess.run(init)
        print('Loading tuned variables from %s' % CKPT_FILE)
        load_fn(sess)

        start = 0
        end = BATCH
        for i in range(STEPS):
            # One optimisation step on the current slice of training data.
            sess.run(train_step,
                     feed_dict={
                         images: training_images[start:end],
                         labels: training_labels[start:end]
                     })
            # Periodic validation log.
            if i % 5 == 0 or i + 1 == STEPS:
                #saver.save(sess, TRAIN_FILE, global_step = i)
                validation_accuracy = sess.run(evaluation_step,
                                               feed_dict={
                                                   images: validation_images,
                                                   labels: validation_labels
                                               })
                print('Step %d: Validation accuracy = %.1f%%' %
                      (i, validation_accuracy * 100.0))
            # The data was already shuffled during pre-processing, so the
            # training set is simply walked in order and wrapped around at
            # the end; the last slice of a pass may be shorter than BATCH.
            start = end
            if start == n_training_example:
                start = 0
            end = start + BATCH
            if end > n_training_example:
                end = n_training_example

        # Final accuracy on the held-out test data.
        test_accuracy = sess.run(evaluation_step,
                                 feed_dict={
                                     images: testing_images,
                                     labels: testing_labels
                                 })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
Example #26
0
def model(inputs, is_training=True):
    """Build the ResNet v2-152 backbone and its five dense prediction heads.

    Args:
        inputs: image batch tensor passed to ``resnet_v2.resnet_v2_152``.
        is_training: forwarded to the backbone; defaults to ``True``, which
            is the value that used to be hard-coded.

    Returns:
        A list ``[kp_maps, short_offsets, mid_offsets, long_offsets,
        seg_mask]``. Every head is a 1x1 convolution on the backbone output,
        bilinearly resized to ``config.IMAGE_SHAPE[:2]``; the mid- and
        long-range offsets are additionally refined with the short offsets.
    """
    height, width = config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1]

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        net, _ = resnet_v2.resnet_v2_152(inputs,
                                         num_classes=2048,
                                         is_training=is_training,
                                         global_pool=False,
                                         reuse=tf.AUTO_REUSE,
                                         output_stride=config.OUTPUT_STRIDE)

    # (variable scope, output channels, activation) of each 1x1 head, listed
    # in the order in which the heads are created.
    head_specs = [
        ('kp_maps', config.NUM_KP, tf.nn.sigmoid),
        ('short_offsets', 2 * config.NUM_KP, None),
        ('mid_offsets', 4 * config.NUM_EDGES, None),
        ('long_offsets', 2 * config.NUM_KP, None),
        ('seg_mask', 1, tf.nn.sigmoid),
    ]

    heads = []
    for head_scope, channels, activation in head_specs:
        heads.append(
            tf.contrib.layers.conv2d(net,
                                     num_outputs=channels,
                                     kernel_size=(1, 1),
                                     activation_fn=activation,
                                     stride=1,
                                     scope=head_scope,
                                     reuse=tf.AUTO_REUSE))

    # Upsample every head to the configured image resolution.
    resized = []
    for head in heads:
        resized.append(
            tf.image.resize_bilinear(head, (height, width),
                                     align_corners=True))
    kp_maps, short_offsets, mid_offsets, long_offsets, seg_mask = resized

    mid_offsets = split_and_refine_mid_offsets(mid_offsets, short_offsets)
    long_offsets = split_and_refine_long_offsets(long_offsets, short_offsets)
    outputs = [kp_maps, short_offsets, mid_offsets, long_offsets, seg_mask]
    return outputs
def feature_extractor_resnet(images,
                             dim=256,
                             weight_decay=0.0001,
                             batch_norm_decay=0.999,
                             batch_renorm_decay=0.99,
                             batch_renorm_rmax=3.,
                             batch_renorm_dmax=5.,
                             is_training=True,
                             use_conv3d=True):
    """Extract a feature map with a small ResNet v2 trunk and a conv head.

    Args:
        images: input tensor. With ``use_conv3d`` it is treated as
            ``[N, T, H, W, C]`` and flattened to ``[N*T, H, W, C]`` for the
            2-D ResNet; otherwise it is fed to the ResNet unchanged.
        dim: number of output channels of the final 1x1 convolution.
        weight_decay: weight decay passed to ``resnet_arg_scope``.
        batch_norm_decay: batch-norm decay passed to ``resnet_arg_scope``.
        batch_renorm_decay: ``renorm_decay`` of batch renormalization.
        batch_renorm_rmax: ``rmax`` clipping value (``rmin`` is ``1 / rmax``).
        batch_renorm_dmax: ``dmax`` clipping value.
        is_training: forwarded to the ResNet and to the head's batch norm.
        use_conv3d: if true, restore the ``[N, T, ...]`` layout after the
            ResNet and use a stack of 3-D convolutions as head; otherwise the
            head is a single 2-D 1x1 convolution.

    Returns:
        The output of the last head convolution, which has no activation and
        no normalizer.
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    if use_conv3d:
        orig_shape = tf.shape(images)
        # [N,T,H,W,C] -> [N*T,H,W,C]
        images = tf.reshape(images, tf.concat([[-1], orig_shape[2:]], 0))

    resnet_arg_scope = resnet_v2.resnet_arg_scope(
        weight_decay=weight_decay, batch_norm_decay=batch_norm_decay)
    # Batch size is small, so batch renormalization is used.
    # filter() returns an iterator on Python 3 and cannot be indexed, so the
    # first matching key is taken with next() instead.
    batch_norm_key = next(key for key in resnet_arg_scope
                          if 'batch_norm' in key)
    resnet_arg_scope[batch_norm_key].update({
        'renorm': True,
        'renorm_decay': batch_renorm_decay,
        'renorm_clipping': {
            'rmin': 1. / batch_renorm_rmax,
            'rmax': batch_renorm_rmax,
            'dmax': batch_renorm_dmax
        }
    })

    with slim.arg_scope(resnet_arg_scope):
        blocks = [
            resnet_v2.resnet_v2_block('block1',
                                      base_depth=16,
                                      num_units=3,
                                      stride=2),
            resnet_v2.resnet_v2_block('block2',
                                      base_depth=32,
                                      num_units=4,
                                      stride=2),
            resnet_v2.resnet_v2_block('block3',
                                      base_depth=64,
                                      num_units=6,
                                      stride=2),  #256
            resnet_v2.resnet_v2_block('block4',
                                      base_depth=128,
                                      num_units=3,
                                      stride=1)  #512
        ]
        _, end_points = resnet_v2.resnet_v2(images,
                                            blocks,
                                            is_training=is_training,
                                            include_root_block=False)
    # Use the activations of the last block rather than the network output.
    net = end_points['resnet_v2/block4']
    if use_conv3d:
        # [N*T,H',W',C'] -> [N,T,H',W',C']
        net = tf.reshape(net, tf.concat(
            [orig_shape[:2], tf.shape(net)[1:]], 0))

    arg_scope = convert_resnet_arg_scope_to_slim(resnet_arg_scope)
    arg_scope[slim.conv2d].update({'stride': 1, 'padding': 'SAME'})
    arg_scope[slim.conv3d].update({'stride': 1, 'padding': 'SAME'})
    arg_scope[slim.batch_norm]['is_training'] = is_training
    with slim.arg_scope(arg_scope):
        if use_conv3d:
            net = slim.conv3d(net, 512, [3, 3, 3])
            net = slim.conv3d(net, 256, [1, 1, 1])
            net = slim.conv3d(net, 512, [3, 3, 3])
            # the last layer without activation function
            feature_map = slim.conv3d(net,
                                      dim, [1, 1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None)
        else:
            # the last layer without activation function
            feature_map = slim.conv2d(net,
                                      dim, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None)
    return feature_map
Example #28
0
    def build_model(self):
        """Build the two-branch classifier graph, its loss and its train op.

        Branch 1 is a ResNet v2-50 with two outputs; branch 2 ("ensemble_2")
        is a small CNN (two conv/pool stages and two dense layers). The two
        branch outputs are averaged and passed through a softmax.

        Reads ``self.dropout_prob`` and ``self.lr``. Sets ``self.img``,
        ``self.label``, ``self.prediction_1``, ``self.dense_input``,
        ``self.prediction_2``, ``self.sum_prediction``, ``self.prediction``,
        ``self.accuracy``, ``self.cross_entropy`` and ``self.train_step``.
        """
        # Pixel values are scaled by 1/255; labels are given as 2-column rows.
        self.img = tf.placeholder(tf.float32, [None, 150, 100, 3]) / 255
        self.label = tf.placeholder(tf.float32, [None, 2])

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, _ = resnet_v2.resnet_v2_50(self.img,
                                            num_classes=2,
                                            is_training=True,
                                            global_pool=True)
            # Flatten to [batch, 2]. The batch dimension is left dynamic so
            # the graph accepts any batch size, like the placeholders do.
            net = tf.reshape(net, [-1, 2])
            # dense_1 = tf.layers.dense(net, units=5, activation=tf.nn.relu)
            self.prediction_1 = net

            # self.prediction = tf.nn.softmax(tf.layers.dense(dense_1, units=2))

        with tf.variable_scope("ensemble_2"):
            with tf.variable_scope('convolution_1'):
                W_conv1 = tf.Variable(tf.truncated_normal(shape=[5, 5, 3, 32],
                                                          mean=0,
                                                          stddev=0.01),
                                      name='conv1')
                b_conv1 = tf.Variable(tf.truncated_normal(shape=[32],
                                                          mean=0,
                                                          stddev=0.1),
                                      name='bias1')
                h_conv1 = tf.nn.dropout(
                    tf.nn.relu(
                        tf.nn.bias_add(
                            tf.nn.conv2d(self.img,
                                         W_conv1,
                                         strides=[1, 1, 1, 1],
                                         padding='VALID'), b_conv1)),
                    keep_prob=self.dropout_prob)  # output size 146x96x32
                h_pool1 = tf.nn.max_pool(
                    h_conv1,
                    ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1],
                    padding='VALID')  # output size 73x48x32

            with tf.variable_scope('convolution_2'):
                W_conv2 = tf.Variable(tf.truncated_normal(shape=[5, 5, 32, 10],
                                                          mean=0,
                                                          stddev=0.01),
                                      name='conv2')
                b_conv2 = tf.Variable(tf.truncated_normal(shape=[10],
                                                          mean=0,
                                                          stddev=0.05),
                                      name='bias2')
                h_conv2 = tf.nn.dropout(tf.nn.relu(
                    tf.nn.bias_add(
                        tf.nn.conv2d(h_pool1,
                                     W_conv2,
                                     strides=[1, 1, 1, 1],
                                     padding='VALID'), b_conv2)),
                                        keep_prob=self.dropout_prob)
                # output size 69x44x10
                h_pool2 = tf.nn.max_pool(h_conv2,
                                         ksize=[1, 2, 2, 1],
                                         strides=[1, 2, 2, 1],
                                         padding='VALID')
                # output size 34x22x10, flattened for the dense layers
                self.dense_input = tf.reshape(h_pool2, (-1, 34 * 22 * 10))

            with tf.variable_scope('dense'):
                h_dense1 = tf.nn.dropout(tf.layers.dense(
                    self.dense_input,
                    units=34 * 22 * 10,
                    activation=tf.nn.relu),
                                         keep_prob=self.dropout_prob)
                h_dense2 = tf.layers.dense(h_dense1, units=2, activation=None)
                self.prediction_2 = h_dense2

        # self.sum_prediction = tf.concat([self.prediction_1, self.prediction_2], axis=1)
        # Average the two branch outputs.
        self.sum_prediction = (self.prediction_1 + self.prediction_2) / 2
        print('sum_prediction:', self.sum_prediction)

        with tf.variable_scope('result'):
            # weight = tf.reshape(tf.nn.softmax(tf.layers.dense(self.dense_input, units=2)), (self.batch_size, 1, 2))
            # self.pin = weight
            # self.prediction = tf.nn.softmax(tf.reshape(tf.matmul(weight, self.sum_prediction), (self.batch_size, 2)))
            self.prediction = tf.nn.softmax(self.sum_prediction)
            print("prediction:", self.prediction)
            correct_prediction = tf.equal(tf.argmax(self.prediction, 1),
                                          tf.argmax(self.label, 1))

            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))

        # NOTE(review): `reg` is built but not added to the loss that is
        # minimized below -- confirm whether L2 regularization was meant to
        # be part of the objective.
        reg = tf.contrib.layers.apply_regularization(
            tf.contrib.layers.l2_regularizer(1e-4), tf.trainable_variables())
        # Cross-entropy on the softmax output; probabilities are clipped so
        # that log() never receives 0.
        self.cross_entropy = tf.reduce_mean(-tf.reduce_sum(
            self.label *
            tf.log(tf.clip_by_value(self.prediction, 1e-10, 0.999999)),
            reduction_indices=[1]))

        global_step = tf.Variable(0, trainable=False)
        # self.learning = tf.train.exponential_decay(self.lr, global_step, 70, 0.8, staircase=True)
        self.train_step = tf.train.AdamOptimizer(self.lr).minimize(
            self.cross_entropy, global_step=global_step)
Example #29
0
    def resnet_v2_spkid(self, inputs, spk_labels, blocks, num_classes,
                        is_training, global_pool, output_stride, reuse, scope):
        """Build the ResNet v2 based speaker-ID network and its training loss.

        Pipeline, in order: ``conv1`` (``conv2d_same`` called with 64, 13, 1)
        -> ``self.stack_blocks_dense`` -> batch norm + ReLU (``postnorm``) ->
        ``res_fc`` ([1, 5] VALID conv, 512 outputs) + batch norm -> optional
        attention-weighted statistics pooling -> ``fc1`` (1000) + batch norm
        -> ``fc2`` (512) -> squeeze of axes 1 and 2 ->
        ``self.AM_logits_compute`` -> softmax predictions and sparse softmax
        cross-entropy loss.

        Args:
            inputs: network input tensor.
            spk_labels: integer speaker labels, used by
                ``self.AM_logits_compute`` and by the cross-entropy loss.
            blocks: block descriptions passed to ``self.stack_blocks_dense``.
            num_classes: number of classes, passed to
                ``self.AM_logits_compute``.
            is_training: forwarded to batch norm and ``AM_logits_compute``.
            global_pool: if true, pool axis 1 with attention-weighted moments
                and concatenate the two statistics along axis 3.
            output_stride: passed to ``self.stack_blocks_dense``.
            reuse: ``reuse`` argument of the enclosing variable scope.
            scope: variable scope name (default name ``'resnet_v2'``).

        Returns:
            ``(loss, end_points)``: the mean cross-entropy loss and a dict of
            intermediate tensors (collected layer outputs plus the entries
            added explicitly below).
        """

        with arg_scope(resnet_v2.resnet_arg_scope()):
            with tf.variable_scope(scope, 'resnet_v2', [inputs],
                                   reuse=reuse) as sc:
                # Layer outputs are gathered in this collection and turned
                # into the end_points dict further down.
                end_points_collection = sc.original_name_scope + '_end_points'
                with arg_scope([
                        layers_lib.conv2d, resnet_v2.bottleneck, slim.conv2d,
                        self.stack_blocks_dense
                ],
                               outputs_collections=end_points_collection):
                    # Convolution defaults: no weight regularizer, Xavier
                    # weight init, small constant bias init.
                    with arg_scope(
                        [layers_lib.conv2d],
                            weights_regularizer=None,
                            weights_initializer=tf.contrib.layers.
                            xavier_initializer(),
                            biases_initializer=tf.constant_initializer(0.001)):

                        # Batch-norm defaults shared by every batch_norm call
                        # below.
                        with arg_scope(
                            [layers.batch_norm],
                                is_training=is_training,
                                decay=0.9,
                                epsilon=1e-3,
                                scale=True,
                                param_initializers={
                                    "beta":
                                    tf.constant_initializer(value=0),
                                    "gamma":
                                    tf.random_normal_initializer(mean=1,
                                                                 stddev=0.045),
                                    "moving_mean":
                                    tf.constant_initializer(value=0),
                                    "moving_variance":
                                    tf.constant_initializer(value=1)
                                }):
                            net = inputs
                            # Root convolution without activation or
                            # normalization.
                            with arg_scope([layers_lib.conv2d],
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           weights_regularizer=None):
                                net = resnet_utils.conv2d_same(net,
                                                               64,
                                                               13,
                                                               1,
                                                               scope='conv1')
                            # net = layers.max_pool2d(net, [2, 2], stride=2, scope='pool1')
                            # net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
                            net = self.stack_blocks_dense(
                                net, blocks, output_stride)
                            net = layers.batch_norm(net,
                                                    activation_fn=tf.nn.relu,
                                                    scope='postnorm')
                            # Snapshot of the outputs collected so far; later
                            # entries are added to the dict by hand.
                            end_points = utils.convert_collection_to_dict(
                                end_points_collection)
                            net = layers_lib.conv2d(net,
                                                    512, [1, 5],
                                                    stride=1,
                                                    activation_fn=None,
                                                    normalizer_fn=None,
                                                    scope='res_fc',
                                                    padding='VALID')
                            end_points[sc.name + '/res_fc'] = net
                            net = layers.batch_norm(net,
                                                    activation_fn=tf.nn.relu,
                                                    scope='res_fc_bn')

                            if global_pool:
                                ## net : batchsize X W(frame_length) X 1 X Dim
                                ## Global average pooling.
                                # net = tf.reduce_mean(net, [1], name='pool5', keep_dims=True)

                                ## Global statistical pooling
                                # mean,var = tf.nn.moments(net,1,name='pool5', keep_dims=True)
                                # net = tf.concat([mean,var],3)

                                ## Apply attention + stats
                                attention = self.attention_layer(net)
                                end_points['attention'] = attention
                                # NOTE(review): tf.nn.weighted_moments is
                                # documented to return (mean, variance); the
                                # second value is named `std` here -- confirm
                                # which statistic is intended.
                                mean, std = tf.nn.weighted_moments(
                                    net, 1, attention, keep_dims=True)
                                net = tf.concat([mean, std], 3)

                                end_points['global_pool'] = net

                            ## Fully Connected layers
                            ## fc1
                            net = layers_lib.conv2d(net,
                                                    1000, [1, 1],
                                                    stride=1,
                                                    activation_fn=None,
                                                    normalizer_fn=None,
                                                    scope='fc1')
                            end_points[sc.name + '/fc1'] = net
                            net = layers.batch_norm(net,
                                                    activation_fn=tf.nn.relu,
                                                    scope='fc1_bn')
                            ## fc2
                            net = layers_lib.conv2d(net,
                                                    512, [1, 1],
                                                    stride=1,
                                                    activation_fn=None,
                                                    normalizer_fn=None,
                                                    scope='fc2')
                            end_points[sc.name + '/fc2'] = net

                            ## output layer
                            ## For AM-softmax
                            # Squeezing axes 1 and 2 requires both to have
                            # size 1 at this point -- presumably only true
                            # when global_pool is set; TODO confirm.
                            net = tf.squeeze(net, [1, 2],
                                             name='SpatialSqueeze')
                            end_points[sc.name + '/spatial_squeeze'] = net
                            net, embedding = self.AM_logits_compute(
                                net, spk_labels, num_classes, is_training)
                            end_points[sc.name + '/logits'] = net
                            end_points[sc.name + '/fc3'] = embedding

                            ## for softmax
                            #                             net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope='fc2_bn')
                            #                             net = layers_lib.conv2d(net, num_classes, [1, 1], stride=1, activation_fn=None,
                            #                                             normalizer_fn=None, scope='logits')
                            #                             end_points[sc.name + '/logits'] = net
                            #                             net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                            #                             end_points[sc.name + '/spatial_squeeze'] = net

                            ## loss
                            end_points['predictions'] = layers.softmax(
                                net, scope='predictions')
                            loss = tf.reduce_mean(
                                tf.nn.sparse_softmax_cross_entropy_with_logits(
                                    labels=spk_labels, logits=net))
                            end_points[sc.name + '/loss'] = loss
                            end_points[sc.name + '/spk_labels'] = spk_labels

                            return loss, end_points
Example #30
0
def Encoder_resnet_v2(x,
                      depth=None,
                      is_training=True,
                      weight_decay=0.001,
                      reuse=False,
                      name='Encoder_resnet_v2'):
    """
    Two-stream ResNet v2-50 encoder for an image and an optional depth map.
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - depth: optional depth tensor; it is repeated three times along the
      last axis before it is fed to its own ResNet. When None, an all-zero
      tensor shaped like the image encoding is used instead.
    - is_training: bool, forwarded to both ResNets
    - weight_decay: float
    - reuse: bool->True if test
    - name: name of the enclosing variable scope

    Outputs:
    - net: output of the final fully connected layer (2048 units)
    - variables: tf variables collected under the `name` scope
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    with tf.variable_scope(name, reuse=reuse) as scope:
        # Default ResNet arg scope. With num_classes=None and the default
        # global_pool=True, resnet_v2_50 returns the globally pooled output
        # of the last block, hence the squeeze of axes 1 and 2 below.
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            # Image stream.
            with tf.variable_scope("resnet_v2_50_img"):
                net_img, _ = resnet_v2.resnet_v2_50(
                    inputs=x,
                    num_classes=None,
                    is_training=is_training,
                )
                net_img = tf.squeeze(net_img, axis=[1, 2])  # N x 2048

            # Depth stream (added by CCJ).
            with tf.variable_scope("resnet_v2_50_dep"):
                net_depth = tf.zeros(shape=tf.shape(net_img), dtype=tf.float32)
                if depth is not None:
                    # NOTE: the pre-trained resnet_v2_50 expects a 3-channel
                    # input, so the depth map is copied to three channels.
                    # A more principled treatment may be needed.
                    net_depth, _ = resnet_v2.resnet_v2_50(
                        inputs=tf.concat([depth, depth, depth], axis=-1),
                        num_classes=None,
                        is_training=is_training,
                    )
                    net_depth = tf.squeeze(net_depth,
                                           axis=[1, 2])  # N x 2048

        # NOTE: to be updated (CCJ): fusion of the encoded image and depth.
        with tf.variable_scope("encoder_fc"):
            #tmp = tf.concat([net_img, net_depth], 1)
            #net = slim.fully_connected(tmp, 2048*2)

            # One fully connected layer per stream, then a joint one.
            tmp = tf.concat([
                slim.fully_connected(net_img, 2048),
                slim.fully_connected(net_depth, 2048)
            ], 1)
            net = slim.fully_connected(tmp, 2048)

    variables = tf.contrib.framework.get_variables(scope)
    #print ("[**] Encoder_resnet_v2(), returns variables: ", variables)
    return net, variables
Example #31
0
def Encoder_resnet(x,
                   depth=None,
                   is_training=True,
                   weight_decay=0.001,
                   reuse=False):
    """
    ResNet v2-50 encoder for an image and, optionally, a depth map.
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - depth : N x H x W x 1, or None to skip the depth branch
    - is_training: bool, forwarded to both ResNets
    - weight_decay: float
    - reuse: bool->True if test (applied to the image network)

    Outputs:
    - net: encoded image, spatial axes squeezed out
    - net_depth: encoded depth, or None when `depth` is None
    - variables: tf variables of the image network, followed by those of
      the depth network when `depth` is given
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    with tf.name_scope("Encoder_resnet", [x, depth]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            # With num_classes=None and the default global_pool=True,
            # resnet_v2_50 returns the globally pooled output of the last
            # block, so axes 1 and 2 can be squeezed away.
            net, _ = resnet_v2.resnet_v2_50(
                inputs=x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])

            # Depth branch (added by CCJ): a second ResNet with its own
            # variables.
            # NOTE(review): `reuse` is not forwarded to this branch --
            # confirm whether the depth network should be reused at test
            # time as well.
            net_depth = None
            if depth is not None:
                net_depth, _ = resnet_v2.resnet_v2_50(
                    inputs=depth,
                    num_classes=None,
                    is_training=is_training,
                    scope='resnet_v2_50_dep')
                net_depth = tf.squeeze(net_depth, axis=[1, 2])

    # The image network is built under the scope 'resnet_v2_50'; the name
    # 'resnet_v2_50_img' used here previously matches no variable. The
    # trailing slash keeps the prefix filter from also matching
    # 'resnet_v2_50_dep/...'.
    variables = tf.contrib.framework.get_variables('resnet_v2_50/')
    if depth is not None:
        variables = variables + tf.contrib.framework.get_variables(
            'resnet_v2_50_dep')
    #return net, variables
    return net, net_depth, variables
Example #32
0
# -*- coding: utf-8 -*-
# @ File ResNetDemo.py
# @ Description :
# @ Author alexchung
# @ Time 21/1/2019 09:52

import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import resnet_v2

# Random stand-in for a batch of five 224x224 RGB images.
images = tf.Variable(
    initial_value=tf.random_uniform(shape=(5, 224, 224, 3), minval=0,
                                    maxval=3),
    dtype=tf.float32,
)
# Number of output classes, kept as a graph constant.
num_classes = tf.constant(5, dtype=tf.int32)
# is_training = True

if __name__ == "__main__":
    # Initializer for the variables that exist at this point.
    init = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer(),
    )
    with tf.Session() as sess:
        # images, class_num = sess.run([images, class_num])
        sess.run(init)

        # Build ResNet v2-50 on top of the random batch; the class count is
        # read back from the graph constant.
        resnet_scope = resnet_v2.resnet_arg_scope()
        with slim.arg_scope(resnet_scope):
            class_count = sess.run(num_classes)
            logits, end_points = resnet_v2.resnet_v2_50(
                images,
                num_classes=class_count,
                is_training=True,
            )

        # List every model variable with its shape.
        for model_var in tf.model_variables():
            print(model_var.name, model_var.shape)