Example No. 1
def Encoder_resnet(x, is_training=True, weight_decay=0.001, reuse=False):
    """
    Resnet v2-50
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - weight_decay: float
    - reuse: bool; True at test time

    Outputs:
    - net: N x 2048 feature vector
    - variables: tf variables
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    with tf.name_scope("Encoder_resnet", [x]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            net, end_points = resnet_v2.resnet_v2_50(
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])
    variables = tf.contrib.framework.get_variables('resnet_v2_50')
    return net, variables
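A minimal usage sketch for the encoder above (the 224x224 input size and placeholder setup are illustrative assumptions):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 224, 224, 3])
feat, resnet_vars = Encoder_resnet(x, is_training=False, reuse=False)
print(feat.get_shape())  # (?, 2048): the globally pooled ResNet feature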
Example No. 2
    def __init__(self, x, num_classes=1001, is_training=False):
        """Initializes the tensorflow graph for the ResNet50-v2 model.

        Args:
            x (tf.Variable): The variable in the tensorflow graph
                that feeds into the model nodes.
            num_classes (int):
                Number of predicted classes for classification tasks.
                If 0 or None, the features before the logit layer are returned.
            is_training (bool): Whether batch_norm layers are in training mode.
        """

        super(ResNet50v2, self).__init__()

        self.x = x
        self.num_classes = num_classes

        # populating the tensorflow graph
        with slim.arg_scope(resnet_arg_scope()):
            net, end_points = resnet_v2_50(
                x, num_classes=num_classes,
                is_training=is_training, reuse=None)

        self.end_points = _get_updated_endpoints(end_points)
        self.variables_to_restore = slim.get_variables_to_restore(exclude=[])
Example No. 3
def resnet50(x, nums, is_training=True, reuse=False):
    """
    Resnet v2-50
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - nums: int, output size of the final fully connected layer
    - reuse: bool; True at test time

    Outputs:
    - net: N x nums
    - variables: tf variables
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    with tf.name_scope("Resnet", [x]):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_50(  # shape=(N, 1, 1, 2048)
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])  # shape=(N, 2048)
            net = slim.fully_connected(net,
                                       num_outputs=nums,
                                       activation_fn=None,
                                       trainable=is_training,
                                       reuse=reuse,
                                       scope='fc')
    # note: this collects only the ResNet variables; the 'fc' layer's
    # variables live under the 'fc' scope and are not included here
    variables = tf.contrib.framework.get_variables('resnet_v2_50')
    return net, variables
Example No. 4
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import resnet_v2
from skimage import io, transform


def test_network(img_path):
    x = tf.placeholder("float", shape=[None, 224, 224, 3], name='input')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(x,
                                                    num_classes=2,
                                                    is_training=False)
    predictions = end_points["predictions"]

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "train.ckpt")

        w = 224
        h = 224
        c = 3
        imgs = []
        img = io.imread(img_path)
        img = transform.resize(img, (w, h, c))
        imgs.append(img)

        data = np.asarray(imgs, np.float32)

        predictions_val = predictions.eval(feed_dict={x: data})

        print(predictions_val)
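Note that the slim resnet_v2 checkpoints were trained with Inception-style preprocessing, which expects inputs scaled to [-1, 1]; the snippet above feeds skimage's [0, 1] floats directly. A small preprocessing sketch (the function name is ours):

def preprocess_for_resnet_v2(img):
    # transform.resize returns floats in [0, 1];
    # rescale to the [-1, 1] range the pretrained checkpoint expects
    return img * 2.0 - 1.0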
Example No. 5
    def fprop(self, x):
        num_original_classes = 1001

        var_to_ckpt_name = lambda v: \
            v.name.replace(self._var_scope+'/', '')\
                  .replace(':0', '')

        with slim.arg_scope(resnet_arg_scope()), \
                tf.variable_scope(self._var_scope):

            x = self._preprocessing_fn(x)

            net, end_points = resnet_v2_50(x,
                                           num_classes=num_original_classes,
                                           is_training=False,
                                           reuse=tf.AUTO_REUSE)
            end_points = self._get_updated_endpoints(end_points)

        # Load weights for a particular scope only once
        if self._var_scope not in self._scopes_loaded:
            variables_to_restore = list(
                filter(lambda v: v.name.split('/')[0] == self._var_scope,
                       slim.get_variables_to_restore(exclude=[])))

            variable_name_map = {
                var_to_ckpt_name(v): v
                for v in variables_to_restore
            }

            saver = tf.train.Saver(var_list=variable_name_map)
            saver.restore(self._sess, self._get_latest_checkpoint_path())

            self._scopes_loaded.add(self._var_scope)

        return end_points
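The var_to_ckpt_name lambda above only strips the wrapper scope prefix and the ':0' tensor suffix, so graph names line up with checkpoint names. A worked illustration (the scope name is hypothetical):

var_scope = 'my_model'  # hypothetical value of self._var_scope
var_to_ckpt_name = lambda name: name.replace(var_scope + '/', '').replace(':0', '')
print(var_to_ckpt_name('my_model/resnet_v2_50/conv1/weights:0'))
# -> resnet_v2_50/conv1/weights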
Example No. 6
def network_resnet_v2_50():
    input_shape = [1, 224, 224, 3]
    input_ = tf.placeholder(dtype=tf.float32, name='input', shape=input_shape)
    net, _end_points = resnet_v2_50(input_,
                                    num_classes=1000,
                                    is_training=False)
    return net
Example No. 7
def tower_loss(scope):
    images, labels = read_and_decode()
    if net == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(images, num_classes=FLAGS.num_classes)
    elif net == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_19(images, num_classes=FLAGS.num_classes)
    elif net == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_101(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v2_50':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    else:
        raise Exception('No network matched with net %s.' % net)
    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)
    _ = cal_loss(logits, labels)
    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')
    for l in losses + [total_loss]:
        loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)
    return total_loss
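The if/elif chain in tower_loss can also be expressed as a dispatch table, which keeps each arg_scope/model pairing in one place. A sketch under the same FLAGS and module assumptions:

ARCHS = {
    'vgg_16': (vgg.vgg_arg_scope, vgg.vgg_16),
    'vgg_19': (vgg.vgg_arg_scope, vgg.vgg_19),
    'resnet_v1_50': (resnet_v1.resnet_arg_scope, resnet_v1.resnet_v1_50),
    'resnet_v1_101': (resnet_v1.resnet_arg_scope, resnet_v1.resnet_v1_101),
    'resnet_v2_50': (resnet_v2.resnet_arg_scope, resnet_v2.resnet_v2_50),
}

def build_logits(images, net_name):
    if net_name not in ARCHS:
        raise Exception('No network matched with net %s.' % net_name)
    arg_scope_fn, model_fn = ARCHS[net_name]
    with slim.arg_scope(arg_scope_fn()):
        logits, _ = model_fn(images, num_classes=FLAGS.num_classes)
    # ResNet logits come back as [N, 1, 1, C]; the reshape is a no-op for VGG
    return tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])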
Example No. 8
    def _encoder(self, input_images, scope_name="encoder", trainable=True, scope_reuse=False):
        with arg_scope(resnet_utils.resnet_arg_scope()):
            # input (256, 256, 3) ==> output (32, 32, 2048)
            output, end_points = resnet_v2.resnet_v2_50(input_images, output_stride=8, global_pool=False, reuse=scope_reuse)
            # (32, 32, 2048) ==> (32, 32, 512)
            hidden_state = decoder_layer(output, out_channels=self.lstm_channel, stride=1, scope_name='encoder_layer1', trainable=trainable)
            print(hidden_state.get_shape())
            tf.summary.histogram(hidden_state.op.name + "/activation", hidden_state)
            return hidden_state
Example No. 9
    def build_pretrained_graph(self,
                               images,
                               resnet_layer,
                               checkpoint,
                               is_training,
                               reuse=False):
        """See baseclass."""
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            _, endpoints = resnet_v2.resnet_v2_50(images,
                                                  is_training=is_training,
                                                  reuse=reuse)
            resnet_layer = 'resnet_v2_50/block%d' % resnet_layer
            resnet_output = endpoints[resnet_layer]
            resnet_variables = slim.get_variables_to_restore()
            resnet_variables = [
                i for i in resnet_variables if 'global_step' not in i.name
            ]
            if is_training and not reuse:
                init_saver = tf.train.Saver(resnet_variables)

                def init_fn(scaffold, sess):
                    del scaffold
                    init_saver.restore(sess, checkpoint)
            else:
                init_fn = None

            return resnet_output, resnet_variables, init_fn
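The returned init_fn matches the (scaffold, session) signature that tf.train.Scaffold expects, so it can be wired into a monitored session like this (a sketch; the checkpoint path and layer index are placeholders):

output, variables, init_fn = model.build_pretrained_graph(
    images, resnet_layer=3, checkpoint='/path/to/resnet_v2_50.ckpt',
    is_training=True)
scaffold = tf.train.Scaffold(init_fn=init_fn)
with tf.train.MonitoredTrainingSession(scaffold=scaffold) as sess:
    sess.run(output)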
Example No. 11
def resnet_fm(input_ph):
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        net, endpoints = resnet_v2.resnet_v2_50(input_ph,
                                                num_classes=None,
                                                is_training=False,
                                                reuse=tf.AUTO_REUSE)
        feature_map = tf.squeeze(net, axis=[1, 2])
    return feature_map
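Because reuse=tf.AUTO_REUSE is passed, resnet_fm can be applied to several inputs without duplicating variables, e.g. in a two-branch setup (a sketch):

left = tf.placeholder(tf.float32, [None, 224, 224, 3])
right = tf.placeholder(tf.float32, [None, 224, 224, 3])
feat_left = resnet_fm(left)    # creates the resnet_v2_50 variables
feat_right = resnet_fm(right)  # reuses the same variables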
Example No. 12
def get_class_resnet(inputs, num_classes, is_training=False):
    with tf.variable_scope("classifier",
                           custom_getter=float32_variable_storage_getter):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(
                inputs,
                num_classes,
                reuse=tf.AUTO_REUSE,
                is_training=is_training)
    return logits, end_points
Example No. 13
    def get_logits_prob(self, batch_input):
        """
        Prediction from the model on a single batch.
        :param batch_input: the input batch. Must be of shape [?, 224, 224, 3]
        :return: the logits and probabilities for the batch
        """

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(batch_input,
                                                        num_classes=1001,
                                                        is_training=False)
            probs = tf.squeeze(end_points["predictions"])
            probs = probs[1:]  # drop the background class of the 1001-class checkpoint
        return logits, probs
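With the background class at index 0 dropped, the returned probs line up with the 1000 ImageNet labels (for a single image, since tf.squeeze removes the batch dimension of 1). A top-5 lookup sketch, assuming a labels list is available:

import numpy as np

probs_val = sess.run(probs, feed_dict={batch_input: image})
top5 = np.argsort(probs_val)[-5:][::-1]
for idx in top5:
    print(labels[idx], probs_val[idx])  # labels: assumed list of 1000 class names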
Example No. 14
    def extract_features_resnet50(self, im, scope_name, reuse=False):
        use_global_pool = True
        num_classes = 512
        with tf.name_scope(scope_name):
            with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                out, _ = resnet_v2.resnet_v2_50(inputs=im,
                                                num_classes=num_classes,
                                                global_pool=use_global_pool,
                                                is_training=self.is_training,
                                                scope='resnet_v2_50',
                                                reuse=reuse)
        print('\nShape after Resnet_50\n')
        print(out.get_shape())
        out = layers.flatten(out)
        return out
Example No. 15
def res50_encode(inputs,
                 trainable=False,
                 is_training=False,
                 add_summaries=True):
    fine_tune = is_training and trainable
    net, end_points = resnet_v2_50(inputs,
                                   is_training=fine_tune,
                                   scope="resnet_v2_50")

    net = tf.squeeze(net, [1, 2], name='resnet_v2_50/squeezed')
    if add_summaries:
        for v in end_points.values():
            tf.contrib.layers.summaries.summarize_activation(v)

    return net, end_points
Example No. 16
def trainmodel(train_batch, train_label_batch, val_label_batch, num_epochs):
    with slim.arg_scope(resnet_arg_scope()):
        train_logits, end_points = resnet_v2.resnet_v2_50(train_batch,
                                                          num_classes=2,
                                                          is_training=True)

    tf.losses.sparse_softmax_cross_entropy(labels=train_label_batch,
                                           logits=train_logits)
    total_loss = tf.losses.get_total_loss()
    global_step = tf.Variable(0, name='global_step', trainable=False)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(total_loss, global_step=global_step)

    # note: as in the original code, the "training accuracy" below compares
    # predictions on the training batch against val_label_batch;
    # train_label_batch is probably what was intended here
    prediction_labels = tf.argmax(end_points['predictions'], 3)
    correct_prediction = tf.equal(prediction_labels, val_label_batch)
    train_accuracy_batch = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    saver = tf.train.Saver(tf.trainable_variables() +
                           tf.get_collection_ref("moving_vars"))

    with tf.Session() as sess:
        sess.run(
            tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer()))
        print("Initialized!")

        step = 0
        start_time = time.time()
        for epoch_index in range(num_epochs):
            _, loss_out, train_acc_out = sess.run(
                [train_op, total_loss, train_accuracy_batch])

            duration = time.time() - start_time
            start_time = time.time()

            print("Minibatch loss at step %d: %.6f (%.3f sec)" %
                  (step, loss_out, duration))
            print("Minibatch accuracy: %.6f" % train_acc_out)

            step += 1

        print("Saving checkpoint...")
        saver.save(sess, './train.ckpt')
        print("Checkpoint saved!")
Example No. 17
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import resnet_v2


def test_network(img_path, label_path):
    x = tf.placeholder("float", shape=[None, 224, 224, 3], name='input')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(x, num_classes=1001, is_training=False)
    predictions = end_points["predictions"]
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, "resnet_v2_50.ckpt");

        imgfloat = tf.cast(tf.image.decode_jpeg(tf.read_file(img_path), channels=3), dtype=tf.float32)
        # resize to 224x224 and rescale pixel values from [0, 255] to [-1, 1]
        img = tf.subtract(tf.multiply(tf.div(tf.image.resize_images(tf.expand_dims(imgfloat, 0), (224, 224), method=0), 255.0), 2), 1.0)
        predictions_val = predictions.eval(feed_dict={x: img.eval()})
        predicted_classes = np.argmax(predictions_val, axis=3)

        file = open(label_path, encoding="utf-8")
        labels = file.readlines()
        print(predicted_classes, labels[predicted_classes[0][0][0]])
Example No. 18
def build_graph():
    images = tf.placeholder(dtype=tf.float32,
                            shape=[None, 64, 64, 1],
                            name='image_batch')
    labels = tf.placeholder(dtype=tf.int64,
                            shape=[None, CLASS_NUM],
                            name='label_batch')
    global_step = tf.get_variable("step", [],
                                  initializer=tf.constant_initializer(0.0),
                                  trainable=False)

    net, end_points = res.resnet_v2_50(inputs=images,
                                       num_classes=CLASS_NUM,
                                       is_training=True)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=net,
                                                            labels=labels)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy_mean,
                                                     global_step)

    reout = tf.reshape(end_points['predictions'], [-1, CLASS_NUM])

    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(reout, 1),
                                      tf.argmax(labels, 1))
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    tf.summary.scalar('loss', cross_entropy_mean)
    tf.summary.scalar('accuracy', evaluation_step)
    merged_summary_op = tf.summary.merge_all()
    print(evaluation_step.get_shape())
    return {
        'images': images,
        'labels': labels,
        'loss': cross_entropy_mean,
        'accuracy': evaluation_step,
        'train_step': train_step,
        'global_step': global_step,
        'merged_summary_op': merged_summary_op
    }
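One note on the loss above: tf.nn.softmax_cross_entropy_with_logits is deprecated in later TF 1.x releases in favor of the _v2 variant, which is equivalent here since no gradient flows into the labels placeholder:

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=net,
                                                           labels=labels)
cross_entropy_mean = tf.reduce_mean(cross_entropy)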
Example No. 20
    def fprop(self, x):
        """Exposes all the layers of the model.

        Args:
            x (tf.Variable): Tensor which is input to the model.

        Returns:
            dict: A dictionary mapping layer names to the corresponding
                 node in the tensorflow graph.
        """

        if x is self.x:
            return self.end_points

        else:
            with slim.arg_scope(resnet_arg_scope()):
                net, end_points = resnet_v2_50(
                    x, num_classes=self.num_classes,
                    is_training=False, reuse=tf.AUTO_REUSE)

            return _get_updated_endpoints(end_points)
Example No. 21
def get_box_resnet(inputs, is_training=False):
    with tf.variable_scope("box_net",
                           custom_getter=float32_variable_storage_getter):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            out, end_points = resnet_v2.resnet_v2_50(inputs,
                                                     num_classes=None,
                                                     global_pool=False,
                                                     reuse=tf.AUTO_REUSE,
                                                     is_training=is_training)
            l2_reg = tf.contrib.layers.l2_regularizer(scale=0.1)
            attn = tf.layers.conv2d(out,
                                    2048, [1, 1],
                                    activation=None,
                                    name='attn',
                                    kernel_regularizer=l2_reg,
                                    reuse=tf.AUTO_REUSE)
            attn = tf.reduce_mean(attn, [3], name='attn_pool', keepdims=True)

            #            attn = tf.layers.conv2d(out, 64, [1,1], padding='same',activation=tf.nn.leaky_relu,name='attn1',reuse=tf.AUTO_REUSE)
            #            attn = tf.layers.conv2d(attn, 32, [1,1], padding='same',activation=tf.nn.leaky_relu,name='attn2',reuse=tf.AUTO_REUSE)
            #            attn = tf.layers.conv2d(attn, 1,[1,1],padding='valid', activation=tf.nn.sigmoid,name='attn3',reuse=tf.AUTO_REUSE)
            #            attn = tf.layers.conv2d(attn, 2048,[1,1],padding='same',activation=None,use_bias=False,kernel_initializer=tf.initializers.ones,name='attn4',trainable=False,reuse=tf.AUTO_REUSE)
            out = tf.multiply(attn, out)
            #            out = tf.reduce_mean(out,[1,2],name='pool6',keepdims=True)
            out = tf.layers.conv2d(out,
                                   512, [3, 3],
                                   padding='same',
                                   activation=None,
                                   name='box',
                                   reuse=tf.AUTO_REUSE)
            out = tf.layers.flatten(out, name='box_flatten')
            box_out = tf.layers.dense(out,
                                      4,
                                      activation=None,
                                      name='box_out',
                                      reuse=tf.AUTO_REUSE)

#            box_out = tf.squeeze(box_out,[1,2])
    return box_out, attn
Example No. 22
    def build_model(self):
        self.img = tf.placeholder(tf.float32, [None, 150, 100, 3]) / 255
        self.label = tf.placeholder(tf.float32, [None, 2])

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, _ = resnet_v2.resnet_v2_50(self.img,
                                            num_classes=2,
                                            is_training=True,
                                            global_pool=True)
            net = tf.reshape(net, [self.batch_size, 2])
            # dense_1 = tf.layers.dense(net, units=5, activation=tf.nn.relu)
            self.prediction_1 = net

            # self.prediction = tf.nn.softmax(tf.layers.dense(dense_1, units=2))

        with tf.variable_scope("ensemble_2"):
            with tf.variable_scope('convolution_1') as scope:
                W_conv1 = tf.Variable(tf.truncated_normal(shape=[5, 5, 3, 32],
                                                          mean=0,
                                                          stddev=0.01),
                                      name='conv1')
                b_conv1 = tf.Variable(tf.truncated_normal(shape=[32],
                                                          mean=0,
                                                          stddev=0.1),
                                      name='bias1')
                h_conv1 = tf.nn.dropout(
                    tf.nn.relu(
                        tf.nn.bias_add(
                            tf.nn.conv2d(self.img,
                                         W_conv1,
                                         strides=[1, 1, 1, 1],
                                         padding='VALID'), b_conv1)),
                    keep_prob=self.dropout_prob)  # output size 146x96x32
                h_pool1 = tf.nn.max_pool(
                    h_conv1,
                    ksize=[1, 2, 2, 1],
                    strides=[1, 2, 2, 1],
                    padding='VALID')  # output size 73x48x32

            with tf.variable_scope('convolution_2') as scope:
                W_conv2 = tf.Variable(tf.truncated_normal(shape=[5, 5, 32, 10],
                                                          mean=0,
                                                          stddev=0.01),
                                      name='conv2')
                b_conv2 = tf.Variable(tf.truncated_normal(shape=[10],
                                                          mean=0,
                                                          stddev=0.05),
                                      name='bias2')
                h_conv2 = tf.nn.dropout(tf.nn.relu(
                    tf.nn.bias_add(
                        tf.nn.conv2d(h_pool1,
                                     W_conv2,
                                     strides=[1, 1, 1, 1],
                                     padding='VALID'), b_conv2)),
                                        keep_prob=self.dropout_prob)
                h_pool2 = tf.nn.max_pool(h_conv2,
                                         ksize=[1, 2, 2, 1],
                                         strides=[1, 2, 2, 1],
                                         padding='VALID')
                self.dense_input = tf.reshape(h_pool2, (-1, 34 * 22 * 10))

            with tf.variable_scope('dense') as scope:
                h_dense1 = tf.nn.dropout(tf.layers.dense(
                    self.dense_input,
                    units=34 * 22 * 10,
                    activation=tf.nn.relu),
                                         keep_prob=self.dropout_prob)
                h_dense2 = tf.layers.dense(h_dense1, units=2, activation=None)
                self.prediction_2 = h_dense2

        # self.sum_prediction = tf.concat([self.prediction_1, self.prediction_2], axis=1)
        self.sum_prediction = (self.prediction_1 + self.prediction_2) / 2
        print('sum_prediction:', self.sum_prediction)

        with tf.variable_scope('result') as scope:
            # weight = tf.reshape(tf.nn.softmax(tf.layers.dense(self.dense_input, units=2)), (self.batch_size, 1, 2))
            # self.pin = weight
            # self.prediction = tf.nn.softmax(tf.reshape(tf.matmul(weight, self.sum_prediction), (self.batch_size, 2)))
            self.prediction = tf.nn.softmax(self.sum_prediction)
            print("prediction:", self.prediction)
            correct_prediction = tf.equal(tf.argmax(self.prediction, 1),
                                          tf.argmax(self.label, 1))

            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))

        reg = tf.contrib.layers.apply_regularization(
            tf.contrib.layers.l2_regularizer(1e-4), tf.trainable_variables())
        self.cross_entropy = tf.reduce_mean(-tf.reduce_sum(
            self.label *
            tf.log(tf.clip_by_value(self.prediction, 1e-10, 0.999999)),
            reduction_indices=[1]))

        global_step = tf.Variable(0, trainable=False)
        # self.learning = tf.train.exponential_decay(self.lr, global_step, 70, 0.8, staircase=True)
        self.train_step = tf.train.AdamOptimizer(self.lr).minimize(
            self.cross_entropy, global_step=global_step)
Example No. 23
def main():
    # Load the preprocessed data
    processed_data = np.load(INPUT_DATA, allow_pickle=True)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print(
        "%d training examples, %d validation examples and %d testing examples."
        % (n_training_example, len(validation_labels), len(testing_labels)))
    # Define the network inputs: images holds the input pictures, labels the label for each picture
    images = tf.placeholder(tf.float32, [None, 299, 299, 3],
                            name='Input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # Define the ResNet-v2-50 model (the original comment said inception-v3)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_50(images, num_classes=None)

    with tf.variable_scope("Logits"):
        # Squeeze out the 1x1 spatial dimensions of the raw model output,
        # leaving a [batch, 2048] tensor
        net = tf.squeeze(logits, axis=[1, 2])
        # Add a dropout layer
        net = slim.dropout(net, keep_prob=0.5, scope='dropout_scope')
        # Add a fully connected layer with the desired number of outputs
        logits = slim.fully_connected(net, num_outputs=N_CLASSES, scope='fc')

    # Get the variables that need to be trained
    trainable_variables = get_trainable_variables()
    # Define the cross-entropy loss
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES),
                                    logits,
                                    weights=1.0)
    # Define the training step
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss())
    # Compute accuracy
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
    # Define the function that loads the pretrained model
    load_fn = slim.assign_from_checkpoint_fn(CKPT_FILE,
                                             get_tuned_variables(),
                                             ignore_missing_vars=True)

    # Define the saver for the newly trained model
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialize the variables that are not loaded from the checkpoint
        init = tf.global_variables_initializer()
        sess.run(init)
        # Load the pretrained Google model
        print('Loading tuned variables from %s' % CKPT_FILE)
        load_fn(sess)
        start = 0
        end = BATCH
        for i in range(STEPS):
            # Run the training step; only the specified subset of parameters is updated, not all of them
            sess.run(train_step,
                     feed_dict={
                         images: training_images[start:end],
                         labels: training_labels[start:end]
                     })
            # Log progress
            if i % 5 == 0 or i + 1 == STEPS:
                #saver.save(sess, TRAIN_FILE, global_step = i)
                validation_accuracy = sess.run(evaluation_step,
                                               feed_dict={
                                                   images: validation_images,
                                                   labels: validation_labels
                                               })
                print('Step %d: Validation accuracy = %.1f%%' %
                      (i, validation_accuracy * 100.0))
            # The data was already shuffled during preprocessing, so the training data can be used in order
            start = end
            if start == n_training_example:
                start = 0
            end = start + BATCH
            if end > n_training_example:
                end = n_training_example
        # Finally, measure accuracy on the test data
        test_accuracy = sess.run(evaluation_step,
                                 feed_dict={
                                     images: testing_images,
                                     labels: testing_labels
                                 })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
Example No. 24
batch_size = FLAGS.batch_size
video_size = FLAGS.num_frames
total_size = batch_size * video_size

video_data = tf.placeholder(tf.float32, [batch_size, video_size, 224, 224, 3])

batch_video_data = tf.reshape(video_data, [total_size, 224, 224, 3])

# for i in range(2):

pre_logit, epoints = resnet_v2.resnet_v2_50(
    inputs=batch_video_data,
    num_classes=None,
    # reuse = True,
    scope='resnet_v2_50')

orig_vars = slim.get_variables_to_restore()

with tf.variable_scope('post_conv'):
    # pre_logit = tf.reshape(pre_logit, [total_size, 2048])
    embeddings = layers.fully_connected(pre_logit,
                                        1024 if FLAGS.big_embeddings else 10,
                                        activation_fn=None)
    activations = tf.nn.relu(embeddings)
    scores = layers.fully_connected(activations, 1, activation_fn=None)
    scores = tf.reshape(scores, [batch_size, video_size, 1])

# the original snippet was truncated at this call; the 'post_conv' scope is an assumption
post_conv_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope='post_conv')
Example No. 25
 def forward(self, inputs, reuse=None, is_training=None):
     net, endpts = resnet_v2.resnet_v2_50(inputs=inputs,
                                          reuse=reuse,
                                          is_training=is_training)
     return net, endpts
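With the defaults (num_classes=None, global_pool=True), the net returned by this wrapper is the pooled [N, 1, 1, 2048] feature map; callers typically squeeze it (a sketch):

net, endpts = model.forward(inputs, is_training=False)
feat = tf.squeeze(net, axis=[1, 2])  # [N, 2048]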
Example No. 26
def Encoder_resnet_v2(x,
                      depth=None,
                      is_training=True,
                      weight_decay=0.001,
                      reuse=False,
                      name='Encoder_resnet_v2'):
    """
    Resnet v2-50
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - weight_decay: float
    - reuse: bool->True if test

    Outputs:
    - net: N x 2048 feature vector
    - variables: tf variables
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    with tf.variable_scope(name, reuse=reuse) as scope:
        #with tf.name_scope("Encoder_resnet", [x, depth]):
        # Defines the default ResNet arg scope
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            """ added by CCJ from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py;
                (*) Args: reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given.
                (*) resnet_v2() Returns:
                'net': A rank-4 tensor of size [batch, height_out, width_out, channels_out].
                      - If 'global_pool' is False, then height_out and width_out are reduced by a factor of output_stride compared 
                        to the respective height_in and width_in,
                      - else both height_out and width_out equal one. 
                      - If num_classes is None, then net is the output of the last ResNet block, potentially after global average pooling. 
                      - If num_classes is not None, net contains the pre-softmax activations.
                (*) end_points: A dictionary from components of the network to the corresponding activation.
            """
            with tf.variable_scope("resnet_v2_50_img"):
                net_img, end_points_img = resnet_v2.resnet_v2_50(
                    inputs=x,  # inputs,
                    num_classes=None,
                    is_training=is_training,
                    #added by CCJ: global_pool=True as default;
                    #reuse=reuse,
                    #scope='resnet_v2_50_img'
                )
                net_img = tf.squeeze(net_img, axis=[1,
                                                    2])  # output in N x 2048

            # added by CCJ: for depth encoded by resnet_v2_50
            with tf.variable_scope("resnet_v2_50_dep"):
                net_depth = tf.zeros(shape=tf.shape(net_img), dtype=tf.float32)
                if depth is not None:
                    # NOTE: since we want to load the pre-trained resnet_v2_50 model,
                    # which assumes the input tensor has 3 channels,
                    # we replicate the depth map across 3 channels; a more principled
                    # treatment of the depth input may be needed.
                    net_depth, end_points_depth = resnet_v2.resnet_v2_50(
                        inputs=tf.concat([depth, depth, depth],
                                         axis=-1),  # inputs,
                        num_classes=None,
                        is_training=is_training,
                        #added by CCJ: global_pool=True as default;
                        #reuse= tf.AUTO_REUSE,
                        #scope='resnet_v2_50_dep'
                    )
                    net_depth = tf.squeeze(net_depth,
                                           axis=[1, 2])  # output in N x 2048

        # NOTE: to be updated:!!!
        # added by CCJ: fully connected layer for encoded_depth and encoded_image
        with tf.variable_scope("encoder_fc"):
            #tmp = tf.concat([net_img, net_depth], 1)
            #net = slim.fully_connected(tmp, 2048*2)

            tmp = tf.concat([
                slim.fully_connected(net_img, 2048),
                slim.fully_connected(net_depth, 2048)
            ], 1)
            net = slim.fully_connected(tmp, 2048)

    variables = tf.contrib.framework.get_variables(scope)
    #print ("[**] Encoder_resnet_v2(), returns variables: ", variables)
    return net, variables
Example No. 27
def extract_pred_latent_attr():
    '''
    Step 1: Create dirs for saving models and logs
    '''
    print('Start extract predicted latent attr')
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_id
    model_path_suffix = os.path.join(FLAGS.network_def + '_' + FLAGS.version +
                                     '_' + 'train_multi' + '_imagesize_' +
                                     str(FLAGS.img_size) + '_batchsize_' +
                                     str(FLAGS.batch_size) + '_experiment_' +
                                     FLAGS.experiment_id)
    model_save_dir = os.path.join('../../data/results_multi/model_weights',
                                  model_path_suffix)

    print('Extract pred attr of train set: ' + model_path_suffix + ' ...')
    la_save_dir_train = os.path.join(
        '../../data/results_extract_la' + '/train', model_path_suffix)
    la_save_dir_test = os.path.join('../../data/results_extract_la' + '/test',
                                    model_path_suffix)

    os.system('mkdir -p {}'.format(la_save_dir_train))
    os.system('mkdir -p {}'.format(la_save_dir_test))
    '''
    Step 2: Create dataset and data generator
    '''
    test_set_train = []
    with open(FLAGS.train_file, 'r') as f:
        for line in f.readlines():
            image_name = line.split('\t')[0]
            test_set_train.append(image_name)
    print('READING LABELS OF TRAIN DATA')
    print('Train total num:', len(test_set_train))
    test_size_train = len(test_set_train)

    test_set_test = parse_test_image_list(FLAGS.test_file)
    print('Test total num:', len(test_set_test))
    test_size_test = len(test_set_test)
    '''
    Step 3: Build network graph
    '''
    _, whole_attr_np, _ = parse_repre_label2one_hot_map(
        FLAGS.attrs_per_class_dir)
    # print(whole_attr_np)

    with tf.Graph().as_default() as g3:
        image_placeholder = tf.placeholder(
            dtype=tf.float32,
            shape=[None, FLAGS.img_height, FLAGS.img_width,
                   FLAGS.img_depth])  # [batch, 224, 224, 3]
        is_training = tf.placeholder(dtype=tf.bool)

        feature, endpoints = resnet_v2.resnet_v2_50(image_placeholder,
                                                    num_classes=None,
                                                    reuse=False,
                                                    is_training=is_training)
        feature = tf.squeeze(feature, axis=[1, 2])
        print('feature shape:', feature)
        feature = slim.dropout(feature, keep_prob=1)
        final_logits = slim.fully_connected(feature,
                                            num_outputs=2 *
                                            FLAGS.attribute_label_cnt,
                                            activation_fn=None)
        print('logits shape', final_logits)
    '''
    Step 4: Testing
    '''
    total_start_time = time.time()

    device_count = {'GPU': 1} if FLAGS.use_gpu else {'GPU': 0}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True),
                    graph=g3) as sess:
        # Create model saver
        saver = tf.train.Saver()

        # Init all vars
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        if True:
            # Restore pretrained weights
            pretrained_model = model_save_dir
            print('load checkpoint of ', pretrained_model)
            checkpoint = tf.train.get_checkpoint_state(pretrained_model)
            ckpt = checkpoint.model_checkpoint_path  # get the most recently saved checkpoint file
            saver.restore(sess, ckpt)
            for variable in tf.trainable_variables():  # check weights
                with tf.variable_scope('', reuse=True):
                    var = tf.get_variable(variable.name.split(':0')[0])
                    print(variable.name, np.mean(sess.run(var)))

        # Extract train la start
        step = 0
        train_la_dict = {}
        while True:
            if step < test_size_train:
                image_name = test_set_train[step:step + FLAGS.batch_size_test]
                print('IMAGE_NAME', image_name)
                step = step + FLAGS.batch_size_test
                image_num = len(image_name)
                print('image num', image_num)

                image_data = np.zeros((image_num, FLAGS.img_height,
                                       FLAGS.img_width, FLAGS.img_depth),
                                      dtype=np.float32)
                for i in range(image_num):
                    img = open_img(is_train=True,
                                   name=image_name[i],
                                   size=FLAGS.img_size,
                                   color=FLAGS.img_type)

                    if FLAGS.normalize:
                        image_data[i, :, :, :] = img.astype(np.float32) / 255.0
                    else:
                        image_data[i, :, :, :] = img.astype(np.float32)

                batch_start_time = time.time()

                pred_logits = sess.run([final_logits],
                                       feed_dict={
                                           image_placeholder: image_data,
                                           is_training: False
                                       })
                pred_logits = np.array(pred_logits).squeeze()

                for i in range(image_num):
                    train_la_dict[image_name[i]] = pred_logits[i]
                print('[%s][testing %d][step %d / %d exec %.2f seconds]' %
                      (time.strftime("%Y-%m-%d %H:%M:%S"), image_num, step,
                       test_size_train, (time.time() - batch_start_time)))
            else:
                break
        print('train_la_dict: ', len(train_la_dict))
        np.savez(os.path.join(la_save_dir_train, 'train_la.npz'),
                 dict=train_la_dict)
        train_la_dict_2 = np.load(
            os.path.join(la_save_dir_train, 'train_la.npz'))['dict'][()]
        print(len(train_la_dict_2),
              train_la_dict_2['7c382f330bd76982761f1a9191e9db0e.jpeg'])
        print('Extract train set done.')
        print("[%s][total exec %s seconds" %
              (time.strftime("%Y-%m-%d %H:%M:%S"),
               (time.time() - total_start_time)))

        # Extract test la start
        step = 0
        test_la_dict = {}
        while True:
            if step < test_size_test:
                image_name = test_set_test[step:step + FLAGS.batch_size_test]
                print('IMAGE_NAME', image_name)
                step = step + FLAGS.batch_size_test
                image_num = len(image_name)
                print('image num', image_num)

                image_data = np.zeros((image_num, FLAGS.img_height,
                                       FLAGS.img_width, FLAGS.img_depth),
                                      dtype=np.float32)
                for i in range(image_num):
                    img = open_img(is_train=False,
                                   name=image_name[i],
                                   size=FLAGS.img_size,
                                   color=FLAGS.img_type)

                    if FLAGS.normalize:
                        image_data[i, :, :, :] = img.astype(np.float32) / 255.0
                    else:
                        image_data[i, :, :, :] = img.astype(np.float32)

                batch_start_time = time.time()

                pred_logits = sess.run([final_logits],
                                       feed_dict={
                                           image_placeholder: image_data,
                                           is_training: False
                                       })
                pred_logits = np.array(pred_logits).squeeze()

                for i in range(image_num):
                    test_la_dict[image_name[i]] = pred_logits[i]

                print('[%s][testing %d][step %d / %d exec %.2f seconds]' %
                      (time.strftime("%Y-%m-%d %H:%M:%S"), image_num, step,
                       test_size_test, (time.time() - batch_start_time)))
            else:
                break

        print('test_la_dict: ', len(test_la_dict))
        np.savez(os.path.join(la_save_dir_test, 'test_la.npz'),
                 dict=test_la_dict)
        test_la_dict_2 = np.load(os.path.join(la_save_dir_test,
                                              'test_la.npz'))['dict'][()]
        print(len(test_la_dict_2),
              test_la_dict_2['0003ae092034aa69da9782b2a3b4a15a.jpg'])
        print('Extract test set done.')
        print("[%s][total exec %s seconds" %
              (time.strftime("%Y-%m-%d %H:%M:%S"),
               (time.time() - total_start_time)))

        sess.close()
Example No. 28
# -*- coding: utf-8 -*-
# @ File ResNetDemo.py
# @ Description :
# @ Author alexchung
# @ Time 21/1/2019 09:52

import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import resnet_v2

images = tf.Variable(initial_value=tf.random_uniform(shape=(5, 224, 224, 3),
                                                     minval=0,
                                                     maxval=3),
                     dtype=tf.float32)
num_classes = tf.constant(value=5, dtype=tf.int32)
# is_training = True

if __name__ == "__main__":

    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    with tf.Session() as sess:
        # images, class_num = sess.run([images, class_num])
        sess.run(init)
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(
                images, num_classes=num_classes.eval(), is_training=True)

        for var in tf.model_variables():
            print(var.name, var.shape)
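One caveat with the script above: the init op is grouped before the ResNet variables exist, so they would remain uninitialized if any op were actually run (only names and shapes are printed here). Building the graph first avoids this; a sketch:

with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    logits, end_points = resnet_v2.resnet_v2_50(images, num_classes=5,
                                                is_training=True)
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())
with tf.Session() as sess:
    sess.run(init)  # now covers the ResNet variables as well
    print(sess.run(logits).shape)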
Example No. 29
def get_model(input_pls,
              is_training,
              bn=False,
              bn_decay=None,
              img_size=224,
              FLAGS=None):

    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu

    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']

    batch_size = input_imgs.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs  #* 10
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs  # B*H*W*3|4

    # Image extract features
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder == "resnet_v1_50":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_50')
        scopelst = [
            "resnet_v1_50/block1", "resnet_v1_50/block2",
            "resnet_v1_50/block3", 'resnet_v1_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v1_101":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_101')
        scopelst = [
            "resnet_v1_101/block1", "resnet_v1_101/block2",
            "resnet_v1_101/block3", 'resnet_v1_101/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_50":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_50')
        scopelst = [
            "resnet_v2_50/block1", "resnet_v2_50/block2",
            "resnet_v2_50/block3", 'resnet_v2_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_101":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_101')
        scopelst = [
            "resnet_v2_101/block1", "resnet_v2_101/block2",
            "resnet_v2_101/block3", 'resnet_v2_101/block4'
        ]
    end_points['img_embedding'] = ref_feats_embedding
    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts,
                                       input_trans_mat)  # B * N * 2

    if FLAGS.img_feat_onestream:
        with tf.compat.v1.variable_scope("sdfimgfeat") as scope:
            if FLAGS.encoder[:3] == "vgg":
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv1/conv1_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv2/conv2_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv3/conv3_3'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                if FLAGS.encoder[-7:] != "smaller":
                    conv4 = tf.compat.v1.image.resize_bilinear(
                        encdr_end_points['vgg_16/conv4/conv4_3'],
                        (FLAGS.img_h, FLAGS.img_w))
                    point_conv4 = tf.contrib.resampler.resampler(
                        conv4, sample_img_points)
                    point_img_feat = tf.concat(axis=2,
                                               values=[
                                                   point_conv1, point_conv2,
                                                   point_conv3, point_conv4
                                               ])  # small
                else:
                    print("smaller vgg")
                    point_img_feat = tf.concat(
                        axis=2, values=[point_conv1, point_conv2,
                                        point_conv3])  # small
            elif FLAGS.encoder[:3] == "res":
                # print(encdr_end_points.keys())
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[0]], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[1]], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[2]], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            else:
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[0], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[1], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[2], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)
            if FLAGS.decoder == "att":
                gvfs_feat = gvfnet.get_gvf_att_imgfeat(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            elif FLAGS.decoder == "skip":
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream_skip(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            else:
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
    else:
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction") as scope:
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)
    end_points['pred_gvfs_xyz'], end_points['pred_gvfs_dist'], end_points[
        'pred_gvfs_direction'] = None, None, None
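    # Either predict the full 3D offset (XYZ head) and derive distance/direction
    # from it, or predict distance and direction separately and recompose the offset.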
    if FLAGS.XYZ:
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2,
                          keepdims=True))
        end_points[
            'pred_gvfs_direction'] = end_points['pred_gvfs_xyz'] / tf.maximum(
                end_points['pred_gvfs_dist'], 1e-6)
    else:
        end_points['pred_gvfs_dist'], end_points[
            'pred_gvfs_direction'] = gvfnet.dist_direct_gvfhead(
                gvfs_feat,
                batch_size,
                wd=FLAGS.wd,
                activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = end_points[
            'pred_gvfs_direction'] * end_points['pred_gvfs_dist']

    end_points["sample_img_points"] = sample_img_points
    # end_points["ref_feats_embedding"] = ref_feats_embedding
    end_points["point_img_feat"] = point_img_feat

    return end_points
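The blocks above repeat the same resize-then-resample pattern once per encoder scale. A minimal helper sketch of that pattern follows (it assumes TF 1.x with tf.contrib available; the helper name is illustrative, not part of the original code):

# Sketch only: resize a feature map to image resolution, then sample it at
# projected 2D points. Assumes TF 1.x with tf.contrib; the name is illustrative.
import tensorflow as tf

def gather_point_features(feature_map, img_points, out_h, out_w):
    """feature_map: [N, h, w, C]; img_points: [N, P, 2] (x, y) pixel coords.

    Returns per-point features of shape [N, P, C].
    """
    upsampled = tf.compat.v1.image.resize_bilinear(feature_map, (out_h, out_w))
    return tf.contrib.resampler.resampler(upsampled, img_points)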
Example n. 30
0
    def build_model(self):
        """Build the ResNet-50-v2 graph: inputs, network heads, loss, and train step."""

        """
        Helper Variables
        """
        # global_step_tensor is needed by optimizer.minimize() in the train step below.
        self.global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
        self.global_step_inc = self.global_step_tensor.assign(self.global_step_tensor + 1)
        self.global_epoch_tensor = tf.Variable(0, trainable=False, name='global_epoch')
        self.global_epoch_inc = self.global_epoch_tensor.assign(self.global_epoch_tensor + 1)
        
        """
        Inputs to the network
        """
        with tf.variable_scope('inputs'):
            self.x, self.y, self.y_mi, self.bi = self.data_loader.get_input()
            self.is_training = tf.placeholder(tf.bool, name='Training_flag')
        tf.add_to_collection('inputs', self.x)
        tf.add_to_collection('inputs', self.y)
        tf.add_to_collection('inputs', self.y_mi)
        tf.add_to_collection('inputs', self.bi)
        tf.add_to_collection('inputs', self.is_training)

        """
        Network Architecture
        """
        
        with tf.variable_scope('network'):
            # wire the is_training placeholder into the network so batch norm
            # switches between train and eval behavior
            net, end_points = resnet_v2.resnet_v2_50(inputs=self.x, num_classes=None,
                                                     is_training=self.is_training,
                                                     global_pool=True)
    
            end_points['resnet_v2_50/pool5:0'] = net 
            print("Size after pool: ", net.shape)
    
            net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
    
            end_points['resnet_v2_50/spatial_squeeze'] = net
            print("Size after squeeze: ", net.shape)
    
            if self.config.mode == 'si_branch':
                end_points['resnet_v2_50/output_si'] = fully_connected(net,
                                                                         self.num_classes, activation_fn=None,
                                                                         normalizer_fn=None, scope='logits_si')
                self.logits = end_points['resnet_v2_50/output_si']
                
                net = end_points['resnet_v2_50/output_si']
                
            if self.config.mode == 'mi_branch':
                net = self.mi_pool_layer(net, bag_indices=self.bi, pooling=self.config.pooling)
                end_points['resnet_v2_50/mi_pool1:0'] = net
                print("Size after MI: ", net.shape)
                
                end_points['resnet_v2_50/output_mi'] = fully_connected(end_points['resnet_v2_50/mi_pool1:0'],
                                                                         self.num_classes, activation_fn=None,
                                                                         normalizer_fn=None, scope='logits_mi')
                self.logits = end_points['resnet_v2_50/output_mi']
                
                net = end_points['resnet_v2_50/output_mi']
                
            if self.config.mode == 'si_mi_branch':
                end_points['resnet_v2_50/mi_pool1:0'] = self.mi_pool_layer(net,
                                                                           bag_indices=self.bi,
                                                                           pooling=self.config.pooling)
                
                end_points['resnet_v2_50/output_mi'] = fully_connected(end_points['resnet_v2_50/mi_pool1:0'],
                                                                         self.num_classes, activation_fn=None,
                                                                         normalizer_fn=None, scope='logits_mi')
                self.logits = end_points['resnet_v2_50/output_mi']
                
                end_points['resnet_v2_50/output_si'] = fully_connected(net,
                                                                         self.num_classes, activation_fn=None,
                                                                         normalizer_fn=None, scope='logits_si')
                self.logits_si = end_points['resnet_v2_50/output_si']
                
                net = end_points['resnet_v2_50/output_mi']
             
            
            end_points['predictions'] = tf.nn.softmax(net)
            
            with tf.variable_scope('out'):
                
                self.out = end_points['predictions']
            
            tf.add_to_collection('out', self.out)
            
            print("predictions out shape: ", self.out.shape)
            
            print("network output argmax resnet")
            with tf.variable_scope('out_argmax'):
                self.out_argmax = tf.argmax(self.out, axis=-1, output_type=tf.int32, name='out_argmax')
                
                print("Arg Max Shape: ", self.out_argmax.shape)

        with tf.variable_scope('loss-acc'):
            
            if self.config.mode == 'si_mi_branch':
                self.update_beta_combined_cost()
                self.loss = combined_cost_function(self.y, self.logits_si, self.y_mi, self.logits,
                                                   beta=self.current_beta)
            else:
                self.loss = tf.losses.sparse_softmax_cross_entropy(labels=self.y, logits=self.logits)

            if self.config.mode != 'si_branch':
                self.acc = tf.reduce_mean(tf.cast(tf.equal(self.y_mi, self.out_argmax), tf.float32))
            else:
                self.acc = tf.reduce_mean(tf.cast(tf.equal(self.y, self.out_argmax), tf.float32))
            #self.acc = self.evaluate_accuracy(self.y, self.out_argmax,
            #                                  self.is_training, self.config.patch_count)

        with tf.variable_scope('train_step'):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_step = self.optimizer.minimize(self.loss, global_step=self.global_step_tensor)

        tf.add_to_collection('test', self.out_argmax)
        tf.add_to_collection('train', self.train_step)
        tf.add_to_collection('train', self.loss)
        tf.add_to_collection('train', self.acc)
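The mi_pool_layer called in build_model is not included in this example. One plausible sketch, assuming bag-level pooling of instance features by bag index (num_bags and the segment ops below are assumptions, not the original implementation):

# Sketch only: pool [N, D] instance features into [num_bags, D] bag features.
import tensorflow as tf

def mi_pool_layer(features, bag_indices, num_bags, pooling='max'):
    if pooling == 'max':
        return tf.math.unsorted_segment_max(features, bag_indices, num_bags)
    if pooling == 'mean':
        return tf.math.unsorted_segment_mean(features, bag_indices, num_bags)
    raise ValueError('unsupported pooling: %s' % pooling)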
Example n. 31
0
def Encoder_resnet(x,
                   depth=None,
                   is_training=True,
                   weight_decay=0.001,
                   reuse=False):
    """
    Resnet v2-50
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - depth : N x H x W x 1
    - weight_decay: float
    - reuse: bool->True if test

    Outputs:
    - cam: N x 3
    - Pose vector: N x 72
    - Shape vector: N x 10
    - variables: tf variables
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    with tf.name_scope("Encoder_resnet", [x, depth]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            """ added by CCJ from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py;
                (*) Args: reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given.
                (*) resnet_v2() Returns:
                'net': A rank-4 tensor of size [batch, height_out, width_out, channels_out].
                      - If 'global_pool' is False, then height_out and width_out are reduced by a factor of output_stride compared to the respective height_in and width_in,
                      - else both height_out and width_out equal one. 
                      - If num_classes is None, then net is the output of the last ResNet block, potentially after global average pooling. 
                      - If num_classes is not None, net contains the pre-softmax activations.
                (*) end_points: A dictionary from components of the network to the corresponding activation.
            """
            net, end_points = resnet_v2.resnet_v2_50(
                inputs=x,  # inputs,
                num_classes=None,
                is_training=is_training,
                #added by CCJ: global_pool=True as default;
                reuse=reuse,
                scope='resnet_v2_50'
                #scope='resnet_v2_50_img'
            )
            net = tf.squeeze(net, axis=[1, 2])

            # added by CCJ: for depth encoded by resnet_v2_50
            net_depth = None
            if depth is not None:
                net_depth, end_points_depth = resnet_v2.resnet_v2_50(
                    inputs=depth,  # inputs,
                    num_classes=None,
                    is_training=is_training,
                    #added by CCJ: global_pool=True as default;
                    #reuse= tf.AUTO_REUSE,
                    scope='resnet_v2_50_dep')
                net_depth = tf.squeeze(net_depth, axis=[1, 2])

    # NOTE: this scope name must match scope='resnet_v2_50' used above.
    variables = tf.contrib.framework.get_variables('resnet_v2_50')
    if depth is not None:
        variables = variables + tf.contrib.framework.get_variables(
            'resnet_v2_50_dep')
    #return net, variables
    return net, net_depth, variables
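A minimal usage sketch for the two-stream encoder above (TF 1.x; the placeholder names and shapes are illustrative assumptions):

# Sketch only: feed an RGB image and a one-channel depth map through the encoder.
img = tf.placeholder(tf.float32, [None, 224, 224, 3], name='img')
dep = tf.placeholder(tf.float32, [None, 224, 224, 1], name='depth')
img_feat, dep_feat, enc_vars = Encoder_resnet(img, depth=dep, is_training=True)
# img_feat and dep_feat are [N, 2048] globally pooled ResNet-v2-50 features.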
Example n. 32
0
def test_one_with_aug_multi():
    '''
    Step 1: Create dirs for saving models and logs
    '''
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_id

    # plain string concatenation; os.path.join with a single argument was misleading
    pretrained_model_path_suffix = (FLAGS.network_def + '_' + FLAGS.version +
                                    '_train_multi' + '_imagesize_' +
                                    str(FLAGS.img_size) + '_batchsize_' +
                                    str(FLAGS.batch_size) + '_experiment_' +
                                    FLAGS.experiment_id)
    pretrained_model_save_dir = os.path.join(
        '../../data/results_multi/model_weights', pretrained_model_path_suffix)

    print('Test_one_with_aug_multi: ' + pretrained_model_save_dir + ' ...')

    test_save_dir = os.path.join('../../submit/results_multi/test_B_with_aug',
                                 pretrained_model_path_suffix)
    os.makedirs(test_save_dir, exist_ok=True)
    '''
    Step 2: Create dataset and data generator
    '''
    test_set = parse_test_image_list(FLAGS.test_file)

    # test step configuration
    test_size = len(test_set)

    image_placeholder = tf.placeholder(
        dtype=tf.float32,
        shape=[None, FLAGS.img_height, FLAGS.img_width, FLAGS.img_depth])
    is_training = tf.placeholder(dtype=tf.bool)
    '''
    Step 3: Build network graph
    '''
    # logits = model.inference(image_placeholder, FLAGS.num_residual_blocks, reuse=False)
    feature, endpoints = resnet_v2.resnet_v2_50(image_placeholder,
                                                num_classes=None,
                                                reuse=False,
                                                is_training=is_training)
    feature = tf.squeeze(feature, axis=[1, 2])
    print('feature shape:', feature.get_shape())
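    # keep_prob=1 makes this dropout a no-op, which is the intended behavior at test time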
    feature = slim.dropout(feature, keep_prob=1)
    final_logits = slim.fully_connected(feature,
                                        num_outputs=2 *
                                        FLAGS.attribute_label_cnt,
                                        activation_fn=None)
    print('logits shape:', final_logits.get_shape())
    '''
    Step 4: Testing
    '''
    total_start_time = time.time()

    represent_label2attribute_vec_map = parse_attribute_per_class(
        FLAGS.attrs_per_class_dir)
    print('represent_label2attribute_vec_map: ',
          len(represent_label2attribute_vec_map))

    repre_label_list = []
    attr_vec_list = []
    for repre_label in represent_label2attribute_vec_map.keys():
        # print('REPER_LABEL', repre_label)
        repre_label_list.append(repre_label)
        attr_vec_list.append(represent_label2attribute_vec_map[repre_label])
    print('attribute_vec2represent_label_map: ', len(repre_label_list),
          len(attr_vec_list))

    whole_class_repre_list, whole_attr_np, _ = parse_repre_label2one_hot_map(
        FLAGS.attrs_per_class_dir)
    # ####################### use train set to valid
    train_image2represent_label_map = parse_train_image2represent_label_map(
        FLAGS.train_file)
    print('train file', len(train_image2represent_label_map))

    gt_attr_save_dir = os.path.join('../../data/results_gt_attr_with_latent',
                                    pretrained_model_path_suffix)
    gt_la_attr = np.load(os.path.join(gt_attr_save_dir, 'gt_la.npz'))['list']
    print('gt_la_attr:', gt_la_attr, gt_la_attr.shape)

    repre_label2true_label_map = parse_represent_label2true_label_map(
        FLAGS.label_list)
    word_embedding_per_class = parse_word_embedding_per_class(
        FLAGS.class_wordembeddings)

    whole_word_list = []
    for i in range(len(whole_class_repre_list)):
        true_label = repre_label2true_label_map[whole_class_repre_list[i]]
        word = word_embedding_per_class[true_label]
        whole_word_list.append(word)

    whole_word_np = np.array(whole_word_list, dtype=np.float32)
    print('whole_word_np', whole_word_np.shape, whole_word_np)

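    # Per-class attribute matrix: annotated attribute columns concatenated
    # with the loaded gt_la vectors.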
    gt_attr = np.concatenate(
        (whole_attr_np[:, 0:FLAGS.attribute_label_cnt], gt_la_attr), axis=1)
    print('gt_attr', gt_attr, gt_attr.shape)

    total_class_set_list, _, _ = parse_repre_label2one_hot_map(
        FLAGS.attrs_per_class_dir)
    print('Total class set', total_class_set_list, len(total_class_set_list))

    train_table = []
    train_class_table = []
    with open(FLAGS.train_file, 'r') as f:
        for line in f.readlines():
            image_name = line.split('\t')[0]
            class_repre = line.split('\t')[1].replace('\n', '')
            train_table.append(image_name)
            if class_repre not in train_class_table:
                train_class_table.append(class_repre)
    print('READING LABELS OF TRAIN DATA')
    print('Total num:', len(train_table))

    train_class_set_list = [
        item for item in total_class_set_list if item in train_class_table
    ]
    print('Train class set', train_class_set_list, len(train_class_set_list))

    unseen_class_set_list = [
        item for item in total_class_set_list if item not in train_class_table
    ]
    print('unseen_class_set_list', unseen_class_set_list,
          len(unseen_class_set_list))

    train_class_index_list = []
    unseen_class_index_list = []
    for i in range(len(total_class_set_list)):
        if total_class_set_list[i] in train_class_set_list:
            train_class_index_list.append(i)
        else:
            unseen_class_index_list.append(i)
    print('train_class_index_list', train_class_index_list,
          len(train_class_index_list))
    print('unseen_class_index_list', unseen_class_index_list,
          len(unseen_class_index_list))

    device_count = {'GPU': 1} if FLAGS.use_gpu else {'GPU': 0}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        # Create model saver
        saver = tf.train.Saver()

        # Init all vars
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        # Restore pretrained weights
        pretrained_model = pretrained_model_save_dir
        checkpoint = tf.train.get_checkpoint_state(pretrained_model)
        ckpt = checkpoint.model_checkpoint_path  # path of the most recently saved checkpoint
        saver.restore(sess, ckpt)
        for variable in tf.trainable_variables():  # check weights
            with tf.variable_scope('', reuse=True):
                var = tf.get_variable(variable.name.split(':0')[0])
                print(variable.name, np.mean(sess.run(var)))

        # Test start
        step = 0
        pred_labels_total = []
        while True:
            if step < test_size:
                image_name = test_set[step]
                step = step + 1

                image_data = aug_test_image(is_train=False,
                                            name=image_name,
                                            aug_num=FLAGS.aug_num)

                batch_start_time = time.time()

                pred_logits = sess.run([final_logits],
                                       feed_dict={
                                           image_placeholder: image_data,
                                           is_training: False
                                       })
                pred_logits = np.array(pred_logits).squeeze()

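                # Dot-product compatibility between the predicted attribute vectors
                # (one per augmented crop) and every class's attribute vector.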
                scores = np.matmul(pred_logits, gt_attr.T)
                print('scores_shape', scores.shape)

                scores_unseen = np.zeros(
                    (FLAGS.aug_num, len(unseen_class_index_list)),
                    dtype=np.float32)
                for i in range(len(unseen_class_index_list)):
                    scores_unseen[:, i] = scores[:, unseen_class_index_list[i]]
                print('scores_unseen', scores_unseen)

                max_scores_indexes = np.argmax(scores_unseen, axis=1)
                print('max_scores_indexes', max_scores_indexes)

                pred_class_index = []
                for i in range(max_scores_indexes.shape[0]):
                    pred_class_index.append(
                        unseen_class_index_list[max_scores_indexes[i]])
                print('pred_class_index', pred_class_index)

                pred_repre_labels = []
                for i in range(len(pred_class_index)):
                    pred_repre_labels.append(
                        whole_class_repre_list[pred_class_index[i]])

                pred_label_set = list(set(pred_repre_labels))
                print('pred_label_set: ', pred_label_set)
                pred_label_set_num = len(pred_label_set)
                pred_label_set_count = np.zeros(pred_label_set_num,
                                                dtype=np.int32)
                for pred in pred_repre_labels:
                    for j in range(pred_label_set_num):
                        if pred == pred_label_set[j]:
                            pred_label_set_count[j] += 1

                max_index = int(np.argmax(pred_label_set_count))
                pred_label_after_vote = pred_label_set[max_index]
                print('pred_label_after_vote', pred_label_after_vote)

                pred_labels_total.append(pred_label_after_vote)

                print('[%s][testing %d][step %d / %d exec %.2f seconds]' %
                      (time.strftime("%Y-%m-%d %H:%M:%S"), 1, step, test_size,
                       (time.time() - batch_start_time)))
            else:
                break

    print('Testing done.')
    print("[%s][total exec %s seconds" % (time.strftime("%Y-%m-%d %H:%M:%S"),
                                          (time.time() - total_start_time)))

    # write to submit.txt
    with open(
            os.path.join(test_save_dir,
                         'submit_{}.txt'.format(time.strftime("%Y%m%d_%H%M%S"))),
            'w') as f:
        for i in range(len(test_set)):
            # print('LINES', i)
            f.write(test_set[i] + '\t' + pred_labels_total[i] + '\n')
        # the with-block closes the file; no explicit f.close() is needed
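The per-image voting loop above can be written more compactly with collections.Counter; a refactoring sketch (not part of the original example):

# Sketch only: majority vote over the labels predicted for the augmented crops.
from collections import Counter

def vote(pred_repre_labels):
    """Return the label predicted most often across the augmented crops."""
    return Counter(pred_repre_labels).most_common(1)[0][0]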