Example #1
def tower_loss(scope):
    images, labels = read_and_decode()
    if net == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(images, num_classes=FLAGS.num_classes)
    elif net == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_19(images, num_classes=FLAGS.num_classes)
    elif net == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_101(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v2_50':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    else:
        raise Exception('No network matched with net %s.' % net)
    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)
    _ = cal_loss(logits, labels)
    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')
    for l in losses + [total_loss]:
        loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)
    return total_loss
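
A hedged sketch of how a tower_loss-style function is usually driven, following the classic CIFAR-10 multi-GPU pattern; FLAGS.num_gpus and the optimizer choice are assumptions, not part of the example above.

# Assumed multi-tower driver loop (sketch, not from the source):
opt = tf.train.GradientDescentOptimizer(0.01)
tower_grads = []
with tf.variable_scope(tf.get_variable_scope()):
    for i in range(FLAGS.num_gpus):  # FLAGS.num_gpus is an assumption
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                loss = tower_loss(scope)
                # reuse the same variables for every tower
                tf.get_variable_scope().reuse_variables()
                tower_grads.append(opt.compute_gradients(loss))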
Example #2
def get_slim_resnet_v1_byname(net_name,
                              inputs,
                              num_classes=None,
                              is_training=True,
                              global_pool=True,
                              output_stride=None,
                              weight_decay=0.):
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_50(
                inputs=inputs,
                num_classes=num_classes,
                is_training=is_training,
                global_pool=global_pool,
                output_stride=output_stride,
            )

        return logits, end_points
    if net_name == 'resnet_v1_101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(
                inputs=inputs,
                num_classes=num_classes,
                is_training=is_training,
                global_pool=global_pool,
                output_stride=output_stride,
            )
        return logits, end_points
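
A hypothetical usage sketch (not from the source): build the graph, then restore published slim ImageNet weights. The checkpoint path is an assumption.

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = get_slim_resnet_v1_byname(
    'resnet_v1_50', images, num_classes=1000, is_training=False)
init_fn = slim.assign_from_checkpoint_fn(
    '/path/to/resnet_v1_50.ckpt',  # assumed checkpoint path
    slim.get_model_variables('resnet_v1_50'))
with tf.Session() as sess:
    init_fn(sess)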
Example #3
    def build_2(self,
                inputs,
                input_pixel_size,
                is_training,
                scope='resnet_v1_101',
                weight_decay=0.0001):
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(
                inputs=inputs,
                num_classes=None,
                is_training=is_training,
                global_pool=False,
                output_stride=None,
                spatial_squeeze=False)

        # NOTE: self.share_net is assumed to be populated elsewhere in the
        # class with the end_points of the resnet_v1_101 build above.
        feature_maps_dict = {
            'C2': self.share_net['resnet_v1_101/block1/unit_2/bottleneck_v1'],
            'C3': self.share_net['resnet_v1_101/block2/unit_3/bottleneck_v1'],
            'C4': self.share_net['resnet_v1_101/block3/unit_22/bottleneck_v1'],
            'C5': self.share_net['resnet_v1_101/block4']
        }
        feature_maps_out = feature_maps_dict['C5']

        return feature_maps_out, feature_maps_dict
Example #4
    def getCNNFeatures(self, input_tensor, out_dim, fc_initializer):
        # Build on the caller's default graph so that input_tensor and the
        # ResNet ops live in the same graph.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(input_tensor,
                                                     num_classes=None)
        model_path = os.path.join(self.checkpoints_dir, self.ckpt_name)
        init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
            model_path, slim.get_model_variables('resnet_v1'))
        # fc_dim is assumed to be defined elsewhere (e.g. a module-level
        # constant equal to the flattened size of block4)
        flattened = tf.reshape(end_points["resnet_v1_50/block4"], [-1, fc_dim])
        print(flattened.get_shape())
        with vs.variable_scope('fc_resnet'):
            W = vs.get_variable("W", [fc_dim, out_dim],
                                initializer=fc_initializer)
            b = vs.get_variable("b", [out_dim], initializer=fc_initializer)
            output = tf.nn.relu(tf.matmul(flattened, W) + b)

        return init_fn, output


#TEST:
# cnn_f_extractor = CNN_FeatureExtractor()
# inputt = tf.constant(np.arange(12288, dtype=np.float32), shape=[1, 64, 64, 3])
# inputfn, features = cnn_f_extractor.getCNNFeatures(inputt, 256, tf.contrib.layers.variance_scaling_initializer())
# print(features.get_shape())
Example #5
File: mask_model.py Project: footh/tgs
    def build_model(self, inp, mode, regularizer=None):
        net = inp['img']

        training = (mode == tf.estimator.ModeKeys.TRAIN)

        with tf.variable_scope('encode'):
            with slim.arg_scope(
                    resnet_v1.resnet_arg_scope(
                        weight_decay=self.config_dict['ext']
                        ['encoder_l2_decay'])):
                net, _ = resnet_v1.resnet_v1_50(net,
                                                num_classes=None,
                                                is_training=training,
                                                global_pool=True)

        with tf.variable_scope('classify'):
            # net = tf.layers.max_pooling2d(net, net.shape.as_list()[1], 1)
            # net = tf.layers.conv2d(net, 1024, 1, kernel_regularizer=regularizer)
            net = tf.layers.conv2d(net,
                                   self.config_dict['label_cnt'],
                                   1,
                                   kernel_regularizer=regularizer)
            logits = tf.squeeze(net, axis=(1, 2))

        return logits
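
A hedged follow-up (not in the source file): a multi-label sigmoid training head over the returned logits, assuming labels is a 0/1 tensor of shape [batch, label_cnt].

# Hypothetical training head; `labels` with shape [batch, label_cnt] is an
# assumption matching the logits above.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))
train_op = tf.train.AdamOptimizer(1e-4).minimize(
    loss, global_step=tf.train.get_or_create_global_step())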
Example #6
def top_feature_net(input, anchors, inds_inside, num_bases):
  stride=8
  with tf.variable_scope("top_base") as sc:
    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope) :
      net, end_points = resnet_v1.resnet_v1_50(input, None, global_pool=False, output_stride=8)
      #pdb.set_trace()
      block=end_points['top_base/resnet_v1_50/block4']
      # block   = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
      tf.summary.histogram('rpn_top_block', block) 
      # tf.summary.histogram('rpn_top_block_weights', tf.get_collection('2/conv_weight')[0])
    with tf.variable_scope('top') as scope:
      #up     = upsample2d(block, factor = 2, has_bias=True, trainable=True, name='1')
      #up     = block
      up      = conv2d_bn_relu(block, num_kernels=128, kernel_size=(3,3), stride=[1,1,1,1], padding='SAME', name='2')
      scores  = conv2d(up, num_kernels=2*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='score')
      probs   = tf.nn.softmax( tf.reshape(scores,[-1,2]), name='prob')
      deltas  = conv2d(up, num_kernels=4*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='delta')

    #<todo> flip to train and test mode nms (e.g. different nms_pre_topn values): use tf.cond
    with tf.variable_scope('top-nms') as scope:    #non-max
      batch_size, img_height, img_width, img_channel = input.get_shape().as_list()
      img_scale = 1
      # pdb.set_trace()
      rois, roi_scores = tf_rpn_nms( probs, deltas, anchors, inds_inside,
                                       stride, img_width, img_height, img_scale,
                                       nms_thresh=0.7, min_size=stride, nms_pre_topn=300, nms_post_topn=50,
                                       name ='nms')
  
    #<todo> feature = upsample2d(block, factor = 4,  ...)
    feature = block

  # assumed return values, matching the tensors built above
  return feature, scores, probs, deltas, rois, roi_scores
Example #7
def Encoder_resnet_v1_101(x, weight_decay, is_training=True, reuse=False):
    """
    Resnet v1-101 encoder, adds 2 fc layers after Resnet.
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - weight_decay: float
    - reuse: bool-> True if test

    Outputs:
    - net: N x F
    - variables: tf variables
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v1
    with tf.name_scope("Encoder_resnet_v1_101", [x]):
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            net, end_points = resnet_v1.resnet_v1_101(x,
                                                      num_classes=None,
                                                      is_training=is_training,
                                                      reuse=reuse,
                                                      scope='resnet_v1_101')
            net = tf.reshape(net, [net.shape.as_list()[0], -1])
    variables = tf.contrib.framework.get_variables('resnet_v1_101')
    return net, variables
Example #8
def resnet_50(input_image):
    arg_scope = resnet_v1.resnet_arg_scope()
    with slim.arg_scope(arg_scope):
        features, _ = resnet_v1.resnet_v1_50(input_image)
        # flatten: squeeze only the spatial dims so a batch of size 1
        # keeps its batch dimension
        features = tf.squeeze(features, axis=[1, 2])
    return features
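
A minimal usage sketch (assumed): run the extractor on preprocessed 224x224 images after restoring a checkpoint. The path and image batch are placeholders, not from the source.

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
features = resnet_50(images)
saver = tf.train.Saver(slim.get_model_variables('resnet_v1_50'))
with tf.Session() as sess:
    saver.restore(sess, '/path/to/resnet_v1_50.ckpt')  # assumed path
    vectors = sess.run(features, feed_dict={images: image_batch})  # image_batch: your data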
Example #9
File: resnet.py Project: liruixl/cifar
    def build_graph(self):
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
            logits, end_point = resnet_v1.resnet_v1_50(
                self.input, num_classes=self.num_classes, scope='resnet_v1_50')
            # logits is [-1, 1, 1, dim] after global pooling
            dim = logits.get_shape()[-1]
            assert dim == self.num_classes
            self.logits = tf.reshape(logits, [-1, dim])
Example #10
    def build(self):
        # Input
        self.input = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.img_size[0], self.img_size[1], self.img_size[2]])
        self.input_mean = tfutils.mean_value(self.input, self.img_mean)
        if self.base_net == 'vgg16':
            with slim.arg_scope(vgg.vgg_arg_scope()):
                outputs, end_points = vgg.vgg_16(self.input_mean,
                                                 self.num_classes)
                self.prob = tf.nn.softmax(outputs, -1)
                self.logits = outputs

        elif self.base_net == 'res50':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_50(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        elif self.base_net == 'res101':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_101(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        elif self.base_net == 'res152':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_152(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        else:
            raise ValueError(
                'base network should be vgg16, res50, -101, -152...')
        self.gt = tf.placeholder(dtype=tf.int32, shape=[None])
        # self.var_list = tf.trainable_variables()

        if self.is_train:
            self.loss()
Example #11
def main(_):
  os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  with tf.Graph().as_default() as g:
    with open(FLAGS.input_fname, 'r') as f:
      filenames = [line.split(',')[0][:-4] for line in f.readlines()]
      filenames = [
          os.path.join(FLAGS.image_dir, name) for name in filenames \
              if not os.path.exists(os.path.join(FLAGS.output_dir, name + '.npy'))
      ]

    filename_queue = tf.train.string_input_producer(filenames)
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    image = tf.image.decode_jpeg(value, channels=3)
    image_size = resnet_v1.resnet_v1.default_image_size
    processed_image = vgg_preprocessing.preprocess_image(
        image, image_size, image_size, is_training=False
    )
    processed_images, keys = tf.train.batch(
        [processed_image, key],
        FLAGS.batch_size,
        num_threads=8, capacity=8*FLAGS.batch_size*5,
        allow_smaller_final_batch=True
    )

    # Create the model
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      net, end_points = resnet_v1.resnet_v1_101(
          processed_images, num_classes=1000, is_training=False
      )
      init_fn = slim.assign_from_checkpoint_fn(
          FLAGS.checkpoint_dir, slim.get_model_variables()
      )
      pool5 = g.get_operation_by_name('resnet_v1_101/pool5').outputs[0]
      pool5 = tf.transpose(pool5, perm=[0, 3, 1, 2])  # (batch_size, 2048, 1, 1)

      with tf.Session() as sess:
        init_fn(sess)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
          for step in tqdm(range(len(filenames) // FLAGS.batch_size + 1), ncols=70):
            if coord.should_stop():
              break
            file_names, pool5_value = sess.run([keys, pool5])
            for i in range(len(file_names)):
              np.save(os.path.join(FLAGS.output_dir, os.path.basename(file_names[i]).decode('utf-8') + '.npy'), pool5_value[i].astype(np.float32))
        except tf.errors.OutOfRangeError:
          print("Done feature extraction -- epoch limit reached")
        finally:
          coord.request_stop()

        coord.join(threads)
Example #12
    def inference(self):
        x = tf.reshape(self.x, shape=[-1, self.input_shape[0],
                                      self.input_shape[1], self.input_shape[2]])
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(
                x,
                num_classes=self.nclasses,
                is_training=self.is_training,
                # spatial_squeeze=True,
                global_pool=True)
        # remove in the future if squeeze gets built into the resnet_v1 function
        net = array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze')
        return net
Example #13
def resNet(images, is_training=True, reuse=False, scope=None):
    """Constructs network based on resnet_v1_50.
    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The parameters for weight_decay regularizer.
      is_training: Whether or not in training mode.
      reuse: Whether or not the layer and its variables should be reused.
    Returns:
      feature_map: Features extracted from the model, which are not l2-normalized.
    """
    # Construct Resnet50 features.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=0.0001)):
        block = resnet_v1.resnet_v1_block
        blocks = [
            block('block1', base_depth=64, num_units=3, stride=2),
            block('block2', base_depth=128, num_units=4, stride=2),
            block('block3', base_depth=256, num_units=6, stride=1),
            block('block4', base_depth=512, num_units=3, stride=1)
        ]

        x30, end_points = resnet_v1.resnet_v1(images,
                                              blocks,
                                              is_training=is_training,
                                              global_pool=False,
                                              reuse=reuse,
                                              scope=scope,
                                              include_root_block=True)

    x60 = end_points[scope + '/block1']
    x60 = slim.conv2d(x60,
                      64, [1, 1],
                      1,
                      padding='SAME',
                      activation_fn=None,
                      reuse=reuse,
                      scope='conv2d_final_x60')

    x30 = slim.conv2d(x30,
                      512, [1, 1],
                      1,
                      padding='SAME',
                      activation_fn=None,
                      reuse=reuse,
                      scope='conv2d_final_x30')

    # get layer outputs we want
    end_points_ = {}
    #  end_points_ = end_points['resnet_v1_50/block2']
    #  end_points_ = end_points['resnet_v1_50/block3']
    #  end_points_ = end_points['resnet_v1_50/block4']
    #  end_points_['x30'] = end_points['resnet_v1_50/final']
    end_points_['x60'] = x60
    end_points_['x30'] = x30

    return end_points_
Example #14
def rgb_feature_net(input):

    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope):
        net, end_points = resnet_v1.resnet_v1_50(input, None, global_pool=False, output_stride=8)
        block = end_points['resnet_v1_50/block4']
        # block   = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
        tf.summary.histogram('rgb_top_block', block)
    #<todo> feature = upsample2d(block, factor = 4,  ...)
    feature = block
    return feature
Example #15
    def _vision(preprocessed_inputs, reuse=True):
        # nested helper: config, num_hidden_hyper and self are captured from
        # the enclosing scope
        with tf.variable_scope("vision", reuse=reuse):
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                resnet_output, _ = resnet_v1.resnet_v1_50(
                    preprocessed_inputs, is_training=True)
            if not config["fine_tune_vision"]:
                resnet_output = tf.stop_gradient(resnet_output)
            resnet_output = tf.squeeze(resnet_output, axis=[1, 2])
            resnet_output = tf.nn.dropout(
                resnet_output, keep_prob=self.vision_keep_prob_ph)
            vision_result = slim.fully_connected(resnet_output,
                                                 num_hidden_hyper,
                                                 activation_fn=None)
        return vision_result, resnet_output
Example #16
def inference(hypes, images, train=True):
    """
    Build ResNet encoder

    :param hypes:
    :param images:
    :param train:
    :return:
    """
    is_training = tf.convert_to_tensor(train, dtype='bool', name='is_training')

    layers = hypes['arch']['layers']
    deep_feat = hypes['arch'].get('deep_feat', 'block4')
    early_feat = hypes['arch'].get('early_feat', 'block1')

    blocks = ['block1', 'block2', 'block3', 'block4']

    assert early_feat in blocks
    assert deep_feat in blocks[1:]

    if layers == 50:
        resnet = resnet_v1.resnet_v1_50
    elif layers == 101:
        resnet = resnet_v1.resnet_v1_101
    elif layers == 152:
        resnet = resnet_v1.resnet_v1_152
    else:
        logging.error('Resnet only has 50, 101, or 152 layers. Got %s.', layers)
        exit(1)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training)):
        logits, endpoints = resnet(images)

        for name in blocks:
            layer_name = 'resnet_v1_%d/%s' % (layers, name)
            tf.summary.histogram('/%s_activation' % name,
                                 endpoints[layer_name])
            tf.summary.scalar('/%s_sparsity' % name,
                              tf.nn.zero_fraction(endpoints[layer_name]))

    if train:
        restore = tf.global_variables()
        hypes['init_function'] = _initalize_variables
        hypes['restore'] = restore

    return {
        'early_feat': endpoints['resnet_v1_%d/%s' % (layers, early_feat)],
        'deep_feat': endpoints['resnet_v1_%d/%s' % (layers, deep_feat)]
    }
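
A sketch of the hypes dictionary this encoder expects, inferred from the keys it reads above; the input shape is an arbitrary example.

hypes = {'arch': {'layers': 50, 'deep_feat': 'block4', 'early_feat': 'block1'}}
images = tf.placeholder(tf.float32, [None, 384, 1248, 3])
feats = inference(hypes, images, train=False)
early, deep = feats['early_feat'], feats['deep_feat']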
Example #17
File: trainer.py Project: guker/MoVNect
    def teacher(self, x, j):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            x = utils.nchw_to_nhwc(x)
            batch_out, batch_list = resnet_v1.resnet_v1_50(x, 1000, is_training=True)
            feature = batch_list['resnet_v1_50/block2/unit_4/bottleneck_v1/conv1']

        self.init_fn_1 = slim.assign_from_checkpoint_fn(
            self.pre_dir + '/resnet_v1_50.ckpt', slim.get_model_variables('resnet_v1_50'))
        # del entries which have no gradient
        # print(batch_list)
        x = utils.nhwc_to_nchw(feature)
        x, var = vnect(x, j)
        return x, var
Example #18
File: encoder.py Project: footh/tgs
def build_resnet50_v1(img_input, l2_weight_decay=0.01, is_training=True, prefix=''):
    """
        Builds resnet50_v1 model from slim, with strides reversed.

        Returns the last five block outputs to be used transposed convolution layers
    """

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=l2_weight_decay)):
        block4, endpoints = resnet_v1_50(img_input, is_training=is_training, global_pool=False)

    block3 = endpoints[f'{prefix}resnet_v1_50/block3']
    block2 = endpoints[f'{prefix}resnet_v1_50/block2']
    block1 = endpoints[f'{prefix}resnet_v1_50/block1']
    conv1 = endpoints[f'{prefix}resnet_v1_50/conv1']

    return conv1, block1, block2, block3, block4
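
A hedged decoder sketch (not from the source project): treat the returned tensors as a feature pyramid and take one upsample-and-merge step, assuming block3 has twice the spatial resolution of block4.

img_input = tf.placeholder(tf.float32, [None, 224, 224, 3])
conv1, block1, block2, block3, block4 = build_resnet50_v1(img_input)
# one transposed-convolution step of a hypothetical U-Net style decoder
up = tf.layers.conv2d_transpose(block4, 256, 3, strides=2, padding='same')
merged = tf.concat([up, block3], axis=-1)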
Example #19
def batch_pred(models_path, images_list, labels_nums, data_format):

    [batch_size, resize_height, resize_width, depths] = data_format
    input_images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, resize_height, resize_width, depths],
        name='input')

    # model
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        out, end_points = resnet_v1.resnet_v1_50(inputs=input_images,
                                                 num_classes=labels_nums,
                                                 is_training=False)

    out = tf.squeeze(out, [1, 2])

    score = tf.nn.softmax(out, name='pre')
    class_id = tf.argmax(score, 1)

    gpu_options = tf.GPUOptions(allow_growth=False)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, models_path)
        tot = len(images_list)

        for idx in range(0, tot, batch_size):
            images = list()
            idx_end = min(tot, idx + batch_size)
            print(idx)
            for i in range(idx, idx_end):
                image_path = images_list[i]
                image = open(image_path, 'rb').read()
                image = tf.image.decode_jpeg(image, channels=3)
                processed_image = preprocess_image(image, resize_height,
                                                   resize_width)
                processed_image = sess.run(processed_image)
                # print("processed_image.shape", processed_image.shape)
                images.append(processed_image)
            images = np.array(images)
            start = time.time()
            sess.run([score, class_id], feed_dict={input_images: images})
            end = time.time()
            print("time of batch {} is %f".format(batch_size) % (end - start))

Example #20
    def backbone(self):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            with slim.arg_scope([slim.conv2d], trainable=False):
                # output, end_points = resnet_v1.resnet_v1_50(self.inputs, num_classes=cfgs.NUM_CLASS, is_training=self.is_training)
                output, end_points = resnet_v1.resnet_v1_101(
                    self.inputs,
                    num_classes=None,
                    is_training=self.is_training,
                    global_pool=False)

        output = slim.conv2d(output,
                             cfgs.NUM_CLASS, [1, 1],
                             activation_fn=None,
                             normalizer_fn=None,
                             scope='logits')

        output = tf.reduce_mean(output, [1, 2], name='global_pool')
        # note: these are class probabilities, not raw logits
        probs = tf.nn.softmax(output)

        return output, probs
Example #21
def resnet(x, num_classes=1000, is_train=False, reuse=False):

    net_in = tl.layers.InputLayer(x, name='input_layer')
    with slim.arg_scope(resnet_arg_scope()):
        ## model_ and model_name are assumed module-level globals selecting
        ## the slim network function and its checkpoint scope name.
        ## Alternatively, you could call the slim network directly without
        ## TensorLayer:
        # logits, end_points = model_(x, num_classes=num_classes,
        #                             is_training=is_train, reuse=reuse)
        network = tl.layers.SlimNetsLayer(
            prev_layer=net_in,
            slim_layer=model_,
            slim_args={
                'num_classes': num_classes,
                'is_training': is_train,
                'reuse': reuse
            },
            name=model_name  # <-- the name should match the ckpt model
        )
        y = tf.reshape(network.outputs, [-1, num_classes])
#        y = tf.nn.softmax(y)
    return network, y
Example #22
def main():
    tf.reset_default_graph()

    input_node = tf.placeholder(tf.float32,
                                shape=(1, 224, 224, 3),
                                name="input")
    print("input_node:", input_node)

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, _ = resnet_v1.resnet_v1_50(input_node, 1000, is_training=False)
        print("net:", net)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, model_path)

        tf.train.write_graph(sess.graph_def, './pb_model', 'model.pb')

        freeze_graph.freeze_graph('pb_model/model.pb', '', False, model_path,
                                  'resnet_v1_50/logits/BiasAdd',
                                  'save/restore_all', 'save/Const:0',
                                  'pb_model/frozen_resnet_v1_50.pb', False, "")

    print("done")
Example #23
def get_model(input_pls,
              is_training,
              bn=False,
              bn_decay=None,
              img_size=224,
              FLAGS=None):

    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu
    else:
        raise ValueError('Unsupported activation: %s' % FLAGS.act)

    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']

    batch_size = input_imgs.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs  #* 10
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs  # B*H*W*3|4

    # Image extract features
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder == "resnet_v1_50":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_50')
        scopelst = [
            "resnet_v1_50/block1", "resnet_v1_50/block2",
            "resnet_v1_50/block3", 'resnet_v1_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v1_101":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_101')
        scopelst = [
            "resnet_v1_101/block1", "resnet_v1_101/block2",
            "resnet_v1_101/block3", 'resnet_v1_101/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_50":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):  # v2 nets use the v2 arg scope
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_50')
        scopelst = [
            "resnet_v2_50/block1", "resnet_v2_50/block2",
            "resnet_v2_50/block3", 'resnet_v2_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_101":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):  # v2 nets use the v2 arg scope
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_101')
        scopelst = [
            "resnet_v2_101/block1", "resnet_v2_101/block2",
            "resnet_v2_101/block3", 'resnet_v2_101/block4'
        ]
    end_points['img_embedding'] = ref_feats_embedding
    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts,
                                       input_trans_mat)  # B * N * 2

    if FLAGS.img_feat_onestream:
        with tf.compat.v1.variable_scope("sdfimgfeat") as scope:
            if FLAGS.encoder[:3] == "vgg":
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv1/conv1_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv2/conv2_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv3/conv3_3'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                if FLAGS.encoder[-7:] != "smaller":
                    conv4 = tf.compat.v1.image.resize_bilinear(
                        encdr_end_points['vgg_16/conv4/conv4_3'],
                        (FLAGS.img_h, FLAGS.img_w))
                    point_conv4 = tf.contrib.resampler.resampler(
                        conv4, sample_img_points)
                    point_img_feat = tf.concat(axis=2,
                                               values=[
                                                   point_conv1, point_conv2,
                                                   point_conv3, point_conv4
                                               ])  # small
                else:
                    print("smaller vgg")
                    point_img_feat = tf.concat(
                        axis=2, values=[point_conv1, point_conv2,
                                        point_conv3])  # small
            elif FLAGS.encoder[:3] == "res":
                # print(encdr_end_points.keys())
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[0]], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[1]], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[2]], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            else:
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[0], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[1], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[2], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)
            if FLAGS.decoder == "att":
                gvfs_feat = gvfnet.get_gvf_att_imgfeat(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            elif FLAGS.decoder == "skip":
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream_skip(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            else:
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
    else:
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction") as scope:
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)
    end_points['pred_gvfs_xyz'], end_points['pred_gvfs_dist'], end_points[
        'pred_gvfs_direction'] = None, None, None
    if FLAGS.XYZ:
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2,
                          keepdims=True))
        end_points[
            'pred_gvfs_direction'] = end_points['pred_gvfs_xyz'] / tf.maximum(
                end_points['pred_gvfs_dist'], 1e-6)
    else:
        end_points['pred_gvfs_dist'], end_points[
            'pred_gvfs_direction'] = gvfnet.dist_direct_gvfhead(
                gvfs_feat,
                batch_size,
                wd=FLAGS.wd,
                activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = end_points[
            'pred_gvfs_direction'] * end_points['pred_gvfs_dist']

    end_points["sample_img_points"] = sample_img_points
    # end_points["ref_feats_embedding"] = ref_feats_embedding
    end_points["point_img_feat"] = point_img_feat

    return end_points
Example #24
def rgb_feature_net(input):

    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope):
        net, end_points = resnet_v1.resnet_v1_50(input,
                                                 None,
                                                 global_pool=False,
                                                 output_stride=8)
        # pdb.set_trace()
        block4 = end_points['resnet_v1_50/block4']
        block3 = end_points['resnet_v1_50/block3']
        block2 = end_points['resnet_v1_50/block2']
        # block1=end_points['resnet_v1_50/block1/unit_3/bottleneck_v1/conv1']
        with tf.variable_scope("rgb_up") as sc:
            block4_ = conv2d_bn_relu(block4,
                                     num_kernels=256,
                                     kernel_size=(1, 1),
                                     stride=[1, 1, 1, 1],
                                     padding='SAME',
                                     name='4')
            up4 = upsample2d(block4_,
                             factor=2,
                             has_bias=True,
                             trainable=True,
                             name='up4')
            block3_ = conv2d_bn_relu(block3,
                                     num_kernels=256,
                                     kernel_size=(1, 1),
                                     stride=[1, 1, 1, 1],
                                     padding='SAME',
                                     name='3')
            up3 = upsample2d(block3_,
                             factor=2,
                             has_bias=True,
                             trainable=True,
                             name='up3')
            block2_ = conv2d_bn_relu(block2,
                                     num_kernels=256,
                                     kernel_size=(1, 1),
                                     stride=[1, 1, 1, 1],
                                     padding='SAME',
                                     name='2')
            up2 = upsample2d(block2_,
                             factor=2,
                             has_bias=True,
                             trainable=True,
                             name='up2')
            up_34 = tf.add(up4, up3, name="up_add_3_4")
            up = tf.add(up_34, up2, name="up_add_3_4_2")
            block = conv2d_bn_relu(up,
                                   num_kernels=256,
                                   kernel_size=(3, 3),
                                   stride=[1, 1, 1, 1],
                                   padding='SAME',
                                   name='rgb_ft')
            # block1_   = conv2d_bn_relu(block1, num_kernels=256, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='1')
            # up      =tf.add(block1_, up_, name="up_add")
        # block   = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
    #<todo> feature = upsample2d(block, factor = 4,  ...)
        tf.summary.histogram('rgb_top_block', block)

    feature = block
    return feature
Example #25
def evaluate():
    g = tf.Graph()
    with g.as_default():

        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)
        # Split into sequences. For the CNN models this simply forms batches
        # of length seq_length; for the CNN-RNN models there is no check that
        # the images in a sequence are consecutive, come from the same video,
        # or show the same person.
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer([images, labels, images],
                                                    num_epochs=None,
                                                    shuffle=False,
                                                    seed=None,
                                                    capacity=1000,
                                                    shared_name=None,
                                                    name=None)
        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)

        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values in range: [-1,1]

        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'vggface_4096':
            from vggface import vggface_4096x4096x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        elif FLAGS.network == 'vggface_2000':
            from vggface import vggface_4096x2000x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        elif FLAGS.network == 'affwildnet_resnet':
            from tensorflow.contrib.slim.python.slim.nets import resnet_v1
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, _ = resnet_v1.resnet_v1_50(inputs=images_batch,
                                                is_training=False,
                                                num_classes=None)

                with tf.variable_scope('rnn') as scope:
                    cnn = tf.reshape(
                        net, [FLAGS.batch_size, FLAGS.seq_length, -1])
                    cell = tf.nn.rnn_cell.MultiRNNCell(
                        [tf.nn.rnn_cell.GRUCell(128) for _ in range(2)])
                    outputs, _ = tf.nn.dynamic_rnn(cell, cnn, dtype=tf.float32)
                    outputs = tf.reshape(
                        outputs,
                        (FLAGS.batch_size * FLAGS.seq_length, 128))

                    weights_initializer = tf.truncated_normal_initializer(
                        stddev=0.01)
                    weights = tf.get_variable('weights_output',
                                              shape=[128, 2],
                                              initializer=weights_initializer,
                                              trainable=True)
                    biases = tf.get_variable('biases_output',
                                             shape=[2],
                                             initializer=tf.zeros_initializer,
                                             trainable=True)

                    prediction = tf.nn.xw_plus_b(outputs, weights, biases)

        elif FLAGS.network == 'affwildnet_vggface':
            from affwildnet import vggface_gru as net
            network = net.VGGFace(FLAGS.batch_size, FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        num_batches = int(len(image_list) / FLAGS.batch_size)

        variables_to_restore = tf.global_variables()

        with tf.Session() as sess:

            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)

            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))

            tf.train.start_queue_runners(sess=sess)

            coord = tf.train.Coordinator()

            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):

                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)

                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            conc_arousal = concordance_cc2(predictions[:, 1], labels[:, 1])
            conc_valence = concordance_cc2(predictions[:, 0], labels[:, 0])

            for i in range(len(predictions)):
                print("Labels: ", labels[i], "Predictions: ", predictions[i],
                      "Error: ", (abs(labels[i] - predictions[i])))
            print(
                "------------------------------------------------------------------------------"
            )
            print('Concordance on valence : {}'.format(conc_valence))
            print('Concordance on arousal : {}'.format(conc_arousal))
            print('Concordance on total : {}'.format(
                (conc_arousal + conc_valence) / 2))

            mse_arousal = sum(
                (predictions[:, 1] - labels[:, 1])**2) / len(labels[:, 1])
            print('MSE Arousal : {}'.format(mse_arousal))
            mse_valence = sum(
                (predictions[:, 0] - labels[:, 0])**2) / len(labels[:, 0])
            print('MSE Valence : {}'.format(mse_valence))

        return conc_valence, conc_arousal, (
            conc_arousal + conc_valence) / 2, mse_arousal, mse_valence
Example #26
def get_featuremap(net_name, input, num_classes=None):
    '''
    #tensorlayer
    input = tl.layers.InputLayer(input)
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_50,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_50'
                                                 )
            sv = tf.train.Supervisor()
            with sv.managed_session() as sess:
                a = sess.run(featuremap.all_layers)
                print(a)
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_101,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_101'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'resnet_v1_152':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_152,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_152'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'vgg16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=vgg.vgg_16,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'spatial_squeeze': False
                                                 },
                                                 name='vgg_16'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    '''

    #slim
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_50(
                inputs=input,
                num_classes=num_classes,
                is_training=False,
                global_pool=False)
        if cfg.USE_FPN:
            feature_maps_dict = {
                'C2': layer_dic[
                    'resnet_v1_50/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_50/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_50/block3/unit_5/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_50/block4']  # [7, 7]
            }
            return feature_maps_dict
        return layer_dic['resnet_v1_50/block3/unit_5/bottleneck_v1']
        #return featuremap

    if net_name == 'resnet_v1_101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_101(
                inputs=input,
                num_classes=num_classes,
                is_training=True,
                global_pool=False)
        if cfg.USE_FPN:
            feature_maps_dict = {
                'C2': layer_dic[
                    'resnet_v1_101/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_101/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_101/block3/unit_22/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_101/block4']
            }
            return feature_maps_dict
        return featuremap

    if net_name == 'vgg_16':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = vgg.vgg_16(
                inputs=input,
                num_classes=7,
                is_training=False,
                spatial_squeeze=False,
            )

        return layer_dic['vgg_16/conv5/conv5_3']
def main(_):

    with tf.name_scope('input_placeholder'):
        mv_placeholder = tf.placeholder(tf.float32, 
                    shape=(None, FLAGS.num_segments, 224, 224, 3 ), name = 'mv_frame')
        flow_placeholder = tf.placeholder(tf.float32, 
                    shape=(None, FLAGS.num_segments, 224, 224, 3 ), name = 'flow_frame')
        i_placeholder = tf.placeholder(tf.float32,
                    shape=(None, FLAGS.num_segments, 224, 224, 3 ), name = 'i_frame')
        r_placeholder = tf.placeholder(tf.float32,
                    shape=(None, FLAGS.num_segments, 224, 224, 3 ), name = 'r_frame')

    with tf.name_scope('label_placeholder'):
        label_placeholder = tf.placeholder(tf.int32, shape=(None), name = 'labels')

    with tf.name_scope('accuracy'):
        combine_value_ = tf.placeholder(tf.float32, shape=(), name = 'accuracy')
        i_value_ = tf.placeholder(tf.float32, shape=(), name = 'accuracy')
        mv_value_ = tf.placeholder(tf.float32, shape=(), name = 'accuracy')
        r_value_ = tf.placeholder(tf.float32, shape=(), name = 'accuracy')
        tf.summary.scalar('combine_acc', combine_value_)
        tf.summary.scalar('i_acc', i_value_)
        tf.summary.scalar('mv_acc', mv_value_)
        tf.summary.scalar('r_acc', r_value_)
        
    print('Finish placeholder.')


    with tf.name_scope('flatten_input'):
        b_size = tf.shape(mv_placeholder)[0]
        flat_mv = tf.reshape(mv_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3]) # Since we have mulitple segments in a single video
        flat_flow = tf.reshape(flow_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])
        flat_i = tf.reshape(i_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])
        flat_r = tf.reshape(r_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])

    with tf.variable_scope('fc_var') as var_scope:
        mv_weights = {
            'w1': _variable_with_weight_decay('wmv1', [2048 , 512 ], 0.0005),
            'w2': _variable_with_weight_decay('wmv2', [512 , N_CLASS], 0.0005)
        }
        mv_biases = {
            'b1': _variable_with_weight_decay('bmv1', [ 512 ], 0.00),
            'b2': _variable_with_weight_decay('bmv2', [ N_CLASS ], 0.00)
        }
        i_weights = {
            'w1': _variable_with_weight_decay('wi1', [2048 , 512 ], 0.0005),
            'w2': _variable_with_weight_decay('wi2', [512 , N_CLASS], 0.0005)
        }
        i_biases = {
            'b1': _variable_with_weight_decay('bi1', [ 512 ], 0.00),
            'b2': _variable_with_weight_decay('bi2', [ N_CLASS ], 0.00)
        }
        r_weights = {
            'w1': _variable_with_weight_decay('wr1', [2048 , 512 ], 0.0005),
            'w2': _variable_with_weight_decay('wr2', [512 , N_CLASS], 0.0005)
        }
        r_biases = {
            'b1': _variable_with_weight_decay('br1', [ 512 ], 0.00),
            'b2': _variable_with_weight_decay('br2', [ N_CLASS ], 0.00)
        }

    with tf.variable_scope('fusion_var'):
        fusion = tf.get_variable('fusion', [3], initializer=tf.contrib.layers.xavier_initializer())
    
    print('Finish Flatten.')
    
    with tf.device('/gpu:0'):

        with tf.name_scope('FLMG'):
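            # FLMG: a small conv stack mapping the concatenated MV and residual
            # frames to a 3-channel flow-like map, trained against the real
            # optical flow via the MSE loss below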
            mv_res = tf.concat([flat_mv, flat_r], axis = -1)
            mv = slim.conv2d(mv_res, 8, kernel_size=[3, 3], scope = 'FLMG_1')
            mv = slim.conv2d(mv, 8, kernel_size=[3, 3], scope = 'FLMG_2')
            mv = slim.conv2d(mv, 6, kernel_size=[3, 3], scope = 'FLMG_3')
            mv = slim.conv2d(mv, 4, kernel_size=[3, 3], scope = 'FLMG_4')
            mv = slim.conv2d(mv, 2, kernel_size=[3, 3], scope = 'FLMG_5')
            mv = slim.conv2d(mv, 3, kernel_size=[3, 3], scope = 'FLMG_6')

        with tf.name_scope('FLMG_LOSS'):
            # reconstruction cost: per-sample squared L2 distance between the
            # generated flow-like map and the real optical flow, averaged over
            # the batch
            matrix_pow_2 = tf.pow(tf.subtract(mv, flat_flow), 2)
            matrix_norm = tf.reduce_sum(matrix_pow_2, axis = [1,2,3])

            flmg_loss = tf.reduce_mean(matrix_norm)
            tf.summary.scalar('flmg_loss', flmg_loss)

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            i_feature, _ = resnet_v1.resnet_v1_152(flat_i, num_classes=None, is_training=True, scope='i_resnet')
            mv_feature, _ = resnet_v1.resnet_v1_50(mv, num_classes=None, is_training=True, scope='mv_resnet')
            r_feature, _ = resnet_v1.resnet_v1_50(flat_r, num_classes=None, is_training=True, scope='r_resnet')
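            # num_classes=None returns the global-pooled 2048-d feature instead
            # of logits; the heavier ResNet-152 is reserved for the I-frame
            # stream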


        with tf.name_scope('reshape_feature'):
            i_feature = tf.reshape(i_feature, [-1, 2048])
            mv_feature = tf.reshape(mv_feature, [-1, 2048])
            r_feature = tf.reshape(r_feature, [-1, 2048])


        with tf.name_scope('inference_model'):

            i_sc, i_pred = model.inference_feature(i_feature, i_weights, i_biases,
                                                   FLAGS.num_segments, N_CLASS, name='i_inf')

            mv_sc, mv_pred = model.inference_feature(mv_feature, mv_weights, mv_biases,
                                                     FLAGS.num_segments, N_CLASS, name='mv_inf')

            r_sc, r_pred = model.inference_feature(r_feature, r_weights, r_biases,
                                                   FLAGS.num_segments, N_CLASS, name='r_inf')

            combine_sc, pred_class = model.inference_fusion(i_sc, mv_sc, r_sc, fusion)

    print('Finish Model.')
    
    with tf.name_scope('classification_loss'):
        one_hot_labels = tf.one_hot(label_placeholder, N_CLASS)
        mv_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = mv_sc, labels = one_hot_labels, dim=1))
        i_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = i_sc, labels = one_hot_labels, dim=1))
        r_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = r_sc, labels = one_hot_labels, dim=1))
        tf.summary.scalar('mv_cls_loss', mv_class_loss) 
        tf.summary.scalar('i_cls_loss', i_class_loss) 
        tf.summary.scalar('r_cls_loss', r_class_loss)

        combine_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = combine_sc, labels = one_hot_labels, dim=1))
        tf.summary.scalar('fuse_cls_loss', combine_loss)

        total_loss = combine_loss + i_class_loss + mv_class_loss + r_class_loss + flmg_loss
        tf.summary.scalar('tot_cls_loss', total_loss)


    with tf.name_scope('weight_decay'):
        weight_loss = sum(tf.get_collection('losses'))
        tf.summary.scalar('weight_decay_loss', weight_loss)

    '''
    with tf.name_scope('training_var_list'):
        mv_variable_list = list ( set(mv_weights.values()) | set(mv_biases.values()) )
        mv_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='mv_resnet')
        i_variable_list = list ( set(i_weights.values()) | set(i_biases.values()) )
        i_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='i_resnet')
        r_variable_list = list ( set(r_weights.values()) | set(r_biases.values()) )
        r_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='r_resnet')
    
    with tf.name_scope('summary_var'):
        _variable_summaries(mv_weights['w1'])
        _variable_summaries(i_weights['w2'])
        _variable_summaries(r_weights['w2'])
        _variable_summaries(mv_resnet_variables[0])
        _variable_summaries(i_resnet_variables[0])
        _variable_summaries(r_resnet_variables[0])
        _variable_summaries(fusion)
        
    print('Finish variables.')
    '''
    
    with tf.name_scope('optimizer'):
        '''
        mv_fc_opt = tf.train.AdamOptimizer(FLAGS.mv_lr).minimize(mv_class_loss + weight_loss, var_list = mv_variable_list)
        mv_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(mv_class_loss, var_list = mv_resnet_variables)
        i_fc_opt = tf.train.AdamOptimizer(FLAGS.i_lr).minimize(i_class_loss + weight_loss, var_list = i_variable_list)
        i_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(i_class_loss, var_list = i_resnet_variables)
        r_fc_opt = tf.train.AdamOptimizer(FLAGS.r_lr).minimize(r_class_loss + weight_loss, var_list = r_variable_list)
        r_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(r_class_loss, var_list = r_resnet_variables)
        fusion_opt = tf.train.GradientDescentOptimizer(10e-6).minimize(combine_loss, var_list = fusion)
        '''

        train_opt = tf.train.AdamOptimizer(FLAGS.tot_lr).minimize(total_loss, var_list = tf.trainable_variables())
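        # a single joint Adam step over all trainable variables; the
        # per-stream optimizers above are kept only for reference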



    print('Finish Optimizer.')
    
    with tf.name_scope('init_function'):
        init_var = tf.global_variables_initializer()
    
    
    
    with tf.name_scope('video_dataset'):
        train_data = dataset.buildTrainDataset_v2(FLAGS.train_list, FLAGS.data_path, FLAGS.num_segments,
                                                  batch_size = FLAGS.batch_size, augment = False,
                                                  shuffle = True, num_threads=2, buffer=100)
        test_data = dataset.buildTestDataset(FLAGS.valid_list, FLAGS.data_path, FLAGS.num_segments, 
                                             batch_size = FLAGS.batch_size, num_threads = 2, buffer = 30)
        
    
        with tf.name_scope('dataset_iterator'):
            it = tf.contrib.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)
            next_data = it.get_next()
            init_data = it.make_initializer(train_data)
            it_test = tf.contrib.data.Iterator.from_structure(test_data.output_types, test_data.output_shapes)
            next_test_data = it_test.get_next()
            init_test_data = it_test.make_initializer(test_data)
            
            
    print('Finish Dataset.')

    restore_var = [v for v in tf.trainable_variables() if ('Adam'  not in v.name)]

    first_restore_var = [v for v in tf.trainable_variables() if ('Adam'  not in v.name and 'FLMG' not in v.name)]
    first_saver = tf.train.Saver(var_list=first_restore_var)

    my_saver = tf.train.Saver(var_list=restore_var, max_to_keep=5)
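    # first_saver can restore checkpoints written before the FLMG layers
    # existed; my_saver covers the full set of non-Adam variables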

    
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    sess = tf.Session(config=config)
    
    with tf.name_scope('writer'):

        merged = tf.summary.merge_all()
        if not tf.gfile.Exists(FLAGS.log_path):
            tf.gfile.MakeDirs(FLAGS.log_path)
        previous_runs = os.listdir(FLAGS.log_path)
        if len(previous_runs) == 0:
            run_number = 1
        else:
            run_number = len(previous_runs) + 1
        logdir = 'run_%02d' % run_number
        tf.gfile.MakeDirs(os.path.join(FLAGS.log_path, logdir))
        writer = tf.summary.FileWriter(os.path.join(FLAGS.log_path, logdir), sess.graph)

    with tf.name_scope('saver'):

        if not tf.gfile.Exists(FLAGS.save_path):
            tf.gfile.MakeDirs(FLAGS.save_path)
        '''
        i_saver = tf.train.Saver(i_variable_list)
        mv_saver = tf.train.Saver(mv_variable_list)
        r_saver = tf.train.Saver(r_variable_list)
        i_resnet_saver = tf.train.Saver(i_resnet_variables)
        mv_resnet_saver = tf.train.Saver(mv_resnet_variables)
        r_resnet_saver = tf.train.Saver(r_resnet_variables)
        '''

    with tf.name_scope('initialization'):
        sess.run(init_var)
        sess.run(init_data)
        sess.run(init_test_data)

        #init_i_resent (sess)
        #init_mv_resent (sess)
        #init_r_resent(sess)
        '''
        i_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'i_model.chkp'+FLAGS.steps))
        mv_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'mv_model.chkp'+FLAGS.steps))
        r_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'r_model.chkp'+FLAGS.steps))
        i_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'i_resnet.chkp'+FLAGS.steps))
        mv_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'mv_resnet.chkp'+FLAGS.steps))
        r_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'r_resnet.chkp'+FLAGS.steps))
        '''
        try:
            my_saver.restore(sess, FLAGS.continue_training)
        except Exception:
            # first run: the checkpoint predates the FLMG layers, so restore
            # only the variables that exist in it
            first_saver.restore(sess, FLAGS.continue_training)

            '''
            i_resnet_saver = tf.train.Saver(i_resnet_variables)
            mv_resnet_saver = tf.train.Saver(mv_resnet_variables)
            r_resnet_saver = tf.train.Saver(r_resnet_variables)
            i_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'i_resnet.chkp'+FLAGS.steps))
            mv_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'mv_resnet.chkp'+FLAGS.steps))
            r_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'r_resnet.chkp'+FLAGS.steps))
            '''

        print('Finish Loading Pretrained Model.')

        
    # Main training loop
    combine_acc = 0
    i_acc = 0
    mv_acc = 0
    r_acc = 0
    start_time = time.time()
    for step in range(FLAGS.max_steps):
        # Validation
        
        
        if step % 1000 == 0 and step > 0:
            combine_classes = []
            mv_classes = []
            i_classes = []
            r_classes = []
            gt_label = []
            

            for i in range(100):
                ti_arr, tmv_arr, tr_arr, tlabel = sess.run(next_test_data)
                i_class, mv_class, r_class, com_class = sess.run([i_pred, mv_pred, r_pred, pred_class], 
                                    feed_dict={mv_placeholder: tmv_arr, i_placeholder: ti_arr,
                                               r_placeholder: tr_arr , label_placeholder : tlabel })
                combine_classes = np.append(combine_classes, com_class)
                mv_classes = np.append(mv_classes, mv_class)
                i_classes = np.append(i_classes, i_class)
                r_classes = np.append(r_classes, r_class)
                gt_label = np.append(gt_label, tlabel)
            
            combine_acc = np.sum((combine_classes == gt_label)) / gt_label.size
            i_acc = np.sum((i_classes == gt_label)) / gt_label.size
            mv_acc = np.sum((mv_classes == gt_label)) / gt_label.size
            r_acc = np.sum((r_classes == gt_label)) / gt_label.size

            print('Step %d finished with accuracy: %f , %f , %f, %f' % (step, i_acc, mv_acc, r_acc, combine_acc))
        
        
        # Training procedure
        i_arr, mv_arr, r_arr, flow_arr, label = sess.run(next_data)
        summary, _, pred, loss1, loss2, loss3, loss4, loss5 = sess.run([merged, train_opt, pred_class, mv_class_loss, i_class_loss, r_class_loss, combine_loss, flmg_loss],
                                    feed_dict={mv_placeholder: mv_arr, i_placeholder: i_arr,
                                                flow_placeholder: flow_arr,
                                               r_placeholder: r_arr , label_placeholder : label,
                                               combine_value_: combine_acc, i_value_ : i_acc,  
                                               mv_value_: mv_acc, r_value_ : r_acc})

        if step % 10 == 0:
            duration = time.time() - start_time
            print('Step %d: %.3f sec' % (step, duration), 'mv_loss:', loss1,  'i_loss:', loss2,  'r_loss:', loss3, 'fusion_loss:', loss4, 'flmg_loss:', loss5)
            print('GT:', label)
            print('Pred:', pred)

            writer.add_summary(summary, step)
            start_time = time.time()

        # Model Saving 

        if step % 1000 == 0 and step != 0:
            '''
            i_saver.save(sess, os.path.join(FLAGS.save_path, 'i_model.chkp'), global_step = step)
            mv_saver.save(sess, os.path.join(FLAGS.save_path, 'mv_model.chkp'), global_step = step)
            r_saver.save(sess, os.path.join(FLAGS.save_path, 'r_model.chkp'), global_step = step)

            i_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'i_resnet.chkp'), global_step = step)
            mv_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'mv_resnet.chkp'), global_step = step)
            r_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'r_resnet.chkp'), global_step = step)
            '''

            my_saver.save(sess, os.path.join(FLAGS.save_path, 'all_net.chkp'), global_step = step)

        #if (step) % 10000 == 0 and not step == 0 :
        #    i_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'i_resnet.chkp'), global_step = step)
        #    mv_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'mv_resnet.chkp'), global_step = step)
        #    r_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'r_resnet.chkp'), global_step = step)

    
    writer.close()
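
The `model` helpers used above (`inference_feature`, `inference_fusion`) come from a module that is not shown in this example. Based only on their call sites (2048-d per-segment features in, a w1/b1 -> w2/b2 head, per-video scores and predicted classes out, plus a learned 3-vector for fusion), a minimal sketch of plausible implementations follows; treat the internals as assumptions, not the original code.

import tensorflow as tf

def inference_feature(features, weights, biases, num_segments, n_class, name):
    # assumed: two-layer head applied per segment, then averaged over segments
    with tf.name_scope(name):
        hidden = tf.nn.relu(tf.matmul(features, weights['w1']) + biases['b1'])
        scores = tf.matmul(hidden, weights['w2']) + biases['b2']
        scores = tf.reshape(scores, [-1, num_segments, n_class])
        scores = tf.reduce_mean(scores, axis=1)      # (batch, n_class)
        return scores, tf.argmax(scores, axis=1)

def inference_fusion(i_sc, mv_sc, r_sc, fusion):
    # assumed: weighted sum of the three stream scores with the learned vector
    combined = fusion[0] * i_sc + fusion[1] * mv_sc + fusion[2] * r_sc
    return combined, tf.argmax(combined, axis=1)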
Example #28
0
def _get_resnet_features(inputs):
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        resnet_v1.resnet_v1_50(inputs, num_classes=None, is_training=True)

    return tf.get_default_graph().get_tensor_by_name(
        'resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0')
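
Since `resnet_v1_50` builds its variables under a fixed `resnet_v1_50` scope, the lookup by tensor name assumes the graph contains exactly one such backbone. A hedged usage sketch (the placeholder shape is an assumption):

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=(None, 224, 224, 3))
features = _get_resnet_features(images)
# with 224x224 inputs and the default output stride of 32, this block4
# activation has shape (?, 7, 7, 2048)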
Example #29
0
def run_training():
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    #     sess = tf.Session() # config=tf.ConfigProto(log_device_placement=True))

    # create input paths and labels np.array from csv annotations
    df_annos = pd.read_csv(ANNOS_CSV, index_col=0)
    df_annos = df_annos.sample(frac=1).reset_index(
        drop=True)  # shuffle the whole dataset
    if DATA == 'l8':
        path_col = ['l8_vis_jpg']
    elif DATA == 's1':
        path_col = ['s1_vis_jpg']
    elif DATA == 'l8s1':
        path_col = ['l8_vis_jpg', 's1_vis_jpg']
    else:
        raise ValueError('Unknown DATA setting: %s' % DATA)

    input_files_train = JPG_DIR + df_annos.loc[df_annos.partition == 'train',
                                               path_col].values
    input_labels_train = df_annos.loc[df_annos.partition == 'train',
                                      'pop_density_log2'].values
    input_files_val = JPG_DIR + df_annos.loc[df_annos.partition == 'val',
                                             path_col].values
    input_labels_val = df_annos.loc[df_annos.partition == 'val',
                                    'pop_density_log2'].values
    input_id_train = df_annos.loc[df_annos.partition == 'train',
                                  'village_id'].values
    input_id_val = df_annos.loc[df_annos.partition == 'val',
                                'village_id'].values

    print('input_files_train shape:', input_files_train.shape)
    train_set_size = len(input_labels_train)

    # data input
    with tf.device('/cpu:0'):
        train_images_batch, train_labels_batch, _ = \
        dataset.input_batches(FLAGS.batch_size, FLAGS.output_size, input_files_train, input_labels_train, input_id_train,
                              IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL, regression=True, augmentation=True, normalization=True)
        val_images_batch, val_labels_batch, _ = \
        dataset.input_batches(FLAGS.batch_size, FLAGS.output_size, input_files_val, input_labels_val, input_id_val,
                              IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL, regression=True, augmentation=False, normalization=True)

    images_placeholder = tf.placeholder(
        tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])
    labels_placeholder = tf.placeholder(tf.float32, shape=[
        None,
    ])
    print('finish data input')

    TRAIN_BATCHES_PER_EPOCH = int(
        train_set_size /
        FLAGS.batch_size)  # number of training batches/steps in each epoch
    MAX_STEPS = TRAIN_BATCHES_PER_EPOCH * FLAGS.max_epoch  # total number of training batches/steps

    # CNN forward pass
    if MODEL == 'vgg':
        with slim.arg_scope(
                vgg.vgg_arg_scope(weight_decay=FLAGS.weight_decay)):
            outputs, _ = vgg.vgg_16(images_placeholder,
                                    num_classes=FLAGS.output_size,
                                    dropout_keep_prob=FLAGS.dropout_keep,
                                    is_training=True)
            outputs = tf.squeeze(
                outputs
            )  # change shape from (B,1) to (B,), same as label input
    if MODEL == 'resnet':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            outputs, _ = resnet_v1.resnet_v1_152(images_placeholder,
                                                 num_classes=FLAGS.output_size,
                                                 is_training=True)
            outputs = tf.squeeze(
                outputs
            )  # change shape from (B,1) to (B,), same as label input

    # loss
    labels_real = tf.pow(2.0, labels_placeholder)
    outputs_real = tf.pow(2.0, outputs)

    # only loss_log2_mse is used for the gradient; the model minimizes this value
    loss_log2_mse = tf.reduce_mean(tf.squared_difference(
        labels_placeholder, outputs),
                                   name='loss_log2_mse')
    loss_real_rmse = tf.sqrt(tf.reduce_mean(
        tf.squared_difference(labels_real, outputs_real)),
                             name='loss_real_rmse')
    loss_real_mae = tf.losses.absolute_difference(labels_real, outputs_real)

    tf.summary.scalar('loss_log2_mse', loss_log2_mse)
    tf.summary.scalar('loss_real_rmse', loss_real_rmse)
    tf.summary.scalar('loss_real_mae', loss_real_mae)

    # accuracy (R2)
    def r_squared(labels, outputs):
        sst = tf.reduce_sum(
            tf.squared_difference(labels, tf.reduce_mean(labels)))
        sse = tf.reduce_sum(tf.squared_difference(labels, outputs))
        return 1.0 - tf.div(sse, sst)

    r2_log2 = r_squared(labels_placeholder, outputs)
    r2_real = r_squared(labels_real, outputs_real)

    tf.summary.scalar('r2_log2', r2_log2)
    tf.summary.scalar('r2_real', r2_real)

    # determine the model variables to restore from the pre-trained checkpoint
    if MODEL == 'vgg':
        if DATA == 'l8s1':
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8', 'vgg_16/conv1'])
        else:
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8'])
    if MODEL == 'resnet':
        model_variables = slim.get_variables_to_restore(
            exclude=['resnet_v1_152/logits', 'resnet_v1_152/conv1'])

    # training step and learning rate
    global_step = tf.Variable(0, name='global_step',
                              trainable=False)  #, dtype=tf.int64)
    learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate,  # initial learning rate
        global_step=global_step,  # current step
        decay_steps=MAX_STEPS,  # total number of steps over which to decay
        decay_rate=FLAGS.lr_decay_rate
    )  # final learning rate = FLAGS.learning_rate * decay_rate
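    # with the default staircase=False this decays smoothly:
    #   lr(step) = FLAGS.learning_rate * FLAGS.lr_decay_rate ** (step / MAX_STEPS)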
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    #     optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    #     optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

    # to update gradients only in the first and last layers
    #     vars_update = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'vgg_16/(conv1|fc8)')
    #     print('variables to update in training: ', vars_update)

    train_op = optimizer.minimize(
        loss_log2_mse, global_step=global_step)  #, var_list = vars_update)

    # summary output in tensorboard
    summary = tf.summary.merge_all()
    summary_writer_train = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_train'), sess.graph)
    summary_writer_val = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_val'), sess.graph)

    # variable initialize
    init = tf.global_variables_initializer()
    sess.run(init)

    # restore the model from pre-trained checkpoint
    restorer = tf.train.Saver(model_variables)
    restorer.restore(sess, PRETRAIN_WEIGHTS)
    print('loaded pre-trained weights: ', PRETRAIN_WEIGHTS)

    # saver object to save checkpoint during training
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    print('start training...')
    epoch = 0
    best_r2 = -float('inf')
    for step in range(MAX_STEPS):
        if step % TRAIN_BATCHES_PER_EPOCH == 0:
            epoch += 1

        start_time = time.time()  # record the time used for each batch

        images_out, labels_out = sess.run(
            [train_images_batch,
             train_labels_batch])  # inputs of this batch, numpy array format

        duration_batch = time.time() - start_time

        if step == 0:
            print("finished reading batch data")
            print("images_out shape:", images_out.shape)
        feed_dict = {
            images_placeholder: images_out,
            labels_placeholder: labels_out
        }
        _, train_loss, train_accuracy, train_outputs, lr = \
            sess.run([train_op, loss_log2_mse, r2_log2, outputs, learning_rate], feed_dict=feed_dict)

        duration = time.time() - start_time

        if step % 10 == 0 or (
                step + 1) == MAX_STEPS:  # print training loss every 10 batches
            print('Step %d epoch %d lr %.3e: log2 MSE loss = %.4f log2 R2 = %.4f (%.3f sec, %.3f sec(each batch))' \
                  % (step, epoch, lr, train_loss, train_accuracy, duration*10, duration_batch))
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer_train.add_summary(summary_str, step)
            summary_writer_train.flush()

        if step % 50 == 0 or (
                step + 1
        ) == MAX_STEPS:  # calculate and print validation loss every 50 batches
            images_out, labels_out = sess.run(
                [val_images_batch, val_labels_batch])
            feed_dict = {
                images_placeholder: images_out,
                labels_placeholder: labels_out
            }

            val_loss, val_accuracy = sess.run([loss_log2_mse, r2_log2],
                                              feed_dict=feed_dict)
            print('Step %d epoch %d: val log2 MSE = %.4f val log2 R2 = %.4f ' %
                  (step, epoch, val_loss, val_accuracy))

            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer_val.add_summary(summary_str, step)
            summary_writer_val.flush()

            # in each epoch, if the validation R2 is higher than best R2, save the checkpoint
            if step % (TRAIN_BATCHES_PER_EPOCH -
                       TRAIN_BATCHES_PER_EPOCH % 50) == 0:
                if val_accuracy > best_r2:
                    best_r2 = val_accuracy
                    checkpoint_file = os.path.join(LOG_DIR, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_file,
                               global_step=step,
                               write_state=True)
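
The `r_squared` metric above is the standard coefficient of determination, R2 = 1 - SSE/SST. A small NumPy sanity check of the same formula (illustrative only, not part of the training script):

import numpy as np

def r_squared_np(labels, outputs):
    sst = np.sum((labels - labels.mean()) ** 2)  # total sum of squares
    sse = np.sum((labels - outputs) ** 2)        # residual sum of squares
    return 1.0 - sse / sst

labels = np.array([1.0, 2.0, 3.0, 4.0])
print(r_squared_np(labels, labels))           # 1.0 for a perfect fit
print(r_squared_np(labels, np.full(4, 2.5)))  # 0.0 for predicting the mean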
Example #30
0
def mem_encoder(img, seg, is_training):
    image = tf.reshape(img, [-1] + list(img.get_shape())[2:])
    seg = tf.reshape(seg, [-1] + list(seg.get_shape())[2:])
    image = image - tf.constant(
        _RGB_MEAN, dtype=tf.float32, shape=(1, 1, 1, 3))
    seg = seg - 127.5
    image_seg = tf.concat([image, seg], axis=-1)
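    # encoder input: mean-subtracted RGB frames concatenated channel-wise with
    # the centered segmentation mask; 'mem_conv1_seg' presumably handles the
    # extra mask channel(s) in place of the standard conv1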

    with tf.contrib.slim.arg_scope(
            resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        with tf.variable_scope('mem_encoder'):
            with tf.variable_scope('resnet_v1_50', values=[image]) as sc:
                end_points_collection = sc.name + '_end_points'
                with slim.arg_scope([slim.conv2d, bottleneck],
                                    outputs_collections=end_points_collection):
                    with slim.arg_scope([slim.batch_norm],
                                        is_training=is_training):
                        net = image_seg
                        net = conv2d_seg(net,
                                         64,
                                         7,
                                         stride=2,
                                         scope='mem_conv1_seg')
                        net = slim.max_pool2d(net, [3, 3],
                                              stride=2,
                                              scope='pool1')

                        # blocks 1-3: standard bottleneck stacks of 3, 4 and 6
                        # units, each block downsampling in its last unit
                        with tf.variable_scope('block1', values=[net]):
                            for i, stride in enumerate([1, 1, 2], start=1):
                                with tf.variable_scope('unit_%d' % i,
                                                       values=[net]):
                                    net = bottleneck(net,
                                                     depth=4 * 64,
                                                     depth_bottleneck=64,
                                                     stride=stride)

                        with tf.variable_scope('block2', values=[net]):
                            for i, stride in enumerate([1, 1, 1, 2], start=1):
                                with tf.variable_scope('unit_%d' % i,
                                                       values=[net]):
                                    net = bottleneck(net,
                                                     depth=4 * 128,
                                                     depth_bottleneck=128,
                                                     stride=stride)

                        with tf.variable_scope('block3', values=[net]):
                            for i, stride in enumerate([1, 1, 1, 1, 1, 2],
                                                       start=1):
                                with tf.variable_scope('unit_%d' % i,
                                                       values=[net]):
                                    net = bottleneck(net,
                                                     depth=4 * 256,
                                                     depth_bottleneck=256,
                                                     stride=stride)

                        key = tf.layers.conv2d(
                            net,
                            filters=int(net.get_shape()[-1]) // 8,
                            kernel_size=(1, 1),
                            activation=None,
                            padding='SAME',
                            name='mem_key')
                        value = tf.layers.conv2d(
                            net,
                            filters=int(net.get_shape()[-1]) // 2,
                            kernel_size=(1, 1),
                            activation=None,
                            padding='SAME',
                            name='mem_value')

        net = tf.reshape(net,
                         [config.batch_size, -1] + list(net.get_shape())[1:])
        key = tf.reshape(key,
                         [config.batch_size, -1] + list(key.get_shape())[1:])
        value = tf.reshape(value, [config.batch_size, -1] +
                           list(value.get_shape())[1:])
        return key, value, net
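
How the returned key/value maps are consumed is not shown in this example. In space-time-memory-style segmentation models they are typically read via softmax attention between query keys and memory keys; a minimal sketch under that assumption:

import tensorflow as tf

def memory_read(mem_key, mem_value, query_key):
    # mem_key:   (B, T*H*W, Ck)  flattened memory keys
    # mem_value: (B, T*H*W, Cv)  flattened memory values
    # query_key: (B, H*W, Ck)    flattened query keys
    logits = tf.matmul(query_key, mem_key, transpose_b=True)  # (B, HW, THW)
    weights = tf.nn.softmax(logits)                           # over the memory axis
    return tf.matmul(weights, mem_value)                      # (B, HW, Cv)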