Example #1
    def build(self, input_list, phrase):
        assert phrase == 'train'  # samples with labels for both 'training' and
        # 'validation' splits belong to 'train' in the broader sense
        assert len(input_list) == 0  # this module takes no upstream inputs
        INFO = self.__class__.DEBUG_INFO

        # 1. Get queues of file names
        # TODO: distinguish training, validation and unlabelled
        im_files, lb_files, num_files = self.read_filename_list(
            self.sample_list_file)
        # vld_im_files, vld_lb_files, num_files = self.read_filename_list(self.vld_sample_list)

        # 2. Load images
        raw_image, raw_label = self.build_pair_queue_reader(im_files, lb_files)
        # Validation images are not jittered; the raw-to-processed step merely
        # expands the dimensions so each sample becomes a single-sample mini-batch

        if INFO['raw_input']:
            raw_image = build_print_shape(raw_image, "Input image ")
            raw_label = build_print_shape(raw_label, "Input label ")

        # 3. Image pre-process before making batches
        # TODO: jitter for augmentation when training; otherwise skip
        single_sample_batch = True
        if self.data_split == 'training':
            image, label = \
                self.preproc.build_input_pair_process(raw_image, raw_label)
            if self.preproc.is_shape_fixed():
                # if the pre-processor reports is_shape_fixed() == True, it is
                # responsible for setting the static shapes of image and label
                image_batch, label_batch = \
                    tf.train.batch([image, label],
                                   batch_size=self.batch_size,
                                   enqueue_many=True)
                # enqueue_many: expect small "batches" from image preprocessing,
                #   because data augmentation may yield multiple samples per raw sample
                single_sample_batch = False
        else:  # no pre-processing for validation and deploy
            image = raw_image
            label = self.preproc.build_interpret_label(raw_label)

        if single_sample_batch:
            image_batch = tf.expand_dims(image, 0)
            label_batch = tf.expand_dims(label, 0)

        assert isinstance(image_batch, tf.Tensor)
        assert isinstance(label_batch, tf.Tensor)
        if INFO['input_batch']:
            image_batch = build_print_shape(
                image_batch, "Image batch [{}]: ".format(self.data_split))
            label_batch = build_print_shape(
                label_batch, "Label batch [{}]: ".format(self.data_split))

        self.graph['im_files'] = im_files
        self.graph['lb_files'] = lb_files
        self.graph['raw_image'] = raw_image
        self.graph['raw_label'] = raw_label
        self.graph['image_batch'] = image_batch
        self.graph['label_batch'] = label_batch
        return image_batch, label_batch
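
A quick sketch of the enqueue_many behavior relied on above (hypothetical shapes and crop count; the real augmentation lives in build_input_pair_process): with enqueue_many=True, tf.train.batch treats dimension 0 of each enqueued tensor as the sample axis, so a small per-image batch of augmented samples is re-batched across raw images.

    # hypothetical: augmentation yields 4 random crops per raw image
    # (a real segmentation pipeline must crop image and label jointly)
    crops = tf.stack([tf.random_crop(raw_image, [224, 224, 3]) for _ in range(4)])
    batch = tf.train.batch([crops], batch_size=8, enqueue_many=True)[0]
    # batch: [8, 224, 224, 3], samples drawn across consecutive raw images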
Example #2
    def _fc_layer(self, bottom, lname, shape_convert,
                  do_relu, debug, num_classes=None):
        """
        :brief _fc_layer build a fully-connected layer up the @bottom, using
          convolution operation, e.g. if in the traditional net, the bottom
          has 7 x 7 x Cin, the output has Cout channels, a fully connected
          net will need (7x7xCin) "flattened" x Cout weights to specify.
          Now we re-shape the 7x7xCin as a convolution on a 7x7 cell, with
          Cin in-channels and Cout out-channels

          Adapt trained model to new tasks: If desired out-channel number
          Cout does not match pretrained filter, generally, much less than
          the pretrained filter, VGG was trained on image-net, with 1000
          classes to predict, the loader will combine the output weights
          of multiple (original output) channels into one (new) output
          channel.
        :shape_convert
        - fc_weight_shape, the shape of original VGG's fully connected layer,
          for confirmation purpose only
        - conv_kernel_shape, the kernel weights of the newly constructed convolution
          layer.
        :param num_classes, desired number of output channels.
        """
        kshape = shape_convert['conv_kernel_shape']
        wshape = shape_convert['fc_weight_shape']
        with tf.variable_scope(lname):
            kweights_var = self.reload_fc_filter(lname, kshape, wshape)
            conv = tf.nn.conv2d(bottom, kweights_var, [1, 1, 1, 1], padding='SAME')
            bias_var = self.adapt_pred_bias(lname, num_classes)
            fc = tf.nn.bias_add(conv, bias_var)
            if do_relu:
                fc = tf.nn.relu(fc)
            if debug:
                fc = build_print_shape(fc, "fc {}:".format(lname))
        return fc
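
To make the reshaping above concrete, here is a minimal numpy sketch using VGG's fc6 shapes (a worked illustration; fc_weights is a stand-in for the pretrained parameters handled by reload_fc_filter):

    import numpy as np

    fc_weights = np.zeros((25088, 4096), dtype=np.float32)  # stand-in for pretrained fc6
    # 25088 = 7 * 7 * 512: each output unit saw a flattened 7x7x512 window,
    # so the same weights serve as a 7x7 conv kernel with 512 in-channels
    # and 4096 out-channels.
    conv_kernel = fc_weights.reshape(7, 7, 512, 4096)
    # Run as a convolution, the layer now accepts inputs of any spatial size,
    # producing one "fc" response per 7x7 window.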
Example #3
    def _upscore_layer(self, bottom, lname, ksize, stride,
                       num_classes,
                       up_w=None, up_h=None,
                       debug=False):
        with tf.variable_scope(lname):
            # determine output shape
            true_bottom_shape = tf.shape(bottom)
            num_imgs = true_bottom_shape[0]
            w = true_bottom_shape[2]
            h = true_bottom_shape[1]
            if up_w is None:
                up_w = stride * (w - 1) + 1
            if up_h is None:
                up_h = stride * (h - 1) + 1
            upscore_shape = tf.stack([num_imgs, up_h, up_w, num_classes])

            num_in_channels = bottom.get_shape()[3]
            assert num_in_channels == num_classes
            filt = self.get_deconv_filter(ksize, num_classes)
            upscore = tf.nn.conv2d_transpose(
                bottom, filt, upscore_shape,
                strides=[1, stride, stride, 1],
                padding='SAME')
            if debug:
                upscore = build_print_shape(upscore, msg="upscore {}".format(lname))
        return upscore
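
A quick check of the default output-size arithmetic (worked numbers, not from the source): with padding='SAME', tf.nn.conv2d_transpose accepts any output extent o satisfying ceil(o / stride) == w, i.e. stride * (w - 1) + 1 <= o <= stride * w, and the default above picks the minimum. For w = 10 and stride = 2 that gives up_w = 2 * (10 - 1) + 1 = 19, while a caller may pass anything up to 20, e.g. the exact size of the feature map the result will be fused with.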
Example #4
    def build(self, inputs, phrase):
        """
        :param inputs: [pred_batch, label_batch]
        :param phrase: train / infer
        """
        assert phrase == 'train'  # infer shouldn't have access to labels
        pred_batch, label_batch = inputs
        sh = tf.shape(pred_batch)
        q = tf.reshape(pred_batch, (-1, sh[-1]))
        sh_new = tf.shape(q)
        p = tf.reshape(tf.cast(label_batch, dtypes.float32), sh_new)
        elem_loss = -tf.reduce_sum(tf.log(q) * p * self.class_weights, axis=1)
        if self.debug['elem_loss']:
            elem_loss = build_print_shape(elem_loss, "element loss:")
        loss = tf.reduce_mean(elem_loss, name='xentropy')
        tf.summary.scalar('xentropy_loss', loss)
        if self.debug['mean_loss']:
            loss = build_print_value(loss, msg="mean loss", first_n=9999)

        w_loss = tf.add_n(tf.get_collection('losses'), name='w_loss')
        total_loss = loss + w_loss
        if self.debug['total_loss']:
            total_loss = build_print_value(total_loss,
                                           msg="total loss",
                                           first_n=9999)
        return [
            total_loss,
            loss,
        ]
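
A numeric sketch of the weighted cross-entropy above (made-up values; note the loss assumes pred_batch already holds softmax probabilities, since tf.log is applied directly and a zero probability would produce -inf):

    import numpy as np

    q = np.array([[0.7, 0.2, 0.1]])            # predicted probabilities, one pixel
    p = np.array([[0.0, 1.0, 0.0]])            # one-hot label: true class is 1
    class_weights = np.array([1.0, 2.0, 1.0])  # up-weight the middle class
    elem_loss = -np.sum(np.log(q) * p * class_weights, axis=1)
    # elem_loss == [-2.0 * log(0.2)] ~= [3.22]; the mean over all
    # pixels/samples is the 'xentropy' loss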
Example #5
    def _max_pool(self, bottom, slname, debug):
        """
        :param slname: name of the "superlayer", see @reload_conv_filter
        """
        pool = tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=slname)
        if debug:
            pool = build_print_shape(pool, "maxpool {}:".format(slname), 1)
        return pool
Example #6
    def _conv_layer(self, bottom, lname, debug=False):
        """
        :param bottom: the input; a minibatch of images or the output of
          previous layers.
        :param lname: conv. layer name, see @reload_conv_filter
        :return: activation of the conv. layer
        """
        # A variable scope is necessary: every layer's variables carry short
        # names like "weights", and the scope prefixes them with the layer name.
        with tf.variable_scope(lname):
            filt = self.reload_conv_filter(lname)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_bias = self.reload_internal_bias(lname)
            bias = tf.nn.bias_add(conv, conv_bias)
            relu = tf.nn.relu(bias)
            if debug:
                relu = build_print_shape(relu, "conv {}:".format(lname), 1)
            return relu
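
A minimal sketch of the scoping behavior described in the comment above (assuming reload_conv_filter creates its variable via tf.get_variable; the layer name is illustrative):

    with tf.variable_scope("conv1_1"):
        w = tf.get_variable("weights", shape=[3, 3, 3, 64])
    print(w.name)  # -> "conv1_1/weights:0": the scope keeps layers apart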
Example #7
    def _adapt_fc_layer(self, bottom, lname, shape_convert,
                        do_relu, debug, num_classes=None):
        """
        Partially reuse a pre-trained fully-connected (by convolution) layer with
        fewer output classes. Specifically, VGG was trained on ImageNet with 1000
        classes to predict, but we usually have far fewer classes, so the weights
        for multiple original classes are combined into one target class.
        See also @adapt_bias.

        :param num_classes, desired number of output channels.
        """

        kshape = shape_convert['conv_kernel_shape']
        wshape = shape_convert['fc_weight_shape']
        with tf.variable_scope(lname):
            kweights_var = self.adapt_fc_filter(lname, kshape, num_classes, wshape)
            bias_var = self.adapt_pred_bias(lname, num_classes)
            conv = tf.nn.conv2d(bottom, kweights_var, [1, 1, 1, 1], padding='SAME')
            fc = tf.nn.bias_add(conv, bias_var)
            if do_relu:
                fc = tf.nn.relu(fc)
            if debug:
                fc = build_print_shape(fc, "fc {}:".format(lname))
        return fc
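
One plausible reading of the channel combination (the actual rule lives in adapt_fc_filter, which is not shown here): average the 1000 pretrained output channels in contiguous groups, one group per new class. A numpy sketch with VGG fc8 shapes:

    import numpy as np

    pretrained = np.zeros((1, 1, 4096, 1000), dtype=np.float32)  # stand-in fc8 kernel
    num_classes = 2
    grouped = pretrained.reshape(1, 1, 4096, num_classes, 1000 // num_classes)
    adapted = grouped.mean(axis=-1)  # shape (1, 1, 4096, 2)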
Example #8
    def build(self, inputs, phrase):
        """
        :param inputs: one-element list = [rgb_batch], an image batch tensor
            of shape [ None x height x width x num_channels ], i.e. the shape
            in which the images are loaded.
        :param phrase: train or infer
        """
        self.phrase = phrase
        if phrase == 'train':
            self.dropout_rate = self.conf['solver']['dropout_rate']

        self.weight_decay_rate = self.conf['objective']['weight_decay']
        param_file = os.path.join(self.conf['path']['base'],
                                  self.conf['encoder']['pre_trained_param'])
        logging.debug("pm:{} / {} / {}".format(
            self.conf['path']['base'],
            self.conf['encoder']['pre_trained_param'],
            param_file))
        self.vgg_params = self.load_pre_trained_vgg_weights(
            filename=param_file,
            channel_means=self.conf['encoder']['channel_means'])
        logging.info("Based on pre-trained VGG (cold start)"
                     "\n\t{}".format(param_file))

        rgb_batch = inputs[0]
        assert isinstance(rgb_batch, tf.Tensor)
        assert rgb_batch.get_shape().ndims == 4
        assert rgb_batch.get_shape()[3] == 3  # NxHxWxC, 3 channels, rgb

        with tf.name_scope("Processing"):
            rgb_batch = tf.cast(rgb_batch, dtype=dtypes.float32)
            ch_r, ch_g, ch_b = tf.split(rgb_batch, 3, axis=3)
            bgr_batch = tf.concat([
                ch_b - self.vgg_params['mean_b'],
                ch_g - self.vgg_params['mean_g'],
                ch_r - self.vgg_params['mean_r']], axis=3)
            if self.debug['input']:
                bgr_batch = build_print_shape(bgr_batch, "BGR Image", first_n=1)

        # VGG convolutional
        self.conv1_1 = self._conv_layer(bgr_batch, "conv1_1", False)
        self.conv1_2 = self._conv_layer(self.conv1_1, "conv1_2", False)
        self.pool1 = self._max_pool(self.conv1_2, "pool1", self.debug['conv'])

        self.conv2_1 = self._conv_layer(self.pool1, "conv2_1", False)
        self.conv2_2 = self._conv_layer(self.conv2_1, "conv2_2", False)
        self.pool2 = self._max_pool(self.conv2_2, "pool2", self.debug['conv'])

        self.conv3_1 = self._conv_layer(self.pool2, "conv3_1", False)
        self.conv3_2 = self._conv_layer(self.conv3_1, "conv3_2", False)
        self.conv3_3 = self._conv_layer(self.conv3_2, "conv3_3", False)
        self.pool3 = self._max_pool(self.conv3_3, "pool3", self.debug['conv'])

        self.conv4_1 = self._conv_layer(self.pool3, "conv4_1", False)
        self.conv4_2 = self._conv_layer(self.conv4_1, "conv4_2", False)
        self.conv4_3 = self._conv_layer(self.conv4_2, "conv4_3", False)
        self.pool4 = self._max_pool(self.conv4_3, "pool4", self.debug['conv'])

        self.conv5_1 = self._conv_layer(self.pool4, "conv5_1", False)
        self.conv5_2 = self._conv_layer(self.conv5_1, "conv5_2", False)
        self.conv5_3 = self._conv_layer(self.conv5_2, "conv5_3", False)
        self.pool5 = self._max_pool(self.conv5_3, "pool5", self.debug['conv'])

        self.fc6 = self._fc_layer(self.pool5, "fc6",
                                  shape_convert=self.fc_shape_convert['fc6'],
                                  do_relu=True, debug=self.debug['fc'])
        if self.phrase == 'train':
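            # note: in TF 1.x the second positional argument of tf.nn.dropout
            # is the *keep* probability, so 'dropout_rate' must hold a keep-prob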
            self.fc6 = tf.nn.dropout(self.fc6, self.dropout_rate)

        self.fc7 = self._fc_layer(self.fc6, "fc7",
                                  shape_convert=self.fc_shape_convert['fc7'],
                                  do_relu=True, debug=self.debug['fc'])
        if self.phrase == 'train':
            self.fc7 = tf.nn.dropout(self.fc7, self.dropout_rate)

        self.fc8 = self._adapt_fc_layer(self.fc7, "fc8",
                                        shape_convert=self.fc_shape_convert['fc8'],
                                        do_relu=False,
                                        num_classes=self.num_classes,
                                        debug=self.debug['fc'])

        pool4_shape = tf.shape(self.pool4)
        self.upscore2 = self._upscore_layer(self.fc8, "upscore2",
                                            ksize=4, stride=2,
                                            num_classes=self.num_classes,
                                            up_w=pool4_shape[2],
                                            up_h=pool4_shape[1],
                                            debug=self.debug['up'])

        self.score_pool4 = self._score_layer(self.pool4, "score_pool4",
                                             num_classes=self.num_classes,
                                             random_weight_stddev=0.001)

        self.fuse_pool4 = tf.add(self.upscore2, self.score_pool4)

        input_shape = tf.shape(bgr_batch)
        self.upscore32 = self._upscore_layer(self.fuse_pool4, "upscore32",
                                             ksize=32, stride=16,
                                             num_classes=self.num_classes,
                                             up_w=input_shape[2], up_h=input_shape[1],
                                             debug=self.debug['up'])

        return [self.upscore32, ]
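
The two-stage upsampling above, with the pool4 skip fused in between, is the FCN-16s pattern: fc8 scores at 1/32 of the input resolution are upsampled 2x to pool4's resolution, added to a scored pool4, and the fused map is upsampled 16x back to input resolution. A hedged sketch of how the returned logits would feed the loss in Example #4 ('encoder' and 'loss_module' are assumed names; the softmax step is an assumption, but the loss applies tf.log directly, so it expects probabilities):

    # hypothetical driver wiring the snippets above together
    logits = encoder.build([image_batch], phrase='train')[0]  # [N, H, W, num_classes]
    probs = tf.nn.softmax(logits)                             # per-pixel class probabilities
    total_loss, xent = loss_module.build([probs, label_batch], phrase='train')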