def build(self, input_list, phrase):
    assert phrase == 'train'
    # Samples in both the 'training' and 'validation' splits carry labels;
    # both belong to 'train' in the broader sense.
    assert len(input_list) == 0  # the input module takes no upstream inputs
    INFO = self.__class__.DEBUG_INFO

    # 1. Get queues of file names
    # TODO: distinguish training, validation and unlabelled
    im_files, lb_files, num_files = self.read_filename_list(
        self.sample_list_file)
    # vld_im_files, vld_lb_files, num_files = self.read_filename_list(self.vld_sample_list)

    # 2. Load images
    raw_image, raw_label = self.build_pair_queue_reader(im_files, lb_files)
    # Validation images are not jittered; raw-to-processed only expands the
    # dimension, so each sample becomes a single-sample mini-batch.
    if INFO['raw_input']:
        raw_image = build_print_shape(raw_image, "Input image ")
        raw_label = build_print_shape(raw_label, "Input label ")

    # 3. Image pre-processing before making batches
    # TODO: jitter for augmentation when training; otherwise skip
    single_sample_batch = True
    if self.data_split == 'training':
        image, label = \
            self.preproc.build_input_pair_process(raw_image, raw_label)
        if self.preproc.is_shape_fixed():
            # If the pre-processor reports is_shape_fixed() == True, it is
            # responsible for setting the shapes of trn_image and trn_label.
            image_batch, label_batch = \
                tf.train.batch([image, label],
                               batch_size=self.batch_size,
                               enqueue_many=True)
            # enqueue_many: expect small "batches" from image preprocessing,
            # because data augmentation may produce multiple samples from one
            # "raw sample".
            single_sample_batch = False
    else:
        # No pre-processing for validation and deploy.
        image = raw_image
        label = self.preproc.build_interpret_label(raw_label)

    if single_sample_batch:
        image_batch = tf.expand_dims(image, 0)
        label_batch = tf.expand_dims(label, 0)

    assert isinstance(image_batch, tf.Tensor)
    assert isinstance(label_batch, tf.Tensor)
    if INFO['input_batch']:
        image_batch = build_print_shape(
            image_batch, "Image batch [{}]: ".format(self.data_split))
        label_batch = build_print_shape(
            label_batch, "Label batch [{}]: ".format(self.data_split))

    self.graph['im_files'] = im_files
    self.graph['lb_files'] = lb_files
    self.graph['raw_image'] = raw_image
    self.graph['raw_label'] = raw_label
    self.graph['image_batch'] = image_batch
    self.graph['label_batch'] = label_batch
    return image_batch, label_batch

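# Aside on enqueue_many (a minimal sketch; `_sketch_enqueue_many_batch` is a
# hypothetical helper, not used by the pipeline above): with enqueue_many=True,
# tf.train.batch treats dimension 0 of each tensor as separate examples, so a
# pre-processor that turns one raw sample into k jittered samples of shape
# [k, H, W, C] contributes k queue entries, and the output is still
# [batch_size, H, W, C].
def _sketch_enqueue_many_batch(aug_images, aug_labels, batch_size):
    """aug_images: [k, H, W, C], aug_labels: [k, H, W, num_classes], static shapes."""
    return tf.train.batch([aug_images, aug_labels],
                          batch_size=batch_size,
                          enqueue_many=True)
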
def _fc_layer(self, bottom, lname, shape_convert, do_relu, debug,
              num_classes=None):
    """
    :brief _fc_layer builds a fully connected layer on top of @bottom using a
      convolution. E.g. if in the traditional net the bottom is 7 x 7 x Cin and
      the output has Cout channels, a fully connected net needs
      (7 x 7 x Cin) "flattened" x Cout weights. We re-interpret those weights
      as a convolution over a 7 x 7 cell with Cin input channels and Cout
      output channels.

      Adapting a trained model to new tasks: if the desired output-channel
      number Cout does not match the pretrained filter (generally it is much
      smaller; VGG was trained on ImageNet with 1000 classes to predict), the
      loader combines the output weights of multiple (original output)
      channels into one (new) output channel.

    :param shape_convert:
      - fc_weight_shape, the shape of the original VGG fully connected layer,
        for confirmation purposes only
      - conv_kernel_shape, the kernel shape of the newly constructed
        convolution layer
    :param num_classes: desired number of output channels.
    """
    kshape = shape_convert['conv_kernel_shape']
    wshape = shape_convert['fc_weight_shape']
    with tf.variable_scope(lname):
        kweights_var = self.reload_fc_filter(lname, kshape, wshape)
        conv = tf.nn.conv2d(bottom, kweights_var, [1, 1, 1, 1], padding='SAME')
        bias_var = self.adapt_pred_bias(lname, num_classes)
        fc = tf.nn.bias_add(conv, bias_var)
        if do_relu:
            fc = tf.nn.relu(fc)
        if debug:
            fc = build_print_shape(fc, "fc {}:".format(lname))
    return fc

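# A minimal numpy sketch of the fc-to-conv re-interpretation described above,
# assuming the standard VGG16 fc6 shapes (the real shapes come from
# `fc_shape_convert`; this helper is hypothetical and unused): the flattened
# weight matrix [7*7*512, 4096] is simply viewed as a 7x7 convolution kernel
# with 512 input and 4096 output channels.
def _sketch_fc_to_conv_reshape():
    import numpy as np
    fc_weight_shape = (7 * 7 * 512, 4096)    # original fully connected weights
    conv_kernel_shape = (7, 7, 512, 4096)    # the same weights as a conv kernel
    fc_weights = np.zeros(fc_weight_shape, dtype=np.float32)  # stand-in for pretrained values
    kernel = fc_weights.reshape(conv_kernel_shape)            # no values change, only the view
    return kernel.shape
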
def _upscore_layer(self, bottom, lname, ksize, stride, num_classes,
                   up_w=None, up_h=None, debug=False):
    with tf.variable_scope(lname):
        # Determine the output shape.
        true_bottom_shape = tf.shape(bottom)
        num_imgs = true_bottom_shape[0]
        w = true_bottom_shape[2]
        h = true_bottom_shape[1]
        if up_w is None:
            up_w = stride * (w - 1) + 1
        if up_h is None:
            up_h = stride * (h - 1) + 1
        upscore_shape = tf.stack([num_imgs, up_h, up_w, num_classes])

        num_in_channels = bottom.get_shape()[3]
        assert num_in_channels == num_classes

        filt = self.get_deconv_filter(ksize, num_classes)
        upscore = tf.nn.conv2d_transpose(
            bottom, filt, upscore_shape,
            strides=[1, stride, stride, 1], padding='SAME')
        if debug:
            upscore = build_print_shape(upscore,
                                        msg="upscore {}".format(lname))
    return upscore

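# A small worked example of the default output size used above when `up_w` /
# `up_h` are not given (hypothetical helper, illustrative only): with stride 2,
# a 24 x 32 feature map is upsampled to 2*(24-1)+1 = 47 rows and
# 2*(32-1)+1 = 63 columns. Callers that need an exact match (e.g. to pool4 or
# to the input image) pass `up_h` / `up_w` explicitly instead.
def _sketch_default_upscore_size(h, w, stride):
    return stride * (h - 1) + 1, stride * (w - 1) + 1
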
def build(self, inputs, phrase):
    """
    :param inputs: [pred_batch, label_batch]
    :param phrase: train / infer
    """
    assert phrase == 'train'  # infer shouldn't have access to labels
    pred_batch, label_batch = inputs
    sh = tf.shape(pred_batch)
    q = tf.reshape(pred_batch, (-1, sh[-1]))
    sh_new = tf.shape(q)
    p = tf.reshape(tf.cast(label_batch, dtypes.float32), sh_new)
    elem_loss = -tf.reduce_sum(tf.log(q) * p * self.class_weights, axis=1)
    if self.debug['elem_loss']:
        elem_loss = build_print_shape(elem_loss, "element loss:")
    loss = tf.reduce_mean(elem_loss, name='xentropy')
    tf.summary.scalar('xentropy_loss', loss)
    if self.debug['mean_loss']:
        loss = build_print_value(loss, msg="mean loss", first_n=9999)
    w_loss = tf.add_n(tf.get_collection('losses'), name='w_loss')
    total_loss = loss + w_loss
    if self.debug['total_loss']:
        total_loss = build_print_value(total_loss, msg="total loss",
                                       first_n=9999)
    return [total_loss, loss, ]

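# A minimal numpy sketch of the weighted cross-entropy built above (a
# hypothetical helper, not part of the graph): q holds per-pixel class
# probabilities of shape [num_pixels, num_classes], p the one-hot labels of
# the same shape, and class_weights re-weights each class before the per-pixel
# losses are averaged.
def _sketch_weighted_xentropy(q, p, class_weights):
    import numpy as np
    elem_loss = -np.sum(np.log(q) * p * class_weights, axis=1)  # per-pixel loss
    return elem_loss.mean()                                     # mean 'xentropy'
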
def _max_pool(self, bottom, slname, debug):
    """
    :param slname: name of the "superlayer", see @reload_conv_filter
    """
    pool = tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME', name=slname)
    if debug:
        pool = build_print_shape(pool, "maxpool {}:".format(slname), 1)
    return pool

def _conv_layer(self, bottom, lname, debug=False):
    """
    :param bottom: the input; either a mini-batch of images or the output of a
      previous layer.
    :param lname: conv layer name, see @reload_conv_filter
    :return: activation of the conv layer
    """
    # The variable scope is necessary: the variables carry generic names such
    # as "weights" in every layer, and the scope keeps them distinct.
    with tf.variable_scope(lname):
        filt = self.reload_conv_filter(lname)
        conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
        conv_bias = self.reload_internal_bias(lname)
        bias = tf.nn.bias_add(conv, conv_bias)
        relu = tf.nn.relu(bias)
        if debug:
            relu = build_print_shape(relu, "conv {}:".format(lname), 1)
    return relu

def _adapt_fc_layer(self, bottom, lname, shape_convert, do_relu, debug,
                    num_classes=None):
    """
    Partially reuse a pre-trained fully connected (by convolution) layer with
    fewer output classes. Specifically, VGG was trained on ImageNet with 1000
    classes to predict, but we usually have far fewer classes, so the weights
    of multiple original classes are combined into one target class.
    See also @adapt_bias.

    :param num_classes: desired number of output channels.
    """
    kshape = shape_convert['conv_kernel_shape']
    wshape = shape_convert['fc_weight_shape']
    with tf.variable_scope(lname):
        kweights_var = self.adapt_fc_filter(lname, kshape,
                                            num_classes, wshape)
        bias_var = self.adapt_pred_bias(lname, num_classes)
        conv = tf.nn.conv2d(bottom, kweights_var, [1, 1, 1, 1], padding='SAME')
        fc = tf.nn.bias_add(conv, bias_var)
        if do_relu:
            fc = tf.nn.relu(fc)
        if debug:
            fc = build_print_shape(fc, "fc {}:".format(lname))
    return fc

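# One plausible reading of "combine the weights for multiple original classes"
# (an assumption for illustration; the actual rule is implemented by
# `adapt_fc_filter`): split the pretrained output channels into num_classes
# groups and average within each group.
def _sketch_combine_output_channels(kernel, num_classes):
    """kernel: [kh, kw, cin, n_orig] with n_orig divisible by num_classes (simplification)."""
    import numpy as np
    kh, kw, cin, n_orig = kernel.shape
    grouped = np.reshape(kernel, (kh, kw, cin, num_classes, n_orig // num_classes))
    return grouped.mean(axis=4)  # [kh, kw, cin, num_classes]
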
def build(self, inputs, phrase):
    """
    :param inputs: one-element list [rgb_batch], an image batch tensor of
      shape [None x height x width x num_channels], i.e. the shape in which
      images are loaded.
    :param phrase: train or infer
    """
    self.phrase = phrase
    if phrase == 'train':
        self.dropout_rate = self.conf['solver']['dropout_rate']
        self.weight_decay_rate = self.conf['objective']['weight_decay']

    param_file = os.path.join(self.conf['path']['base'],
                              self.conf['encoder']['pre_trained_param'])
    logging.debug("pm:{} / {} / {}".format(
        self.conf['path']['base'],
        self.conf['encoder']['pre_trained_param'],
        param_file))
    self.vgg_params = self.load_pre_trained_vgg_weights(
        filename=param_file,
        channel_means=self.conf['encoder']['channel_means'])
    logging.info("Based on pre-trained VGG (cold start)"
                 "\n\t{}".format(param_file))

    rgb_batch = inputs[0]
    assert isinstance(rgb_batch, tf.Tensor)
    assert rgb_batch.get_shape().ndims == 4
    assert rgb_batch.get_shape()[3] == 3  # NxHxWxC, 3 channels, rgb

    with tf.name_scope("Processing"):
        rgb_batch = tf.cast(rgb_batch, dtype=dtypes.float32)
        ch_r, ch_g, ch_b = tf.split(rgb_batch, 3, axis=3)
        bgr_batch = tf.concat([
            ch_b - self.vgg_params['mean_b'],
            ch_g - self.vgg_params['mean_g'],
            ch_r - self.vgg_params['mean_r']], axis=3)
        if self.debug['input']:
            bgr_batch = build_print_shape(bgr_batch, "BGR Image", first_n=1)

    # VGG convolutional layers
    self.conv1_1 = self._conv_layer(bgr_batch, "conv1_1", False)
    self.conv1_2 = self._conv_layer(self.conv1_1, "conv1_2", False)
    self.pool1 = self._max_pool(self.conv1_2, "pool1", self.debug['conv'])

    self.conv2_1 = self._conv_layer(self.pool1, "conv2_1", False)
    self.conv2_2 = self._conv_layer(self.conv2_1, "conv2_2", False)
    self.pool2 = self._max_pool(self.conv2_2, "pool2", self.debug['conv'])

    self.conv3_1 = self._conv_layer(self.pool2, "conv3_1", False)
    self.conv3_2 = self._conv_layer(self.conv3_1, "conv3_2", False)
    self.conv3_3 = self._conv_layer(self.conv3_2, "conv3_3", False)
    self.pool3 = self._max_pool(self.conv3_3, "pool3", self.debug['conv'])

    self.conv4_1 = self._conv_layer(self.pool3, "conv4_1", False)
    self.conv4_2 = self._conv_layer(self.conv4_1, "conv4_2", False)
    self.conv4_3 = self._conv_layer(self.conv4_2, "conv4_3", False)
    self.pool4 = self._max_pool(self.conv4_3, "pool4", self.debug['conv'])

    self.conv5_1 = self._conv_layer(self.pool4, "conv5_1", False)
    self.conv5_2 = self._conv_layer(self.conv5_1, "conv5_2", False)
    self.conv5_3 = self._conv_layer(self.conv5_2, "conv5_3", False)
    self.pool5 = self._max_pool(self.conv5_3, "pool5", self.debug['conv'])

    # Fully connected layers, implemented as convolutions
    self.fc6 = self._fc_layer(self.pool5, "fc6",
                              shape_convert=self.fc_shape_convert['fc6'],
                              do_relu=True, debug=self.debug['fc'])
    if self.phrase == 'train':
        self.fc6 = tf.nn.dropout(self.fc6, self.dropout_rate)

    self.fc7 = self._fc_layer(self.fc6, "fc7",
                              shape_convert=self.fc_shape_convert['fc7'],
                              do_relu=True, debug=self.debug['fc'])
    if self.phrase == 'train':
        self.fc7 = tf.nn.dropout(self.fc7, self.dropout_rate)

    self.fc8 = self._adapt_fc_layer(self.fc7, "fc8",
                                    shape_convert=self.fc_shape_convert['fc8'],
                                    do_relu=False,
                                    num_classes=self.num_classes,
                                    debug=self.debug['fc'])

    # Upsample fc8 by 2x and fuse with the pool4 skip connection
    pool4_shape = tf.shape(self.pool4)
    self.upscore2 = self._upscore_layer(self.fc8, "upscore2",
                                        ksize=4, stride=2,
                                        num_classes=self.num_classes,
                                        up_w=pool4_shape[2],
                                        up_h=pool4_shape[1],
                                        debug=self.debug['up'])
    self.score_pool4 = self._score_layer(self.pool4, "score_pool4",
                                         num_classes=self.num_classes,
                                         random_weight_stddev=0.001)
    self.fuse_pool4 = tf.add(self.upscore2, self.score_pool4)

    # Upsample the fused prediction back to the input resolution
    input_shape = tf.shape(bgr_batch)
    self.upscore32 = self._upscore_layer(self.fuse_pool4, "upscore32",
                                         ksize=32, stride=16,
                                         num_classes=self.num_classes,
                                         up_w=input_shape[2],
                                         up_h=input_shape[1],
                                         debug=self.debug['up'])
    return [self.upscore32, ]

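# Worked scale arithmetic for the two upsampling stages above (illustrative
# helper, assuming an input height divisible by 32): fc8 sits at 1/32 of the
# input resolution after the five 2x2 max-pools, upscore2 (stride 2) brings it
# to 1/16 so it can be fused with score_pool4, and upscore32 (stride 16)
# returns the fused map to the input resolution, as in FCN-16s.
def _sketch_fcn16s_scales(input_h):
    fc8_h = input_h // 32          # after pool1 .. pool5
    upscore2_h = fc8_h * 2         # matches pool4 at input_h // 16
    upscore32_h = upscore2_h * 16  # back to input_h
    return fc8_h, upscore2_h, upscore32_h
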