class FCN8sMobileNet(BasicModel):
    """
    FCN-8s style segmentation model built on top of a MobileNet encoder.

    The decoder upsamples the encoder score map with transposed convolutions
    (stride 2, stride 2, then stride 8) and fuses it, by addition, with
    1x1-scored encoder features (`feed1`, then `feed2`) on the way up.
    """

    def __init__(self, args):
        super().__init__(args)
        # Filled in by init_network().
        self.encoder = None

    def build(self):
        """
        Run all graph-construction stages in order.
        Stages not defined in this class are expected to come from BasicModel.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Create the MobileNet encoder and the FCN-8s decoder; the final score
        map is stored in `self.logits`.
        :return:
        """
        n_classes = self.params.num_classes

        # Encoding part
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=n_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)
        self.encoder.build()

        enc = self.encoder
        wd = enc.wd

        # Decoding part
        with tf.name_scope('upscore_2s'):
            # Upsample the coarse scores to the spatial size of feed1.
            feed1_out_shape = enc.feed1.shape.as_list()[0:3] + [n_classes]
            self.upscore2 = conv2d_transpose('upscore2',
                                             x=enc.score_fr,
                                             output_shape=feed1_out_shape,
                                             kernel_size=(4, 4),
                                             stride=(2, 2),
                                             l2_strength=wd)
            self.score_feed1 = conv2d('score_feed1',
                                      x=enc.feed1,
                                      num_filters=n_classes,
                                      kernel_size=(1, 1),
                                      l2_strength=wd)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            # Upsample the fused scores to the spatial size of feed2.
            feed2_out_shape = enc.feed2.shape.as_list()[0:3] + [n_classes]
            self.upscore4 = conv2d_transpose('upscore4',
                                             x=self.fuse_feed1,
                                             output_shape=feed2_out_shape,
                                             kernel_size=(4, 4),
                                             stride=(2, 2),
                                             l2_strength=wd)
            self.score_feed2 = conv2d('score_feed2',
                                      x=enc.feed2,
                                      num_filters=n_classes,
                                      kernel_size=(1, 1),
                                      l2_strength=wd)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            # Final upsampling to the spatial size of the input placeholder.
            input_out_shape = self.x_pl.shape.as_list()[0:3] + [n_classes]
            self.upscore8 = conv2d_transpose('upscore8',
                                             x=self.fuse_feed2,
                                             output_shape=input_out_shape,
                                             kernel_size=(16, 16),
                                             stride=(8, 8),
                                             l2_strength=wd)

        self.logits = self.upscore8
class FCN8sMobileNetUpsample(BasicModel):
    """
    FCN-8s style model with a MobileNet encoder whose decoder upsamples with
    `tf.image.resize_images` followed by a convolution, instead of using
    transposed convolutions.
    """

    def __init__(self, args, phase=0):
        super().__init__(args, phase=phase)
        # Filled in by init_network().
        self.encoder = None

    def build(self):
        """
        Run all graph-construction stages in order.
        Stages not defined in this class are expected to come from BasicModel.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Create the MobileNet encoder and the resize-then-convolve decoder; the
        final score map is stored in `self.logits`.
        :return:
        """
        n_classes = self.params.num_classes

        def enlarge(tensor, factor):
            # Resize `tensor` so that its height and width (axes 1 and 2)
            # are multiplied by `factor`.
            dims = tensor.shape.as_list()[1:3]
            return tf.image.resize_images(
                tensor, (factor * dims[0], factor * dims[1]))

        # Encoding part
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=n_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)
        self.encoder.build()

        enc = self.encoder
        wd = enc.wd

        # Decoding part
        with tf.name_scope('upscore_2s'):
            resized_2x = enlarge(enc.score_fr, 2)
            self.upscore2 = conv2d('upscore2',
                                   x=resized_2x,
                                   num_filters=n_classes,
                                   l2_strength=wd)
            self.score_feed1 = conv2d('score_feed1',
                                      x=enc.feed1,
                                      num_filters=n_classes,
                                      kernel_size=(1, 1),
                                      l2_strength=wd)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            resized_4x = enlarge(self.fuse_feed1, 2)
            self.upscore4 = conv2d('upscore4',
                                   x=resized_4x,
                                   num_filters=n_classes,
                                   l2_strength=wd)
            self.score_feed2 = conv2d('score_feed2',
                                      x=enc.feed2,
                                      num_filters=n_classes,
                                      kernel_size=(1, 1),
                                      l2_strength=wd)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            resized_8x = enlarge(self.fuse_feed2, 8)
            self.upscore8 = conv2d('upscore8',
                                   x=resized_8x,
                                   num_filters=n_classes,
                                   l2_strength=wd)

        self.logits = self.upscore8
class DilationV2MobileNet(BasicModel):
    """
    MobileNet encoder whose tail is rebuilt from `conv4_1` with dilated
    (atrous) convolutions. Labels are fed at a resolution reduced by
    `args.targets_resize`, and the score map is only upsampled by a
    transposed convolution when that factor is below 8.
    """

    def __init__(self, args):
        super().__init__(args)
        # Filled in by init_network().
        self.encoder = None
        self.wd = self.args.weight_decay

        # Decoder layer handles
        self.upscore2 = None
        self.score_feed1 = None
        self.fuse_feed1 = None
        self.upscore4 = None
        self.score_feed2 = None
        self.fuse_feed2 = None
        self.upscore8 = None

        self.targets_resize = self.args.targets_resize

    def build(self):
        """
        Run all graph-construction stages in order.
        Stages not defined in this class are expected to come from BasicModel.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_input(self):
        """
        Create the input placeholders. The label placeholder has its height
        and width integer-divided by `self.targets_resize`.
        """
        batch = self.args.batch_size
        height = self.params.img_height
        width = self.params.img_width
        shrink = self.targets_resize

        with tf.name_scope('input'):
            self.x_pl = tf.placeholder(tf.float32, [batch, height, width, 3])
            self.y_pl = tf.placeholder(
                tf.int32, [batch, height // shrink, width // shrink])

            image_shape = self.x_pl.get_shape().as_list()
            label_shape = self.y_pl.get_shape().as_list()
            print('X_batch shape ', image_shape, ' ', label_shape)
            print('Afterwards: X_batch shape ', image_shape, ' ', label_shape)

            self.curr_learning_rate = tf.placeholder(tf.float32)
            if self.params.weighted_loss:
                # NOTE(review): the weight buffer keeps the full input
                # resolution, unlike y_pl -- confirm this is intended.
                self.wghts = np.zeros((batch, height, width),
                                      dtype=np.float32)
            self.is_training = tf.placeholder(tf.bool)

    def init_network(self):
        """
        Create the MobileNet encoder and the dilated tail; the final score
        map is stored in `self.logits`.
        :return:
        """
        n_classes = self.params.num_classes

        def dilated(name, x, filters, rate):
            # 3x3 atrous convolution with batch-norm and ReLU.
            layer = atrous_conv2d(name, x,
                                  num_filters=filters,
                                  kernel_size=(3, 3),
                                  padding='SAME',
                                  activation=tf.nn.relu,
                                  dilation_rate=rate,
                                  batchnorm_enabled=True,
                                  is_training=self.is_training,
                                  l2_strength=self.wd)
            _debug(layer)
            return layer

        def separable(name, x, filters):
            # 3x3 stride-1 depthwise-separable convolution with batch-norm
            # and ReLU.
            layer = depthwise_separable_conv2d(
                name, x,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=filters,
                kernel_size=(3, 3),
                padding='SAME',
                stride=(1, 1),
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(layer)
            return layer

        # Encoding part
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=n_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)
        self.encoder.build()

        # Decoding part
        with tf.name_scope('dilation_2'):
            self.conv4_2 = dilated('conv_ds_7_dil', self.encoder.conv4_1,
                                   512, 2)
            self.conv5_1 = separable('conv_ds_8_dil', self.conv4_2, 512)
            self.conv5_2 = separable('conv_ds_9_dil', self.conv5_1, 512)
            self.conv5_3 = separable('conv_ds_10_dil', self.conv5_2, 512)
            self.conv5_4 = separable('conv_ds_11_dil', self.conv5_3, 512)
            self.conv5_5 = separable('conv_ds_12_dil', self.conv5_4, 512)
            self.conv5_6 = dilated('conv_ds_13_dil', self.conv5_5, 1024, 4)
            self.conv6_1 = separable('conv_ds_14_dil', self.conv5_6, 1024)

            # Pooling is removed.
            self.score_fr = conv2d('conv_1c_1x1_dil', self.conv6_1,
                                   num_filters=n_classes,
                                   l2_strength=self.wd,
                                   kernel_size=(1, 1))
            _debug(self.score_fr)

            if self.targets_resize < 8:
                # NOTE(review): this overwrites `self.targets_resize` with
                # the upsampling factor (8 // original value); after this
                # point the attribute no longer holds the value read from
                # args in __init__.
                self.targets_resize = 8 // self.targets_resize
                factor = self.targets_resize

                label_out_shape = (self.y_pl.shape.as_list()[0:3]
                                   + [n_classes])
                self.upscore8 = conv2d_transpose(
                    'upscore8',
                    x=self.score_fr,
                    output_shape=label_out_shape,
                    kernel_size=(factor * 2, factor * 2),
                    stride=(factor, factor),
                    l2_strength=self.encoder.wd,
                    is_training=self.is_training)
                _debug(self.upscore8)
                self.logits = self.upscore8
            else:
                # The score map is used directly as logits.
                self.logits = self.score_fr
class UNetMobileNet(BasicModel):
    """
    U-Net style segmentation model on top of a MobileNet encoder: the decoder
    repeatedly upsamples with stride-2 transposed convolutions and adds the
    matching encoder feature map as a skip connection.
    """

    def __init__(self, args, phase=0):
        super().__init__(args, phase=phase)
        # Filled in by init_network().
        self.encoder = None

    def build(self):
        """
        Run all graph-construction stages in order.
        Stages not defined in this class are expected to come from BasicModel.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    @staticmethod
    def _debug(operation):
        """Print the op name and static output shape of `operation`."""
        layer_name = operation.op.name
        out_shape = str(operation.shape.as_list())
        print("Layer_name: " + layer_name + " -Output_Shape: " + out_shape)

    def init_network(self):
        """
        Create the MobileNet encoder and the U-Net decoder; the final score
        map is stored in `self.logits`.
        :return:
        """
        # Encoding part
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=self.params.num_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)
        self.encoder.build()

        enc = self.encoder
        wd = enc.wd
        training = self.is_training

        def skip_stage(index, x, skip):
            # One decoder stage:
            #   1x1 conv to the skip's channel count
            #   -> stride-2 transposed conv to the skip's shape
            #   -> add the skip tensor
            #   -> 1x1 conv.
            # Returns the four intermediate tensors in creation order.
            channels = skip.shape.as_list()[3]

            reduced = conv2d('expand%d_1' % index,
                             x=x,
                             batchnorm_enabled=True,
                             is_training=training,
                             num_filters=channels,
                             kernel_size=(1, 1),
                             l2_strength=wd)
            self._debug(reduced)

            upscaled = conv2d_transpose('upscale%d' % index,
                                        x=reduced,
                                        is_training=training,
                                        output_shape=skip.shape.as_list(),
                                        batchnorm_enabled=True,
                                        kernel_size=(4, 4),
                                        stride=(2, 2),
                                        l2_strength=wd)
            self._debug(upscaled)

            fused = tf.add(upscaled, skip)
            self._debug(fused)

            refined = conv2d('expand%d_2' % index,
                             x=fused,
                             batchnorm_enabled=True,
                             is_training=training,
                             num_filters=channels,
                             kernel_size=(1, 1),
                             l2_strength=wd)
            self._debug(refined)

            return reduced, upscaled, fused, refined

        # Decoding part
        with tf.name_scope('upscale_1'):
            (self.expand11, self.upscale1,
             self.add1, self.expand12) = skip_stage(1, enc.conv5_6,
                                                    enc.conv5_5)

        with tf.name_scope('upscale_2'):
            (self.expand21, self.upscale2,
             self.add2, self.expand22) = skip_stage(2, self.expand12,
                                                    enc.conv4_1)

        with tf.name_scope('upscale_3'):
            (self.expand31, self.upscale3,
             self.add3, self.expand32) = skip_stage(3, self.expand22,
                                                    enc.conv3_1)

        with tf.name_scope('upscale_4'):
            (self.expand41, self.upscale4,
             self.add4, self.expand42) = skip_stage(4, self.expand32,
                                                    enc.conv2_1)

        with tf.name_scope('upscale_5'):
            # Last upsampling goes to the input's spatial size and has no
            # skip connection; channel count is taken from conv2_1.
            full_res_shape = (self.x_pl.shape.as_list()[0:3]
                              + [enc.conv2_1.shape.as_list()[3]])
            self.upscale5 = conv2d_transpose('upscale5',
                                             x=self.expand42,
                                             batchnorm_enabled=True,
                                             is_training=training,
                                             output_shape=full_res_shape,
                                             kernel_size=(4, 4),
                                             stride=(2, 2),
                                             l2_strength=wd)
            self._debug(self.upscale5)

            self.expand5 = conv2d(
                'expand5',
                x=self.upscale5,
                batchnorm_enabled=True,
                is_training=training,
                num_filters=enc.conv1_1.shape.as_list()[3],
                kernel_size=(1, 1),
                dropout_keep_prob=0.5,
                l2_strength=wd)
            self._debug(self.expand5)

        with tf.name_scope('final_score'):
            self.fscore = conv2d('fscore',
                                 x=self.expand5,
                                 num_filters=self.params.num_classes,
                                 kernel_size=(1, 1),
                                 l2_strength=wd)
            self._debug(self.fscore)

        self.logits = self.fscore
class FCN8s2StreamMobileNet(BasicModel):
    """
    Two-stream FCN-8s style model. One MobileNet encoder consumes `x_pl` and
    a second one consumes `flo_pl` (presumably optical flow -- confirm with
    the data pipeline). Their `conv3_2` features are multiplied element-wise
    and passed through a shared depthwise-separable stack and an FCN-8s
    decoder.
    """

    def __init__(self, args):
        super().__init__(args)
        # Kept for parity with the single-stream models; this class stores
        # its encoders in `app_encoder` / `motion_encoder` instead.
        self.encoder = None

    def build(self):
        """
        Run all graph-construction stages in order.
        Stages not defined in this class are expected to come from BasicModel.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_input(self):
        """Create the image, second-stream and label placeholders."""
        batch = self.args.batch_size
        height = self.params.img_height
        width = self.params.img_width

        with tf.name_scope('input'):
            self.x_pl = tf.placeholder(tf.float32, [batch, height, width, 3])
            self.flo_pl = tf.placeholder(tf.float32,
                                         [batch, height, width, 3])
            self.y_pl = tf.placeholder(tf.int32, [batch, height, width])

            if self.params.weighted_loss:
                self.wghts = np.zeros((batch, height, width),
                                      dtype=np.float32)
            self.is_training = tf.placeholder(tf.bool)

    def init_summaries(self):
        """
        Create the accuracy metric, the image summary tensor (input, second
        stream and decoded predictions concatenated along axis 2), the scalar
        training summaries and the best-IoU bookkeeping variable.
        """
        with tf.name_scope('pixel_wise_accuracy'):
            correct = tf.equal(self.y_pl, self.out_argmax)
            self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        with tf.name_scope('segmented_output'):
            image_view = tf.cast(self.x_pl, tf.uint8)
            flow_view = tf.cast(self.flo_pl, tf.uint8)
            # labels_summary = tf.py_func(decode_labels, [self.y_pl, self.params.num_classes], tf.uint8)
            prediction_view = tf.py_func(
                decode_labels,
                [self.out_argmax, self.params.num_classes],
                tf.uint8)
            # Concatenate row-wise
            self.segmented_summary = tf.concat(
                axis=2, values=[image_view, flow_view, prediction_view])

        # Every step evaluate these summaries
        if self.loss is not None:
            with tf.name_scope('train-summary'):
                tf.summary.scalar('loss', self.loss)
                tf.summary.scalar('pixel_wise_accuracy', self.accuracy)
        self.merged_summaries = tf.summary.merge_all()

        # Save the best iou on validation
        self.best_iou_tensor = tf.Variable(0.0, trainable=False,
                                           name='best_iou')
        self.best_iou_input = tf.placeholder('float32', None,
                                             name='best_iou_input')
        self.best_iou_assign_op = self.best_iou_tensor.assign(
            self.best_iou_input)

    def init_network(self):
        """
        Create both MobileNet encoders, the fused depthwise-separable stack
        and the FCN-8s decoder; the final score map is stored in
        `self.logits`.
        :return:
        """
        n_classes = self.params.num_classes

        def separable(name, x, filters, stride):
            # 3x3 depthwise-separable convolution with batch-norm and ReLU6.
            layer = depthwise_separable_conv2d(
                name, x,
                width_multiplier=self.width_multiplier,
                num_filters=filters,
                kernel_size=(3, 3),
                padding='SAME',
                stride=stride,
                activation=tf.nn.relu6,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.args.weight_decay)
            _debug(layer)
            return layer

        # Encoding part: one encoder per stream, each with its own variable
        # prefix and mean file.
        self.app_encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=n_classes,
            prefix='app_',
            pretrained_path=self.args.pretrained_path,
            mean_path=self.args.data_dir + 'mean.npy',
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.motion_encoder = MobileNet(
            x_input=self.flo_pl,
            num_classes=n_classes,
            prefix='mot_',
            pretrained_path=self.args.pretrained_path,
            mean_path=self.args.data_dir + 'flo_mean.npy',
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)

        self.app_encoder.build()
        self.motion_encoder.build()

        # Fuse the two streams by element-wise multiplication.
        self.feed2 = tf.multiply(self.app_encoder.conv3_2,
                                 self.motion_encoder.conv3_2)
        self.width_multiplier = 1.0

        self.conv4_1 = separable('conv_ds_6_1', self.feed2, 256, (1, 1))
        self.conv4_2 = separable('conv_ds_7_1', self.conv4_1, 512, (2, 2))
        self.conv5_1 = separable('conv_ds_8_1', self.conv4_2, 512, (1, 1))
        self.conv5_2 = separable('conv_ds_9_1', self.conv5_1, 512, (1, 1))
        self.conv5_3 = separable('conv_ds_10_1', self.conv5_2, 512, (1, 1))
        self.conv5_4 = separable('conv_ds_11_1', self.conv5_3, 512, (1, 1))
        self.conv5_5 = separable('conv_ds_12_1', self.conv5_4, 512, (1, 1))
        self.conv5_6 = separable('conv_ds_13_1', self.conv5_5, 1024, (2, 2))
        self.conv6_1 = separable('conv_ds_14_1', self.conv5_6, 1024, (1, 1))

        # Pooling is removed.
        self.score_fr = conv2d('conv_1c_1x1_1', self.conv6_1,
                               num_filters=n_classes,
                               l2_strength=self.args.weight_decay,
                               kernel_size=(1, 1))
        self.feed1 = self.conv4_2

        # Decoding part
        with tf.name_scope('upscore_2s'):
            feed1_out_shape = self.feed1.shape.as_list()[0:3] + [n_classes]
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.score_fr,
                output_shape=feed1_out_shape,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore2)

            self.score_feed1 = conv2d(
                'score_feed1',
                x=self.feed1,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                num_filters=n_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.args.weight_decay)
            _debug(self.score_feed1)

            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            feed2_out_shape = self.feed2.shape.as_list()[0:3] + [n_classes]
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=feed2_out_shape,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore4)

            self.score_feed2 = conv2d(
                'score_feed2',
                x=self.feed2,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                num_filters=n_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.args.weight_decay)
            _debug(self.score_feed2)

            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            # Unlike the two stages above, no batchnorm_enabled argument is
            # passed to this layer.
            input_out_shape = self.x_pl.shape.as_list()[0:3] + [n_classes]
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=input_out_shape,
                is_training=self.is_training,
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore8)

        self.logits = self.upscore8
class DilationMobileNet(BasicModel):
    """
    MobileNet encoder whose tail is rebuilt from `conv4_1` with dilated
    (atrous) convolutions, followed by a single stride-8 transposed
    convolution up to the input's spatial size.
    """

    def __init__(self, args):
        super().__init__(args)
        # Filled in by init_network().
        self.encoder = None
        self.wd = self.args.weight_decay

        # Decoder layer handles
        self.upscore2 = None
        self.score_feed1 = None
        self.fuse_feed1 = None
        self.upscore4 = None
        self.score_feed2 = None
        self.fuse_feed2 = None
        self.upscore8 = None

    def build(self):
        """
        Run all graph-construction stages in order.
        Stages not defined in this class are expected to come from BasicModel.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Create the MobileNet encoder and the dilated tail; the final score
        map is stored in `self.logits`.
        :return:
        """
        n_classes = self.params.num_classes

        def dilated(name, x, filters, rate):
            # 3x3 atrous convolution with batch-norm and ReLU.
            layer = atrous_conv2d(name, x,
                                  num_filters=filters,
                                  kernel_size=(3, 3),
                                  padding='SAME',
                                  activation=tf.nn.relu,
                                  dilation_rate=rate,
                                  batchnorm_enabled=True,
                                  is_training=self.is_training,
                                  l2_strength=self.wd)
            _debug(layer)
            return layer

        def separable(name, x, filters):
            # 3x3 stride-1 depthwise-separable convolution with batch-norm
            # and ReLU.
            layer = depthwise_separable_conv2d(
                name, x,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=filters,
                kernel_size=(3, 3),
                padding='SAME',
                stride=(1, 1),
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(layer)
            return layer

        # Encoding part
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=n_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)
        self.encoder.build()

        # Decoding part
        with tf.name_scope('dilation_2'):
            self.conv4_2 = dilated('conv_ds_7_dil', self.encoder.conv4_1,
                                   512, 2)
            self.conv5_1 = separable('conv_ds_8_dil', self.conv4_2, 512)
            self.conv5_2 = separable('conv_ds_9_dil', self.conv5_1, 512)
            self.conv5_3 = separable('conv_ds_10_dil', self.conv5_2, 512)
            self.conv5_4 = separable('conv_ds_11_dil', self.conv5_3, 512)
            self.conv5_5 = separable('conv_ds_12_dil', self.conv5_4, 512)
            self.conv5_6 = dilated('conv_ds_13_dil', self.conv5_5, 1024, 4)
            self.conv6_1 = separable('conv_ds_14_dil', self.conv5_6, 1024)

            # Pooling is removed.
            self.score_fr = conv2d('conv_1c_1x1_dil', self.conv6_1,
                                   num_filters=n_classes,
                                   l2_strength=self.wd,
                                   batchnorm_enabled=True,
                                   is_training=self.is_training,
                                   kernel_size=(1, 1))
            _debug(self.score_fr)

            # Single-step upsampling to the input's spatial size.
            input_out_shape = self.x_pl.shape.as_list()[0:3] + [n_classes]
            self.upscore8 = conv2d_transpose('upscore8',
                                             x=self.score_fr,
                                             output_shape=input_out_shape,
                                             kernel_size=(16, 16),
                                             stride=(8, 8),
                                             l2_strength=self.encoder.wd,
                                             is_training=self.is_training)
            _debug(self.upscore8)

            self.logits = self.upscore8
class FCN8sMobileNetTFRecords(BasicModel):
    """
    FCN-8s style model with a MobileNet encoder that, in 'train' mode, reads
    its inputs from TFRecord files through a feedable iterator instead of the
    regular placeholders.
    """

    def __init__(self, args):
        super().__init__(args)
        # Filled in by init_network().
        self.encoder = None

        # Decoder layer handles
        self.upscore2 = None
        self.score_feed1 = None
        self.fuse_feed1 = None
        self.upscore4 = None
        self.score_feed2 = None
        self.fuse_feed2 = None
        self.upscore8 = None

        # TFRecord input state, filled in by init_tfrecord_input()
        self.handle = None
        self.training_iterator = None
        self.validation_iterator = None
        self.next_img = None
        self.training_handle = None
        self.validation_handle = None

        # May be None if no session is the default yet; it is re-resolved in
        # init_tfrecord_input() before it is used.
        self.sess = tf.get_default_session()

    def build(self):
        """
        Run all graph-construction stages in order. The TFRecord input stage
        runs right after init_input() so it can replace `x_pl` / `y_pl`.
        Stages not defined in this class are expected to come from BasicModel.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_tfrecord_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    @staticmethod
    def _parse_record(record):
        """
        Decode one serialized `tf.Example` into an (image, annotation) pair.

        The example must carry the int64 features 'height' and 'width' and
        the raw-bytes features 'image_raw' and 'mask_raw'. Both byte strings
        are decoded as uint8; the image is cast to float32 and reshaped to
        [height, width, 3], the mask is cast to int32 and reshaped to
        [height, width].
        """
        keys_to_features = {
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string)
        }
        parsed = tf.parse_single_example(record, keys_to_features)

        image = tf.cast(tf.decode_raw(parsed['image_raw'], tf.uint8),
                        tf.float32)
        annotation = tf.cast(tf.decode_raw(parsed['mask_raw'], tf.uint8),
                             tf.int32)

        height = tf.cast(parsed['height'], tf.int32)
        width = tf.cast(parsed['width'], tf.int32)

        image = tf.reshape(image, tf.stack([height, width, 3]))
        annotation = tf.reshape(annotation, tf.stack([height, width]))
        return image, annotation

    def init_tfrecord_input(self):
        """
        In 'train' mode, build the TFRecord training/validation pipelines and
        rebind `self.x_pl` / `self.y_pl` to the output of a feedable iterator
        selected through the `self.handle` placeholder. Does nothing in any
        other mode.

        :raises RuntimeError: if no default TensorFlow session is available
            to evaluate the iterator string handles.
        """
        if self.args.mode != 'train':
            return

        print("USING TF RECORDS")

        # The session captured in __init__ is None when the model was
        # constructed outside a default-session context; try again now and
        # fail with an explicit message instead of an AttributeError on None.
        if self.sess is None:
            self.sess = tf.get_default_session()
        if self.sess is None:
            raise RuntimeError(
                "FCN8sMobileNetTFRecords needs a default tf.Session to "
                "evaluate the dataset iterator handles; build the model "
                "inside `with sess.as_default():`.")

        # NOTE(review): the training shards are hard-coded and
        # args.tfrecord_train_file is currently not used. The third entry
        # ends in '.tfrecord' while the others end in '.tfrecords' -- confirm
        # against the files on disk.
        train_files = [
            './data/cscapes_train_1.tfrecords',
            './data/cscapes_train_2.tfrecords',
            './data/cscapes_train_3.tfrecord',
            './data/cscapes_train_4.tfrecords',
        ]
        train_dataset = tf.contrib.data.TFRecordDataset(train_files)
        train_dataset = train_dataset.map(self._parse_record)
        train_dataset = train_dataset.shuffle(
            buffer_size=self.args.tfrecord_train_len)
        train_dataset = train_dataset.batch(self.args.batch_size)
        train_dataset = train_dataset.repeat()

        val_filename = "./data/" + self.args.tfrecord_val_file
        val_dataset = tf.contrib.data.TFRecordDataset(val_filename)
        val_dataset = val_dataset.map(self._parse_record)
        val_dataset = val_dataset.batch(self.args.batch_size)

        self.training_iterator = train_dataset.make_one_shot_iterator()
        self.validation_iterator = val_dataset.make_initializable_iterator()

        # String handles are what gets fed into `self.handle` to switch
        # between the training and validation iterators.
        self.training_handle = self.sess.run(
            self.training_iterator.string_handle())
        self.validation_handle = self.sess.run(
            self.validation_iterator.string_handle())

        self.handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.contrib.data.Iterator.from_string_handle(
            self.handle,
            train_dataset.output_types,
            train_dataset.output_shapes)

        self.next_img = iterator.get_next()
        self.x_pl, self.y_pl = self.next_img

        # The parsed tensors have dynamic shapes; pin the spatial dimensions
        # so later layers can read them from the static shape.
        self.x_pl.set_shape(
            [None, self.args.img_height, self.args.img_width, 3])
        self.y_pl.set_shape(
            [None, self.args.img_height, self.args.img_width])

    def init_network(self):
        """
        Create the MobileNet encoder and the FCN-8s decoder; the final score
        map is stored in `self.logits`. Because the batch dimension of the
        iterator output is left as None, the transposed-convolution output
        shapes take their batch size from `args.batch_size`.
        :return:
        """
        n_classes = self.params.num_classes
        batch = [self.args.batch_size]

        # Encoding part
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=n_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)
        self.encoder.build()

        enc = self.encoder
        wd = enc.wd

        # Decoding part
        with tf.name_scope('upscore_2s'):
            feed1_out_shape = (batch + enc.feed1.shape.as_list()[1:3]
                               + [n_classes])
            self.upscore2 = conv2d_transpose('upscore2',
                                             x=enc.score_fr,
                                             output_shape=feed1_out_shape,
                                             kernel_size=(4, 4),
                                             stride=(2, 2),
                                             l2_strength=wd)
            self.score_feed1 = conv2d('score_feed1',
                                      x=enc.feed1,
                                      num_filters=n_classes,
                                      kernel_size=(1, 1),
                                      l2_strength=wd)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            feed2_out_shape = (batch + enc.feed2.shape.as_list()[1:3]
                               + [n_classes])
            self.upscore4 = conv2d_transpose('upscore4',
                                             x=self.fuse_feed1,
                                             output_shape=feed2_out_shape,
                                             kernel_size=(4, 4),
                                             stride=(2, 2),
                                             l2_strength=wd)
            self.score_feed2 = conv2d('score_feed2',
                                      x=enc.feed2,
                                      num_filters=n_classes,
                                      kernel_size=(1, 1),
                                      l2_strength=wd)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            input_out_shape = (batch + self.x_pl.shape.as_list()[1:3]
                               + [n_classes])
            self.upscore8 = conv2d_transpose('upscore8',
                                             x=self.fuse_feed2,
                                             output_shape=input_out_shape,
                                             kernel_size=(16, 16),
                                             stride=(8, 8),
                                             l2_strength=wd)

        self.logits = self.upscore8