class FCN8s2StreamShuffleNet2(BasicModel):
    """
    Two-stream FCN8s with ShuffleNet encoders (appearance + optical flow)
    Model Architecture
    """

    def __init__(self, args):
        super().__init__(args)
        # init encoder
        self.encoder = None
        # init network layers

    def build(self):
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        if self.args.data_mode == 'experiment':
            self.init_train()
            self.init_summaries()
        print("The Model is built successfully\n")

    def init_input(self):
        with tf.name_scope('input'):
            self.x_pl = tf.placeholder(tf.float32,
                                       [self.args.batch_size, self.params.img_height, self.params.img_width, 3])
            self.flo_pl = tf.placeholder(tf.float32,
                                         [self.args.batch_size, self.params.img_height, self.params.img_width, 3])
            self.y_pl = tf.placeholder(tf.int32,
                                       [self.args.batch_size, self.params.img_height, self.params.img_width])
            if self.params.weighted_loss:
                self.wghts = np.zeros((self.args.batch_size, self.params.img_height, self.params.img_width),
                                      dtype=np.float32)
            self.is_training = tf.placeholder(tf.bool)

    def init_summaries(self):
        with tf.name_scope('pixel_wise_accuracy'):
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.y_pl, self.out_argmax), tf.float32))

        with tf.name_scope('segmented_output'):
            input_summary = tf.cast(self.x_pl, tf.uint8)
            flow_summary = tf.cast(self.flo_pl, tf.uint8)
            # labels_summary = tf.py_func(decode_labels, [self.y_pl, self.params.num_classes], tf.uint8)
            preds_summary = tf.py_func(decode_labels, [self.out_argmax, self.params.num_classes], tf.uint8)
            # Concatenate row-wise
            self.segmented_summary = tf.concat(axis=2, values=[input_summary, flow_summary, preds_summary])

        # Every step evaluate these summaries
        if self.loss is not None:
            with tf.name_scope('train-summary'):
                tf.summary.scalar('loss', self.loss)
                tf.summary.scalar('pixel_wise_accuracy', self.accuracy)
            self.merged_summaries = tf.summary.merge_all()

        # Save the best iou on validation
        self.best_iou_tensor = tf.Variable(0.0, trainable=False, name='best_iou')
        self.best_iou_input = tf.placeholder('float32', None, name='best_iou_input')
        self.best_iou_assign_op = self.best_iou_tensor.assign(self.best_iou_input)

    def init_network(self):
        """
        Building the Network here
        :return:
        """
        # Init ShuffleNet as the appearance-stream encoder (RGB input)
        self.app_encoder = ShuffleNet(x_input=self.x_pl,
                                      num_classes=self.params.num_classes,
                                      prefix='app_',
                                      pretrained_path=self.args.pretrained_path,
                                      train_flag=self.is_training,
                                      batchnorm_enabled=self.args.batchnorm_enabled,
                                      num_groups=self.args.num_groups,
                                      weight_decay=self.args.weight_decay,
                                      bias=self.args.bias,
                                      mean_path=self.args.data_dir + 'mean.npy')
        # Init ShuffleNet as the motion-stream encoder (optical-flow input)
        self.motion_encoder = ShuffleNet(x_input=self.flo_pl,
                                         num_classes=self.params.num_classes,
                                         prefix='mot_',
                                         pretrained_path=self.args.pretrained_path,
                                         train_flag=self.is_training,
                                         batchnorm_enabled=self.args.batchnorm_enabled,
                                         num_groups=self.args.num_groups,
                                         weight_decay=self.args.weight_decay,
                                         bias=self.args.bias,
                                         mean_path=self.args.data_dir + 'flo_mean.npy')

        # Build Encoding part
        self.app_encoder.build()
        self.motion_encoder.build()

        # Fuse the two streams by element-wise multiplication of their feature maps
        self.combined_score = tf.multiply(self.app_encoder.score_fr, self.motion_encoder.score_fr)
        self.combined_feed1 = tf.multiply(self.app_encoder.feed1, self.motion_encoder.feed1)
        self.combined_feed2 = tf.multiply(self.app_encoder.feed2, self.motion_encoder.feed2)

        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose('upscore2', x=self.combined_score,
                                             output_shape=self.combined_feed1.shape.as_list()[0:3]
                                             + [self.params.num_classes],
                                             batchnorm_enabled=self.args.batchnorm_enabled,
                                             kernel_size=(4, 4), stride=(2, 2),
                                             l2_strength=self.app_encoder.wd,
                                             bias=self.args.bias)
            currvars = get_vars_underscope(tf.get_variable_scope().name, 'upscore2')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

            self.score_feed1 = conv2d('score_feed1', x=self.combined_feed1,
                                      batchnorm_enabled=self.args.batchnorm_enabled,
                                      num_filters=self.params.num_classes, kernel_size=(1, 1),
                                      bias=self.args.bias,
                                      l2_strength=self.app_encoder.wd)
            currvars = get_vars_underscope(tf.get_variable_scope().name, 'score_feed1')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose('upscore4', x=self.fuse_feed1,
                                             output_shape=self.combined_feed2.shape.as_list()[0:3]
                                             + [self.params.num_classes],
                                             batchnorm_enabled=self.args.batchnorm_enabled,
                                             kernel_size=(4, 4), stride=(2, 2),
                                             l2_strength=self.app_encoder.wd,
                                             bias=self.args.bias)
            currvars = get_vars_underscope(tf.get_variable_scope().name, 'upscore4')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

            self.score_feed2 = conv2d('score_feed2', x=self.combined_feed2,
                                      batchnorm_enabled=self.args.batchnorm_enabled,
                                      num_filters=self.params.num_classes, kernel_size=(1, 1),
                                      bias=self.args.bias,
                                      l2_strength=self.app_encoder.wd)
            currvars = get_vars_underscope(tf.get_variable_scope().name, 'score_feed2')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose('upscore8', x=self.fuse_feed2,
                                             output_shape=self.x_pl.shape.as_list()[0:3] + [self.params.num_classes],
                                             kernel_size=(16, 16), stride=(8, 8),
                                             l2_strength=self.app_encoder.wd,
                                             bias=self.args.bias)
            currvars = get_vars_underscope(tf.get_variable_scope().name, 'upscore8')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

        self.logits = self.upscore8
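
# --- Illustrative sketch (not part of the original repository) ---
# The two-stream model above fuses its appearance and motion encoders by element-wise
# multiplication of the score_fr / feed1 / feed2 feature maps before decoding. The helper
# below is a hypothetical, stand-alone illustration of that fusion step only; it assumes
# TensorFlow 1.x graph mode, and the placeholder names and shapes are made up for the demo.
def _demo_two_stream_fusion():
    import tensorflow as tf

    app_feat = tf.placeholder(tf.float32, [1, 32, 64, 20], name='demo_app_feat')  # appearance feature map
    mot_feat = tf.placeholder(tf.float32, [1, 32, 64, 20], name='demo_mot_feat')  # motion feature map
    # Gating-style fusion: a location keeps a strong response only if both streams respond there.
    fused = tf.multiply(app_feat, mot_feat)
    return fused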
class DilationShuffleNet(BasicModel):
    """
    Dilated-convolution architecture with ShuffleNet as an encoder
    Model Architecture
    """

    def __init__(self, args, phase=0):
        super().__init__(args, phase=phase)
        # init encoder
        self.encoder = None
        # init network layers

    def build(self):
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Building the Network here
        :return:
        """
        # Init ShuffleNet as an encoder
        self.encoder = ShuffleNet(x_input=self.x_pl,
                                  num_classes=self.params.num_classes,
                                  pretrained_path=self.args.pretrained_path,
                                  train_flag=self.is_training,
                                  batchnorm_enabled=self.args.batchnorm_enabled,
                                  num_groups=self.args.num_groups,
                                  weight_decay=self.args.weight_decay,
                                  bias=self.args.bias)

        # Build Encoding part
        self.encoder.build()

        # Re-run stages 3 and 4 with dilated convolutions instead of further striding
        with tf.name_scope('dilation_2'):
            self.stage3 = self.encoder.stage(self.encoder.stage2, stage=3, repeat=7, dilation=2)
            _debug(self.stage3)
            self.stage4 = self.encoder.stage(self.stage3, stage=4, repeat=3, dilation=4)
            _debug(self.stage4)
            self.score_fr = conv2d('score_fr_dil', x=self.stage4, num_filters=self.params.num_classes,
                                   kernel_size=(1, 1), l2_strength=self.encoder.wd, is_training=self.is_training)
            _debug(self.score_fr)
            self.upscore8 = conv2d_transpose('upscore8', x=self.score_fr,
                                             output_shape=self.x_pl.shape.as_list()[0:3]
                                             + [self.params.num_classes],
                                             kernel_size=(16, 16), stride=(8, 8),
                                             l2_strength=self.encoder.wd, is_training=self.is_training)
            _debug(self.upscore8)

        self.logits = self.upscore8
class FCN8sShuffleNet(BasicModel):
    """
    FCN8s with ShuffleNet as an encoder
    Model Architecture
    """

    def __init__(self, args):
        super().__init__(args)
        # init encoder
        self.encoder = None
        # init network layers

    def build(self):
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Building the Network here
        :return:
        """
        # Init ShuffleNet as an encoder
        self.encoder = ShuffleNet(x_input=self.x_pl,
                                  num_classes=self.params.num_classes,
                                  pretrained_path=self.args.pretrained_path,
                                  train_flag=self.is_training,
                                  batchnorm_enabled=self.args.batchnorm_enabled,
                                  num_groups=self.args.num_groups,
                                  weight_decay=self.args.weight_decay,
                                  bias=self.args.bias)

        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose('upscore2', x=self.encoder.score_fr,
                                             output_shape=self.encoder.feed1.shape.as_list()[0:3]
                                             + [self.params.num_classes],
                                             batchnorm_enabled=self.args.batchnorm_enabled,
                                             is_training=self.is_training,
                                             kernel_size=(4, 4), stride=(2, 2),
                                             l2_strength=self.encoder.wd,
                                             bias=self.args.bias)
            # currvars = get_vars_underscope(tf.get_variable_scope().name, 'upscore2')
            # for v in currvars:
            #     tf.add_to_collection('decoding_trainable_vars', v)

            self.score_feed1 = conv2d('score_feed1', x=self.encoder.feed1,
                                      batchnorm_enabled=self.args.batchnorm_enabled,
                                      is_training=self.is_training,
                                      num_filters=self.params.num_classes, kernel_size=(1, 1),
                                      bias=self.args.bias,
                                      l2_strength=self.encoder.wd)
            # currvars = get_vars_underscope(tf.get_variable_scope().name, 'score_feed1')
            # for v in currvars:
            #     tf.add_to_collection('decoding_trainable_vars', v)

            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose('upscore4', x=self.fuse_feed1,
                                             output_shape=self.encoder.feed2.shape.as_list()[0:3]
                                             + [self.params.num_classes],
                                             batchnorm_enabled=self.args.batchnorm_enabled,
                                             is_training=self.is_training,
                                             kernel_size=(4, 4), stride=(2, 2),
                                             l2_strength=self.encoder.wd,
                                             bias=self.args.bias)
            # currvars = get_vars_underscope(tf.get_variable_scope().name, 'upscore4')
            # for v in currvars:
            #     tf.add_to_collection('decoding_trainable_vars', v)

            self.score_feed2 = conv2d('score_feed2', x=self.encoder.feed2,
                                      batchnorm_enabled=self.args.batchnorm_enabled,
                                      is_training=self.is_training,
                                      num_filters=self.params.num_classes, kernel_size=(1, 1),
                                      bias=self.args.bias,
                                      l2_strength=self.encoder.wd)
            # currvars = get_vars_underscope(tf.get_variable_scope().name, 'score_feed2')
            # for v in currvars:
            #     tf.add_to_collection('decoding_trainable_vars', v)

            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose('upscore8', x=self.fuse_feed2,
                                             output_shape=self.x_pl.shape.as_list()[0:3] + [self.params.num_classes],
                                             is_training=self.is_training,
                                             kernel_size=(16, 16), stride=(8, 8),
                                             l2_strength=self.encoder.wd,
                                             bias=self.args.bias)
            # currvars = get_vars_underscope(tf.get_variable_scope().name, 'upscore8')
            # for v in currvars:
            #     tf.add_to_collection('decoding_trainable_vars', v)

        self.logits = self.upscore8
class UNetShuffleNet(BasicModel):
    def __init__(self, args):
        super().__init__(args)
        # init encoder
        self.encoder = None

    def build(self):
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    @staticmethod
    def _debug(operation):
        print("Layer_name: " + operation.op.name + " -Output_Shape: " + str(operation.shape.as_list()))

    def init_network(self):
        """
        Building the Network here
        :return:
        """
        # Init ShuffleNet as an encoder
        self.encoder = ShuffleNet(x_input=self.x_pl,
                                  num_classes=self.params.num_classes,
                                  pretrained_path=self.args.pretrained_path,
                                  train_flag=self.is_training,
                                  batchnorm_enabled=self.args.batchnorm_enabled,
                                  num_groups=self.args.num_groups,
                                  weight_decay=self.args.weight_decay,
                                  bias=self.args.bias)

        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('upscale_1'):
            self.expand11 = conv2d('expand1_1', x=self.encoder.stage4, batchnorm_enabled=True,
                                   is_training=self.is_training,
                                   num_filters=self.encoder.stage3.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand11)
            self.upscale1 = conv2d_transpose('upscale1', x=self.expand11, is_training=self.is_training,
                                             output_shape=self.encoder.stage3.shape.as_list(),
                                             batchnorm_enabled=True,
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale1)
            self.add1 = tf.add(self.upscale1, self.encoder.stage3)
            self._debug(self.add1)
            self.expand12 = conv2d('expand1_2', x=self.add1, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.stage3.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand12)

        with tf.name_scope('upscale_2'):
            self.expand21 = conv2d('expand2_1', x=self.expand12, batchnorm_enabled=True,
                                   is_training=self.is_training,
                                   num_filters=self.encoder.stage2.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand21)
            self.upscale2 = conv2d_transpose('upscale2', x=self.expand21, is_training=self.is_training,
                                             output_shape=self.encoder.stage2.shape.as_list(),
                                             batchnorm_enabled=True,
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale2)
            self.add2 = tf.add(self.upscale2, self.encoder.stage2)
            self._debug(self.add2)
            self.expand22 = conv2d('expand2_2', x=self.add2, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.stage2.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand22)

        with tf.name_scope('upscale_3'):
            self.expand31 = conv2d('expand3_1', x=self.expand22, batchnorm_enabled=True,
                                   is_training=self.is_training,
                                   num_filters=self.encoder.max_pool.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand31)
            self.upscale3 = conv2d_transpose('upscale3', x=self.expand31, batchnorm_enabled=True,
                                             is_training=self.is_training,
                                             output_shape=[self.encoder.max_pool.shape.as_list()[0],
                                                           self.encoder.max_pool.shape.as_list()[1] + 1,
                                                           self.encoder.max_pool.shape.as_list()[2] + 1,
                                                           self.encoder.max_pool.shape.as_list()[3]],
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale3)
            # Zero-pad the skip connection by one row/column so it matches the upsampled size
            padded = tf.pad(self.encoder.max_pool, [[0, 0], [0, 1], [0, 1], [0, 0]], "CONSTANT")
            self.add3 = tf.add(self.upscale3, padded)
            self._debug(self.add3)
            self.expand32 = conv2d('expand3_2', x=self.add3, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.max_pool.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand32)

        with tf.name_scope('upscale_4'):
            self.expand41 = conv2d('expand4_1', x=self.expand32, batchnorm_enabled=True,
                                   is_training=self.is_training,
                                   num_filters=self.encoder.conv1.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand41)
            self.upscale4 = conv2d_transpose('upscale4', x=self.expand41, batchnorm_enabled=True,
                                             is_training=self.is_training,
                                             output_shape=[self.encoder.conv1.shape.as_list()[0],
                                                           self.encoder.conv1.shape.as_list()[1] + 1,
                                                           self.encoder.conv1.shape.as_list()[2] + 1,
                                                           self.encoder.conv1.shape.as_list()[3]],
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale4)
            # Same padding trick for the conv1 skip connection
            padded2 = tf.pad(self.encoder.conv1, [[0, 0], [0, 1], [0, 1], [0, 0]], "CONSTANT")
            self.add4 = tf.add(self.upscale4, padded2)
            self._debug(self.add4)
            self.expand42 = conv2d('expand4_2', x=self.add4, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.conv1.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand42)

        with tf.name_scope('upscale_5'):
            self.upscale5 = conv2d_transpose('upscale5', x=self.expand42, batchnorm_enabled=True,
                                             is_training=self.is_training,
                                             output_shape=self.x_pl.shape.as_list()[0:3]
                                             + [self.encoder.conv1.shape.as_list()[3]],
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale5)
            self.expand5 = conv2d('expand5', x=self.upscale5, batchnorm_enabled=True, is_training=self.is_training,
                                  num_filters=self.encoder.conv1.shape.as_list()[3], kernel_size=(1, 1),
                                  dropout_keep_prob=0.5, l2_strength=self.encoder.wd)
            self._debug(self.expand5)

        with tf.name_scope('final_score'):
            self.fscore = conv2d('fscore', x=self.expand5, num_filters=self.params.num_classes,
                                 kernel_size=(1, 1), l2_strength=self.encoder.wd)
            self._debug(self.fscore)

        self.logits = self.fscore
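
# --- Illustrative note (not part of the original repository) ---
# In the UNet decoder above, upscale3/upscale4 are deliberately built one pixel larger than the
# corresponding encoder features (height + 1, width + 1), and tf.pad then adds a zero row/column
# to the encoder tensor so the element-wise tf.add is shape-compatible. A minimal, hypothetical
# demonstration of that padding step, assuming TensorFlow 1.x and made-up shapes:
def _demo_skip_padding():
    import tensorflow as tf

    encoder_feat = tf.placeholder(tf.float32, [1, 31, 63, 24], name='demo_encoder_feat')
    upsampled = tf.placeholder(tf.float32, [1, 32, 64, 24], name='demo_upsampled')
    # Zero-pad one row (bottom) and one column (right) so the skip connection matches the
    # upsampled tensor before the element-wise addition.
    padded = tf.pad(encoder_feat, [[0, 0], [0, 1], [0, 1], [0, 0]], "CONSTANT")
    return tf.add(upsampled, padded)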
class FCN8sShuffleNetUpsample(BasicModel):
    """
    FCN8s with ShuffleNet as an encoder; the decoder upsamples with 2x image resizing
    followed by convolution instead of transposed convolutions
    Model Architecture
    """

    def __init__(self, args, phase=0):
        super().__init__(args, phase=phase)
        # init encoder
        self.encoder = None
        # init network layers

    def build(self):
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Building the Network here
        :return:
        """
        # Init ShuffleNet as an encoder
        self.encoder = ShuffleNet(x_input=self.x_pl,
                                  num_classes=self.params.num_classes,
                                  pretrained_path=self.args.pretrained_path,
                                  train_flag=self.is_training,
                                  batchnorm_enabled=self.args.batchnorm_enabled,
                                  num_groups=self.args.num_groups,
                                  weight_decay=self.args.weight_decay,
                                  bias=self.args.bias)

        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            shape = self.encoder.score_fr.shape.as_list()[1:3]
            upscore2_upsample = tf.image.resize_images(self.encoder.score_fr, (2 * shape[0], 2 * shape[1]))
            self.upscore2 = conv2d('upscore2', x=upscore2_upsample, num_filters=self.params.num_classes,
                                   l2_strength=self.encoder.wd)
            self.score_feed1 = conv2d('score_feed1', x=self.encoder.feed1, num_filters=self.params.num_classes,
                                      kernel_size=(1, 1), l2_strength=self.encoder.wd)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            shape = self.fuse_feed1.shape.as_list()[1:3]
            upscore4_upsample = tf.image.resize_images(self.fuse_feed1, (2 * shape[0], 2 * shape[1]))
            self.upscore4 = conv2d('upscore4', x=upscore4_upsample, num_filters=self.params.num_classes,
                                   l2_strength=self.encoder.wd)
            self.score_feed2 = conv2d('score_feed2', x=self.encoder.feed2, num_filters=self.params.num_classes,
                                      kernel_size=(1, 1), l2_strength=self.encoder.wd)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            shape = self.fuse_feed2.shape.as_list()[1:3]
            upscore8_upsample = tf.image.resize_images(self.fuse_feed2, (8 * shape[0], 8 * shape[1]))
            self.upscore8 = conv2d('upscore8', x=upscore8_upsample, num_filters=self.params.num_classes,
                                   l2_strength=self.encoder.wd)

        self.logits = self.upscore8
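
# --- Illustrative sketch (not part of the original repository) ---
# FCN8sShuffleNetUpsample replaces the learned transposed convolutions of the FCN8s decoder with
# image resizing followed by a convolution, a pattern often used to reduce checkerboard artifacts.
# A minimal, hypothetical example of that resize-then-convolve step, assuming TensorFlow 1.x
# (tf.layers.conv2d stands in here for the repo's own conv2d wrapper):
def _demo_resize_then_conv(num_classes=20):
    import tensorflow as tf

    feat = tf.placeholder(tf.float32, [1, 32, 64, num_classes], name='demo_feat')
    h, w = feat.shape.as_list()[1:3]
    upsampled = tf.image.resize_images(feat, (2 * h, 2 * w))  # 2x bilinear upsampling
    refined = tf.layers.conv2d(upsampled, num_classes, (3, 3), padding='same', name='demo_refine')
    return refined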
class DilationV2ShuffleNet(BasicModel):
    """
    Dilated-convolution architecture (v2) with ShuffleNet as an encoder,
    optionally predicting at a reduced target resolution
    Model Architecture
    """

    def __init__(self, args):
        super().__init__(args)
        # init encoder
        self.encoder = None
        self.targets_resize = self.args.targets_resize
        # init network layers

    def build(self):
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_input(self):
        with tf.name_scope('input'):
            self.x_pl = tf.placeholder(tf.float32,
                                       [self.args.batch_size, self.params.img_height, self.params.img_width, 3])
            # Labels are fed at a resolution reduced by targets_resize
            self.y_pl = tf.placeholder(tf.int32,
                                       [self.args.batch_size,
                                        self.params.img_height // self.targets_resize,
                                        self.params.img_width // self.targets_resize])
            print('X_batch shape ', self.x_pl.get_shape().as_list(), ' ', self.y_pl.get_shape().as_list())
            print('Afterwards: X_batch shape ', self.x_pl.get_shape().as_list(), ' ',
                  self.y_pl.get_shape().as_list())
            self.curr_learning_rate = tf.placeholder(tf.float32)
            if self.params.weighted_loss:
                self.wghts = np.zeros((self.args.batch_size, self.params.img_height, self.params.img_width),
                                      dtype=np.float32)
            self.is_training = tf.placeholder(tf.bool)

    def init_network(self):
        """
        Building the Network here
        :return:
        """
        # Init ShuffleNet as an encoder
        self.encoder = ShuffleNet(x_input=self.x_pl,
                                  num_classes=self.params.num_classes,
                                  pretrained_path=self.args.pretrained_path,
                                  train_flag=self.is_training,
                                  batchnorm_enabled=self.args.batchnorm_enabled,
                                  num_groups=self.args.num_groups,
                                  weight_decay=self.args.weight_decay,
                                  bias=self.args.bias)

        # Build Encoding part
        self.encoder.build()

        # Re-run stages 3 and 4 with dilated convolutions instead of further striding
        with tf.name_scope('dilation_2'):
            self.stage3 = self.encoder.stage(self.encoder.stage2, stage=3, repeat=7, dilation=2)
            _debug(self.stage3)
            self.stage4 = self.encoder.stage(self.stage3, stage=4, repeat=3, dilation=4)
            _debug(self.stage4)
            self.score_fr = conv2d('score_fr_dil', x=self.stage4, num_filters=self.params.num_classes,
                                   kernel_size=(1, 1), l2_strength=self.encoder.wd, is_training=self.is_training)
            _debug(self.score_fr)

            if self.targets_resize < 8:
                # Upsample the score map just far enough to match the reduced-size labels y_pl
                self.targets_resize = 8 // self.targets_resize
                self.upscore8 = conv2d_transpose('upscore8', x=self.score_fr,
                                                 output_shape=self.y_pl.shape.as_list()[0:3]
                                                 + [self.params.num_classes],
                                                 kernel_size=(self.targets_resize * 2, self.targets_resize * 2),
                                                 stride=(self.targets_resize, self.targets_resize),
                                                 l2_strength=self.encoder.wd, is_training=self.is_training)
                _debug(self.upscore8)
                self.logits = self.upscore8
            else:
                self.logits = self.score_fr
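
# --- Worked example (not part of the original repository) ---
# With the dilated stage3/stage4 above, score_fr stays at 1/8 of the input resolution, and
# targets_resize controls how far the logits are upsampled to match the reduced-size labels y_pl:
#   targets_resize = 1  ->  stride 8 // 1 = 8 transposed convolution, logits at full resolution
#   targets_resize = 2  ->  stride 8 // 2 = 4 transposed convolution, logits at half resolution
#   targets_resize = 8  ->  no upsampling; logits = score_fr at 1/8 resolution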