Beispiel #1
0
    def init_network(self):
        """
        Build the segmentation graph: a MobileNet encoder plus an FCN8s-style
        decoder whose upsampling steps use ``tf.image.resize_images``
        followed by a ``conv2d`` layer.

        Populates ``self.encoder``, the decoder tensors ``upscore2``,
        ``score_feed1``, ``fuse_feed1``, ``upscore4``, ``score_feed2``,
        ``fuse_feed2``, ``upscore8`` and finally ``self.logits``.
        :return: None
        """
        n_classes = self.params.num_classes

        # Encoder: instantiate MobileNet on the input placeholder, then let
        # it create its own layers.
        self.encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=n_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.encoder.build()

        encoder = self.encoder
        l2 = encoder.wd

        def _enlarge(tensor, factor):
            # Resize `tensor` to `factor` times its static height and width
            # (entries 1 and 2 of its static shape).
            dims = tensor.shape.as_list()
            return tf.image.resize_images(
                tensor, (factor * dims[1], factor * dims[2]))

        # Stage 1: 2x resize of the encoder scores, then fuse with the
        # 1x1-scored `feed1` tensor of the encoder.
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d(
                'upscore2',
                x=_enlarge(encoder.score_fr, 2),
                num_filters=n_classes,
                l2_strength=l2)
            self.score_feed1 = conv2d(
                'score_feed1',
                x=encoder.feed1,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=l2)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        # Stage 2: another 2x resize, fused with the 1x1-scored `feed2`.
        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d(
                'upscore4',
                x=_enlarge(self.fuse_feed1, 2),
                num_filters=n_classes,
                l2_strength=l2)
            self.score_feed2 = conv2d(
                'score_feed2',
                x=encoder.feed2,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=l2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        # Stage 3: final 8x resize followed by one more conv2d.
        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d(
                'upscore8',
                x=_enlarge(self.fuse_feed2, 8),
                num_filters=n_classes,
                l2_strength=l2)

        # The last decoder output serves as the network logits.
        self.logits = self.upscore8
Beispiel #2
0
class FCN8sMobileNetUpsample(BasicModel):
    """
    FCN8s-style segmentation model with a MobileNet encoder.

    The decoder upsamples by resizing (``tf.image.resize_images``) and then
    applying a ``conv2d`` layer, using factors of 2, 2 and 8.
    """
    def __init__(self, args, phase=0):
        super().__init__(args, phase=phase)
        # The encoder is created later, inside init_network().
        self.encoder = None

    def build(self):
        """
        Run every graph-construction step in order: inputs, network,
        outputs, training ops and summaries.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Build the MobileNet encoder and the resize-based decoder.

        Populates ``self.encoder``, the decoder tensors ``upscore2``,
        ``score_feed1``, ``fuse_feed1``, ``upscore4``, ``score_feed2``,
        ``fuse_feed2``, ``upscore8`` and finally ``self.logits``.
        :return: None
        """
        n_classes = self.params.num_classes

        # Encoder: instantiate MobileNet on the input placeholder, then let
        # it create its own layers.
        self.encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=n_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.encoder.build()

        encoder = self.encoder
        l2 = encoder.wd

        def _enlarge(tensor, factor):
            # Resize `tensor` to `factor` times its static height and width
            # (entries 1 and 2 of its static shape).
            dims = tensor.shape.as_list()
            return tf.image.resize_images(
                tensor, (factor * dims[1], factor * dims[2]))

        # Stage 1: 2x resize of the encoder scores, then fuse with the
        # 1x1-scored `feed1` tensor of the encoder.
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d(
                'upscore2',
                x=_enlarge(encoder.score_fr, 2),
                num_filters=n_classes,
                l2_strength=l2)
            self.score_feed1 = conv2d(
                'score_feed1',
                x=encoder.feed1,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=l2)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        # Stage 2: another 2x resize, fused with the 1x1-scored `feed2`.
        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d(
                'upscore4',
                x=_enlarge(self.fuse_feed1, 2),
                num_filters=n_classes,
                l2_strength=l2)
            self.score_feed2 = conv2d(
                'score_feed2',
                x=encoder.feed2,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=l2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        # Stage 3: final 8x resize followed by one more conv2d.
        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d(
                'upscore8',
                x=_enlarge(self.fuse_feed2, 8),
                num_filters=n_classes,
                l2_strength=l2)

        # The last decoder output serves as the network logits.
        self.logits = self.upscore8
class FCN8sMobileNet(BasicModel):
    """
    FCN8s-style segmentation model with a MobileNet encoder.

    The decoder upsamples with ``conv2d_transpose`` layers (strides 2, 2
    and 8) and fuses 1x1-scored encoder tensors between the stages.
    """
    def __init__(self, args):
        super().__init__(args)
        # The encoder is created later, inside init_network().
        self.encoder = None

    def build(self):
        """
        Run every graph-construction step in order: inputs, network,
        outputs, training ops and summaries.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Build the MobileNet encoder and the transposed-convolution decoder.

        Populates ``self.encoder``, the decoder tensors ``upscore2``,
        ``score_feed1``, ``fuse_feed1``, ``upscore4``, ``score_feed2``,
        ``fuse_feed2``, ``upscore8`` and finally ``self.logits``.
        :return: None
        """
        n_classes = self.params.num_classes

        # Encoder: instantiate MobileNet on the input placeholder, then let
        # it create its own layers.
        self.encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=n_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.encoder.build()

        encoder = self.encoder
        l2 = encoder.wd

        def _score_shape_like(reference):
            # First three static dims of `reference` plus one channel per
            # class: the requested output shape of a transposed conv.
            return reference.shape.as_list()[0:3] + [n_classes]

        # Stage 1: stride-2 transposed conv to the size of `feed1`, fused
        # with the 1x1-scored `feed1`.
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=encoder.score_fr,
                output_shape=_score_shape_like(encoder.feed1),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=l2)
            self.score_feed1 = conv2d(
                'score_feed1',
                x=encoder.feed1,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=l2)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        # Stage 2: stride-2 transposed conv to the size of `feed2`, fused
        # with the 1x1-scored `feed2`.
        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=_score_shape_like(encoder.feed2),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=l2)
            self.score_feed2 = conv2d(
                'score_feed2',
                x=encoder.feed2,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=l2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        # Stage 3: stride-8 transposed conv to the spatial size of the
        # input placeholder.
        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=_score_shape_like(self.x_pl),
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=l2)

        # The last decoder output serves as the network logits.
        self.logits = self.upscore8
Beispiel #4
0
class DilationV2MobileNet(BasicModel):
    """
    Segmentation model with a MobileNet encoder followed by a stack of
    atrous (dilated) and depthwise-separable convolutions, all at stride 1,
    and an optional transposed-convolution upsampling to the label size.
    """
    def __init__(self, args):
        super().__init__(args)
        # The encoder is created later, inside init_network().
        self.encoder = None
        self.wd = self.args.weight_decay

        # Placeholders for network layer attributes.
        self.upscore2 = None
        self.score_feed1 = None
        self.fuse_feed1 = None
        self.upscore4 = None
        self.score_feed2 = None
        self.fuse_feed2 = None
        self.upscore8 = None
        # Factor by which the label placeholder is smaller than the image.
        self.targets_resize = self.args.targets_resize

    def build(self):
        """
        Run every graph-construction step in order: inputs, network,
        outputs, training ops and summaries.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_input(self):
        """
        Create the input placeholders: images (``x_pl``), labels
        (``y_pl``, spatially divided by ``targets_resize``), the learning
        rate and the training flag. Also allocates ``self.wghts`` when
        ``params.weighted_loss`` is set.
        """
        batch_size = self.args.batch_size
        height = self.params.img_height
        width = self.params.img_width

        with tf.name_scope('input'):
            self.x_pl = tf.placeholder(
                tf.float32, [batch_size, height, width, 3])
            self.y_pl = tf.placeholder(
                tf.int32,
                [batch_size,
                 height // self.targets_resize,
                 width // self.targets_resize])

            # The same pair of shapes is reported twice, under two labels.
            for label in ('X_batch shape ', 'Afterwards: X_batch shape '):
                print(label,
                      self.x_pl.get_shape().as_list(), ' ',
                      self.y_pl.get_shape().as_list())

            self.curr_learning_rate = tf.placeholder(tf.float32)
            if self.params.weighted_loss:
                # NOTE(review): allocated at full image resolution while
                # y_pl is divided by targets_resize -- confirm intended.
                self.wghts = np.zeros((batch_size, height, width),
                                      dtype=np.float32)
            self.is_training = tf.placeholder(tf.bool)

    def init_network(self):
        """
        Build the MobileNet encoder and the dilated decoder on top of the
        encoder's ``conv4_1`` tensor.

        Populates ``self.encoder``, ``conv4_2``, ``conv5_1`` .. ``conv5_6``,
        ``conv6_1``, ``score_fr`` and ``self.logits`` (plus ``upscore8``
        when ``targets_resize`` is below 8).
        :return: None
        """
        n_classes = self.params.num_classes

        # Encoder: instantiate MobileNet on the input placeholder, then let
        # it create its own layers.
        self.encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=n_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.encoder.build()

        # Keyword arguments shared by every 3x3 layer of the decoder.
        conv3x3 = dict(kernel_size=(3, 3),
                       padding='SAME',
                       activation=tf.nn.relu,
                       batchnorm_enabled=True,
                       is_training=self.is_training,
                       l2_strength=self.wd)
        # Depthwise-separable layers additionally fix stride and width.
        separable = dict(conv3x3,
                         width_multiplier=self.encoder.width_multiplier,
                         stride=(1, 1))

        with tf.name_scope('dilation_2'):
            self.conv4_2 = atrous_conv2d(
                'conv_ds_7_dil', self.encoder.conv4_1,
                num_filters=512, dilation_rate=2, **conv3x3)
            _debug(self.conv4_2)

            self.conv5_1 = depthwise_separable_conv2d(
                'conv_ds_8_dil', self.conv4_2, num_filters=512, **separable)
            _debug(self.conv5_1)

            self.conv5_2 = depthwise_separable_conv2d(
                'conv_ds_9_dil', self.conv5_1, num_filters=512, **separable)
            _debug(self.conv5_2)

            self.conv5_3 = depthwise_separable_conv2d(
                'conv_ds_10_dil', self.conv5_2, num_filters=512, **separable)
            _debug(self.conv5_3)

            self.conv5_4 = depthwise_separable_conv2d(
                'conv_ds_11_dil', self.conv5_3, num_filters=512, **separable)
            _debug(self.conv5_4)

            self.conv5_5 = depthwise_separable_conv2d(
                'conv_ds_12_dil', self.conv5_4, num_filters=512, **separable)
            _debug(self.conv5_5)

            self.conv5_6 = atrous_conv2d(
                'conv_ds_13_dil', self.conv5_5,
                num_filters=1024, dilation_rate=4, **conv3x3)
            _debug(self.conv5_6)

            self.conv6_1 = depthwise_separable_conv2d(
                'conv_ds_14_dil', self.conv5_6,
                num_filters=1024, **separable)
            _debug(self.conv6_1)

            # No pooling here: a 1x1 conv produces the per-class scores.
            self.score_fr = conv2d('conv_1c_1x1_dil',
                                   self.conv6_1,
                                   num_filters=n_classes,
                                   l2_strength=self.wd,
                                   kernel_size=(1, 1))
            _debug(self.score_fr)

            if self.targets_resize < 8:
                # NOTE(review): this overwrites self.targets_resize with
                # the upsampling factor (8 // original value); later readers
                # of the attribute see the factor -- confirm intended.
                factor = 8 // self.targets_resize
                self.targets_resize = factor
                self.upscore8 = conv2d_transpose(
                    'upscore8',
                    x=self.score_fr,
                    output_shape=(self.y_pl.shape.as_list()[0:3] +
                                  [n_classes]),
                    kernel_size=(factor * 2, factor * 2),
                    stride=(factor, factor),
                    l2_strength=self.encoder.wd,
                    is_training=self.is_training)
                _debug(self.upscore8)
                self.logits = self.upscore8
            else:
                # Labels are already at (or below) the score resolution.
                self.logits = self.score_fr
    def init_network(self):
        """
        Build the MobileNet encoder and an FCN8s-style decoder made of
        ``conv2d_transpose`` layers (strides 2, 2 and 8).

        Populates ``self.encoder``, the decoder tensors ``upscore2``,
        ``score_feed1``, ``fuse_feed1``, ``upscore4``, ``score_feed2``,
        ``fuse_feed2``, ``upscore8`` and finally ``self.logits``.
        :return: None
        """
        n_classes = self.params.num_classes

        # Encoder: instantiate MobileNet on the input placeholder, then let
        # it create its own layers.
        self.encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=n_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.encoder.build()

        encoder = self.encoder
        l2 = encoder.wd

        def _score_shape_like(reference):
            # First three static dims of `reference` plus one channel per
            # class: the requested output shape of a transposed conv.
            return reference.shape.as_list()[0:3] + [n_classes]

        # Stage 1: stride-2 transposed conv to the size of `feed1`, fused
        # with the 1x1-scored `feed1`.
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=encoder.score_fr,
                output_shape=_score_shape_like(encoder.feed1),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=l2)
            self.score_feed1 = conv2d(
                'score_feed1',
                x=encoder.feed1,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=l2)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        # Stage 2: stride-2 transposed conv to the size of `feed2`, fused
        # with the 1x1-scored `feed2`.
        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=_score_shape_like(encoder.feed2),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=l2)
            self.score_feed2 = conv2d(
                'score_feed2',
                x=encoder.feed2,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=l2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        # Stage 3: stride-8 transposed conv to the spatial size of the
        # input placeholder.
        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=_score_shape_like(self.x_pl),
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=l2)

        # The last decoder output serves as the network logits.
        self.logits = self.upscore8
Beispiel #6
0
class UNetMobileNet(BasicModel):
    """
    U-Net-like segmentation model with a MobileNet encoder.

    The decoder alternates 1x1 convolutions and stride-2 transposed
    convolutions, adding the matching encoder tensor after each of the
    first four upsampling steps.
    """
    def __init__(self, args, phase=0):
        super().__init__(args, phase=phase)
        # The encoder is created later, inside init_network().
        self.encoder = None

    def build(self):
        """
        Run every graph-construction step in order: inputs, network,
        outputs, training ops and summaries.
        """
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    @staticmethod
    def _debug(operation):
        """Print the op name and static output shape of `operation`."""
        layer_name = operation.op.name
        output_shape = str(operation.shape.as_list())
        print("Layer_name: " + layer_name + " -Output_Shape: " + output_shape)

    def init_network(self):
        """
        Build the MobileNet encoder and the U-Net-like decoder.

        Populates ``self.encoder``, the decoder tensors ``expandNM``,
        ``upscaleN``, ``addN``, ``expand5``, ``fscore`` and ``self.logits``.
        :return: None
        """
        n_classes = self.params.num_classes

        # Encoder: instantiate MobileNet on the input placeholder, then let
        # it create its own layers.
        self.encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=n_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.encoder.build()

        encoder = self.encoder
        l2 = encoder.wd

        # Keyword arguments shared by the batch-normalised decoder layers.
        normed = dict(batchnorm_enabled=True,
                      is_training=self.is_training,
                      l2_strength=l2)
        pointwise = dict(normed, kernel_size=(1, 1))
        deconv = dict(normed, kernel_size=(4, 4), stride=(2, 2))

        # Stage 1: upsample conv5_6 to the shape of conv5_5 and add it.
        with tf.name_scope('upscale_1'):
            skip = encoder.conv5_5
            channels = skip.shape.as_list()[3]
            self.expand11 = conv2d('expand1_1', x=encoder.conv5_6,
                                   num_filters=channels, **pointwise)
            self._debug(self.expand11)
            self.upscale1 = conv2d_transpose(
                'upscale1', x=self.expand11,
                output_shape=skip.shape.as_list(), **deconv)
            self._debug(self.upscale1)
            self.add1 = tf.add(self.upscale1, skip)
            self._debug(self.add1)
            self.expand12 = conv2d('expand1_2', x=self.add1,
                                   num_filters=channels, **pointwise)
            self._debug(self.expand12)

        # Stage 2: upsample to the shape of conv4_1 and add it.
        with tf.name_scope('upscale_2'):
            skip = encoder.conv4_1
            channels = skip.shape.as_list()[3]
            self.expand21 = conv2d('expand2_1', x=self.expand12,
                                   num_filters=channels, **pointwise)
            self._debug(self.expand21)
            self.upscale2 = conv2d_transpose(
                'upscale2', x=self.expand21,
                output_shape=skip.shape.as_list(), **deconv)
            self._debug(self.upscale2)
            self.add2 = tf.add(self.upscale2, skip)
            self._debug(self.add2)
            self.expand22 = conv2d('expand2_2', x=self.add2,
                                   num_filters=channels, **pointwise)
            self._debug(self.expand22)

        # Stage 3: upsample to the shape of conv3_1 and add it.
        with tf.name_scope('upscale_3'):
            skip = encoder.conv3_1
            channels = skip.shape.as_list()[3]
            self.expand31 = conv2d('expand3_1', x=self.expand22,
                                   num_filters=channels, **pointwise)
            self._debug(self.expand31)
            self.upscale3 = conv2d_transpose(
                'upscale3', x=self.expand31,
                output_shape=skip.shape.as_list(), **deconv)
            self._debug(self.upscale3)
            self.add3 = tf.add(self.upscale3, skip)
            self._debug(self.add3)
            self.expand32 = conv2d('expand3_2', x=self.add3,
                                   num_filters=channels, **pointwise)
            self._debug(self.expand32)

        # Stage 4: upsample to the shape of conv2_1 and add it.
        with tf.name_scope('upscale_4'):
            skip = encoder.conv2_1
            channels = skip.shape.as_list()[3]
            self.expand41 = conv2d('expand4_1', x=self.expand32,
                                   num_filters=channels, **pointwise)
            self._debug(self.expand41)
            self.upscale4 = conv2d_transpose(
                'upscale4', x=self.expand41,
                output_shape=skip.shape.as_list(), **deconv)
            self._debug(self.upscale4)
            self.add4 = tf.add(self.upscale4, skip)
            self._debug(self.add4)
            self.expand42 = conv2d('expand4_2', x=self.add4,
                                   num_filters=channels, **pointwise)
            self._debug(self.expand42)

        # Stage 5: upsample to the input's spatial size (no skip addition),
        # keeping the channel count of conv2_1, then a 1x1 conv with dropout.
        with tf.name_scope('upscale_5'):
            full_res_shape = (self.x_pl.shape.as_list()[0:3] +
                              [encoder.conv2_1.shape.as_list()[3]])
            self.upscale5 = conv2d_transpose(
                'upscale5', x=self.expand42,
                output_shape=full_res_shape, **deconv)
            self._debug(self.upscale5)
            self.expand5 = conv2d(
                'expand5', x=self.upscale5,
                num_filters=encoder.conv1_1.shape.as_list()[3],
                dropout_keep_prob=0.5, **pointwise)
            self._debug(self.expand5)

        # Per-class scores: a plain 1x1 conv without the batch-norm kwargs.
        with tf.name_scope('final_score'):
            self.fscore = conv2d('fscore',
                                 x=self.expand5,
                                 num_filters=n_classes,
                                 kernel_size=(1, 1),
                                 l2_strength=l2)
            self._debug(self.fscore)

        self.logits = self.fscore
Beispiel #7
0
    def init_network(self):
        """
        Build the MobileNet encoder and a decoder of atrous (dilated) and
        depthwise-separable convolutions on top of the encoder's
        ``conv4_1`` tensor.

        Populates ``self.encoder``, ``conv4_2``, ``conv5_1`` .. ``conv5_6``,
        ``conv6_1``, ``score_fr`` and ``self.logits`` (plus ``upscore8``
        when ``targets_resize`` is below 8).
        :return: None
        """
        n_classes = self.params.num_classes

        # Encoder: instantiate MobileNet on the input placeholder, then let
        # it create its own layers.
        self.encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=n_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.encoder.build()

        # Keyword arguments shared by every 3x3 layer of the decoder.
        conv3x3 = dict(kernel_size=(3, 3),
                       padding='SAME',
                       activation=tf.nn.relu,
                       batchnorm_enabled=True,
                       is_training=self.is_training,
                       l2_strength=self.wd)
        # Depthwise-separable layers additionally fix stride and width.
        separable = dict(conv3x3,
                         width_multiplier=self.encoder.width_multiplier,
                         stride=(1, 1))

        with tf.name_scope('dilation_2'):
            self.conv4_2 = atrous_conv2d(
                'conv_ds_7_dil', self.encoder.conv4_1,
                num_filters=512, dilation_rate=2, **conv3x3)
            _debug(self.conv4_2)

            self.conv5_1 = depthwise_separable_conv2d(
                'conv_ds_8_dil', self.conv4_2, num_filters=512, **separable)
            _debug(self.conv5_1)

            self.conv5_2 = depthwise_separable_conv2d(
                'conv_ds_9_dil', self.conv5_1, num_filters=512, **separable)
            _debug(self.conv5_2)

            self.conv5_3 = depthwise_separable_conv2d(
                'conv_ds_10_dil', self.conv5_2, num_filters=512, **separable)
            _debug(self.conv5_3)

            self.conv5_4 = depthwise_separable_conv2d(
                'conv_ds_11_dil', self.conv5_3, num_filters=512, **separable)
            _debug(self.conv5_4)

            self.conv5_5 = depthwise_separable_conv2d(
                'conv_ds_12_dil', self.conv5_4, num_filters=512, **separable)
            _debug(self.conv5_5)

            self.conv5_6 = atrous_conv2d(
                'conv_ds_13_dil', self.conv5_5,
                num_filters=1024, dilation_rate=4, **conv3x3)
            _debug(self.conv5_6)

            self.conv6_1 = depthwise_separable_conv2d(
                'conv_ds_14_dil', self.conv5_6,
                num_filters=1024, **separable)
            _debug(self.conv6_1)

            # No pooling here: a 1x1 conv produces the per-class scores.
            self.score_fr = conv2d('conv_1c_1x1_dil',
                                   self.conv6_1,
                                   num_filters=n_classes,
                                   l2_strength=self.wd,
                                   kernel_size=(1, 1))
            _debug(self.score_fr)

            if self.targets_resize < 8:
                # NOTE(review): this overwrites self.targets_resize with
                # the upsampling factor (8 // original value); later readers
                # of the attribute see the factor -- confirm intended.
                factor = 8 // self.targets_resize
                self.targets_resize = factor
                self.upscore8 = conv2d_transpose(
                    'upscore8',
                    x=self.score_fr,
                    output_shape=(self.y_pl.shape.as_list()[0:3] +
                                  [n_classes]),
                    kernel_size=(factor * 2, factor * 2),
                    stride=(factor, factor),
                    l2_strength=self.encoder.wd,
                    is_training=self.is_training)
                _debug(self.upscore8)
                self.logits = self.upscore8
            else:
                # Labels are already at (or below) the score resolution.
                self.logits = self.score_fr
Beispiel #8
0
    def init_network(self):
        """
        Assemble the graph: a MobileNet encoder followed by a decoder made of
        five stride-2 transposed convolutions. The first four decoder stages
        add an encoder feature map (conv5_5, conv4_1, conv3_1, conv2_1) to
        the upsampled tensor; the fifth targets the spatial size of the
        input placeholder.

        Sets ``self.encoder``, the intermediate ``expand*`` / ``upscale*`` /
        ``add*`` tensors, ``self.fscore`` and ``self.logits``.
        :return:
        """

        # Encoder: MobileNet on the input placeholder
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=self.params.num_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)
        self.encoder.build()

        enc = self.encoder

        def pointwise(name, x, channels):
            """1x1 conv2d (batchnorm_enabled=True) with `channels` filters."""
            out = conv2d(name,
                         x=x,
                         num_filters=channels,
                         kernel_size=(1, 1),
                         batchnorm_enabled=True,
                         is_training=self.is_training,
                         l2_strength=enc.wd)
            self._debug(out)
            return out

        def upsample_x2(name, x, target_shape):
            """4x4, stride-2 conv2d_transpose producing `target_shape`."""
            out = conv2d_transpose(name,
                                   x=x,
                                   output_shape=target_shape,
                                   kernel_size=(4, 4),
                                   stride=(2, 2),
                                   batchnorm_enabled=True,
                                   is_training=self.is_training,
                                   l2_strength=enc.wd)
            self._debug(out)
            return out

        def skip_stage(names, x, skip):
            """
            One decoder stage: 1x1 conv to the skip tensor's channel count,
            x2 upsampling to the skip tensor's shape, addition of the skip
            tensor, then another 1x1 conv.
            :return: (first 1x1 output, upsampled, sum, second 1x1 output)
            """
            reduce_name, upscale_name, refine_name = names
            channels = skip.shape.as_list()[3]
            reduced = pointwise(reduce_name, x, channels)
            upscaled = upsample_x2(upscale_name, reduced,
                                   skip.shape.as_list())
            fused = tf.add(upscaled, skip)
            self._debug(fused)
            refined = pointwise(refine_name, fused, channels)
            return reduced, upscaled, fused, refined

        # Decoder stages with skip connections
        with tf.name_scope('upscale_1'):
            (self.expand11, self.upscale1,
             self.add1, self.expand12) = skip_stage(
                 ('expand1_1', 'upscale1', 'expand1_2'),
                 enc.conv5_6, enc.conv5_5)

        with tf.name_scope('upscale_2'):
            (self.expand21, self.upscale2,
             self.add2, self.expand22) = skip_stage(
                 ('expand2_1', 'upscale2', 'expand2_2'),
                 self.expand12, enc.conv4_1)

        with tf.name_scope('upscale_3'):
            (self.expand31, self.upscale3,
             self.add3, self.expand32) = skip_stage(
                 ('expand3_1', 'upscale3', 'expand3_2'),
                 self.expand22, enc.conv3_1)

        with tf.name_scope('upscale_4'):
            (self.expand41, self.upscale4,
             self.add4, self.expand42) = skip_stage(
                 ('expand4_1', 'upscale4', 'expand4_2'),
                 self.expand32, enc.conv2_1)

        # Last stage: no skip tensor; the target takes its first three
        # dimensions from the input placeholder and keeps conv2_1's channels.
        with tf.name_scope('upscale_5'):
            input_sized = self.x_pl.shape.as_list()[0:3]
            input_sized = input_sized + [enc.conv2_1.shape.as_list()[3]]
            self.upscale5 = upsample_x2('upscale5', self.expand42,
                                        input_sized)
            self.expand5 = conv2d(
                'expand5',
                x=self.upscale5,
                num_filters=enc.conv1_1.shape.as_list()[3],
                kernel_size=(1, 1),
                dropout_keep_prob=0.5,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=enc.wd)
            self._debug(self.expand5)

        # Per-class scores from a final 1x1 convolution
        with tf.name_scope('final_score'):
            self.fscore = conv2d('fscore',
                                 x=self.expand5,
                                 num_filters=self.params.num_classes,
                                 kernel_size=(1, 1),
                                 l2_strength=enc.wd)
            self._debug(self.fscore)

        self.logits = self.fscore
Beispiel #9
0
    def init_network(self):
        """
        Assemble the two-stream graph.

        Two MobileNet encoders are built, one on ``self.x_pl`` (prefix
        ``app_``) and one on ``self.flo_pl`` (prefix ``mot_``). Their
        ``conv3_2`` outputs are multiplied element-wise and the product feeds
        a single trunk of depthwise-separable convolutions. A decoder of
        three transposed convolutions with two skip connections produces
        ``self.logits``.
        :return:
        """

        # The two encoders differ only in input, variable prefix and mean file
        self.app_encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=self.params.num_classes,
            prefix='app_',
            pretrained_path=self.args.pretrained_path,
            mean_path=self.args.data_dir + 'mean.npy',
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.motion_encoder = MobileNet(
            x_input=self.flo_pl,
            num_classes=self.params.num_classes,
            prefix='mot_',
            pretrained_path=self.args.pretrained_path,
            mean_path=self.args.data_dir + 'flo_mean.npy',
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)

        self.app_encoder.build()
        self.motion_encoder.build()

        # Fuse the streams by element-wise multiplication
        self.feed2 = tf.multiply(self.app_encoder.conv3_2,
                                 self.motion_encoder.conv3_2)
        self.width_multiplier = 1.0

        def separable(name, x, filters, stride):
            """3x3 depthwise-separable conv (relu6, batchnorm_enabled=True)."""
            out = depthwise_separable_conv2d(
                name,
                x,
                width_multiplier=self.width_multiplier,
                num_filters=filters,
                kernel_size=(3, 3),
                stride=stride,
                padding='SAME',
                activation=tf.nn.relu6,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.args.weight_decay)
            _debug(out)
            return out

        # Shared trunk on the fused features
        self.conv4_1 = separable('conv_ds_6_1', self.feed2, 256, (1, 1))
        self.conv4_2 = separable('conv_ds_7_1', self.conv4_1, 512, (2, 2))
        self.conv5_1 = separable('conv_ds_8_1', self.conv4_2, 512, (1, 1))
        self.conv5_2 = separable('conv_ds_9_1', self.conv5_1, 512, (1, 1))
        self.conv5_3 = separable('conv_ds_10_1', self.conv5_2, 512, (1, 1))
        self.conv5_4 = separable('conv_ds_11_1', self.conv5_3, 512, (1, 1))
        self.conv5_5 = separable('conv_ds_12_1', self.conv5_4, 512, (1, 1))
        self.conv5_6 = separable('conv_ds_13_1', self.conv5_5, 1024, (2, 2))
        self.conv6_1 = separable('conv_ds_14_1', self.conv5_6, 1024, (1, 1))

        # Pooling is removed: class scores come straight from a 1x1 conv
        self.score_fr = conv2d('conv_1c_1x1_1',
                               self.conv6_1,
                               kernel_size=(1, 1),
                               num_filters=self.params.num_classes,
                               l2_strength=self.args.weight_decay)

        self.feed1 = self.conv4_2

        def scores_shaped_like(reference):
            """First three static dims of `reference` plus a class channel."""
            return reference.shape.as_list()[0:3] + [self.params.num_classes]

        # Keyword arguments common to the two skip-connection stages
        skip_kwargs = dict(batchnorm_enabled=self.args.batchnorm_enabled,
                           is_training=self.is_training,
                           l2_strength=self.args.weight_decay,
                           bias=self.args.bias)

        # Decoder
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.score_fr,
                output_shape=scores_shaped_like(self.feed1),
                kernel_size=(4, 4),
                stride=(2, 2),
                **skip_kwargs)
            _debug(self.upscore2)

            self.score_feed1 = conv2d('score_feed1',
                                      x=self.feed1,
                                      num_filters=self.params.num_classes,
                                      kernel_size=(1, 1),
                                      **skip_kwargs)
            _debug(self.score_feed1)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=scores_shaped_like(self.feed2),
                kernel_size=(4, 4),
                stride=(2, 2),
                **skip_kwargs)
            _debug(self.upscore4)

            self.score_feed2 = conv2d('score_feed2',
                                      x=self.feed2,
                                      num_filters=self.params.num_classes,
                                      kernel_size=(1, 1),
                                      **skip_kwargs)
            _debug(self.score_feed2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        # Final stride-8 upsampling; this call passes no batchnorm_enabled
        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=scores_shaped_like(self.x_pl),
                kernel_size=(16, 16),
                stride=(8, 8),
                is_training=self.is_training,
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore8)

        self.logits = self.upscore8
Beispiel #10
0
class FCN8s2StreamMobileNet(BasicModel):
    """
    Two-stream FCN8s model with MobileNet encoders.

    One encoder runs on ``x_pl`` and one on ``flo_pl`` (presumably optical
    flow, judging by the name); their ``conv3_2`` features are multiplied
    and decoded into per-pixel class logits.
    """
    def __init__(self, args):
        super().__init__(args)
        # placeholder attribute; init_network creates app_encoder and
        # motion_encoder instead
        self.encoder = None

    def build(self):
        """Run every graph-construction stage in order."""
        print("\nBuilding the MODEL...")
        stages = (self.init_input,
                  self.init_network,
                  self.init_output,
                  self.init_train,
                  self.init_summaries)
        for stage in stages:
            stage()
        print("The Model is built successfully\n")

    def init_input(self):
        """Create the image, flow, label and training-flag placeholders."""
        batch = self.args.batch_size
        rows = self.params.img_height
        cols = self.params.img_width

        with tf.name_scope('input'):
            # Both image-like inputs are [batch, height, width, 3]
            self.x_pl = tf.placeholder(tf.float32, [batch, rows, cols, 3])
            self.flo_pl = tf.placeholder(tf.float32, [batch, rows, cols, 3])
            # Integer labels, one per pixel
            self.y_pl = tf.placeholder(tf.int32, [batch, rows, cols])

            if self.params.weighted_loss:
                # Per-pixel weights, held as a NumPy array initialised to zero
                self.wghts = np.zeros((batch, rows, cols), dtype=np.float32)
            self.is_training = tf.placeholder(tf.bool)

    def init_summaries(self):
        """
        Define the accuracy metric, the image summary, the scalar summaries
        and the ops that store the best validation IoU.

        Reads ``self.out_argmax`` and ``self.loss``, which are not set in
        this class (presumably by ``init_output`` / ``init_train``).
        """
        with tf.name_scope('pixel_wise_accuracy'):
            matches = tf.equal(self.y_pl, self.out_argmax)
            self.accuracy = tf.reduce_mean(tf.cast(matches, tf.float32))

        with tf.name_scope('segmented_output'):
            input_summary = tf.cast(self.x_pl, tf.uint8)
            flow_summary = tf.cast(self.flo_pl, tf.uint8)
            preds_summary = tf.py_func(
                decode_labels,
                [self.out_argmax, self.params.num_classes],
                tf.uint8)
            # Concatenate along axis 2 (the width axis of the placeholders)
            panels = [input_summary, flow_summary, preds_summary]
            self.segmented_summary = tf.concat(axis=2, values=panels)

        # Scalars evaluated at every step
        if self.loss is not None:
            with tf.name_scope('train-summary'):
                tf.summary.scalar('loss', self.loss)
                tf.summary.scalar('pixel_wise_accuracy', self.accuracy)

        self.merged_summaries = tf.summary.merge_all()

        # Non-trainable variable holding the best validation IoU; it is
        # updated by feeding best_iou_input and running best_iou_assign_op
        self.best_iou_tensor = tf.Variable(0.0,
                                           trainable=False,
                                           name='best_iou')
        self.best_iou_input = tf.placeholder('float32',
                                             None,
                                             name='best_iou_input')
        self.best_iou_assign_op = self.best_iou_tensor.assign(
            self.best_iou_input)

    def init_network(self):
        """
        Assemble the two-stream graph.

        Two MobileNet encoders are built, one on ``self.x_pl`` (prefix
        ``app_``) and one on ``self.flo_pl`` (prefix ``mot_``). Their
        ``conv3_2`` outputs are multiplied element-wise and the product feeds
        a single trunk of depthwise-separable convolutions. A decoder of
        three transposed convolutions with two skip connections produces
        ``self.logits``.
        :return:
        """

        # The two encoders differ only in input, variable prefix and mean file
        self.app_encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=self.params.num_classes,
            prefix='app_',
            pretrained_path=self.args.pretrained_path,
            mean_path=self.args.data_dir + 'mean.npy',
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.motion_encoder = MobileNet(
            x_input=self.flo_pl,
            num_classes=self.params.num_classes,
            prefix='mot_',
            pretrained_path=self.args.pretrained_path,
            mean_path=self.args.data_dir + 'flo_mean.npy',
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)

        self.app_encoder.build()
        self.motion_encoder.build()

        # Fuse the streams by element-wise multiplication
        self.feed2 = tf.multiply(self.app_encoder.conv3_2,
                                 self.motion_encoder.conv3_2)
        self.width_multiplier = 1.0

        def separable(name, x, filters, stride):
            """3x3 depthwise-separable conv (relu6, batchnorm_enabled=True)."""
            out = depthwise_separable_conv2d(
                name,
                x,
                width_multiplier=self.width_multiplier,
                num_filters=filters,
                kernel_size=(3, 3),
                stride=stride,
                padding='SAME',
                activation=tf.nn.relu6,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.args.weight_decay)
            _debug(out)
            return out

        # Shared trunk on the fused features
        self.conv4_1 = separable('conv_ds_6_1', self.feed2, 256, (1, 1))
        self.conv4_2 = separable('conv_ds_7_1', self.conv4_1, 512, (2, 2))
        self.conv5_1 = separable('conv_ds_8_1', self.conv4_2, 512, (1, 1))
        self.conv5_2 = separable('conv_ds_9_1', self.conv5_1, 512, (1, 1))
        self.conv5_3 = separable('conv_ds_10_1', self.conv5_2, 512, (1, 1))
        self.conv5_4 = separable('conv_ds_11_1', self.conv5_3, 512, (1, 1))
        self.conv5_5 = separable('conv_ds_12_1', self.conv5_4, 512, (1, 1))
        self.conv5_6 = separable('conv_ds_13_1', self.conv5_5, 1024, (2, 2))
        self.conv6_1 = separable('conv_ds_14_1', self.conv5_6, 1024, (1, 1))

        # Pooling is removed: class scores come straight from a 1x1 conv
        self.score_fr = conv2d('conv_1c_1x1_1',
                               self.conv6_1,
                               kernel_size=(1, 1),
                               num_filters=self.params.num_classes,
                               l2_strength=self.args.weight_decay)

        self.feed1 = self.conv4_2

        def scores_shaped_like(reference):
            """[batch, height, width] of `reference` plus a class channel."""
            return reference.shape.as_list()[0:3] + [self.params.num_classes]

        # Keyword arguments common to the two skip-connection stages
        skip_kwargs = dict(batchnorm_enabled=self.args.batchnorm_enabled,
                           is_training=self.is_training,
                           l2_strength=self.args.weight_decay,
                           bias=self.args.bias)

        # Decoder
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.score_fr,
                output_shape=scores_shaped_like(self.feed1),
                kernel_size=(4, 4),
                stride=(2, 2),
                **skip_kwargs)
            _debug(self.upscore2)

            self.score_feed1 = conv2d('score_feed1',
                                      x=self.feed1,
                                      num_filters=self.params.num_classes,
                                      kernel_size=(1, 1),
                                      **skip_kwargs)
            _debug(self.score_feed1)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=scores_shaped_like(self.feed2),
                kernel_size=(4, 4),
                stride=(2, 2),
                **skip_kwargs)
            _debug(self.upscore4)

            self.score_feed2 = conv2d('score_feed2',
                                      x=self.feed2,
                                      num_filters=self.params.num_classes,
                                      kernel_size=(1, 1),
                                      **skip_kwargs)
            _debug(self.score_feed2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        # Final stride-8 upsampling; this call passes no batchnorm_enabled
        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=scores_shaped_like(self.x_pl),
                kernel_size=(16, 16),
                stride=(8, 8),
                is_training=self.is_training,
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore8)

        self.logits = self.upscore8
Beispiel #11
0
class DilationMobileNet(BasicModel):
    """
    Segmentation model that reuses a MobileNet encoder up to ``conv4_1`` and
    continues with its own layers, two of which are atrous (dilated)
    convolutions with rates 2 and 4. Class scores are upsampled to the input
    size by a single stride-8 transposed convolution.
    """
    def __init__(self, args):
        super().__init__(args)
        # encoder is created in init_network
        self.encoder = None
        # L2 regularisation strength shared by the layers built here
        self.wd = self.args.weight_decay

        # layer attributes declared up front
        self.upscore2 = None
        self.score_feed1 = None
        self.fuse_feed1 = None
        self.upscore4 = None
        self.score_feed2 = None
        self.fuse_feed2 = None
        self.upscore8 = None

    def build(self):
        """Run every graph-construction stage in order."""
        print("\nBuilding the MODEL...")
        stages = (self.init_input,
                  self.init_network,
                  self.init_output,
                  self.init_train,
                  self.init_summaries)
        for stage in stages:
            stage()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Build the MobileNet encoder, then stack the dilated layers, the 1x1
        scoring convolution and the stride-8 upsampling on top of the
        encoder's ``conv4_1`` output. The result is stored in
        ``self.logits``.
        :return:
        """

        # Encoder: MobileNet on the input placeholder
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=self.params.num_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)
        self.encoder.build()

        def dilated(name, x, filters, rate):
            """3x3 atrous conv (relu, batchnorm_enabled=True) at `rate`."""
            out = atrous_conv2d(name,
                                x,
                                num_filters=filters,
                                kernel_size=(3, 3),
                                dilation_rate=rate,
                                padding='SAME',
                                activation=tf.nn.relu,
                                batchnorm_enabled=True,
                                is_training=self.is_training,
                                l2_strength=self.wd)
            _debug(out)
            return out

        def separable(name, x, filters):
            """3x3, stride-1 depthwise-separable conv (relu, batchnorm)."""
            out = depthwise_separable_conv2d(
                name,
                x,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=filters,
                kernel_size=(3, 3),
                stride=(1, 1),
                padding='SAME',
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(out)
            return out

        with tf.name_scope('dilation_2'):
            # Layers stacked on the encoder's conv4_1 output
            self.conv4_2 = dilated('conv_ds_7_dil', self.encoder.conv4_1,
                                   512, 2)
            self.conv5_1 = separable('conv_ds_8_dil', self.conv4_2, 512)
            self.conv5_2 = separable('conv_ds_9_dil', self.conv5_1, 512)
            self.conv5_3 = separable('conv_ds_10_dil', self.conv5_2, 512)
            self.conv5_4 = separable('conv_ds_11_dil', self.conv5_3, 512)
            self.conv5_5 = separable('conv_ds_12_dil', self.conv5_4, 512)
            self.conv5_6 = dilated('conv_ds_13_dil', self.conv5_5, 1024, 4)
            self.conv6_1 = separable('conv_ds_14_dil', self.conv5_6, 1024)

            # Pooling is removed: class scores come from a 1x1 conv
            self.score_fr = conv2d('conv_1c_1x1_dil',
                                   self.conv6_1,
                                   num_filters=self.params.num_classes,
                                   kernel_size=(1, 1),
                                   batchnorm_enabled=True,
                                   is_training=self.is_training,
                                   l2_strength=self.wd)
            _debug(self.score_fr)

            # Upsample by 8 to the first three dimensions of the input
            # placeholder, with one channel per class
            logits_shape = self.x_pl.shape.as_list()[0:3]
            logits_shape = logits_shape + [self.params.num_classes]
            self.upscore8 = conv2d_transpose('upscore8',
                                             x=self.score_fr,
                                             output_shape=logits_shape,
                                             kernel_size=(16, 16),
                                             stride=(8, 8),
                                             is_training=self.is_training,
                                             l2_strength=self.encoder.wd)
            _debug(self.upscore8)
            self.logits = self.upscore8
class FCN8sMobileNetTFRecords(BasicModel):
    """
    FCN8s with MobileNet as an encoder Model Architecture
    """
    def __init__(self, args):
        """
        Declare every attribute the model fills in later and grab the session
        :param args: forwarded unchanged to the parent constructor
        """
        super().__init__(args)

        # Encoder network, assigned in init_network()
        self.encoder = None

        # Decoder layers, assigned in init_network()
        self.upscore2 = self.score_feed1 = self.fuse_feed1 = None
        self.upscore4 = self.score_feed2 = self.fuse_feed2 = None
        self.upscore8 = None

        # TFRecord input pipeline state, assigned in init_tfrecord_input()
        self.handle = None
        self.training_iterator = self.validation_iterator = None
        self.next_img = None
        self.training_handle = self.validation_handle = None

        # Session that is the default at construction time
        self.sess = tf.get_default_session()

    def build(self):
        """
        Run all graph-construction stages, one after the other
        :return:
        """
        print("\nBuilding the MODEL...")

        # Order matters: init_tfrecord_input() may rebind self.x_pl / self.y_pl,
        # which init_network() then feeds into the encoder.
        stages = (
            self.init_input,
            self.init_tfrecord_input,
            self.init_network,
            self.init_output,
            self.init_train,
            self.init_summaries,
        )
        for stage in stages:
            stage()

        print("The Model is built successfully\n")

    def init_tfrecord_input(self):
        """
        Build the TFRecord input pipeline and bind it to the model inputs.

        Does nothing unless ``self.args.mode == 'train'``. In train mode it
        creates a shuffled, batched, repeating training dataset and a batched
        validation dataset, stores one iterator and its evaluated string
        handle for each, and replaces ``self.x_pl`` / ``self.y_pl`` with the
        outputs of an iterator that is selected at run time by feeding one of
        those handles into the ``self.handle`` placeholder.

        :raises RuntimeError: if ``self.sess`` is None (no default session was
            active when the model was constructed); the string handles have to
            be evaluated while the graph is being built.
        :return:
        """
        if self.args.mode != 'train':
            return

        print("USING TF RECORDS")

        # Fail early with an actionable message instead of an AttributeError
        # on ``None.run`` after half of the pipeline has been built.
        if self.sess is None:
            raise RuntimeError(
                "init_tfrecord_input() needs a TensorFlow session but "
                "self.sess is None; construct the model while a session is "
                "the default one (e.g. inside `with sess.as_default():`).")

        # Use `tf.parse_single_example()` to extract data from a `tf.Example`
        # protocol buffer, and perform any additional per-record preprocessing.
        def parser(record):
            """Decode one serialized example into an (image, annotation) pair."""
            keys_to_features = {
                'height': tf.FixedLenFeature([], tf.int64),
                'width': tf.FixedLenFeature([], tf.int64),
                'image_raw': tf.FixedLenFeature([], tf.string),
                'mask_raw': tf.FixedLenFeature([], tf.string),
            }
            parsed = tf.parse_single_example(record, keys_to_features)

            # Raw bytes -> flat uint8 vectors, cast to the dtypes used below.
            image = tf.cast(tf.decode_raw(parsed['image_raw'], tf.uint8),
                            tf.float32)
            annotation = tf.cast(
                tf.decode_raw(parsed['mask_raw'], tf.uint8), tf.int32)

            height = tf.cast(parsed['height'], tf.int32)
            width = tf.cast(parsed['width'], tf.int32)

            # The record stores its own size; the image is reshaped to
            # (height, width, 3) and the mask to (height, width).
            image = tf.reshape(image, tf.stack([height, width, 3]))
            annotation = tf.reshape(annotation, tf.stack([height, width]))

            return image, annotation

        # The training shards are hard-coded; ``self.args.tfrecord_train_file``
        # is not consulted for training data.
        # NOTE(review): the third shard ends in '.tfrecord' while the others
        # end in '.tfrecords' - confirm this matches the file on disk.
        train_files = [
            './data/cscapes_train_1.tfrecords',
            './data/cscapes_train_2.tfrecords',
            './data/cscapes_train_3.tfrecord',
            './data/cscapes_train_4.tfrecords',
        ]
        train_dataset = tf.contrib.data.TFRecordDataset(train_files)
        train_dataset = train_dataset.map(parser)
        train_dataset = train_dataset.shuffle(
            buffer_size=self.args.tfrecord_train_len)
        train_dataset = train_dataset.batch(self.args.batch_size)
        train_dataset = train_dataset.repeat()

        # Validation data: a single file, batched, neither shuffled nor
        # repeated.
        val_filename = "./data/" + self.args.tfrecord_val_file
        val_dataset = tf.contrib.data.TFRecordDataset(val_filename)
        val_dataset = val_dataset.map(parser)
        val_dataset = val_dataset.batch(self.args.batch_size)

        self.training_iterator = train_dataset.make_one_shot_iterator()
        self.validation_iterator = \
            val_dataset.make_initializable_iterator()

        # Evaluate the string handles once so they can later be fed into
        # ``self.handle`` to choose between the two iterators.
        self.training_handle = self.sess.run(
            self.training_iterator.string_handle())
        self.validation_handle = self.sess.run(
            self.validation_iterator.string_handle())

        self.handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.contrib.data.Iterator.from_string_handle(
            self.handle, train_dataset.output_types,
            train_dataset.output_shapes)

        # The model inputs now come from the dataset iterator.
        self.next_img = iterator.get_next()
        self.x_pl, self.y_pl = self.next_img

        # The parser reshapes with per-record sizes, so pin the static
        # spatial shape to the configured image size here.
        self.x_pl.set_shape(
            [None, self.args.img_height, self.args.img_width, 3])
        self.y_pl.set_shape(
            [None, self.args.img_height, self.args.img_width])

    def init_network(self):
        """
        Assemble the graph: MobileNet encoder followed by the FCN8s decoder.
        The decoder upsamples the encoder scores with strides 2, 2 and 8,
        adding 1x1-conv scores of encoder.feed1 and encoder.feed2 after the
        first two steps; the final upsampled tensor becomes self.logits.
        :return:
        """

        # Encoder: MobileNet reading directly from the input tensor
        self.encoder = MobileNet(
            x_input=self.x_pl,
            num_classes=self.params.num_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)
        self.encoder.build()

        # Values shared by every decoder layer
        n_classes = self.params.num_classes
        decay = self.encoder.wd
        batch_dim = [self.args.batch_size]

        # Decoder stage 1: x2 upsample to feed1's spatial size, then fuse
        with tf.name_scope('upscore_2s'):
            feed1 = self.encoder.feed1
            feed1_hw = feed1.shape.as_list()[1:3]
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.encoder.score_fr,
                output_shape=batch_dim + feed1_hw + [n_classes],
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=decay)
            self.score_feed1 = conv2d(
                'score_feed1',
                x=feed1,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=decay)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        # Decoder stage 2: x2 upsample to feed2's spatial size, then fuse
        with tf.name_scope('upscore_4s'):
            feed2 = self.encoder.feed2
            feed2_hw = feed2.shape.as_list()[1:3]
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=batch_dim + feed2_hw + [n_classes],
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=decay)
            self.score_feed2 = conv2d(
                'score_feed2',
                x=feed2,
                num_filters=n_classes,
                kernel_size=(1, 1),
                l2_strength=decay)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        # Decoder stage 3: x8 upsample to the spatial size of the input
        with tf.name_scope('upscore_8s'):
            input_hw = self.x_pl.shape.as_list()[1:3]
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=batch_dim + input_hw + [n_classes],
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=decay)

        self.logits = self.upscore8