コード例 #1
0
class FCN8s2StreamShuffleNetEarly(BasicModel):
    """
    Two-stream FCN8s segmentation model built on ShuffleNet encoders.

    An appearance encoder consumes the image placeholder and a motion encoder
    consumes the flow placeholder. Their ``stage2`` outputs are fused early by
    an element-wise multiplication; stages 3 and 4 are then built through the
    appearance encoder on the fused tensor. The decoder applies transposed
    convolutions with strides 2, 2 and 8, adding skip connections scored from
    stage 3 and from the fused stage-2 tensor.
    """
    def __init__(self, args):
        super().__init__(args)
        # Kept as an attribute for parity with the other models; this class
        # stores its encoders in ``app_encoder`` / ``motion_encoder``.
        self.encoder = None

    def build(self):
        """Build inputs, network, outputs, (optionally) training ops and summaries."""
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        # Training ops are only created when running in 'experiment' data mode.
        in_experiment_mode = self.args.data_mode == 'experiment'
        if in_experiment_mode:
            self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_input(self):
        """Create the image, flow, label and training-flag placeholders."""
        batch = self.args.batch_size
        height = self.params.img_height
        width = self.params.img_width

        with tf.name_scope('input'):
            # Image and flow inputs share the same 3-channel shape.
            self.x_pl = tf.placeholder(tf.float32, [batch, height, width, 3])
            self.flo_pl = tf.placeholder(tf.float32,
                                         [batch, height, width, 3])
            self.y_pl = tf.placeholder(tf.int32, [batch, height, width])

            if self.params.weighted_loss:
                # Per-pixel weight buffer (a NumPy array, not a placeholder).
                self.wghts = np.zeros((batch, height, width),
                                      dtype=np.float32)
            self.is_training = tf.placeholder(tf.bool)

    def init_summaries(self):
        """Create accuracy, image and scalar summaries plus best-IoU bookkeeping."""
        with tf.name_scope('pixel_wise_accuracy'):
            correct = tf.equal(self.y_pl, self.out_argmax)
            self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        with tf.name_scope('segmented_output'):
            image_u8 = tf.cast(self.x_pl, tf.uint8)
            flow_u8 = tf.cast(self.flo_pl, tf.uint8)
            # labels_summary = tf.py_func(decode_labels, [self.y_pl, self.params.num_classes], tf.uint8)
            decoded_preds = tf.py_func(
                decode_labels,
                [self.out_argmax, self.params.num_classes],
                tf.uint8)
            # Concatenate along axis 2, which is the width axis of the input
            # placeholders, so the panels sit side by side.
            panels = [image_u8, flow_u8, decoded_preds]
            self.segmented_summary = tf.concat(values=panels, axis=2)

        # Scalars evaluated every step; only registered when a loss exists.
        if self.loss is not None:
            with tf.name_scope('train-summary'):
                tf.summary.scalar('loss', self.loss)
                tf.summary.scalar('pixel_wise_accuracy', self.accuracy)

        self.merged_summaries = tf.summary.merge_all()

        # Non-trainable variable holding the best validation IoU, together
        # with a placeholder/assign pair used to update it.
        self.best_iou_tensor = tf.Variable(
            0.0, trainable=False, name='best_iou')
        self.best_iou_input = tf.placeholder(
            'float32', None, name='best_iou_input')
        self.best_iou_assign_op = self.best_iou_tensor.assign(
            self.best_iou_input)

    def init_network(self):
        """
        Build both encoders, the early fusion and the FCN8s decoder.

        Sets ``self.logits`` to the output of the final stride-8 transposed
        convolution.
        :return:
        """
        args = self.args
        num_classes = self.params.num_classes

        # Settings shared by the appearance and motion ShuffleNet streams.
        encoder_kwargs = dict(
            num_classes=num_classes,
            pretrained_path=args.pretrained_path,
            train_flag=self.is_training,
            batchnorm_enabled=args.batchnorm_enabled,
            num_groups=args.num_groups,
            weight_decay=args.weight_decay,
            bias=args.bias)

        self.app_encoder = ShuffleNet(
            x_input=self.x_pl,
            prefix='app_',
            mean_path=args.data_dir + 'mean.npy',
            **encoder_kwargs)

        self.motion_encoder = ShuffleNet(
            x_input=self.flo_pl,
            prefix='mot_',
            mean_path=args.data_dir + 'flo_mean.npy',
            **encoder_kwargs)

        # Encoding part: build each stream, then fuse the stage-2 features.
        self.app_encoder.build()
        self.motion_encoder.build()
        self.combined_score = tf.multiply(self.app_encoder.stage2,
                                          self.motion_encoder.stage2)
        # Disabled alternative fusion (channel concat + 1x1 conv):
        #        self.combined_score= tf.concat((self.app_encoder.stage2, self.motion_encoder.stage2), axis=3)
        #        _debug(self.combined_score)
        #        self.combined_score = conv2d('combined_score', self.combined_score, num_filters= 240, l2_strength=self.args.weight_decay,
        #                               kernel_size=(1, 1))

        # Remaining stages run through the appearance encoder only.
        self.stage3 = self.app_encoder.stage(
            self.combined_score, stage=3, repeat=7)
        _debug(self.stage3)

        self.stage4 = self.app_encoder.stage(self.stage3, stage=4, repeat=3)
        _debug(self.stage4)

        # Skip-connection sources for the decoder.
        self.feed1 = self.stage3
        self.feed2 = self.combined_score

        # First Experiment is to use the regular conv2d
        self.score_fr = conv2d(
            'combined_conv_1c_1x1',
            self.stage4,
            num_filters=num_classes,
            l2_strength=args.weight_decay,
            kernel_size=(1, 1))
        _debug(self.score_fr)

        def class_map_shape(reference):
            # Batch/height/width of `reference` with one channel per class.
            return reference.shape.as_list()[0:3] + [num_classes]

        # Keyword arguments common to every decoder layer.
        layer_kwargs = dict(
            is_training=self.is_training,
            l2_strength=args.weight_decay,
            bias=args.bias)

        # Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.score_fr,
                output_shape=class_map_shape(self.feed1),
                batchnorm_enabled=args.batchnorm_enabled,
                kernel_size=(4, 4),
                stride=(2, 2),
                **layer_kwargs)
            _debug(self.upscore2)

            self.score_feed1 = conv2d(
                'score_feed1',
                x=self.feed1,
                batchnorm_enabled=args.batchnorm_enabled,
                num_filters=num_classes,
                kernel_size=(1, 1),
                **layer_kwargs)
            _debug(self.score_feed1)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=class_map_shape(self.feed2),
                batchnorm_enabled=args.batchnorm_enabled,
                kernel_size=(4, 4),
                stride=(2, 2),
                **layer_kwargs)
            _debug(self.upscore4)

            self.score_feed2 = conv2d(
                'score_feed2',
                x=self.feed2,
                batchnorm_enabled=args.batchnorm_enabled,
                num_filters=num_classes,
                kernel_size=(1, 1),
                **layer_kwargs)
            _debug(self.score_feed2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            # The final layer is called without `batchnorm_enabled`, so the
            # helper's own default applies here.
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=class_map_shape(self.x_pl),
                kernel_size=(16, 16),
                stride=(8, 8),
                **layer_kwargs)
            _debug(self.upscore8)

        self.logits = self.upscore8
コード例 #2
0
class DilationShuffleNet(BasicModel):
    """
    Dilated ShuffleNet segmentation model.

    Stages 3 and 4 of the ShuffleNet encoder are built with dilation 2 and 4.
    A 1x1 convolution scores stage 4 per class, and one stride-8 transposed
    convolution produces logits with the batch/height/width of the input
    placeholder.
    """
    def __init__(self, args, phase=0):
        super().__init__(args, phase=phase)
        # Populated by init_network().
        self.encoder = None

    def build(self):
        """Run every graph-construction step in order."""
        print("\nBuilding the MODEL...")
        construction_steps = (
            self.init_input,
            self.init_network,
            self.init_output,
            self.init_train,
            self.init_summaries,
        )
        for step in construction_steps:
            step()
        print("The Model is built successfully\n")

    def init_network(self):
        """
        Build the encoder, the dilated stages and the upsampling head.

        Sets ``self.logits`` to the output of the transposed convolution.
        :return:
        """
        args = self.args
        num_classes = self.params.num_classes

        # ShuffleNet encoder fed by the image placeholder.
        encoder = ShuffleNet(
            x_input=self.x_pl,
            num_classes=num_classes,
            pretrained_path=args.pretrained_path,
            train_flag=self.is_training,
            batchnorm_enabled=args.batchnorm_enabled,
            num_groups=args.num_groups,
            weight_decay=args.weight_decay,
            bias=args.bias)
        self.encoder = encoder

        # Encoding part
        encoder.build()

        with tf.name_scope('dilation_2'):
            # Stages 3 and 4 are built with dilation 2 and 4 respectively.
            self.stage3 = encoder.stage(
                encoder.stage2, stage=3, repeat=7, dilation=2)
            _debug(self.stage3)

            self.stage4 = encoder.stage(
                self.stage3, stage=4, repeat=3, dilation=4)
            _debug(self.stage4)

            # Per-class scores from a 1x1 convolution over stage 4.
            self.score_fr = conv2d(
                'score_fr_dil',
                x=self.stage4,
                num_filters=num_classes,
                kernel_size=(1, 1),
                l2_strength=encoder.wd,
                is_training=self.is_training)
            _debug(self.score_fr)

            # Target shape: batch/height/width of the input, one channel per class.
            logits_shape = self.x_pl.shape.as_list()[0:3] + [num_classes]
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.score_fr,
                output_shape=logits_shape,
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=encoder.wd,
                is_training=self.is_training)
            _debug(self.upscore8)

        self.logits = self.upscore8
class DilationV2ShuffleNet(BasicModel):
    """
    Dilated ShuffleNet segmentation model with optionally down-scaled labels.

    The label placeholder is ``args.targets_resize`` times smaller than the
    input image in height and width. Stages 3 and 4 of the ShuffleNet encoder
    are built with dilation 2 and 4 and scored by a 1x1 convolution. When
    ``targets_resize`` is below 8, the scores are upsampled to the label
    shape by a transposed convolution with stride ``8 // targets_resize``;
    otherwise the scores are used as logits directly.
    """
    def __init__(self, args):
        super().__init__(args)
        # Populated by init_network().
        self.encoder = None
        # Factor by which labels are smaller than the input image. This is
        # read by both init_input() and init_network() and must stay constant.
        self.targets_resize = self.args.targets_resize

    def build(self):
        """Build inputs, network, outputs, training ops and summaries in order."""
        print("\nBuilding the MODEL...")
        self.init_input()
        self.init_network()
        self.init_output()
        self.init_train()
        self.init_summaries()
        print("The Model is built successfully\n")

    def init_input(self):
        """Create the image, down-scaled label, learning-rate and training-flag placeholders."""
        with tf.name_scope('input'):
            self.x_pl = tf.placeholder(tf.float32, [
                self.args.batch_size, self.params.img_height,
                self.params.img_width, 3
            ])
            # Labels are integer-divided by the resize factor in both
            # spatial dimensions.
            self.y_pl = tf.placeholder(tf.int32, [
                self.args.batch_size,
                self.params.img_height // self.targets_resize,
                self.params.img_width // self.targets_resize
            ])
            # NOTE(review): both prints report the same shapes; the second
            # looks like a leftover from a removed resize step. Kept so the
            # console output is unchanged.
            print('X_batch shape ',
                  self.x_pl.get_shape().as_list(), ' ',
                  self.y_pl.get_shape().as_list())
            print('Afterwards: X_batch shape ',
                  self.x_pl.get_shape().as_list(), ' ',
                  self.y_pl.get_shape().as_list())

            self.curr_learning_rate = tf.placeholder(tf.float32)
            if self.params.weighted_loss:
                # NOTE(review): the weight buffer uses the full image size
                # while y_pl is down-scaled by targets_resize; confirm this
                # matches what the weighted loss expects.
                self.wghts = np.zeros(
                    (self.args.batch_size, self.params.img_height,
                     self.params.img_width),
                    dtype=np.float32)
            self.is_training = tf.placeholder(tf.bool)

    def init_network(self):
        """
        Build the encoder, the dilated stages and the (optional) upsampling head.

        Sets ``self.logits`` to the upsampled scores when
        ``self.targets_resize < 8`` and to ``self.score_fr`` otherwise.
        :return:
        """

        # Init ShuffleNet as an encoder
        self.encoder = ShuffleNet(
            x_input=self.x_pl,
            num_classes=self.params.num_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            batchnorm_enabled=self.args.batchnorm_enabled,
            num_groups=self.args.num_groups,
            weight_decay=self.args.weight_decay,
            bias=self.args.bias)

        # Build Encoding part
        self.encoder.build()
        with tf.name_scope('dilation_2'):
            self.stage3 = self.encoder.stage(self.encoder.stage2,
                                             stage=3,
                                             repeat=7,
                                             dilation=2)
            _debug(self.stage3)
            self.stage4 = self.encoder.stage(self.stage3,
                                             stage=4,
                                             repeat=3,
                                             dilation=4)
            _debug(self.stage4)

            # Per-class scores from a 1x1 convolution over stage 4.
            self.score_fr = conv2d('score_fr_dil',
                                   x=self.stage4,
                                   num_filters=self.params.num_classes,
                                   kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd,
                                   is_training=self.is_training)
            _debug(self.score_fr)

        if self.targets_resize < 8:
            # Upsampling factor from score_fr to the label resolution. It is
            # kept in a local variable: the original code stored it back into
            # self.targets_resize, which silently changed the meaning of that
            # attribute after the first build.
            # (Presumably score_fr is at 1/8 of the input size; TODO confirm.)
            upsample_factor = 8 // self.targets_resize
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.score_fr,
                output_shape=self.y_pl.shape.as_list()[0:3] +
                [self.params.num_classes],
                kernel_size=(upsample_factor * 2, upsample_factor * 2),
                stride=(upsample_factor, upsample_factor),
                l2_strength=self.encoder.wd,
                is_training=self.is_training)

            _debug(self.upscore8)
            self.logits = self.upscore8
        else:
            # No upsampling: the scores are used as logits directly.
            self.logits = self.score_fr