Code Example #1
File: bisenet.py  Project: CodePlay2016/BiSENet-TF
    def __init__(self, model_config, train_config, num_classes, mode):
        self.model_config = model_config
        self.train_config = train_config
        self.num_classes = num_classes
        self.mode = mode
        assert mode in ['train', 'validation', 'inference', 'test']
        if self.mode == 'train':
            self.data_config = self.train_config['train_data_config']
        elif self.mode == 'validation':
            self.data_config = self.train_config['validation_data_config']
        elif self.mode == 'test':
            self.data_config = self.train_config['test_data_config']

        self.images = None
        self.images_feed = None
        self.labels = None
        self.net = None
        self.sup1 = None
        self.sup2 = None
        self.init_fn = None
        self.loss = None
        self.total_loss = []
        self.response = None

        with tf.device("/cpu:0"):
            self.dataset = DataLoader(self.data_config,
                                      self.train_config['DataSet'],
                                      self.train_config['dataset_dir'],
                                      self.train_config['class_dict'])
Code Example #2
    def build_inputs(self):
        """Input fetching and batching

        Outputs:
          self.images: image batch of shape [batch, hz, wz, 3]
          labels: image batch of shape [batch, hx, wx, num_classes]
        """
        if self.mode in ['train', 'validation', 'test']:
            # Keeping data loading and preprocessing on the CPU is substantially faster.
            # Prepare the dataset.
            with tf.device("/cpu:0"):
                dataset = DataLoader(self.data_config, self.train_config['DataSet'], self.train_config['class_dict'])
                self.images, labels = dataset.get_one_batch()
                self.labels = tf.one_hot(labels, self.num_classes)

        else:
            self.images_feed = tf.placeholder(shape=[None, None, None, 3],
                                    dtype=tf.uint8, name='images_input')

            self.images = tf.to_float(self.images_feed)/255
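
In inference mode the method above only creates the images_input placeholder, so image batches must be fed in at run time. A minimal sketch of that feed path, assuming the graph has already been built and a tf.Session is open; sess, model, and frame below are illustrative names, not part of the project:

import numpy as np

# frame: a single uint8 RGB image of shape [h, w, 3]
batch = np.expand_dims(frame, axis=0)                  # add the batch dimension
logits = sess.run(model.net,                           # per-pixel class logits
                  feed_dict={model.images_feed: batch})
prediction = np.argmax(logits, axis=-1)                # [1, h, w] class-id map
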
Code Example #3
File: bisenet.py  Project: CodePlay2016/BiSENet-TF
class BiseNet(object):
    def __init__(self, model_config, train_config, num_classes, mode):
        self.model_config = model_config
        self.train_config = train_config
        self.num_classes = num_classes
        self.mode = mode
        assert mode in ['train', 'validation', 'inference', 'test']
        if self.mode == 'train':
            self.data_config = self.train_config['train_data_config']
        elif self.mode == 'validation':
            self.data_config = self.train_config['validation_data_config']
        elif self.mode == 'test':
            self.data_config = self.train_config['test_data_config']

        self.images = None
        self.images_feed = None
        self.labels = None
        self.net = None
        self.sup1 = None
        self.sup2 = None
        self.init_fn = None
        self.loss = None
        self.total_loss = []
        self.response = None

        with tf.device("/cpu:0"):
            self.dataset = DataLoader(self.data_config,
                                      self.train_config['DataSet'],
                                      self.train_config['dataset_dir'],
                                      self.train_config['class_dict'])

    def build_inputs(self):
        """Input fetching and batching

        Outputs:
          self.images: image batch of shape [batch, hz, wz, 3]
          labels: image batch of shape [batch, hx, wx, num_classes]
        """
        if self.mode in ['train', 'validation', 'test']:
            # Keeping data loading and preprocessing on the CPU is substantially faster.
            # The dataset was prepared (on the CPU) in __init__.
            self.images, labels = self.dataset.get_one_batch()
            # labels = tf.Print(labels, [tf.unique(tf.reshape(labels,[-1,]))[0]], message="labels:", summarize=10)
            self.labels = tf.one_hot(labels, self.num_classes)

        else:
            self.images_feed = tf.placeholder(shape=[None, None, None, 3],
                                              dtype=tf.uint8,
                                              name='images_input')

            self.images = tf.to_float(self.images_feed) / 255

    def is_training(self):
        """Returns true if the model is built for training mode"""
        return self.mode == 'train'

    def setup_global_step(self):
        global_step = tf.Variable(initial_value=0,
                                  name='global_step',
                                  trainable=False,
                                  collections=[
                                      tf.GraphKeys.GLOBAL_STEP,
                                      tf.GraphKeys.GLOBAL_VARIABLES
                                  ])

        self.global_step = global_step

    def build_bisenet_custom(self, reuse=False):
        """
        Builds the BiSeNet model.

        Arguments:
          reuse: Reuse variable or not

        Returns:
          BiSeNet model
        """
        ### The spatial path
        ### The number of feature maps for each convolution is not specified in the paper
        ### It was chosen here to be equal to the number of feature maps of a classification
        ### model at each corresponding stage
        batch_norm_params = self.model_config['batch_norm_params']
        init_method = self.model_config['conv_config']['init_method']
        down_16x_end_points = self.model_config['net_node']['16xdown:50']
        down_32x_end_points = self.model_config['net_node']['32xdown:25']
        if init_method == 'kaiming_normal':
            initializer = slim.variance_scaling_initializer(factor=2.0,
                                                            mode='FAN_IN',
                                                            uniform=False)
        else:
            initializer = slim.xavier_initializer()

        with tf.variable_scope('spatial_net', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    # inference/spatial_net/Conv/Conv2D run 1 average cost 250.552994 ms, 25.405 %, FlopsRate: 9.064 %
                    # conv2d
                    spatial_net = slim.conv2d(self.images,
                                              16, [3, 3],
                                              stride=[2, 2],
                                              activation_fn=None)
                    spatial_net = hard_swish(
                        slim.batch_norm(spatial_net, fused=True))

                    # bneck1
                    exp_size = _make_divisible(16)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               16,
                                               kernel=[3, 3],
                                               stride=2)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))

                    # bneck2
                    exp_size = _make_divisible(72)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               24,
                                               kernel=[3, 3],
                                               stride=2)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck3
                    exp_size = _make_divisible(88)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               24,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck4
                    exp_size = _make_divisible(96)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               40,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck5
                    spatial_net = DepthSepConv(spatial_net,
                                               80,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck6
                    spatial_net = DepthSepConv(spatial_net,
                                               128,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))

        frontend_config = self.model_config['frontend_config']
        ### Context path
        logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
            self.images, frontend_config, self.is_training(), reuse)

        ### Combining the paths
        with tf.variable_scope('combine_path', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    # tail part
                    global_context = tf.reduce_mean(
                        end_points[down_32x_end_points], [1, 2],
                        keep_dims=True)
                    global_context = slim.conv2d(global_context,
                                                 128,
                                                 1, [1, 1],
                                                 activation_fn=None)
                    global_context = tf.nn.relu(
                        slim.batch_norm(global_context, fused=True))
                    ARM_out1 = AttentionRefinementModule_Custom(
                        end_points[down_32x_end_points], n_filters=128)
                    ARM_out2 = AttentionRefinementModule_Custom(
                        end_points[down_16x_end_points], n_filters=128)

                    ARM_out1 = tf.add(ARM_out1, global_context)
                    ARM_out1 = Upsampling(ARM_out1, scale=2)
                    # inference/combine_path/Conv_6/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(256)
                    ARM_out1 = slim.conv2d(ARM_out1,
                                           exp_size, [1, 1],
                                           stride=[1, 1],
                                           activation_fn=None)
                    ARM_out1 = slim.batch_norm(ARM_out1, fused=True)
                    ARM_out1 = DepthSepConv(ARM_out1,
                                            128,
                                            kernel=[3, 3],
                                            stride=1)
                    ARM_out1 = tf.nn.relu(slim.batch_norm(ARM_out1,
                                                          fused=True))
                    ARM_out2 = tf.add(ARM_out2, ARM_out1)
                    ARM_out2 = Upsampling(ARM_out2, scale=2)
                    # inference/combine_path/Conv_13/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(256)
                    ARM_out2 = slim.conv2d(ARM_out2,
                                           exp_size, [1, 1],
                                           stride=[1, 1],
                                           activation_fn=None)
                    ARM_out2 = slim.batch_norm(ARM_out2, fused=True)
                    ARM_out2 = DepthSepConv(ARM_out2,
                                            128,
                                            kernel=[3, 3],
                                            stride=1)
                    ARM_out2 = tf.nn.relu(slim.batch_norm(ARM_out2,
                                                          fused=True))
                    context_net = ARM_out2

                    FFM_out = FeatureFusionModule_Custom(input_1=spatial_net,
                                                         input_2=context_net,
                                                         n_filters=256)

                    ARM_out1 = ConvBlock(ARM_out1,
                                         n_filters=128,
                                         kernel_size=[3, 3])
                    ARM_out2 = ConvBlock(ARM_out2,
                                         n_filters=128,
                                         kernel_size=[3, 3])
                    exp_size = _make_divisible(128)
                    FFM_out = slim.conv2d(FFM_out,
                                          exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                    FFM_out = slim.batch_norm(FFM_out, fused=True)
                    FFM_out = DepthSepConv(FFM_out,
                                           64,
                                           kernel=[3, 3],
                                           stride=1)
                    FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                    # Upsampling + dilation or only Upsampling
                    FFM_out = Upsampling(FFM_out, scale=2)
                    # inference/combine_path/Conv_12/Conv2D run 1 average cost 32.151001 ms, 3.260 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(128)
                    FFM_out = slim.conv2d(FFM_out,
                                          exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                    FFM_out = DepthSepConv(FFM_out,
                                           64,
                                           kernel=[3, 3],
                                           stride=1,
                                           rate=2)
                    FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                    FFM_out = slim.conv2d(FFM_out,
                                          self.num_classes, [1, 1],
                                          activation_fn=None,
                                          scope='logits')
                    self.net = Upsampling(FFM_out, 4)

                    if self.mode in ['train', 'validation', 'test']:
                        sup1 = slim.conv2d(ARM_out1,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl1')
                        sup2 = slim.conv2d(ARM_out2,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl2')
                        self.sup1 = Upsampling(sup1, scale=16)
                        self.sup2 = Upsampling(sup2, scale=8)
                        self.init_fn = init_fn

    def build_bisenet(self, reuse=False):
        """
        Builds the BiSeNet model.

        Arguments:
          reuse: Reuse variable or not

        Returns:
          BiSeNet model
        """

        ### The spatial path
        ### The number of feature maps for each convolution is not specified in the paper
        ### It was chosen here to be equal to the number of feature maps of a classification
        ### model at each corresponding stage
        batch_norm_params = self.model_config['batch_norm_params']
        init_method = self.model_config['conv_config']['init_method']
        down_16x_end_points = self.model_config['net_node']['16xdown:50']
        down_32x_end_points = self.model_config['net_node']['32xdown:25']
        if init_method == 'kaiming_normal':
            initializer = slim.variance_scaling_initializer(factor=2.0,
                                                            mode='FAN_IN',
                                                            uniform=False)
        else:
            initializer = slim.xavier_initializer()

        with tf.variable_scope('spatial_net', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    #print("*"*20)
                    print(self.images)
                    #print("*" * 20)
                    spatial_net = ConvBlock(self.images,
                                            n_filters=64,
                                            kernel_size=[7, 7],
                                            strides=2)
                    spatial_net = ConvBlock(spatial_net,
                                            n_filters=64,
                                            kernel_size=[3, 3],
                                            strides=2)
                    spatial_net = ConvBlock(spatial_net,
                                            n_filters=64,
                                            kernel_size=[3, 3],
                                            strides=2)
                    spatial_net = ConvBlock(spatial_net,
                                            n_filters=128,
                                            kernel_size=[1, 1])

        frontend_config = self.model_config['frontend_config']
        ### Context path
        logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
            self.images, frontend_config, self.is_training(), reuse)

        ### Combining the paths
        with tf.variable_scope('combine_path', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    # tail part
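                    # Global average pooling yields an image-level context vector
                    # that is added to the deepest attention-refined features.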
                    size = tf.shape(end_points[down_32x_end_points])[1:3]
                    global_context = tf.reduce_mean(
                        end_points[down_32x_end_points], [1, 2],
                        keep_dims=True)
                    global_context = slim.conv2d(global_context,
                                                 128,
                                                 1, [1, 1],
                                                 activation_fn=None)
                    global_context = tf.nn.relu(
                        slim.batch_norm(global_context, fused=True))
                    net_5 = AttentionRefinementModule(
                        end_points[down_32x_end_points], n_filters=128)
                    net_4 = AttentionRefinementModule(
                        end_points[down_16x_end_points], n_filters=128)

                    net_5 = tf.add(net_5, global_context)
                    net_5 = Upsampling(net_5, scale=2)
                    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                    #net_4=net_5
                    net_4 = tf.add(net_4, net_5)
                    net_4 = Upsampling(net_4, scale=2)
                    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])

                    context_net = net_4

                    net = FeatureFusionModule(input_1=spatial_net,
                                              input_2=context_net,
                                              n_filters=256)
                    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
                    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

                    # Upsampling + dilation or only Upsampling
                    net = Upsampling(net, scale=2)
                    net = slim.conv2d(net,
                                      64, [3, 3],
                                      rate=2,
                                      activation_fn=tf.nn.relu,
                                      biases_initializer=None,
                                      normalizer_fn=slim.batch_norm)

                    net = slim.conv2d(net,
                                      self.num_classes, [1, 1],
                                      activation_fn=None,
                                      scope='logits')
                    self.net = Upsampling(net, 4)

                    # net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits')
                    # self.net = Upsampling(net, scale=8)

                    if self.mode in ['train', 'validation', 'test']:
                        sup1 = slim.conv2d(net_5,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl1')
                        sup2 = slim.conv2d(net_4,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl2')
                        self.sup1 = Upsampling(sup1, scale=16)
                        self.sup2 = Upsampling(sup2, scale=8)
                        self.init_fn = init_fn

    def build_loss(self):
        # self.labels = tf.Print(self.labels, [tf.unique(tf.reshape(self.labels, (-1,)))[0]], message="label:", summarize=10)
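        # Principal loss on the final output plus two auxiliary (deep
        # supervision) losses on the upsampled side outputs sup1 and sup2.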
        loss1 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.net,
                                                    labels=self.labels))
        loss2 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.sup1,
                                                    labels=self.labels))
        loss3 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.sup2,
                                                    labels=self.labels))
        loss = tf.add_n([loss1, loss2, loss3])

        # self.loss = loss1
        # self.total_loss = tf.losses.get_total_loss()
        return loss

    def summarize(self):
        shape = tf.shape(self.labels)

        # Tensorboard inspection
        tf.summary.image('image', self.images, family=self.mode, max_outputs=1)
        # tf.Print(self.labels, [tf.shape(self.labels)], message="label size:", summarize=10)
        color_map = colors_dict[self.train_config['DataSet']]
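        # Multiplying the flattened one-hot maps by the colour palette converts
        # class assignments into RGB images for display in TensorBoard.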
        tf.summary.image('GT',
                         tf.reshape(
                             tf.matmul(
                                 tf.reshape(self.labels,
                                            [-1, self.num_classes]),
                                 color_map), [-1, shape[1], shape[2], 3]),
                         family=self.mode,
                         max_outputs=1)
        tf.summary.image('response',
                         tf.reshape(
                             tf.matmul(
                                 tf.reshape(
                                     tf.one_hot(tf.argmax(self.net, -1),
                                                self.num_classes),
                                     [-1, self.num_classes]), color_map),
                             [-1, shape[1], shape[2], 3]),
                         family=self.mode,
                         max_outputs=1)
        tf.summary.scalar('total_loss',
                          tf.reduce_mean(self.total_loss),
                          family=self.mode)
        # tf.summary.scalar('loss', self.loss, family=self.mode)

        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            tf.argmax(self.net, -1), tf.argmax(self.labels, -1))
        mean_IOU, mean_IOU_update = tf.contrib.metrics.streaming_mean_iou(
            predictions=tf.argmax(self.net, -1),
            labels=tf.argmax(self.labels, -1),
            num_classes=self.num_classes)
        with tf.control_dependencies([accuracy_update, mean_IOU_update]):
            tf.summary.scalar('accuracy', accuracy, family=self.mode)
            tf.summary.scalar('mean_IOU', mean_IOU, family=self.mode)

    def predict(self):
        self.response = self.net

    def build(self, num_gpus=1, reuse=False):
        """Creates all ops for training and evaluation"""
        with tf.name_scope(self.mode):
            if self.mode in ['train', 'validation', 'test']:
                tower_losses = []
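                # Build one tower (clone) of the graph per GPU; towers after
                # the first reuse the variables created by the first tower.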
                for i in range(num_gpus):
                    self.build_inputs()
                    with tf.device('/gpu:%d' % i):
                        # First tower has default name scope.
                        name_scope = ('clone_%d' % i) if i else ''
                        with tf.name_scope(name_scope) as scope:
                            with tf.variable_scope(
                                    tf.get_variable_scope(),
                                    reuse=True if i != 0 else None):
                                if self.model_config['use_custom']:
                                    self.build_bisenet_custom(reuse=reuse)
                                else:
                                    self.build_bisenet(reuse=reuse)
                        loss = self.build_loss()
                        self.total_loss.append(loss)
                with tf.device('/cpu:0'):
                    self.summarize()
            else:
                self.build_inputs()
                self.build_bisenet(reuse=reuse)
                self.predict()

            if self.is_training():
                self.setup_global_step()
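
Putting the pieces together: the class is driven by two configuration dictionaries and a mode string, and build() wires up the inputs, the network, the losses and the summaries. A minimal sketch of assembling a training graph, assuming model_config and train_config have been loaded from the project's configuration files; num_classes=19 and the optimizer below are illustrative choices, not taken from the project:

import tensorflow as tf

model = BiseNet(model_config, train_config, num_classes=19, mode='train')
model.build(num_gpus=1)                        # inputs, network, losses, summaries
total_loss = tf.reduce_mean(model.total_loss)  # average the per-tower losses

# a training op then minimizes total_loss and advances the global step
train_op = tf.train.AdamOptimizer(1e-4).minimize(total_loss,
                                                 global_step=model.global_step)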