Example #1
def flatten_fully_connected(inputs,
                            num_outputs,
                            activation_fn=tf.nn.relu,
                            normalizer_fn=None,
                            normalizer_params=None,
                            weights_initializer=slim.xavier_initializer(),
                            weights_regularizer=None,
                            biases_initializer=tf.zeros_initializer(),
                            biases_regularizer=None,
                            reuse=None,
                            variables_collections=None,
                            outputs_collections=None,
                            trainable=True,
                            scope=None):
    with tf.variable_scope(scope, 'flatten_fully_connected', [inputs]):
        if inputs.shape.ndims > 2:
            inputs = slim.flatten(inputs)
        return slim.fully_connected(inputs,
                                    num_outputs,
                                    activation_fn,
                                    normalizer_fn,
                                    normalizer_params,
                                    weights_initializer,
                                    weights_regularizer,
                                    biases_initializer,
                                    biases_regularizer,
                                    reuse,
                                    variables_collections,
                                    outputs_collections,
                                    trainable,
                                    scope)
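
A minimal usage sketch for the helper above (assuming TF 1.x with tf.contrib.slim imported as slim; the feature-map shape is hypothetical):

import tensorflow as tf
import tensorflow.contrib.slim as slim

# hypothetical 4-D conv feature map; flatten_fully_connected flattens it to
# [None, 7*7*256] before applying the dense layer
feats = tf.placeholder(tf.float32, [None, 7, 7, 256], name='feats')
logits = flatten_fully_connected(feats, num_outputs=10, activation_fn=None, scope='head')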
Example #2
    def _region_proposal(self, net_conv, is_training):
        initializer = slim.xavier_initializer(uniform=True)

        rpn = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3], trainable=is_training, weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)

        hidden_num = 128
        # bi_lstm shape: [-1, hidden_num * 2]
        bi_lstm = self._BiLstm(rpn, cfg.RPN_CHANNELS, hidden_num, name="bi_lstm")

        shape = tf.shape(rpn)
        N, H, W, _ = shape[0], shape[1], shape[2], shape[3]
        bi_lstm_reshape = tf.reshape(bi_lstm, [N, H, W, hidden_num * 2])

        fc = slim.conv2d(bi_lstm_reshape, 512, [1, 1], weights_initializer=initializer,
                         padding='VALID', scope='conv_fc')

        # use 1x1 conv as FC (N, H, W, num_anchors * 2)
        rpn_cls_score = slim.conv2d(fc, self._num_anchors * 2, [1, 1], weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_cls_score')

        # use 1x1 conv as FC (N, H, W, num_anchors * 4)
        rpn_bbox_pred = slim.conv2d(fc, self._num_anchors * 4, [1, 1], weights_initializer=initializer,
                                    padding='VALID', activation_fn=None, scope='rpn_bbox_pred')

        # (N, H, W, num_anchors * 2) -> (N, H, W * num_anchors, 2)
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob")

        # (N, H, W*num_anchors, 2) -> (N, H, W, num_anchors*2)
        rpn_cls_prob_reshape = self._reshape_layer(rpn_cls_prob, self._num_anchors * 2, "rpn_cls_prob_reshape")

        if is_training:
            self._anchor_target_layer(rpn_cls_score, "anchor")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError
            self._predictions["rois"] = rois

        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob_reshape
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
Example #3
    def build_bisenet(self, reuse=False):
        """
        Builds the BiSeNet model.

        Arguments:
          reuse: Reuse variable or not

        Returns:
          BiSeNet model
        """

        ### The spatial path
        ### The number of feature maps for each convolution is not specified in the paper
        ### It was chosen here to be equal to the number of feature maps of a classification
        ### model at each corresponding stage
        batch_norm_params = self.model_config['batch_norm_params']
        init_method = self.model_config['conv_config']['init_method']

        if init_method == 'kaiming_normal':
            initializer = slim.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False)
        else:
            initializer = slim.xavier_initializer()

        with tf.variable_scope('spatial_net', reuse=reuse):
            with slim.arg_scope([slim.conv2d], biases_initializer=None, weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm], is_training=self.is_training(), **batch_norm_params):
                    spatial_net = ConvBlock(self.images, n_filters=64, kernel_size=[7, 7], strides=2)
                    spatial_net = ConvBlock(spatial_net, n_filters=64, kernel_size=[3, 3], strides=2)
                    spatial_net = ConvBlock(spatial_net, n_filters=64, kernel_size=[3, 3], strides=2)
                    spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[1, 1])

        frontend_config = self.model_config['frontend_config']
        ### Context path
        logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(self.images, frontend_config,
                                                                                      self.is_training(), reuse)

        ### Combining the paths
        with tf.variable_scope('combine_path', reuse=reuse):
            with slim.arg_scope([slim.conv2d], biases_initializer=None, weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm], is_training=self.is_training(), **batch_norm_params):
                    # tail part
                    size = tf.shape(end_points['pool5'])[1:3]
                    global_context = tf.reduce_mean(end_points['pool5'], [1, 2], keep_dims=True)
                    global_context = slim.conv2d(global_context, 128, 1, [1, 1], activation_fn=None)
                    global_context = tf.nn.relu(slim.batch_norm(global_context, fused=True))
                    global_context = tf.image.resize_bilinear(global_context, size=size)

                    net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=128)
                    net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=128)

                    net_5 = tf.add(net_5, global_context)
                    net_5 = Upsampling(net_5, scale=2)
                    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                    net_4 = tf.add(net_4, net_5)
                    net_4 = Upsampling(net_4, scale=2)
                    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])

                    context_net = net_4

                    net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=256)
                    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
                    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])
                    
                    # Upsampling + dilation or only Upsampling
                    net = Upsampling(net, scale=2)
                    net = slim.conv2d(net, 64, [3, 3], rate=2, activation_fn=tf.nn.relu, biases_initializer=None,
                                      normalizer_fn=slim.batch_norm)

                    net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits')
                    self.net = Upsampling(net, 4)

                    # net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits')
                    # self.net = Upsampling(net, scale=8)

                    if self.mode in ['train', 'validation', 'test']:
                        sup1 = slim.conv2d(net_5, self.num_classes, [1, 1], activation_fn=None, scope='supl1')
                        sup2 = slim.conv2d(net_4, self.num_classes, [1, 1], activation_fn=None, scope='supl2')
                        self.sup1 = Upsampling(sup1, scale=16)
                        self.sup2 = Upsampling(sup2, scale=8)
                        self.init_fn = init_fn
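
ConvBlock, AttentionRefinementModule, FeatureFusionModule, and Upsampling are helpers used above but not defined in this snippet. A plausible minimal sketch of ConvBlock under the common conv -> batch norm -> ReLU convention (hypothetical, not necessarily the author's version):

def ConvBlock(inputs, n_filters, kernel_size=[3, 3], strides=1):
    # convolution without activation, then fused batch norm, then ReLU
    net = slim.conv2d(inputs, n_filters, kernel_size, stride=strides, activation_fn=None)
    return tf.nn.relu(slim.batch_norm(net, fused=True))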
Example #4
    def build(self):

        with self._graph.as_default(), tf.device('/cpu:0'):

            # Create an optimizer that performs gradient descent.
            opt, lr, global_step = self.get_opt()

            ## some global placeholders
            L2_reg = tf.placeholder(tf.float32, name="L2_reg")
            training = tf.placeholder(tf.bool, name="training_flag")

            total_loss_to_show = 0.
            images_place_holder_list = []
            hm_gt_place_holder_list = []
            wh_gt_place_holder_list = []
            reg_place_holder_list = []
            ind_place_holder_list = []
            regmask_place_holder_list = []

            weights_initializer = slim.xavier_initializer()
            biases_initializer = tf.constant_initializer(0.)
            biases_regularizer = tf.no_regularizer
            weights_regularizer = tf.contrib.layers.l2_regularizer(L2_reg)

            # Calculate the gradients for each model tower.
            tower_grads = []
            with tf.variable_scope(tf.get_variable_scope()):
                for i in range(cfg.TRAIN.num_gpu):
                    with tf.device('/gpu:%d' % i):
                        with tf.name_scope('tower_%d' % (i)) as scope:
                            with slim.arg_scope([slim.model_variable, slim.variable], device='/cpu:0'):
                                if cfg.MODEL.deployee:
                                    images_ = tf.placeholder(tf.float32,
                                                             [1, cfg.DATA.hin, cfg.DATA.win, cfg.DATA.channel],
                                                             name="images")
                                else:
                                    ###fix size
                                    images_ = tf.placeholder(tf.float32, [None,None,None, cfg.DATA.channel],
                                                             name="images")

                                hm_ = tf.placeholder(tf.float32,
                                                     [cfg.TRAIN.batch_size, None, None, cfg.DATA.num_class],
                                                     name="heatmap_target")
                                wh_ = tf.placeholder(tf.float32,
                                                     [cfg.TRAIN.batch_size, None, 2],
                                                     name="wh_target")
                                reg_ = tf.placeholder(tf.float32,
                                                      [cfg.TRAIN.batch_size, None, 2],
                                                      name="reg_target")
                                ind_ = tf.placeholder(tf.float32,
                                                      [cfg.TRAIN.batch_size, None],
                                                      name="ind_target")
                                regmask_ = tf.placeholder(tf.float32,
                                                          [cfg.TRAIN.batch_size, None],
                                                          name="regmask_target")
                                ###total anchor

                                images_place_holder_list.append(images_)
                                hm_gt_place_holder_list.append(hm_)
                                wh_gt_place_holder_list.append(wh_)
                                reg_place_holder_list.append(reg_)
                                ind_place_holder_list.append(ind_)
                                regmask_place_holder_list.append(regmask_)
                                with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                                     slim.conv2d_transpose, slim.separable_conv2d,
                                                     slim.fully_connected],
                                                    weights_regularizer=weights_regularizer,
                                                    biases_regularizer=biases_regularizer,
                                                    weights_initializer=weights_initializer,
                                                    biases_initializer=biases_initializer):
                                    hm_loss, wh_loss, reg_loss, l2_loss = self.tower_loss(
                                        scope, images_, hm_, wh_, reg_, ind_, regmask_, L2_reg, training)

                                    ## use multi-GPU, large batch
                                    if i == cfg.TRAIN.num_gpu - 1:
                                        total_loss = tf.add_n([hm_loss, wh_loss, reg_loss, l2_loss])
                                    else:
                                        total_loss = tf.add_n([hm_loss, wh_loss, reg_loss])
                                total_loss_to_show += total_loss
                                # Reuse variables for the next tower.
                                tf.get_variable_scope().reuse_variables()

                                ## When using batch norm, grab update ops only from the
                                ## final tower. Ideally, we should grab the updates from all towers,
                                ## but these stats accumulate extremely fast, so we can ignore the
                                ## other towers' stats without significant detriment.
                                bn_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=scope)

                                # Retain the summaries from the final tower.
                                self.summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                                ###freeze some params
                                train_var_list = self.frozen()
                                # Calculate the gradients for the batch of data on this tower.
                                grads = opt.compute_gradients(total_loss, train_var_list)

                                # Keep track of the gradients across all towers.
                                tower_grads.append(grads)
            # We must calculate the mean of each gradient. Note that this is the
            # synchronization point across all towers.
            grads = self.average_gradients(tower_grads)

            # Add a summary to track the learning rate.
            self.add_summary(tf.summary.scalar('learning_rate', lr))
            self.add_summary(tf.summary.scalar('total_loss', total_loss_to_show))
            self.add_summary(tf.summary.scalar('hm_loss', hm_loss))
            self.add_summary(tf.summary.scalar('wh_loss', wh_loss))
            self.add_summary(tf.summary.scalar('reg_loss', reg_loss))
            self.add_summary(tf.summary.scalar('l2_loss', l2_loss))

            # Add histograms for gradients.
            for grad, var in grads:
                if grad is not None:
                    self.add_summary(tf.summary.histogram(var.op.name + '/gradients', grad))

            # Apply the gradients to adjust the shared variables.
            apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

            # Add histograms for trainable variables.
            for var in tf.trainable_variables():
                self.add_summary(tf.summary.histogram(var.op.name, var))

            if self.ema_weights:
                # Track the moving averages of all trainable variables.
                variable_averages = tf.train.ExponentialMovingAverage(
                    0.9, global_step)
                variables_averages_op = variable_averages.apply(tf.trainable_variables())
                # Group all updates into a single train op.
                train_op = tf.group(apply_gradient_op, variables_averages_op, *bn_update_ops)
            else:
                train_op = tf.group(apply_gradient_op, *bn_update_ops)

            ### set inputs and outputs
            self.inputs = [images_place_holder_list,
                           hm_gt_place_holder_list,
                           wh_gt_place_holder_list,
                           reg_place_holder_list,
                           ind_place_holder_list,
                           regmask_place_holder_list,
                           L2_reg,
                           training]
            self.outputs = [train_op,
                            total_loss_to_show,
                            hm_loss,
                            wh_loss,
                            reg_loss,
                            l2_loss,
                            lr]
            self.val_outputs = [total_loss_to_show,
                                hm_loss,
                                wh_loss,
                                reg_loss,
                                l2_loss,
                                lr]

            tf_config = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=False)
            tf_config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=tf_config)

            ##init all variables
            init = tf.global_variables_initializer()
            self.sess.run(init)
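
self.average_gradients, called above at the cross-tower synchronization point, is not shown in this snippet. A sketch of what such a helper typically looks like in the multi-GPU tower pattern (hypothetical, not the author's implementation):

def average_gradients(tower_grads):
    # tower_grads: one list of (gradient, variable) pairs per tower
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars collects one variable's gradient from every tower
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        if not grads:
            average_grads.append((None, grad_and_vars[0][1]))
            continue
        mean_grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        average_grads.append((mean_grad, grad_and_vars[0][1]))
    return average_grads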
    def build(self):

        with self._graph.as_default(), tf.device('/cpu:0'):

            # Create an optimizer that performs gradient descent.
            opt, lr, global_step = self.get_opt()

            ## some global placeholders
            keep_prob = tf.placeholder(tf.float32, name="keep_prob")
            L2_reg = tf.placeholder(tf.float32, name="L2_reg")
            training = tf.placeholder(tf.bool, name="training_flag")

            total_loss_to_show = 0.
            images_place_holder_list = []
            labels_place_holder_list = []
            boxes_place_holder_list = []

            weights_initializer = slim.xavier_initializer()
            biases_initializer = tf.constant_initializer(0.)
            biases_regularizer = tf.no_regularizer
            weights_regularizer = tf.contrib.layers.l2_regularizer(L2_reg)

            # Calculate the gradients for each model tower.
            tower_grads = []
            with tf.variable_scope(tf.get_variable_scope()):
                for i in range(cfg.TRAIN.num_gpu):
                    with tf.device('/gpu:%d' % i):
                        with tf.name_scope('tower_%d' % (i)) as scope:
                            with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                    device='/cpu:0'):

                                images_ = tf.placeholder(tf.float32,
                                                         [None, None, None, 3],
                                                         name="images")
                                boxes_ = tf.placeholder(
                                    tf.float32,
                                    [cfg.TRAIN.batch_size, None, 4],
                                    name="input_boxes")
                                labels_ = tf.placeholder(
                                    tf.int64, [cfg.TRAIN.batch_size, None],
                                    name="input_labels")
                                ###total anchor

                                images_place_holder_list.append(images_)
                                labels_place_holder_list.append(labels_)
                                boxes_place_holder_list.append(boxes_)

                                with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                                     slim.conv2d_transpose, slim.separable_conv2d,
                                                     slim.fully_connected],
                                                    weights_regularizer=weights_regularizer,
                                                    biases_regularizer=biases_regularizer,
                                                    weights_initializer=weights_initializer,
                                                    biases_initializer=biases_initializer):
                                    reg_loss, cla_loss, l2_loss = self.tower_loss(
                                        scope, images_, labels_, boxes_,
                                        L2_reg, training)

                                    ## use multi-GPU, large batch
                                    if i == cfg.TRAIN.num_gpu - 1:
                                        total_loss = tf.add_n(
                                            [reg_loss, cla_loss, l2_loss])
                                    else:
                                        total_loss = tf.add_n(
                                            [reg_loss, cla_loss])
                                total_loss_to_show += total_loss
                                # Reuse variables for the next tower.
                                tf.get_variable_scope().reuse_variables()

                                ## When using batch norm, grab update ops only from the
                                ## final tower. Ideally, we should grab the updates from all towers,
                                ## but these stats accumulate extremely fast, so we can ignore the
                                ## other towers' stats without significant detriment.
                                bn_update_ops = tf.get_collection(
                                    tf.GraphKeys.UPDATE_OPS, scope=scope)

                                # Retain the summaries from the final tower.
                                self.summaries = tf.get_collection(
                                    tf.GraphKeys.SUMMARIES, scope)
                                # Calculate the gradients for the batch of data on this tower.
                                grads = opt.compute_gradients(total_loss)

                                # Keep track of the gradients across all towers.
                                tower_grads.append(grads)
            # We must calculate the mean of each gradient. Note that this is the
            # synchronization point across all towers.
            grads = self.average_gradients(tower_grads)

            # Add a summary to track the learning rate.
            self.add_summary(tf.summary.scalar('learning_rate', lr))
            self.add_summary(
                tf.summary.scalar('total_loss', total_loss_to_show))
            self.add_summary(tf.summary.scalar('loc_loss', reg_loss))
            self.add_summary(tf.summary.scalar('cla_loss', cla_loss))
            self.add_summary(tf.summary.scalar('l2_loss', l2_loss))

            # Add histograms for gradients.
            for grad, var in grads:
                if grad is not None:
                    self.add_summary(
                        tf.summary.histogram(var.op.name + '/gradients', grad))

            # Apply the gradients to adjust the shared variables.
            apply_gradient_op = opt.apply_gradients(grads,
                                                    global_step=global_step)

            # Add histograms for trainable variables.
            for var in tf.trainable_variables():
                self.add_summary(tf.summary.histogram(var.op.name, var))

            if self.ema_weights:
                # Track the moving averages of all trainable variables.
                variable_averages = tf.train.ExponentialMovingAverage(
                    0.9, global_step)
                variables_averages_op = variable_averages.apply(
                    tf.trainable_variables())
                # Group all updates into a single train op.
                train_op = tf.group(apply_gradient_op, variables_averages_op,
                                    *bn_update_ops)
            else:
                train_op = tf.group(apply_gradient_op, *bn_update_ops)

            ### set inputs and outputs
            self.inputs = [
                images_place_holder_list, boxes_place_holder_list,
                labels_place_holder_list, keep_prob, L2_reg, training
            ]
            self.outputs = [
                train_op, total_loss_to_show, reg_loss, cla_loss, l2_loss, lr
            ]
            self.val_outputs = [
                total_loss_to_show, reg_loss, cla_loss, l2_loss, lr
            ]

            ##init all variables
            init = tf.global_variables_initializer()
            self.sess.run(init)
Example #6
    def build_bisenet_custom(self, reuse=False):
        """
        Builds the BiSeNet model.

        Arguments:
          reuse: Reuse variable or not

        Returns:
          BiSeNet model
        """
        ### The spatial path
        ### The number of feature maps for each convolution is not specified in the paper
        ### It was chosen here to be equal to the number of feature maps of a classification
        ### model at each corresponding stage
        batch_norm_params = self.model_config['batch_norm_params']
        init_method = self.model_config['conv_config']['init_method']
        down_16x_end_points = self.model_config['net_node']['16xdown:50']
        down_32x_end_points = self.model_config['net_node']['32xdown:25']
        if init_method == 'kaiming_normal':
            initializer = slim.variance_scaling_initializer(factor=2.0,
                                                            mode='FAN_IN',
                                                            uniform=False)
        else:
            initializer = slim.xavier_initializer()

        with tf.variable_scope('spatial_net', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    # inference/spatial_net/Conv/Conv2D run 1 average cost 250.552994 ms, 25.405 %, FlopsRate: 9.064 %
                    # conv2d
                    spatial_net = slim.conv2d(self.images,
                                              16, [3, 3],
                                              stride=[2, 2],
                                              activation_fn=None)
                    spatial_net = hard_swish(
                        slim.batch_norm(spatial_net, fused=True))

                    # bneck1
                    exp_size = _make_divisible(16)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               16,
                                               kernel=[3, 3],
                                               stride=2)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))

                    # bneck2
                    exp_size = _make_divisible(72)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               24,
                                               kernel=[3, 3],
                                               stride=2)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck3
                    exp_size = _make_divisible(88)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               24,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck4
                    exp_size = _make_divisible(96)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               40,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck5
                    spatial_net = DepthSepConv(spatial_net,
                                               80,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck6
                    spatial_net = DepthSepConv(spatial_net,
                                               128,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))

        frontend_config = self.model_config['frontend_config']
        ### Context path
        logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
            self.images, frontend_config, self.is_training(), reuse)

        ### Combining the paths
        with tf.variable_scope('combine_path', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    # tail part
                    global_context = tf.reduce_mean(
                        end_points[down_32x_end_points], [1, 2],
                        keep_dims=True)
                    global_context = slim.conv2d(global_context,
                                                 128,
                                                 1, [1, 1],
                                                 activation_fn=None)
                    global_context = tf.nn.relu(
                        slim.batch_norm(global_context, fused=True))
                    ARM_out1 = AttentionRefinementModule_Custom(
                        end_points[down_32x_end_points], n_filters=128)
                    ARM_out2 = AttentionRefinementModule_Custom(
                        end_points[down_16x_end_points], n_filters=128)

                    ARM_out1 = tf.add(ARM_out1, global_context)
                    ARM_out1 = Upsampling(ARM_out1, scale=2)
                    # inference/combine_path/Conv_6/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(256)
                    ARM_out1 = slim.conv2d(ARM_out1,
                                           exp_size, [1, 1],
                                           stride=[1, 1],
                                           activation_fn=None)
                    ARM_out1 = slim.batch_norm(ARM_out1, fused=True)
                    ARM_out1 = DepthSepConv(ARM_out1,
                                            128,
                                            kernel=[3, 3],
                                            stride=1)
                    ARM_out1 = tf.nn.relu(slim.batch_norm(ARM_out1,
                                                          fused=True))
                    ARM_out2 = tf.add(ARM_out2, ARM_out1)
                    ARM_out2 = Upsampling(ARM_out2, scale=2)
                    # inference/combine_path/Conv_13/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(256)
                    ARM_out2 = slim.conv2d(ARM_out2,
                                           exp_size, [1, 1],
                                           stride=[1, 1],
                                           activation_fn=None)
                    ARM_out2 = slim.batch_norm(ARM_out2, fused=True)
                    ARM_out2 = DepthSepConv(ARM_out2,
                                            128,
                                            kernel=[3, 3],
                                            stride=1)
                    ARM_out2 = tf.nn.relu(slim.batch_norm(ARM_out2,
                                                          fused=True))
                    context_net = ARM_out2

                    FFM_out = FeatureFusionModule_Custom(input_1=spatial_net,
                                                         input_2=context_net,
                                                         n_filters=256)

                    ARM_out1 = ConvBlock(ARM_out1,
                                         n_filters=128,
                                         kernel_size=[3, 3])
                    ARM_out2 = ConvBlock(ARM_out2,
                                         n_filters=128,
                                         kernel_size=[3, 3])
                    exp_size = _make_divisible(128)
                    FFM_out = slim.conv2d(FFM_out,
                                          exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                    FFM_out = slim.batch_norm(FFM_out, fused=True)
                    FFM_out = DepthSepConv(FFM_out,
                                           64,
                                           kernel=[3, 3],
                                           stride=1)
                    FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                    # Upsampling + dilation or only Upsampling
                    FFM_out = Upsampling(FFM_out, scale=2)
                    # inference/combine_path/Conv_12/Conv2D run 1 average cost 32.151001 ms, 3.260 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(128)
                    FFM_out = slim.conv2d(FFM_out,
                                          exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                    FFM_out = DepthSepConv(FFM_out,
                                           64,
                                           kernel=[3, 3],
                                           stride=1,
                                           rate=2)
                    FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                    FFM_out = slim.conv2d(FFM_out,
                                          self.num_classes, [1, 1],
                                          activation_fn=None,
                                          scope='logits')
                    self.net = Upsampling(FFM_out, 4)

                    if self.mode in ['train', 'validation', 'test']:
                        sup1 = slim.conv2d(ARM_out1,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl1')
                        sup2 = slim.conv2d(ARM_out2,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl2')
                        self.sup1 = Upsampling(sup1, scale=16)
                        self.sup2 = Upsampling(sup2, scale=8)
                        self.init_fn = init_fn
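
_make_divisible and hard_swish above are MobileNetV3-style helpers that are not defined in this snippet; rough sketches of both (hypothetical, assuming the usual default divisor of 8):

def _make_divisible(v, divisor=8, min_value=None):
    # round v to the nearest multiple of divisor, never dropping more than ~10% below v
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

def hard_swish(x):
    # x * ReLU6(x + 3) / 6, the cheap swish approximation used in MobileNetV3
    return x * tf.nn.relu6(x + 3.0) / 6.0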
def O_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print(inputs.get_shape())
        net = slim.conv2d(inputs,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope="conv1")
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[3, 3],
                              stride=2,
                              scope="pool1",
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope="conv2")
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool2")
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope="conv3")
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope="pool3",
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=128,
                          kernel_size=[2, 2],
                          stride=1,
                          scope="conv4")
        print(net.get_shape())
        fc_flatten = slim.flatten(net)
        print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten,
                                   num_outputs=256,
                                   scope="fc1",
                                   activation_fn=tf.nn.relu)
        print(fc1.get_shape())
        #batch*2
        cls_prob = slim.fully_connected(fc1,
                                        num_outputs=2,
                                        scope="cls_fc",
                                        activation_fn=tf.nn.softmax)
        print(cls_prob.get_shape())
        #batch*4
        bbox_pred = slim.fully_connected(fc1,
                                         num_outputs=4,
                                         scope="bbox_fc",
                                         activation_fn=None)
        print(bbox_pred.get_shape())
        #batch*10
        landmark_pred = slim.fully_connected(fc1,
                                             num_outputs=10,
                                             scope="landmark_fc",
                                             activation_fn=None)
        print(landmark_pred.get_shape())
        #train
        if training:
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy = cal_accuracy(cls_prob, label)
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            return cls_prob, bbox_pred, landmark_pred
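
cls_ohem, bbox_ohem, and landmark_ohem are referenced above but not defined; they implement online hard example mining, where only the hardest fraction of per-sample losses contributes to the batch loss. A simplified sketch of the classification variant (hypothetical; the real helpers also treat part/landmark sample labels specially):

def cls_ohem_sketch(cls_prob, label, keep_ratio=0.7):
    # cls_prob: [batch, 2] softmax outputs; label: 1 = face, 0 = non-face, < 0 = ignore
    label = tf.cast(label, tf.int32)
    valid = tf.cast(tf.greater_equal(label, 0), tf.float32)
    safe_label = tf.maximum(label, 0)
    row_idx = tf.range(tf.shape(cls_prob)[0])
    true_prob = tf.gather_nd(cls_prob, tf.stack([row_idx, safe_label], axis=1))
    loss = -tf.log(true_prob + 1e-10) * valid
    # keep only the hardest keep_ratio fraction of the valid samples
    num_keep = tf.cast(tf.reduce_sum(valid) * keep_ratio, tf.int32)
    hard_loss, _ = tf.nn.top_k(loss, k=num_keep)
    return tf.reduce_mean(hard_loss)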
Example #8
def onet_cnn6(inputs,label=None,bbox_target=None,landmark_target=None,training=True):
    with slim.arg_scope([slim.conv2d],
                        activation_fn = prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),                        
                        padding='valid'):

        #model structure
        #
        #

        #print (inputs.get_shape())
        
        net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3,3], stride=1, scope="conv1")
        print("Conv 1: ", net.get_shape())

        #net = slim.conv2d(inputs, num_outputs=64, kernel_size=[3,3], stride=2, scope="conv2")
        #print(net.get_shape())

        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool1", padding='SAME')
        print("Pool 1: ", net.get_shape())

        net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],stride=1,scope="conv2")
        print("Conv 2: ", net.get_shape())

        #net = slim.conv2d(net,num_outputs=128,kernel_size=[3,3],stride=2,scope="conv4")
        #print(net.get_shape())

        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool2")
        print("Pool 2: ", net.get_shape())

        net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],stride=1,scope="conv3")
        print("Conv 3: ", net.get_shape())

        net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],padding="SAME",stride=1,scope="conv4")
        print("Conv 4: ", net.get_shape())

        #net = slim.conv2d(net,num_outputs=128,kernel_size=[3,3],stride=2,scope="conv6")
        #print(net.get_shape())

        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope="pool3", padding='SAME')
        print("Pool 3: ", net.get_shape())

        net = slim.conv2d(net,num_outputs=128,kernel_size=[2,2],stride=1,scope="conv5")
        print("Conv 5: ", net.get_shape())

        net = slim.conv2d(net,num_outputs=128,kernel_size=[2,2],padding="SAME",stride=1,scope="conv6")
        print("Conv 6: ", net.get_shape())

        fc_flatten = slim.flatten(net)
        #print(fc_flatten.get_shape())

        fc1 = slim.fully_connected(fc_flatten, num_outputs=256,scope="fc1")
        #print(fc1.get_shape())
        #batch*2
        cls_prob = slim.fully_connected(fc1,num_outputs=2,scope="cls_fc",activation_fn=tf.nn.softmax)
        #print(cls_prob.get_shape())
        #batch*4
        bbox_pred = slim.fully_connected(fc1,num_outputs=4,scope="bbox_fc",activation_fn=None)
        #print(bbox_pred.get_shape())
        #batch*4
        landmark_pred = slim.fully_connected(fc1,num_outputs=4,scope="landmark_fc",activation_fn=None)
        #print(landmark_pred.get_shape())


        #train
        if training:
            config = singleton.configuration._instance.config
            #cls_loss = tf.reduce_mean(tf.keras.backend.binary_crossentropy(label,cls_prob))
            #cls_loss = tf.reduce_mean(tf.losses.sigmoid_cross_entropy(label,cls_prob))
            #cls_loss = cls_ohem(cls_prob,label)
            cls_loss = tf.reduce_mean(tf.keras.backend.binary_crossentropy(label,cls_prob,from_logits=False))
            #bbox_loss = bbox_ohem(bbox_pred,bbox_target,label)
            if config.bbox_loss == "mse":
                bbox_loss = get_bb_loss(bbox_pred,bbox_target,label)
            else:
                bbox_loss = bbox_ohem(bbox_pred,bbox_target,label)
                
            if config.landmark_loss == "mse":
                landmark_loss = get_landmark_loss(landmark_pred, landmark_target,label)
            else:
                landmark_loss = landmark_ohem(landmark_pred, landmark_target,label)
            #bbox_loss = tf.reduce_mean(tf.losses.mean_squared_error(bbox_target,bbox_pred))
            accuracy = cal_accuracy(cls_prob,label)
            #landmark_loss = get_landmark_loss(landmark_pred, landmark_target,label)
            #landmark_loss = landmark_ohem(landmark_pred, landmark_target,label)
            #landmark_loss = tf.reduce_mean(tf.losses.mean_squared_error(landmark_target,landmark_pred))
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss,bbox_loss,landmark_loss,L2_loss,accuracy,cls_prob

        else:
            return cls_prob,bbox_pred,landmark_pred
    def _make_graph(self):
        self.logger.info("Generating training graph on {} GPUs ...".format(
            self.cfg.num_gpus))

        weights_initializer = slim.xavier_initializer()
        biases_initializer = tf.constant_initializer(0.)
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            self.cfg.TRAIN.weight_decay)

        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(self.cfg.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i) as name_scope:
                        # Force all Variables to reside on the CPU.

                        with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                                 slim.conv2d_transpose, slim.separable_conv2d,
                                                 slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                weights_initializer=weights_initializer,
                                                biases_initializer=biases_initializer):
                                # loss over single GPU
                                if (self.cfg.MODEL.occluded_detection):
                                    self.net.make_occ_network(is_train=True)
                                else:
                                    self.net.make_network(is_train=True)
                                if i == self.cfg.num_gpus - 1:
                                    loss = self.net.get_loss(include_wd=True)
                                else:
                                    loss = self.net.get_loss()
                                self._input_list.append(self.net.get_inputs())

                        tf.get_variable_scope().reuse_variables()

                        if i == 0:
                            if self.cfg.num_gpus > 1 and self.cfg.TRAIN.batch_norm is True:
                                self.logger.warning(
                                    "BN is calculated only on single GPU.")
                            extra_update_ops = tf.get_collection(
                                tf.GraphKeys.UPDATE_OPS, name_scope)
                            with tf.control_dependencies(extra_update_ops):
                                grads = self._optimizer.compute_gradients(loss)
                        else:
                            grads = self._optimizer.compute_gradients(loss)
                        final_grads = []

                        with tf.variable_scope('Gradient_Mult') as scope:
                            for grad, var in grads:
                                final_grads.append((grad, var))
                        tower_grads.append(final_grads)

        if len(tower_grads) > 1:
            grads = average_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        apply_gradient_op = self._optimizer.apply_gradients(grads)
        train_op = tf.group(apply_gradient_op, *extra_update_ops)

        return train_op
def P_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    # why is the activation prelu?
    '''
    leaky relu vs prelu:
      https://datascience.stackexchange.com/questions/18583/what-is-the-difference-between-leakyrelu-and-prelu
      Leaky ReLUs: allow a small, non-zero gradient when the unit is not active.
      Parametric ReLUs: take this idea further by making the coefficient of leakage into a parameter
                        that is learned along with the other neural network parameters.
    '''
    with slim.arg_scope(
        [slim.conv2d],
            activation_fn=prelu,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(
            ),  # slim does not have a zeros initializer
            weights_regularizer=slim.l2_regularizer(0.0005),
            padding='valid'):
        print("PNet input shape: ", inputs.get_shape())
        net = slim.conv2d(inputs,
                          num_outputs=10,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv1')
        print("PNet conv1 shape: ", net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              padding='SAME',
                              scope='pool1')
        print("PNet pool1 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=16,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        print("PNet conv2 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        print("PNet conv3 shape: ", net.get_shape())
        # final 3 conv to get H*W*2 classifier, H*W*4 bbox, H*W*10 landmark_pred
        conv4_1 = slim.conv2d(net,
                              num_outputs=2,
                              kernel_size=[1, 1],
                              stride=1,
                              scope='conv4_1',
                              activation_fn=tf.nn.softmax)
        print('P_Net conv4_1 shape ', conv4_1.get_shape())
        bbox_pred = slim.conv2d(
            net,
            num_outputs=4,
            kernel_size=[1, 1],
            stride=1,
            scope='conv4_2',
            activation_fn=None
        )  # important: the scope name should not be the same as the variable name
        print('P_Net bbox_pred conv layer shape ', bbox_pred.get_shape())
        landmark_pred = slim.conv2d(net,
                                    num_outputs=10,
                                    kernel_size=[1, 1],
                                    stride=1,
                                    scope='conv4_3',
                                    activation_fn=None)
        print('P_Net landmark conv layer shape', landmark_pred.get_shape())

        if training:
            #batch*2: determine whether it is a face
            #squeezing removes the 1x1 spatial dims left by the convs, giving [batch, 2]
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            #check bbox_loss
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            #landmark loss
            landmark_pred = tf.squeeze(landmark_pred, [1, 2],
                                       name='landmark_pred')
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)
            accuracy = cal_accuracy(cls_prob, label)

            #tf.add_n: Adds all input tensors element-wise.
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            #test, batch_size=1
            cls_prob_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_prob_test, bbox_pred_test, landmark_pred_test
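
The P_Net docstring above contrasts leaky ReLU with PReLU; the prelu activation used throughout these networks is not defined in the snippet. A minimal sketch of a per-channel PReLU (hypothetical; the learned negative slope is what distinguishes it from a fixed leaky ReLU):

def prelu(inputs):
    # one learnable negative-slope coefficient per channel, initialised to 0.25
    alphas = tf.get_variable('alphas', shape=inputs.get_shape()[-1], dtype=tf.float32,
                             initializer=tf.constant_initializer(0.25))
    pos = tf.nn.relu(inputs)
    neg = alphas * (inputs - tf.abs(inputs)) * 0.5
    return pos + neg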
def O_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print("ONet input shape: ", inputs.get_shape())
        net = slim.conv2d(inputs,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv1')
        print("ONet conv1 shape: ", net.get_shape())
        # in the original model, all O-Net pooling layers use a stride of 2
        net = slim.max_pool2d(net,
                              kernel_size=[3, 3],
                              stride=2,
                              scope='pool1',
                              padding='SAME')
        print("ONet pool1 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        print("ONet conv2 shape: ", net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope='pool2')
        print("ONet pool2 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        print("ONet conv3 shape: ", net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool3',
                              padding='SAME')
        print("ONet pool3 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=128,
                          kernel_size=[2, 2],
                          stride=1,
                          scope='conv4')
        print("ONet conv4 shape: ", net.get_shape())
        fc_flatten = slim.flatten(net)
        print("ONet fc input shape: ", fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten,
                                   num_outputs=256,
                                   scope='fc1',
                                   activation_fn=tf.nn.relu)
        #cls
        print('ONet fc shape after flattening: ', fc1.get_shape())
        cls_prob = slim.fully_connected(fc1,
                                        num_outputs=2,
                                        scope='cls_fc',
                                        activation_fn=tf.nn.softmax)
        print('ONet cls_prob fc shape ', cls_prob.get_shape())
        #bbox
        bbox_pred = slim.fully_connected(fc1,
                                         num_outputs=4,
                                         scope='bbox_fc',
                                         activation_fn=None)
        print('ONet bbox_pred fc shape ', bbox_pred.get_shape())
        #landmark
        landmark_pred = slim.fully_connected(fc1,
                                             num_outputs=10,
                                             scope='landmark_fc',
                                             activation_fn=None)
        print('ONet landmark fc shape ', landmark_pred.get_shape())
        if training:
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy = cal_accuracy(cls_prob, label)
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            return cls_prob, bbox_pred, landmark_pred
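
For context, a minimal training sketch (not part of the source snippet) showing how the five values returned in training mode might be combined. The placeholder shapes and dtypes, the 0.5 bbox weight, and the Adam optimizer are illustrative assumptions, not taken from the original code.

# Hedged sketch: the 48x48 O-Net crop size and the float label dtype are assumptions;
# adapt them to whatever cls_ohem / bbox_ohem / landmark_ohem expect in this codebase.
input_image = tf.placeholder(tf.float32, shape=[None, 48, 48, 3], name='input_image')
label = tf.placeholder(tf.float32, shape=[None], name='label')
bbox_target = tf.placeholder(tf.float32, shape=[None, 4], name='bbox_target')
landmark_target = tf.placeholder(tf.float32, shape=[None, 10], name='landmark_target')

cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy = O_Net(
    input_image, label, bbox_target, landmark_target, training=True)
# Assumed loss weighting (1.0 / 0.5 / 1.0); tune for your task.
total_loss = cls_loss + 0.5 * bbox_loss + landmark_loss + L2_loss
train_op = tf.train.AdamOptimizer(1e-3).minimize(total_loss)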
Example #12
0
    def initialize(self, config, num_classes):
        '''
            Initialize the graph from scratch according to the config.
        '''
        #A default graph is registered, operations will be added to the graph
        with self.graph.as_default():
            #A default session is created; operations will be run in this session
            with self.sess.as_default():
                # Set up placeholders
                #width and height from image size, [112,112]
                w, h = config.image_size
                #channels = 3 (RGB)
                channels = config.channels
                #A placeholder is a variable that we will assign data to at a later date
                #It allows us to create our operations and build our computation graph without needing the data.
                #In TensorFlow terminology, we then feed data into the graph through these placeholders.
                image_batch_placeholder = tf.placeholder(tf.float32, shape=[None, h, w, channels], name='image_batch')
                label_batch_placeholder = tf.placeholder(tf.int32, shape=[None], name='label_batch')
                learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
                keep_prob_placeholder = tf.placeholder(tf.float32, name='keep_prob')
                phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
                global_step = tf.Variable(0, trainable=False, dtype=tf.int32, name='global_step')

                #splits a tensor into sub-tensors, one per GPU
                image_splits = tf.split(image_batch_placeholder, config.num_gpus)
                label_splits = tf.split(label_batch_placeholder, config.num_gpus)
                grads_splits = []
                split_dict = {}

                #function for inserting values into a dictionary based on a key
                def insert_dict(k,v):
                    if k in split_dict: split_dict[k].append(v)
                    else: split_dict[k] = [v]

                #num_gpus = 1
                for i in range(config.num_gpus):
                    scope_name = '' if i==0 else 'gpu_%d' % i
                    # A context manager for use when defining a Python op.
                    # The context manager pushes a name scope, which prefixes the names of all operations added within it.
                    with tf.name_scope(scope_name):
                        with tf.variable_scope('', reuse=i>0):
                            #Specifies the device for ops created/executed in this context
                            with tf.device('/gpu:%d' % i):
                                #identity returns a tensor with same shape and contents as input
                                images = tf.identity(image_splits[i], name='inputs')
                                labels = tf.identity(label_splits[i], name='labels')
                                # Save the first split (GPU 0) for testing
                                if i == 0:
                                    self.inputs = images
                                
                                # Build networks
                                if config.localization_net is not None:
                                    localization_net = utils.import_file(config.localization_net, 'network')
                                    imsize = (112, 112)
                                    images, theta = localization_net.inference(images, imsize, 
                                                    phase_train_placeholder,
                                                    weight_decay = 0.0)
                                    images = tf.identity(images, name='transformed_image')
                                    if i == 0:
                                        tf.summary.image('transformed_image', images)
                                else:
                                    images = images
                                #calls import_file, passes sealnet as network
                                network = utils.import_file(config.network, 'network')
                                #calls inference function in sealnet file
                                prelogits = network.inference(images, keep_prob_placeholder, phase_train_placeholder,
                                                        bottleneck_layer_size = config.embedding_size, 
                                                        weight_decay = config.weight_decay, 
                                                        model_version = config.model_version)
                                prelogits = tf.identity(prelogits, name='prelogits')
                                #Normalizes along dimension axis using an L2 norm
                                embeddings = tf.nn.l2_normalize(prelogits, dim=1, name='embeddings')
                                if i == 0:
                                    self.outputs = tf.identity(embeddings, name='outputs')

                                # Build all loss functions
                                losses = []

                                # Original Softmax
                                if 'softmax' in config.losses.keys():
                                    logits = slim.fully_connected(prelogits, num_classes, 
                                                                    weights_regularizer=slim.l2_regularizer(config.weight_decay),
                                                                    # weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                                                    weights_initializer=slim.xavier_initializer(),
                                                                    biases_initializer=tf.constant_initializer(0.0),
                                                                    activation_fn=None, scope='Logits')
                                    cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                                                    labels=labels, logits=logits), name='cross_entropy')
                                    losses.append(cross_entropy)
                                    insert_dict('sloss', cross_entropy)
                                # L2-Softmax
                                if 'cosine' in config.losses.keys():
                                    logits, cosine_loss = tflib.cosine_softmax(prelogits, labels, num_classes, 
                                                            gamma=config.losses['cosine']['gamma'], 
                                                            weight_decay=config.weight_decay)
                                    losses.append(cosine_loss)
                                    insert_dict('closs', cosine_loss)
                                # A-Softmax
                                if 'angular' in config.losses.keys():
                                    a_cfg = config.losses['angular']
                                    angular_loss = tflib.angular_softmax(prelogits, labels, num_classes, 
                                                            global_step, a_cfg['m'], a_cfg['lamb_min'], a_cfg['lamb_max'],
                                                            weight_decay=config.weight_decay)
                                    losses.append(angular_loss)
                                    insert_dict('aloss', angular_loss)
                                # Split Loss
                                if 'split' in config.losses.keys():
                                    split_losses = tflib.split_softmax(prelogits, labels, num_classes, 
                                                            global_step, gamma=config.losses['split']['gamma'], 
                                                            weight_decay=config.weight_decay)
                                    losses.extend(split_losses)
                                    insert_dict('loss', split_losses[0])

                                # Collect all losses
                                reg_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), name='reg_loss')
                                losses.append(reg_loss)
                                insert_dict('reg_loss', reg_loss)

                                total_loss = tf.add_n(losses, name='total_loss')
                                grads_split = tf.gradients(total_loss, tf.trainable_variables())
                                grads_splits.append(grads_split)



                # Merge the splits
                grads = tflib.average_grads(grads_splits)
                for k,v in split_dict.items():
                    v = tflib.average_tensors(v)
                    split_dict[k] = v
                    if 'loss' in k:
                        tf.summary.scalar('losses/' + k, v)
                    else:
                        tf.summary.scalar(k, v)


                # Training Operators
                apply_gradient_op = tflib.apply_gradient(tf.trainable_variables(), grads, config.optimizer,
                                        learning_rate_placeholder, config.learning_rate_multipliers)

                update_global_step_op = tf.assign_add(global_step, 1)

                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

                train_ops = [apply_gradient_op, update_global_step_op] + update_ops
                train_op = tf.group(*train_ops)

                tf.summary.scalar('learning_rate', learning_rate_placeholder)
                summary_op = tf.summary.merge_all()

                # Initialize variables
                self.sess.run(tf.local_variables_initializer())
                self.sess.run(tf.global_variables_initializer())
                self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=None)

                # Keep useful tensors
                self.image_batch_placeholder = image_batch_placeholder
                self.label_batch_placeholder = label_batch_placeholder 
                self.learning_rate_placeholder = learning_rate_placeholder 
                self.keep_prob_placeholder = keep_prob_placeholder 
                self.phase_train_placeholder = phase_train_placeholder 
                self.global_step = global_step
                self.watch_list = split_dict
                self.train_op = train_op
                self.summary_op = summary_op
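
A hypothetical companion method (not in the original class) sketching how the placeholders kept at the end of initialize() might be fed for a single optimization step; the method name, the batch arguments, and the keep_prob default are assumptions.

    def train_step(self, image_batch, label_batch, learning_rate, keep_prob=0.5):
        # Hypothetical helper: feed the kept placeholders and run one update.
        feed_dict = {
            self.image_batch_placeholder: image_batch,
            self.label_batch_placeholder: label_batch,
            self.learning_rate_placeholder: learning_rate,
            self.keep_prob_placeholder: keep_prob,
            self.phase_train_placeholder: True,
        }
        _, watch_values, step = self.sess.run(
            [self.train_op, self.watch_list, self.global_step], feed_dict=feed_dict)
        return watch_values, step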
Example #13
0
def split_softmax(prelogits, label, num_classes, 
                global_step, weight_decay, gamma=16.0, reuse=None):
    nrof_features = prelogits.shape[1].value
    batch_size = tf.shape(prelogits)[0]
    with tf.variable_scope('SplitSoftmax', reuse=reuse):
        weights = tf.get_variable('weights', shape=(num_classes, nrof_features),
                regularizer=slim.l2_regularizer(weight_decay),
                initializer=slim.xavier_initializer(),
                # initializer=tf.truncated_normal_initializer(stddev=0.1),
                # initializer=tf.constant_initializer(0),
                trainable=True,
                dtype=tf.float32)
        alpha = tf.get_variable('alpha', shape=(),
                regularizer=slim.l2_regularizer(1e-2),
                initializer=tf.constant_initializer(1.00),
                trainable=True,
                dtype=tf.float32)
        beta = tf.get_variable('beta', shape=(),
                # regularizer=slim.l2_regularizer(1e-2),
                initializer=tf.constant_initializer(0.0),
                trainable=True,
                dtype=tf.float32)
        sigma = tf.get_variable('sigma', shape=(),
                regularizer=slim.l2_regularizer(1e-1),
                initializer=tf.constant_initializer(1.0),
                trainable=True,
                dtype=tf.float32)
        threshold_pos = tf.get_variable('threshold_pos', shape=(),
                initializer=tf.constant_initializer(16.0),
                trainable=False, 
                dtype=tf.float32)
        threshold_neg = tf.get_variable('threshold_neg', shape=(),
                initializer=tf.constant_initializer(0.0),
                trainable=False, 
                dtype=tf.float32)

        # Normalizing the vectors
        weights_normed = tf.nn.l2_normalize(weights, dim=1)
        prelogits_normed = tf.nn.l2_normalize(prelogits, dim=1)
        # weights_normed = weights
        # prelogits_normed = prelogits

        # Calculate Centers
        centers, label_center, center_idx, center_weight = centers_by_label(prelogits_normed, label)
        centers = tf.gather(centers, center_idx)
        centers_normed = tf.nn.l2_normalize(centers, dim=1)

        coef = 1.0
        # Label and logits between batch and examplars
        label_mat_glob = tf.one_hot(label, num_classes, dtype=tf.float32)
        label_mask_pos_glob = tf.cast(label_mat_glob, tf.bool)
        label_mask_neg_glob = tf.logical_not(label_mask_pos_glob)
        # label_exp_batch = tf.expand_dims(label, 1)
        # label_exp_glob = tf.expand_dims(label_history, 1)
        # label_mat_glob = tf.equal(label_exp_batch, tf.transpose(label_exp_glob))
        # label_mask_pos_glob = tf.cast(label_mat_glob, tf.bool)
        # label_mask_neg_glob = tf.logical_not(label_mat_glob)

        # dist_mat_glob = euclidean_distance(prelogits_normed, tf.transpose(weights_normed), False)
        dist_mat_glob = tf.matmul(prelogits_normed, tf.transpose(weights_normed)) # + beta
        dist_pos_glob = tf.boolean_mask(dist_mat_glob, label_mask_pos_glob)
        dist_neg_glob = tf.boolean_mask(dist_mat_glob, label_mask_neg_glob)

        logits_glob = coef * dist_mat_glob
        logits_pos_glob = tf.boolean_mask(logits_glob, label_mask_pos_glob)
        logits_neg_glob = tf.boolean_mask(logits_glob, label_mask_neg_glob)


        # Label and logits within batch
        label_exp_batch = tf.expand_dims(label, 1)
        label_mat_batch = tf.equal(label_exp_batch, tf.transpose(label_exp_batch))
        label_mask_pos_batch = tf.cast(label_mat_batch, tf.bool)
        label_mask_neg_batch = tf.logical_not(label_mask_pos_batch)
        mask_non_diag = tf.logical_not(tf.cast(tf.eye(batch_size), tf.bool))
        label_mask_pos_batch = tf.logical_and(label_mask_pos_batch, mask_non_diag)

        # dist_mat_batch = euclidean_distance(prelogits_normed, tf.transpose(prelogits_normed), False)
        dist_mat_batch = tf.matmul(prelogits_normed, tf.transpose(prelogits_normed))
        dist_pos_batch = tf.boolean_mask(dist_mat_batch, label_mask_pos_batch)
        dist_neg_batch = tf.boolean_mask(dist_mat_batch, label_mask_neg_batch)

        logits_batch =  coef * dist_mat_batch
        logits_pos_batch = tf.boolean_mask(logits_batch, label_mask_pos_batch)
        logits_neg_batch = tf.boolean_mask(logits_batch, label_mask_neg_batch)


        # num_anchor = 32
        # prelogits_anchor = tf.reshape(prelogits_normed[:num_anchor], [num_anchor, 1, nrof_features])
        # prelogits_refer = tf.reshape(prelogits_normed[num_anchor:], [num_anchor, -1, nrof_features])
        # dist_anchor = tf.reduce_sum(tf.square(prelogits_anchor-prelogits_refer), axis=2)
        # dist_anchor = tf.reshape(dist_anchor, [-1])
        # logits_anchor = -0.5 * gamma * dist_anchor
        

        logits_pos = logits_pos_glob
        logits_neg = logits_neg_glob
    
        dist_pos = dist_pos_glob
        dist_neg = dist_neg_glob

        # epsilon_trsd = 0.3
        t_pos = coef * (threshold_pos)
        t_neg = coef * (threshold_neg)


        if gamma == 'auto':
            # gamma = tf.nn.softplus(alpha)
            gamma = tf.log(tf.exp(1.0) + tf.exp(alpha))
        elif type(gamma) == tuple:
            t_min, decay = gamma
            epsilon = 1e-5
            t = t_min + 1.0/(epsilon + decay*tf.cast(global_step, tf.float32))
            gamma = 1.0 / t
        else:
            assert type(gamma) == float
            gamma = tf.constant(gamma)

        hinge_loss = lambda x: tf.nn.relu(1.0 + x)
        margin_func = hinge_loss

        # Losses
        losses = []
        # num_pos = tf.cast(0.95 * tf.cast(tf.size(logits_pos), tf.float32), tf.int32)
        # # num_neg = tf.cast(0.75 * tf.cast(tf.size(logits_neg), tf.float32), tf.int32)
        # q_d = tf.pow(tf.sqrt(dist_neg), 2-nrof_features)*tf.pow(1-0.25*dist_neg, (3-nrof_features)/2)
        # tf.add_to_collection('watch_list', ('q_d', tf.reduce_sum(q_d)))
        # q_d = tf.minimum(1.0, 1 * q_d / tf.reduce_sum(q_d))
        # tf.add_to_collection('watch_list', ('q_d', tf.reduce_mean(q_d)))
        # sample_mask = tf.random_uniform(shape=tf.shape(logits_neg)) <= q_d
        # sample_mask = logits_neg >= tf.reduce_min(logits_pos)
        # _logits_neg = tf.boolean_mask(logits_neg, sample_mask)
        # tf.add_to_collection('watch_list', ('sample_ratio', 
        #    tf.cast(tf.size(_logits_neg),tf.float32) / tf.cast(tf.size(logits_neg),tf.float32)))
               

        # gamma2 = 1 / 0.01
        _logits_pos = tf.reshape(logits_pos, [batch_size, -1])
        _logits_neg = tf.reshape(logits_neg, [batch_size, -1])

        norm = tf.square(tf.reduce_sum(tf.square(prelogits), axis=1, keep_dims=True))
        norm_weights = tf.norm(tf.gather(weights, label), axis=1, keep_dims=True)
        t_pos = (beta)
        t_neg = (beta)


        _logits_pos =  _logits_pos * gamma 
        _logits_neg =  _logits_neg * gamma
        # _logits_neg, _ = tf.nn.top_k(_logits_neg, num_neg)
        # _logits_pos, _ = tf.nn.top_k(_logits_pos, num_pos)
        # _logits_neg = tf.boolean_mask(_logits_neg, sample_mask)
        # _logits_pos = -tf.reduce_logsumexp(-_logits_pos)# , axis=1)[:,None]
        _logits_neg = tf.reduce_logsumexp(_logits_neg, axis=1)[:,None]
        # _logits_pos = tf.reduce_mean(_logits_pos)
        #-- Simulate Ranking
        # se_neg = tf.reduce_sum(tf.exp(_logits_neg))
        # min_pos = tf.reduce_min(_logits_pos)
        # t_pos = tf.stop_gradient(tf.log(se_neg))
        # t_neg = tf.stop_gradient(tf.log(se_neg - tf.exp(_logits_neg)))
        

        # norm = tf.reshape(prelogits[:,-1], [batch_size, -1])
        # norm_weighted = tf.exp(-norm)
        # norm_weighted = norm / tf.reduce_sum(norm) * tf.cast(tf.size(norm), tf.float32)

        # sigma_batch = tf.reshape(tf.gather(sigma, label), [batch_size, -1])

        m = 5.0
        # tf.add_to_collection('watch_list', ('m',m))

        factor = 1 / tf.cast(batch_size, tf.float32)
        bias = tf.log(tf.cast(num_classes, tf.float32))
        loss_pos = tf.nn.relu(m + _logits_neg - _logits_pos) * 0.5
        loss_neg = tf.nn.relu(m + _logits_neg - _logits_pos) * 0.5
        loss = tf.reduce_mean((loss_pos + loss_neg), name='split_loss')
        losses.extend([loss])
        tf.add_to_collection('watch_list', ('split_loss', loss))

        # Global loss
        # weights_batch = tf.gather(weights_normed, label)
        # _logits_pos_glob = tf.reduce_sum(tf.square(prelogits_normed - weights_batch), axis=1)  * coef * gamma
        _logits_pos_glob = tf.reshape(logits_pos_glob, [batch_size, -1]) * gamma
        _logits_neg_glob = tf.reshape(logits_neg_glob, [batch_size, -1]) * gamma
        _logits_neg_glob = tf.reduce_logsumexp(_logits_neg_glob) # , axis=1)[:,None]
        loss_glob = tf.reduce_mean(tf.nn.relu(1 + _logits_neg_glob - _logits_pos_glob), name='loss_glob')
        # losses.append(loss_glob)
        # tf.add_to_collection('watch_list', ('loss_glob', loss_glob))

        # Weight decay
        loss_weight = tf.reduce_sum( 1e-7 * tf.square(weights_normed), name='loss_weight')
        # losses.append(loss_weight)
        # tf.add_to_collection('watch_list', ('loss_weight', loss_weight))

        # Split Softmax
        # _logits_pos_glob = tf.reshape(logits_pos_glob, [batch_size, -1]) * gamma
        # _logits_neg_glob = tf.reshape(logits_neg_glob, [batch_size, -1]) * gamma
        # _logits_pos_glob = tf.log(tf.reduce_sum(tf.exp(_logits_pos_glob) + num_classes-1, axis=1)[:,None])
        # _logits_neg_glob = tf.reduce_logsumexp(_logits_neg_glob, axis=1)[:,None]
        # _t_pos = t_pos * gamma
        # _t_neg = t_neg * gamma
        # loss_pos = tf.reduce_mean(tf.nn.softplus(_t_pos - _logits_pos_glob), name='loss_pos')
        # loss_neg = tf.reduce_mean(tf.nn.softplus(_logits_neg_glob - _t_neg), name='loss_neg')
        # losses.extend([loss_pos, loss_neg])



        # Batch Center loss
        # centers_batch = tf.gather(centers, center_idx)
        centers_batch = tf.gather(weights_normed, label)
        dist_center = tf.reduce_sum(tf.square(prelogits_normed - centers_batch), axis=1)
        loss_center = tf.reduce_mean(1.0*dist_center, name='loss_center')
        # losses.append(loss_center)
        # tf.add_to_collection('watch_list', ('loss_center', loss_center))


        # Update threshold
        if threshold_pos not in tf.trainable_variables():
            # -- Mean threshold        
            mean_pos, var_pos = tf.nn.moments(dist_pos, axes=[0])
            mean_neg, var_neg = tf.nn.moments(dist_neg, axes=[0])
            std_pos = tf.sqrt(var_pos)
            std_neg = tf.sqrt(var_neg)
            threshold_batch = std_neg*mean_pos / (std_pos+std_neg) + std_pos*mean_neg / (std_pos+std_neg)
            threshold_pos_batch = threshold_neg_batch = threshold_batch
            # -- Logits
            # threshold_pos_batch = tf.reduce_logsumexp(_logits_neg)
            # threshold_neg_batch = -tf.reduce_logsumexp(-_logits_pos)
            # -- Quantile
            # diff_pos_sorted, _ = tf.nn.top_k(logits_pos, 2)
            # diff_neg_sorted, _ = tf.nn.top_k(logits_neg, 2704237)
            # threshold_pos_batch = diff_neg_sorted[-1]
            # threshold_neg_batch = diff_pos_sorted[-1]
            threshold_neg_batch = tf.reduce_min(_logits_pos)
            threshold_pos_batch = tf.reduce_max(_logits_neg)
            # -- Update
            diff_threshold_pos = threshold_pos - threshold_pos_batch
            diff_threshold_neg = threshold_neg - threshold_neg_batch
            diff_threshold_pos = 0.1 * diff_threshold_pos
            diff_threshold_neg = 0.1 * diff_threshold_neg
            threshold_pos_update_op = tf.assign_sub(threshold_pos, diff_threshold_pos)
            threshold_neg_update_op = tf.assign_sub(threshold_neg, diff_threshold_neg)
            threshold_update_op = tf.group(threshold_pos_update_op, threshold_neg_update_op)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, threshold_update_op)


        # Update centers
        if weights not in tf.trainable_variables():
            weights_batch = tf.gather(weights, label)
            diff_centers = weights_batch - prelogits
            unique_label, unique_idx, unique_count = tf.unique_with_counts(label)
            appear_times = tf.gather(unique_count, unique_idx)
            appear_times = tf.reshape(appear_times, [-1, 1])
            diff_centers = diff_centers / tf.cast((1 + appear_times), tf.float32)
            diff_centers = 0.5 * diff_centers
            centers_update_op = tf.scatter_sub(weights, label, diff_centers)
            # centers_decay_op = tf.assign_sub(weights, 2*weight_decay*weights)# weight decay
            centers_update_op = tf.group(centers_update_op)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, centers_update_op)

        # if not sigma in tf.trainable_variables(): 
        #     weights_batch = tf.gather(weights, label)
        #     diff_centers = weights_batch - prelogits
        #     _, var_pos = tf.nn.moments(diff_centers, axes=[0])
        #     sigma_batch = tf.reduce_mean(tf.sqrt(var_pos))
        #     diff_sigma = sigma - sigma_batch
        #     diff_sigma = 0.01 * diff_sigma
        #     sigma_update_op = tf.assign_sub(sigma, diff_sigma)
        #     tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, sigma_update_op)



        # Analysis
        mean_dist_pos = tf.reduce_mean(dist_pos, name='mean_dist_pos')
        mean_dist_neg = tf.reduce_mean(dist_neg, name='mean_dist_neg')
        acc_pos = tf.reduce_mean(tf.cast(tf.greater_equal(logits_pos, t_pos), tf.float32), name='acc_pos')
        acc_neg = tf.reduce_mean(tf.cast(tf.less(logits_neg, t_neg), tf.float32), name='acc_neg')
        tf.summary.scalar('threshold_pos', threshold_pos)
        tf.summary.scalar('mean_dist_pos', mean_dist_pos)
        tf.summary.scalar('mean_dist_neg', mean_dist_neg)
        tf.summary.scalar('acc_pos', acc_pos)
        tf.summary.scalar('acc_neg', acc_neg)
        tf.summary.scalar('gamma', gamma)
        tf.summary.scalar('alpha', alpha)
        tf.summary.scalar('beta', beta)
        tf.summary.histogram('dist_pos', dist_pos)
        tf.summary.histogram('dist_neg', dist_neg)
        # tf.summary.histogram('dist_neg_min', _logits_neg / coef)
        # tf.summary.histogram('sigma', sigma)

        # tf.add_to_collection('watch_list', ('alpha', alpha))
        tf.add_to_collection('watch_list', ('gamma', gamma))
        tf.add_to_collection('watch_list', ('alpha', alpha))
        tf.add_to_collection('watch_list', ('beta', beta))
        # tf.add_to_collection('watch_list', ('t_pos', t_pos))
        # tf.add_to_collection('watch_list', ('t_neg', tf.reduce_mean(t_neg)))
        # tf.add_to_collection('watch_list', ('dpos', mean_dist_pos))
        # tf.add_to_collection('watch_list', ('dneg', mean_dist_neg))
        # tf.add_to_collection('watch_list', ('loss_pos', loss_pos))
        # tf.add_to_collection('watch_list', ('loss_neg', loss_neg))
        # tf.add_to_collection('watch_list', ('sigma', sigma))
        # tf.add_to_collection('watch_list', ('logits_pos', tf.reduce_mean(_logits_pos)))
        # tf.add_to_collection('watch_list', ('logits_neg', tf.reduce_mean(_logits_neg)))
        # tf.add_to_collection('watch_list', ('acc_pos', acc_pos))
        # tf.add_to_collection('watch_list', ('acc_neg', acc_neg))

    return losses
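
An illustrative call (not from the source): gamma may be a float, the string 'auto' (in which case it is derived from the learned alpha variable), or a (t_min, decay) tuple that anneals gamma toward 1/t_min as global_step grows. The prelogits, labels, and global_step tensors come from the caller, and the concrete values below are assumptions.

# Hedged usage sketch.
split_losses = split_softmax(prelogits, labels, num_classes,
                             global_step, weight_decay=5e-4, gamma=16.0)
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
total_loss = tf.add_n(split_losses + reg_losses, name='total_loss')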
Example #14
0
    def _setup_basic_network(self, inputs, is_training=True):
        self._end_points = {}

        with slim.arg_scope([slim.conv2d],
                            activation_fn=prelu,
                            weights_initializer=slim.xavier_initializer(),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(0.00005),
                            padding='valid'):

            end_point = 'conv1'
            net = slim.conv2d(inputs, 10, 3, stride=1, scope=end_point)
            self._end_points[end_point] = net

            end_point = 'pool1'
            net = slim.max_pool2d(net,
                                  kernel_size=[2, 2],
                                  stride=2,
                                  scope=end_point,
                                  padding='SAME')
            self._end_points[end_point] = net

            end_point = 'conv2'
            net = slim.conv2d(net,
                              num_outputs=16,
                              kernel_size=[3, 3],
                              stride=1,
                              scope=end_point)
            self._end_points[end_point] = net

            end_point = 'conv3'
            net = slim.conv2d(net,
                              num_outputs=32,
                              kernel_size=[3, 3],
                              stride=1,
                              scope=end_point)
            self._end_points[end_point] = net

            end_point = 'conv4_1'
            conv4_1 = slim.conv2d(net,
                                  num_outputs=2,
                                  kernel_size=[1, 1],
                                  stride=1,
                                  scope=end_point,
                                  activation_fn=tf.nn.softmax)
            self._end_points[end_point] = conv4_1

            end_point = 'conv4_2'
            bounding_box_predictions = slim.conv2d(net,
                                                   num_outputs=4,
                                                   kernel_size=[1, 1],
                                                   stride=1,
                                                   scope=end_point,
                                                   activation_fn=None)
            self._end_points[end_point] = bounding_box_predictions

            end_point = 'conv4_3'
            landmark_predictions = slim.conv2d(net,
                                               num_outputs=10,
                                               kernel_size=[1, 1],
                                               stride=1,
                                               scope=end_point,
                                               activation_fn=None)
            self._end_points[end_point] = landmark_predictions

            return (conv4_1, bounding_box_predictions, landmark_predictions)
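
For reference, shape arithmetic not in the original snippet, assuming the canonical 12x12 P-Net training crop: 12x12 -> conv1 3x3/valid -> 10x10 -> pool1 2x2 stride 2 -> 5x5 -> conv2 3x3/valid -> 3x3 -> conv3 3x3/valid -> 1x1. The three 1x1-convolution heads therefore behave like fully connected layers, emitting [N, 1, 1, 2], [N, 1, 1, 4], and [N, 1, 1, 10]; on larger test images the same heads slide fully convolutionally over the feature map.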
Example #15
0
def L_O_Net(inputs,
            label=None,
            bbox_target=None,
            animoji_target=None,
            training=True):
    # batch_norm_params = {
    # 'decay': 0.995,
    # 'epsilon': 0.001,
    # 'updates_collections': None,
    # 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
    # }
    # with slim.arg_scope([slim.conv2d, slim.fully_connected],
    # activation_fn = prelu,
    # weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
    # biases_initializer=tf.zeros_initializer(),
    # weights_regularizer=slim.l2_regularizer(0.0005),
    # normalizer_fn=slim.batch_norm,
    # normalizer_params=batch_norm_params
    # ):

    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print('L_O_Net network shape')
        print(inputs.get_shape())
        net = slim.conv2d(inputs,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv1',
                          padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool1',
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2',
                          padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool2',
                              padding='valid')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3',
                          padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool3',
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=128,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv4',
                          padding='valid')
        print(net.get_shape())
        net = slim.avg_pool2d(net,
                              kernel_size=[2, 2],
                              stride=1,
                              scope='pool4',
                              padding='valid')
        print(net.get_shape())

        #################
        ## mobilenet_v2##
        #################
        # exp = 6  # expansion ratio

        # net = conv2d_block(inputs, 32, 3, 2, training, name='conv1_1')  # size/2
        # print(net.get_shape())
        # net = res_block(net, 1, 16, 1, training, name='res2_1')
        # print(net.get_shape())
        # net = res_block(net, exp, 24, 2, training, name='res3_1')  # size/4
        # net = res_block(net, exp, 24, 1, training, name='res3_2')
        # print(net.get_shape())
        # net = res_block(net, exp, 32, 2, training, name='res4_1')  # size/8
        # net = res_block(net, exp, 32, 1, training, name='res4_2')
        # net = res_block(net, exp, 32, 1, training, name='res4_3')
        # print(net.get_shape())
        # net = res_block(net, exp, 64, 1, training, name='res5_1')
        # net = res_block(net, exp, 64, 1, training, name='res5_2')
        # net = res_block(net, exp, 64, 1, training, name='res5_3')
        # net = res_block(net, exp, 64, 1, training, name='res5_4')
        # print(net.get_shape())
        # net = res_block(net, exp, 96, 2, training, name='res6_1')  # size/16
        # net = res_block(net, exp, 96, 1, training, name='res6_2')
        # net = res_block(net, exp, 96, 1, training, name='res6_3')
        # print(net.get_shape())
        # net = res_block(net, exp, 160, 2, training, name='res7_1')  # size/32
        # net = res_block(net, exp, 160, 1, training, name='res7_2')
        # net = res_block(net, exp, 160, 1, training, name='res7_3')
        # print(net.get_shape())
        # net = res_block(net, exp, 320, 1, training, name='res8_1', shortcut=False)
        # print(net.get_shape())
        # net = pwise_block(net, 1280, training, name='conv9_1')
        # net = global_avg(net)
        # print(net.get_shape())
        # fc_flatten = flatten(conv_1x1(net,96, name='fc_flatten'))
        # print(fc_flatten.get_shape())

        net = tf.transpose(net, perm=[0, 3, 1, 2])
        print(net.get_shape())
        fc_flatten = slim.flatten(net)
        print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten,
                                   num_outputs=256,
                                   scope='fc1',
                                   activation_fn=prelu)
        print(fc1.get_shape())

        cls_prob = slim.fully_connected(fc1,
                                        num_outputs=2,
                                        scope='cls_fc',
                                        activation_fn=tf.nn.softmax)
        print(cls_prob.get_shape())

        bbox_pred = slim.fully_connected(fc1,
                                         num_outputs=4,
                                         scope='bbox_fc',
                                         activation_fn=None)
        print(bbox_pred.get_shape())

        # landmark_pred = slim.fully_connected(fc1,num_outputs=10,scope='landmark_fc',activation_fn=None)
        # print(landmark_pred.get_shape())

        animoji_pred = slim.fully_connected(fc1,
                                            num_outputs=140,
                                            scope='animoji_fc',
                                            activation_fn=None)
        print(animoji_pred.get_shape())

        if training:
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy, recall = cal_accuracy(cls_prob, label)
            # landmark_loss = landmark_ohem(landmark_pred, landmark_target,label)
            animoji_loss = animoji_ohem(animoji_pred, animoji_target, label)
            print(tf.losses.get_regularization_losses())
            L2_loss = tf.add_n(tf.losses.get_regularization_losses())
            # return cls_loss,bbox_loss,landmark_loss,animoji_loss,L2_loss,accuracy, recall
            return cls_loss, bbox_loss, animoji_loss, L2_loss, accuracy, recall
        else:
            # return cls_prob,bbox_pred,landmark_pred,animoji_pred
            return cls_prob, bbox_pred, animoji_pred
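
One detail worth noting (a reading of the code, not stated in the source): tf.transpose(net, perm=[0, 3, 1, 2]) converts the feature map from NHWC to NCHW before slim.flatten, so the flattened vector is ordered channel-major rather than row-major. The element count is unchanged; the reordering only matters when the fully connected weights were trained, or will be exported, with the other memory layout (e.g. a Caffe-style model).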
Example #16
0
def O_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print('O_Net network shape')
        print(inputs.get_shape())
        net = slim.conv2d(inputs,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv1')
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[3, 3],
                              stride=2,
                              scope='pool1',
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope='pool2')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool3',
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=128,
                          kernel_size=[2, 2],
                          stride=1,
                          scope='conv4')
        print(net.get_shape())
        net = tf.transpose(net, perm=[0, 3, 1, 2])
        print(net.get_shape())
        fc_flatten = slim.flatten(net)
        print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten,
                                   num_outputs=256,
                                   scope='fc1',
                                   activation_fn=prelu)
        print(fc1.get_shape())

        cls_prob = slim.fully_connected(fc1,
                                        num_outputs=2,
                                        scope='cls_fc',
                                        activation_fn=tf.nn.softmax)
        print(cls_prob.get_shape())

        bbox_pred = slim.fully_connected(fc1,
                                         num_outputs=4,
                                         scope='bbox_fc',
                                         activation_fn=None)
        print(bbox_pred.get_shape())

        landmark_pred = slim.fully_connected(fc1,
                                             num_outputs=10,
                                             scope='landmark_fc',
                                             activation_fn=None)
        print(landmark_pred.get_shape())

        if training:
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy, recall = cal_accuracy(cls_prob, label)
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)
            L2_loss = tf.add_n(tf.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, recall
        else:
            return cls_prob, bbox_pred, landmark_pred, None
Example #17
0
 def O_Net(self, inputs):
     with tf.variable_scope('ONet', reuse=None):
         with slim.arg_scope(
             [slim.conv2d],
                 activation_fn=self.prelu,
                 weights_initializer=slim.xavier_initializer(),
                 biases_initializer=tf.zeros_initializer(),
                 weights_regularizer=slim.l2_regularizer(0.0005),
                 padding='valid'):
             print(inputs.get_shape())
             net = slim.conv2d(inputs,
                               num_outputs=32,
                               kernel_size=[3, 3],
                               stride=1,
                               scope="conv1")
             print(net.get_shape())
             net = slim.max_pool2d(net,
                                   kernel_size=[3, 3],
                                   stride=2,
                                   scope="pool1",
                                   padding='SAME')
             print(net.get_shape())
             net = slim.conv2d(net,
                               num_outputs=64,
                               kernel_size=[3, 3],
                               stride=1,
                               scope="conv2")
             print(net.get_shape())
             net = slim.max_pool2d(net,
                                   kernel_size=[3, 3],
                                   stride=2,
                                   scope="pool2")
             print(net.get_shape())
             net = slim.conv2d(net,
                               num_outputs=64,
                               kernel_size=[3, 3],
                               stride=1,
                               scope="conv3")
             print(net.get_shape())
             net = slim.max_pool2d(net,
                                   kernel_size=[2, 2],
                                   stride=2,
                                   scope="pool3",
                                   padding='SAME')
             print(net.get_shape())
             net = slim.conv2d(net,
                               num_outputs=128,
                               kernel_size=[2, 2],
                               stride=1,
                               scope="conv4")
             # print(net.get_shape())
             fc_flatten = slim.flatten(net)
             print(fc_flatten.get_shape())
             fc1 = slim.fully_connected(fc_flatten,
                                        num_outputs=256,
                                        scope="conv5",
                                        activation_fn=self.prelu)
             print(fc1.get_shape())
             # batch*2
             cls_prob = slim.fully_connected(fc1,
                                             num_outputs=2,
                                             scope="conv6-1",
                                             activation_fn=tf.nn.softmax)
             print(cls_prob.get_shape())
             # batch*4
             bbox_pred = slim.fully_connected(fc1,
                                              num_outputs=4,
                                              scope="conv6-2",
                                              activation_fn=None)
             print(bbox_pred.get_shape())
             # batch*10
             landmark_pred = slim.fully_connected(fc1,
                                                  num_outputs=10,
                                                  scope="conv6-3",
                                                  activation_fn=None)
             print(landmark_pred.get_shape())
             return cls_prob, bbox_pred, landmark_pred
Example #18
0
def P_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):

        net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1')
        _activation_summary(net)
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool1',
                              padding='SAME')
        _activation_summary(net)
        net = slim.conv2d(net,
                          num_outputs=16,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        _activation_summary(net)
        net = slim.conv2d(net,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        _activation_summary(net)
        # batch*H*W*2
        conv4_1 = slim.conv2d(net,
                              num_outputs=2,
                              kernel_size=[1, 1],
                              stride=1,
                              scope='conv4_1',
                              activation_fn=tf.nn.softmax)
        _activation_summary(conv4_1)

        # bbox_pre[batch, H, W, 4]
        bbox_pred = slim.conv2d(net,
                                num_outputs=4,
                                kernel_size=[1, 1],
                                stride=1,
                                scope='conv4_2',
                                activation_fn=None)
        _activation_summary(bbox_pred)
        # landmark_pred[batch, H, W, 10]
        landmark_pred = slim.conv2d(net,
                                    num_outputs=10,
                                    kernel_size=[1, 1],
                                    stride=1,
                                    scope='conv4_3',
                                    activation_fn=None)
        _activation_summary(landmark_pred)

        if training:
            #batch*2
            # calculate classification loss
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            #batch
            # calculate bounding box error (squared sum error)
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            #batch*10
            landmark_pred = tf.squeeze(landmark_pred, [1, 2],
                                       name="landmark_pred")
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)

            accuracy = cal_accuracy(cls_prob, label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            # when testing, batch_size = 1
            cls_pro_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_pro_test, bbox_pred_test, landmark_pred_test
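
A brief note on the two branches above (a reading of the code, assuming the standard 12x12 P-Net training crop): during training the spatial output is 1x1, so tf.squeeze(..., [1, 2]) turns the [N, 1, 1, C] maps into [N, C] tensors for the OHEM losses; at test time the network runs fully convolutionally on a whole image with batch_size 1, so only the batch axis is squeezed and the [H, W, C] score maps are kept for sliding-window proposal generation.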
Example #19
0
def P_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    #define common params
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print(inputs.get_shape())
        net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1')
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool1',
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=16,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        print(net.get_shape())
        #batch*H*W*2
        conv4_1 = slim.conv2d(net,
                              num_outputs=2,
                              kernel_size=[1, 1],
                              stride=1,
                              scope='conv4_1',
                              activation_fn=tf.nn.softmax)
        #conv4_1 = slim.conv2d(net,num_outputs=1,kernel_size=[1,1],stride=1,scope='conv4_1',activation_fn=tf.nn.sigmoid)

        print(conv4_1.get_shape())
        #batch*H*W*4
        bbox_pred = slim.conv2d(net,
                                num_outputs=4,
                                kernel_size=[1, 1],
                                stride=1,
                                scope='conv4_2',
                                activation_fn=None)
        print(bbox_pred.get_shape())
        #batch*H*W*10
        landmark_pred = slim.conv2d(net,
                                    num_outputs=10,
                                    kernel_size=[1, 1],
                                    stride=1,
                                    scope='conv4_3',
                                    activation_fn=None)
        print(landmark_pred.get_shape())
        #cls_prob_original = conv4_1
        #bbox_pred_original = bbox_pred
        if training:
            #batch*2
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            #batch
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            #batch*10
            landmark_pred = tf.squeeze(landmark_pred, [1, 2],
                                       name="landmark_pred")
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)

            accuracy = cal_accuracy(cls_prob, label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        #test
        else:
            #when testing, batch_size = 1
            cls_pro_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_pro_test, bbox_pred_test, landmark_pred_test
Example #20
0
def network(in_image, if_is_training):
    batch_norm_params = {
        'is_training': if_is_training,
        'zero_debias_moving_mean': True,
        'decay': 0.99,
        'epsilon': 0.001,
        'scale': True,
        'updates_collections': None
    }

    with slim.arg_scope([slim.conv2d],
                        activation_fn=tf.nn.relu,
                        padding='SAME',
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params,
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        out_1 = 32
        out_2 = 64
        out_3 = 128

        net = slim.conv2d(in_image,
                          num_outputs=out_2,
                          kernel_size=[5, 5],
                          stride=1,
                          scope='conv1')
        print('1_con:\t', net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1')
        print('1_pool:\t', net.get_shape())

        net = slim.conv2d(net,
                          num_outputs=out_2,
                          kernel_size=[5, 5],
                          stride=1,
                          scope='conv2')
        print('2_con:\t', net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool2')
        print('2_pool:\t', net.get_shape())

        net = slim.conv2d(net,
                          num_outputs=out_3,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3_1')
        net = slim.conv2d(net,
                          num_outputs=out_3,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3_2')
        print('3_con:\t', net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool3')
        print('3_pool:\t', net.get_shape())

    # net = tf.reshape(net,shape=[-1,2*2*128])
    net = slim.flatten(net, scope='flatten')

    with slim.arg_scope([slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        net = slim.fully_connected(
            net,
            1000,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            scope='fc_total')
        print('fc:\t', net.get_shape())

        pre_loca = slim.fully_connected(
            net,
            2000,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            scope='fc2_1')

        pre_loca = slim.fully_connected(
            pre_loca,
            8,
            activation_fn=tf.nn.sigmoid,
            # normalizer_fn=None,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),
            scope='fc2_2')

        pre_loca = tf.reshape(pre_loca, shape=[-1, 4, 2])
        return pre_loca
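
A minimal usage sketch (not from the source; the input resolution and tensor names are assumptions). The final sigmoid plus the reshape to [-1, 4, 2] suggests the network regresses four normalized (x, y) points per image, each in (0, 1).

# Hedged example: the 64x64 input size is an assumption; any size that leaves a
# small feature map after the three 2x2 poolings works with slim.flatten.
in_image = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='in_image')
pre_loca = network(in_image, if_is_training=False)  # -> [batch, 4, 2], values in (0, 1)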
Example #21
0
    def build_network(self, images, is_training=True, scope='yolov1'):
        net = images
        with tf.variable_scope(scope):
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                    weights_regularizer=slim.l2_regularizer(0.00004)):
                with slim.arg_scope(
                    [slim.conv2d],
                        weights_initializer=slim.xavier_initializer(),
                        normalizer_fn=slim.batch_norm,
                        activation_fn=slim.nn.leaky_relu,
                        normalizer_params=self.bn_params):
                    with slim.arg_scope([slim.batch_norm, slim.dropout],
                                        is_training=is_training):
                        net = slim.conv2d(net,
                                          64, [7, 7],
                                          stride=2,
                                          padding='SAME',
                                          scope='layer1')
                        net = slim.max_pool2d(net, [2, 2],
                                              stride=2,
                                              padding='SAME',
                                              scope='pool1')

                        net = slim.conv2d(net,
                                          192, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer2')
                        net = slim.max_pool2d(net, [2, 2],
                                              stride=2,
                                              padding='SAME',
                                              scope='pool2')

                        net = slim.conv2d(net,
                                          128, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer3_1')
                        net = slim.conv2d(net,
                                          256, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer3_2')
                        net = slim.conv2d(net,
                                          256, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer3_3')
                        net = slim.conv2d(net,
                                          512, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer3_4')
                        net = slim.max_pool2d(net, [2, 2],
                                              stride=2,
                                              padding='SAME',
                                              scope='pool3')

                        net = slim.conv2d(net,
                                          256, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_1')
                        net = slim.conv2d(net,
                                          512, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_2')
                        net = slim.conv2d(net,
                                          256, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_3')
                        net = slim.conv2d(net,
                                          512, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_4')
                        net = slim.conv2d(net,
                                          256, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_5')
                        net = slim.conv2d(net,
                                          512, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_6')
                        net = slim.conv2d(net,
                                          256, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_7')
                        net = slim.conv2d(net,
                                          512, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_8')
                        net = slim.conv2d(net,
                                          512, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_9')
                        net = slim.conv2d(net,
                                          1024, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer4_10')
                        net = slim.max_pool2d(net, [2, 2],
                                              stride=2,
                                              padding='SAME',
                                              scope='pool4')

                        net = slim.conv2d(net,
                                          512, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer5_1')
                        net = slim.conv2d(net,
                                          1024, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer5_2')
                        net = slim.conv2d(net,
                                          512, [1, 1],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer5_3')
                        net = slim.conv2d(net,
                                          1024, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer5_4')

                        if self.pre_training:
                            net = slim.avg_pool2d(net, [7, 7],
                                                  stride=1,
                                                  padding='VALID',
                                                  scope='classify_avg5')
                            net = slim.flatten(net)
                            net = slim.fully_connected(
                                net,
                                self.pre_train_num,
                                activation_fn=tf.nn.leaky_relu,
                                scope='classify_fc1')
                            return net

                        net = slim.conv2d(net,
                                          1024, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer5_5')
                        net = slim.conv2d(net,
                                          1024, [3, 3],
                                          stride=2,
                                          padding='SAME',
                                          scope='layer5_6')

                        net = slim.conv2d(net,
                                          1024, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer6_1')
                        net = slim.conv2d(net,
                                          1024, [3, 3],
                                          stride=1,
                                          padding='SAME',
                                          scope='layer6_2')

                        net = slim.flatten(net)

                        net = slim.fully_connected(
                            net,
                            1024,
                            activation_fn=tf.nn.leaky_relu,
                            scope='fc1')
                        net = slim.dropout(net, 0.5)
                        net = slim.fully_connected(
                            net,
                            4096,
                            activation_fn=tf.nn.leaky_relu,
                            scope='fc2')
                        net = slim.dropout(net, 0.5)
                        net = slim.fully_connected(net,
                                                   self.output_size,
                                                   activation_fn=None,
                                                   scope='fc3')
                        # N, 7,7,30
                        # net = tf.reshape(net,[-1,S,S,B*5+C])
            return net
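The commented reshape above shows how the flat fc3 output is meant to be consumed downstream: with the standard YOLOv1 setting of S=7 grid cells, B=2 boxes per cell, and C=20 classes, self.output_size would be 7*7*(2*5+20) = 1470, which folds back into the grid tensor noted in the '# N, 7,7,30' comment. A minimal sketch under those assumptions:

import tensorflow as tf

S, B, C = 7, 2, 20                                               # YOLOv1 defaults (assumed here)
flat = tf.placeholder(tf.float32, [None, S * S * (B * 5 + C)])   # stand-in for the fc3 output
grid = tf.reshape(flat, [-1, S, S, B * 5 + C])                   # -> [N, 7, 7, 30]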
Beispiel #22
0
    def _setup_basic_network(self, inputs, is_training=True):
        self._end_points = {}

        with slim.arg_scope([slim.conv2d],
                            activation_fn=prelu,
                            weights_initializer=slim.xavier_initializer(),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(0.00005),
                            padding='valid'):

            end_point = 'conv1'
            net = slim.conv2d(
                inputs,
                num_outputs=28,
                kernel_size=[3, 3],
                stride=1,
                scope=end_point)
            self._end_points[end_point] = net

            end_point = 'pool1'
            net = slim.max_pool2d(
                net,
                kernel_size=[3, 3],
                stride=2,
                scope=end_point,
                padding='SAME')
            self._end_points[end_point] = net

            end_point = 'conv2'
            net = slim.conv2d(
                net,
                num_outputs=48,
                kernel_size=[3, 3],
                stride=1,
                scope=end_point)
            self._end_points[end_point] = net

            end_point = 'pool2'
            net = slim.max_pool2d(
                net, kernel_size=[3, 3], stride=2, scope=end_point)
            self._end_points[end_point] = net

            end_point = 'conv3'
            net = slim.conv2d(
                net,
                num_outputs=64,
                kernel_size=[2, 2],
                stride=1,
                scope=end_point)
            self._end_points[end_point] = net

            fc_flatten = slim.flatten(net)

            end_point = 'fc1'
            fc1 = slim.fully_connected(
                fc_flatten,
                num_outputs=128,
                scope=end_point,
                activation_fn=prelu)
            self._end_points[end_point] = fc1

            end_point = 'cls_fc'
            class_probability = slim.fully_connected(
                fc1,
                num_outputs=2,
                scope=end_point,
                activation_fn=tf.nn.softmax)
            self._end_points[end_point] = class_probability

            end_point = 'bbox_fc'
            bounding_box_predictions = slim.fully_connected(
                fc1, num_outputs=4, scope=end_point, activation_fn=None)
            self._end_points[end_point] = bounding_box_predictions

            end_point = 'landmark_fc'
            landmark_predictions = slim.fully_connected(
                fc1, num_outputs=10, scope=end_point, activation_fn=None)
            self._end_points[end_point] = landmark_predictions

            return (class_probability, bounding_box_predictions,
                    landmark_predictions)
Beispiel #23
0
    def initialize(self, config, num_classes):
        '''
            Initialize the graph from scratch according config.
        '''
        with self.graph.as_default():
            with self.sess.as_default():
                # Set up placeholders
                w, h = config.image_size
                channels = config.channels
                image_batch_placeholder = tf.placeholder(tf.float32, shape=[None, h, w, channels], name='image_batch')
                label_batch_placeholder = tf.placeholder(tf.int32, shape=[None], name='label_batch')
                learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
                keep_prob_placeholder = tf.placeholder(tf.float32, name='keep_prob')
                phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
                global_step = tf.Variable(0, trainable=False, dtype=tf.int32, name='global_step')

                image_splits = tf.split(image_batch_placeholder, config.num_gpus)
                label_splits = tf.split(label_batch_placeholder, config.num_gpus)
                grads_splits = []
                split_dict = {}
                def insert_dict(k,v):
                    if k in split_dict: split_dict[k].append(v)
                    else: split_dict[k] = [v]
                        
                for i in range(config.num_gpus):
                    scope_name = '' if i==0 else 'gpu_%d' % i
                    with tf.name_scope(scope_name):
                        with tf.variable_scope('', reuse=i>0):
                            with tf.device('/gpu:%d' % i):
                                images = tf.identity(image_splits[i], name='inputs')
                                labels = tf.identity(label_splits[i], name='labels')
                                # Save the first channel for testing
                                if i == 0:
                                    self.inputs = images
                                
                                # Build networks
                                network = imp.load_source('network', config.network)
                                prelogits = network.inference(images, keep_prob_placeholder, phase_train_placeholder,
                                                        bottleneck_layer_size = config.embedding_size, 
                                                        weight_decay = config.weight_decay, 
                                                        model_version = config.model_version)
                                prelogits = tf.identity(prelogits, name='prelogits')
                                embeddings = tf.nn.l2_normalize(prelogits, dim=1, name='embeddings')
                                if i == 0:
                                    self.outputs = tf.identity(embeddings, name='outputs')

                                # Build all losses
                                losses = []

                                # Original Softmax
                                if 'softmax' in config.losses.keys():
                                    logits = slim.fully_connected(prelogits, num_classes, 
                                                                    weights_regularizer=slim.l2_regularizer(config.weight_decay),
                                                                    weights_initializer=slim.xavier_initializer(),
                                                                    biases_initializer=tf.constant_initializer(0.0),
                                                                    activation_fn=None, scope='Logits')
                                    cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                                                    labels=labels, logits=logits), name='cross_entropy')
                                    losses.append(cross_entropy)
                                    insert_dict('sloss', cross_entropy)
                                # L2-Softmax
                                if 'cosine' in config.losses.keys():
                                    logits, cosine_loss = tflib.cosine_softmax(prelogits, labels, num_classes, 
                                                            weight_decay=config.weight_decay,
                                                            **config.losses['cosine']) 
                                    losses.append(cosine_loss)
                                    insert_dict('closs', cosine_loss)
                                # A-Softmax
                                if 'angular' in config.losses.keys():
                                    angular_loss = tflib.angular_softmax(prelogits, labels, num_classes, 
                                                            global_step, weight_decay=config.weight_decay,
                                                            **config.losses['angular'])  
                                    losses.append(angular_loss)
                                    insert_dict('aloss', angular_loss)
                                # AM-Softmax
                                if 'am' in config.losses.keys():
                                    am_loss = tflib.am_softmax(prelogits, labels, num_classes, 
                                                            global_step, weight_decay=config.weight_decay,
                                                            **config.losses['am'])
                                    losses.append(am_loss)
                                    insert_dict('loss', am_loss)
                                # Max-margin Pairwise Score (MPS)
                                if 'pair' in config.losses.keys():
                                    pair_loss = tflib.pair_loss(prelogits, labels, num_classes, 
                                                            global_step, weight_decay=config.weight_decay,
                                                            **config.losses['pair'])  
                                    losses.append(pair_loss)
                                    insert_dict('loss', pair_loss)

                                # Collect all losses
                                reg_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), name='reg_loss')
                                losses.append(reg_loss)
                                insert_dict('reg_loss', reg_loss)

                                total_loss = tf.add_n(losses, name='total_loss')
                                grads_split = tf.gradients(total_loss, tf.trainable_variables())
                                grads_splits.append(grads_split)



                # Merge the splits
                self.watchlist = {}
                grads = tflib.average_grads(grads_splits)
                for k,v in split_dict.items():
                    v = tflib.average_tensors(v)
                    self.watchlist[k] = v
                    if 'loss' in k:
                        tf.summary.scalar('losses/' + k, v)
                    else:
                        tf.summary.scalar(k, v)


                # Training operators
                apply_gradient_op = tflib.apply_gradient(tf.trainable_variables(), grads, config.optimizer,
                                        learning_rate_placeholder, config.learning_rate_multipliers)

                update_global_step_op = tf.assign_add(global_step, 1)

                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

                train_ops = [apply_gradient_op, update_global_step_op] + update_ops
                train_op = tf.group(*train_ops)

                tf.summary.scalar('learning_rate', learning_rate_placeholder)
                summary_op = tf.summary.merge_all()

                # Initialize variables
                self.sess.run(tf.local_variables_initializer())
                self.sess.run(tf.global_variables_initializer())
                self.saver = tf.train.Saver(tf.trainable_variables())

                # Keep useful tensors
                self.image_batch_placeholder = image_batch_placeholder
                self.label_batch_placeholder = label_batch_placeholder 
                self.learning_rate_placeholder = learning_rate_placeholder 
                self.keep_prob_placeholder = keep_prob_placeholder 
                self.phase_train_placeholder = phase_train_placeholder 
                self.global_step = global_step
                self.train_op = train_op
                self.summary_op = summary_op
Beispiel #24
0
def discriminator(images,
                  num_classes,
                  bottleneck_size=512,
                  keep_prob=1.0,
                  phase_train=True,
                  weight_decay=0.0,
                  reuse=None,
                  scope='Discriminator'):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        activation_fn=leaky_relu,
                        normalizer_fn=None,
                        normalizer_params=batch_norm_params):
        with tf.variable_scope(scope, 'Discriminator', [images], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=phase_train):

                print('{} input shape:'.format(scope),
                      [dim.value for dim in images.shape])

                net = conv(images, 32, kernel_size=4, stride=2, scope='conv1')
                print('module_1 shape:', [dim.value for dim in net.shape])

                net = conv(net, 64, kernel_size=4, stride=2, scope='conv2')
                print('module_2 shape:', [dim.value for dim in net.shape])

                net = conv(net, 128, kernel_size=4, stride=2, scope='conv3')
                print('module_3 shape:', [dim.value for dim in net.shape])

                net = conv(net, 256, kernel_size=4, stride=2, scope='conv4')
                print('module_4 shape:', [dim.value for dim in net.shape])

                net = conv(net, 512, kernel_size=4, stride=2, scope='conv5')
                print('module_5 shape:', [dim.value for dim in net.shape])

                # Patch discriminator
                patch5_logits = slim.conv2d(net,
                                            3,
                                            1,
                                            activation_fn=None,
                                            normalizer_fn=None,
                                            scope='patch5_logits')
                patch_logits = tf.reshape(patch5_logits, [-1, 3])

                # Global Discriminator
                net = slim.flatten(net)
                prelogits = slim.fully_connected(
                    net,
                    bottleneck_size,
                    scope='Bottleneck',
                    weights_initializer=slim.xavier_initializer(),
                    activation_fn=None,
                    normalizer_fn=None)
                prelogits = tf.nn.l2_normalize(prelogits, dim=1)
                print('latent shape:', [dim.value for dim in prelogits.shape])

                logits = slim.fully_connected(prelogits,
                                              num_classes,
                                              scope='Logits',
                                              activation_fn=None,
                                              normalizer_fn=None)

                return patch_logits, logits
Beispiel #25
0
def O_Net(inputs):
    with tf.variable_scope('O_Net'):
        with slim.arg_scope([slim.conv2d],
                            activation_fn=prelu,
                            weights_initializer=slim.xavier_initializer(),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(0.0005),
                            padding='valid'):
            # pdb.set_trace()
            net = slim.conv2d(inputs,
                              num_outputs=32,
                              kernel_size=[5, 3],
                              stride=1,
                              scope="oconv1")  # 140, 46, 32
            net = slim.max_pool2d(net,
                                  kernel_size=[3, 3],
                                  stride=2,
                                  scope="opool1",
                                  padding='SAME')  # 70, 23, 32
            net = slim.conv2d(net,
                              num_outputs=64,
                              kernel_size=[5, 3],
                              stride=1,
                              scope="oconv2")  # 66, 21, 64
            net = slim.max_pool2d(net,
                                  kernel_size=[3, 3],
                                  stride=2,
                                  scope="opool2")  # 32, 10, 64  333
            net = slim.conv2d(net,
                              num_outputs=64,
                              kernel_size=[5, 3],
                              stride=1,
                              scope="oconv3")  # 28, 8, 64
            net = slim.max_pool2d(net,
                                  kernel_size=[2, 2],
                                  stride=2,
                                  scope="opool3",
                                  padding='SAME')  # 14, 4, 64

            net = slim.conv2d(net,
                              num_outputs=64,
                              kernel_size=[5, 3],
                              stride=1,
                              scope="oconv4")  # 10, 2, 64
            net = slim.max_pool2d(net,
                                  kernel_size=[2, 2],
                                  stride=2,
                                  scope="opool4",
                                  padding='SAME')  # 5, 1, 64

            net = slim.conv2d(net,
                              num_outputs=128,
                              kernel_size=[3, 1],
                              stride=1,
                              scope="oconv5")  # 3, 1, 128 6666

            fc_flatten = slim.flatten(net)
            fc1 = slim.fully_connected(fc_flatten,
                                       num_outputs=256,
                                       scope="ofc1",
                                       activation_fn=prelu)
            fc2_1 = slim.fully_connected(fc1,
                                         num_outputs=2,
                                         scope="ofc2_1",
                                         activation_fn=tf.nn.softmax)
            fc2_2 = slim.fully_connected(fc1,
                                         num_outputs=4,
                                         scope="ofc2_2",
                                         activation_fn=None)

            return (fc2_1, fc2_2)
Beispiel #26
0
def aspp(inputs, output_stride, batch_norm_decay, is_training, depth=256):
    '''Atrous Spatial Pyramid Pooling (ASPP).
    Args:
      inputs: input 4-D tensor
      output_stride: determines the dilation rates of the atrous convolutions
      batch_norm_decay: batch-norm decay (same as in the function above)
      is_training: whether the model is being trained
      depth: number of output channels
    Returns:
      The output tensor after ASPP.
    '''
    with tf.variable_scope('aspp'):
        if output_stride not in [8, 16]:
            raise ValueError('output_stride must be 8 or 16.')
        # Dilation rates
        # atrous_rates = [6, 12, 18]
        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=0.0005)):
            with slim.arg_scope([slim.conv2d],
                                weights_initializer=slim.xavier_initializer(),
                                normalizer_fn=slim.batch_norm,
                                normalizer_params={
                                    'is_training': is_training,
                                    'decay': batch_norm_decay
                                }):
                inputs_size = tf.shape(inputs)[1:3]
                # slim.conv2d defaults to ReLU activation and padding='SAME'
                conv_1x1 = slim.conv2d(inputs,
                                       depth, [1, 1],
                                       stride=1,
                                       scope='conv_1x1')
                # 3x3 convolutions with increasing dilation rates
                conv_3x3_1 = slim.conv2d(inputs,
                                         depth, [3, 3],
                                         stride=1,
                                         rate=1,
                                         scope='conv_3x3_1')
                conv_3x3_2 = slim.conv2d(inputs,
                                         depth, [3, 3],
                                         stride=1,
                                         rate=2,
                                         scope='conv_3x3_2')
                conv_3x3_3 = slim.conv2d(inputs,
                                         depth, [3, 3],
                                         stride=1,
                                         rate=4,
                                         scope='conv_3x3_3')
                # pcam = PAM_Module(inputs)
                with tf.variable_scope('image_level_features'):
                    # Global average pooling
                    image_level_features = tf.reduce_mean(
                        inputs,
                        axis=[1, 2],
                        keep_dims=True,
                        name='global_average_pooling')
                    image_level_features = slim.conv2d(image_level_features,
                                                       depth, [1, 1],
                                                       stride=1,
                                                       scope='conv_1x1')
                    # Bilinear interpolation back to the input spatial size
                    image_level_features = tf.image.resize_bilinear(
                        image_level_features, inputs_size, name='upsample')
                net = tf.concat([
                    conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3,
                    image_level_features
                ],
                                axis=3,
                                name='concat')
                net = slim.conv2d(net,
                                  512, [1, 1],
                                  trainable=is_training,
                                  scope='convq')
                return net
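As a usage sketch, the ASPP head is typically applied to the last feature map of a dilated backbone; the placeholder below is only a stand-in for such a tensor (e.g. output stride 16 on a 513x513 input) and is not part of the original code:

import tensorflow as tf

backbone_features = tf.placeholder(tf.float32, [None, 33, 33, 2048])  # hypothetical backbone output
aspp_out = aspp(backbone_features, output_stride=16,
                batch_norm_decay=0.9997, is_training=True, depth=256)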
Beispiel #27
0
    def _make_graph(self):
        self.logger.info("Generating training graph on {} GPUs ...".format(
            self.cfg.nr_gpus))

        weights_initializer = slim.xavier_initializer()
        biases_initializer = tf.constant_initializer(0.)
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            self.cfg.weight_decay)

        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(self.cfg.nr_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i) as name_scope:
                        # Force all Variables to reside on the CPU.
                        with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                                 slim.conv2d_transpose, slim.separable_conv2d,
                                                 slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                weights_initializer=weights_initializer,
                                                biases_initializer=biases_initializer):
                                # loss over single GPU
                                self.net.make_network(is_train=True)
                                if i == self.cfg.nr_gpus - 1:
                                    loss = self.net.get_loss(include_wd=True)
                                else:
                                    loss = self.net.get_loss()
                                self._input_list.append(self.net.get_inputs())

                        tf.get_variable_scope().reuse_variables()

                        if i == 0:
                            if self.cfg.nr_gpus > 1 and self.cfg.bn_train is True:
                                self.logger.warning(
                                    "BN is calculated only on single GPU.")
                            extra_update_ops = tf.get_collection(
                                tf.GraphKeys.UPDATE_OPS, name_scope)
                            with tf.control_dependencies(extra_update_ops):
                                grads = self._optimizer.compute_gradients(loss)
                        else:
                            grads = self._optimizer.compute_gradients(loss)
                        final_grads = []
                        with tf.variable_scope('Gradient_Mult') as scope:
                            for grad, var in grads:
                                scale = 1.
                                if self.cfg.double_bias and '/biases:' in var.name:
                                    scale *= 2.
                                if not np.allclose(scale, 1.):
                                    grad = tf.multiply(grad, scale)
                                final_grads.append((grad, var))
                        tower_grads.append(final_grads)

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if False:
            variable_averages = tf.train.ExponentialMovingAverage(0.9999)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            variables_averages_op = variable_averages.apply(
                variables_to_average)

            apply_gradient_op = self._optimizer.apply_gradients(grads)
            train_op = tf.group(apply_gradient_op, variables_averages_op,
                                *extra_update_ops)
        else:
            apply_gradient_op = self._optimizer.apply_gradients(grads)
            train_op = tf.group(apply_gradient_op, *extra_update_ops)

        return train_op
Beispiel #28
0
def L_O_Net(inputs,label=None,bbox_target=None,landmark_target=None,animoji_target=None,training=True):
    # batch_norm_params = {
        # 'decay': 0.995,
        # 'epsilon': 0.001,
        # 'updates_collections': None,
        # 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
    # }
    # with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        # activation_fn = prelu,
                        # weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                        # biases_initializer=tf.zeros_initializer(),
                        # weights_regularizer=slim.l2_regularizer(0.0005),
                        # normalizer_fn=slim.batch_norm,    
                        # normalizer_params=batch_norm_params
                        # ):
                        
    with slim.arg_scope([slim.conv2d],
                        activation_fn = prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),                        
                        padding='valid'):
        print('L_O_Net network shape')
        print(inputs.get_shape())
        net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3,3], stride=1, scope='conv1', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope='pool1', padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],stride=1,scope='conv2', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope='pool2', padding='valid')
        print(net.get_shape())
        net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],stride=1,scope='conv3', padding='valid')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool3', padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,num_outputs=128,kernel_size=[2,2],stride=1,scope='conv4', padding='valid')
        print(net.get_shape())
        net = tf.transpose(net, perm=[0,3,1,2]) 
        print(net.get_shape())        
        fc_flatten = slim.flatten(net)
        print(fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten, num_outputs=256,scope='fc1', activation_fn=prelu)
        print(fc1.get_shape())

        cls_prob = slim.fully_connected(fc1,num_outputs=2,scope='cls_fc',activation_fn=tf.nn.softmax)
        print(cls_prob.get_shape())

        bbox_pred = slim.fully_connected(fc1,num_outputs=4,scope='bbox_fc',activation_fn=None)
        print(bbox_pred.get_shape())

        landmark_pred = slim.fully_connected(fc1,num_outputs=10,scope='landmark_fc',activation_fn=None)
        print(landmark_pred.get_shape())        

        animoji_pred = slim.fully_connected(fc1,num_outputs=140,scope='animoji_fc',activation_fn=None)
        print(animoji_pred.get_shape())
        
        if training:
            cls_loss = cls_ohem(cls_prob,label)
            bbox_loss = bbox_ohem(bbox_pred,bbox_target,label)
            accuracy, recall = cal_accuracy(cls_prob,label)
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,label)
            animoji_loss = animoji_ohem(animoji_pred, animoji_target,label)
            L2_loss = tf.add_n(tf.losses.get_regularization_losses())
            return cls_loss,bbox_loss,landmark_loss,animoji_loss,L2_loss,accuracy, recall
        else:
            return cls_prob,bbox_pred,landmark_pred,animoji_pred
Beispiel #29
0
def angular_softmax(prelogits,
                    label,
                    num_classes,
                    global_step,
                    weight_decay,
                    m,
                    lamb_min,
                    lamb_max,
                    reuse=None):
    ''' Tensorflow implementation of Angular-Softmax, proposed in:
        W. Liu, Y. Wen, Z. Yu, M. Li, B. Raj, and L. Song. 
        Sphereface: Deep hypersphere embedding for face recognition. In CVPR, 2017.
    '''
    num_features = prelogits.shape[1].value
    batch_size = tf.shape(prelogits)[0]
    # Multiple-angle expansions of cos(m * theta) in terms of cos(theta), for m = 0..5
    lambda_m_theta = [
        lambda x: x**0,
        lambda x: x**1,
        lambda x: 2.0 * (x**2) - 1.0,
        lambda x: 4.0 * (x**3) - 3.0 * x,
        lambda x: 8.0 * (x**4) - 8.0 * (x**2) + 1.0,
        lambda x: 16.0 * (x**5) - 20.0 * (x**3) + 5.0 * x,
    ]

    with tf.variable_scope('AngularSoftmax', reuse=reuse):
        weights = tf.get_variable(
            'weights',
            shape=(num_features, num_classes),
            regularizer=slim.l2_regularizer(1e-4),
            initializer=slim.xavier_initializer(),
            # initializer=tf.truncated_normal_initializer(stddev=0.1),
            trainable=True,
            dtype=tf.float32)
        lamb = tf.get_variable('lambda',
                               shape=(),
                               initializer=tf.constant_initializer(lamb_max),
                               trainable=False,
                               dtype=tf.float32)
        prelogits_norm = tf.sqrt(
            tf.reduce_sum(tf.square(prelogits), axis=1, keep_dims=True))
        weights_normed = tf.nn.l2_normalize(weights, dim=0)
        prelogits_normed = tf.nn.l2_normalize(prelogits, dim=1)

        # Compute cosine and phi
        cos_theta = tf.matmul(prelogits_normed, weights_normed)
        cos_theta = tf.minimum(1.0, tf.maximum(-1.0, cos_theta))
        theta = tf.acos(cos_theta)
        cos_m_theta = lambda_m_theta[m](cos_theta)
        k = tf.floor(m * theta / 3.14159265)
        phi_theta = tf.pow(-1.0, k) * cos_m_theta - 2.0 * k

        cos_theta = cos_theta * prelogits_norm
        phi_theta = phi_theta * prelogits_norm

        lamb_new = tf.maximum(
            lamb_min,
            lamb_max / (1.0 + 0.1 * tf.cast(global_step, tf.float32)))
        update_lamb = tf.assign(lamb, lamb_new)

        # Compute loss
        with tf.control_dependencies([update_lamb]):
            label_dense = tf.one_hot(label, num_classes, dtype=tf.float32)

            logits = cos_theta
            logits -= label_dense * cos_theta * 1.0 / (1.0 + lamb)
            logits += label_dense * phi_theta * 1.0 / (1.0 + lamb)

            cross_entropy =  tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
                labels=label, logits=logits), name='cross_entropy')

    return cross_entropy
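For reference, phi_theta above implements the SphereFace angular margin: cos_m_theta evaluates cos(m*theta) through the multiple-angle polynomials in lambda_m_theta, and the (-1)^k and -2k terms extend it to a function that decreases monotonically over [0, pi]:

\psi(\theta_{y_i}) = (-1)^k \cos(m\,\theta_{y_i}) - 2k,
\qquad \theta_{y_i} \in \big[\tfrac{k\pi}{m}, \tfrac{(k+1)\pi}{m}\big],\; k \in \{0,\dots,m-1\}

so that, inside the control-dependency block, the logit for the target class becomes the lambda-annealed combination

f_{y_i} = \frac{\lambda \cos\theta_{y_i} + \psi(\theta_{y_i})}{1+\lambda}\,\lVert x_i \rVert .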
Beispiel #30
0
# yapf: disable
fc            = ts.add_arg_scope(tf.layers.dense)
conv1d        = ts.add_arg_scope(tf.layers.conv1d)
conv2d        = ts.add_arg_scope(tf.layers.conv2d)
sep_conv2d    = ts.add_arg_scope(tf.layers.separable_conv2d)
max_pooling2d = ts.add_arg_scope(tf.layers.max_pooling2d)
batch_norm    = ts.add_arg_scope(tf.layers.batch_normalization)
# yapf: enable

conv2d_activation = tf.nn.elu

conv2d_params = {
    'kernel_size': 3,
    'strides': (1, 1),
    'padding': 'SAME',
    'kernel_initializer': ts.xavier_initializer(),
    'use_bias': True,
    'bias_initializer': tf.zeros_initializer(),
}

sep_conv2d_params = {
    'kernel_size': 3,
    'strides': (1, 1),
    'dilation_rate': (1, 1),
    'depth_multiplier': 1,
    'padding': 'SAME',
    'depthwise_initializer': ts.xavier_initializer(),
    'pointwise_initializer': ts.xavier_initializer(),
    'use_bias': True,
    'bias_initializer': tf.zeros_initializer(),
}
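A minimal usage sketch (not part of the original snippet): add_arg_scope lets the wrapped tf.layers functions accept defaults from an arg_scope, so the parameter dictionaries above can be applied once per scope. This assumes ts also exposes arg_scope, as tf.contrib.framework does alongside add_arg_scope:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 32, 3])
with ts.arg_scope([conv2d], activation=conv2d_activation, **conv2d_params):
    net = conv2d(x, filters=16, name='conv_a')    # picks up kernel_size, padding, initializers
    net = conv2d(net, filters=32, name='conv_b')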
Beispiel #31
0
def mobilenet(dict_data, params):

    n_labels = params['n_labels']
    is_training = params['is_training']
    inputs = dict_data['images']

    width_multiplier = params.get('width_multiplier', 1.0)
    scope = params.get('scope', 'MobileNet')
    freeze_convs = params.get('freeze_convs', False)

    def _depthwise_separable_conv(inputs,
                                  num_pwc_filters,
                                  width_multiplier,
                                  sc,
                                  downsample=False,
                                  freeze_convs=False):
        num_pwc_filters = round(num_pwc_filters * width_multiplier)
        _stride = 2 if downsample else 1

        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(
            inputs,
            num_outputs=None,
            stride=_stride,
            depth_multiplier=1,
            kernel_size=[3, 3],
            scope=sc + '/depthwise_conv',
            trainable=not freeze_convs)

        bn = slim.batch_norm(depthwise_conv,
                             scope=sc + '/dw_batch_norm',
                             trainable=not freeze_convs)
        pointwise_conv = slim.convolution2d(bn,
                                            num_pwc_filters,
                                            kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv',
                                            trainable=not freeze_convs)
        bn = slim.batch_norm(pointwise_conv,
                             scope=sc + '/pw_batch_norm',
                             trainable=not freeze_convs)
        return bn

    # with tf.variable_scope(scope) as sc:
    end_points_collection = '_end_points'
    with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                        activation_fn=None,
                        outputs_collections=[end_points_collection]):
        with slim.arg_scope([slim.batch_norm],
                            is_training=is_training,
                            activation_fn=tf.nn.relu,
                            fused=True):
            net = slim.convolution2d(inputs,
                                     round(32 * width_multiplier), [3, 3],
                                     stride=2,
                                     padding='SAME',
                                     scope='conv_1',
                                     trainable=not freeze_convs)
            net = slim.batch_norm(net,
                                  scope='conv_1/batch_norm',
                                  trainable=not freeze_convs)

            net = _depthwise_separable_conv(net,
                                            64,
                                            width_multiplier,
                                            sc='conv_ds_2',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            128,
                                            width_multiplier,
                                            downsample=True,
                                            sc='conv_ds_3',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            128,
                                            width_multiplier,
                                            sc='conv_ds_4',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            256,
                                            width_multiplier,
                                            downsample=True,
                                            sc='conv_ds_5',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            256,
                                            width_multiplier,
                                            sc='conv_ds_6',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            512,
                                            width_multiplier,
                                            downsample=True,
                                            sc='conv_ds_7',
                                            freeze_convs=freeze_convs)

            net = _depthwise_separable_conv(net,
                                            512,
                                            width_multiplier,
                                            sc='conv_ds_8',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            512,
                                            width_multiplier,
                                            sc='conv_ds_9',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            512,
                                            width_multiplier,
                                            sc='conv_ds_10',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            512,
                                            width_multiplier,
                                            sc='conv_ds_11',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            512,
                                            width_multiplier,
                                            sc='conv_ds_12',
                                            freeze_convs=freeze_convs)

            net = _depthwise_separable_conv(net,
                                            1024,
                                            width_multiplier,
                                            downsample=True,
                                            sc='conv_ds_13',
                                            freeze_convs=freeze_convs)
            net = _depthwise_separable_conv(net,
                                            1024,
                                            width_multiplier,
                                            sc='conv_ds_14',
                                            freeze_convs=freeze_convs)
            net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')

            net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')

            with tf.variable_scope('block_fc1'):
                net = tf.layers.dense(
                    inputs=net,
                    units=1024,
                    activation=tf.nn.relu,
                    kernel_initializer=slim.xavier_initializer(),
                    kernel_regularizer=slim.l2_regularizer(0.0001))

            with tf.variable_scope('block_fc2'):
                net = tf.layers.dense(
                    inputs=net,
                    units=512,
                    activation=tf.nn.relu,
                    kernel_initializer=slim.xavier_initializer(),
                    kernel_regularizer=slim.l2_regularizer(0.0001))

            with tf.variable_scope('block_fc3'):
                net = tf.layers.dense(
                    inputs=net,
                    units=n_labels,
                    activation=tf.nn.relu,
                    kernel_initializer=slim.xavier_initializer(),
                    kernel_regularizer=slim.l2_regularizer(0.0001))

            return net
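A hedged usage sketch: mobilenet expects the images under dict_data['images'] and its hyperparameters in params; n_labels and is_training are required, while width_multiplier, scope, and freeze_convs fall back to the defaults above. The 224x224 input size below is an assumption chosen so the final 7x7 average pool matches the feature map:

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
logits = mobilenet({'images': images},
                   {'n_labels': 10,            # hypothetical number of classes
                    'is_training': True,
                    'width_multiplier': 0.75,  # thinner variant
                    'freeze_convs': False})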
Beispiel #32
0
def P_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print('P_Net network shape')
        print(inputs.get_shape())
        net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1')
        print(net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool1',
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=16,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        print(net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        print(net.get_shape())
        conv4_1 = slim.conv2d(net,
                              num_outputs=2,
                              kernel_size=[1, 1],
                              stride=1,
                              scope='conv4_1',
                              activation_fn=tf.nn.softmax)

        print(conv4_1.get_shape())
        bbox_pred = slim.conv2d(net,
                                num_outputs=4,
                                kernel_size=[1, 1],
                                stride=1,
                                scope='conv4_2',
                                activation_fn=None)
        print(bbox_pred.get_shape())

        landmark_pred = slim.conv2d(net,
                                    num_outputs=10,
                                    kernel_size=[1, 1],
                                    stride=1,
                                    scope='conv4_3',
                                    activation_fn=None)
        print(landmark_pred.get_shape())

        if training:
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            landmark_pred = tf.squeeze(landmark_pred, [1, 2],
                                       name='landmark_pred')
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)

            accuracy, recall = cal_accuracy(cls_prob, label)
            L2_loss = tf.add_n(tf.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, recall
        else:
            # when testing, batch_size = 1
            cls_pro_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_pro_test, bbox_pred_test, landmark_pred_test