Example 1
    def softmax_cross_entropy_loss(self):
        """
        Compute the softmax cross entropy loss for box classification,
        weighting each proposal by how far its matcher score is from 0.5.

        Returns:
            scalar Tensor
        """
        self._log_accuracy()
        wsummary.variable_summaries_v2(self.gt_classes, "gt_classes")
        wsummary.variable_summaries_v2(self.pred_class_logits,
                                       "pred_class_logits")
        scores = tf.stop_gradient(tf.reshape(self.proposals[ED_SCORES], [-1]))
        # Down-weight ambiguous proposals: the weight grows quadratically from
        # 0 at score 0.5 and saturates at 1.0 once |score - 0.5| >= 0.1.
        #weights = tf.abs(scores-0.5)*4
        weights = tf.minimum(tf.pow(tf.abs(scores - 0.5), 2) * 100, 1.0)
        weights = tf.stop_gradient(weights)
        wsummary.histogram_or_scalar(weights, "cls_loss_weights")
        if self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD > 1e-3:
            with tf.name_scope("modify_gtclasses"):
                threshold = self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD
                gt_classes = self.gt_classes
                gt_classes = tf.where(tf.greater(scores, threshold),
                                      gt_classes, tf.zeros_like(gt_classes))
            classes_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=self.pred_class_logits,
                labels=gt_classes,
                loss_collection=None,
                reduction=tf.losses.Reduction.NONE)
        else:
            classes_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=self.pred_class_logits,
                labels=self.gt_classes,
                loss_collection=None,
                reduction=tf.losses.Reduction.NONE)

        classes_loss = weights * classes_loss
        classes_loss = tf.reduce_mean(classes_loss)
        wsummary.histogram_or_scalar(classes_loss, "fast_rcnn/classes_loss")
        return classes_loss * self.cfg.MODEL.ROI_HEADS.BOX_CLS_LOSS_SCALE
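
A quick sanity check of the weighting curve used above (a standalone numpy sketch, not part of the original class): proposals whose matcher score sits near the 0.5 decision boundary get almost no classification loss, while clear positives and clear negatives get full weight.

    import numpy as np

    scores = np.array([0.1, 0.4, 0.5, 0.55, 0.6, 0.9])
    weights = np.minimum((scores - 0.5) ** 2 * 100, 1.0)
    print(weights)  # [1.   1.   0.   0.25 1.   1.  ]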
Example 2
    def get_pred_iou_lossv5(self, threshold=None):
        '''
        Use the IoU between the proposal bboxes and the gt bboxes as the
        regression target.
        :return: scalar loss tensor
        '''
        with tf.name_scope("get_pred_iou_lossv5"):
            gt_scores = self.proposals[ED_SCORES]
            gt_scores = tf.stop_gradient(tf.reshape(gt_scores, [-1]))
            pred_iou_logits = self.pred_iou_logits
            pred_iou_logits = tf.reshape(pred_iou_logits, [-1])
            if threshold is not None:
                pred_score = pred_iou_logits
                mask0 = tf.greater(gt_scores, threshold)
                mask1 = tf.logical_and(
                    tf.logical_not(mask0),
                    tf.greater_equal(pred_score, threshold + 0.05))
                mask = tf.logical_or(mask0, mask1)
                gt_scores = tf.boolean_mask(gt_scores, mask)
                pred_iou_logits = tf.boolean_mask(pred_iou_logits, mask)

            wsummary.histogram_or_scalar(gt_scores, "gt_iou_by_matcher")
            loss = tf.losses.absolute_difference(
                labels=gt_scores,
                predictions=pred_iou_logits,
                loss_collection=None,
                reduction=tf.losses.Reduction.NONE)
            return tf.reduce_mean(loss)
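
The mask above keeps two kinds of proposals when a threshold is given: real positives (matcher score above the threshold) and confident false positives (predicted score at least threshold + 0.05 despite a low matcher score), so the loss concentrates on informative samples. A minimal numpy sketch of the same rule with made-up values:

    import numpy as np

    gt_scores = np.array([0.8, 0.2, 0.3, 0.9])
    pred_score = np.array([0.7, 0.9, 0.4, 0.8])
    threshold = 0.5
    mask0 = gt_scores > threshold                      # real positives
    mask1 = ~mask0 & (pred_score >= threshold + 0.05)  # confident false positives
    print(mask0 | mask1)  # [ True  True False  True]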
Example 3
    def smooth_l1_loss(self):
        """
        Compute the smooth L1 loss for box regression.

        Returns:
            scalar Tensor
        """
        #gt_anchor_deltas = self.box2box_transform.get_deltas(self.anchors,self.gt_boxes,gt_objectness_logits_i,indices)
        with tf.name_scope("box_regression_loss"):
            gt_proposal_deltas = self.box2box_transform.get_deltas_by_proposals_data(
                self.proposals)
            batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
                gt_proposal_deltas)
            gt_proposal_deltas = tf.reshape(gt_proposal_deltas,
                                            [batch_size * box_nr, box_dim])
            cls_agnostic_bbox_reg = (
                self.pred_proposal_deltas.get_shape().as_list()[-1] == box_dim)
            num_classes = self.pred_class_logits.get_shape().as_list()[-1]
            fg_num_classes = num_classes - 1

            # Box delta loss is only computed between the prediction for the gt class k
            # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions
            # for non-gt classes and background.
            # Empty fg_inds produces a valid loss of zero as long as the size_average
            # arg to smooth_l1_loss is False (otherwise it uses mean internally
            # and would produce a nan loss).
            fg_inds = tf.greater(self.gt_classes, 0)
            gt_proposal_deltas = tf.boolean_mask(gt_proposal_deltas, fg_inds)
            pred_proposal_deltas = tf.boolean_mask(self.pred_proposal_deltas,
                                                   fg_inds)
            gt_logits_i = tf.boolean_mask(self.gt_classes, fg_inds)
            if not cls_agnostic_bbox_reg:
                pred_proposal_deltas = tf.reshape(
                    pred_proposal_deltas, [-1, fg_num_classes, box_dim])
                pred_proposal_deltas = wmlt.select_2thdata_by_index_v2(
                    pred_proposal_deltas, gt_logits_i - 1)

            loss_box_reg = tf.losses.huber_loss(
                predictions=pred_proposal_deltas,
                labels=gt_proposal_deltas,
                loss_collection=None,
                reduction=tf.losses.Reduction.SUM,
            )
            num_samples = wmlt.num_elements(self.gt_classes)
            # The loss is normalized using the total number of regions (R), not the number
            # of foreground regions even though the box regression loss is only defined on
            # foreground regions. Why? Because doing so gives equal training influence to
            # each foreground example. To see how, consider two different minibatches:
            #  (1) Contains a single foreground region
            #  (2) Contains 100 foreground regions
            # If we normalize by the number of foreground regions, the single example in
            # minibatch (1) will be given 100 times as much influence as each foreground
            # example in minibatch (2). Normalizing by the total number of regions, R,
            # means that the single example in minibatch (1) and each of the 100 examples
            # in minibatch (2) are given equal influence.
            loss_box_reg = loss_box_reg / num_samples

        wsummary.histogram_or_scalar(loss_box_reg, "fast_rcnn/box_reg_loss")

        return loss_box_reg * self.cfg.MODEL.ROI_HEADS.BOX_REG_LOSS_SCALE
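
To make the normalization comment concrete, a tiny numeric check in plain Python (illustrative numbers only): each foreground box contributes per-box-loss / normalizer to the total, so normalizing by R gives every foreground box the same influence regardless of how many foreground boxes the minibatch contains, whereas normalizing by the foreground count inflates the influence of lone examples.

    R = 512                      # total sampled regions per minibatch
    for num_fg in (1, 100):
        # per-box influence: normalize by R (constant) vs by fg count (varies)
        print(num_fg, 1.0 / R, 1.0 / num_fg)
    # -> 1   0.001953125 1.0
    # -> 100 0.001953125 0.01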
Example 4
    def softmax_cross_entropy_loss(self):
        """
        Compute the softmax cross entropy loss for box classification.

        Returns:
            scalar Tensor
        """
        self._log_accuracy()
        wsummary.variable_summaries_v2(self.gt_classes, "gt_classes")
        wsummary.variable_summaries_v2(self.pred_class_logits,
                                       "pred_class_logits")
        if self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD > 1e-3:
            with tf.name_scope("modify_gtclasses"):
                threshold = self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD
                scores = tf.reshape(self.proposals[ED_SCORES], [-1])
                gt_classes = self.gt_classes
                gt_classes = tf.where(tf.greater(scores, threshold),
                                      gt_classes, tf.zeros_like(gt_classes))
            classes_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=self.pred_class_logits,
                labels=gt_classes,
                loss_collection=None,
                reduction=tf.losses.Reduction.MEAN)
        else:
            classes_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=self.pred_class_logits,
                labels=self.gt_classes,
                loss_collection=None,
                reduction=tf.losses.Reduction.MEAN)

        wsummary.histogram_or_scalar(classes_loss, "fast_rcnn/classes_loss")
        return classes_loss * self.cfg.MODEL.ROI_HEADS.BOX_CLS_LOSS_SCALE
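
A compact numpy sketch of the relabeling branch (illustrative values): proposals whose matcher score does not exceed POS_LABELS_THRESHOLD have their class label reset to 0, i.e. treated as background for the classification loss.

    import numpy as np

    threshold = 0.3
    scores = np.array([0.9, 0.2, 0.35, 0.1])
    gt_classes = np.array([3, 5, 2, 7])
    print(np.where(scores > threshold, gt_classes, 0))  # [3 0 2 0]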
Example 5
    def losses(self):
        with tf.variable_scope("RPNLoss"):
            gt_objectness_logits, gt_anchor_deltas = self._get_ground_truth()
            # In an image, all anchors are concatenated together before
            # sampling; Detectron2 uses the same strategy.
            pos_idx, neg_idx = subsample_labels(gt_objectness_logits,
                                                self.batch_size_per_image,
                                                self.positive_fraction)

            batch_size = self.pred_objectness_logits[0].get_shape().as_list()[0]
            # RPN num classes == 1
            num_cell_anchors = self.pred_objectness_logits[0].get_shape().as_list()[-1]
            box_dim = self.pred_anchor_deltas[0].get_shape().as_list()[-1] // num_cell_anchors
            pred_objectness_logits = [tf.reshape(x, [batch_size, -1])
                                      for x in self.pred_objectness_logits]
            pred_objectness_logits = tf.concat(pred_objectness_logits, axis=1)
            pred_anchor_deltas = [tf.reshape(x, [batch_size, -1, box_dim])
                                  for x in self.pred_anchor_deltas]
            pred_anchor_deltas = tf.concat(pred_anchor_deltas, axis=1)  # shape=[B,-1,4]
            pred_objectness_logits = tf.reshape(pred_objectness_logits, [-1])
            anchors = tf.tile(self.anchors, [batch_size, 1, 1])
            anchors = tf.reshape(anchors, [-1, box_dim])
            pred_anchor_deltas = tf.reshape(pred_anchor_deltas, [-1, box_dim])

            if global_cfg.GLOBAL.DEBUG:
                with tf.device("/cpu:0"):
                    with tf.name_scope("rpn_sampled_box"):
                        log_anchors = self.anchors * tf.ones([batch_size, 1, 1])
                        logmask = tf.reshape(pos_idx, [batch_size, -1])
                        wsummary.detection_image_summary_by_logmask(
                            images=self.inputs[IMAGE],
                            boxes=log_anchors,
                            logmask=logmask)

            valid_mask = tf.logical_or(pos_idx, neg_idx)
            gt_objectness_logits = tf.reshape(gt_objectness_logits, [-1])
            gt_objectness_logits = tf.boolean_mask(gt_objectness_logits, valid_mask)
            pred_objectness_logits = tf.boolean_mask(pred_objectness_logits, valid_mask)

            gt_anchor_deltas = tf.reshape(gt_anchor_deltas, [-1, box_dim])
            gt_anchor_deltas = tf.boolean_mask(gt_anchor_deltas, pos_idx)
            pred_anchor_deltas = tf.boolean_mask(pred_anchor_deltas, pos_idx)
            anchors = tf.boolean_mask(anchors, pos_idx)

            pred_anchor_deltas = self.box2box_transform.apply_deltas(
                deltas=pred_anchor_deltas, boxes=anchors)
            objectness_loss, localization_loss = rpn_losses_giou(
                gt_objectness_logits,
                gt_anchor_deltas,
                pred_objectness_logits,
                pred_anchor_deltas,
            )
            if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.INFO:
                with tf.name_scope("RPNCorrectRatio"):
                    ratio = wnn.sigmoid_accuracy_ratio(
                        logits=pred_objectness_logits,
                        labels=gt_objectness_logits)
                tf.summary.scalar("rpn_accuracy_ratio", ratio)
            normalizer = 1.0 / (batch_size * self.batch_size_per_image)
            loss_cls = objectness_loss * normalizer  # cls: classification loss
            loss_loc = localization_loss * normalizer  # loc: localization loss
            losses = {"loss_rpn_cls": loss_cls, "loss_rpn_loc": loss_loc}
            wsummary.histogram_or_scalar(loss_cls, "rpn/cls_loss")
            wsummary.histogram_or_scalar(loss_loc, "rpn/loc_loss")

            return losses
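
subsample_labels itself is not shown here; per the comment it follows Detectron2's sampler, which draws at most batch_size_per_image anchors per image with a fixed positive fraction. A numpy sketch under that assumption (the real helper evidently returns boolean masks, given the tf.boolean_mask calls above; indices are returned here for brevity):

    import numpy as np

    def subsample_labels_np(labels, num_samples, positive_fraction):
        # Detectron2 convention assumed: labels > 0 positive, == 0 negative,
        # anything else (e.g. -1) ignored.
        positive = np.nonzero(labels > 0)[0]
        negative = np.nonzero(labels == 0)[0]
        num_pos = min(len(positive), int(num_samples * positive_fraction))
        num_neg = min(len(negative), num_samples - num_pos)
        pos_idx = np.random.permutation(positive)[:num_pos]
        neg_idx = np.random.permutation(negative)[:num_neg]
        return pos_idx, neg_idx

    labels = np.array([1, 1, 0, 0, 0, -1, 1, 0])
    print(subsample_labels_np(labels, num_samples=4, positive_fraction=0.5))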
Example 6
    def forward(self, x, bboxes, img_size):
        """
        Args:
            x (list[Tensor]): feature maps, each of shape [batch_size,H,W,C],
                ordered from high resolution to low
            bboxes: [batch_size,box_nr,4]
            img_size: [2], (H,W)

        Returns:
            Tensor:
                A tensor of shape (M, output_size, output_size, C), where
                M = batch_size * box_nr is the total number of boxes
                aggregated over all batch images and C is the number of
                channels in `x`.
        """
        assert isinstance(x, list), "Arguments to pooler must be lists"
        level_num = len(x)

        with tf.name_scope("ROIPoolers"):
            if level_num == 1:
                return self.level_pooler(x[0], bboxes)

            level_assignments = assign_boxes_to_levels(
                bboxes, 0, level_num - 1, self.canonical_box_size,
                self.canonical_level, img_size)
            self.level_assignments = level_assignments

            features = []
            for net in x:
                features.append(self.level_pooler(net,bboxes))

            if isinstance(features[0], (list, tuple)):
                features = [tf.stack(f, axis=1) for f in zip(*features)]
            else:
                features = tf.stack(features, axis=1)
            level_assignments = tf.reshape(level_assignments, [-1])

            if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
                wsummary.histogram_or_scalar(level_assignments, "level_assignments")

            if isinstance(features, (list, tuple)):
                output = [wmlt.batch_gather(f, level_assignments) for f in features]
            else:
                output = wmlt.batch_gather(features, level_assignments)

            return output
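
assign_boxes_to_levels is defined elsewhere; in FPN-style poolers it conventionally implements level = floor(canonical_level + log2(sqrt(box_area) / canonical_box_size)), clamped to the valid range (Lin et al., FPN). A numpy sketch under that assumption; the box layout and default constants here are illustrative:

    import numpy as np

    def assign_boxes_to_levels_np(boxes, min_level, max_level,
                                  canonical_box_size=224.0, canonical_level=2):
        # boxes: [N,4] as (ymin, xmin, ymax, xmax) in absolute pixels (assumed)
        h = boxes[:, 2] - boxes[:, 0]
        w = boxes[:, 3] - boxes[:, 1]
        size = np.sqrt(np.maximum(h * w, 1e-8))
        level = np.floor(canonical_level + np.log2(size / canonical_box_size))
        return np.clip(level, min_level, max_level).astype(np.int32)

    boxes = np.array([[0, 0, 56, 56], [0, 0, 224, 224], [0, 0, 896, 896]],
                     dtype=np.float32)
    print(assign_boxes_to_levels_np(boxes, 0, 3))  # [0 2 3]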
Example 7
    def preprocess_image(self, batched_inputs):
        """
        Normalize, pad and batch the input images.
        """
        with tf.name_scope(f"preprocess_image_{self.cfg.MODEL.PREPROCESS}"):
            b_img = batched_inputs[IMAGE]
            if self.cfg.MODEL.PREPROCESS == "ton1p1":
                # scale uint8-range pixels to [-1, 1]
                b_img = (b_img - 127.5) / 127.5
            elif self.cfg.MODEL.PREPROCESS == "m0v1":
                b_img = MetaArch.m0v1(b_img)
            elif self.cfg.MODEL.PREPROCESS == "subimagenetmean":
                channel_means = [123.68, 116.779, 103.939]  # ImageNet RGB means
                b_img = b_img - [[[channel_means]]]
            elif self.cfg.MODEL.PREPROCESS == "standardization":
                b_img = tf.image.per_image_standardization(b_img)
            elif self.cfg.MODEL.PREPROCESS == "none":
                pass
            else:
                raise ValueError(
                    f"Unknown preprocess type {self.cfg.MODEL.PREPROCESS}")
            wsummary.histogram_or_scalar(b_img, "preprocessed_image")
            batched_inputs[IMAGE] = b_img
            return batched_inputs
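
A quick numpy check of the two purely arithmetic modes (m0v1 and the summary helper are framework-specific and skipped here):

    import numpy as np

    img = np.full((1, 2, 2, 3), 255.0)          # dummy white image, NHWC
    print(((img - 127.5) / 127.5)[0, 0, 0])     # "ton1p1" -> [1. 1. 1.]
    means = np.array([[[[123.68, 116.779, 103.939]]]])
    print((img - means)[0, 0, 0])               # "subimagenetmean" -> [131.32 138.221 151.061]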
Example 8
    def get_pred_iou_lossv0(self, threshold=None):
        '''
        Use the IoU between the proposal bboxes and the gt bboxes as the
        target.
        :return: scalar loss tensor
        '''
        with tf.name_scope("get_pred_iouv0_loss"):
            gt_scores = self.proposals[ED_SCORES]
            gt_scores = tf.stop_gradient(tf.reshape(gt_scores, [-1]))
            pred_iou_logits = self.pred_iou_logits
            pred_iou_logits = tf.reshape(pred_iou_logits, [-1])
            if threshold is not None:
                pred_score = tf.nn.sigmoid(pred_iou_logits)
                mask0 = tf.greater(gt_scores, threshold)
                mask1 = tf.logical_and(
                    tf.logical_not(mask0),
                    tf.greater_equal(pred_score, threshold + 0.05))
                mask = tf.logical_or(mask0, mask1)
                gt_scores = tf.boolean_mask(gt_scores, mask)
                pred_iou_logits = tf.boolean_mask(pred_iou_logits, mask)

            wsummary.histogram_or_scalar(gt_scores, "gt_iou_by_matcher")
            loss = wnn.sigmoid_cross_entropy_with_logits_FL(
                labels=gt_scores, logits=pred_iou_logits)
            return tf.reduce_mean(loss)
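
wnn.sigmoid_cross_entropy_with_logits_FL is framework-specific; the _FL suffix suggests a focal-loss-weighted sigmoid cross entropy (Lin et al., 2017). Note the contrast with get_pred_iou_lossv5 above, which regresses the raw output with an L1 loss instead of training it as a logit. A numpy sketch of the standard focal formulation, which also works elementwise with the soft IoU labels used here:

    import numpy as np

    def sigmoid_focal_loss_np(labels, logits, alpha=0.25, gamma=2.0):
        p = 1.0 / (1.0 + np.exp(-logits))
        ce = -(labels * np.log(p + 1e-12) + (1 - labels) * np.log(1 - p + 1e-12))
        p_t = labels * p + (1 - labels) * (1 - p)          # prob of the "true" side
        alpha_t = labels * alpha + (1 - labels) * (1 - alpha)
        return alpha_t * (1.0 - p_t) ** gamma * ce         # down-weights easy samples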
Example 9
    def get_pred_centerness_loss(self):
        with tf.name_scope("get_pred_centerness_loss"):
            # gather the matched gt box for each proposal; tf.nn.relu clamps
            # unmatched (negative) indices to 0
            gt_proposal_deltas = wmlt.batch_gather(
                self.proposals.gt_boxes, tf.nn.relu(self.proposals.indices))
            batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
                gt_proposal_deltas)
            gt_proposal_deltas = tf.reshape(gt_proposal_deltas,
                                            [batch_size * box_nr, box_dim])
            proposal_bboxes = tf.reshape(self.proposals.boxes,
                                         [batch_size * box_nr, box_dim])
            cls_agnostic_bbox_reg = (
                self.pred_proposal_deltas.get_shape().as_list()[-1] == box_dim)
            num_classes = self.pred_class_logits.get_shape().as_list()[-1]
            fg_num_classes = num_classes - 1
            pred_iou_logits = self.pred_iou_logits

            fg_inds = tf.greater(self.gt_classes, 0)
            gt_proposal_deltas = tf.boolean_mask(gt_proposal_deltas, fg_inds)
            pred_proposal_deltas = tf.boolean_mask(self.pred_proposal_deltas,
                                                   fg_inds)
            proposal_bboxes = tf.boolean_mask(proposal_bboxes, fg_inds)
            gt_logits_i = tf.boolean_mask(self.gt_classes, fg_inds)
            pred_iou_logits_pos = tf.reshape(
                tf.boolean_mask(pred_iou_logits, fg_inds), [-1])
            if not cls_agnostic_bbox_reg:
                pred_proposal_deltas = tf.reshape(
                    pred_proposal_deltas, [-1, fg_num_classes, box_dim])
                pred_proposal_deltas = wmlt.select_2thdata_by_index_v2(
                    pred_proposal_deltas, gt_logits_i - 1)

            pred_bboxes = self.box2box_transform.apply_deltas(
                pred_proposal_deltas, boxes=proposal_bboxes)
            # convert the decoded predictions and the matched gt boxes to
            # (center, size) form before measuring the center offset
            pred_bboxes = odb.to_cxyhw(pred_bboxes)
            gt_bboxes = odb.to_cxyhw(gt_proposal_deltas)
            deltas = tf.abs(gt_bboxes[..., :2] - pred_bboxes[..., :2]) * 2
            wsummary.histogram_or_scalar(deltas, "centerness_deltas")
            centerness = 1 - tf.reduce_max(
                deltas / (gt_bboxes[..., 2:] + 1e-8), axis=-1, keepdims=False)
            wsummary.histogram_or_scalar(centerness, "centerness")
            loss_pos = wnn.sigmoid_cross_entropy_with_logits_FL(
                labels=centerness, logits=pred_iou_logits_pos)
            wsummary.histogram_or_scalar(tf.nn.sigmoid(pred_iou_logits_pos),
                                         "pred_centerness")
            loss_pos = tf.reduce_mean(loss_pos)

            tf.summary.scalar("centerness_loss", loss_pos)

            loss = loss_pos

        return loss
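
The centerness target above is 1 minus the largest center offset normalized by the gt box size: a prediction whose center coincides with the gt center scores 1, one whose center sits on the gt border scores 0. A numpy sketch with boxes already in center/size form (a (cx, cy, w, h) layout is assumed here for illustration):

    import numpy as np

    def centerness_np(gt, pred):
        deltas = np.abs(gt[:, :2] - pred[:, :2]) * 2.0
        return 1.0 - np.max(deltas / (gt[:, 2:] + 1e-8), axis=-1)

    gt = np.array([[50.0, 50.0, 20.0, 20.0]])
    print(centerness_np(gt, np.array([[50.0, 50.0, 20.0, 20.0]])))  # [1.] centered
    print(centerness_np(gt, np.array([[60.0, 50.0, 20.0, 20.0]])))  # [0.] center on the border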
Example 10
    def losses(self):
        """
        Args:
            For `gt_classes` and `gt_anchors_deltas` parameters, see
                :meth:`FCOSGIou.get_ground_truth`.
            Their shapes are (N, R) and (N, R, 4), respectively, where R is
            the total number of anchors across levels, i.e. sum(Hi x Wi x A)
            For `pred_class_logits` and `pred_anchor_deltas`, see
                :meth:`FCOSGIouHead.forward`.

        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a scalar tensor
                storing the loss. Used during training only. The dict keys are:
                "loss_cls" and "loss_box_reg"
        """

        assert len(self.pred_logits[0].get_shape()) == 4, "error logits dim"

        gt_results = self._get_ground_truth()
        loss_cls_list = []
        loss_regression_list = []
        loss_center_ness_list = []
        total_num_foreground = []

        img_size = tf.shape(self.batched_inputs[IMAGE])[1:3]

        for i, gt_results_item in enumerate(gt_results):
            gt_classes = gt_results_item['g_classes']
            gt_boxes = gt_results_item['g_boxes']
            g_center_ness = gt_results_item['g_center_ness']
            pred_class_logits = self.pred_logits[i]
            pred_regression = self.pred_regression[i]
            pred_center_ness = self.pred_center_ness[i]

            foreground_idxs = (gt_classes > 0)
            num_foreground = tf.reduce_sum(tf.cast(foreground_idxs, tf.int32))
            total_num_foreground.append(num_foreground)

            gt_classes_target = tf.one_hot(gt_classes,
                                           depth=self.num_classes + 1)
            gt_classes_target = gt_classes_target[..., 1:]

            pred_center_ness = tf.expand_dims(pred_center_ness, axis=-1)
            wsummary.histogram_or_scalar(pred_center_ness, "center_ness")
            # logits loss
            loss_cls = tf.reduce_sum(
                wnn.sigmoid_cross_entropy_with_logits_FL(
                    labels=gt_classes_target,
                    logits=pred_class_logits,
                    alpha=self.focal_loss_alpha,
                    gamma=self.focal_loss_gamma))

            # regression loss
            pred_boxes = self.box2box_transform.apply_deltas(
                regression=pred_regression, img_size=img_size)
            if (global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG
                    and gt_classes.get_shape().as_list()[0] > 1):
                log_boxes = self.box2box_transform.apply_deltas(
                    regression=gt_results_item['g_regression'],
                    img_size=img_size)
                log_boxes = odbox.tfabsolutely_boxes_to_relative_boxes(
                    log_boxes, width=img_size[1], height=img_size[0])
                boxes1 = tf.reshape(log_boxes[1:2], [1, -1, 4])
                wsummary.detection_image_summary(
                    images=self.batched_inputs[IMAGE][1:2],
                    boxes=boxes1,
                    name="FCOSGIou_decode_test")
            pred_center_ness = tf.boolean_mask(pred_center_ness,
                                               foreground_idxs)
            g_center_ness = tf.boolean_mask(g_center_ness, foreground_idxs)
            pred_boxes = tf.boolean_mask(pred_boxes, foreground_idxs)
            gt_boxes = tf.boolean_mask(gt_boxes, foreground_idxs)
            wsummary.histogram_or_scalar(pred_center_ness, "center_ness_pos")
            reg_loss_sum = (1.0 - odl.giou(pred_boxes, gt_boxes))
            wmlt.variable_summaries_v2(reg_loss_sum, f"giou_loss{i}")
            pred_center_ness = tf.squeeze(pred_center_ness, axis=-1)
            reg_norm = tf.reduce_sum(g_center_ness) + 1e-5
            reg_loss_sum = reg_loss_sum * g_center_ness
            wmlt.variable_summaries_v2(reg_loss_sum, f"loss_sum{i}")
            loss_box_reg = tf.reduce_sum(reg_loss_sum) * 300 / reg_norm
            wmlt.variable_summaries_v2(loss_box_reg, f"box_reg_loss_{i}")

            loss_center_ness = 0.5 * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=g_center_ness, logits=pred_center_ness)
            loss_center_ness = tf.reduce_sum(loss_center_ness) * 0.1
            wmlt.variable_summaries_v2(loss_center_ness,
                                       f"center_ness_loss{i}")

            loss_cls_list.append(loss_cls)
            loss_regression_list.append(loss_box_reg)
            loss_center_ness_list.append(loss_center_ness)

        total_num_foreground = tf.to_float(
            tf.maximum(tf.add_n(total_num_foreground), 1))
        return {
            "fcos_loss_cls":
            tf.add_n(loss_cls_list) / total_num_foreground,
            "fcos_loss_center_ness":
            tf.add_n(loss_center_ness_list) / total_num_foreground,
            "fcos_loss_box_reg":
            tf.add_n(loss_regression_list) / total_num_foreground
        }
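
odl.giou is framework-specific, but the regression term matches the standard GIoU loss, 1 - GIoU (Rezatofighi et al., 2019). A numpy sketch of GIoU for axis-aligned boxes (a (ymin, xmin, ymax, xmax) layout is assumed):

    import numpy as np

    def giou_np(b1, b2):
        inter_min = np.maximum(b1[:, :2], b2[:, :2])
        inter_max = np.minimum(b1[:, 2:], b2[:, 2:])
        inter = np.prod(np.clip(inter_max - inter_min, 0, None), axis=-1)
        area1 = np.prod(b1[:, 2:] - b1[:, :2], axis=-1)
        area2 = np.prod(b2[:, 2:] - b2[:, :2], axis=-1)
        union = area1 + area2 - inter
        iou = inter / np.maximum(union, 1e-8)
        hull = np.prod(np.maximum(b1[:, 2:], b2[:, 2:]) -
                       np.minimum(b1[:, :2], b2[:, :2]), axis=-1)
        return iou - (hull - union) / np.maximum(hull, 1e-8)

    a = np.array([[0.0, 0.0, 2.0, 2.0]])
    print(1.0 - giou_np(a, a))                                 # [0.] perfect overlap
    print(1.0 - giou_np(a, np.array([[0.0, 2.0, 2.0, 4.0]])))  # [1.] touching but disjoint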
Example 11
    def build_net_run_on_multi_gpus_nccl(self):
        if not os.path.exists(self.log_dir):
            wmlu.create_empty_dir(self.log_dir)
        if not os.path.exists(self.ckpt_dir):
            wmlu.create_empty_dir(self.ckpt_dir)
        '''if self.cfg.GLOBAL.DEBUG:
            data[IMAGE] = tf.Print(data[IMAGE],[tf.shape(data[IMAGE]),data[ORG_HEIGHT],data[ORG_WIDTH],data[HEIGHT],data[WIDTH]],summarize=100,
                                   name="XXXXX")'''
        all_loss_dict = {}
        steps = self.cfg.SOLVER.STEPS
        print("Train steps:", steps)
        lr = wnn.build_learning_rate(
            self.cfg.SOLVER.BASE_LR,
            global_step=self.global_step,
            lr_decay_type=self.cfg.SOLVER.LR_DECAY_TYPE,
            steps=steps,
            decay_factor=self.cfg.SOLVER.LR_DECAY_FACTOR,
            total_steps=steps[-1],
            min_lr=1e-6,
            warmup_steps=self.cfg.SOLVER.WARMUP_ITERS)
        tf.summary.scalar("lr", lr)
        self.max_train_step = steps[-1]

        if self.cfg.SOLVER.OPTIMIZER == "Momentum":
            opt = wnn.str2optimizer(
                "Momentum", lr, momentum=self.cfg.SOLVER.OPTIMIZER_momentum)
        else:
            opt = wnn.str2optimizer(self.cfg.SOLVER.OPTIMIZER, lr)

        tower_grads = []
        if len(self.gpus) == 0:
            self.gpus = [0]
        if len(self.cfg.SOLVER.TRAIN_SCOPES) > 1:
            train_scopes = self.cfg.SOLVER.TRAIN_SCOPES
        else:
            train_scopes = None
        if len(self.cfg.SOLVER.TRAIN_REPATTERN) > 1:
            train_repattern = self.cfg.SOLVER.TRAIN_REPATTERN
        else:
            train_repattern = None

        for i in range(len(self.gpus)):
            scope = tf.get_variable_scope()
            if i > 0:
                #scope._reuse = tf.AUTO_REUSE
                scope.reuse_variables()
            with tf.device(f"/gpu:{i}"):
                with tf.device(":/cpu:0"):
                    data = self.data.get_next()

                self.input_data = data
                with tf.name_scope(f"GPU{self.gpus[i]}"):
                    with tf.device(":/cpu:0"):
                        DataLoader.detection_image_summary(
                            data, name=f"data_source{i}")

                    self.res_data, loss_dict = self.model.forward(data)
                loss_values = []
                for k, v in loss_dict.items():
                    all_loss_dict[k + f"_stage{i}"] = v
                    tf.summary.scalar(f"loss/{k}", v)
                    ##
                    #v = tf.Print(v,[k,tf.is_nan(v), tf.is_inf(v)])
                    ##
                    v = tf.cond(tf.logical_or(tf.is_nan(v), tf.is_inf(v)),
                                lambda: tf.zeros_like(v), lambda: v)
                    loss_values.append(v)

                scope._reuse = tf.AUTO_REUSE
                '''if (i==0) and len(tf.get_collection(GRADIENT_DEBUG_COLLECTION))>0:
                    total_loss_sum = tf.add_n(loss_values)
                    xs = tf.get_collection(GRADIENT_DEBUG_COLLECTION)
                    grads = tf.gradients(total_loss_sum,xs)
                    grads = [tf.reduce_sum(tf.abs(x)) for x in grads]
                    loss_values[0] = tf.Print(loss_values[0],grads+["grads"],summarize=100)'''

                grads, total_loss, variables_to_train = wnn.nget_train_opv3(
                    optimizer=opt,
                    loss=loss_values,
                    scopes=train_scopes,
                    re_pattern=train_repattern)
                #
                if self.cfg.SOLVER.FILTER_NAN_AND_INF_GRADS:
                    grads = [list(x) for x in grads]
                    for j, (g, v) in enumerate(grads):
                        try:
                            if g is not None:
                                # replace any nan/inf entries with tiny random
                                # noise so the update step stays finite
                                g = tf.where(
                                    tf.logical_or(tf.is_nan(g), tf.is_inf(g)),
                                    tf.random_normal(
                                        shape=wmlt.combined_static_and_dynamic_shape(g),
                                        stddev=1e-5), g)
                        except Exception:
                            print(f"Error {g}/{v}")
                            raise
                        grads[j][0] = g
                #
                tower_grads.append(grads)
        ########################
        '''tower_grads[0] = [list(x) for x in tower_grads[0]]
        for i,(g,v) in enumerate(tower_grads[0]):
            tower_grads[0][i][0] = tf.Print(g,["B_"+v.name,tf.reduce_min(g),tf.reduce_mean(g),tf.reduce_max(g)])'''
        ########################

        if self.cfg.SOLVER.CLIP_NORM > 1:
            avg_grads = wnn.average_grads_nccl(
                tower_grads, clip_norm=self.cfg.SOLVER.CLIP_NORM)
        else:
            avg_grads = wnn.average_grads_nccl(tower_grads, clip_norm=None)
        '''avg_grads = [list(x) for x in avg_grads]
        for i,(g,v) in enumerate(avg_grads):
            avg_grads[i][0] = tf.Print(g,[v.name,tf.reduce_min(g),tf.reduce_mean(g),tf.reduce_max(g)])'''

        opt0 = wnn.apply_gradientsv3(avg_grads, self.global_step, opt)
        opt1 = wnn.get_batch_norm_ops()
        self.train_op = tf.group(opt0, opt1)

        self.total_loss, self.variables_to_train = total_loss, variables_to_train

        self.loss_dict = all_loss_dict

        config = tf.ConfigProto(allow_soft_placement=True)
        #config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)

        if self.debug_tf:
            self.sess = tfdbg.LocalCLIDebugWrapperSession(self.sess)

        print("variables to train:")
        wmlu.show_list(self.variables_to_train)
        for v in self.variables_to_train:
            wsummary.histogram_or_scalar(v, v.name[:-2])
        wnn.log_moving_variable()

        self.saver = tf.train.Saver(max_to_keep=100)
        tf.summary.scalar("total_loss", self.total_loss)

        self.summary = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(self.log_dir,
                                                    self.sess.graph)
        init = tf.global_variables_initializer()
        self.sess.run(init)
        print("batch_norm_ops.")
        wmlu.show_list(
            [x.name for x in tf.get_collection(tf.GraphKeys.UPDATE_OPS)])
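
wnn.average_grads_nccl is framework-specific; conceptually it averages each variable's gradient across the per-GPU tower lists (the classic multi-tower pattern from the TF1 CIFAR-10 tutorial) and optionally clips. A minimal sketch of that pattern; the real helper presumably uses NCCL ops and may clip by global norm rather than per gradient:

    import tensorflow as tf

    def average_grads_sketch(tower_grads, clip_norm=None):
        # tower_grads: one [(grad, var), ...] list per GPU, same variable order
        averaged = []
        for grad_and_vars in zip(*tower_grads):  # group grads by variable
            grads = [g for g, _ in grad_and_vars if g is not None]
            grad = tf.add_n(grads) / float(len(grads))
            if clip_norm is not None:
                grad = tf.clip_by_norm(grad, clip_norm)
            averaged.append((grad, grad_and_vars[0][1]))
        return averaged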
Example 12
    def build_net(self):
        if not os.path.exists(self.log_dir):
            wmlu.create_empty_dir(self.log_dir)
        if not os.path.exists(self.ckpt_dir):
            wmlu.create_empty_dir(self.ckpt_dir)
        with tf.device(":/cpu:0"):
            data = self.data.get_next()
        DataLoader.detection_image_summary(data, name="data_source")
        self.input_data = data
        '''if self.cfg.GLOBAL.DEBUG:
            data[IMAGE] = tf.Print(data[IMAGE],[tf.shape(data[IMAGE]),data[ORG_HEIGHT],data[ORG_WIDTH],data[HEIGHT],data[WIDTH]],summarize=100,
                                   name="XXXXX")'''
        self.res_data, loss_dict = self.model.forward(data)
        if self.model.is_training:
            for k, v in loss_dict.items():
                tf.summary.scalar(f"loss/{k}", v)
                v = tf.cond(tf.logical_or(tf.is_nan(v), tf.is_inf(v)),
                            lambda: tf.zeros_like(v), lambda: v)
                tf.losses.add_loss(v)
        elif self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.RESEARCH:
            research = self.cfg.GLOBAL.RESEARCH
            if 'result_classes' in research:
                print("replace labels with gtlabels.")
                labels = odt.replace_with_gtlabels(
                    bboxes=self.res_data[RD_BOXES],
                    labels=self.res_data[RD_LABELS],
                    length=self.res_data[RD_LENGTH],
                    gtbboxes=data[GT_BOXES],
                    gtlabels=data[GT_LABELS],
                    gtlength=data[GT_LENGTH])
                self.res_data[RD_LABELS] = labels

            if 'result_bboxes' in research:
                print("replace bboxes with gtbboxes.")
                bboxes = odt.replace_with_gtbboxes(
                    bboxes=self.res_data[RD_BOXES],
                    labels=self.res_data[RD_LABELS],
                    length=self.res_data[RD_LENGTH],
                    gtbboxes=data[GT_BOXES],
                    gtlabels=data[GT_LABELS],
                    gtlength=data[GT_LENGTH])
                self.res_data[RD_BOXES] = bboxes

        self.loss_dict = loss_dict

        if not self.model.is_training and self.cfg.GLOBAL.GPU_MEM_FRACTION > 0.1:
            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=self.cfg.GLOBAL.GPU_MEM_FRACTION)
            config = tf.ConfigProto(allow_soft_placement=True,
                                    gpu_options=gpu_options)
        else:
            config = tf.ConfigProto(allow_soft_placement=True)
        if not self.model.is_training and self.cfg.GLOBAL.GPU_MEM_FRACTION <= 0.1:
            config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.top_variable_name_scope = "Model"

        if self.model.is_training:
            steps = self.cfg.SOLVER.STEPS
            print("Train steps:", steps)
            lr = wnn.build_learning_rate(
                self.cfg.SOLVER.BASE_LR,
                global_step=self.global_step,
                lr_decay_type=self.cfg.SOLVER.LR_DECAY_TYPE,
                steps=steps,
                decay_factor=self.cfg.SOLVER.LR_DECAY_FACTOR,
                total_steps=steps[-1],
                warmup_steps=self.cfg.SOLVER.WARMUP_ITERS)
            tf.summary.scalar("lr", lr)
            opt = wnn.str2optimizer("Momentum", lr, momentum=0.9)
            self.max_train_step = steps[-1]
            self.train_op, self.total_loss, self.variables_to_train = wnn.nget_train_op(
                self.global_step,
                optimizer=opt,
                clip_norm=self.cfg.SOLVER.CLIP_NORM)
            print("variables to train:")
            wmlu.show_list(self.variables_to_train)
            for v in self.variables_to_train:
                wsummary.histogram_or_scalar(v, v.name[:-2])
            wnn.log_moving_variable()

            self.saver = tf.train.Saver(max_to_keep=100)
            tf.summary.scalar(self.cfg.GLOBAL.PROJ_NAME + "_total_loss",
                              self.total_loss)

        self.summary = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(self.log_dir,
                                                    self.sess.graph)
        init = tf.global_variables_initializer()
        self.sess.run(init)
        print("batch_norm_ops.")
        wmlu.show_list(
            [x.name for x in tf.get_collection(tf.GraphKeys.UPDATE_OPS)])
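
The batch-norm op listing at the end matters because TF1 batch normalization pushes its moving-mean/variance updates into tf.GraphKeys.UPDATE_OPS, and those ops must run together with the train op (here that is presumably handled inside wnn.nget_train_op). The vanilla TF1 pattern, for reference:

    import tensorflow as tf

    x = tf.random_normal([8, 4])
    y = tf.layers.batch_normalization(x, training=True)
    loss = tf.reduce_mean(tf.square(y))
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # the moving statistics are now refreshed on every training step
        train_op = tf.train.MomentumOptimizer(0.01, 0.9).minimize(loss)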