Code Example #1
File: inference.py Project: yeqingcheng368/automl
def det_post_process(params: Dict[Any, Any],
                     cls_outputs: Dict[int, tf.Tensor],
                     box_outputs: Dict[int, tf.Tensor],
                     scales: List[float],
                     min_score_thresh,
                     max_boxes_to_draw):
  """Post preprocessing the box/class predictions.

  Args:
    params: a parameter dictionary that includes `min_level`, `max_level`,
      `batch_size`, and `num_classes`.
    cls_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in [batch_size, height, width,
      num_anchors * 4].
    scales: a list of float values indicating image scale.
    min_score_thresh: A float representing the threshold for deciding when to
      remove boxes based on score.
    max_boxes_to_draw: Max number of boxes to draw.

  Returns:
    detections_batch: a batch of detection results. Each detection is a tensor
      with each row representing [image_id, x, y, width, height, score, class].
  """
  # TODO(tanmingxing): refactor the code to make it more explicit.
  outputs = {
      'cls_outputs_all': [None],
      'box_outputs_all': [None],
      'indices_all': [None],
      'classes_all': [None]
  }
  det_model_fn.add_metric_fn_inputs(
      params, cls_outputs, box_outputs, outputs, -1)

  # Create anchor_label for picking top-k predictions.
  eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                 params['num_scales'], params['aspect_ratios'],
                                 params['anchor_scale'], params['image_size'])
  anchor_labeler = anchors.AnchorLabeler(eval_anchors, params['num_classes'])

  # Add all detections for each input image.
  detections_batch = []
  for index in range(params['batch_size']):
    cls_outputs_per_sample = outputs['cls_outputs_all'][index]
    box_outputs_per_sample = outputs['box_outputs_all'][index]
    indices_per_sample = outputs['indices_all'][index]
    classes_per_sample = outputs['classes_all'][index]
    detections = anchor_labeler.generate_detections(
        cls_outputs_per_sample,
        box_outputs_per_sample,
        indices_per_sample,
        classes_per_sample,
        image_id=[index],
        image_scale=[scales[index]],
        min_score_thresh=min_score_thresh,
        max_boxes_to_draw=max_boxes_to_draw,
        disable_pyfun=params.get('disable_pyfun'))
    detections_batch.append(detections)
  return tf.stack(detections_batch, name='detections')
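A minimal usage sketch for det_post_process above, assuming the automl repo's modules are importable and that cls_outputs / box_outputs are the per-level prediction dicts produced by the EfficientDet model; the parameter values below are illustrative, not the repo's exact defaults.

# Sketch only: keys mirror the params accessed above; values are typical D0-style settings.
params = {
    'min_level': 3, 'max_level': 7, 'num_scales': 3,
    'aspect_ratios': [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
    'anchor_scale': 4.0, 'image_size': 512,
    'batch_size': 1, 'num_classes': 90,
}
detections = det_post_process(
    params, cls_outputs, box_outputs,
    scales=[1.0],               # one image scale per image in the batch
    min_score_thresh=0.4,
    max_boxes_to_draw=100)
# detections: [batch_size, max_boxes, 7], each row [image_id, x, y, width, height, score, class]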
Code Example #2
File: det_model_fn.py Project: zhangxinkang/automl
    def metric_fn(**kwargs):
      """Returns a dictionary that has the evaluation metrics."""
      batch_size = params['batch_size']
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

      if params.get('testdev_dir', None):
        logging.info('Eval testdev_dir %s', params['testdev_dir'])
        coco_metrics = coco_metric_fn(
            batch_size,
            anchor_labeler,
            params['val_json_file'],
            testdev_dir=params['testdev_dir'],
            disable_pyfun=params.get('disable_pyfun', None),
            **kwargs)
      else:
        logging.info('Eval val with groundtruths %s.', params['val_json_file'])
        coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                      params['val_json_file'], **kwargs)

      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics
Code Example #3
File: postprocess.py Project: jszendre/automl
def pre_nms(params, cls_outputs, box_outputs) -> Tuple[T, T, T]:
  """Detection post processing before nms.

  It takes the multi-level class and box predictions from the network, merges
  them into unified tensors, and computes boxes, scores, and classes.

  Args:
    params: a dict of parameters.
    cls_outputs: a list of tensors for classes, each tensor denotes a level
      of logits with shape [N, H, W, num_class * num_anchors].
    box_outputs: a list of tensors for boxes, each tensor denotes a level of
      boxes with shape [N, H, W, 4 * num_anchors].

  Returns:
    A tuple of (boxes, scores, classes).
  """
  cls_outputs, box_outputs = merge_class_box_level_outputs(
      params, cls_outputs, box_outputs)
  cls_outputs, box_outputs, classes, indices = topk_class_boxes(
      params, cls_outputs, box_outputs)

  # Get boxes by applying bounding box regression to anchors.
  eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                 params['num_scales'], params['aspect_ratios'],
                                 params['anchor_scale'], params['image_size'])
  anchor_boxes = tf.gather(eval_anchors.boxes, indices)
  boxes = anchors.decode_box_outputs_tf(box_outputs, anchor_boxes)

  # convert logits to scores.
  scores = tf.math.sigmoid(cls_outputs)
  return boxes, scores, classes
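For reference, the decoding done by anchors.decode_box_outputs_tf above follows the standard Faster R-CNN box parameterization, which Code Example #11 below writes out inline. A self-contained NumPy sketch of that arithmetic (the function name and exact signature here are assumptions for illustration, not the automl API):

import numpy as np

def decode_box_outputs_np(rel_codes, anchor_boxes):
    """Decode [ty, tx, th, tw] regression outputs against anchors.

    rel_codes: float array [N, 4] of network box outputs (ty, tx, th, tw).
    anchor_boxes: float array [N, 4] of anchor corners (ymin, xmin, ymax, xmax).
    Returns decoded boxes [N, 4] as (ymin, xmin, ymax, xmax).
    """
    ymin_a, xmin_a, ymax_a, xmax_a = np.moveaxis(anchor_boxes, -1, 0)
    ycenter_a = (ymin_a + ymax_a) / 2.0
    xcenter_a = (xmin_a + xmax_a) / 2.0
    ha = ymax_a - ymin_a
    wa = xmax_a - xmin_a

    ty, tx, th, tw = np.moveaxis(rel_codes, -1, 0)
    h = np.exp(th) * ha
    w = np.exp(tw) * wa
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a
    return np.stack([ycenter - h / 2.0, xcenter - w / 2.0,
                     ycenter + h / 2.0, xcenter + w / 2.0], axis=-1)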
Code Example #4
 def metric_fn(**kwargs):
   """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
   eval_anchors = anchors.Anchors(params['min_level'],
                                  params['max_level'],
                                  params['num_scales'],
                                  params['aspect_ratios'],
                                  params['anchor_scale'],
                                  params['image_size'])
   anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                          params['num_classes'])
   cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
   box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
   # add metrics to output
   cls_outputs = {}
   box_outputs = {}
   for level in range(params['min_level'], params['max_level'] + 1):
     cls_outputs[level] = kwargs['cls_outputs_%d' % level]
     box_outputs[level] = kwargs['box_outputs_%d' % level]
   detections = anchor_labeler.generate_detections(
       cls_outputs, box_outputs, kwargs['source_ids'])
   eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
   coco_metrics = eval_metric.estimator_metric_fn(detections,
                                                  kwargs['image_scales'])
   # Add metrics to output.
   output_metrics = {
       'cls_loss': cls_loss,
       'box_loss': box_loss,
   }
   output_metrics.update(coco_metrics)
   return output_metrics
Code Example #5
def _predict_postprocess(cls_outputs, box_outputs, labels, params):
    """Post processes prediction outputs."""
    predict_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                      params['num_scales'],
                                      params['aspect_ratios'],
                                      params['anchor_scale'],
                                      params['image_size'])
    cls_outputs, box_outputs, anchor_boxes = postprocess.reshape_outputs(
        cls_outputs, box_outputs, predict_anchors.boxes, params['min_level'],
        params['max_level'], params['num_classes'])
    boxes, scores, classes, num_detections = postprocess.generate_detections(
        cls_outputs, box_outputs, anchor_boxes)

    predictions = {
        'detection_boxes': boxes,
        'detection_classes': classes,
        'detection_scores': scores,
        'num_detections': num_detections,
    }

    if labels is not None:
        predictions.update({
            'image_info': labels['image_info'],
            'source_id': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
        })

    return predictions
Code Example #6
def det_post_process_combined(params, cls_outputs, box_outputs, scales,
                              min_score_thresh, max_boxes_to_draw):
    """A combined version of det_post_process with dynamic batch size support."""
    batch_size = tf.shape(list(cls_outputs.values())[0])[0]
    cls_outputs_all = []
    box_outputs_all = []
    # Concatenates class and box of all levels into one tensor.
    for level in range(params['min_level'], params['max_level'] + 1):
        if params['data_format'] == 'channels_first':
            cls_outputs[level] = tf.transpose(cls_outputs[level], [0, 2, 3, 1])
            box_outputs[level] = tf.transpose(box_outputs[level], [0, 2, 3, 1])

        cls_outputs_all.append(
            tf.reshape(cls_outputs[level],
                       [batch_size, -1, params['num_classes']]))
        box_outputs_all.append(
            tf.reshape(box_outputs[level], [batch_size, -1, 4]))
    cls_outputs_all = tf.concat(cls_outputs_all, 1)
    box_outputs_all = tf.concat(box_outputs_all, 1)

    # Create anchor_label for picking top-k predictions.
    eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                   params['num_scales'],
                                   params['aspect_ratios'],
                                   params['anchor_scale'],
                                   params['image_size'])
    anchor_boxes = eval_anchors.boxes
    scores = tf.math.sigmoid(cls_outputs_all)
    # apply bounding box regression to anchors
    boxes = anchors.decode_box_outputs_tf(box_outputs_all, anchor_boxes)
    boxes = tf.expand_dims(boxes, axis=2)
    scales = tf.expand_dims(scales, axis=-1)
    nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
        tf.image.combined_non_max_suppression(boxes,
                                              scores,
                                              max_boxes_to_draw,
                                              max_boxes_to_draw,
                                              score_threshold=min_score_thresh,
                                              clip_boxes=False))
    del valid_detections  # to be used in future.

    image_ids = tf.cast(tf.tile(tf.expand_dims(tf.range(batch_size), axis=1),
                                [1, max_boxes_to_draw]),
                        dtype=tf.float32)
    y = nmsed_boxes[..., 0] * scales
    x = nmsed_boxes[..., 1] * scales
    height = nmsed_boxes[..., 2] * scales - y
    width = nmsed_boxes[..., 3] * scales - x
    detection_list = [
        # Format: (image_ids, y, x, height, width, score, class)
        image_ids,
        y,
        x,
        height,
        width,
        nmsed_scores,
        tf.cast(nmsed_classes + 1, tf.float32)
    ]
    detections = tf.stack(detection_list, axis=2, name='detections')
    return detections
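If the stacked detections from det_post_process_combined need to be turned into plain Python records (for example a COCO-style results list), a small post-step like the following works; this is a sketch keyed to the row layout documented in the comment above ([image_id, y, x, height, width, score, class]) and is not part of the automl API:

import numpy as np

def detections_to_coco(detections, label_offset=0):
    """Convert a [batch, max_boxes, 7] detections array into COCO-style dicts."""
    results = []
    for row in np.asarray(detections).reshape(-1, 7):
        image_id, y, x, h, w, score, cls = row
        if score <= 0:  # skip empty / padded rows
            continue
        results.append({
            'image_id': int(image_id),
            'bbox': [float(x), float(y), float(w), float(h)],  # COCO bbox is [x, y, w, h]
            'score': float(score),
            'category_id': int(cls) + label_offset,
        })
    return results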
Code Example #7
 def __init__(self, iou_loss_type, min_level, max_level, num_scales,
              aspect_ratios, anchor_scale, image_size, **kwargs):
   super().__init__(**kwargs)
   self.iou_loss_type = iou_loss_type
   self.input_anchors = anchors.Anchors(min_level, max_level, num_scales,
                                        aspect_ratios, anchor_scale,
                                        image_size)
   self.box_coder = FasterRcnnBoxCoder()
Code Example #8
File: gnetlmm.py Project: LongZhao1992/GNetLMM
 def load_anchors(self,fn):
     """
     load anchors from file
     """
     F = self.genoreader.get_nrows()
     T = self.phenoreader.get_nrows()
      self.anchors = anchors.Anchors(F, T)
     
     self.anchors.load(fn)
Code Example #9
File: tools.py Project: Timen/tensorflow-video
def get_pred_results(cls_outputs_dict, box_outputs_dict, params):
    input_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                    params['num_scales'],
                                    params['aspect_ratios'],
                                    params['anchor_scale'],
                                    (params['image_size'] - 5))
    anchor_labeler = anchors.AnchorLabeler(input_anchors, params['num_classes'])

    return tf.map_fn(anchor_labeler.generate_detections,
                     (cls_outputs_dict, box_outputs_dict),
                     dtype=tf.float32)
Code Example #10
File: dataloader.py Project: Timen/tensorflow-video
 def __init__(self, params):
     self._max_num_instances = MAX_NUM_INSTANCES
     self._image_size = params["image_size"]
     self._num_classes = params["num_classes"]
     input_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     (params['image_size'] - 5))
     self.anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                                 params['num_classes'])
Code Example #11
    def det_post_process(params, class_outputs, box_outputs, scales):
        from object_detection.core.post_processing import \
            batch_multiclass_non_max_suppression
        cls_outputs_all, box_outputs_all = [], []
        for level in range(params['min_level'], params['max_level'] + 1):
            cls_outputs_all.append(
                tf.reshape(class_outputs[level],
                           [params['batch_size'], -1, params['num_classes']]))
            box_outputs_all.append(
                tf.reshape(box_outputs[level], [params['batch_size'], -1, 4]))
        cls_outputs_all = tf.concat(cls_outputs_all, 1)
        box_outputs_all = tf.concat(box_outputs_all, 1)
        probs = tf.math.sigmoid(cls_outputs_all)

        # Generate location of anchors.
        eval_anchors = tf.transpose(
            anchors.Anchors(params['min_level'], params['max_level'],
                            params['num_scales'], params['aspect_ratios'],
                            params['anchor_scale'],
                            params['image_size']).boxes)
        ycenter_a = (eval_anchors[0] + eval_anchors[2]) / 2
        xcenter_a = (eval_anchors[1] + eval_anchors[3]) / 2
        ha = eval_anchors[2] - eval_anchors[0]
        wa = eval_anchors[3] - eval_anchors[1]

        # Generate absolute bboxes in the units of pixels of the image.
        box_outputs_per_sample = tf.transpose(box_outputs_all[0])
        ty, tx, th, tw = (box_outputs_per_sample[0], box_outputs_per_sample[1],
                          box_outputs_per_sample[2], box_outputs_per_sample[3])
        w, h = tf.math.exp(tw) * wa, tf.math.exp(th) * ha
        ycenter, xcenter = ty * ha + ycenter_a, tx * wa + xcenter_a
        ymin, ymax = ycenter - h / 2.0, ycenter + h / 2.0
        xmin, xmax = xcenter - w / 2.0, xcenter + w / 2.0
        boxes = tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))

        # Generate the outputs
        boxes_all = tf.reshape(boxes, [params['batch_size'], -1, 1, 4])
        probs_all = tf.reshape(
            probs, [params['batch_size'], -1, params['num_classes']])
        (boxes_tf, scores_tf, classes_tf, _, _, num_detections_tf) = \
            batch_multiclass_non_max_suppression(
                boxes=boxes_all, scores=probs_all, score_thresh=0.5,
                iou_thresh=0.5,
                max_size_per_class=anchors.MAX_DETECTIONS_PER_IMAGE,
                max_total_size=anchors.MAX_DETECTIONS_PER_IMAGE,
                use_combined_nms=False, use_class_agnostic_nms=True)
        boxes_tf *= scales
        return [boxes_tf, scores_tf, classes_tf, num_detections_tf]
Code Example #12
File: retinanet_model.py Project: ryanpstauffer/tpu
        def metric_fn(**kwargs):
            """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
            eval_anchors = anchors.Anchors(params['min_level'],
                                           params['max_level'],
                                           params['num_scales'],
                                           params['aspect_ratios'],
                                           params['anchor_scale'],
                                           params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                                   params['num_classes'])
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            # add metrics to output
            cls_outputs = {}
            box_outputs = {}
            detections_bs = []
            for index in range(batch_size):
                for level in range(params['min_level'],
                                   params['max_level'] + 1):
                    _, w, h, c = kwargs['cls_outputs_%d' %
                                        level].get_shape().as_list()
                    cls_outputs[level] = tf.slice(
                        kwargs['cls_outputs_%d' % level], [index, 0, 0, 0],
                        [1, w, h, c])
                    _, w, h, c = kwargs['box_outputs_%d' %
                                        level].get_shape().as_list()
                    box_outputs[level] = tf.slice(
                        kwargs['box_outputs_%d' % level], [index, 0, 0, 0],
                        [1, w, h, c])
                detections = anchor_labeler.generate_detections(
                    cls_outputs, box_outputs,
                    tf.slice(kwargs['source_ids'], [index], [1]),
                    tf.slice(kwargs['image_scales'], [index], [1]))
                detections_bs.append(detections)
            eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
            coco_metrics = eval_metric.estimator_metric_fn(
                detections_bs, kwargs['groundtruth_data'])

            # Add metrics to output.
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics
Code Example #13
def _predict_postprocess(cls_outputs, box_outputs, params):
  """Post processes prediction outputs."""
  predict_anchors = anchors.Anchors(
      params['min_level'], params['max_level'], params['num_scales'],
      params['aspect_ratios'], params['anchor_scale'], params['image_size'])
  cls_outputs, box_outputs, anchor_boxes = postprocess.reshape_outputs(
      cls_outputs, box_outputs, predict_anchors.boxes, params['min_level'],
      params['max_level'], params['num_classes'])
  boxes, scores, classes, num_detections = postprocess.generate_detections(
      cls_outputs, box_outputs, anchor_boxes)

  predictions = {
      'detection_boxes': boxes,
      'detection_classes': classes,
      'detection_scores': scores,
      'num_detections': num_detections,
  }

  return predictions
Code Example #14
    def metric_fn(**kwargs):
      """Returns a dictionary that has the evaluation metrics."""
      batch_size = params['batch_size']
      eval_anchors = anchors.Anchors(
          params['min_level'], params['max_level'], params['num_scales'],
          params['aspect_ratios'], params['anchor_scale'], params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
      coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                    params['val_json_file'], **kwargs)

      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics
Code Example #15
File: gnetlmm.py Project: LongZhao1992/GNetLMM
    def gene_has_anchor(self, thresh, cis=True):
        """
        computes if a gene has a cis anchor

        input:
        cis_thresh   :    threshold for cis-association
        cis_window   :    max distance between snp and gene
        """
        
        F = self.genoreader.get_nrows()
        T = self.phenoreader.get_nrows()
                
        snp_ids  = self.genoreader.getSnpIds()
        gene_ids = self.phenoreader.getGeneIds()
        
        RV = {'pv':[], 'snp_ids':[], 'gene_ids':[], 'isnp':[], 'igene':[]}

        for f, pv0_f in self.assoc0_reader.getRowIterator():
   
            pv_min = np.min(pv0_f)
            if pv_min > thresh: continue
            idx_anchor = pv0_f == pv_min
            if not idx_anchor.any(): continue

            if cis:
                idx_anchor[idx_anchor] = self.find_cis_genes(f, idx_anchor)
     
            if idx_anchor.any():
                igenes = np.nonzero(idx_anchor)[0]
                for t in igenes:
                    RV['pv'].append(pv_min)
                    RV['snp_ids'].append(snp_ids[f])
                    RV['gene_ids'].append(gene_ids[t])
                    RV['isnp'].append(f)
                    RV['igene'].append(t)
          
        for key in RV.keys():
            RV[key] = np.array(RV[key])

        self.anchors = anchors.Anchors(F, T, pv=RV['pv'], snp_ids=RV['snp_ids'],
                                       gene_ids=RV['gene_ids'], igene=RV['igene'],
                                       isnp=RV['isnp'])
Code Example #16
def det_post_process(params: Dict[Any, Any], cls_outputs: Dict[int, tf.Tensor],
                     box_outputs: Dict[int, tf.Tensor], scales: List[float]):

    outputs = {
        'cls_outputs_all': [None],
        'box_outputs_all': [None],
        'indices_all': [None],
        'classes_all': [None]
    }

    add_metric_fn_inputs(params, cls_outputs, box_outputs, outputs)
    # Create anchor_label for picking top-k predictions.
    eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                   params['num_scales'],
                                   params['aspect_ratios'],
                                   params['anchor_scale'],
                                   params['image_size'])
    anchor_labeler = anchors.AnchorLabeler(eval_anchors, params['num_classes'])
    # Add all detections for each input image.
    detections_batch = []
    for index in range(params['batch_size']):
        # shape is [MAX_DETECTION_POINTS,] -> scores
        cls_outputs_per_sample = outputs['cls_outputs_all'][index]
        # shape is [MAX_DETECTION_POINTS, 4] -> box regression targets (ty, tx, th, tw)
        box_outputs_per_sample = outputs['box_outputs_all'][index]
        # shape is [MAX_DETECTION_POINTS,]
        indices_per_sample = outputs['indices_all'][index]
        # shape is [MAX_DETECTION_POINTS,]
        classes_per_sample = outputs['classes_all'][index]
        detections = anchor_labeler.generate_detections(
            cls_outputs_per_sample,
            box_outputs_per_sample,
            indices_per_sample,
            classes_per_sample,
            image_id=[index],
            image_scale=[scales[index]],
            disable_pyfun=False)
        detections_batch.append(detections)
    # shape is [batch, M, 7] -> [image_id, x, y, width, height, score, class]
    return tf.stack(detections_batch, name='detections')
Code Example #17
File: model.py Project: wuwusky/RetinaNet_simple
    def __init__(self, num_classes, block, layers):
        super(RetinaNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [128, 256, 512]
        elif block == Bottleneck:
            fpn_sizes = [512, 1024, 2048]

        self.fpn = PFN(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        self.regression = BoxDetect(256)
        self.classification = Classification(256, num_classes=num_classes)

        self.anchors = anchors.Anchors()
        self.boxs_regression = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.prior = 0.01
        self.classification.out.weight.data.fill_(0)
        self.classification.out.bias.data.fill_(-math.log((1.0 - self.prior) /
                                                          self.prior))

        self.regression.out.weight.data.fill_(0)
        self.regression.out.bias.data.fill_(0)
        self.freeze_bn()
Code Example #18
File: dataloader.py Project: jhseu/tpu
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def get_dataset_for_mode(data_dir, is_training):
            """Return the location of input samples for a given mode."""
            if is_training:
                return '%s/coco_train2017_nocrowd-*' % data_dir
            return '%s/coco_val2017-*' % data_dir

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets."""
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])

                # the image normalization is identical to Cloud TPU ResNet-50
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)
                image_original_shape = tf.shape(image)
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=params['image_size'],
                    max_dimension=params['image_size'])
                image_scale = tf.to_float(
                    image_original_shape[0]) / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                     params['image_size'],
                                                     params['image_size'])
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                row = (image, cls_targets, box_targets, num_positives,
                       source_id, image_scale)
                return row

        batch_size = params['batch_size']

        data_file_pattern = get_dataset_for_mode(self._data_dir,
                                                 self._is_training)
        dataset = tf.data.Dataset.list_files(data_file_pattern)

        dataset = dataset.shuffle(buffer_size=1024)
        if self._is_training:
            dataset = dataset.repeat()

        def prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(prefetch_dataset,
                                                cycle_length=32,
                                                sloppy=True))
        dataset = dataset.shuffle(20)

        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.apply(
            tf.contrib.data.batch_and_drop_remainder(batch_size))
        dataset = dataset.prefetch(1)

        (images, cls_targets, box_targets, num_positives, source_ids,
         image_scales) = dataset.make_one_shot_iterator().get_next()
        labels = {}
        # count num_positives in a batch
        num_positives_batch = tf.reduce_mean(num_positives)
        labels['mean_num_positives'] = tf.reshape(
            tf.tile(tf.expand_dims(num_positives_batch, 0), [
                batch_size,
            ]), [batch_size, 1])

        for level in range(params['min_level'], params['max_level'] + 1):
            labels['cls_targets_%d' % level] = cls_targets[level]
            labels['box_targets_%d' % level] = box_targets[level]
        labels['source_ids'] = source_ids
        labels['image_scales'] = image_scales
        return images, labels
Code Example #19
File: dataloader.py Project: hitlk/tpu
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets."""
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                # Handle crowd annotations. As crowd annotations are not large
                # instances, the model ignores them in training.
                if params['skip_crowd']:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                # the image normalization is identical to Cloud TPU ResNet-50
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)
                image_original_shape = tf.shape(image)
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=params['image_size'],
                    max_dimension=params['image_size'])
                image_scale = tf.to_float(
                    image_original_shape[0]) / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                     params['image_size'],
                                                     params['image_size'])
                (cls_targets, cls_weights, box_targets, box_weights,
                 num_positives, num_negatives,
                 num_ignored) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                row = (image, cls_targets, cls_weights, box_targets,
                       box_weights, num_positives, num_negatives, num_ignored,
                       source_id, image_scale)
                return row

        # batch_size = params['batch_size']
        batch_size = self._batch_size

        dataset = tf.data.Dataset.list_files(self._file_pattern)

        dataset = dataset.shuffle(buffer_size=1024)
        if self._is_training:
            dataset = dataset.repeat()

        def prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename,
                                              buffer_size=8 * 1000 * 1000)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(prefetch_dataset,
                                                cycle_length=1,
                                                sloppy=True))
        dataset = dataset.shuffle(buffer_size=3072)

        dataset = dataset.map(_dataset_parser, num_parallel_calls=12)
        dataset = dataset.prefetch(32)
        dataset = dataset.apply(
            tf.contrib.data.batch_and_drop_remainder(batch_size))
        dataset = dataset.prefetch(2)

        (images, cls_targets, cls_weights, box_targets, box_weights,
         num_positives, num_negatives, num_ignored, source_ids,
         image_scales) = dataset.make_one_shot_iterator().get_next()
        labels = {}
        # count num_positives in a batch
        num_positives_batch = tf.reduce_mean(num_positives)
        labels['mean_num_positives'] = tf.reshape(
            tf.tile(tf.expand_dims(num_positives_batch, 0), [
                batch_size,
            ]), [batch_size, 1])

        num_negatives_batch = tf.reduce_mean(num_negatives)
        labels['mean_num_negatives'] = tf.reshape(
            tf.tile(tf.expand_dims(num_negatives_batch, 0), [
                batch_size,
            ]), [batch_size, 1])

        num_ignored_batch = tf.reduce_mean(num_ignored)
        labels['mean_num_ignored'] = tf.reshape(
            tf.tile(tf.expand_dims(num_ignored_batch, 0), [batch_size]),
            [batch_size, 1])

        for level in range(params['min_level'], params['max_level'] + 1):
            labels['cls_targets_%d' % level] = cls_targets[level]
            labels['cls_weights_%d' % level] = cls_weights[level]
            labels['box_targets_%d' % level] = box_targets[level]
            labels['box_weights_%d' % level] = box_weights[level]
        labels['source_ids'] = source_ids
        labels['image_scales'] = image_scales
        return images, labels
Code Example #20
File: mask_rcnn_model.py Project: mypolarbear/tpu
def build_model_graph(features, labels, is_training, params):
    """Builds the forward model graph."""
    model_outputs = {}

    if params['transpose_input'] and is_training:
        features['images'] = tf.transpose(features['images'], [3, 0, 1, 2])
    batch_size, image_height, image_width, _ = (
        features['images'].get_shape().as_list())
    if 'source_ids' not in features:
        features['source_ids'] = -1 * tf.ones([batch_size], dtype=tf.float32)

    all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                  params['num_scales'],
                                  params['aspect_ratios'],
                                  params['anchor_scale'],
                                  (image_height, image_width))

    with tf.variable_scope('resnet%s' % params['resnet_depth']):
        resnet_fn = resnet.resnet_v1(
            params['resnet_depth'],
            num_batch_norm_group=params['num_batch_norm_group'])
        backbone_feats = resnet_fn(features['images'],
                                   (params['is_training_bn'] and is_training))

    fpn_feats = fpn.fpn(backbone_feats, params['min_level'],
                        params['max_level'])

    rpn_score_outputs, rpn_box_outputs = heads.rpn_head(
        fpn_feats, params['min_level'], params['max_level'],
        len(params['aspect_ratios'] * params['num_scales']))

    if is_training:
        rpn_pre_nms_topn = params['rpn_pre_nms_topn']
        rpn_post_nms_topn = params['rpn_post_nms_topn']
    else:
        rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
        rpn_post_nms_topn = params['test_rpn_post_nms_topn']

    rpn_box_scores, rpn_box_rois = roi_ops.multilevel_propose_rois(
        rpn_score_outputs,
        rpn_box_outputs,
        all_anchors,
        features['image_info'],
        rpn_pre_nms_topn,
        rpn_post_nms_topn,
        params['rpn_nms_threshold'],
        params['rpn_min_size'],
        bbox_reg_weights=None,
        use_tpu=params['use_tpu'])
    rpn_box_rois = tf.to_float(rpn_box_rois)
    if is_training:
        rpn_box_rois = tf.stop_gradient(rpn_box_rois)
        rpn_box_scores = tf.stop_gradient(rpn_box_scores)

    if is_training:
        # Sampling
        box_targets, class_targets, rpn_box_rois, proposal_to_label_map = (
            training_ops.proposal_label_op(
                rpn_box_rois,
                labels['gt_boxes'],
                labels['gt_classes'],
                features['image_info'],
                batch_size_per_im=params['batch_size_per_im'],
                fg_fraction=params['fg_fraction'],
                fg_thresh=params['fg_thresh'],
                bg_thresh_hi=params['bg_thresh_hi'],
                bg_thresh_lo=params['bg_thresh_lo']))

    # Performs multi-level RoIAlign.
    box_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_feats, rpn_box_rois, output_size=7)

    class_outputs, box_outputs, _ = heads.box_head(
        box_roi_features,
        num_classes=params['num_classes'],
        mlp_head_dim=params['fast_rcnn_mlp_head_dim'])

    if not is_training:
        if params['use_tpu']:
            detections = postprocess_ops.generate_detections_tpu(
                class_outputs, box_outputs, rpn_box_rois,
                features['source_ids'], features['image_info'],
                params['test_rpn_post_nms_topn'],
                params['test_detections_per_image'], params['test_nms'],
                params['bbox_reg_weights'])
        else:
            detections = postprocess_ops.generate_detections_gpu(
                class_outputs, box_outputs, rpn_box_rois,
                features['source_ids'], features['image_info'],
                params['test_rpn_post_nms_topn'],
                params['test_detections_per_image'], params['test_nms'],
                params['bbox_reg_weights'])

        model_outputs.update({
            'detections':
            tf.identity(detections, 'Detections'),
        })
        if params['output_box_features']:
            final_box_rois = detections[:, :, 1:5]
            final_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
                fpn_feats, final_box_rois, output_size=7)
            _, _, final_box_features = heads.box_head(
                final_roi_features,
                num_classes=params['num_classes'],
                mlp_head_dim=params['fast_rcnn_mlp_head_dim'])
            model_outputs.update({
                'box_features':
                tf.identity(final_box_features, 'BoxFeatures'),
            })
    else:
        encoded_box_targets = training_ops.encode_box_targets(
            rpn_box_rois, box_targets, class_targets,
            params['bbox_reg_weights'])
        model_outputs.update({
            'rpn_score_outputs': rpn_score_outputs,
            'rpn_box_outputs': rpn_box_outputs,
            'class_outputs': class_outputs,
            'box_outputs': box_outputs,
            'class_targets': class_targets,
            'box_targets': encoded_box_targets,
            'box_rois': rpn_box_rois,
        })

    # Faster-RCNN mode.
    if not params['include_mask']:
        return model_outputs

    # Mask sampling
    if not is_training:
        selected_box_rois = detections[:, :, 1:5]
        class_indices = tf.to_int32(detections[:, :, 6])
    else:
        (selected_class_targets, selected_box_targets, selected_box_rois,
         proposal_to_label_map) = (training_ops.select_fg_for_masks(
             class_targets,
             box_targets,
             rpn_box_rois,
             proposal_to_label_map,
             max_num_fg=int(params['batch_size_per_im'] *
                            params['fg_fraction'])))
        class_indices = tf.to_int32(selected_class_targets)

    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_feats, selected_box_rois, output_size=14)
    mask_outputs = heads.mask_head(mask_roi_features,
                                   class_indices,
                                   num_classes=params['num_classes'],
                                   mrcnn_resolution=params['mrcnn_resolution'])

    model_outputs.update({
        'mask_outputs': mask_outputs,
    })

    if is_training:
        mask_targets = training_ops.get_mask_targets(
            selected_box_rois, proposal_to_label_map, selected_box_targets,
            labels['cropped_gt_masks'], params['mrcnn_resolution'])
        model_outputs.update({
            'mask_targets': mask_targets,
            'selected_class_targets': selected_class_targets,
        })
    else:
        model_outputs['mask_outputs'] = tf.identity(
            tf.nn.sigmoid(model_outputs['mask_outputs']), 'Masks')

    return model_outputs
Code Example #21
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary containing an image and groundtruth annotations.

      Returns:
        features: a dictionary that contains the image and auxiliary
          information. The following describes {key: value} pairs in the
          dictionary.
          image: Image tensor that is preprocessed to have normalized value and
            fixed dimension [image_size, image_size, 3]
          image_info: image information that includes the original height and
            width, the scale of the processed image to the original image, and
            the scaled height and width.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
        labels: a dictionary that contains auxiliary information plus (optional)
          labels. The following describes {key: value} pairs in the dictionary.
          `labels` is only for training.
          score_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors]. The height_l and width_l
            represent the dimension of the objectness score at the l-th level.
          box_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors * 4]. The height_l and
            width_l represent the dimension of bounding box regression output at
            l-th level.
          gt_boxes: Groundtruth bounding box annotations. The box is represented
             in [y1, x1, y2, x2] format. The tensor is padded with -1 to the
             fixed dimension [self._max_num_instances, 4].
          gt_classes: Groundtruth classes annotations. The tensor is padded
            with -1 to the fixed dimension [self._max_num_instances].
          cropped_gt_masks: groundtruth masks cropped by the bounding box and
            resized to a fixed size determined by params['gt_mask_size']
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                data['groundtruth_is_crowd'] = tf.cond(
                    tf.greater(tf.size(data['groundtruth_is_crowd']),
                               0), lambda: data['groundtruth_is_crowd'],
                    lambda: tf.zeros_like(data['groundtruth_classes'],
                                          dtype=tf.bool))
                image = data['image']
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                orig_image = image
                source_id = data['source_id']
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                if self._mode == tf.estimator.ModeKeys.PREDICT:
                    image = preprocess_ops.normalize_image(image)
                    image, image_info, _, _, _ = preprocess_ops.resize_crop_pad(
                        image, params['image_size'], 2**params['max_level'])
                    if params['precision'] == 'bfloat16':
                        image = tf.cast(image, dtype=tf.bfloat16)

                    features = {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id,
                    }
                    if params['visualize_images_summary']:
                        resized_image = tf.image.resize_images(
                            orig_image, params['image_size'])
                        features['orig_images'] = resized_image
                    if params['include_groundtruth_in_features']:
                        labels = _prepare_labels_for_eval(
                            data,
                            target_num_instances=self._max_num_instances,
                            target_polygon_list_len=(
                                self._max_num_polygon_list_len),
                            use_instance_mask=params['include_mask'])
                        return {'features': features, 'labels': labels}
                    else:
                        return {'features': features}

                elif (self._mode == tf.estimator.ModeKeys.TRAIN
                      or self._mode == tf.estimator.ModeKeys.EVAL):
                    instance_masks = None
                    if self._use_instance_mask:
                        instance_masks = data['groundtruth_instance_masks']
                    boxes = data['groundtruth_boxes']
                    classes = data['groundtruth_classes']
                    classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                         [-1, 1])
                    if not params['use_category']:
                        classes = tf.cast(tf.greater(classes, 0),
                                          dtype=tf.float32)

                    if (params['skip_crowd_during_training']
                            and self._mode == tf.estimator.ModeKeys.TRAIN):
                        indices = tf.where(
                            tf.logical_not(data['groundtruth_is_crowd']))
                        classes = tf.gather_nd(classes, indices)
                        boxes = tf.gather_nd(boxes, indices)
                        if self._use_instance_mask:
                            instance_masks = tf.gather_nd(
                                instance_masks, indices)

                    image = preprocess_ops.normalize_image(image)
                    # Random flipping for training only.
                    if (self._mode == tf.estimator.ModeKeys.TRAIN
                            and params['input_rand_hflip']):
                        flipped_results = (
                            preprocess_ops.random_horizontal_flip(
                                image, boxes=boxes, masks=instance_masks))
                        if self._use_instance_mask:
                            image, boxes, instance_masks = flipped_results
                        else:
                            image, boxes = flipped_results
                    # Scaling, jittering and padding.
                    image, image_info, boxes, classes, cropped_gt_masks = (
                        preprocess_ops.resize_crop_pad(
                            image,
                            params['image_size'],
                            2**params['max_level'],
                            aug_scale_min=params['aug_scale_min'],
                            aug_scale_max=params['aug_scale_max'],
                            boxes=boxes,
                            classes=classes,
                            masks=instance_masks,
                            crop_mask_size=params['gt_mask_size']))
                    if cropped_gt_masks is not None:
                        cropped_gt_masks = tf.pad(cropped_gt_masks,
                                                  paddings=tf.constant([[
                                                      0,
                                                      0,
                                                  ], [
                                                      2,
                                                      2,
                                                  ], [2, 2]]),
                                                  mode='CONSTANT',
                                                  constant_values=0.)

                    padded_height, padded_width, _ = (
                        image.get_shape().as_list())
                    padded_image_size = (padded_height, padded_width)
                    input_anchors = anchors.Anchors(params['min_level'],
                                                    params['max_level'],
                                                    params['num_scales'],
                                                    params['aspect_ratios'],
                                                    params['anchor_scale'],
                                                    padded_image_size)
                    anchor_labeler = anchors.AnchorLabeler(
                        input_anchors, params['num_classes'],
                        params['rpn_positive_overlap'],
                        params['rpn_negative_overlap'],
                        params['rpn_batch_size_per_im'],
                        params['rpn_fg_fraction'])

                    # Assign anchors.
                    score_targets, box_targets = anchor_labeler.label_anchors(
                        boxes, classes)

                    # Pad groundtruth data.
                    boxes = preprocess_ops.pad_to_fixed_size(
                        boxes, -1, [self._max_num_instances, 4])
                    classes = preprocess_ops.pad_to_fixed_size(
                        classes, -1, [self._max_num_instances, 1])

                    # Pads cropped_gt_masks.
                    if self._use_instance_mask:
                        cropped_gt_masks = tf.reshape(
                            cropped_gt_masks,
                            tf.stack([tf.shape(cropped_gt_masks)[0], -1]))
                        cropped_gt_masks = preprocess_ops.pad_to_fixed_size(
                            cropped_gt_masks, -1, [
                                self._max_num_instances,
                                (params['gt_mask_size'] + 4)**2
                            ])
                        cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                            self._max_num_instances, params['gt_mask_size'] +
                            4, params['gt_mask_size'] + 4
                        ])

                    if params['precision'] == 'bfloat16':
                        image = tf.cast(image, dtype=tf.bfloat16)

                    features = {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id,
                    }
                    labels = {}
                    for level in range(params['min_level'],
                                       params['max_level'] + 1):
                        labels['score_targets_%d' %
                               level] = score_targets[level]
                        labels['box_targets_%d' % level] = box_targets[level]
                    labels['gt_boxes'] = boxes
                    labels['gt_classes'] = classes
                    if self._use_instance_mask:
                        labels['cropped_gt_masks'] = cropped_gt_masks
                    return features, labels
Code Example #22
    def __call__(self, params):
        image_size = (params['image_size'], params['image_size'])
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'], image_size)
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'],
                                               params['rpn_positive_overlap'],
                                               params['rpn_negative_overlap'],
                                               params['rpn_batch_size_per_im'],
                                               params['rpn_fg_fraction'])

        example_decoder = tf_example_decoder.TfExampleDecoder(
            use_instance_mask=self._use_instance_mask)

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary containing an image and groundtruth annotations.

      Returns:
        features: a dictionary that contains the image and auxiliary
          information. The following describes {key: value} pairs in the
          dictionary.
          image: Image tensor that is preprocessed to have normalized value and
            fixed dimension [image_size, image_size, 3]
          image_info: image information that includes the original height and
            width, the scale of the processed image to the original image, and
            the scaled height and width.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
        labels: a dictionary that contains auxiliary information plus (optional)
          labels. The following describes {key: value} pairs in the dictionary.
          `labels` is only for training.
          score_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors]. The height_l and width_l
            represent the dimension of the objectness score at the l-th level.
          box_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors * 4]. The height_l and
            width_l represent the dimension of bounding box regression output at
            l-th level.
          gt_boxes: Groundtruth bounding box annotations. The box is represented
             in [y1, x1, y2, x2] format. The tensor is padded with -1 to the
             fixed dimension [self._max_num_instances, 4].
          gt_classes: Groundtruth classes annotations. The tensor is padded
            with -1 to the fixed dimension [self._max_num_instances].
          cropped_gt_masks: groundtruth masks cropped by the bounding box and
            resized to a fixed size determined by params['gt_mask_size']
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                image = data['image']
                source_id = data['source_id']
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                if self._mode == tf.estimator.ModeKeys.PREDICT:
                    input_processor = InstanceSegmentationInputProcessor(
                        image, image_size)
                    input_processor.normalize_image()
                    input_processor.set_scale_factors_to_output_size()
                    image = input_processor.resize_and_crop_image()
                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    image_info = input_processor.get_image_info()
                    return {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id
                    }

                elif self._mode == tf.estimator.ModeKeys.TRAIN:
                    instance_masks = None
                    if self._use_instance_mask:
                        instance_masks = data['groundtruth_instance_masks']
                    boxes = data['groundtruth_boxes']
                    classes = data['groundtruth_classes']
                    classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                         [-1, 1])
                    if not params['use_category']:
                        classes = tf.cast(tf.greater(classes, 0),
                                          dtype=tf.float32)

                    if (params['skip_crowd_during_training']
                            and self._mode == tf.estimator.ModeKeys.TRAIN):
                        indices = tf.where(
                            tf.logical_not(data['groundtruth_is_crowd']))
                        classes = tf.gather_nd(classes, indices)
                        boxes = tf.gather_nd(boxes, indices)
                        if self._use_instance_mask:
                            instance_masks = tf.gather_nd(
                                instance_masks, indices)

                    input_processor = InstanceSegmentationInputProcessor(
                        image, image_size, boxes, classes, instance_masks)
                    input_processor.normalize_image()
                    if params['input_rand_hflip']:
                        input_processor.random_horizontal_flip()

                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                    image = input_processor.resize_and_crop_image()

                    boxes, classes = input_processor.resize_and_crop_boxes()
                    if self._use_instance_mask:
                        instance_masks = input_processor.resize_and_crop_masks(
                        )
                        cropped_gt_masks = input_processor.crop_gt_masks(
                            instance_masks, boxes, params['gt_mask_size'],
                            image_size)

                    # Assign anchors.
                    score_targets, box_targets = anchor_labeler.label_anchors(
                        boxes, classes)

                    # Pad groundtruth data.
                    image_info = input_processor.get_image_info()
                    boxes *= image_info[2]
                    boxes = pad_to_fixed_size(boxes, -1,
                                              [self._max_num_instances, 4])
                    classes = pad_to_fixed_size(classes, -1,
                                                [self._max_num_instances, 1])

                    # Pads cropped_gt_masks.
                    if self._use_instance_mask:
                        cropped_gt_masks = tf.reshape(
                            cropped_gt_masks, [self._max_num_instances, -1])
                        cropped_gt_masks = pad_to_fixed_size(
                            cropped_gt_masks, -1, [
                                self._max_num_instances,
                                (params['gt_mask_size'] + 4)**2
                            ])
                        cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                            self._max_num_instances, params['gt_mask_size'] +
                            4, params['gt_mask_size'] + 4
                        ])

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    features = {}
                    features['images'] = image
                    features['image_info'] = image_info
                    features['source_ids'] = source_id
                    labels = {}
                    for level in range(params['min_level'],
                                       params['max_level'] + 1):
                        labels['score_targets_%d' %
                               level] = score_targets[level]
                        labels['box_targets_%d' % level] = box_targets[level]
                    labels['gt_boxes'] = boxes
                    labels['gt_classes'] = classes
                    if self._use_instance_mask:
                        labels['cropped_gt_masks'] = cropped_gt_masks
                    return (features, labels)

        batch_size = params['batch_size'] if 'batch_size' in params else 1
        dataset = tf.data.Dataset.list_files(
            self._file_pattern,
            shuffle=(self._mode == tf.estimator.ModeKeys.TRAIN))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(
                _prefetch_dataset,
                cycle_length=32,
                sloppy=(self._mode == tf.estimator.ModeKeys.TRAIN)))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.apply(
            tf.contrib.data.map_and_batch(_dataset_parser,
                                          batch_size=batch_size,
                                          num_parallel_batches=64,
                                          drop_remainder=True))

        # Transposes images for TPU performance.
        # Given the batch size, the batch dimension (N) goes to either the minor
        # ((H, W, C, N) when N > C) or the second-minor ((H, W, N, C) when N < C)
        # dimension. Here, we assume N is 4 or 8 and C is 3, so we use
        # (H, W, C, N).
        if (params['transpose_input']
                and self._mode == tf.estimator.ModeKeys.TRAIN):

            def _transpose_images(features, labels):
                features['images'] = tf.transpose(features['images'],
                                                  [1, 2, 3, 0])
                return features, labels

            dataset = dataset.map(_transpose_images, num_parallel_calls=64)

        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)

        if self._num_examples > 0:
            dataset = dataset.take(self._num_examples)
        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset which always loop at the
            # first batch. This reduces variance in performance and is useful in
            # testing.
            dataset = dataset.take(1).cache().repeat()
        return dataset
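
Every parser in these examples pads its variable-length groundtruth tensors with a `pad_to_fixed_size` helper that is not included in the excerpts. The function below is a minimal sketch of what such a helper presumably does (pad along axis 0 with a fill value up to a fixed row count); it is an assumption added for readability, not the project's actual implementation, and it omits any overflow checks the real helper may perform.

import tensorflow as tf  # TF 1.x API, matching the surrounding examples

def pad_to_fixed_size(data, pad_value, output_shape):
  """Pads `data` along axis 0 with `pad_value` to output_shape[0] rows (sketch only)."""
  max_num_instances, dimension = output_shape
  data = tf.reshape(data, [-1, dimension])
  num_instances = tf.shape(data)[0]
  pad_length = max_num_instances - num_instances
  paddings = pad_value * tf.ones([pad_length, dimension], dtype=data.dtype)
  return tf.reshape(tf.concat([data, paddings], axis=0), output_shape)
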
コード例 #23
0
  def __call__(self, params=None):
    if params is None:
      params = self._params
    input_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                    params['num_scales'],
                                    params['aspect_ratios'],
                                    params['anchor_scale'],
                                    params['image_size'])
    anchor_labeler = anchors.AnchorLabeler(input_anchors, params['num_classes'])
    example_decoder = tf_example_decoder.TfExampleDecoder()

    def _dataset_parser(value):
      """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the processed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tensor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth areas annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
      with tf.name_scope('parser'):
        data = example_decoder.decode(value)
        source_id = data['source_id']
        image = data['image']
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']
        classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
        areas = data['groundtruth_area']
        is_crowds = data['groundtruth_is_crowd']

        if params['skip_crowd_during_training'] and self._is_training:
          indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
          classes = tf.gather_nd(classes, indices)
          boxes = tf.gather_nd(boxes, indices)

        # NOTE: The autoaugment method works best when used alongside the
        # standard horizontal flipping of images along with size jittering
        # and normalization.
        if params.get('autoaugment_policy', None) and self._is_training:
          from aug import autoaugment  # pylint: disable=g-import-not-at-top
          image, boxes = autoaugment.distort_image_with_autoaugment(
              image, boxes, params['autoaugment_policy'])

        input_processor = DetectionInputProcessor(
            image, params['image_size'], boxes, classes)
        input_processor.normalize_image()
        if self._is_training and params['input_rand_hflip']:
          input_processor.random_horizontal_flip()
        if self._is_training:
          input_processor.set_training_random_scale_factors(
              params['train_scale_min'], params['train_scale_max'])
        else:
          input_processor.set_scale_factors_to_output_size()
        image = input_processor.resize_and_crop_image()
        boxes, classes = input_processor.resize_and_crop_boxes()

        # Assign anchors.
        (cls_targets, box_targets,
         num_positives) = anchor_labeler.label_anchors(boxes, classes)

        source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1',
                             source_id)
        source_id = tf.string_to_number(source_id)

        # Pad groundtruth data for evaluation.
        image_scale = input_processor.image_scale_to_original
        boxes *= image_scale
        is_crowds = tf.cast(is_crowds, dtype=tf.float32)
        boxes = pad_to_fixed_size(boxes, -1, [self._max_num_instances, 4])
        is_crowds = pad_to_fixed_size(is_crowds, 0,
                                      [self._max_num_instances, 1])
        areas = pad_to_fixed_size(areas, -1, [self._max_num_instances, 1])
        classes = pad_to_fixed_size(classes, -1, [self._max_num_instances, 1])
        if params['use_bfloat16']:
          image = tf.cast(image, dtype=tf.bfloat16)
        return (image, cls_targets, box_targets, num_positives, source_id,
                image_scale, boxes, is_crowds, areas, classes)

    dataset = tf.data.Dataset.list_files(
        self._file_pattern, shuffle=self._is_training)

    if horovod_enabled() and self._is_training:  # multi-card eval is not supported yet
      # Shard the dataset evenly across workers, one shard per Horovod rank.
      dataset = dataset.shard(hvd.size(), hvd.rank())

    if self._is_training:
      dataset = dataset.repeat()

    # Prefetch data from files.
    def _prefetch_dataset(filename):
      dataset = tf.data.TFRecordDataset(filename).prefetch(1)
      return dataset

    cycle_length = 1 if self._is_deterministic else 32
    dataset = dataset.apply(
        tf.data.experimental.parallel_interleave(
            _prefetch_dataset, cycle_length=cycle_length, sloppy=self._is_training))
    if self._is_training:
      dataset = dataset.shuffle(64)

    # Parse the fetched records to input tensors for model function.
    num_parallel_calls = 1 if self._is_deterministic else 64
    dataset = dataset.map(_dataset_parser, num_parallel_calls=num_parallel_calls)
    batch_size = params['batch_size']
    dataset = dataset.prefetch(batch_size)
    dataset = dataset.batch(batch_size, drop_remainder=True)

    def _process_example(images, cls_targets, box_targets, num_positives,
                         source_ids, image_scales, boxes, is_crowds, areas,
                         classes):
      """Processes one batch of data."""
      labels = {}
      # Count num_positives in a batch.
      num_positives_batch = tf.reduce_mean(num_positives)
      labels['mean_num_positives'] = tf.reshape(
          tf.tile(tf.expand_dims(num_positives_batch, 0), [
              batch_size,
          ]), [batch_size, 1])

      for level in range(params['min_level'], params['max_level'] + 1):
        labels['cls_targets_%d' % level] = cls_targets[level]
        labels['box_targets_%d' % level] = box_targets[level]
      # Concatenate groundtruth annotations to a tensor.
      groundtruth_data = tf.concat([boxes, is_crowds, areas, classes], axis=2)
      labels['source_ids'] = source_ids
      labels['groundtruth_data'] = groundtruth_data
      labels['image_scales'] = image_scales
      return images, labels

    dataset = dataset.map(_process_example)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    if self._use_fake_data:
      # Turn this dataset into a semi-fake dataset which always loop at the
      # first batch. This reduces variance in performance and is useful in
      # testing.
      dataset = dataset.take(1).cache().repeat()
    return dataset
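
For orientation, a reader class like the one above is normally handed to an Estimator as its `input_fn`, with the hyperparameters arriving through `params`. The sketch below is assumed wiring: the names `InputReader`, `retinanet_model_fn`, `run_config`, and `hparams_dict` are placeholders rather than names from this project; only the `__call__(params) -> tf.data.Dataset` contract is taken from the snippet above.

import tensorflow as tf  # TF 1.x API, matching the surrounding examples

# Hypothetical wiring; all names other than the __call__(params) contract are placeholders.
train_input_fn = InputReader(file_pattern='/data/coco/train-*.tfrecord',
                             is_training=True)

estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=retinanet_model_fn,   # e.g. the _model_fn shown in code example #24
    config=run_config,             # a tf.contrib.tpu.RunConfig (not shown here)
    use_tpu=False,
    train_batch_size=64,
    params=hparams_dict)           # min_level, max_level, image_size, etc.
estimator.train(input_fn=train_input_fn, max_steps=90000)
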
コード例 #24
0
ファイル: retinanet_model.py プロジェクト: xiaoyongzhu/tpu
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
  """Model defination for the RetinaNet model based on ResNet.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the RetinaNet model outputs class logits and box regression outputs.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.
  """
  def _model_outputs():
    return model(
        features,
        min_level=params['min_level'],
        max_level=params['max_level'],
        num_classes=params['num_classes'],
        num_anchors=len(params['aspect_ratios'] * params['num_scales']),
        resnet_depth=params['resnet_depth'],
        is_training_bn=params['is_training_bn'])

  if params['use_bfloat16']:
    with bfloat16.bfloat16_scope():
      cls_outputs, box_outputs = _model_outputs()
      levels = cls_outputs.keys()
      for level in levels:
        cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
        box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
  else:
    cls_outputs, box_outputs = _model_outputs()
    levels = cls_outputs.keys()

  # First check if it is in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    # print("entering PREDICT mode")
    predictions = {
        'image': features,
    }
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]

    eval_anchors = anchors.Anchors(params['min_level'],
                                   params['max_level'],
                                   params['num_scales'],
                                   params['aspect_ratios'],
                                   params['anchor_scale'],
                                   params['image_size'])
    anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                           params['num_classes'])
    detections = anchor_labeler.generate_detections(
        cls_outputs, box_outputs, image_id=100)
    print("detection for image is", detections)
    predictions['detections'] = detections
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Load pretrained model from checkpoint.
  if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
          '/': 'resnet%s/' % params['resnet_depth'],
      })
      return tf.train.Scaffold()
  else:
    scaffold_fn = None

  # Set up training loss and learning rate.
  global_step = tf.train.get_global_step()
  learning_rate = _learning_rate_schedule(
      params['learning_rate'], params['lr_warmup_init'],
      params['lr_warmup_step'], params['lr_drop_step'], global_step)
  # cls_loss and box_loss are for logging. only total_loss is optimized.
  total_loss, cls_loss, box_loss = _detection_loss(cls_outputs, box_outputs,
                                                   labels, params)

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=params['momentum'])
    if params['use_tpu']:
      optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    var_list = variable_filter_fn(
        tf.trainable_variables(),
        params['resnet_depth']) if variable_filter_fn else None
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(total_loss, global_step, var_list=var_list)
  else:
    train_op = None

  # Evaluation only works on GPU/CPU host and batch_size=1
  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(**kwargs):
      """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
      # add metrics to output
      cls_outputs = {}
      box_outputs = {}
      for level in range(params['min_level'], params['max_level'] + 1):
        cls_outputs[level] = kwargs['cls_outputs_%d' % level]
        box_outputs[level] = kwargs['box_outputs_%d' % level]
      detections = anchor_labeler.generate_detections(
          cls_outputs, box_outputs, kwargs['source_ids'])
      eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
      coco_metrics = eval_metric.estimator_metric_fn(detections,
                                                     kwargs['image_scales'])
      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    batch_size = params['batch_size']
    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [
            batch_size,
        ]), [batch_size, 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [
            batch_size,
        ]), [batch_size, 1])
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'source_ids': labels['source_ids'],
        'image_scales': labels['image_scales'],
    }
    for level in range(params['min_level'], params['max_level'] + 1):
      metric_fn_inputs['cls_outputs_%d' % level] = cls_outputs[level]
      metric_fn_inputs['box_outputs_%d' % level] = box_outputs[level]
    eval_metrics = (metric_fn, metric_fn_inputs)

  return tpu_estimator.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      scaffold_fn=scaffold_fn)
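
The `_learning_rate_schedule` and `_detection_loss` helpers referenced above are not part of this excerpt. Below is a plausible sketch of the learning-rate schedule only, reconstructed from the parameters it receives (base rate, warmup init, warmup step, drop step): linear warmup followed by a single 10x drop. The project's actual schedule may differ.

import tensorflow as tf  # TF 1.x API, matching the surrounding examples

def _learning_rate_schedule(base_learning_rate, lr_warmup_init, lr_warmup_step,
                            lr_drop_step, global_step):
  """Linear warmup then one 10x drop. A sketch, not the project's exact code."""
  step = tf.cast(global_step, tf.float32)
  warmup_lr = lr_warmup_init + (base_learning_rate - lr_warmup_init) * (
      step / lr_warmup_step)
  learning_rate = tf.where(global_step < lr_warmup_step,
                           warmup_lr, base_learning_rate)
  learning_rate = tf.where(global_step < lr_drop_step,
                           learning_rate, base_learning_rate * 0.1)
  return learning_rate
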
コード例 #25
0
def main(argv):
    # pbModel_path = './models/pb/blazeFace_model_test.pb'
    pbModel_path = r'C:\Users\17ZY-HPYKFD2\Downloads\dFServer\blazeFace_model_test.pb'
    WIDTH_DES = 256
    HEIGHT_DES = 256
    USE_NORM = True
    UPSCALE = False
    anchorsC = anchors.Anchors()
    boxes_vec = anchorsC.get_anchors(fmSizes=[(16, 16), (8, 8)], fmBased=True)

    # Setup tensorflow and model
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # Force on CPU
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # Force on CPU
    config = tf.ConfigProto()
    tf.reset_default_graph()
    with tf.Session(config=config) as sess:
        ret = True
        # Loop through video data
        while ret == True:
            # ret, frame = vid_in.read()
            frame = cv2.imread('./img_381.jpg')
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if UPSCALE:
                r = WIDTH_DES * 2 / frame.shape[1]
                dim_des = (int(WIDTH_DES * 2), int(frame.shape[0] * r))
                frame = cv2.resize(frame, dim_des, interpolation=cv2.INTER_LANCZOS4)
                c_shp = frame.shape
                frame = frame[int(c_shp[0] / 4):-int(c_shp[0] / 4),
                        int((c_shp[1] - WIDTH_DES) / 2):-int((c_shp[1] - WIDTH_DES) / 2)]
            else:
                r = WIDTH_DES / max(frame.shape[1], frame.shape[0])
                dim_des = (int(WIDTH_DES), int(frame.shape[1] * r))
                # frame = cv2.resize(frame, (WIDTH_DES, HEIGHT_DES))
                frame = cv2.resize(frame, (0, 0), fx=r, fy=r) # (WIDTH_DES, HEIGHT_DES))
                frame = np.pad(frame, ((0, HEIGHT_DES - frame.shape[0]), (0, WIDTH_DES - frame.shape[1]), (0, 0)), mode='constant')
            # frame_padded = lighting_balance(frame)
            # frame_padded = cv2.copyMakeBorder(frame, 0, max(0, HEIGHT_DES - frame.shape[0]), 0, 0, cv2.BORDER_CONSTANT, value=(0,0,0))
            # pred_confs, pred_locs = model.test_iter(np.expand_dims(frame, axis = 0))
            tmp_frame = frame / 255.
            pred_locs, pred_confs = freeze_graph_test(pbModel_path, np.expand_dims(tmp_frame, axis=0))

            # f = open('paramR.txt', 'w', encoding='utf-8')
            # confT = pred_confs[0][:, 1]
            # for conf in confT:
            #     print(str(conf), file=f)
            # f.close()
            # exit(1)

            f = open('paramR.txt', 'w', encoding='utf-8')
            for i in range(boxes_vec.shape[0]):
                l = pred_locs[0][i][0]
                t = pred_locs[0][i][1]
                r = pred_locs[0][i][2]
                b = pred_locs[0][i][3]
                p = pred_confs[0][i][1]
                print('index:', i, ', L:', l, ', T:', t, ', R:', r, ', B:', b, ', P:', p, file=f)
            f.close()

            pred_boxes = decode_batch(boxes_vec, pred_locs, pred_confs, min_conf=0.3)[0]
            pred_boxes[pred_boxes < 0] = 0
            pred_boxes[:, [0, 2]][pred_boxes[:, [0, 2]] > WIDTH_DES] = WIDTH_DES
            pred_boxes[:, [1, 3]][pred_boxes[:, [1, 3]] > HEIGHT_DES] = HEIGHT_DES
            h, w = HEIGHT_DES, WIDTH_DES
            for box in pred_boxes.tolist():
                if USE_NORM:
                    print(int(box[0] * w), int(box[1] * h), int(box[2] * w), int(box[3] * h))
                    cv2.rectangle(frame, (int(box[0] * w), int(box[1] * h)), (int(box[2] * w), int(box[3] * h)),
                                  (0, 255, 0), 3)
                    # cv2.rectangle(frame, (480, 72), (654, 294), (0, 255, 0), 3)
                else:
                    cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 0), 3)
            cv2.imshow('Webcam', frame)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imwrite('./tmp.jpg', frame)
            cv2.waitKey(1)
            ret = False
        # vid_in.release()
        cv2.destroyAllWindows()
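
The `freeze_graph_test` call above is not defined in this excerpt. Code example #28 below loads the same frozen graph by tensor name, so a sketch consistent with that pattern might look like the following; the tensor names 'input:0', 'BlazeNet/reg:0', and 'BlazeNet/probs:0' are taken from that later example, while the function body itself is an assumption.

import tensorflow as tf  # TF 1.x API, matching the surrounding examples

def freeze_graph_test(pb_path, image_batch):
  """Runs one forward pass through a frozen .pb graph (sketch only)."""
  with tf.Graph().as_default():
    graph_def = tf.GraphDef()
    with open(pb_path, 'rb') as f:
      graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, name='')
    with tf.Session() as sess:
      input_tensor = sess.graph.get_tensor_by_name('input:0')
      locs = sess.graph.get_tensor_by_name('BlazeNet/reg:0')
      probs = sess.graph.get_tensor_by_name('BlazeNet/probs:0')
      pred_locs, pred_confs = sess.run(
          [locs, probs], feed_dict={input_tensor: image_batch})
  return pred_locs, pred_confs
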
コード例 #26
0
def _model_fn(features, labels, mode, params, variable_filter_fn=None):
    """Model defination for the Mask-RCNN model based on ResNet.

  Args:
    features: the input image tensor and auxiliary information, such as
      `image_info` and `source_ids`. The image tensor has a shape of
      [batch_size, height, width, 3]. The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include score targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.
  """
    if params['transpose_input'] and mode == tf.estimator.ModeKeys.TRAIN:
        features['images'] = tf.transpose(features['images'], [3, 0, 1, 2])

    image_size = (params['image_size'], params['image_size'])
    all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                  params['num_scales'],
                                  params['aspect_ratios'],
                                  params['anchor_scale'], image_size)

    def _model_outputs():
        """Generates outputs from the model."""

        model_outputs = {}

        with tf.variable_scope('resnet%s' % params['resnet_depth']):
            resnet_fn = resnet.resnet_v1(
                params['resnet_depth'],
                num_batch_norm_group=params['num_batch_norm_group'])
            backbone_feats = resnet_fn(features['images'],
                                       params['is_training_bn'])

        fpn_feats = fpn.fpn(backbone_feats, params['min_level'],
                            params['max_level'])

        rpn_score_outputs, rpn_box_outputs = heads.rpn_head(
            fpn_feats, params['min_level'], params['max_level'],
            len(params['aspect_ratios'] * params['num_scales']))

        if mode == tf.estimator.ModeKeys.TRAIN:
            rpn_pre_nms_topn = params['rpn_pre_nms_topn']
            rpn_post_nms_topn = params['rpn_post_nms_topn']
        else:
            rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
            rpn_post_nms_topn = params['test_rpn_post_nms_topn']

        _, rpn_box_rois = mask_rcnn_architecture.proposal_op(
            rpn_score_outputs, rpn_box_outputs, all_anchors,
            features['image_info'], rpn_pre_nms_topn, rpn_post_nms_topn,
            params['rpn_nms_threshold'], params['rpn_min_size'])
        rpn_box_rois = tf.to_float(rpn_box_rois)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # Sampling
            box_targets, class_targets, rpn_box_rois, proposal_to_label_map = (
                mask_rcnn_architecture.proposal_label_op(
                    rpn_box_rois,
                    labels['gt_boxes'],
                    labels['gt_classes'],
                    features['image_info'],
                    batch_size_per_im=params['batch_size_per_im'],
                    fg_fraction=params['fg_fraction'],
                    fg_thresh=params['fg_thresh'],
                    bg_thresh_hi=params['bg_thresh_hi'],
                    bg_thresh_lo=params['bg_thresh_lo']))

        # Performs multi-level RoIAlign.
        box_roi_features = ops.multilevel_crop_and_resize(fpn_feats,
                                                          rpn_box_rois,
                                                          output_size=7)

        class_outputs, box_outputs = heads.box_head(
            box_roi_features,
            num_classes=params['num_classes'],
            mlp_head_dim=params['fast_rcnn_mlp_head_dim'])

        if mode != tf.estimator.ModeKeys.TRAIN:
            batch_size, _, _ = class_outputs.get_shape().as_list()
            detections = []
            softmax_class_outputs = tf.nn.softmax(class_outputs)
            for i in range(batch_size):
                detections.append(
                    anchors.generate_detections_per_image_op(
                        softmax_class_outputs[i], box_outputs[i],
                        rpn_box_rois[i], features['source_ids'][i],
                        features['image_info'][i],
                        params['test_detections_per_image'],
                        params['test_rpn_post_nms_topn'], params['test_nms'],
                        params['bbox_reg_weights']))
            detections = tf.stack(detections, axis=0)
            model_outputs.update({
                'detections': detections,
            })
        else:
            encoded_box_targets = mask_rcnn_architecture.encode_box_targets(
                rpn_box_rois, box_targets, class_targets,
                params['bbox_reg_weights'])
            model_outputs.update({
                'rpn_score_outputs': rpn_score_outputs,
                'rpn_box_outputs': rpn_box_outputs,
                'class_outputs': class_outputs,
                'box_outputs': box_outputs,
                'class_targets': class_targets,
                'box_targets': encoded_box_targets,
                'box_rois': rpn_box_rois,
            })

        # Faster-RCNN mode.
        if not params['include_mask']:
            return model_outputs

        # Mask sampling
        if mode != tf.estimator.ModeKeys.TRAIN:
            selected_box_rois = detections[:, :, 1:5]
            class_indices = tf.to_int32(detections[:, :, 6])
        else:
            (selected_class_targets, selected_box_targets, selected_box_rois,
             proposal_to_label_map) = (
                 mask_rcnn_architecture.select_fg_for_masks(
                     class_targets,
                     box_targets,
                     rpn_box_rois,
                     proposal_to_label_map,
                     max_num_fg=int(params['batch_size_per_im'] *
                                    params['fg_fraction'])))
            class_indices = tf.to_int32(selected_class_targets)

        mask_roi_features = ops.multilevel_crop_and_resize(fpn_feats,
                                                           selected_box_rois,
                                                           output_size=14)
        mask_outputs = heads.mask_head(
            mask_roi_features,
            class_indices,
            num_classes=params['num_classes'],
            mrcnn_resolution=params['mrcnn_resolution'])

        model_outputs.update({
            'mask_outputs': mask_outputs,
        })

        if mode == tf.estimator.ModeKeys.TRAIN:
            mask_targets = mask_rcnn_architecture.get_mask_targets(
                selected_box_rois, proposal_to_label_map, selected_box_targets,
                labels['cropped_gt_masks'], params['mrcnn_resolution'])
            model_outputs.update({
                'mask_targets':
                mask_targets,
                'selected_class_targets':
                selected_class_targets,
            })

        return model_outputs

    if params['use_bfloat16']:
        with tf.contrib.tpu.bfloat16_scope():
            model_outputs = _model_outputs()

            def cast_outputs_to_float(d):
                for k, v in sorted(six.iteritems(d)):
                    if isinstance(v, dict):
                        cast_outputs_to_float(v)
                    else:
                        d[k] = tf.cast(v, tf.float32)

            cast_outputs_to_float(model_outputs)
    else:
        model_outputs = _model_outputs()

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {}
        predictions['detections'] = model_outputs['detections']
        predictions['image_info'] = features['image_info']
        if params['include_mask']:
            predictions['mask_outputs'] = tf.nn.sigmoid(
                model_outputs['mask_outputs'])

        if params['use_tpu']:
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   predictions=predictions)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Set up training loss and learning rate.
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rates.step_learning_rate_with_linear_warmup(
        global_step, params['init_learning_rate'],
        params['warmup_learning_rate'], params['warmup_steps'],
        params['learning_rate_levels'], params['learning_rate_steps'])
    # score_loss and box_loss are for logging. only total_loss is optimized.
    total_rpn_loss, rpn_score_loss, rpn_box_loss = losses.rpn_loss(
        model_outputs['rpn_score_outputs'], model_outputs['rpn_box_outputs'],
        labels, params)

    (total_fast_rcnn_loss, fast_rcnn_class_loss,
     fast_rcnn_box_loss) = losses.fast_rcnn_loss(
         model_outputs['class_outputs'], model_outputs['box_outputs'],
         model_outputs['class_targets'], model_outputs['box_targets'], params)
    # Only training has the mask loss. Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/model_builder.py  # pylint: disable=line-too-long
    if mode == tf.estimator.ModeKeys.TRAIN and params['include_mask']:
        mask_loss = losses.mask_rcnn_loss(
            model_outputs['mask_outputs'], model_outputs['mask_targets'],
            model_outputs['selected_class_targets'], params)
    else:
        mask_loss = 0.
    if variable_filter_fn:
        var_list = variable_filter_fn(tf.trainable_variables(),
                                      params['resnet_depth'])
    else:
        var_list = None
    l2_regularization_loss = params['l2_weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v) for v in var_list
        if 'batch_normalization' not in v.name and 'bias' not in v.name
    ])
    total_loss = (total_rpn_loss + total_fast_rcnn_loss + mask_loss +
                  l2_regularization_loss)

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = create_optimizer(learning_rate, params)
        optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

        if not params['resnet_checkpoint']:
            scaffold_fn = None
        else:

            def scaffold_fn():
                """Loads pretrained model through scaffold function."""
                # Exclude all variable of optimizer.
                optimizer_vars = set(
                    [var.name for var in optimizer.variables()])
                prefix = 'resnet%s/' % params['resnet_depth']
                resnet_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                prefix)
                vars_to_load = {}
                for var in resnet_vars:
                    if var.name not in optimizer_vars:
                        var_name = var.name
                        # Trim the index of the variable.
                        if ':' in var_name:
                            var_name = var_name[:var_name.rindex(':')]
                        if params['skip_checkpoint_variables'] and re.match(
                                params['skip_checkpoint_variables'],
                                var_name[len(prefix):]):
                            continue
                        vars_to_load[var_name[len(prefix):]] = var_name
                for var in optimizer_vars:
                    tf.logging.info('Optimizer vars: %s.' % var)
                var_names = sorted(vars_to_load.keys())
                for k in var_names:
                    tf.logging.info('Will train: "%s": "%s",' %
                                    (k, vars_to_load[k]))
                tf.train.init_from_checkpoint(params['resnet_checkpoint'],
                                              vars_to_load)
                if not vars_to_load:
                    raise ValueError('Variables to load is empty.')
                return tf.train.Scaffold()

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
        if params['global_gradient_clip_ratio'] > 0:
            # Clips the gradients for training stability.
            # Refer: https://arxiv.org/abs/1211.5063
            with tf.name_scope('clipping'):
                old_grads, variables = zip(*grads_and_vars)
                num_weights = sum(g.shape.num_elements() for g in old_grads
                                  if g is not None)
                clip_norm = params['global_gradient_clip_ratio'] * math.sqrt(
                    num_weights)
                tf.logging.info(
                    'Global clip norm set to %g for %d variables with %d elements.'
                    % (clip_norm, sum(
                        1 for g in old_grads if g is not None), num_weights))
                gradients, _ = tf.clip_by_global_norm(old_grads, clip_norm)
        else:
            gradients, variables = zip(*grads_and_vars)
        grads_and_vars = []
        # Special treatment for biases (beta is named as bias in reference model)
        # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/optimizer.py#L113  # pylint: disable=line-too-long
        for grad, var in zip(gradients, variables):
            if 'beta' in var.name or 'bias' in var.name:
                grad = 2.0 * grad
            grads_and_vars.append((grad, var))
        minimize_op = optimizer.apply_gradients(grads_and_vars,
                                                global_step=global_step)

        with tf.control_dependencies(update_ops):
            train_op = minimize_op

        if params['use_host_call']:

            def host_call_fn(global_step, total_loss, total_rpn_loss,
                             rpn_score_loss, rpn_box_loss,
                             total_fast_rcnn_loss, fast_rcnn_class_loss,
                             fast_rcnn_box_loss, mask_loss, learning_rate):
                """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          global_step: `Tensor` with shape `[batch, ]` for the global_step.
          total_loss: `Tensor` with shape `[batch, ]` for the training loss.
          total_rpn_loss: `Tensor` with shape `[batch, ]` for the training RPN
            loss.
          rpn_score_loss: `Tensor` with shape `[batch, ]` for the training RPN
            score loss.
          rpn_box_loss: `Tensor` with shape `[batch, ]` for the training RPN
            box loss.
          total_fast_rcnn_loss: `Tensor` with shape `[batch, ]` for the
            training Mask-RCNN loss.
          fast_rcnn_class_loss: `Tensor` with shape `[batch, ]` for the
            training Mask-RCNN class loss.
          fast_rcnn_box_loss: `Tensor` with shape `[batch, ]` for the
            training Mask-RCNN box loss.
          mask_loss: `Tensor` with shape `[batch, ]` for the training Mask-RCNN
            mask loss.
          learning_rate: `Tensor` with shape `[batch, ]` for the learning_rate.

        Returns:
          List of summary ops to run on the CPU host.
        """
                # Outfeed supports int32 but global_step is expected to be int64.
                global_step = tf.reduce_mean(global_step)
                # Host call fns are executed FLAGS.iterations_per_loop times after one
                # TPU loop is finished, setting max_queue value to the same as number of
                # iterations will make the summary writer only flush the data to storage
                # once per loop.
                with (tf.contrib.summary.create_file_writer(
                        params['model_dir'],
                        max_queue=params['iterations_per_loop']).as_default()):
                    with tf.contrib.summary.always_record_summaries():
                        tf.contrib.summary.scalar('total_loss',
                                                  tf.reduce_mean(total_loss),
                                                  step=global_step)
                        tf.contrib.summary.scalar(
                            'total_rpn_loss',
                            tf.reduce_mean(total_rpn_loss),
                            step=global_step)
                        tf.contrib.summary.scalar(
                            'rpn_score_loss',
                            tf.reduce_mean(rpn_score_loss),
                            step=global_step)
                        tf.contrib.summary.scalar('rpn_box_loss',
                                                  tf.reduce_mean(rpn_box_loss),
                                                  step=global_step)
                        tf.contrib.summary.scalar(
                            'total_fast_rcnn_loss',
                            tf.reduce_mean(total_fast_rcnn_loss),
                            step=global_step)
                        tf.contrib.summary.scalar(
                            'fast_rcnn_class_loss',
                            tf.reduce_mean(fast_rcnn_class_loss),
                            step=global_step)
                        tf.contrib.summary.scalar(
                            'fast_rcnn_box_loss',
                            tf.reduce_mean(fast_rcnn_box_loss),
                            step=global_step)
                        if params['include_mask']:
                            tf.contrib.summary.scalar(
                                'mask_loss',
                                tf.reduce_mean(mask_loss),
                                step=global_step)
                        tf.contrib.summary.scalar(
                            'learning_rate',
                            tf.reduce_mean(learning_rate),
                            step=global_step)

                        return tf.contrib.summary.all_summary_ops()

            # To log the loss, current learning rate, and epoch for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [params['batch_size']].
            global_step_t = tf.reshape(global_step, [1])
            total_loss_t = tf.reshape(total_loss, [1])
            total_rpn_loss_t = tf.reshape(total_rpn_loss, [1])
            rpn_score_loss_t = tf.reshape(rpn_score_loss, [1])
            rpn_box_loss_t = tf.reshape(rpn_box_loss, [1])
            total_fast_rcnn_loss_t = tf.reshape(total_fast_rcnn_loss, [1])
            fast_rcnn_class_loss_t = tf.reshape(fast_rcnn_class_loss, [1])
            fast_rcnn_box_loss_t = tf.reshape(fast_rcnn_box_loss, [1])
            mask_loss_t = tf.reshape(mask_loss, [1])
            learning_rate_t = tf.reshape(learning_rate, [1])
            host_call = (host_call_fn, [
                global_step_t, total_loss_t, total_rpn_loss_t,
                rpn_score_loss_t, rpn_box_loss_t, total_fast_rcnn_loss_t,
                fast_rcnn_class_loss_t, fast_rcnn_box_loss_t, mask_loss_t,
                learning_rate_t
            ])
    else:
        train_op = None
        scaffold_fn = None

    return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                           loss=total_loss,
                                           train_op=train_op,
                                           host_call=host_call,
                                           scaffold_fn=scaffold_fn)
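
Both model functions accept a `variable_filter_fn(trainable_variables, resnet_depth)` callback, but no implementation appears in these excerpts. A hypothetical filter that excludes the ResNet stem and first block group from training might look like the function below; the name patterns are illustrative assumptions, not the project's actual rule.

def example_variable_filter_fn(variables, resnet_depth):
  """Keeps every variable except the ResNet stem and first block group (illustrative only)."""
  prefix = 'resnet%s/' % resnet_depth
  frozen_prefixes = (prefix + 'conv2d/', prefix + 'block_group1/')  # assumed name patterns
  return [v for v in variables if not v.name.startswith(frozen_prefixes)]
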
コード例 #27
0
ファイル: dataloader.py プロジェクト: thuyen/tpu
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the processed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tensor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth areas annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']

                if params['skip_crowd_during_training'] and self._is_training:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, boxes, is_crowds, areas,
                        classes)

        batch_size = params['batch_size']
        # Pass the seed directly; tf.random.set_random_seed() returns None, so
        # using its return value as `seed` would leave the file shuffle unseeded.
        dataset = tf.data.Dataset.list_files(self._file_pattern,
                                             shuffle=self._is_training,
                                             seed=int(time.time() * 1e9))
        if self._is_training:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(_prefetch_dataset,
                                                cycle_length=32,
                                                sloppy=self._is_training))
        if self._is_training:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        def _process_example(images, cls_targets, box_targets, num_positives,
                             source_ids, image_scales, boxes, is_crowds, areas,
                             classes):
            """Processes one batch of data."""
            labels = {}
            # Count num_positives in a batch.
            num_positives_batch = tf.reduce_mean(num_positives)
            labels['mean_num_positives'] = tf.reshape(
                tf.tile(tf.expand_dims(num_positives_batch, 0), [
                    batch_size,
                ]), [batch_size, 1])

            for level in range(params['min_level'], params['max_level'] + 1):
                labels['cls_targets_%d' % level] = cls_targets[level]
                labels['box_targets_%d' % level] = box_targets[level]
            # Concatenate groundtruth annotations to a tensor.
            groundtruth_data = tf.concat([boxes, is_crowds, areas, classes],
                                         axis=2)
            labels['source_ids'] = source_ids
            labels['groundtruth_data'] = groundtruth_data
            labels['image_scales'] = image_scales
            return images, labels

        dataset = dataset.map(_process_example)
        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
        return dataset
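
`_process_example` packs boxes, is_crowds, areas, and classes into a single `groundtruth_data` tensor of shape [batch, max_num_instances, 7]. A downstream consumer (for example an eval metric) would split it back in the same column order; the small helper below illustrates this and is not part of the original code.

def split_groundtruth_data(groundtruth_data):
  """Splits labels['groundtruth_data'] back into its components (hypothetical consumer).

  The column order follows the tf.concat call above: 4 box coordinates,
  is_crowd, area, class.
  """
  boxes = groundtruth_data[:, :, 0:4]   # [y1, x1, y2, x2] in original-image scale
  is_crowds = groundtruth_data[:, :, 4]
  areas = groundtruth_data[:, :, 5]
  classes = groundtruth_data[:, :, 6]
  return boxes, is_crowds, areas, classes
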
コード例 #28
0
def main(dataPath=None):
    pbModel_path = './models/pb/blazeFace_model_test.pb'
    # pbModel_path = r'C:\Users\17ZY-HPYKFD2\Downloads\dFServer\blazeFace_model_test.pb'
    if dataPath is not None:
        data_test_dir = dataPath
    else:
        data_test_dir = '/data1/image_data/data/faces/zhengmian_0815'
    # data_test_dir = '/data1/image_data/data/online_pushed_data/parse_result/illegalPicCls/NCNN/ncnn/WIDER_val'

    # lablePath = '/data1/image_data/data/online_pushed_data/parse_result/illegalPicCls/NCNN/ncnn/FDDB/FDDB_xmlanno'
    # lablePath = '/data1/image_data/data/online_pushed_data/parse_result/illegalPicCls/NCNN/ncnn/WIDER_val/xml'

    if not os.path.exists(data_test_dir):
        print('not found dataDir:', data_test_dir)
        exit(-1)

    # if 'FDDB' in data_test_dir:
    #     tail = 'FDDB'
    # else:
    tail = 'Self'

    storePath = './tmpDetImgs_self'

    if not os.path.exists(storePath):
        os.makedirs(storePath)
    else:
        os.system('rm -rf ' + storePath)
        os.makedirs(storePath)

    WIDTH_DES = 256
    HEIGHT_DES = 256
    anchorsC = anchors.Anchors()
    boxes_vec = anchorsC.get_anchors(fmSizes=[(16, 16), (8, 8)], fmBased=True)

    # Setup tensorflow and model
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # Force on CPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # gpu编号

    with tf.Graph().as_default():
        output_graph_def = tf.GraphDef()
        with open(pbModel_path, "rb") as f:
            output_graph_def.ParseFromString(f.read())
            tf.import_graph_def(output_graph_def, name="")
        with tf.Session() as sess:
            # Input tensor name, matching the network's input node:
            # "input:0" is the input image; "keep_prob:0" is the dropout keep
            # probability (1 at test time); "is_training:0" is the training flag.
            input_image_tensor = sess.graph.get_tensor_by_name("input:0")

            # Output tensor names
            output_tensor_probs = sess.graph.get_tensor_by_name(
                "BlazeNet/probs:0")
            output_tensor_locs = sess.graph.get_tensor_by_name(
                "BlazeNet/reg:0")

            f = open('result_mobileNetSelf_' + data_test_dir.split('/')
                     [-1 if data_test_dir[-1] != '/' else -2] + '.txt',
                     'w',
                     encoding='utf-8')

            for line in os.listdir(data_test_dir):
                if line.endswith('.jpg'):
                    print('process line:', line)
                    xmlPath = os.path.join(data_test_dir,
                                           line.split('.')[0] + '.json')
                    filePath = os.path.join(data_test_dir, line)
                    frame = cv2.imread(filePath)
                    OSize = frame.shape
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    r = WIDTH_DES / max(frame.shape[1], frame.shape[0])
                    # dim_des = (int(WIDTH_DES), int(frame.shape[1] * r))
                    # frame = cv2.resize(frame, (WIDTH_DES, HEIGHT_DES))
                    bt = time.time()
                    frame = cv2.resize(frame, (0, 0), fx=r,
                                       fy=r)  # scale by r, keeping aspect ratio
                    frame = np.pad(frame,
                                   ((0, HEIGHT_DES - frame.shape[0]),
                                    (0, WIDTH_DES - frame.shape[1]), (0, 0)),
                                   mode='constant')
                    tmp_frame = frame / 255.
                    pred_locs, pred_confs = sess.run(
                        [output_tensor_locs, output_tensor_probs],
                        feed_dict={
                            input_image_tensor: np.expand_dims(tmp_frame,
                                                               axis=0)
                        })
                    pred_boxes = decode_batch(boxes_vec,
                                              pred_locs,
                                              pred_confs,
                                              min_conf=0.5)[0]
                    pred_boxes[pred_boxes < 0] = 0
                    totalT = time.time() - bt

                    # pred_boxes[:, [0, 2]][pred_boxes[:, [0, 2]] > WIDTH_DES] = WIDTH_DES
                    # pred_boxes[:, [1, 3]][pred_boxes[:, [1, 3]] > HEIGHT_DES] = HEIGHT_DES
                    h, w = HEIGHT_DES, WIDTH_DES
                    tmpS = line + '\t' + str(totalT) + '\t'

                    if drawOriBox:
                        GT_box = getGTBoxes(xmlPath)
                        for i in range(len(GT_box)):
                            GBox = GT_box[i]
                            if dstSize:
                                r = dstSize / max(OSize[0], OSize[1])
                                GBox = (np.array(GBox) * r).astype(np.int32)
                            cv2.rectangle(frame, (GBox[0], GBox[1]),
                                          (GBox[2], GBox[3]), (0, 0, 0), 3)

                    for box in pred_boxes.tolist():
                        tmpS += str(int(box[0] * w)) + ',' + str(
                            int(box[1] * h)) + ',' + str(int(
                                box[2] * w)) + ',' + str(int(
                                    box[3] * h)) + '\t'
                        cv2.rectangle(frame,
                                      (int(box[0] * w), int(box[1] * h)),
                                      (int(box[2] * w), int(box[3] * h)),
                                      (0, 255, 0), 2)
                    tmpS = tmpS[:-1] + '\n'
                    f.write(tmpS)
                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    line = line.replace('/', '_')
                    cv2.imwrite(os.path.join(storePath, line), frame)
            f.close()
    os.system('zip -r tmpDetImgs.zip ' + storePath)
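
The script above relies on a project-local decode_batch helper to turn the raw location and confidence outputs into boxes. A minimal sketch of what such a decoder typically does, assuming SSD-style center/size anchors and per-anchor confidences (the project's actual decode_batch may differ):

import numpy as np


def decode_batch_sketch(anchor_boxes, pred_locs, pred_confs, min_conf=0.5):
    """Illustrative anchor decoding; not the project's decode_batch.

    anchor_boxes: [num_anchors, 4] normalized (cx, cy, w, h) priors (assumed).
    pred_locs:    [batch, num_anchors, 4] predicted (dcx, dcy, dw, dh) offsets.
    pred_confs:   [batch, num_anchors] per-anchor confidence scores.
    Returns one [n, 4] array of normalized (x1, y1, x2, y2) boxes per image.
    """
    results = []
    for locs, confs in zip(pred_locs, pred_confs):
        cx = anchor_boxes[:, 0] + locs[:, 0] * anchor_boxes[:, 2]
        cy = anchor_boxes[:, 1] + locs[:, 1] * anchor_boxes[:, 3]
        w = anchor_boxes[:, 2] * np.exp(locs[:, 2])
        h = anchor_boxes[:, 3] * np.exp(locs[:, 3])
        boxes = np.stack(
            [cx - w / 2., cy - h / 2., cx + w / 2., cy + h / 2.], axis=1)
        results.append(boxes[confs > min_conf])
    return results
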
コード例 #29
0
ファイル: mask_rcnn_model.py プロジェクト: zzm422/tpu
def build_model_graph(features, labels, is_training, params):
    """Builds the forward model graph."""
    use_batched_nms = (not params['use_tpu'] and params['use_batched_nms'])
    is_gpu_inference = (not is_training and use_batched_nms)
    model_outputs = {}

    if is_training:
        if params['transpose_input']:
            features['images'] = tf.transpose(features['images'], [2, 0, 1, 3])
    batch_size, image_height, image_width, _ = (
        features['images'].get_shape().as_list())

    # Handles space-to-depth transform.
    conv0_space_to_depth_block_size = 0
    if is_training:
        conv0_space_to_depth_block_size = params[
            'conv0_space_to_depth_block_size']
        image_height *= conv0_space_to_depth_block_size
        image_width *= conv0_space_to_depth_block_size

    if 'source_ids' not in features:
        features['source_ids'] = -1 * tf.ones([batch_size], dtype=tf.float32)

    all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                  params['num_scales'],
                                  params['aspect_ratios'],
                                  params['anchor_scale'],
                                  (image_height, image_width))

    if 'resnet' in params['backbone']:
        with tf.variable_scope(params['backbone']):
            resnet_fn = resnet.resnet_v1(
                params['backbone'],
                conv0_kernel_size=params['conv0_kernel_size'],
                conv0_space_to_depth_block_size=conv0_space_to_depth_block_size,
                num_batch_norm_group=params['num_batch_norm_group'])
            backbone_feats = resnet_fn(
                features['images'], (params['is_training_bn'] and is_training))
    elif 'mnasnet' in params['backbone']:
        with tf.variable_scope(params['backbone']):
            _, endpoints = mnasnet_models.build_mnasnet_base(
                features['images'],
                params['backbone'],
                training=(params['is_training_bn'] and is_training),
                override_params={'use_keras': False})

            backbone_feats = {
                2: endpoints['reduction_2'],
                3: endpoints['reduction_3'],
                4: endpoints['reduction_4'],
                5: endpoints['reduction_5'],
            }
    else:
        raise ValueError('Not a valid backbone option: %s' %
                         params['backbone'])

    fpn_feats = fpn.fpn(backbone_feats, params['min_level'],
                        params['max_level'])
    model_outputs.update({
        'fpn_features': fpn_feats,
    })

    rpn_score_outputs, rpn_box_outputs = heads.rpn_head(
        fpn_feats, params['min_level'], params['max_level'],
        params['num_scales'] * len(params['aspect_ratios']))

    if is_training:
        rpn_pre_nms_topn = params['rpn_pre_nms_topn']
        rpn_post_nms_topn = params['rpn_post_nms_topn']
    else:
        rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
        rpn_post_nms_topn = params['test_rpn_post_nms_topn']

    rpn_box_scores, rpn_box_rois = roi_ops.multilevel_propose_rois(
        rpn_score_outputs,
        rpn_box_outputs,
        all_anchors,
        features['image_info'],
        rpn_pre_nms_topn,
        rpn_post_nms_topn,
        params['rpn_nms_threshold'],
        params['rpn_min_size'],
        bbox_reg_weights=None,
        use_batched_nms=use_batched_nms)
    rpn_box_rois = tf.to_float(rpn_box_rois)
    if is_training:
        rpn_box_rois = tf.stop_gradient(rpn_box_rois)
        rpn_box_scores = tf.stop_gradient(rpn_box_scores)

    if is_training:
        # Sampling
        box_targets, class_targets, rpn_box_rois, proposal_to_label_map = (
            training_ops.proposal_label_op(
                rpn_box_rois,
                labels['gt_boxes'],
                labels['gt_classes'],
                features['image_info'],
                batch_size_per_im=params['batch_size_per_im'],
                fg_fraction=params['fg_fraction'],
                fg_thresh=params['fg_thresh'],
                bg_thresh_hi=params['bg_thresh_hi'],
                bg_thresh_lo=params['bg_thresh_lo']))

    # Performs multi-level RoIAlign.
    box_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_feats,
        rpn_box_rois,
        output_size=7,
        is_gpu_inference=is_gpu_inference)

    class_outputs, box_outputs, _ = heads.box_head(
        box_roi_features,
        num_classes=params['num_classes'],
        mlp_head_dim=params['fast_rcnn_mlp_head_dim'])

    if not is_training:
        if is_gpu_inference:
            generate_detections_fn = postprocess_ops.generate_detections_gpu
        else:
            generate_detections_fn = postprocess_ops.generate_detections_tpu
        detections = generate_detections_fn(
            class_outputs, box_outputs, rpn_box_rois, features['image_info'],
            params['test_rpn_post_nms_topn'],
            params['test_detections_per_image'], params['test_nms'],
            params['bbox_reg_weights'])

        model_outputs.update({
            'num_detections': detections[0],
            'detection_boxes': detections[1],
            'detection_classes': detections[2],
            'detection_scores': detections[3],
        })
    else:
        encoded_box_targets = training_ops.encode_box_targets(
            rpn_box_rois, box_targets, class_targets,
            params['bbox_reg_weights'])
        model_outputs.update({
            'rpn_score_outputs': rpn_score_outputs,
            'rpn_box_outputs': rpn_box_outputs,
            'class_outputs': class_outputs,
            'box_outputs': box_outputs,
            'class_targets': class_targets,
            'box_targets': encoded_box_targets,
            'box_rois': rpn_box_rois,
        })

    # Faster-RCNN mode.
    if not params['include_mask']:
        return model_outputs

    # Mask sampling
    if not is_training:
        selected_box_rois = model_outputs['detection_boxes']
        class_indices = model_outputs['detection_classes']
        # If using GPU for inference, delay the cast until the Gather ops show
        # up, since GPU inference handles floating point better.
        # TODO(laigd): revisit this when newer versions of the GPU libraries
        # are released.
        if not is_gpu_inference:
            class_indices = tf.to_int32(class_indices)
    else:
        (selected_class_targets, selected_box_targets, selected_box_rois,
         proposal_to_label_map) = (training_ops.select_fg_for_masks(
             class_targets,
             box_targets,
             rpn_box_rois,
             proposal_to_label_map,
             max_num_fg=int(params['batch_size_per_im'] *
                            params['fg_fraction'])))
        class_indices = tf.to_int32(selected_class_targets)

    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_feats,
        selected_box_rois,
        output_size=14,
        is_gpu_inference=is_gpu_inference)
    mask_outputs = heads.mask_head(mask_roi_features,
                                   class_indices,
                                   num_classes=params['num_classes'],
                                   mrcnn_resolution=params['mrcnn_resolution'],
                                   is_gpu_inference=is_gpu_inference)

    if is_training:
        mask_targets = training_ops.get_mask_targets(
            selected_box_rois, proposal_to_label_map, selected_box_targets,
            labels['cropped_gt_masks'], params['mrcnn_resolution'])
        model_outputs.update({
            'mask_outputs': mask_outputs,
            'mask_targets': mask_targets,
            'selected_class_targets': selected_class_targets,
        })
    else:
        model_outputs.update({
            'detection_masks': tf.nn.sigmoid(mask_outputs),
        })

    return model_outputs
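
A minimal sketch of how build_model_graph is typically consumed from an Estimator model_fn (names below are illustrative; the real model_fn also builds the losses, optimizer, and TPU scaffolding):

import tensorflow as tf


def model_fn_sketch(features, labels, mode, params):
    """Illustrative wrapper around build_model_graph, prediction path only."""
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model_outputs = build_model_graph(features, labels, is_training, params)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'num_detections': model_outputs['num_detections'],
            'detection_boxes': model_outputs['detection_boxes'],
            'detection_classes': model_outputs['detection_classes'],
            'detection_scores': model_outputs['detection_scores'],
        }
        if params['include_mask']:
            predictions['detection_masks'] = model_outputs['detection_masks']
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # In training mode the *_outputs / *_targets pairs returned above feed the
    # RPN, box, and (optionally) mask losses; that part is omitted here.
    raise NotImplementedError('training path omitted in this sketch')
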
コード例 #30
0
    def __call__(self, params):
        image_size = params['dynamic_image_size'] if params[
            'dynamic_input_shapes'] else (params['image_size'],
                                          params['image_size'])
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'], image_size)
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'],
                                               params['rpn_positive_overlap'],
                                               params['rpn_negative_overlap'],
                                               params['rpn_batch_size_per_im'],
                                               params['rpn_fg_fraction'])

        if params['dynamic_input_shapes']:
            height_long_side_image_size = image_size[::-1]
            height_long_side_input_anchors = anchors.Anchors(
                params['min_level'], params['max_level'], params['num_scales'],
                params['aspect_ratios'], params['anchor_scale'],
                height_long_side_image_size)
            height_long_side_anchor_labeler = anchors.AnchorLabeler(
                height_long_side_input_anchors, params['num_classes'],
                params['rpn_positive_overlap'], params['rpn_negative_overlap'],
                params['rpn_batch_size_per_im'], params['rpn_fg_fraction'])

        example_decoder = tf_example_decoder.TfExampleDecoder(
            use_instance_mask=True)

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary containing an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized values and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the processed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tensor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth area annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth class annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                instance_masks = data['groundtruth_instance_masks']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']
                if not params['use_category']:
                    classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

                if (params['skip_crowd_during_training']
                        and self._mode == tf.estimator.ModeKeys.TRAIN):
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)
                    instance_masks = tf.gather_nd(instance_masks, indices)

                input_processor = InstanceSegmentationInputProcessor(
                    image, image_size, params['short_side_image_size'],
                    params['long_side_max_image_size'], boxes, classes,
                    instance_masks)
                input_processor.normalize_image()
                if (self._mode == tf.estimator.ModeKeys.TRAIN
                        and params['input_rand_hflip']):
                    input_processor.random_horizontal_flip()
                if self._mode == tf.estimator.ModeKeys.TRAIN:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_mlperf_reference_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()
                instance_masks = input_processor.resize_and_crop_masks()
                cropped_gt_masks = input_processor.crop_gt_masks(
                    instance_masks, boxes, params['gt_mask_size'], image_size)

                # Assign anchors.
                if params['dynamic_input_shapes']:
                    is_height_short_side = tf.less(
                        input_processor._scaled_height,  # pylint: disable=protected-access
                        input_processor._scaled_width)  # pylint: disable=protected-access
                    score_targets, box_targets = tf.cond(
                        is_height_short_side,
                        lambda: anchor_labeler.label_anchors(boxes, classes),
                        lambda: height_long_side_anchor_labeler.label_anchors(boxes, classes))  # pylint: disable=line-too-long
                else:
                    score_targets, box_targets = anchor_labeler.label_anchors(
                        boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                image_scale = input_processor.image_scale_to_original
                scaled_height = input_processor.get_height_length()
                scaled_width = input_processor.get_width_length()
                image_info = tf.stack([
                    tf.to_float(scaled_height),
                    tf.to_float(scaled_width),
                    image_scale,
                    tf.to_float(input_processor.get_original_height),
                    tf.to_float(input_processor.get_original_width),
                ])
                # Pad groundtruth data for evaluation.
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                # Pads cropped_gt_masks.
                cropped_gt_masks = tf.reshape(cropped_gt_masks,
                                              [self._max_num_instances, -1])
                cropped_gt_masks = pad_to_fixed_size(
                    cropped_gt_masks, -1,
                    [self._max_num_instances, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                    self._max_num_instances, params['gt_mask_size'] + 4,
                    params['gt_mask_size'] + 4
                ])

                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, score_targets, box_targets, source_id,
                        image_info, boxes, is_crowds, areas, classes,
                        cropped_gt_masks)

        batch_size = params['batch_size'] if 'batch_size' in params else 1
        dataset = tf.data.Dataset.list_files(
            self._file_pattern,
            shuffle=(self._mode == tf.estimator.ModeKeys.TRAIN))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(
                _prefetch_dataset,
                cycle_length=32,
                sloppy=(self._mode == tf.estimator.ModeKeys.TRAIN)))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)

        if params['dynamic_input_shapes']:

            def key_func(image, *args):
                del args
                return tf.cast(tf.shape(image)[0], dtype=tf.int64)

            def reduce_func(unused_key, dataset):
                return dataset.batch(batch_size, drop_remainder=True)

            dataset = dataset.apply(
                tf.contrib.data.group_by_window(
                    key_func=key_func,
                    reduce_func=reduce_func,
                    window_size=params['global_batch_size']))
        else:
            dataset = dataset.prefetch(batch_size)
            dataset = dataset.batch(batch_size, drop_remainder=True)

        def _process_example(images, score_targets, box_targets, source_ids,
                             image_info, boxes, is_crowds, areas, classes,
                             cropped_gt_masks):
            """Processes one batch of data."""
            # Transposes images from (N, H, W, C)->(H, W, N, C). As batch size is
            # less than 8, the batch goes to the second minor dimension.
            if (params['transpose_input']
                    and self._mode == tf.estimator.ModeKeys.TRAIN):
                images = tf.transpose(images, [1, 2, 0, 3])

            labels = {}
            for level in range(params['min_level'], params['max_level'] + 1):
                labels['score_targets_%d' % level] = score_targets[level]
                labels['box_targets_%d' % level] = box_targets[level]
            # Concatenate groundtruth annotations to a tensor.
            groundtruth_data = tf.concat([boxes, is_crowds, areas, classes],
                                         axis=2)
            labels['source_ids'] = source_ids
            labels['groundtruth_data'] = groundtruth_data
            labels['image_info'] = image_info
            labels['cropped_gt_masks'] = cropped_gt_masks
            if self._mode == tf.estimator.ModeKeys.PREDICT:
                features = dict(images=images,
                                image_info=image_info,
                                groundtruth_data=groundtruth_data,
                                source_ids=source_ids)
                return features
            elif params['dynamic_input_shapes']:
                # For dynamic input shapes, we have 2 TPU programs. A tf.cond op is run
                # on the host side to decide which TPU program to launch. As data is
                # prefetched on the device side, the data for evaluating the shape would
                # need to be sent back from device to host. Thus we return the `images`
                # shape here explicitly to avoid copying the entire `images` tensor back.
                return tf.shape(images), images, labels
            else:
                return images, labels

        dataset = dataset.map(_process_example)
        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
        return dataset
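
Because __call__(self, params) reads the per-shard batch size from params, an instance of this reader class is normally passed to a TPUEstimator as the input_fn, which injects params['batch_size'] at call time. A rough usage sketch; the class name, model_fn, and hyperparameter dict below are assumptions, not taken from the repository:

reader = InputReader('/path/to/train*.tfrecord',        # assumed constructor
                     mode=tf.estimator.ModeKeys.TRAIN)

estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=mask_rcnn_model_fn,   # e.g. a model_fn built around build_model_graph
    config=run_config,             # a tf.contrib.tpu.RunConfig (assumed to exist)
    train_batch_size=64,
    params=hparams_dict)           # the params dict consumed by _dataset_parser

estimator.train(input_fn=reader, max_steps=100000)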