Example #1
0
def sample_rpn_outputs(boxes, scores, is_training=False, only_positive=False):
    """Sample boxes according to their scores, a minimum size and NMS.

    Params:
    boxes: box coordinates, each entry is [x1, y1, x2, y2]; reshaped to (N, 4)
    scores: probs of fg, in [0, 1], one per box; reshaped to (N, 1)
    is_training: when False, the pre-/post-NMS budgets read from cfg are halved
    only_positive: when True, boxes scoring <= 0.5 are discarded up front

    Returns:
    boxes: (M, 4) surviving boxes
    scores: (M, 1) scores of the surviving boxes
    batch_inds: (M,) int32 zeros
    """
    min_size = cfg.FLAGS.min_size
    rpn_nms_threshold = cfg.FLAGS.rpn_nms_threshold
    pre_nms_top_n = cfg.FLAGS.pre_nms_top_n
    post_nms_top_n = cfg.FLAGS.post_nms_top_n
    if not is_training:
        pre_nms_top_n = int(pre_nms_top_n / 2)
        post_nms_top_n = int(post_nms_top_n / 2)

    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    # filter backgrounds
    # Cheap pre-filter: thresholding removes most background anchors before
    # the (slower) argsort below.
    if only_positive:
        keeps = np.where(scores > 0.5)[0]
        boxes = boxes[keeps, :]
        scores = scores[keeps]

    # filter minimum size
    keeps = _filter_boxes(boxes, min_size=min_size)
    boxes = boxes[keeps, :]
    scores = scores[keeps]

    # filter with scores: best first, truncated to the pre-NMS budget
    order = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
    boxes = boxes[order, :]
    scores = scores[order]

    # filter with nms
    det = np.hstack((boxes, scores)).astype(np.float32)
    keeps = nms_wrapper.nms(det, rpn_nms_threshold)

    if post_nms_top_n > 0:
        keeps = keeps[:post_nms_top_n]
    boxes = boxes[keeps, :]
    scores = scores[keeps]
    batch_inds = np.zeros([boxes.shape[0]], dtype=np.int32)

    if _DEBUG:
        LOG('SAMPLE: %d rois has been choosen' % len(keeps))
        # Every box may have been filtered out; boxes[0] and np.min() on an
        # empty array would raise, so only inspect the result when non-empty.
        if boxes.shape[0] > 0:
            # ravel() yields a scalar for '%.4f' instead of a 1-element array
            LOG('SAMPLE: a positive box: %d %d %d %d %.4f' %
                (boxes[0, 0], boxes[0, 1], boxes[0, 2], boxes[0, 3],
                 scores.ravel()[0]))
            hs = boxes[:, 3] - boxes[:, 1]
            ws = boxes[:, 2] - boxes[:, 0]
            assert min(np.min(hs), np.min(ws)) > 0, 'invalid boxes'

    return boxes, scores, batch_inds
Example #2
0
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode ground-truth masks into learnable per-RoI targets.

    Every RoI is matched to the ground-truth box it overlaps most. RoIs whose
    best overlap reaches cfg.FLAGS.mask_threshold are positive; at most
    cfg.FLAGS.masks_per_image of them are sampled at random (without
    replacement) and receive a mask target.

    Params
    ------
    gt_masks: {0, 1} masks of shape (G, imh, imw)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
    rois:     the bounding boxes of shape (N, 4)
    num_classes: K
    mask_height, mask_width: height and width of output masks

    Returns
    -------
    labels: class of the matched ground-truth box for every RoI, shape (N,)
    mask_targets: int32 array of shape (N, mask_height, mask_width, K); only
        the matched-class channel of a sampled RoI is filled, the rest is 0
    mask_inside_weights: float32 array of the same shape, set to 1 on the
        channel that was filled for a sampled RoI and 0 elsewhere
    """
    total_masks = rois.shape[0]
    # N x G overlap matrix. np.float64 is the dtype the np.float alias stood
    # for; the alias itself no longer exists in current NumPy releases.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # shape is N
    max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N
    labels = gt_boxes[gt_assignment, 4]  # N

    # sample positive rois whose overlap reaches the mask threshold
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0:
        keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
        LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'\
                     %(num_masks, rois.shape[0], gt_masks.shape[0]))

    # Targets are allocated for every RoI so the outputs stay aligned with
    # `rois`; only the sampled rows are written below.
    mask_targets = np.zeros(
        (total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
    mask_inside_weights = np.zeros(
        (total_masks, mask_height, mask_width, num_classes), dtype=np.float32)

    # TODO: speed bottleneck?
    for i in keep_inds:
        roi = rois[i, :4]
        # crop the matched mask to the RoI; box coordinates are inclusive
        cropped = gt_masks[gt_assignment[i],
                           int(roi[1]):int(roi[3]) + 1,
                           int(roi[0]):int(roi[2]) + 1]
        cropped = cv2.resize(cropped, (mask_width, mask_height),
                             interpolation=cv2.INTER_NEAREST)

        mask_targets[i, :, :, int(labels[i])] = cropped
        mask_inside_weights[i, :, :, int(labels[i])] = 1
    return labels, mask_targets, mask_inside_weights
def _get_coco_masks(coco, img_id, height, width, img_name):
  """Gather boxes, instance masks and a class map for one COCO image.

  Note: some images are not annotated; they yield empty boxes and the
  returned flag is set.
  Return:
    gt_boxes, float32 array of shape (m, 5), rows are [x1, y1, x2, y2, class]
    masks, uint8 array holding one zero/one mask per instance
    mask, uint8 (height, width) map with the class id painted on each instance
    non_annotation_tag, True when the image has no annotation
  """
  ann_ids = coco.getAnnIds(imgIds=[img_id], iscrowd=None)
  annotations = coco.loadAnns(ann_ids)

  instance_masks, class_ids, xywh_boxes = [], [], []
  class_map = np.zeros((height, width), dtype=np.float32)
  for ann in annotations:
    binary = coco.annToMask(ann)  # zero one mask
    assert binary.shape[0] == height and binary.shape[1] == width, \
            'image %s and ann %s dont match' % (img_id, ann)
    instance_masks.append(binary)
    real_id = _cat_id_to_real_id(ann['category_id'])
    class_ids.append(real_id)
    xywh_boxes.append(ann['bbox'])
    # paint the class id on the instance; later instances overwrite earlier ones
    weighted = binary.astype(np.float32) * real_id
    covered = weighted > 0
    class_map[covered] = weighted[covered]

  masks = np.asarray(instance_masks)
  classes = np.asarray(class_ids)
  bboxes = np.asarray(xywh_boxes)

  non_annotation_tag = bboxes.shape[0] <= 0
  if non_annotation_tag:
    # give the empty arrays the shapes the column arithmetic below needs
    bboxes = np.zeros([0, 4], dtype=np.float32)
    classes = np.zeros([0], dtype=np.float32)
    sys.stdout.write('\nNone Annotations %s , passed\n' % img_name)
    sys.stdout.flush()
    LOG('None Annotations %s' % img_name)

  # [x, y, w, h] -> [x1, y1, x2, y2]
  bboxes[:, 2] += bboxes[:, 0]
  bboxes[:, 3] += bboxes[:, 1]
  gt_boxes = np.hstack((bboxes, classes[:, np.newaxis])).astype(np.float32)
  masks = masks.astype(np.uint8)
  class_map = class_map.astype(np.uint8)
  assert masks.shape[0] == gt_boxes.shape[0], 'Shape Error'

  return gt_boxes, masks, class_map, non_annotation_tag
Example #4
0
def sample_rpn_outputs(boxes, scores, is_training=False, only_positive=False):
    """Sample boxes according to their scores, a minimum size and NMS.

    Params:
    boxes: box coordinates, each entry is [x1, y1, x2, y2]; reshaped to (N, 4)
    scores: foreground prob, one per box; reshaped to (N, 1)
    is_training: when False, the pre-/post-NMS budgets read from cfg are halved
    only_positive: when True, boxes scoring <= 0.5 are discarded up front

    Returns:
    boxes: (M, 4) surviving boxes
    scores: (M, 1) scores of the surviving boxes
    """
    min_size = cfg.FLAGS.min_size
    rpn_nms_threshold = cfg.FLAGS.rpn_nms_threshold
    pre_nms_top_n = cfg.FLAGS.pre_nms_top_n
    post_nms_top_n = cfg.FLAGS.post_nms_top_n
    if not is_training:
        pre_nms_top_n = int(pre_nms_top_n / 2)
        post_nms_top_n = int(post_nms_top_n / 2)

    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    # filter backgrounds
    # Cheap pre-filter: thresholding removes most background anchors before
    # the (slower) argsort below.
    if only_positive:
        keeps = np.where(scores > 0.5)[0]
        boxes = boxes[keeps, :]
        scores = scores[keeps]

    # filter minimum size
    keeps = _filter_boxes(boxes, min_size=min_size)
    boxes = boxes[keeps, :]
    scores = scores[keeps]

    # filter with scores
    # argsort is ascending, so reverse it: the pre-NMS budget must keep the
    # highest-scoring boxes, not the lowest.
    order = scores.ravel().argsort()[::-1]
    # Test the effective (possibly halved) budget, i.e. the value that is
    # actually used for slicing.
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
    boxes = boxes[order, :]
    scores = scores[order]

    # filter with nms
    det = np.hstack((boxes, scores)).astype(np.float32)
    keeps = nms_wrapper.nms(det, rpn_nms_threshold)

    if post_nms_top_n > 0:
        keeps = keeps[:post_nms_top_n]
    boxes = boxes[keeps, :]
    scores = scores[keeps]
    LOG('%d rois has been choosen' % len(keeps))

    return boxes, scores
Example #5
0
    def _get_coco_masks(self, coco, img_id, height, width, img_name):
        """Gather boxes, classes, instance masks and a class map for one image.

        Note: some images are not annotated; they yield empty boxes and
        classes plus a single all-zero instance mask.
        Crowd annotations get class -1 and are not painted on the class map.
        Return:
          bboxes, float32 array of shape (m, 4), rows are [x1, y1, x2, y2]
          classes, float32 array of shape (m,)
          masks, int32 array holding one zero/one mask per instance
          mask, int32 (height, width) map with the class id painted per instance
        """
        ann_ids = coco.getAnnIds(imgIds=[img_id], iscrowd=None)
        annotations = coco.loadAnns(ann_ids)

        instance_masks, class_ids, xywh_boxes = [], [], []
        class_map = np.zeros((height, width), dtype=np.float32)
        for ann in annotations:
            binary = coco.annToMask(ann)  # zero one mask
            assert binary.shape[0] == height and binary.shape[1] == width, \
                'image %s and ann %s dont match' % (img_id, ann)
            instance_masks.append(binary)
            real_id = self._cat_id_to_real_id(ann['category_id'])
            if ann['iscrowd']:
                real_id = -1
            class_ids.append(real_id)
            xywh_boxes.append(ann['bbox'])
            # only strictly positive ids are painted, so crowds leave no trace;
            # later instances overwrite earlier ones where they overlap
            weighted = binary.astype(np.float32) * real_id
            covered = weighted > 0
            class_map[covered] = weighted[covered]

        masks = np.asarray(instance_masks)
        classes = np.asarray(class_ids)
        bboxes = np.asarray(xywh_boxes)

        if bboxes.shape[0] <= 0:
            # give the empty arrays the shapes the column arithmetic below needs
            bboxes = np.zeros([0, 4], dtype=np.float32)
            classes = np.zeros([0], dtype=np.float32)
            masks = np.zeros([1, height, width], dtype=np.int32)  # force to 1
            LOG('None Annotations %s' % img_name, onscreen=False)

        # [x, y, w, h] -> [x1, y1, x2, y2]
        bboxes[:, 2] += bboxes[:, 0]
        bboxes[:, 3] += bboxes[:, 1]
        assert classes.shape[0] == bboxes.shape[0], \
            'Processing Annotation Error'

        return (bboxes.astype(np.float32), classes.astype(np.float32),
                masks.astype(np.int32), class_map.astype(np.int32))
Example #6
0
def sample_rpn_outputs(boxes, scores, is_training=False, only_positive=False):
    """Sample boxes according to their scores, a minimum size and NMS.

    Params:
    boxes: box coordinates, each entry is [x1, y1, x2, y2]; reshaped to (N, 4)
    scores: probs of fg, in [0, 1], one per box; reshaped to (N, 1)
    is_training: when False, the pre-NMS budget read from cfg is divided by 2
        and the post-NMS budget by 5
    only_positive: when True, boxes scoring <= 0.5 are discarded up front

    Returns:
    boxes: (M, 4) surviving boxes
    scores: (M, 1) float32 scores of the surviving boxes
    batch_inds: (M,) int32 zeros
    """
    min_size = cfg.FLAGS.min_size
    rpn_nms_threshold = cfg.FLAGS.rpn_nms_threshold
    pre_nms_top_n = cfg.FLAGS.pre_nms_top_n
    post_nms_top_n = cfg.FLAGS.post_nms_top_n

    # training: 12000, 2000
    # testing: 6000, 400
    if not is_training:
        pre_nms_top_n = int(pre_nms_top_n / 2)
        post_nms_top_n = int(post_nms_top_n / 5)

    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    assert scores.shape[0] == boxes.shape[0], 'scores and boxes dont match'

    # filter backgrounds
    # Cheap pre-filter: thresholding removes most background anchors before
    # the (slower) argsort below.
    if only_positive:
        keeps = np.where(scores > 0.5)[0]
        boxes = boxes[keeps, :]
        scores = scores[keeps]

    # filter minimum size
    keeps = _filter_boxes(boxes, min_size=min_size)
    boxes = boxes[keeps, :]
    scores = scores[keeps]

    # filter with scores: best first, truncated to the pre-NMS budget
    order = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
    boxes = boxes[order, :]
    scores = scores[order]

    # filter with nms
    det = np.hstack((boxes, scores)).astype(np.float32)
    keeps = nms_wrapper.nms(det, rpn_nms_threshold)

    if post_nms_top_n > 0:
        keeps = keeps[:post_nms_top_n]
    boxes = boxes[keeps, :]
    scores = scores[keeps]
    batch_inds = np.zeros([boxes.shape[0]], dtype=np.int32)

    if _DEBUG:
        LOG('SAMPLE: %d rois has been choosen' % len(scores))
        # Every box may have been filtered out; boxes[0] and np.min() on an
        # empty array would raise, so only inspect the result when non-empty.
        if boxes.shape[0] > 0:
            # ravel() yields scalars for '%.4f' instead of 1-element arrays
            flat_scores = scores.ravel()
            LOG('SAMPLE: a positive box: %d %d %d %d %.4f' %
                (boxes[0, 0], boxes[0, 1], boxes[0, 2], boxes[0, 3],
                 flat_scores[0]))
            LOG('SAMPLE: a negative box: %d %d %d %d %.4f' %
                (boxes[-1, 0], boxes[-1, 1], boxes[-1, 2], boxes[-1, 3],
                 flat_scores[-1]))
            hs = boxes[:, 3] - boxes[:, 1]
            ws = boxes[:, 2] - boxes[:, 0]
            assert min(np.min(hs), np.min(ws)) > 0, 'invalid boxes'

    return boxes, scores.astype(np.float32), batch_inds
Example #7
0
def encode(gt_boxes, rois, num_classes):
    """Match RoIs to ground-truth boxes and encode them into learning targets.

    With ground truth present, foreground RoIs (best overlap >=
    cfg.FLAGS.fg_threshold) and background RoIs (best overlap <
    cfg.FLAGS.bg_threshold) are randomly subsampled; every other RoI keeps
    the label -1. Without ground truth all RoIs are background and a random
    subset is set to -1 so that a bounded number of them remains.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    rois: an array of shape (R x 4), [x1, y1, x2, y2]
    num_classes: scalar, number of classes

    Returns
    --------
    labels: float32 array of shape (R,); -1 for RoIs that were not sampled,
        0 for background, the ground-truth class for foreground
    bbox_targets: regression targets with one row per RoI; (R, 4 * num_classes)
        when there is no ground truth, otherwise whatever _compute_targets
        and _unmap produce (presumably the same layout - TODO confirm)
    bbox_inside_weights: same layout as bbox_targets
    """
    all_rois = rois
    num_rois = rois.shape[0]
    if gt_boxes.size > 0:
        # R x G matrix. np.float64 is the dtype the np.float alias stood for;
        # the alias itself no longer exists in current NumPy releases.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # R
        max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]
        # Start from "ignored" everywhere; assigning gt_boxes[gt_assignment, 4]
        # directly would give every roi a positive label.
        labels = np.zeros([num_rois], dtype=np.float32)
        labels[:] = -1

        # sample rois as to 1:3
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        fg_rois = int(
            min(fg_inds.size,
                cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
        labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]

        # TODO: sampling strategy
        bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
        # up to three backgrounds per foreground, but never fewer than 64
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        labels[bg_inds] = 0

        # ignore rois with overlaps between fg_threshold and bg_threshold
        ignore_inds = np.where(((max_overlaps > cfg.FLAGS.bg_threshold) &\
                (max_overlaps < cfg.FLAGS.fg_threshold)))[0]
        labels[ignore_inds] = -1
        keep_inds = np.append(fg_inds, bg_inds)
        if _DEBUG:
            print('keep_inds')
            print(keep_inds)
            print('fg_inds')
            print(fg_inds)
            print('bg_inds')
            print(bg_inds)
            print('bg_rois:', bg_rois)
            print('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)

            LOG('ROIEncoder: %d positive rois, %d negative rois' %
                (len(fg_inds), len(bg_inds)))

        # targets are computed for the sampled rois only, then expanded back
        # to one row per roi
        bbox_targets, bbox_inside_weights = _compute_targets(
            rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4],
            labels[keep_inds], num_classes)
        bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
        bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds,
                                     0)

    else:
        # there is no gt: everything is background and nothing is regressed
        labels = np.zeros((num_rois, ), np.float32)
        bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
        bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
        bg_rois = min(
            int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)),
            64)
        if bg_rois < num_rois:
            # keep bg_rois backgrounds and mark the remaining rois as ignored
            bg_inds = np.arange(num_rois)
            ignore_inds = np.random.choice(bg_inds,
                                           size=num_rois - bg_rois,
                                           replace=False)
            labels[ignore_inds] = -1

    return labels, bbox_targets, bbox_inside_weights
Example #8
0
def train():
    """The main function that runs training.

    Builds the input queue, the backbone and the pyramid network, then runs
    up to FLAGS.max_iters optimisation steps. Every step is logged, boxes are
    drawn every 50 steps, summaries are written every 100 steps and a
    checkpoint is saved every 500 steps and on the last step.

    Raises:
        RuntimeError: if the total loss is NaN or Inf when it is checked
            (the check runs on the steps on which boxes are drawn).
    """
    ## data
    image, original_image_height, original_image_width, image_height, image_width, gt_boxes, gt_masks, num_instances, image_id = \
        datasets.get_dataset(FLAGS.dataset_name,
                             FLAGS.dataset_split_name,
                             FLAGS.dataset_dir,
                             FLAGS.im_batch,
                             is_training=True)

    ## queuing data
    data_queue = tf.RandomShuffleQueue(capacity=32, min_after_dequeue=16,
            dtypes=(
                image.dtype, original_image_height.dtype, original_image_width.dtype, image_height.dtype, image_width.dtype,
                gt_boxes.dtype, gt_masks.dtype,
                num_instances.dtype, image_id.dtype))
    enqueue_op = data_queue.enqueue((image, original_image_height, original_image_width, image_height, image_width, gt_boxes, gt_masks, num_instances, image_id))
    # four copies of the enqueue op are handed to the queue runner
    data_queue_runner = tf.train.QueueRunner(data_queue, [enqueue_op] * 4)
    tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, data_queue_runner)
    (image, original_image_height, original_image_width, image_height, image_width, gt_boxes, gt_masks, num_instances, image_id) =  data_queue.dequeue()

    # re-impose a 3-channel image shape on the dequeued tensor
    im_shape = tf.shape(image)
    image = tf.reshape(image, (im_shape[0], im_shape[1], im_shape[2], 3))

    ## network
    logits, end_points, pyramid_map = network.get_network(FLAGS.network, image,
            weight_decay=FLAGS.weight_decay, batch_norm_decay=FLAGS.batch_norm_decay, is_training=True)
    outputs = pyramid_network.build(end_points, image_height, image_width, pyramid_map,
            num_classes=81,
            base_anchors=3,#9#15
            is_training=True,
            gt_boxes=gt_boxes, gt_masks=gt_masks,
            loss_weights=[1.0, 1.0, 10.0, 1.0, 10.0])
            # loss_weights=[10.0, 1.0, 0.0, 0.0, 0.0])
            # loss_weights=[100.0, 100.0, 1000.0, 10.0, 100.0])
            # loss_weights=[0.2, 0.2, 1.0, 0.2, 1.0])
            # loss_weights=[0.1, 0.01, 10.0, 0.1, 1.0])

    total_loss = outputs['total_loss']
    losses  = outputs['losses']
    batch_info = outputs['batch_info']
    regular_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    input_image = end_points['input']

    training_rcnn_rois                  = outputs['training_rcnn_rois']
    training_rcnn_clses                 = outputs['training_rcnn_clses']
    training_rcnn_clses_target          = outputs['training_rcnn_clses_target']
    training_rcnn_scores                = outputs['training_rcnn_scores']
    training_mask_rois                  = outputs['training_mask_rois']
    training_mask_clses_target          = outputs['training_mask_clses_target']
    training_mask_final_mask            = outputs['training_mask_final_mask']
    training_mask_final_mask_target     = outputs['training_mask_final_mask_target']
    # per-level RPN shapes, fetched only so they can be logged every step
    tmp_0 = outputs['rpn']['P2']['shape']
    tmp_1 = outputs['rpn']['P3']['shape']
    tmp_2 = outputs['rpn']['P4']['shape']
    tmp_3 = outputs['rpn']['P5']['shape']

    ## solvers
    global_step = slim.create_global_step()
    update_op = solve(global_step)

    # Not used below, but the [0] lookups fail fast if the graph did not
    # register these collections.
    cropped_rois = tf.get_collection('__CROPPED__')[0]
    transposed = tf.get_collection('__TRANSPOSED__')[0]

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    #gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    #sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
    init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
            )
    sess.run(init_op)

    summary_op = tf.summary.merge_all()
    # one timestamped (UTC) log directory per run
    logdir = os.path.join(FLAGS.train_dir, strftime('%Y%m%d%H%M%S', gmtime()))
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)

    ## restore
    restore(sess)

    ## coord settings
    coord = tf.train.Coordinator()
    threads = []
    for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                         start=True))
    # NOTE(review): the runners were already started by the loop above; this
    # second start is kept as in the original - confirm it is intended.
    tf.train.start_queue_runners(sess=sess, coord=coord)

    ## saver init
    saver = tf.train.Saver(max_to_keep=20)

    ## finalize the graph for checking memory leak
    sess.graph.finalize()

    ## main loop
    for step in range(FLAGS.max_iters):

        start_time = time.time()

        # One optimisation step; everything needed for logging and drawing is
        # fetched in the same run call. The unpacking order must match the
        # order of the fetch list.
        s_, tot_loss, reg_lossnp, image_id_str, \
        rpn_box_loss, rpn_cls_loss, rcnn_box_loss, rcnn_cls_loss, mask_loss, \
        gt_boxesnp, tmp_0np, tmp_1np, tmp_2np, tmp_3np, \
        rpn_batch_pos, rpn_batch, rcnn_batch_pos, rcnn_batch, mask_batch_pos, mask_batch, \
        input_imagenp, \
        training_rcnn_roisnp, training_rcnn_clsesnp, training_rcnn_clses_targetnp, training_rcnn_scoresnp, training_mask_roisnp, training_mask_clses_targetnp, training_mask_final_masknp, training_mask_final_mask_targetnp  = \
                     sess.run([update_op, total_loss, regular_loss, image_id] +
                              losses +
                              [gt_boxes] + [tmp_0] + [tmp_1] + [tmp_2] +[tmp_3] +
                              batch_info +
                              [input_image] +
                              [training_rcnn_rois] + [training_rcnn_clses] + [training_rcnn_clses_target] + [training_rcnn_scores] + [training_mask_rois] + [training_mask_clses_target] + [training_mask_final_mask] + [training_mask_final_mask_target])

        duration_time = time.time() - start_time
        # "% 1" logs every step; raise the modulus to log less often
        if step % 1 == 0:
            LOG ( """iter %d: image-id:%07d, time:%.3f(sec), regular_loss: %.6f, """
                    """total-loss %.4f(%.4f, %.4f, %.6f, %.4f, %.4f), """
                    """instances: %d, """
                    """batch:(%d|%d, %d|%d, %d|%d)"""
                   % (step, image_id_str, duration_time, reg_lossnp,
                      tot_loss, rpn_box_loss, rpn_cls_loss, rcnn_box_loss, rcnn_cls_loss, mask_loss,
                      gt_boxesnp.shape[0],
                      rpn_batch_pos, rpn_batch, rcnn_batch_pos, rcnn_batch, mask_batch_pos, mask_batch))

            LOG ("target")
            LOG (cat_id_to_cls_name(np.unique(np.argmax(np.asarray(training_rcnn_clses_targetnp),axis=1))))
            LOG ("predict")
            LOG (cat_id_to_cls_name(np.unique(np.argmax(np.array(training_rcnn_clsesnp),axis=1))))
            LOG (tmp_0np)
            LOG (tmp_1np)
            LOG (tmp_2np)
            LOG (tmp_3np)

        if step % 50 == 0:
            # the image is mapped back with x / 2 + 0.5 and scaled to 0..255
            # (presumably the input was normalised to [-1, 1] - TODO confirm)
            draw_bbox(step,
                      np.uint8((np.array(input_imagenp[0])/2.0+0.5)*255.0),
                      name='train_est',
                      bbox=training_rcnn_roisnp,
                      label=np.argmax(np.array(training_rcnn_scoresnp),axis=1),
                      prob=training_rcnn_scoresnp,
                      # bbox=training_mask_roisnp,
                      # label=training_mask_clses_targetnp,
                      # prob=np.zeros((training_mask_final_masknp.shape[0],81), dtype=np.float32)+1.0,
                      # mask=training_mask_final_masknp,
                      vis_all=True)

            draw_bbox(step,
                      np.uint8((np.array(input_imagenp[0])/2.0+0.5)*255.0),
                      name='train_gt',
                      bbox=training_rcnn_roisnp,
                      label=np.argmax(np.array(training_rcnn_clses_targetnp),axis=1),
                      prob=np.zeros((training_rcnn_clsesnp.shape[0],81), dtype=np.float32)+1.0,
                      # bbox=training_mask_roisnp,
                      # label=training_mask_clses_targetnp,
                      # prob=np.zeros((training_mask_final_masknp.shape[0],81), dtype=np.float32)+1.0,
                      # mask=training_mask_final_mask_targetnp,
                      vis_all=True)

            # NOTE(review): the divergence check only runs on drawing steps;
            # confirm whether it was meant to run on every step.
            if np.isnan(tot_loss) or np.isinf(tot_loss):
                LOG (gt_boxesnp)
                # A bare `raise` has no active exception to re-raise here, so
                # raise an explicit error that says what went wrong.
                raise RuntimeError('total loss is %s at step %d'
                                   % (tot_loss, step))

        if step % 100 == 0:
            summary_str = sess.run(summary_op)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()

        if (step % 500 == 0 or step + 1 == FLAGS.max_iters) and step != 0:
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.dataset_name + '_' + FLAGS.network + '_model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)

        if coord.should_stop():
            coord.request_stop()
            coord.join(threads)
            # the input threads have been joined, so stop iterating instead
            # of running further steps against a queue nobody feeds
            break
        gc.collect()
Example #9
0
def encode(gt_boxes, all_anchors, height, width, stride):
    """Match anchors to ground-truth boxes and encode them into RPN targets.

    Anchors are labelled 1 (foreground), 0 (background) or -1 (ignored), and
    both foreground and background are randomly subsampled.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4); flattened to (N, 4)
    width: width of feature
    height: height of feature
    stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32];
        only used in the debug log
    Returns
    --------
    labels: float32 array reshaped to (1, height, width, -1), values in {-1, 0, 1}
    bbox_targets: regression targets reshaped to (1, height, width, -1),
        4 values per anchor
    bbox_inside_weights: float32 array reshaped to (1, height, width, -1),
        0.1 on the 4 values of every foreground anchor and 0 elsewhere
    """
    # TODO: speedup this module
    # All anchors are used; anchors crossing the image border are not removed.
    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]

    # every anchor starts as "ignored"
    labels = np.empty((anchors.shape[0], ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # np.float64 is the dtype the np.float alias stood for; the alias
        # itself no longer exists in current NumPy releases.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]

        labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0

        # fg label: for each gt, hard-assign the anchor with the highest
        # overlap regardless of how small that overlap is. Only the single
        # argmax anchor is used; matching every tied anchor is sensitive to
        # boxes with little overlap.
        labels[gt_argmax_overlaps] = 1

        if _DEBUG:
            min_ov = np.min(gt_max_overlaps)
            max_ov = np.max(gt_max_overlaps)
            mean_ov = np.mean(gt_max_overlaps)
            if min_ov < cfg.FLAGS.bg_threshold:
                LOG('ANCHOREncoder: overlaps: (min %.3f mean:%.3f max:%.3f), stride: %d, shape:(h:%d, w:%d)'
                    % (min_ov, mean_ov, max_ov, stride, height, width))
                worst = gt_boxes[np.argmin(gt_max_overlaps)]
                anc = anchors[
                    gt_argmax_overlaps[np.argmin(gt_max_overlaps)], :]
                LOG('ANCHOREncoder: worst case: overlap: %.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)'
                    % (min_ov, worst[0], worst[1], worst[2], worst[3],
                       worst[4], anc[0], anc[1], anc[2], anc[3]))

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1

        # subsample positive labels if there are too many
        num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(fg_inds,
                                            size=(len(fg_inds) - num_fg),
                                            replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many: up to three
    # backgrounds per remaining foreground, but never fewer than 8
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    # only foreground anchors contribute to the box loss, with weight 0.1
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 0.1

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))

    return labels, bbox_targets, bbox_inside_weights
Example #10
0
def encode(gt_boxes, all_anchors):
    """Assign class labels and box-regression targets to every anchor.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]; boxes whose
        class is negative are treated as ignored regions
    all_anchors: an array of shape (h, w, A, 4); flattened to (N, 4) internally

    Returns
    --------
    labels: (N,) float32 array in [-1, num_classes], negative labels are ignored
    label_weights: (N,) float32 per-anchor loss weights; only filled in by the
        'simple' sampling strategy, all zeros otherwise
    bbox_targets: (N, 4) float32 regression targets (zeros when there is no gt)
    bbox_inside_weights: (N, 4) float32, set to cfg.bbweights on the anchors
        flagged for box regression and zero elsewhere

    Raises
    ------
    ValueError: if cfg.rpn_sample_strategy is not one of 'traditional',
        'simple' or 'advanced'
    """

    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]
    # 1 where the anchor contributes to the box-regression loss
    bbox_flags_ = np.zeros([total_anchors], dtype=np.int32)

    if gt_boxes.size > 0:
        # np.float64 is what the removed `np.float` alias used to resolve to.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        # best anchor for each gt box, kept separately for the debug log below
        gt_best_anchor = overlaps.argmax(axis=0)  # (G)
        gt_max_overlaps = overlaps[gt_best_anchor,
                                   np.arange(overlaps.shape[1])]
        # row indices of every anchor that ties for the best overlap of some gt
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        # start from the class of the best-matching gt, then demote by overlap
        labels = gt_boxes[gt_assignment, 4]
        labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        labels[np.logical_and(max_overlaps < cfg.rpn_fg_threshold,
                              max_overlaps >= cfg.rpn_bg_threshold)] = -1
        bbox_flags_[max_overlaps >= 0.5] = 1

        # fg label: for each gt, hard-assign anchor with highest overlap despite its overlaps
        labels[gt_argmax_overlaps] = gt_boxes[
            gt_assignment[gt_argmax_overlaps], 4]
        # bbox_flags_[gt_argmax_overlaps] = 1

        # if clobber positive: there may exist some positive objs (jaccard overlap < bg_th) that are not assigned to any anchors
        if cfg.rpn_clobber_positives:
            labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        bbox_flags_[labels >= 1] = 1

        if _DEBUG:
            min_ov = np.min(gt_max_overlaps)
            max_ov = np.max(gt_max_overlaps)
            mean_ov = np.mean(gt_max_overlaps)
            if min_ov < cfg.rpn_bg_threshold:
                LOG('ANCHORSS: overlaps: (min %.3f mean:%.3f max:%.3f)' %
                    (min_ov, mean_ov, max_ov))
                worst_gt = np.argmin(gt_max_overlaps)
                worst = gt_boxes[worst_gt]
                # Use the per-gt argmax: gt_argmax_overlaps is a flat list of
                # tied rows and is not indexable by gt index.
                anc = anchors[gt_best_anchor[worst_gt], :]
                LOG('ANCHORSS: worst overlap:%.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)'
                    % (min_ov, worst[0], worst[1], worst[2], worst[3],
                       worst[4], anc[0], anc[1], anc[2], anc[3]))

        ## handle ignored regions (the gt_class of crowd boxes is set to -1)
        ignored_inds = np.where(gt_boxes[:, -1] < 0)[0]
        if ignored_inds.size > 0:
            ignored_areas = gt_boxes[ignored_inds, :]
            # intersec shape is D x A
            intersecs = cython_bbox.bbox_intersections(
                np.ascontiguousarray(ignored_areas, dtype=np.float64),
                np.ascontiguousarray(anchors, dtype=np.float64))
            intersecs_ = intersecs.sum(axis=0)  # A x 1
            in_ignored = intersecs_ > cfg.ignored_area_intersection_fraction
            labels[in_ignored] = -1
            bbox_flags_[in_ignored] = 0

    else:
        # if there is no gt
        labels = np.zeros([total_anchors], dtype=np.float32)

    label_weights = np.zeros((total_anchors, ), dtype=np.float32)

    if cfg.rpn_sample_strategy == 'traditional':
        # subsample positive labels if there are too many, inherited from fastrcnn
        num_fg = int(cfg.rpn_fg_fraction * cfg.rpn_batch_size)
        fg_inds = np.where(labels >= 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(fg_inds,
                                            size=(len(fg_inds) - num_fg),
                                            replace=False)
            labels[disable_inds] = -1
        else:
            num_fg = len(fg_inds)
        # subsample negative labels if there are too many
        num_bg = max(min(cfg.rpn_batch_size - num_fg, num_fg * 5), 128)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = np.random.choice(bg_inds,
                                            size=(len(bg_inds) - num_bg),
                                            replace=False)
            labels[disable_inds] = -1

    elif cfg.rpn_sample_strategy == 'simple':
        # using label_weights to balance example losses
        fg_inds = np.where(labels >= 1)[0]
        num_fg = len(fg_inds)
        label_weights[fg_inds] = 1.0
        bg_inds = np.where(labels == 0)[0]
        num_bg = len(bg_inds)
        label_weights[bg_inds] = 3 * max(num_fg, 1.0) / max(
            max(num_bg, num_fg), 1.0)

    elif cfg.rpn_sample_strategy == 'advanced':
        # not implemented yet: labels and label_weights are left untouched
        # deal with ignored labels?
        pass
    else:
        raise ValueError(
            'RPN sample strategy %s has not been implemented yet' %
            cfg.rpn_sample_strategy)

    # if True: # person only
    #     nonperson_inds = np.where(np.logical_and(labels != 1, labels != -1))[0]
    #     labels[nonperson_inds] = 0
    #     label_weights[nonperson_inds] = 0
    #     kept_inds = np.random.choice(nonperson_inds, size=(1000), replace=False)
    #     label_weights[kept_inds] = 0.02

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    # bbox_inside_weights[labels >= 1, :] = np.asarray(cfg.bbweights, dtype=np.float32)
    bbox_inside_weights[bbox_flags_ == 1, :] = np.asarray(cfg.bbweights,
                                                          dtype=np.float32)

    labels = labels.reshape((-1, ))
    bbox_targets = bbox_targets.reshape((-1, 4))
    bbox_inside_weights = bbox_inside_weights.reshape((-1, 4))

    return labels.astype(np.float32), label_weights, bbox_targets.astype(
        np.float32), bbox_inside_weights.astype(np.float32)
Example #11
0
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode ground-truth masks into per-RoI learning targets.

    Each RoI is matched to the ground-truth box it overlaps most. RoIs whose
    best overlap reaches cfg.FLAGS.mask_threshold are candidates; at most
    cfg.FLAGS.masks_per_image of them are kept (randomly chosen). For every
    kept RoI the matched mask is cropped to the RoI and resized to
    (mask_height, mask_width).

    Params
    ------
    gt_masks: {0, 1} masks of shape (G, imh, imw, 7)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
    rois:     the bounding boxes of shape (N, 4); NOTE: negative coordinates
              are clamped to 0 in place when there is at least one gt box
    num_classes: K, size of the last axis of the returned targets
    mask_height, mask_width: height and width of output masks

    Returns
    -------
    labels: class ids of shape (N,), float32; -1 marks RoIs that are ignored
    mask_targets: int32 targets of shape (N, mask_height, mask_width, K)
    mask_inside_weights: float32 array of shape (N, mask_height, mask_width, K),
        1 on the channels that were filled for a sampled RoI and 0 elsewhere
    """
    total_masks = rois.shape[0]
    target_shape = (total_masks, mask_height, mask_width, num_classes)

    # Every RoI starts out ignored with empty targets; both branches of the
    # original built these same arrays (the no-gt branch with a wrong width).
    labels = np.full((total_masks, ), -1, dtype=np.float32)
    mask_targets = np.zeros(target_shape, dtype=np.int32)
    mask_inside_weights = np.zeros(target_shape, dtype=np.float32)

    if gt_boxes.size == 0:
        # there is no gt
        return labels, mask_targets, mask_inside_weights

    # overlaps is N x G; np.float64 is what the removed `np.float` alias meant
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # shape is N
    max_overlaps = overlaps[np.arange(len(gt_assignment)),
                            gt_assignment]  # N

    # sample positive rois whose overlap reaches the mask threshold
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0 and num_masks < keep_inds.size:
        keep_inds = np.random.choice(keep_inds,
                                     size=num_masks,
                                     replace=False)
        LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'\
                     %(num_masks, rois.shape[0], gt_masks.shape[0]))

    labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]

    # masks are only defined on positive rois: anything below the foreground
    # threshold is ignored again, even if it was sampled above
    ignore_inds = np.where((max_overlaps < cfg.FLAGS.fg_threshold))[0]
    labels[ignore_inds] = -1

    # clamp to the image origin so the crops below use valid slice starts
    rois[rois < 0] = 0

    # gt_masks carries a fixed number of channels on its last axis
    num_mask_channels = 7

    # TODO: speed bottleneck?
    for i in keep_inds:
        roi = rois[i, :4]
        y1, y2 = int(roi[1]), int(roi[3]) + 1
        x1, x2 = int(roi[0]), int(roi[2]) + 1

        for x in range(num_mask_channels):
            cropped = gt_masks[gt_assignment[i], y1:y2, x1:x2, x]
            cropped = cv2.resize(cropped, (mask_width, mask_height),
                                 interpolation=cv2.INTER_NEAREST)
            mask_targets[i, :, :, x] = cropped
            mask_inside_weights[i, :, :, x] = 1

    return labels, mask_targets, mask_inside_weights