Example #1
    def setup(self, bottom, top):
        self._batch_size = 6
        self._depth = 8
        self._height = 300
        self._width = 400
        self.dataset = UcfSports('train', [self._height, self._width],
                                 '/home/rhou/ucf_sports')

        self._feat_stride = 16
        self._pooled_height = np.round(self._height / float(self._feat_stride))
        self._pooled_width = np.round(self._width / float(self._feat_stride))

        self._root_anchors = generate_anchors(ratios=[0.5, 1, 2, 4],
                                              scales=np.array(
                                                  [3, 6, 8, 11, 14]))
        self.num_anchors = self._root_anchors.shape[0]
        shift_x = np.arange(0, self._pooled_width) * self._feat_stride
        shift_y = np.arange(0, self._pooled_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        K = shifts.shape[0]
        A = self.num_anchors
        all_anchors = (self._root_anchors.reshape((1, A, 4)).transpose(
            (1, 0, 2)) + shifts.reshape((1, K, 4)))
        all_anchors = all_anchors.reshape((K * A, 4))
        self.total_anchors = int(K * A)
        self.inds_inside = np.where(
            (all_anchors[:, 0] >= 0) & (all_anchors[:, 1] >= 0)
            & (all_anchors[:, 2] < self._width + 0) &  # width
            (all_anchors[:, 3] < self._height + 0)  # height
        )[0]
        self.anchors = all_anchors[self.inds_inside, :]
        self.len = len(self.inds_inside)
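The reshape/transpose arithmetic in this setup() is easiest to see on a toy grid. Below is a minimal stand-alone sketch (NumPy only; the single anchor and the 2x2 feature map are made up for illustration) that enumerates shifted anchors the same way:

import numpy as np

root_anchors = np.array([[-8, -8, 8, 8]])   # one hypothetical (x1, y1, x2, y2) anchor, A = 1
feat_stride = 16
pooled_height, pooled_width = 2, 2          # tiny 2x2 feature map

shift_x = np.arange(0, pooled_width) * feat_stride
shift_y = np.arange(0, pooled_height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                    shift_y.ravel())).transpose()   # (K, 4), K = 4 grid cells

A, K = root_anchors.shape[0], shifts.shape[0]
all_anchors = (root_anchors.reshape((1, A, 4)).transpose((1, 0, 2))
               + shifts.reshape((1, K, 4)))         # broadcast to (A, K, 4)
all_anchors = all_anchors.reshape((K * A, 4))
print(all_anchors)
# rows: [-8 -8 8 8], [8 -8 24 8], [-8 8 8 24], [8 8 24 24]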
Example #2
    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        try:
            layer_params = yaml.load(self.param_str_)
        except AttributeError:
            layer_params = yaml.load(self.param_str)
        except:
            raise

        self._feat_stride = layer_params['feat_stride']
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]

        #  if DEBUG:
        #      print 'feat_stride: {}'.format(self._feat_stride)
        #      print 'anchors:'
        #      print self._anchors

        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[0].reshape(1, 5)

        #  # scores blob: holds scores for R regions of interest
        #  if len(top) > 1:
        #      top[1].reshape(1, 1, 1, 1)
        # ind blob:
        if len(top) > 1:
            top[1].reshape(1)
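The param_str handled in this setup() is plain YAML coming from the layer's python_param. A small illustration of the parsing step (the concrete string is made up; yaml.safe_load is used here instead of the yaml.load call above):

import yaml

param_str = "{'feat_stride': 16, 'scales': [8, 16, 32]}"   # hypothetical python_param string
layer_params = yaml.safe_load(param_str)

print(layer_params['feat_stride'])               # 16
print(layer_params.get('scales', (8, 16, 32)))   # [8, 16, 32]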
Example #3
    def __init__(self, feature_stride, scales, ratios):
        super(_ProposalLayer, self).__init__()

        self._feat_stride = feature_stride
        self._anchors = torch.from_numpy(generate_anchors(feature_stride=feature_stride,
                                                          scales=np.array(scales),
                                                          ratios=np.array(ratios))).float()
        self._num_anchors = self._anchors.size(0)
Example #4
    def __init__(self, im_width=0, im_height=0, name=None):
        self.im_w = im_width
        self.im_h = im_height
        self.dataset_name = name
        self.basic_size = cfg.BASIC_SIZE
        self.ratios = cfg.RATIOS
        self.scales = cfg.SCALES

        self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                              self.scales)
Example #5
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        im_info,
                        data,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[4, 8, 16, 32]):
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    def __init__(self, feat_stride, scales, ratios):
        super(_AnchorTargetLayer, self).__init__()

        self._feat_stride = feat_stride
        self._scales = scales
        anchor_scales = scales
        self._anchors = torch.from_numpy(
            generate_anchors(self._feat_stride, scales=np.array(anchor_scales), ratios=np.array(ratios))).float()
        self._num_anchors = self._anchors.size(0)

        self._allowed_border = 0  # allow boxes to sit over the edge by a small amount
Example #7
    def __init__(self, im_width, im_height, batch_size=8):
        self.vid_dir = '/mnt/sda7/ILSVRC2015/Data/VID/train'
        self.annot_dir = '/mnt/sda7/VID_Manual/Annotations/train/random'
        self.img_dirs = []
        self.anno_dirs = []

        self.stride = cfg.STRIDE
        self.basic_size = cfg.BASIC_SIZE
        self.ratios = cfg.RATIOS
        self.scales = cfg.SCALES

        self.K = len(self.ratios) * len(self.scales)

        for DIR in VID_SUBDIRS:
            img_dirs = sorted(os.listdir(op.join(self.vid_dir, DIR)))
            anno_dirs = sorted(os.listdir(op.join(self.annot_dir, DIR)))
            #            img_dirs = [op.join(DIR, _dir) for _dir in img_dirs]
            '''depend on anno dirs, not img dirs'''
            img_dirs = [op.join(DIR, _dir) for _dir in anno_dirs]
            anno_dirs = [op.join(DIR, _dir) for _dir in anno_dirs]
            self.img_dirs.extend(img_dirs)
            self.anno_dirs.extend(anno_dirs)

        self.index = 0
        self.vis_dir = './vis_vid'
        self.vis_index = 0

        self.margin_gain = 0.2

        self.im_w = im_width
        self.im_h = im_height

        self.batch_size = batch_size
        self.roi_size = cfg.DET_ROI_SIZE - cfg.TEMP_ROI_SIZE + 1
        '''INTER_SEQ v.s. INTER_IMG'''
        self.method = cfg.DATA_LOADER_METHOD
        #        assert len(self.img_dirs) == len(self.anno_dirs), 'Data and annotation dirs not uniformed'

        self.num_sequences = len(self.anno_dirs)
        self.num_images = 0

        self.num_visualize = 100
        self.permute_inds = np.random.permutation(np.arange(
            self.num_sequences))

        self.max_interval = 50 if cfg.PHASE == 'TRAIN' else 20
        #        self.valid_seq_inds=np.zeros(0, dtype=np.int32)

        self.iter_stop = False
        self.enum_sequences()

        self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                              self.scales)
Example #8
    def __init__(self):
        self.batch_size = 2
        self.stride = STRIDE[net_type]
        self.im_w, self.im_h = RESOLUTION[net_type]

        self.display = 20
        self.snapshot = 2
        self.decay_ratio = 0.1

        self.lr_mult = 0.5

        self.fetch_config()
        self.update_config()

        self.K = len(self.ratios) * len(self.scales)
        self.TK = len(self.track_ratios) * len(self.track_scales)

        self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios, self.scales)
        self.track_raw_anchors = G.generate_anchors(self.track_basic_size, self.track_ratios, self.track_scales)

        self.model = MotFRCNN(self.im_w, self.im_h)
    def __init__(self, im_width, im_height, batch_size=8):
        self.im_w = im_width
        self.im_h = im_height
        self.batch_size = batch_size
        self.stride = cfg.STRIDE

        self.bound = (im_width, im_height)
        self.out_size = (self.im_w // self.stride, self.im_h // self.stride)

        self.fetch_config()

        self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                              self.scales)
        dummy_search_box = np.array([[0, 0, self.im_w - 1, self.im_h - 1]])
        self.anchors = G.gen_region_anchors(self.raw_anchors, dummy_search_box,
                                            self.bound, K=self.K, size=self.out_size)[0]
    def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str)
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]
        self._feat_stride = layer_params['feat_stride']

        if DEBUG:
            print('anchors:')
            print(self._anchors)
            print('anchor shapes:')
            print(
                np.hstack((
                    self._anchors[:, 2::4] - self._anchors[:, 0::4],
                    self._anchors[:, 3::4] - self._anchors[:, 1::4],
                )))

            self._counts = cfg.EPS
            self._sums = np.zeros((1, 4))
            self._squared_sums = np.zeros((1, 4))
            self._fg_sum = 0
            self._bg_sum = 0
            self._count = 0

        # allow boxes to sit over the edge by a small amount
        self._allowed_border = layer_params.get('allowed_border', 0)

        height, width = bottom[0].data.shape[-2:]
        if DEBUG:
            print('AnchorTargetLayer: height', height, 'width', width)

        A = self._num_anchors
        # labels
        top[0].reshape(1, 1, A * height, width)
        # bbox_targets
        top[1].reshape(1, A * 4, height, width)
        # bbox_inside_weights
        top[2].reshape(1, A * 4, height, width)
        # bbox_outside_weights
        top[3].reshape(1, A * 4, height, width)
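The four reshapes above fix the RPN target layout per anchor position. For a concrete feel, a hypothetical 38x50 feature map with the default 9 anchors gives the following blob shapes (the numbers are illustrative, not taken from the example):

A, height, width = 9, 38, 50          # assumed anchor count and feature-map size
print((1, 1, A * height, width))      # labels blob:       (1, 1, 342, 50)
print((1, A * 4, height, width))      # bbox_targets blob: (1, 36, 38, 50)
# bbox_inside_weights and bbox_outside_weights share the (1, 36, 38, 50) shape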
    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str_)

        self._feat_stride = layer_params['feat_stride']
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]

        if DEBUG:
            print(('feat_stride: {}'.format(self._feat_stride)))
            print('anchors:')
            print((self._anchors))

        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[0].reshape(1, 5)

        # scores blob: holds scores for R regions of interest
        if len(top) > 1:
            top[1].reshape(1, 1, 1, 1)
Example #12
def inference_track(model, roidb):
    rpn_conv_size = cfg.RPN_CONV_SIZE
    basic_size = cfg.TRACK_BASIC_SIZE
    ratios = cfg.TRACK_RATIOS
    scales = cfg.TRACK_SCALES
    K = len(ratios) * len(scales)

    raw_anchors = G.generate_anchors(basic_size, ratios, scales)

    if cfg.IMAGE_NORMALIZE:
        roidb['temp_image'] -= roidb['temp_image'].min()
        roidb['temp_image'] /= roidb['temp_image'].max()
        roidb['det_image'] -= roidb['det_image'].min()
        roidb['det_image'] /= roidb['det_image'].max()
        roidb['temp_image'] = (roidb['temp_image'] - 0.5) / 0.5
        roidb['det_image'] = (roidb['det_image'] - 0.5) / 0.5
    else:
        roidb['temp_image'] -= cfg.PIXEL_MEANS
        roidb['det_image'] -= cfg.PIXEL_MEANS

    bound = roidb['bound']
    #    print(bound)

    output_dict = model(roidb, task='track')

    temp_boxes = roidb['temp_boxes']
    search_boxes = roidb['search_boxes']

    configs = {}
    configs['K'] = K
    configs['temp_boxes'] = temp_boxes
    configs['search_boxes'] = search_boxes
    configs['rpn_conv_size'] = rpn_conv_size
    configs['raw_anchors'] = raw_anchors
    configs['bound'] = bound
    ret = get_track_output(output_dict, configs)
    bboxes_list = ret['bboxes_list']
    anchors_list = ret['anchors_list']
    return bboxes_list, anchors_list
    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str)

        self._feat_stride = layer_params['feat_stride']
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]

        if DEBUG:
            print('feat_stride: {}'.format(self._feat_stride))
            print('anchors:')
            print(self._anchors)

        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[0].reshape(1, 5)

        # scores blob: holds scores for R regions of interest
        if len(top) > 1:
            top[1].reshape(1, 1, 1, 1)
Example #14
  def generate_proposals(self, cls_prob, bbox_pred, im_info):
  
    batch_size = cls_prob[0].shape[0]
    if batch_size > 1:
      raise ValueError("Sorry, multiple images for each device is not implemented.")

    pre_nms_topN = self.rpn_pre_nms_top_n 
    post_nms_topN = self.rpn_post_nms_top_n
    min_size = self.rpn_min_size

    proposal_list = []
    score_list = []
    
    for idx in range(len(self.feat_stride)):
      stride = int(self.feat_stride[idx])
      sub_anchors = generate_anchors(stride=stride, sizes=self.scales * stride, aspect_ratios=self.ratios)

      scores, bbox_deltas = cls_prob[idx], bbox_pred[idx]

      # 1. generate proposals from bbox_deltas and shifted anchors
      # use real image size instead of padded feature map sizes
      height, width = scores.shape[-3:-1]

      # enumerate all shifts 
      shift_x = np.arange(0, width) * stride 
      shift_y = np.arange(0, height) * stride 
      shift_x, shift_y = np.meshgrid(shift_x, shift_y)
      shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

      A = self.num_anchors 
      K = shifts.shape[0]
      anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
      anchors = anchors.reshape((K * A, 4))

      bbox_deltas = bbox_deltas.reshape((-1, 4))
      scores = scores.reshape((-1, 1))

      if self.individual_proposals:
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
          order = np.argsort(-scores.squeeze())
        else:
          inds = np.argpartition(
            -scores.squeeze(), pre_nms_topN
          )[:pre_nms_topN]
          order = np.argsort(-scores[inds].squeeze())
          order = inds[order]
        bbox_deltas = bbox_deltas[order, :]
        anchors = anchors[order, :]
        scores = scores[order]
      
      # convert anchors into proposals via bbox transformations
      proposals = bbox_transform(anchors, bbox_deltas)

      # 2. clip predicted boxes to image 
      proposals = clip_boxes(proposals, im_info[:2])

      # 3. remove predicted boxes with either height or width < threshold 
      # (NOTE: convert min_size to input image scale stored in im_info[2])
      # keep = self._filter_boxes(proposals, min_size * im_info[2])
      keep = self._filter_boxes(proposals, min_size * im_info[2])
      proposals = proposals[keep, :]
      scores = scores[keep]

      if self.individual_proposals:
        keep = self.nms_func(np.hstack((proposals, scores)).astype(np.float32))
        if post_nms_topN > 0:
          keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
      
      proposal_list.append(proposals)
      score_list.append(scores)
  
    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)

    batch_inds = np.ones((proposals.shape[0], 1), dtype=np.float32) * self.batch_idx
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
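generate_proposals() relies on a self._filter_boxes helper that is not shown here. A minimal sketch of what such a filter usually does in Faster R-CNN style code (the name, the +1 size convention, and the min_size semantics are assumptions, not taken from this example):

import numpy as np

def _filter_boxes(boxes, min_size):
    # Keep indices of boxes whose width and height both reach min_size.
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep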
Example #15
from rpn.template import get_template

im_width = 768
im_height = 448

MAX_TEMPLATE_SIZE = im_height

colors = [[0, 255, 255], [255, 85, 0], [255, 170, 0], [255, 255, 0],
          [170, 255, 0], [85, 255, 0],
          [0, 255, 0], [0, 255, 85], [0, 255, 170], [255, 0, 0], [0, 170, 255],
          [0, 85, 255], [0, 0, 255], [85, 0, 255], [255, 0, 0], [255, 85, 0],
          [255, 170, 0], [255, 255, 0], [0, 0, 255], [85, 0, 255], [255, 0, 0],
          [255, 85, 0], [255, 170, 0], [255, 255, 0]]

templates = get_template(min_size=64, max_size=im_height, num_templates=5)
det_raw_anchors = G.generate_anchors(cfg.BASIC_SIZE, cfg.RATIOS, cfg.SCALES)
track_raw_anchors = G.generate_anchors(cfg.TRACK_BASIC_SIZE, cfg.TRACK_RATIOS,
                                       cfg.TRACK_SCALES)
K = len(cfg.RATIOS) * len(cfg.SCALES)
TK = len(cfg.TRACK_RATIOS) * len(cfg.TRACK_SCALES)
rpn_conv_size = cfg.RPN_CONV_SIZE
out_size = (im_width // 8, im_height // 8)


def add_new_targets(ids, boxes, new_ids, new_boxes):
    combined_ids = ids.copy()
    combined_boxes = boxes.copy()
    num_instances = len(ids)
    num_new_instances = new_boxes.shape[0]

    index = 0
    def _load_imagenet3d_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the imagenet3d format.
        """

        if self._image_set == 'test' or self._image_set == 'test_1' or self._image_set == 'test_2':
            lines = []
        else:
            filename = os.path.join(self._imagenet3d_path, 'Labels',
                                    index + '.txt')
            lines = []
            with open(filename) as f:
                for line in f:
                    lines.append(line)

        num_objs = len(lines)

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        viewpoints = np.zeros(
            (num_objs, 3),
            dtype=np.float32)  # azimuth, elevation, in-plane rotation
        viewpoints_flipped = np.zeros(
            (num_objs, 3),
            dtype=np.float32)  # azimuth, elevation, in-plane rotation
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        for ix, line in enumerate(lines):
            words = line.split()
            assert len(words) == 5 or len(
                words) == 8, 'Wrong label format: {}'.format(index)
            cls = self._class_to_ind[words[0]]
            boxes[ix, :] = [float(n) for n in words[1:5]]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0
            if len(words) == 8:
                viewpoints[ix, :] = [float(n) for n in words[5:8]]
                # flip the viewpoint
                viewpoints_flipped[ix, 0] = -viewpoints[ix, 0]  # azimuth
                viewpoints_flipped[ix, 1] = viewpoints[ix, 1]  # elevation
                viewpoints_flipped[ix,
                                   2] = -viewpoints[ix, 2]  # in-plane rotation
            else:
                viewpoints[ix, :] = np.inf
                viewpoints_flipped[ix, :] = np.inf

        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        viewindexes_azimuth = np.zeros((num_objs, self.num_classes),
                                       dtype=np.float32)
        viewindexes_azimuth_flipped = np.zeros((num_objs, self.num_classes),
                                               dtype=np.float32)
        viewindexes_elevation = np.zeros((num_objs, self.num_classes),
                                         dtype=np.float32)
        viewindexes_elevation_flipped = np.zeros((num_objs, self.num_classes),
                                                 dtype=np.float32)
        viewindexes_rotation = np.zeros((num_objs, self.num_classes),
                                        dtype=np.float32)
        viewindexes_rotation_flipped = np.zeros((num_objs, self.num_classes),
                                                dtype=np.float32)

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)
        viewindexes_azimuth = scipy.sparse.csr_matrix(viewindexes_azimuth)
        viewindexes_azimuth_flipped = scipy.sparse.csr_matrix(
            viewindexes_azimuth_flipped)
        viewindexes_elevation = scipy.sparse.csr_matrix(viewindexes_elevation)
        viewindexes_elevation_flipped = scipy.sparse.csr_matrix(
            viewindexes_elevation_flipped)
        viewindexes_rotation = scipy.sparse.csr_matrix(viewindexes_rotation)
        viewindexes_rotation_flipped = scipy.sparse.csr_matrix(
            viewindexes_rotation_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(list(range(num_objs)),
                                    len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = cfg.TRAIN.RPN_ASPECTS
                scales = cfg.TRAIN.RPN_SCALES
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_viewpoints': viewpoints,
            'gt_viewpoints_flipped': viewpoints_flipped,
            'gt_viewindexes_azimuth': viewindexes_azimuth,
            'gt_viewindexes_azimuth_flipped': viewindexes_azimuth_flipped,
            'gt_viewindexes_elevation': viewindexes_elevation,
            'gt_viewindexes_elevation_flipped': viewindexes_elevation_flipped,
            'gt_viewindexes_rotation': viewindexes_rotation,
            'gt_viewindexes_rotation_flipped': viewindexes_rotation_flipped,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
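The coverage bookkeeping above hinges on bbox_overlaps, i.e. pairwise IoU between anchors (or grid boxes) and ground-truth boxes. A plain-Python stand-in for a single pair (not the Cython routine the repo uses; the pixel-inclusive +1 convention is assumed):

def iou(box_a, box_b):
    # IoU of two (x1, y1, x2, y2) boxes with inclusive pixel coordinates.
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    iw, ih = max(ix2 - ix1 + 1, 0), max(iy2 - iy1 + 1, 0)
    inter = iw * ih
    area_a = (box_a[2] - box_a[0] + 1) * (box_a[3] - box_a[1] + 1)
    area_b = (box_b[2] - box_b[0] + 1) * (box_b[3] - box_b[1] + 1)
    return inter / float(area_a + area_b - inter)

print(iou([0, 0, 15, 15], [8, 8, 23, 23]))   # 64 / 448 = 0.1428...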
Example #17
 def __init__(self):
     super(ProposalLayer, self).__init__()
     self.feat_stride = cfg.FEAT_STRIDE[0]
     self.anchors = generate_anchors()
     self.num_anchors = self.anchors.size(0)
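Calling generate_anchors() with no arguments leans on the function's defaults; in the classic py-faster-rcnn implementation those are base_size=16, ratios=[0.5, 1, 2] and scales=2**np.arange(3, 6), which yields 9 anchors. A quick check under that assumption (this particular repo evidently wraps the result in a tensor, hence .size(0) above):

from rpn.generate_anchors import generate_anchors   # module path as imported elsewhere in these examples

anchors = generate_anchors()   # assumed defaults: base_size=16, ratios=[0.5, 1, 2], scales=2**np.arange(3, 6)
print(anchors.shape)           # expected (9, 4) for the NumPy variant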
Example #18
import rpn.generate_anchors as G

im_width = 768
im_height = 448

colors = [[0, 255, 255], [255, 85, 0], [255, 170, 0], [255, 255, 0],
          [170, 255, 0], [85, 255, 0],
          [0, 255, 0], [0, 255, 85], [0, 255, 170], [255, 0, 0], [0, 170, 255],
          [0, 85, 255], [0, 0, 255], [85, 0, 255], [255, 0, 0], [255, 85, 0],
          [255, 170, 0], [255, 255, 0], [0, 0, 255], [85, 0, 255], [255, 0, 0],
          [255, 85, 0], [255, 170, 0], [255, 255, 0]]

K = len(cfg.RATIOS) * len(cfg.SCALES)
bound = (im_width, im_height)
out_size = (im_width // 8, im_height // 8)
det_raw_anchors = G.generate_anchors(cfg.BASIC_SIZE, cfg.RATIOS, cfg.SCALES)
dummy_search_box = np.array([[0, 0, im_width - 1, im_height - 1]])
det_anchors = G.gen_region_anchors(det_raw_anchors,
                                   dummy_search_box,
                                   bound,
                                   K=K,
                                   size=out_size)[0]
bound = (im_width, im_height)


def main(dataset_obj, model=None):
    loader = DataLoader(dataset_obj)
    #    video_path='./result.avi'
    #    writer=cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc('M','J','P','G'), 30.0, (im_width, im_height))

    roidb = {'anchors': det_anchors, 'bound': bound}
def imdb_rpn_compute_stats(net,
                           imdb,
                           anchor_scales=(8, 16, 32),
                           feature_stride=16):
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)
    sums = 0
    squared_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)
    # Compute a map of input image size and output feature map blob
    map_w = {}
    map_h = {}
    for i in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, i, i)),
            'im_info': np.asarray([[i, i, 1.0]])
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        blobs_out = net.forward(data=blobs['data'].astype(np.float32,
                                                          copy=False),
                                im_info=blobs['im_info'].astype(np.float32,
                                                                copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height

    for i in range(len(roidb)):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))
        im = cv2.imread(roidb[i]['image'])
        im_data, im_info = _get_image_blob(im)
        gt_boxes = roidb[i]['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]
        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= 0)
                               & (all_anchors[:, 1] >= 0)
                               & (all_anchors[:, 2] < im_info[0, 1]) &  # width
                               (all_anchors[:, 3] < im_info[0, 0])  # height
                               )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))

        # There are 2 types of bbox targets
        # 1. anchor whose overlaps with gt is greater than RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))
        gt_rois = gt_boxes[argmax_overlaps, :]

        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32,
                                                                 copy=False)
        sums += targets.sum(axis=0)
        squared_sums += (targets**2).sum(axis=0)
        counts += targets.shape[0]

    means = sums / counts
    stds = np.sqrt(squared_sums / counts - means**2)
    print(means)
    print(stds)
    return means, stds
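The means/stds at the end are simply the per-column mean and population standard deviation of all regression targets, accumulated via running sums. A self-contained sanity check on synthetic targets (NumPy only):

import numpy as np

rng = np.random.default_rng(0)
targets = rng.normal(size=(1000, 4))    # stand-in for bbox_transform outputs

sums = targets.sum(axis=0)
squared_sums = (targets ** 2).sum(axis=0)
counts = targets.shape[0]

means = sums / counts
stds = np.sqrt(squared_sums / counts - means ** 2)

assert np.allclose(means, targets.mean(axis=0))
assert np.allclose(stds, targets.std(axis=0))   # np.std defaults to the population (ddof=0) estimate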
Example #20
    ratios = cfg.RATIOS
    scales = cfg.SCALES
    stride = 8

    cfg.STRIDE = stride
    cfg.PHASE = 'TEST'
    cfg.TEST.RPN_POST_NMS_TOP_N = 100
    cfg.NUM_CLASSES = len(CLASSES)
    cfg.TEST.IMS_PER_BATCH = 1
    cfg.TEST.NMS_THRESH = 0.5
    cfg.TEST.RPN_NMS_THRESH = 0.7
    #    cfg.TEST.RPN_POST_NMS_TOP_N=300

    K = len(ratios) * len(scales)

    raw_anchors = G.generate_anchors(basic_size, ratios, scales)

    bound = (im_width, im_height)
    out_size = (im_width // stride, im_height // stride)

    dummy_search_box = np.array([0, 0, bound[0], bound[1]]).reshape(1, -1)
    anchors = G.gen_region_anchors(raw_anchors,
                                   dummy_search_box,
                                   bound,
                                   K=K,
                                   size=out_size)[0]

    print(anchors.shape)

    img_files = [
        'img00337.jpg', 'img00832.jpg', 'img00995.jpg', 'img01879.jpg',
    def _load_pascal_annotation(self, index):
        """
        Load image and bounding boxes info from XML file in the PASCAL VOC
        format.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')

        # print 'Loading: {}'.format(filename)
        def get_data_from_tag(node, tag):
            return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        objs = data.getElementsByTagName('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            # Make pixel indexes 0-based
            x1 = float(get_data_from_tag(obj, 'xmin')) - 1
            y1 = float(get_data_from_tag(obj, 'ymin')) - 1
            x2 = float(get_data_from_tag(obj, 'xmax')) - 1
            y2 = float(get_data_from_tag(obj, 'ymax')) - 1
            name = str(get_data_from_tag(obj, "name")).lower().strip()
            if name in self._classes:
                cls = self._class_to_ind[name]
            else:
                cls = 0
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(list(range(num_objs)),
                                    len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
Example #22
    def predict(self, inputs):

        _anchors = generate_anchors(scales=np.array(self._anchor_scales))
        _num_anchors = _anchors.shape[0]
	print("_num_anchors",_anchors.shape)
        pre_nms_topN  = cfg.FRCNN.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.FRCNN.RPN_POST_NMS_TOP_N
        nms_thresh    = cfg.FRCNN.RPN_NMS_THRESH
        min_size      = cfg.FRCNN.RPN_MIN_SIZE
	print("nms_thresh",nms_thresh)
        scores = inputs[0][:, :, :, _num_anchors:]
        bbox_deltas = inputs[1]
 
        #anchors
        height, width = scores.shape[-3:-1]
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
            shift_x.ravel(), shift_y.ravel())).transpose()

        A = _num_anchors
        K = shifts.shape[0]
        anchors = _anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        print("ttttttttttttttttttttttt,anchors",anchors.shape)
        #box
        bbox_deltas = bbox_deltas.reshape((-1, 4))

        #scores
        scores = scores.reshape((-1, 1))

        proposals = bbox_transform_inv(anchors, bbox_deltas)
        boxdecode = proposals

        im_info = np.array([height, width, 0])
        print("proposals1:", proposals.shape)
        proposals = clip_boxes(proposals, im_info[:2])
        print("proposals2:", proposals.shape)
        print("im_info[:2]", im_info[:2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        print("proposals3:", proposals.shape)
        print("scores.shape1", scores.shape)
        scores = scores[keep]
        print("scores.shape2", scores.shape)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
	print("proposals4:",proposals.shape)
        scores = scores[order]

        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]

	print("proposals5:",proposals.shape)
        return proposals,boxdecode,anchors
    def _load_kitti_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the KITTI format.
        """

        if self._image_set == 'test':
            lines = []
        else:
            filename = os.path.join(self._data_path, 'training', 'label_2',
                                    index + '.txt')
            lines = []
            with open(filename) as f:
                for line in f:
                    line = line.replace('Van', 'Car')
                    words = line.split()
                    cls = words[0]
                    truncation = float(words[1])
                    occlusion = int(words[2])
                    height = float(words[7]) - float(words[5])
                    if cls in self._class_to_ind and truncation < 0.5 and occlusion < 3 and height > 25:
                        lines.append(line)

        num_objs = len(lines)

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            boxes[ix, :] = [float(n) for n in words[4:8]]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(list(range(num_objs)),
                                    len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float),
                                              gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
Example #24
    num_anchors = 20

    # anchors_person = gen_anchors(imdb.roidb, 10, [1])
    # anchors_cyclist = gen_anchors(imdb.roidb, 10, [2])
    # anchors_car = gen_anchors(imdb.roidb, 60, [3])
    # anchors = np.vstack( (anchors_person, anchors_cyclist, anchors_car) )

    anchors = gen_anchors(imdb.roidb, num_anchors, [1, 2, 3])

    from rpn.generate_anchors import generate_anchors
    # anchor_scales = np.exp( np.linspace( np.log(2), np.log(11), 3 ) )
    # anchor_ratios = np.exp( np.linspace( np.log(0.3), np.log(2), 3) )
    anchor_scales = (2, 4, 8, 16, 32)
    anchor_ratios = (0.5, 1, 2.0)
    anchors_ = generate_anchors(scales=np.array(anchor_scales),
                                ratios=np.array(anchor_ratios))

    # Draw anchors
    fig = plt.figure(1, figsize=(15, 10))
    axes = [fig.add_subplot(2, 1, ii + 1) for ii in range(2)]

    clrs = sns.color_palette("Set2", 100)
    axes[0].set_xlim(-200, 200)
    axes[0].set_ylim(-200, 200)
    axes[1].set_xlim(-200, 200)
    axes[1].set_ylim(-200, 200)

    for aa, clr in zip(anchors, clrs):
        axes[0].add_patch(
            plt.Rectangle((aa[0], aa[1]),
                          aa[2] - aa[0],
Example #25
    # Load dataset
    imdb = kitti('train', '2012')
    roidb = imdb.roidb

    #im_scale = float(576) / float(375)
    im_scale = 1.0
    feat_stride = 16
    height, width = (int(375. * im_scale / feat_stride),
                     int(1242. * im_scale / feat_stride))  # feature map size

    # Load anchors
    anchor_setting = 'kitti_scale5_ratio4_imscale1.0'
    scales = np.array(range(1, 10, 2))
    ratios = np.asarray([0.5, 1.0, 2., 2.5])
    anchors = generate_anchors(scales=scales, ratios=ratios)

    #anchor_setting = 'kitti-data-driven'
    #anchors = imdb.get_anchors()

    anchors = anchors * im_scale

    A = anchors.shape[0]

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
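With these settings the grid enumeration produces A anchors per location times K locations. A quick self-contained count using the numbers from this example:

import numpy as np

scales = np.array(range(1, 10, 2))         # 5 scales, as above
ratios = np.asarray([0.5, 1.0, 2., 2.5])   # 4 ratios, as above
feat_stride = 16
height, width = int(375. / feat_stride), int(1242. / feat_stride)   # 23 x 77 feature map

A = len(scales) * len(ratios)   # 20 anchors per location
K = height * width              # 1771 locations
print(A * K)                    # 35420 shifted anchors before any clipping or filtering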
Example #26
if __name__ == '__main__':
	
	cfg_from_file('experiments/cfgs/faster_rcnn_end2end_kitti.yml')

	# Load dataset	
	from datasets.kitti import kitti
	imdb = kitti('train', '2012')
	roidb = imdb.roidb

	
	im_scale = float(576) / float(375)	  

	# Load anchors
	from rpn.generate_anchors import generate_anchors
	anchors = generate_anchors(scales=np.array(range(1,10)), ratios=[0.5, 1., 1.5, 2., 2.5, 3.])
	anchors = anchors * im_scale

	num_anchors = anchors.shape[0]
	#height, width = (375, 1242)
	height, width = (int(375*im_scale/16), int(1242*im_scale/16))
	feat_stride = 16

	# 1. Generate proposals from bbox deltas and shifted anchors
	shift_x = np.arange(0, width) * feat_stride
	shift_y = np.arange(0, height) * feat_stride
	shift_x, shift_y = np.meshgrid(shift_x, shift_y)
	shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    
    # add A anchors (1, A, 4) to
    def _load_pascal3d_voxel_exemplar_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the pascal subcategory exemplar format.
        """

        if self._image_set == 'val':
            return self._load_pascal_annotation(index)

        filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME,
                                index + '.txt')
        assert os.path.exists(filename), \
                'Path does not exist: {}'.format(filename)

        # the annotation file contains flipped objects
        lines = []
        lines_flipped = []
        with open(filename) as f:
            for line in f:
                words = line.split()
                subcls = int(words[1])
                is_flip = int(words[2])
                if subcls != -1:
                    if is_flip == 0:
                        lines.append(line)
                    else:
                        lines_flipped.append(line)

        num_objs = len(lines)

        # store information of flipped objects
        assert (num_objs == len(lines_flipped)
                ), 'The number of flipped objects is not the same!'
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)

        for ix, line in enumerate(lines_flipped):
            words = line.split()
            subcls = int(words[1])
            gt_subclasses_flipped[ix] = subcls

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes),
                                      dtype=np.int32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            subcls = int(words[1])
            # Make pixel indexes 0-based
            boxes[ix, :] = [float(n) - 1 for n in words[3:7]]
            gt_classes[ix] = cls
            gt_subclasses[ix] = subcls
            overlaps[ix, cls] = 1.0
            subindexes[ix, cls] = subcls
            subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float),
                                              boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    index = np.tile(list(range(num_objs)),
                                    len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes_all == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])
                    index_covered = np.unique(index[fg_inds])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
                scales = 2**np.arange(1, 6, 0.5)
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the conv feature map: one stride-4
                # stage followed by two stride-2 poolings gives a total
                # stride of 16 relative to the rescaled image
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2.0 + 1 + 0.5)
                height = np.floor((height - 1) / 2.0 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack(
                    (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                     shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape(
                    (1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                              gt_boxes.astype(np.float64))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(
                            np.where((gt_classes == k) & (
                                max_overlaps >= cfg.TRAIN.FG_THRESH[k - 1]))
                            [0])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(
                            np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(
                            np.where(gt_classes[fg_inds] == i)[0])

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_subclasses': gt_subclasses,
            'gt_subclasses_flipped': gt_subclasses_flipped,
            'gt_overlaps': overlaps,
            'gt_subindexes': subindexes,
            'gt_subindexes_flipped': subindexes_flipped,
            'flipped': False
        }
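
The snippet above tiles base anchors over every feature-map cell and then counts, per class, how many ground-truth boxes are matched by at least one anchor above that class's FG_THRESH. The following is a minimal, self-contained sketch of that coverage check in plain NumPy; bbox_iou_matrix and shift_anchors are illustrative stand-ins for the repository's bbox_overlaps and anchor-shifting code, and the toy anchors and boxes are made up.

import numpy as np

def bbox_iou_matrix(boxes_a, boxes_b):
    """Pairwise IoU between (N, 4) and (M, 4) boxes in (x1, y1, x2, y2) form."""
    ax1, ay1, ax2, ay2 = np.split(boxes_a, 4, axis=1)   # each (N, 1)
    bx1, by1, bx2, by2 = boxes_b.T                       # each (M,)
    iw = np.clip(np.minimum(ax2, bx2) - np.maximum(ax1, bx1) + 1, 0, None)
    ih = np.clip(np.minimum(ay2, by2) - np.maximum(ay1, by1) + 1, 0, None)
    inter = iw * ih                                      # (N, M)
    area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)
    return inter / (area_a + area_b - inter)

def shift_anchors(base_anchors, height, width, feat_stride=16):
    """Tile base anchors over every cell of a (height, width) feature map."""
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A, K = base_anchors.shape[0], shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    return all_anchors.reshape((K * A, 4))

# Toy data: two square base anchors and two ground-truth boxes.
base = np.array([[-8, -8, 8, 8], [-16, -16, 16, 16]], dtype=np.float64)
gt_boxes = np.array([[30, 30, 60, 60], [100, 120, 180, 200]], dtype=np.float64)
anchors = shift_anchors(base, height=20, width=30)
max_iou_per_gt = bbox_iou_matrix(anchors, gt_boxes).max(axis=0)
print('gt boxes covered at IoU >= 0.5:', int((max_iou_per_gt >= 0.5).sum()))
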
Beispiel #28
0
    def predict(self, inputs):

        _anchors = generate_anchors(scales=np.array(self._anchor_scales))
        _num_anchors = _anchors.shape[0]

        print("_num_anchors", _anchors.shape)

        pre_nms_topN = 6000
        post_nms_topN = 100
        nms_thresh = 0.699999988079  # ~0.7 (float32 rounding)
        min_size = 16

        print("nms_thresh", nms_thresh)
        scores = inputs[0][:, :, :, _num_anchors:]
        bbox_deltas = inputs[1]

        # anchors
        height, width = scores.shape[-3:-1]
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        A = _num_anchors
        K = shifts.shape[0]
        anchors = _anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        print("================ anchors", anchors.shape)
        print("================ anchors 0", anchors[0])
        # box
        bbox_deltas = bbox_deltas.reshape((-1, 4))

        # scores
        scores = scores.reshape((-1, 1))

        # ('bbox_deltas 1:', array([ 0.8462524, -0.1174521, -2.104301 , -1.7837868], dtype=float32))
        # ('bbox_deltas 2:', array([-0.1174521,  0.8462524, -2.104301 , -1.7837868], dtype=float32))

        bbox_deltas = bbox_deltas[:, (1, 0, 2, 3)]

        proposals = bbox_transform_inv(anchors, bbox_deltas)
        boxdecode = proposals

        # height/width here come from the score map, and the scale entry is 0,
        # so the min-size filter below effectively keeps all boxes
        im_info = np.array([height, width, 0])
        print("bbox_transform_inv proposals:", proposals.shape)
        print("bbox_transform_inv proposals[0]:", proposals[0])

        proposals = clip_boxes(proposals, im_info[:2])
        print("clip_boxes proposals :", proposals.shape)
        print("clip_boxes proposals[0] :", proposals[0])
        print("im_info:[:2]", im_info[:2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        print("_filter_boxes proposals3:", proposals.shape)
        print("_filter_boxes proposals3 [0]:", proposals[0])

        print("scores.shape1", scores.shape)
        scores = scores[keep]
        print("scores.shape2", scores.shape)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        print("proposals4:", proposals.shape)
        scores = scores[order]

        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]

        print("proposals5:", proposals.shape)
        return proposals, boxdecode, anchors
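
predict above leans on bbox_transform_inv and clip_boxes, which are not shown in this excerpt. Assuming they follow the usual py-faster-rcnn parameterization (deltas as center offsets and log width/height scalings), a minimal sketch looks like this:

import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Decode (dx, dy, dw, dh) deltas relative to anchor centers and sizes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights

    pred = np.zeros_like(deltas)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w   # x1
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h   # y1
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w   # x2
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h   # y2
    return pred

def clip_boxes(boxes, im_shape):
    """Clip boxes to the image window, im_shape = (height, width)."""
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_shape[1] - 1)
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_shape[0] - 1)
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_shape[1] - 1)
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_shape[0] - 1)
    return boxes

# One anchor, one delta: decode and clip to a 14x14 window.
anchors = np.array([[0.0, 0.0, 15.0, 15.0]])
deltas = np.array([[0.1, -0.2, 0.3, 0.0]])
print(clip_boxes(bbox_transform_inv(anchors, deltas), (14, 14)))
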
Beispiel #29
0
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[16, ],
                   anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,[1,rpn_cls_prob_reshape.shape[0],rpn_cls_prob_reshape.shape[1],rpn_cls_prob_reshape.shape[2]]),[0,3,2,1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred,[0,3,2,1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
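
proposal_layer also calls _filter_boxes and nms, which are likewise outside this excerpt. A minimal NumPy sketch of their conventional behavior (a greedy CPU NMS rather than the optimized kernel the repository may use) is:

import numpy as np

def _filter_boxes(boxes, min_size):
    """Keep boxes whose width and height are both at least min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]

def nms(dets, thresh):
    """Greedy NMS on (N, 5) rows of (x1, y1, x2, y2, score)."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # IoU of the current top box with the remaining candidates
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = w * h / (areas[i] + areas[order[1:]] - w * h)
        # drop candidates that overlap the kept box too much
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep
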
Beispiel #30
0
    def __init__(self, feat_stride, anchor_scales, phase):
        self._feat_stride = feat_stride
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]
        self.phase = phase
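
Several of these snippets construct their base anchor set with generate_anchors, either from scales alone (as directly above) or from an explicit (basic_size, ratios, scales) triple as in the next example. Assuming the common Faster R-CNN recipe of an area-preserving ratio enumeration followed by a scale enumeration, a standalone sketch is shown below; generate_base_anchors is an illustrative name, not any of these repositories' functions.

import numpy as np

def generate_base_anchors(basic_size=16, ratios=(0.5, 1.0, 2.0), scales=(8, 16, 32)):
    """Return (len(ratios) * len(scales), 4) anchors centered on a basic_size cell."""
    ctr = (basic_size - 1) / 2.0
    area = float(basic_size * basic_size)
    anchors = []
    for r in ratios:
        # keep the area of the basic_size box, change the aspect ratio
        w = np.round(np.sqrt(area / r))
        h = np.round(w * r)
        for s in scales:
            ws, hs = w * s, h * s
            anchors.append([ctr - 0.5 * (ws - 1), ctr - 0.5 * (hs - 1),
                            ctr + 0.5 * (ws - 1), ctr + 0.5 * (hs - 1)])
    return np.array(anchors, dtype=np.float64)

base = generate_base_anchors()
print(base.shape)   # (9, 4) for 3 ratios x 3 scales

The number of rows, len(ratios) * len(scales), is the per-cell anchor count the next example stores as self.K.
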
Beispiel #31
0
    def __init__(self, im_width, im_height, batch_size=8):
        self.data_dir = op.join(cfg.DATA_DIR, 'Insight-MVT_Annotation_Train')
        self.anno_dir = op.join(cfg.DATA_DIR, 'DETRAC-Train-Annotations-XML')

        self.stride = cfg.STRIDE
        self.basic_size = cfg.BASIC_SIZE
        self.ratios = cfg.RATIOS
        self.scales = cfg.SCALES

        self.track_basic_size = cfg.TRACK_BASIC_SIZE
        self.track_ratios = cfg.TRACK_RATIOS
        self.track_scales = cfg.TRACK_SCALES

        self.K = len(self.ratios) * len(self.scales)
        self.TK = len(self.track_ratios) * len(self.track_scales)
        self.rpn_conv_size = cfg.RPN_CONV_SIZE

        self.img_dirs = sorted(os.listdir(self.data_dir))
        self.anno_files = sorted(os.listdir(self.anno_dir))

        for ext_seq in EXTRA_SEQS:
            ext_anno = '{}.xml'.format(ext_seq)
            #            assert ext_anno in self.anno_files, '{} not exists'.format(ext_anno)
            self.anno_files.remove(ext_anno)
            self.img_dirs.remove(ext_seq)

        self.index = 0
        self.vis_dir = './vis_vid'
        self.vis_index = 0

        self.margin_gain = 0.2

        self.im_w = im_width
        self.im_h = im_height

        self.bound = (im_width, im_height)
        self.out_size = (im_width // self.stride, im_height // self.stride)

        self.batch_size = batch_size
        self.roi_size = cfg.DET_ROI_SIZE - cfg.TEMP_ROI_SIZE + 1

        self.num_sequences = len(self.anno_files)
        self.num_images = 0

        self.num_visualize = 100
        self.permute_inds = np.random.permutation(np.arange(
            self.num_sequences))

        self.max_interval = 4 if cfg.PHASE == 'TRAIN' else 1

        self.iter_stop = False
        self.enum_sequences()

        self.templates = get_template(min_size=cfg.TEMP_MIN_SIZE,
                                      max_size=cfg.TEMP_MAX_SIZE,
                                      num_templates=cfg.TEMP_NUM)

        self.raw_anchors = G.generate_anchors(self.basic_size, self.ratios,
                                              self.scales)
        dummy_search_box = np.array([[0, 0, self.im_w - 1, self.im_h - 1]])
        self.det_anchors = G.gen_region_anchors(self.raw_anchors,
                                                dummy_search_box,
                                                self.bound,
                                                K=self.K,
                                                size=self.out_size)[0]

        self.track_raw_anchors = G.generate_anchors(self.track_basic_size,
                                                    self.track_ratios,
                                                    self.track_scales)