Python create_squared_image Examples, datasets.ds_utils.create_squared_image Python Examples

Example #1

0

Show file

File: PredictionCNN.py Project: wuguowuge/who_where

    def single_sample(self, entry, K=3):
        """Predict K x K candidate placements for one test entry.

        The images at entry['bg_image'] and entry['bg_layout'] are read,
        padded by ds_utils.create_squared_image, resized to
        cfg.PREDICT_RESOLUTION and fed to the center branch once; the size
        branch is then run once for each of the K highest-scoring centers.

        Returns a tuple (xywhs, grids, heatmap):
            xywhs   -- ds_utils.indices_to_boxes(grids, cfg.GRID_SHAPE).
            grids   -- (K*K, 2) float array of [center index, size index]
                       rows, ordered by center rank and then by size rank.
            heatmap -- 3-channel uint8 image: the center probabilities
                       resized to the padded image, scaled by 255 and
                       histogram-equalized.
        """
        grid_shape = cfg.GRID_SHAPE
        state_dims = cfg.STATE_DIMS
        resolution = cfg.PREDICT_RESOLUTION
        # cv2.resize takes its target size as (width, height).
        net_size = (resolution[1], resolution[0])

        img = cv2.imread(entry['bg_image'], cv2.IMREAD_COLOR)
        lyo = cv2.imread(entry['bg_layout'], cv2.IMREAD_COLOR)
        # The padding offsets are not needed in this method.
        img, _, _ = ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
        lyo, _, _ = ds_utils.create_squared_image(lyo, cfg.PIXEL_MEANS)

        net_img, net_lyo = self.preprocess_input_images(
            np.expand_dims(cv2.resize(img, net_size), 0),
            np.expand_dims(cv2.resize(lyo, net_size), 0))

        feat, cen_probs = self.center_inference.predict([net_img, net_lyo])
        cen_probs = cen_probs.squeeze()
        # Indices of the K most probable centers, best first.
        top_centers = np.array(np.argsort(cen_probs)[::-1][:K])
        rois = ds_utils.centers_to_rois(top_centers, grid_shape[:2],
                                        grid_shape[:2])

        # The empty float seed keeps the (0, 2) shape and the float dtype
        # of the stacked result.
        rows = [np.zeros((0, 2))]
        for c in range(K):
            roi = rois[c].reshape((1, state_dims[0], state_dims[1]))
            size_probs = self.size_inference.predict([feat, roi]).squeeze()
            top_sizes = np.array(np.argsort(size_probs.squeeze())[::-1][:K])
            for s in range(K):
                pair = np.array([top_centers[c], top_sizes[s]])
                rows.append(pair.reshape((1, 2)))
        grids = np.vstack(rows)

        xywhs = ds_utils.indices_to_boxes(grids, grid_shape)

        prob_map = cen_probs.reshape((grid_shape[0], grid_shape[1]))
        heatmap = cv2.resize(prob_map, (img.shape[1], img.shape[0]))
        heatmap = cv2.equalizeHist((255 * heatmap).astype(np.uint8))
        heatmap = np.repeat(np.expand_dims(heatmap, axis=-1), 3, axis=-1)

        return xywhs, grids, heatmap

Example #2

0

Show file

File: imdb.py Project: wuguowuge/who_where

    def get_minibatch(self, square=True):
        """Assemble the next training minibatch from the object database.

        cfg.TRAIN.BATCH_SIZE entries of self.objdb are taken in the order
        given by self.objdb_perm, starting at self.objdb_cur, and the cursor
        is advanced. self.permute_objdb_indices() is called first whenever
        the slice would reach or pass the end of self.objdb.

        Args:
            square: when true, every image is padded with
                ds_utils.create_squared_image and the box is shifted by the
                returned offsets before normalization.

        Returns a tuple (images, boxes, grids):
            images -- float32 array (batch, RESOLUTION[0], RESOLUTION[1],
                      RESOLUTION[2]) of resized images.
            boxes  -- (batch, 4) output of ds_utils.normalize_xywh.
            grids  -- (batch, 2) output of ds_utils.boxes_to_indices.
        """
        batch_size = cfg.TRAIN.BATCH_SIZE
        grid_shape = cfg.GRID_SHAPE
        resolution = cfg.RESOLUTION

        # Pick the indices of this minibatch. The cursor is read only
        # after the optional permutation.
        if len(self.objdb) <= self.objdb_cur + batch_size:
            self.permute_objdb_indices()
        first = self.objdb_cur
        db_inds = self.objdb_perm[first:first + batch_size]
        self.objdb_cur += batch_size

        image_shape = (batch_size, resolution[0], resolution[1],
                       resolution[2])
        images = np.zeros(image_shape, dtype=np.float32)
        boxes = np.zeros((batch_size, 4))
        grids = np.zeros((batch_size, 2))
        # cv2.resize takes its target size as (width, height).
        net_size = (resolution[1], resolution[0])

        for i in range(batch_size):
            obj = self.objdb[db_inds[i]]
            width, height = obj['width'], obj['height']

            # Work on a copy so the stored box is left untouched.
            xyxy = obj['box'].copy()

            img = cv2.imread(obj['background'], cv2.IMREAD_COLOR)
            if obj['flipped']:
                img = cv2.flip(img, 1)
            xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).squeeze()

            if square:
                # Shift the box by the padding offsets; the padded image
                # side replaces the recorded width and height.
                img, pad_x, pad_y = ds_utils.create_squared_image(
                    img, cfg.PIXEL_MEANS)
                xywh[0] += pad_x
                xywh[1] += pad_y
                width = height = img.shape[0]

            nxywh = ds_utils.normalize_xywh(
                xywh.reshape((1, 4)), width, height).squeeze()
            grid = ds_utils.boxes_to_indices(
                nxywh.reshape((1, 4)), grid_shape).squeeze()

            images[i, :, :, :] = cv2.resize(img, net_size)
            grids[i, :] = grid
            boxes[i, :] = nxywh

        return images, boxes, grids

Example #3

0

Show file

File: PredictionCNN.py Project: liuguoyou/who_where

    def single_sample(self, entry, K=3):
        """Return the K x K best (center, size) candidates for one entry.

        entry['bg_image'] and entry['bg_layout'] are loaded, padded by
        ds_utils.create_squared_image and resized to cfg.PREDICT_RESOLUTION.
        The center branch scores all grid cells; for each of the K best
        centers the size branch is queried and its K best sizes are kept.

        Returns a tuple (xywhs, grids, heatmap):
            xywhs   -- ds_utils.indices_to_boxes(grids, cfg.GRID_SHAPE).
            grids   -- (K*K, 2) float array of [center index, size index].
            heatmap -- 3-channel uint8 rendering of the center
                       probabilities at the padded image size, after
                       histogram equalization.
        """
        resolution = cfg.PREDICT_RESOLUTION
        grid_shape = cfg.GRID_SHAPE
        state_dims = cfg.STATE_DIMS
        # cv2.resize takes its target size as (width, height).
        net_size = (resolution[1], resolution[0])

        img = cv2.imread(entry['bg_image'], cv2.IMREAD_COLOR)
        lyo = cv2.imread(entry['bg_layout'], cv2.IMREAD_COLOR)
        # Padding offsets are returned as well but are unused here.
        img, _, _ = ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
        lyo, _, _ = ds_utils.create_squared_image(lyo, cfg.PIXEL_MEANS)

        batch_img = np.expand_dims(cv2.resize(img, net_size), 0)
        batch_lyo = np.expand_dims(cv2.resize(lyo, net_size), 0)
        batch_img, batch_lyo = self.preprocess_input_images(
            batch_img, batch_lyo)

        feat, cen_probs = self.center_inference.predict(
            [batch_img, batch_lyo])
        cen_probs = cen_probs.squeeze()
        best_centers = np.array(np.argsort(cen_probs)[::-1][:K])
        rois = ds_utils.centers_to_rois(
            best_centers, grid_shape[:2], grid_shape[:2])

        # Start from an empty (0, 2) float block so the stacked result
        # keeps that shape and dtype.
        stacked = [np.zeros((0, 2))]
        for ci in range(K):
            roi = rois[ci].reshape((1, state_dims[0], state_dims[1]))
            size_probs = self.size_inference.predict([feat, roi]).squeeze()
            best_sizes = np.array(np.argsort(size_probs.squeeze())[::-1][:K])
            for si in range(K):
                stacked.append(
                    np.array([best_centers[ci], best_sizes[si]]).reshape(
                        (1, 2)))
        grids = np.vstack(stacked)

        xywhs = ds_utils.indices_to_boxes(grids, grid_shape)

        heatmap = cv2.resize(
            cen_probs.reshape((grid_shape[0], grid_shape[1])),
            (img.shape[1], img.shape[0]))
        heatmap = cv2.equalizeHist((255 * heatmap).astype(np.uint8))
        heatmap = np.repeat(np.expand_dims(heatmap, axis=-1), 3, axis=-1)

        return xywhs, grids, heatmap

Example #4

0

Show file

File: imdb.py Project: liuguoyou/who_where

    def get_minibatch(self, square=True):
        """Build one minibatch of images, normalized boxes and grid indices.

        The batch covers cfg.TRAIN.BATCH_SIZE consecutive positions of
        self.objdb_perm starting at self.objdb_cur; the cursor is advanced
        afterwards. If the batch would reach or pass the end of self.objdb,
        self.permute_objdb_indices() is called before slicing.

        Args:
            square: pad each image with ds_utils.create_squared_image and
                shift its box by the returned offsets.

        Returns a tuple (images, boxes, grids):
            images -- float32, (batch, RESOLUTION[0], RESOLUTION[1],
                      RESOLUTION[2]).
            boxes  -- (batch, 4), from ds_utils.normalize_xywh.
            grids  -- (batch, 2), from ds_utils.boxes_to_indices.
        """
        resolution = cfg.RESOLUTION
        grid_shape = cfg.GRID_SHAPE
        batch_size = cfg.TRAIN.BATCH_SIZE

        # Indices of the minibatch; the cursor is read after the optional
        # permutation.
        if len(self.objdb) <= self.objdb_cur + batch_size:
            self.permute_objdb_indices()
        begin = self.objdb_cur
        db_inds = self.objdb_perm[begin:begin + batch_size]
        self.objdb_cur += batch_size

        images = np.zeros(
            (batch_size, resolution[0], resolution[1], resolution[2]),
            dtype=np.float32)
        grids = np.zeros((batch_size, 2))
        boxes = np.zeros((batch_size, 4))
        # cv2.resize takes its target size as (width, height).
        out_size = (resolution[1], resolution[0])

        for i in range(batch_size):
            obj = self.objdb[db_inds[i]]
            width, height = obj['width'], obj['height']
            # Copy the box so the database entry is not modified.
            xyxy = obj['box'].copy()

            img = cv2.imread(obj['background'], cv2.IMREAD_COLOR)
            if obj['flipped']:
                img = cv2.flip(img, 1)
            xywh = ds_utils.xyxy_to_xywh(xyxy.reshape((1, 4))).squeeze()

            if square:
                # The padded image side replaces the recorded size.
                img, dx, dy = ds_utils.create_squared_image(
                    img, cfg.PIXEL_MEANS)
                xywh[0] += dx
                xywh[1] += dy
                width = height = img.shape[0]

            nxywh = ds_utils.normalize_xywh(
                xywh.reshape((1, 4)), width, height).squeeze()

            images[i, :, :, :] = cv2.resize(img, out_size)
            grids[i, :] = ds_utils.boxes_to_indices(
                nxywh.reshape((1, 4)), grid_shape).squeeze()
            boxes[i, :] = nxywh

        return images, boxes, grids

Example #5

0

Show file

File: imdb.py Project: wuguowuge/who_where

    def draw_roidb_bboxes(self, output_dir, roidb=None):
        """Draw all boxes of every roidb image and save the annotated images.

        Each image is read, flipped horizontally when roi['flipped'] is set,
        padded by ds_utils.create_squared_image, and annotated with one
        rectangle and one '<box index>_<class name>' label per box. The
        result is written to <output_dir>/roidb_bboxes/<image basename>.

        Args:
            output_dir: directory that receives the 'roidb_bboxes' folder.
            roidb: entries to draw; defaults to self._roidb.
        """
        # Create exactly the folder that is written to below. The original
        # prepared 'roidb_boxes' but wrote into 'roidb_bboxes', so
        # cv2.imwrite targeted a directory that was never created.
        vis_dir = osp.join(output_dir, 'roidb_bboxes')
        ds_utils.maybe_create(output_dir)
        ds_utils.maybe_create(vis_dir)

        if roidb is None:
            roidb = self._roidb

        for i, roi in enumerate(roidb):
            im_path = roi['image']
            # Copy so the offsets below do not alter the stored boxes.
            bboxes = roi['boxes'].copy()
            clses = roi['clses']

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if roi['flipped']:
                img = cv2.flip(img, 1)

            # Shift the [x1, y1, x2, y2] boxes by the padding offsets.
            img, offset_x, offset_y = \
                ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
            bboxes[:, 0] += offset_x
            bboxes[:, 1] += offset_y
            bboxes[:, 2] += offset_x
            bboxes[:, 3] += offset_y

            # Font size grows with the image side length.
            fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])

            for j in range(bboxes.shape[0]):
                bb = bboxes[j, :].astype(np.int16)
                cls = self.classes[clses[j]]

                cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]),
                              (0, 255, 0), 1)
                cv2.putText(img, '{:}_{:}'.format(j, cls),
                            (bb[0], bb[1] - 2),
                            cv2.FONT_HERSHEY_SIMPLEX, fontScale,
                            (0, 0, 255), 1)

            cv2.imwrite(osp.join(vis_dir, osp.basename(im_path)), img)
            # Progress indicator.
            print(i)

Example #6

0

Show file

File: imdb.py Project: wuguowuge/who_where

    def draw_objdb_bboxes(self, output_dir, objdb=None):
        """Draw the box of every objdb entry and save one image per object.

        Each image is read, flipped horizontally when obj['flipped'] is set,
        padded by ds_utils.create_squared_image, and annotated with the
        object's rectangle and an '<obj_id>_<cls>' label. The result is
        written to
        <output_dir>/objdb_boxes/<image name>_<obj_id zero-padded to 12><ext>.

        Args:
            output_dir: directory that receives the 'objdb_boxes' folder.
            objdb: entries to draw; defaults to self._objdb.
        """
        vis_dir = osp.join(output_dir, 'objdb_boxes')
        ds_utils.maybe_create(output_dir)
        ds_utils.maybe_create(vis_dir)

        if objdb is None:
            objdb = self._objdb

        for i, obj in enumerate(objdb):
            im_path = obj['image']
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            # Copy the box: the original shifted obj['box'] in place, so
            # every call moved the stored annotation a little further.
            box = obj['box'].copy()
            cls = obj['cls']
            aid = obj['obj_id']

            if obj['flipped']:
                img = cv2.flip(img, 1)

            # Shift the [x1, y1, x2, y2] box by the padding offsets.
            img, offset_x, offset_y = \
                ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
            box[0] += offset_x
            box[1] += offset_y
            box[2] += offset_x
            box[3] += offset_y

            # Builtin int is what the removed np.int alias stood for.
            bb = box.astype(int)

            # Font size grows with the image side length.
            fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])

            cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 1)

            # The original label used an undefined name `j` (NameError).
            # The object id is used instead, matching the output file name.
            cv2.putText(img, '{:}_{:}'.format(aid, cls), (bb[0], bb[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 255), 1)

            im_name, im_ext = osp.splitext(osp.basename(im_path))
            output_path = osp.join(
                vis_dir, im_name + '_' + str(aid).zfill(12) + im_ext)
            cv2.imwrite(output_path, img)

            # Progress indicator.
            print(i)

Example #7

0

Show file

File: imdb.py Project: liuguoyou/who_where

    def draw_roidb_bboxes(self, output_dir, roidb=None):
        """Render the boxes of each roidb image and write the result to disk.

        For every entry the image is loaded, mirrored horizontally when
        roi['flipped'] is set, padded by ds_utils.create_squared_image, and
        each box is drawn with a '<box index>_<class name>' label. Output
        goes to <output_dir>/roidb_bboxes/<image basename>.

        Args:
            output_dir: directory that receives the 'roidb_bboxes' folder.
            roidb: entries to draw; defaults to self._roidb.
        """
        # The folder prepared here must be the one imwrite targets. The
        # original created 'roidb_boxes' yet wrote to 'roidb_bboxes', a
        # directory that was never created.
        vis_dir = osp.join(output_dir, 'roidb_bboxes')
        ds_utils.maybe_create(output_dir)
        ds_utils.maybe_create(vis_dir)

        if roidb is None:
            roidb = self._roidb

        for i, roi in enumerate(roidb):
            im_path = roi['image']
            # Copy so the stored boxes are not shifted.
            bboxes = roi['boxes'].copy()
            clses = roi['clses']

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if roi['flipped']:
                img = cv2.flip(img, 1)

            # Move the [x1, y1, x2, y2] boxes into the padded frame.
            img, offset_x, offset_y = \
                ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
            bboxes[:, 0] += offset_x
            bboxes[:, 1] += offset_y
            bboxes[:, 2] += offset_x
            bboxes[:, 3] += offset_y

            # Font size grows with the image side length.
            fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])

            for j in range(bboxes.shape[0]):
                bb = bboxes[j, :].astype(np.int16)
                cls = self.classes[clses[j]]

                cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]),
                              (0, 255, 0), 1)
                cv2.putText(img, '{:}_{:}'.format(j, cls),
                            (bb[0], bb[1] - 2),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale, (0, 0, 255), 1)

            output_path = osp.join(vis_dir, osp.basename(im_path))
            cv2.imwrite(output_path, img)
            # Progress indicator.
            print(i)

Example #8

0

Show file

File: imdb.py Project: liuguoyou/who_where

    def draw_objdb_bboxes(self, output_dir, objdb=None):
        """Render each objdb entry's box on its image and write it to disk.

        For every entry the image is loaded, mirrored horizontally when
        obj['flipped'] is set, padded by ds_utils.create_squared_image, and
        the box is drawn with an '<obj_id>_<cls>' label. Output goes to
        <output_dir>/objdb_boxes/<image name>_<obj_id zero-padded to 12><ext>.

        Args:
            output_dir: directory that receives the 'objdb_boxes' folder.
            objdb: entries to draw; defaults to self._objdb.
        """
        vis_dir = osp.join(output_dir, 'objdb_boxes')
        ds_utils.maybe_create(output_dir)
        ds_utils.maybe_create(vis_dir)

        if objdb is None:
            objdb = self._objdb

        for i, obj in enumerate(objdb):
            im_path = obj['image']
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            # Work on a copy: shifting obj['box'] in place, as the original
            # did, corrupts the database on every call.
            box = obj['box'].copy()
            cls = obj['cls']
            aid = obj['obj_id']

            if obj['flipped']:
                img = cv2.flip(img, 1)

            # Move the [x1, y1, x2, y2] box into the padded frame.
            img, offset_x, offset_y = \
                ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
            box[0] += offset_x
            box[1] += offset_y
            box[2] += offset_x
            box[3] += offset_y

            # np.int was only an alias of the builtin int and is removed in
            # recent NumPy releases.
            bb = box.astype(int)

            # Font size grows with the image side length.
            fontScale = 0.0007 * math.sqrt(2 * img.shape[0] * img.shape[0])

            cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]),
                          (0, 255, 0), 1)

            # `j` was never defined in this method (NameError). Label with
            # the object id, which also appears in the output file name.
            cv2.putText(img, '{:}_{:}'.format(aid, cls),
                        (bb[0], bb[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale, (0, 0, 255), 1)

            im_name, im_ext = osp.splitext(osp.basename(im_path))
            output_path = osp.join(
                vis_dir, im_name + '_' + str(aid).zfill(12) + im_ext)
            cv2.imwrite(output_path, img)

            # Progress indicator.
            print(i)

Example #9

0

Show file

File: PredictionCNN.py Project: liuguoyou/who_where

    def sampler(self, test_db, epoch=0, K=3, vis=False):
        """Predict boxes for every entry of test_db and dump them to disk.

        The center and size inference models first receive the matching
        branch weights of self.model. For each entry, self.single_sample
        yields K*K candidates on the padded (squared) image; they are
        denormalized, converted to [x1, y1, x2, y2], shifted back by the
        padding offsets and clipped to the original image. The boxes are
        written to <self.output_dir>/prediction_jsons/<image name>.json.

        Args:
            test_db: sequence of dicts with fields 'bg_image' and
                'bg_layout'.
            epoch: number used as the '%04d_' prefix of visualization files.
            K: number of centers, and of sizes per center, to keep.
            vis: when true, also write '<prefix><name>_ol<ext>' (boxes plus
                heatmap overlay) and '<prefix><name>_hm<ext>' (heatmap) to
                <self.output_dir>/prediction_vis.

        Returns:
            A list with one dict per predicted box, holding 'bg_image',
            'name', 'box' (a row of the clipped box array) and 'rank' (the
            row index).
        """
        self.center_inference.set_weights(
            self.get_center_branch_weights(self.model))
        self.size_inference.set_weights(
            self.get_size_branch_weights(self.model))

        output_dir = osp.join(self.output_dir, 'prediction_jsons')
        ds_utils.maybe_create(output_dir)
        if vis:
            vis_dir = osp.join(self.output_dir, 'prediction_vis')
            ds_utils.maybe_create(vis_dir)

        res_db = []

        for entry in test_db:
            im_path = entry['bg_image']
            im_name, im_ext = osp.splitext(osp.basename(im_path))
            ori_img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            # Only the padded size and the offsets are needed here;
            # single_sample reloads and pads the image itself.
            img, ox, oy = ds_utils.create_squared_image(
                ori_img, cfg.PIXEL_MEANS)
            width = img.shape[1]
            height = img.shape[0]

            xywhs, grids, heatmap = self.single_sample(entry, K=K)
            xywhs = ds_utils.denormalize_xywh(xywhs, width, height)
            xyxys = ds_utils.xywh_to_xyxy(xywhs, width, height)

            # Map boxes and heatmap from the padded frame back to the
            # original image.
            xyxys[:, 0] -= ox
            xyxys[:, 1] -= oy
            xyxys[:, 2] -= ox
            xyxys[:, 3] -= oy
            xyxys = ds_utils.clip_boxes(xyxys, ori_img.shape[1],
                                        ori_img.shape[0])
            heatmap = heatmap[oy:(oy + ori_img.shape[0]),
                              ox:(ox + ori_img.shape[1]), :]

            res = {
                'bg_image': im_path,
                'name': im_name,
                'boxes': xyxys.tolist(),
            }
            json_path = osp.join(output_dir, im_name + '.json')
            with open(json_path, 'w') as res_file:
                json.dump(res, res_file, indent=4, separators=(',', ': '))

            if vis:
                # vis_img aliases ori_img, so the boxes are drawn onto the
                # loaded image itself.
                vis_img = ori_img
                for j in range(xyxys.shape[0]):
                    bb = xyxys[j]
                    color = self.palette[j % len(self.palette)]
                    cv2.rectangle(vis_img, (bb[0], bb[1]), (bb[2], bb[3]),
                                  color, 4)

                # Boost the second channel by the heatmap intensity. The
                # builtin float replaces the removed np.float alias.
                tmp = np.ones_like(heatmap, dtype=float)
                tmp[:, :, 1] += heatmap[:, :, 1] / 255.0
                overlay = np.multiply(vis_img, tmp)
                overlay = np.minimum(overlay, 255).astype(np.uint8)

                prefix = '%04d_' % epoch + im_name
                cv2.imwrite(osp.join(vis_dir, prefix + '_ol' + im_ext),
                            overlay)
                cv2.imwrite(osp.join(vis_dir, prefix + '_hm' + im_ext),
                            heatmap)

            # One flat record per box; the row index is the rank.
            for rank in range(len(res['boxes'])):
                res_db.append({
                    'bg_image': im_path,
                    'name': im_name,
                    'box': xyxys[rank],
                    'rank': rank,
                })

        return res_db

Example #10

0

Show file

File: PredictionCNN.py Project: wuguowuge/who_where

    def sampler(self, test_db, epoch=0, K=3, vis=False):
        """Run prediction over test_db, saving JSON results and visuals.

        After the center and size inference models are given the matching
        branch weights of self.model, every entry is passed to
        self.single_sample. Its K*K candidates live on the padded (squared)
        image, so they are denormalized, converted to [x1, y1, x2, y2],
        shifted back by the padding offsets and clipped to the original
        image. The boxes are stored in
        <self.output_dir>/prediction_jsons/<image name>.json.

        Args:
            test_db: sequence of dicts with fields 'bg_image' and
                'bg_layout'.
            epoch: number used as the '%04d_' prefix of visualization files.
            K: number of centers, and of sizes per center, to keep.
            vis: when true, additionally write the overlay ('_ol') and
                heatmap ('_hm') images to <self.output_dir>/prediction_vis.

        Returns:
            A list with one dict per predicted box, holding 'bg_image',
            'name', 'box' (a row of the clipped box array) and 'rank' (the
            row index).
        """
        self.center_inference.set_weights(
            self.get_center_branch_weights(self.model))
        self.size_inference.set_weights(
            self.get_size_branch_weights(self.model))

        output_dir = osp.join(self.output_dir, 'prediction_jsons')
        ds_utils.maybe_create(output_dir)
        if vis:
            vis_dir = osp.join(self.output_dir, 'prediction_vis')
            ds_utils.maybe_create(vis_dir)

        res_db = []

        for entry in test_db:
            im_path = entry['bg_image']
            im_name, im_ext = osp.splitext(osp.basename(im_path))
            ori_img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            ori_h, ori_w = ori_img.shape[0], ori_img.shape[1]

            # The padded image supplies the frame size and the offsets;
            # single_sample loads and pads the image again on its own.
            img, ox, oy = ds_utils.create_squared_image(
                ori_img, cfg.PIXEL_MEANS)
            width = img.shape[1]
            height = img.shape[0]

            xywhs, grids, heatmap = self.single_sample(entry, K=K)
            xywhs = ds_utils.denormalize_xywh(xywhs, width, height)
            xyxys = ds_utils.xywh_to_xyxy(xywhs, width, height)

            # Undo the padding for both the boxes and the heatmap.
            xyxys[:, 0] -= ox
            xyxys[:, 1] -= oy
            xyxys[:, 2] -= ox
            xyxys[:, 3] -= oy
            xyxys = ds_utils.clip_boxes(xyxys, ori_w, ori_h)
            heatmap = heatmap[oy:(oy + ori_h), ox:(ox + ori_w), :]

            res = {
                'bg_image': im_path,
                'name': im_name,
                'boxes': xyxys.tolist(),
            }
            json_path = osp.join(output_dir, im_name + '.json')
            with open(json_path, 'w') as res_file:
                json.dump(res, res_file, indent=4, separators=(',', ': '))

            if vis:
                # vis_img is the loaded image itself, not a copy.
                vis_img = ori_img
                for j in range(xyxys.shape[0]):
                    bb = xyxys[j]
                    color = self.palette[j % len(self.palette)]
                    cv2.rectangle(vis_img, (bb[0], bb[1]), (bb[2], bb[3]),
                                  color, 4)

                # np.float was an alias of the builtin float and no longer
                # exists in recent NumPy releases.
                tmp = np.ones_like(heatmap, dtype=float)
                # Boost the second channel where the heatmap is strong.
                tmp[:, :, 1] += heatmap[:, :, 1] / 255.0
                overlay = np.multiply(vis_img, tmp)
                overlay = np.minimum(overlay, 255).astype(np.uint8)

                output_path = osp.join(
                    vis_dir, '%04d_' % epoch + im_name + '_ol' + im_ext)
                cv2.imwrite(output_path, overlay)
                output_path = osp.join(
                    vis_dir, '%04d_' % epoch + im_name + '_hm' + im_ext)
                cv2.imwrite(output_path, heatmap)

            # A separate name keeps the loop variable `entry` intact.
            for rank in range(len(res['boxes'])):
                ranked = {}
                ranked['bg_image'] = im_path
                ranked['name'] = im_name
                ranked['box'] = xyxys[rank]
                ranked['rank'] = rank
                res_db.append(ranked)

        return res_db

Example #11

0

Show file

File: imdb.py Project: wuguowuge/who_where

    def get_scene_minibatch(self, square=True):
        """Assemble a minibatch of images, scene layouts and segmentations.

        cfg.TRAIN.BATCH_SIZE entries of self.objdb are taken in the order
        given by self.objdb_perm, starting at self.objdb_cur, and the cursor
        is advanced. self.permute_objdb_indices() is called first whenever
        the slice would reach or pass the end of self.objdb.

        Args:
            square: when true, image and segmentation are padded with
                ds_utils.create_squared_image and all boxes are shifted by
                the image's padding offsets.

        Returns a tuple (images, scenes, segs, boxes, grids):
            images -- float32 (batch, R[0], R[1], R[2]) resized images,
                      with R = cfg.PREDICT_RESOLUTION.
            scenes -- float32 (batch, R[0], R[1], self.num_classes - 1),
                      output of ds_utils.create_scenes.
            segs   -- float32 (batch, R[0], R[1], R[2]) resized
                      segmentation images.
            boxes  -- (batch, 4) output of ds_utils.normalize_xywh.
            grids  -- (batch, 2) output of ds_utils.boxes_to_indices.
        """
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.PREDICT_RESOLUTION
        grid_shape = cfg.GRID_SHAPE
        num_clses = self.num_classes - 1

        # Indices of the minibatch; the cursor is read after the optional
        # permutation.
        if self.objdb_cur + batch_size >= len(self.objdb):
            self.permute_objdb_indices()
        db_inds = self.objdb_perm[self.objdb_cur:self.objdb_cur + batch_size]
        self.objdb_cur += batch_size

        images = np.zeros((batch_size, resolution[0],
                           resolution[1], resolution[2]), dtype=np.float32)
        scenes = np.zeros((batch_size, resolution[0],
                           resolution[1], num_clses), dtype=np.float32)
        segs = np.zeros((batch_size, resolution[0],
                         resolution[1], resolution[2]), dtype=np.float32)

        grids = np.zeros((batch_size, 2))
        boxes = np.zeros((batch_size, 4))

        for i in range(batch_size):
            obj = self.objdb[db_inds[i]]

            im_path = obj['background']
            seg_path = obj['out_seg']

            width = obj['width']
            height = obj['height']
            box = obj['box'].copy()

            # The builtin int replaces np.int, which was only an alias of
            # it and has been removed from recent NumPy releases.
            all_boxes = obj['all_boxes'].copy().reshape((-1, 4)).astype(int)
            all_clses = obj['all_clses'].copy().flatten()

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            seg = cv2.imread(seg_path, cv2.IMREAD_COLOR)

            if obj['flipped']:
                img = cv2.flip(img, 1)
                seg = cv2.flip(seg, 1)

            xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()
            ex_box = box.copy().flatten().astype(int)

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                xywh[0] += offset_x
                xywh[1] += offset_y

                ex_box[0] += offset_x
                ex_box[1] += offset_y
                ex_box[2] += offset_x
                ex_box[3] += offset_y

                all_boxes[:, 0] += offset_x
                all_boxes[:, 1] += offset_y
                all_boxes[:, 2] += offset_x
                all_boxes[:, 3] += offset_y
                width = height = img.shape[0]

                # The segmentation's own offsets are not needed, so they
                # are discarded rather than overwriting the image's.
                seg, _, _ = \
                    ds_utils.create_squared_image(seg, cfg.PIXEL_MEANS)

            nxywh = ds_utils.normalize_xywh(xywh.reshape((1, 4)), width,
                                            height).squeeze()
            # discretized output positions
            grid = ds_utils.boxes_to_indices(nxywh.reshape((1, 4)),
                                             grid_shape).squeeze()

            # images of the same shape
            images[i] = cv2.resize(img, (resolution[1], resolution[0]))
            segs[i] = cv2.resize(seg, (resolution[1], resolution[0]))

            # NOTE(review): one scale factor (resolution[0] / width) is
            # applied to x and y alike, which presumes a square frame.
            # That holds when square=True; confirm for square=False.
            factor = float(resolution[0]) / width
            all_boxes = (factor * all_boxes).astype(int)
            ex_box = (factor * ex_box).astype(int)
            scenes[i] = ds_utils.create_scenes(resolution[1],
                                               resolution[0],
                                               all_boxes,
                                               all_clses,
                                               ex_box=ex_box,
                                               n_cls=num_clses)

            grids[i, :] = grid
            boxes[i, :] = nxywh

        return images, scenes, segs, boxes, grids

Example #12

0

Show file

File: imdb.py Project: wuguowuge/who_where

    def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):

        #######################################################################
        # rename the config parameters to make the codes look clear
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.RESOLUTION
        grid_shape = cfg.GRID_SHAPE

        #######################################################################
        # indices of the minibatch
        if self.roidb_cur + batch_size >= len(self.roidb):
            self.permute_roidb_indices()
        db_inds = self.roidb_perm[self.roidb_cur:self.roidb_cur + batch_size]
        self.roidb_cur += batch_size
        #######################################################################

        #######################################################################
        # to be returned
        objects = []
        centers = []
        ratios = []
        masks = []
        # normalized xywh representation
        bboxes = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        # grid box offset
        deltas = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        images  = np.zeros((batch_size, resolution[0], \
                            resolution[1], resolution[2]), dtype=np.float32)
        #######################################################################

        for i in xrange(batch_size):
            rois = self.roidb[db_inds[i]]
            im_path = rois['image']
            width = rois['width']
            height = rois['height']
            gt_boxes = rois['boxes'].copy()
            gt_cats = rois['clses'].copy()
            areas = rois['seg_areas']

            # number of instances should not exceed max_seq_len
            num_instances = min(gt_boxes.shape[0], max_seq_len)

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if rois['flipped']:
                # print('flipped %d'%i)
                img = cv2.flip(img, 1)

            # sort the objects in the sequence based on their areas
            order = np.argsort(areas)[::-1]
            gt_boxes = gt_boxes[order, :]
            gt_cats = gt_cats[order]
            areas = areas[order]
            # print areas

            # [x1, y1, x2, y2] to [x, y, w, h]
            gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                gt_boxes[:, 0] += offset_x
                gt_boxes[:, 1] += offset_y
                width = height = img.shape[0]

            # normalize
            gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)

            # truncate the sequences
            gt_boxes = gt_boxes[:num_instances, :]

            # discreted output positions
            grid_indices = ds_utils.xywh_to_index(gt_boxes, \
                grid_shape[1], grid_shape[0])

            # deltas between grid boxes and ground truth boxes
            grid_boxes  = ds_utils.index_to_xywh(grid_indices, \
                grid_shape[1], grid_shape[0])
            grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

            # images of the same shape
            images[i, :, :, :] = cv2.resize(img,
                                            (resolution[1], resolution[0]))
            # use the last 'num_instances' objects
            bboxes[i, :num_instances, :] = np.expand_dims(gt_boxes, axis=0)
            # grid offsets
            deltas[i, :num_instances, :] = np.expand_dims(grid_deltas, axis=0)
            # object indicators
            objects.append(gt_cats[:num_instances].tolist())
            # masks for loss function
            masks.append(np.ones((num_instances, )).tolist())
            # grid centers and sizes
            centers.append(grid_indices[:, 0].tolist())
            ratios.append(grid_indices[:, 1].tolist())

        # padding
        objects = pad_sequences(objects,
                                maxlen=max_seq_len,
                                padding='post',
                                truncating='post',
                                value=0.)
        centers = pad_sequences(centers,
                                maxlen=max_seq_len,
                                padding='post',
                                truncating='post',
                                value=0.)
        ratios = pad_sequences(ratios,
                               maxlen=max_seq_len,
                               padding='post',
                               truncating='post',
                               value=0.)
        masks = pad_sequences(masks,
                              maxlen=max_seq_len,
                              padding='post',
                              truncating='post',
                              value=0.)

        if vis:
            output_dir = osp.abspath(osp.join(cfg.ROOT_DIR, 'output', \
                                              cfg.EXP_DIR, self.name, \
                                              'rnn_minibatch'))
            if not osp.exists(output_dir):
                os.makedirs(output_dir)

            for i in xrange(batch_size):
                rois = self.roidb[db_inds[i]]
                im_name, im_ext = osp.splitext(osp.basename(rois['image']))
                msk = masks[i, :]

                # ground truth boxes
                ibb = bboxes[i, :, :].copy()
                iid = objects[i, :].copy()
                iim = images[i, :, :, :].copy()

                # grid bboxes
                grid_indices = np.vstack(
                    (centers[i, :], ratios[i, :])).transpose()
                gbb = ds_utils.index_to_xywh(grid_indices, grid_shape[1],
                                             grid_shape[0])

                # regressed bboxes
                rbb = ds_utils.bbox_transform_inv(gbb, deltas[i, :, :])

                # Denormalize
                ibb = ds_utils.denormalize_xywh(ibb, resolution[1],
                                                resolution[0])
                gbb = ds_utils.denormalize_xywh(gbb, resolution[1],
                                                resolution[0])
                rbb = ds_utils.denormalize_xywh(rbb, resolution[1],
                                                resolution[0])

                ibb = ds_utils.xywh_to_xyxy(ibb, resolution[1], resolution[0])
                gbb = ds_utils.xywh_to_xyxy(gbb, resolution[1], resolution[0])
                rbb = ds_utils.xywh_to_xyxy(rbb, resolution[1], resolution[0])

                # fontScale = 0.0007 * math.sqrt(float(\
                #     resolution[0]*resolution[0]+resolution[1]*resolution[1]))

                for j in xrange(ibb.shape[0]):
                    if msk[j] == 0:
                        break

                    id = iid[j]
                    cls = self.classes[id]

                    # ground truth boxes
                    bb = ibb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (0, 255, 0), 2)
                    # grid boxes
                    bb = gbb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (255, 0, 0), 1)
                    # regressed boxes
                    bb = rbb[j, :].astype(np.int16)
                    cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]), \
                                (0, 0, 255), 1)
                    # cv2.putText(iim, '{:}_{:}'.format(j, cls), \
                    #             (bb[0], bb[1] - 2), \
                    #             cv2.FONT_HERSHEY_SIMPLEX, \
                    #             fontScale, (0, 0, 255), 1)

                output_path = osp.join(output_dir,
                                       '%06d_' % i + im_name + '.jpg')
                cv2.imwrite(output_path, iim)

        return images, objects, bboxes, deltas, centers, ratios, masks

Example #13

0

Show file

File: imdb.py Project: liuguoyou/who_where

    def get_scene_minibatch(self, square=True):
        """Build the next minibatch of object-placement samples.

        Draws ``cfg.TRAIN.BATCH_SIZE`` entries from ``self.objdb`` (reshuffling
        through ``permute_objdb_indices`` when the cursor would run past the
        end), loads each background image and segmentation image, optionally
        pads them to a square, and resizes them to ``cfg.PREDICT_RESOLUTION``.

        Args:
            square: When true, pad image and segmentation with
                ``ds_utils.create_squared_image`` and shift every box by the
                returned offsets before normalizing.

        Returns:
            A tuple ``(images, scenes, segs, boxes, grids)``:
              * images -- float32, (batch, res[0], res[1], res[2]) resized
                background images.
              * scenes -- float32, (batch, res[0], res[1], num_classes - 1)
                layouts produced by ``ds_utils.create_scenes``.
              * segs   -- float32, same shape as ``images``; resized
                segmentation images.
              * boxes  -- (batch, 4) target boxes as normalized xywh.
              * grids  -- (batch, 2) discretized indices of the target box
                from ``ds_utils.boxes_to_indices``.

        Raises:
            IOError: If the background or segmentation image cannot be read.
        """
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.PREDICT_RESOLUTION
        grid_shape = cfg.GRID_SHAPE
        # One layout channel per class except one
        # (presumably the background class -- TODO confirm).
        num_clses = self.num_classes - 1

        # ------------------------------------------------------------------
        # Indices of the minibatch; reshuffle when the epoch is exhausted.
        if self.objdb_cur + batch_size >= len(self.objdb):
            self.permute_objdb_indices()
        db_inds = self.objdb_perm[self.objdb_cur:self.objdb_cur + batch_size]
        self.objdb_cur += batch_size
        # ------------------------------------------------------------------

        image_shape = (batch_size, resolution[0], resolution[1], resolution[2])
        scene_shape = (batch_size, resolution[0], resolution[1], num_clses)
        images = np.zeros(image_shape, dtype=np.float32)
        scenes = np.zeros(scene_shape, dtype=np.float32)
        segs = np.zeros(image_shape, dtype=np.float32)

        grids = np.zeros((batch_size, 2))
        boxes = np.zeros((batch_size, 4))

        for i in range(batch_size):
            obj = self.objdb[db_inds[i]]

            im_path = obj['background']
            seg_path = obj['out_seg']

            width = obj['width']
            height = obj['height']
            box = obj['box'].copy()

            # Builtin ``int`` replaces the removed ``np.int`` alias (same dtype).
            all_boxes = obj['all_boxes'].copy().reshape((-1, 4)).astype(int)
            all_clses = obj['all_clses'].copy().flatten()

            # cv2.imread signals failure by returning None instead of raising.
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if img is None:
                raise IOError('Cannot read image: %s' % im_path)
            seg = cv2.imread(seg_path, cv2.IMREAD_COLOR)
            if seg is None:
                raise IOError('Cannot read image: %s' % seg_path)

            # Horizontal flip if the entry is marked as flipped.
            if obj['flipped']:
                img = cv2.flip(img, 1)
                seg = cv2.flip(seg, 1)

            xywh = ds_utils.xyxy_to_xywh(box.reshape((1, 4))).squeeze()
            ex_box = box.copy().flatten().astype(int)

            # If we need square images, pad and move the boxes accordingly.
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                xywh[0] += offset_x
                xywh[1] += offset_y

                # [x1, y1, x2, y2]: x entries shift by offset_x, y by offset_y.
                ex_box[0] += offset_x
                ex_box[1] += offset_y
                ex_box[2] += offset_x
                ex_box[3] += offset_y

                all_boxes[:, 0] += offset_x
                all_boxes[:, 1] += offset_y
                all_boxes[:, 2] += offset_x
                all_boxes[:, 3] += offset_y
                width = height = img.shape[0]

                # Offsets of the segmentation padding are not needed.
                seg, _, _ = \
                    ds_utils.create_squared_image(seg, cfg.PIXEL_MEANS)

            nxywh = ds_utils.normalize_xywh(
                xywh.reshape((1, 4)), width, height).squeeze()
            # Discretized output position of the target box.
            grid = ds_utils.boxes_to_indices(
                nxywh.reshape((1, 4)), grid_shape).squeeze()

            # Images of the same shape; cv2.resize takes (width, height).
            images[i] = cv2.resize(img, (resolution[1], resolution[0]))
            segs[i] = cv2.resize(seg, (resolution[1], resolution[0]))

            # NOTE(review): one factor scales both x and y coordinates; this
            # is only exact when the source image and the target resolution
            # are square -- confirm before calling with square=False.
            factor = float(resolution[0]) / width
            all_boxes = (factor * all_boxes).astype(int)
            ex_box = (factor * ex_box).astype(int)
            scenes[i] = ds_utils.create_scenes(
                resolution[1], resolution[0], all_boxes, all_clses,
                ex_box=ex_box, n_cls=num_clses)

            grids[i, :] = grid
            boxes[i, :] = nxywh

        return images, scenes, segs, boxes, grids

Example #14

0

Show file

File: imdb.py Project: liuguoyou/who_where

    def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):
        """Build the next minibatch of per-image object sequences.

        Draws ``cfg.TRAIN.BATCH_SIZE`` entries from ``self.roidb`` (reshuffling
        through ``permute_roidb_indices`` when the cursor would run past the
        end). For each image the ground-truth boxes are sorted by decreasing
        segment area, converted to normalized xywh, truncated to
        ``max_seq_len`` and discretized into grid indices; the offsets between
        each grid box and its ground-truth box are computed with
        ``ds_utils.bbox_transform``.

        Args:
            max_seq_len: Maximum number of objects kept per image; shorter
                sequences are zero-padded at the end.
            square: When true, pad the image with
                ``ds_utils.create_squared_image`` and shift the boxes by the
                returned offsets before normalizing.
            vis: When true, draw ground-truth, grid and regressed boxes on
                every image of the batch and write the result as JPEG files
                under ``<ROOT_DIR>/output/<EXP_DIR>/<self.name>/rnn_minibatch``.

        Returns:
            A tuple ``(images, objects, bboxes, deltas, centers, ratios,
            masks)``:
              * images  -- float32, (batch, res[0], res[1], res[2]).
              * objects -- (batch, max_seq_len) padded class indices.
              * bboxes  -- float32, (batch, max_seq_len, 4) normalized xywh.
              * deltas  -- float32, (batch, max_seq_len, 4) grid-box offsets.
              * centers -- (batch, max_seq_len) padded first grid index.
              * ratios  -- (batch, max_seq_len) padded second grid index.
              * masks   -- (batch, max_seq_len); 1 for real entries, 0 for
                padding.

        Raises:
            IOError: If an image file cannot be read.
        """
        # ------------------------------------------------------------------
        # Shorter names for the config parameters.
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.RESOLUTION
        grid_shape = cfg.GRID_SHAPE

        # ------------------------------------------------------------------
        # Indices of the minibatch; reshuffle when the epoch is exhausted.
        if self.roidb_cur + batch_size >= len(self.roidb):
            self.permute_roidb_indices()
        db_inds = self.roidb_perm[self.roidb_cur:self.roidb_cur + batch_size]
        self.roidb_cur += batch_size

        # ------------------------------------------------------------------
        # To be returned.
        objects = []
        centers = []
        ratios = []
        masks = []
        # normalized xywh representation
        bboxes = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        # grid box offsets
        deltas = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        images = np.zeros(
            (batch_size, resolution[0], resolution[1], resolution[2]),
            dtype=np.float32)

        # ``range`` (not the Python-2-only ``xrange``) keeps this runnable on
        # both Python 2 and Python 3.
        for i in range(batch_size):
            rois = self.roidb[db_inds[i]]
            im_path = rois['image']
            width = rois['width']
            height = rois['height']
            gt_boxes = rois['boxes'].copy()
            gt_cats = rois['clses'].copy()
            areas = rois['seg_areas']

            # number of instances should not exceed max_seq_len
            num_instances = min(gt_boxes.shape[0], max_seq_len)

            # cv2.imread signals failure by returning None instead of raising.
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if img is None:
                raise IOError('Cannot read image: %s' % im_path)
            # Horizontal flip if the entry is marked as flipped.
            if rois['flipped']:
                img = cv2.flip(img, 1)

            # sort the objects in the sequence by decreasing area
            order = np.argsort(areas)[::-1]
            gt_boxes = gt_boxes[order, :]
            gt_cats = gt_cats[order]

            # [x1, y1, x2, y2] to [x, y, w, h]
            gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

            # if we need square images, pad and move the boxes accordingly
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                gt_boxes[:, 0] += offset_x
                gt_boxes[:, 1] += offset_y
                width = height = img.shape[0]

            # normalize
            gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)

            # truncate the sequences
            gt_boxes = gt_boxes[:num_instances, :]

            # discretized output positions
            grid_indices = ds_utils.xywh_to_index(
                gt_boxes, grid_shape[1], grid_shape[0])

            # deltas between grid boxes and ground truth boxes
            grid_boxes = ds_utils.index_to_xywh(
                grid_indices, grid_shape[1], grid_shape[0])
            grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

            # images of the same shape; cv2.resize takes (width, height)
            images[i, :, :, :] = cv2.resize(
                img, (resolution[1], resolution[0]))
            # keep the first 'num_instances' (largest-area) objects
            bboxes[i, :num_instances, :] = np.expand_dims(gt_boxes, axis=0)
            # grid offsets
            deltas[i, :num_instances, :] = np.expand_dims(grid_deltas, axis=0)
            # object indicators
            objects.append(gt_cats[:num_instances].tolist())
            # masks for loss function
            masks.append(np.ones((num_instances, )).tolist())
            # grid centers and sizes
            centers.append(grid_indices[:, 0].tolist())
            ratios.append(grid_indices[:, 1].tolist())

        # pad every sequence at the end up to max_seq_len
        objects = pad_sequences(objects, maxlen=max_seq_len,
                                padding='post', truncating='post', value=0.)
        centers = pad_sequences(centers, maxlen=max_seq_len,
                                padding='post', truncating='post', value=0.)
        ratios = pad_sequences(ratios, maxlen=max_seq_len,
                               padding='post', truncating='post', value=0.)
        masks = pad_sequences(masks, maxlen=max_seq_len,
                              padding='post', truncating='post', value=0.)

        if vis:
            output_dir = osp.abspath(osp.join(cfg.ROOT_DIR, 'output',
                                              cfg.EXP_DIR, self.name,
                                              'rnn_minibatch'))
            if not osp.exists(output_dir):
                os.makedirs(output_dir)

            for i in range(batch_size):
                rois = self.roidb[db_inds[i]]
                im_name = osp.splitext(osp.basename(rois['image']))[0]
                msk = masks[i, :]

                # ground truth boxes and a private copy of the image to draw on
                ibb = bboxes[i, :, :].copy()
                iim = images[i, :, :, :].copy()

                # grid bboxes
                grid_indices = np.vstack(
                    (centers[i, :], ratios[i, :])).transpose()
                gbb = ds_utils.index_to_xywh(
                    grid_indices, grid_shape[1], grid_shape[0])

                # regressed bboxes
                rbb = ds_utils.bbox_transform_inv(gbb, deltas[i, :, :])

                # denormalize to pixel units, then convert to corner format
                ibb = ds_utils.denormalize_xywh(
                    ibb, resolution[1], resolution[0])
                gbb = ds_utils.denormalize_xywh(
                    gbb, resolution[1], resolution[0])
                rbb = ds_utils.denormalize_xywh(
                    rbb, resolution[1], resolution[0])

                ibb = ds_utils.xywh_to_xyxy(ibb, resolution[1], resolution[0])
                gbb = ds_utils.xywh_to_xyxy(gbb, resolution[1], resolution[0])
                rbb = ds_utils.xywh_to_xyxy(rbb, resolution[1], resolution[0])

                # (boxes, color, thickness) for ground truth, grid and
                # regressed boxes, drawn in this order.
                layers = (
                    (ibb, (0, 255, 0), 2),
                    (gbb, (255, 0, 0), 1),
                    (rbb, (0, 0, 255), 1),
                )

                for j in range(ibb.shape[0]):
                    # padding starts at the first zero mask entry
                    if msk[j] == 0:
                        break

                    for layer_boxes, color, thickness in layers:
                        bb = layer_boxes[j, :].astype(np.int16)
                        cv2.rectangle(iim, (bb[0], bb[1]), (bb[2], bb[3]),
                                      color, thickness)

                output_path = osp.join(output_dir,
                                       '%06d_' % i + im_name + '.jpg')
                cv2.imwrite(output_path, iim)

        return images, objects, bboxes, deltas, centers, ratios, masks