Ejemplo n.º 1
0
def save_bbox(args, cfg, im_file, im, dataset_dict, boxes, scores):
    """Pick the most confident boxes of one image and save them as ``.npz``.

    For every ROI the box regressed for its best non-zero class is selected,
    per-class NMS (IoU 0.3) is used to compute a confidence per ROI, and the
    ``MAX_BOXES`` most confident ROIs are written to
    ``<args.output_dir>/<image name without extension>.npz``.

    Args:
        args: Object exposing ``output_dir``.
        cfg: Config exposing ``MODEL.BUA.EXTRACTOR.MAX_BOXES``.
        im_file: Image file name; its extension is stripped to name the output.
        im: Image array; sizes of axis 0 and axis 1 are stored as
            ``image_h`` / ``image_w``.
        dataset_dict: Mapping with ``'image'`` (axes 1 and 2 are used as the
            clipping extent) and ``'im_scale'``.
        boxes: Sequence whose first element holds the per-class boxes
            (reshaped to ``(-1, 4)`` and then viewed as ``R x (C * 4)``).
        scores: Sequence whose first element is the ``R x C`` score tensor.
            Column 0 is skipped (presumably background - confirm).

    Returns:
        None. The result is written to disk.
    """
    MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES

    scores = scores[0].cpu()
    boxes = boxes[0]
    num_classes = scores.shape[1]
    boxes = BUABoxes(boxes.reshape(-1, 4))
    image = dataset_dict['image']
    im_scale = dataset_dict['im_scale']
    boxes.clip((image.shape[1] / im_scale, image.shape[2] / im_scale))
    boxes = boxes.tensor.view(-1, num_classes * 4).cpu()  # R x (C * 4)

    # For each ROI keep only the box of its highest-scoring class (class 0
    # excluded). This is the vectorized form of a per-row argmax + slice.
    num_rois = boxes.shape[0]
    best_cls = torch.argmax(scores[:, 1:], dim=1) + 1
    cls_boxes = boxes.view(num_rois, num_classes, 4)[
        torch.arange(num_rois), best_cls].float()

    # Confidence of an ROI = best class score among the classes for which the
    # ROI survives NMS.
    max_conf = torch.zeros(scores.shape[0], device=scores.device)
    for cls_ind in range(1, num_classes):
        cls_scores = scores[:, cls_ind]
        keep = nms(cls_boxes, cls_scores, 0.3)
        max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                     cls_scores[keep],
                                     max_conf[keep])

    keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]
    image_bboxes = cls_boxes[keep_boxes]

    # splitext removes only the final extension, so names such as
    # "img.v2.jpg" are not truncated at the first dot.
    output_file = os.path.join(args.output_dir, os.path.splitext(im_file)[0])
    np.savez_compressed(output_file,
                        bbox=image_bboxes.numpy(),
                        num_bbox=len(keep_boxes),
                        image_h=np.size(im, 0),
                        image_w=np.size(im, 1))
Ejemplo n.º 2
0
def save_roi_features(args, cfg, im_file, im, dataset_dict, boxes, scores, features_pooled, attr_scores=None):
    """Filter the ROIs of one image and save features, boxes and labels.

    Per-class NMS (IoU 0.3) assigns every ROI its best surviving class and
    score. ROIs whose score reaches ``CONF_THRESH`` are kept; if that leaves
    fewer than ``MIN_BOXES`` or more than ``MAX_BOXES`` ROIs, the top
    ``MIN_BOXES`` / ``MAX_BOXES`` ROIs by score are kept instead. The result
    is written to ``<args.output_dir>/<image name without extension>.npz``.

    Args:
        args: Object exposing ``output_dir``.
        cfg: Config exposing ``MODEL.BUA.EXTRACTOR.{MIN_BOXES, MAX_BOXES,
            CONF_THRESH}``.
        im_file: Image file name; its extension is stripped to build the
            output name and the stored ``image_id``.
        im: Image array; sizes of axis 0 and axis 1 are stored as
            ``image_h`` / ``image_w``.
        dataset_dict: Mapping with ``'im_scale'``; boxes are divided by it.
        boxes: Sequence whose first element exposes a ``.tensor`` of boxes.
        scores: Sequence whose first element is the ROI x class score tensor.
            Column 0 is skipped (presumably background - confirm).
        features_pooled: Sequence whose first element holds one feature row
            per ROI.
        attr_scores: Optional sequence whose first element is the
            ROI x attribute score tensor. When given, the arg-max attribute
            and its score are stored as well.

    Returns:
        None. The result is written to disk.
    """
    MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES
    MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES
    CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH

    dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
    scores = scores[0].cpu()
    feats = features_pooled[0].cpu()

    num_rois = scores.shape[0]
    max_conf = torch.zeros(num_rois, device=scores.device)
    max_obj = torch.zeros(num_rois, dtype=torch.long, device=scores.device)
    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(dets, cls_scores, 0.3)
        # Evaluate the comparison once, before either buffer is updated.
        improved = cls_scores[keep] > max_conf[keep]
        max_obj[keep] = torch.where(improved,
                                    torch.tensor(cls_ind, dtype=torch.long),
                                    max_obj[keep])
        max_conf[keep] = torch.where(improved,
                                     cls_scores[keep],
                                     max_conf[keep])

    keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

    # splitext removes only the final extension, so names such as
    # "img.v2.jpg" are not truncated at the first dot.
    image_id = os.path.splitext(im_file)[0]
    num_boxes = len(keep_boxes)
    image_h = np.size(im, 0)
    image_w = np.size(im, 1)

    info = {
        'image_id': image_id,
        'image_h': image_h,
        'image_w': image_w,
        'num_boxes': num_boxes,
        'objects_id': max_obj[keep_boxes].cpu().numpy(),
        'objects_conf': max_conf[keep_boxes].cpu().numpy(),
    }
    if attr_scores is not None:
        kept_attr_scores = attr_scores[0].cpu()[keep_boxes].numpy()
        info['attrs_id'] = np.argmax(kept_attr_scores, axis=1)
        info['attrs_conf'] = np.max(kept_attr_scores, axis=1)

    output_file = os.path.join(args.output_dir, image_id)
    np.savez_compressed(output_file,
                        x=feats[keep_boxes].numpy(),
                        bbox=dets[keep_boxes].numpy(),
                        num_bbox=num_boxes,
                        image_h=image_h,
                        image_w=image_w,
                        info=info)
def extractor_postprocess(boxes, scores, features_pooled, input_per_image,
                          extractor):
    """Rescale the boxes of one image and keep its most confident ROIs.

    Boxes are divided by ``input_per_image["im_scale"]``. Per-class NMS
    (IoU 0.3) then assigns every ROI the best score among the classes for
    which it survives. ROIs whose score reaches ``CONF_THRESH`` are kept; if
    that leaves fewer than ``MIN_BOXES`` or more than ``MAX_BOXES`` ROIs, the
    top ``MIN_BOXES`` / ``MAX_BOXES`` ROIs by score are kept instead.

    Args:
        boxes: Tensor of boxes, one row per ROI.
        scores: ROI x class score tensor. Column 0 is skipped
            (presumably background - confirm).
        features_pooled: Tensor with one feature row per ROI.
        input_per_image: Mapping providing ``"im_scale"``.
        extractor: Object exposing ``MIN_BOXES``, ``MAX_BOXES`` and
            ``CONF_THRESH``.

    Returns:
        tuple: ``(image_feat, image_bboxes)`` - the rows of
        ``features_pooled`` and of the rescaled boxes for the kept ROIs.
    """
    MIN_BOXES = extractor.MIN_BOXES
    MAX_BOXES = extractor.MAX_BOXES
    CONF_THRESH = extractor.CONF_THRESH

    dets = boxes / input_per_image["im_scale"]

    # Allocate directly on the scores' device instead of creating the buffer
    # on the CPU and copying it over.
    max_conf = torch.zeros(scores.shape[0], device=scores.device)

    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(dets, cls_scores, 0.3)
        max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                     cls_scores[keep], max_conf[keep])

    keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
    num_kept = len(keep_boxes)
    if num_kept < MIN_BOXES or num_kept > MAX_BOXES:
        # Fall back to a pure top-k selection when thresholding yields too
        # few or too many ROIs.
        limit = MIN_BOXES if num_kept < MIN_BOXES else MAX_BOXES
        keep_boxes = torch.argsort(max_conf, descending=True)[:limit]

    image_feat = features_pooled[keep_boxes]
    image_bboxes = dets[keep_boxes]

    return image_feat, image_bboxes
Ejemplo n.º 4
0
    def predict(self, image):
        """Return the predicted object class name for each kept ROI.

        The image is run through ``self._model``; per-class NMS (IoU 0.3)
        gives every ROI a confidence, ROIs are filtered with
        ``self._threshold`` and bounded by ``self._min_boxes`` /
        ``self._max_boxes``, and each kept ROI is labelled with its
        highest-scoring class (column 0 of the scores is ignored).

        Args:
            image: Anything ``np.array`` converts to an H x W x C array
                (assumed RGB channel order - confirm against callers).

        Returns:
            list: One entry of ``self._classes`` per kept ROI. Best-effort:
            on any exception the error is printed and ``[]`` is returned.
        """
        try:
            # Reverse the channel axis to get the OpenCV-style layout.
            x = np.array(image)
            x = x[:, :, ::-1].copy()

            dataset_dict = get_image_blob(x, self._cfg.MODEL.PIXEL_MEAN)

            with torch.set_grad_enabled(False):
                # Pooled features and attribute scores are not needed here.
                boxes, scores, _, _ = self._model([dataset_dict])

            dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
            scores = scores[0].cpu()

            max_conf = torch.zeros(scores.shape[0], device=scores.device)
            for cls_ind in range(1, scores.shape[1]):
                cls_scores = scores[:, cls_ind]
                keep = nms(dets, cls_scores, 0.3)
                max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                             cls_scores[keep], max_conf[keep])

            keep_boxes = torch.nonzero(max_conf >= self._threshold).flatten()
            if len(keep_boxes) < self._min_boxes:
                keep_boxes = torch.argsort(max_conf,
                                           descending=True)[:self._min_boxes]
            elif len(keep_boxes) > self._max_boxes:
                keep_boxes = torch.argsort(max_conf,
                                           descending=True)[:self._max_boxes]

            # The arg-max runs over columns 1.., hence the +1 when indexing
            # into the class table.
            objects = np.argmax(scores[keep_boxes].numpy()[:, 1:], axis=1)
            return [self._classes[obj_idx + 1] for obj_idx in objects]
        except Exception as e:
            print(e, flush=True)
            return []
def main():
    """Extract ROI features for every image in a directory.

    Parses the command line, builds the model from the config and loads its
    weights, then for each file in ``--image-dir`` runs the model, filters
    the ROIs with per-class NMS (IoU 0.3) plus a confidence threshold, and
    writes features, boxes and labels to
    ``<--out-dir>/<image name without extension>.npz``.

    Files that ``cv2.imread`` cannot decode are reported and skipped, and the
    output directory is created if it does not exist yet.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection2 Inference")
    parser.add_argument(
        "--config-file",
        default="configs/bua-caffe/extract-bua-caffe-r101.yaml",
        metavar="FILE",
        help="path to config file",
    )

    parser.add_argument("--mode",
                        default="caffe",
                        type=str,
                        help="bua_caffe, ...")

    parser.add_argument('--out-dir',
                        dest='output_dir',
                        help='output directory for features',
                        default="features")
    parser.add_argument('--image-dir',
                        dest='image_dir',
                        help='directory with images',
                        default="image")
    parser.add_argument(
        "--resume",
        action="store_true",
        help="whether to attempt to resume from the checkpoint directory",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    cfg = setup(args)

    MIN_BOXES = 10
    MAX_BOXES = 100
    CONF_THRESH = 0.2

    model = DefaultTrainer.build_model(cfg)
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=args.resume)
    # Extract features.
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))
    model.eval()

    # np.savez_compressed does not create missing directories.
    os.makedirs(args.output_dir, exist_ok=True)

    for im_file in tqdm.tqdm(imglist):
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        if im is None:
            # cv2.imread returns None for files it cannot decode.
            print(im_file, "is illegal!")
            continue
        dataset_dict = get_image_blob(im)

        attr_scores = None
        with torch.set_grad_enabled(False):
            if cfg.MODEL.BUA.ATTRIBUTE_ON:
                boxes, scores, features_pooled, attr_scores = model(
                    [dataset_dict])
            else:
                boxes, scores, features_pooled = model([dataset_dict])

        dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
        scores = scores[0].cpu()
        feats = features_pooled[0].cpu()

        max_conf = torch.zeros(scores.shape[0], device=scores.device)
        for cls_ind in range(1, scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            keep = nms(dets, cls_scores, 0.3)
            max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                         cls_scores[keep], max_conf[keep])

        keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

        # splitext removes only the final extension, so names such as
        # "img.v2.jpg" are not truncated at the first dot.
        image_id = os.path.splitext(im_file)[0]
        num_boxes = len(keep_boxes)
        image_h = np.size(im, 0)
        image_w = np.size(im, 1)

        # NOTE(review): unlike the NMS loop above, the arg-max here also
        # considers column 0 of the scores - confirm this is intended.
        kept_scores = scores[keep_boxes].numpy()
        info = {
            'image_id': image_id,
            'image_h': image_h,
            'image_w': image_w,
            'num_boxes': num_boxes,
            'objects_id': np.argmax(kept_scores, axis=1),
            'objects_conf': np.max(kept_scores, axis=1),
        }
        if attr_scores is not None:
            kept_attr_scores = attr_scores[0].cpu()[keep_boxes].numpy()
            info['attrs_id'] = np.argmax(kept_attr_scores, axis=1)
            info['attrs_conf'] = np.max(kept_attr_scores, axis=1)

        output_file = os.path.join(args.output_dir, image_id)
        np.savez_compressed(output_file,
                            x=feats[keep_boxes].numpy(),
                            bbox=dets[keep_boxes].numpy(),
                            num_bbox=num_boxes,
                            image_h=image_h,
                            image_w=image_w,
                            info=info)
Ejemplo n.º 6
0
def extract_feat(image_path):
    """Extract ROI features and boxes for a single image file.

    The image is read with OpenCV and run through the module-level
    ``net_img`` model. Per-class NMS (IoU 0.3) gives every ROI a confidence;
    ROIs with confidence of at least 0.2 are kept, bounded to between 10 and
    100 ROIs by falling back to a top-k selection.

    Args:
        image_path: Path of the image to read.

    Returns:
        tuple: ``(image_feat, image_bboxes, (height, width))`` - the pooled
        features and the boxes (divided by ``im_scale``) of the kept ROIs,
        and the first two entries of the image's shape.
    """
    MIN_BOXES = 10
    MAX_BOXES = 100
    CONF_THRESH = 0.2

    im = cv2.imread(image_path)
    print('image shape:', im.shape)
    dataset_dict = get_image_blob(im)

    with torch.set_grad_enabled(False):
        if cfg.MODEL.BUA.ATTRIBUTE_ON:
            # Attribute scores are returned by the model but not used here.
            boxes, scores, features_pooled, _ = net_img([dataset_dict])
        else:
            boxes, scores, features_pooled = net_img([dataset_dict])

    dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
    scores = scores[0].cpu()
    feats = features_pooled[0].cpu()

    max_conf = torch.zeros(scores.shape[0], device=scores.device)
    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(dets, cls_scores, 0.3)
        max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                     cls_scores[keep], max_conf[keep])

    keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

    return feats[keep_boxes], dets[keep_boxes], im.shape[:2]
Ejemplo n.º 7
0
    # Fragment of a larger routine whose header is not visible here. It reads
    # one image, runs the model on it, selects confident ROIs and starts
    # preparing a matplotlib display of the image.
    im = cv2.imread(im_file)
    dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)

    # Inference only: gradients are disabled for the forward pass.
    with torch.set_grad_enabled(False):
        boxes, scores, features_pooled, attr_scores = model([dataset_dict])

    # Take the first element of each output, move it to the CPU, and divide
    # the boxes by the scale stored in dataset_dict.
    dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
    scores = scores[0].cpu()
    feats = features_pooled[0].cpu()
    attr_scores = attr_scores[0].cpu()

    # Per-class NMS (IoU 0.3): each ROI ends up with the best score among the
    # classes for which it survives. Column 0 is skipped (presumably the
    # background class - confirm).
    max_conf = torch.zeros((scores.shape[0])).to(scores.device)
    for cls_ind in range(1, scores.shape[1]):
        cls_scores = scores[:, cls_ind]
        keep = nms(dets, cls_scores, 0.3)
        max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep],
                                     cls_scores[keep], max_conf[keep])

    # Keep ROIs reaching the threshold; fall back to a top-k selection when
    # that yields fewer than MIN_BOXES or more than MAX_BOXES ROIs.
    keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

    # Convert from OpenCV's BGR order to RGB before handing the image to
    # matplotlib.
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    plt.axis('off')
    plt.imshow(im)

    # Kept boxes as a numpy array, and for each kept ROI the arg-max over
    # score columns 1.. (so the indices are offset by one relative to the
    # full score table).
    boxes = dets[keep_boxes].numpy()
    objects = np.argmax(scores[keep_boxes].numpy()[:, 1:], axis=1)
Ejemplo n.º 8
0
def main():
    """Extract ROI features for images and store them in HDF5 files.

    Parses the command line, builds the model and loads its weights, then
    processes either the single ``--image`` or every file in ``--image-dir``.
    For each readable image the ROIs are filtered with per-class NMS
    (IoU 0.3) plus the configured confidence threshold and box limits, and
    three datasets keyed by the image's base name are written:

    * ``<out-name>_fc.h5``: mean of the kept ROI features,
    * ``<out-name>_att.h5``: the kept ROI features,
    * ``<out-name>_box.h5``: the kept boxes.

    Per-image metadata (object/attribute names and confidences) is pickled
    to ``<out-name>_info.pkl``, even when processing stops early because of
    an error.
    """

    def load_vocab(path):
        # Each line may hold several comma-separated aliases; only the first
        # one (lower-cased and stripped) is used.
        with open(path) as f:
            return np.array(
                [line.split(',')[0].lower().strip() for line in f])

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection2 Inference")
    parser.add_argument(
        "--config-file",
        default="configs/bua-caffe/extract-bua-caffe-r101-fix36.yaml",
        metavar="FILE",
        help="path to config file",
    )

    # --image-dir or --image
    parser.add_argument('--image-dir',
                        dest='image_dir',
                        help='directory with images',
                        default="datasets/demos")
    parser.add_argument(
        '--image', dest='image',
        help='image')  # e.g. datasets/demos/COCO_val2014_000000060623.jpg
    parser.add_argument("--mode",
                        default="caffe",
                        type=str,
                        help="bua_caffe, ...")
    parser.add_argument('--out-dir',
                        dest='output_dir',
                        help='output directory for features',
                        default="features")
    parser.add_argument('--out-name',
                        dest='output_name',
                        help='output file name for features',
                        default="demos")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    cfg = setup(args)

    model = DefaultTrainer.build_model(cfg)
    DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
        cfg.MODEL.WEIGHTS, resume=True)
    model.eval()
    # Extract features.
    if args.image:
        imglist = [args.image]
    else:
        imglist = os.listdir(args.image_dir)
        imglist = [os.path.join(args.image_dir, fn) for fn in imglist]
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))
    imglist.sort()

    MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES
    MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES
    CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH

    classes = load_vocab('evaluation/objects_vocab.txt')
    attributes = load_vocab('evaluation/attributes_vocab.txt')

    os.makedirs(args.output_dir, exist_ok=True)
    with h5py.File(os.path.join(args.output_dir, '%s_fc.h5' % args.output_name), 'a') as file_fc, \
            h5py.File(os.path.join(args.output_dir, '%s_att.h5' % args.output_name), 'a') as file_att, \
            h5py.File(os.path.join(args.output_dir, '%s_box.h5' % args.output_name), 'a') as file_box:
        informations = {}
        try:
            for im_file in tqdm.tqdm(imglist):
                img_nm = os.path.basename(im_file)
                im = cv2.imread(im_file)
                if im is None:
                    # cv2.imread returns None for files it cannot decode.
                    print(im_file, "is illegal!")
                    continue
                dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN)
                # extract roi features
                attr_scores = None
                with torch.set_grad_enabled(False):
                    if cfg.MODEL.BUA.ATTRIBUTE_ON:
                        boxes, scores, features_pooled, attr_scores = model(
                            [dataset_dict])
                    else:
                        boxes, scores, features_pooled = model([dataset_dict])

                dets = boxes[0].tensor.cpu() / dataset_dict['im_scale']
                scores = scores[0].cpu()
                feats = features_pooled[0].cpu()
                max_conf = torch.zeros(scores.shape[0], device=scores.device)
                for cls_ind in range(1, scores.shape[1]):
                    cls_scores = scores[:, cls_ind]
                    keep = nms(dets, cls_scores, 0.3)
                    max_conf[keep] = torch.where(
                        cls_scores[keep] > max_conf[keep], cls_scores[keep],
                        max_conf[keep])

                keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten()
                if len(keep_boxes) < MIN_BOXES:
                    keep_boxes = torch.argsort(max_conf,
                                               descending=True)[:MIN_BOXES]
                elif len(keep_boxes) > MAX_BOXES:
                    keep_boxes = torch.argsort(max_conf,
                                               descending=True)[:MAX_BOXES]
                image_feat = feats[keep_boxes].numpy()
                image_bboxes = dets[keep_boxes].numpy()

                # Column 0 of the scores is excluded; the resulting indices
                # are used directly to look up names in the vocabulary.
                fg_scores = scores[keep_boxes].numpy()[:, 1:]
                info = {
                    'image_name': img_nm,
                    'image_h': np.size(im, 0),
                    'image_w': np.size(im, 1),
                    'num_boxes': len(keep_boxes),
                    'objects': classes[np.argmax(fg_scores, axis=1)],
                    'objects_conf': np.max(fg_scores, axis=1)
                }
                if attr_scores is not None:
                    fg_attr_scores = (
                        attr_scores[0].cpu()[keep_boxes].numpy()[:, 1:])
                    info['attrs'] = attributes[np.argmax(fg_attr_scores,
                                                         axis=1)]
                    info['attrs_conf'] = np.max(fg_attr_scores, axis=1)
                file_fc.create_dataset(img_nm, data=image_feat.mean(0))
                file_att.create_dataset(img_nm, data=image_feat)
                file_box.create_dataset(img_nm, data=image_bboxes)
                informations[img_nm] = info
        finally:
            # Always persist the metadata gathered so far. The HDF5 files are
            # closed by the enclosing `with` statement.
            info_path = os.path.join(args.output_dir,
                                     '%s_info.pkl' % args.output_name)
            with open(info_path, 'wb') as info_file:
                pickle.dump(informations, info_file)
            print(
                '--------------------------------------------------------------------'
            )