Example #1
class MaskRCNN(object):
    def __init__(self, confidence_threshold=0.7):
        cfg.merge_from_file('e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml')
        cfg.MODEL.DEVICE  # note: as written this only reads the attribute; the device stays at the config default
        cfg.freeze()
        self.model_wrapper = COCODemo(
            cfg,
            confidence_threshold=confidence_threshold,
        )

    def get_chips_and_masks(self, img, label_index=COCO_PERSON_INDEX):
        '''
        Params
        ------
        img : nd array like, RGB
        label_index : int, index of label wanted

        Returns
        -------
        list of tuple (chip, mask)
        - chip is a ndarray: bb crop of the image
        - mask is an ndarray with the same shape as chip, whose pixel values are 0 or 1, indicating whether that pixel belongs to the class.
        '''

        preds = self.model_wrapper.compute_prediction(img)
        top_preds = self.model_wrapper.select_top_predictions(preds)

        labels = top_preds.get_field('labels')
        person_bool_mask = (labels==label_index).numpy().astype(bool)

        masks = top_preds.get_field('mask').numpy()[person_bool_mask]
        bboxes = top_preds.bbox.to(torch.int64).numpy()[person_bool_mask]

        results = []

        for mask, box in zip(masks, bboxes):
            thresh = mask[0, :, :, None]
            # l,t,r,b = box.to(torch.int64).numpy()
            l, t, r, b = box
            if b - t <= 0 or r - l <= 0:
                continue

            content = img[t:(b + 1), l:(r + 1), :]
            minimask = thresh[t:(b + 1), l:(r + 1), :]
            results.append((content, minimask))

        return results
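
A minimal usage sketch for the class above (assumes maskrcnn_benchmark and its demo COCODemo are importable and the YAML/weights referenced in __init__ are present; the image path is hypothetical):

import cv2

detector = MaskRCNN(confidence_threshold=0.7)
img_bgr = cv2.imread('some_image.jpg')              # hypothetical input image
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # the method expects RGB, per the docstring
for chip, mask in detector.get_chips_and_masks(img_rgb):
    # chip is a bounding-box crop; mask is a 0/1 array of the same spatial size
    print(chip.shape, mask.shape)
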
Example #2
def detect_person(cfg, image):
    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=0.7,
    )
    predictions = coco_demo.compute_prediction(image)
    top_predictions = coco_demo.select_top_predictions(predictions)

    #result = coco_demo.overlay_class_names(result, top_predictions)

    labels = top_predictions.get_field("labels").tolist()
    labels = [coco_demo.CATEGORIES[i] for i in labels]

    if 'person' in labels:
        return 1
    else:
        return 0
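
A hedged sketch of calling detect_person, assuming the same config file as Example #1 and a hypothetical image path:

import cv2
from maskrcnn_benchmark.config import cfg

cfg.merge_from_file('e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml')  # same config as Example #1
image = cv2.imread('street_scene.jpg')                        # hypothetical image
print(detect_person(cfg, image))                              # prints 1 if a person is detected, else 0
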
Example #3
def single_predict():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    config_file = "configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml"
    # update the config options with the config file
    cfg.merge_from_file(config_file)
    # manual override some options
    #cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=0.5,
    )
    import cv2
    from process_image import show_image, draw_bb
    
    im = cv2.imread('/home/jianfw/data/sample_images/TaylorSwift.jpg',
            cv2.IMREAD_COLOR)
    predictions = coco_demo.compute_prediction(im)
    predictions = coco_demo.select_top_predictions(predictions)
    scores = predictions.get_field("scores").tolist()
    labels = predictions.get_field("labels").tolist()
    labels = [coco_demo.CATEGORIES[i] for i in labels]
    boxes = predictions.bbox
    rects = []
    import torch
    for box, score, label in zip(boxes, scores, labels):
        box = box.to(torch.int64)
        top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
    
        r = [top_left[0], top_left[1], bottom_right[0], bottom_right[1]]
        rect = {'class': label, 'conf': score, 'rect': r}
        rects.append(rect)
    import numpy as np
    im_mask = np.copy(im)
    draw_bb(im_mask, [r['rect'] for r in rects], [r['class'] for r in rects],
            [r['conf'] for r in rects])
    import json
    from process_image import show_images
    show_images([im, im_mask], 1, 2)
Example #4
n2 = 2 * n // 3
#start, end = 0, n1
#start, end = n1, n2
#start, end = n2, n
#start, end = [(0,n1), (n1,n2), (n2,n)][idx_no]
start, end = 0, n

print("working on %i - %i" % (start, end))
all_images = all_images[start:end]

for image_name in tqdm(all_images):
    image_full_path = images_path + image_name
    cur_img = cv2.imread(image_full_path)
    H, W, C = cur_img.shape

    predictions = coco_demo.compute_prediction(cur_img)
    scores = predictions.get_field("scores").tolist()
    labels = predictions.get_field("labels").tolist()
    #labels = [self.CATEGORIES[i] for i in labels]
    boxes = predictions.bbox.tolist()

    cur_results = {'image_name': image_name, 'H': H, 'W': W, 'detections': []}
    for ii in range(len(boxes)):
        #box_coords = boxes[0,ii] # hard code batch dimension to 1
        #score = scores[0,ii]
        #class_no = int(classes[0,ii])
        left, top, right, bottom = boxes[ii]
        box_coords = [top, left, bottom, right]
        score = scores[ii]
        class_no = int(labels[ii])
Example #5
filenameList = []                                # assumed: initialised in the original source, since the loop below appends to it
X1List, X2List, X3List, X4List = [], [], [], []  # assumed, as above
Y1List = []
Y2List = []
Y3List = []
Y4List = []

TypeList = []

empty_img_name = []

# for img_name in img_names:
for i, img_name in enumerate(tqdm(img_names)):
    path = os.path.join(imgs_dir, img_name)
    image = load(path)
    # compute predictions
    predictions = coco_demo.compute_prediction(image)
    try:
        scores = predictions.get_field("scores").numpy()
        bbox = predictions.bbox[np.argmax(scores)].numpy()
        labelList = predictions.get_field("labels").numpy()
        label = labelList[np.argmax(scores)]
        filenameList.append(img_name)
        X1List.append(round(bbox[0]))
        Y1List.append(round(bbox[1]))
        X2List.append(round(bbox[2]))
        Y2List.append(round(bbox[1]))
        X3List.append(round(bbox[2]))
        Y3List.append(round(bbox[3]))
        X4List.append(round(bbox[0]))
        Y4List.append(round(bbox[3]))
        TypeList.append(label)
    except Exception:
        # the original handler is not shown; images without usable predictions are presumably recorded here
        empty_img_name.append(img_name)
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.6,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=256,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument("--svo-filename",
                        help="Optional SVO input filepath",
                        default=None)

    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    init_cap_params = sl.InitParameters()
    if args.svo_filename:
        print("Loading SVO file " + args.svo_filename)
        init_cap_params.set_from_svo_file(args.svo_filename)
        init_cap_params.svo_real_time_mode = True
    init_cap_params.camera_resolution = sl.RESOLUTION.HD720
    init_cap_params.depth_mode = sl.DEPTH_MODE.ULTRA
    init_cap_params.coordinate_units = sl.UNIT.METER
    init_cap_params.depth_stabilization = True
    init_cap_params.camera_image_flip = sl.FLIP_MODE.AUTO
    init_cap_params.coordinate_system = sl.COORDINATE_SYSTEM.RIGHT_HANDED_Y_UP

    cap = sl.Camera()
    if not cap.is_opened():
        print("Opening ZED Camera...")
    status = cap.open(init_cap_params)
    if status != sl.ERROR_CODE.SUCCESS:
        print(repr(status))
        exit()

    display = True
    runtime = sl.RuntimeParameters()
    left = sl.Mat()
    ptcloud = sl.Mat()
    depth_img = sl.Mat()
    depth = sl.Mat()

    res = sl.Resolution(1280, 720)

    py_transform = sl.Transform(
    )  # First create a Transform object for TrackingParameters object
    tracking_parameters = sl.PositionalTrackingParameters(
        init_pos=py_transform)
    tracking_parameters.set_as_static = True
    err = cap.enable_positional_tracking(tracking_parameters)
    if err != sl.ERROR_CODE.SUCCESS:
        exit(1)

    running = True
    keep_people_only = True

    if coco_demo.cfg.MODEL.MASK_ON:
        print("Mask enabled!")
    if coco_demo.cfg.MODEL.KEYPOINT_ON:
        print("Keypoints enabled!")

    while running:
        start_time = time.time()
        err_code = cap.grab(runtime)
        if err_code != sl.ERROR_CODE.SUCCESS:
            break

        cap.retrieve_image(left, sl.VIEW.LEFT, resolution=res)
        cap.retrieve_image(depth_img, sl.VIEW.DEPTH, resolution=res)
        cap.retrieve_measure(depth, sl.MEASURE.DEPTH, resolution=res)
        cap.retrieve_measure(ptcloud, sl.MEASURE.XYZ, resolution=res)
        ptcloud_np = np.array(ptcloud.get_data())

        img = cv2.cvtColor(left.get_data(), cv2.COLOR_RGBA2RGB)
        prediction = coco_demo.select_top_predictions(
            coco_demo.compute_prediction(img))

        # Keep people only
        if keep_people_only:
            labels_tmp = prediction.get_field("labels")
            people_coco_label = 1
            keep = torch.nonzero(labels_tmp == people_coco_label).squeeze(1)
            prediction = prediction[keep]

        composite = img.copy()
        humans_3d = None
        masks_3d = None
        if coco_demo.show_mask_heatmaps:
            composite = coco_demo.create_mask_montage(composite, prediction)
        composite = coco_demo.overlay_boxes(composite, prediction)
        if coco_demo.cfg.MODEL.MASK_ON:
            masks_3d = get_masks3d(prediction, depth)
            composite = coco_demo.overlay_mask(composite, prediction)
        if coco_demo.cfg.MODEL.KEYPOINT_ON:
            # Extract 3D skeleton from the ZED depth
            humans_3d = get_humans3d(prediction, ptcloud_np)
            composite = coco_demo.overlay_keypoints(composite, prediction)
        if True:
            overlay_distances(prediction, get_boxes3d(prediction, ptcloud_np),
                              composite, humans_3d, masks_3d)
            composite = coco_demo.overlay_class_names(composite, prediction)

        print(" Time: {:.2f} s".format(time.time() - start_time))

        if display:
            cv2.imshow("COCO detections", composite)
            cv2.imshow("ZED Depth", depth_img.get_data())
            key = cv2.waitKey(10)
            if key == 27:
                break  # esc to quit
coco_demo = COCODemo(  # the start of this snippet is cut off; the constructor call is reconstructed from its remaining arguments
    cfg,
    min_image_size=800,
    confidence_threshold=0.6)
fourcc = cv2.VideoWriter_fourcc(*'XVID')

cap = cv2.VideoCapture('tmp/S2_Cars_day_cut.mp4')
out = cv2.VideoWriter('tmp/test_S2_Cars_day.avi', fourcc, 20.0, size)

index = 0
while cap.isOpened():
    ret, frame_bgr = cap.read()
    if not ret:
        break
    frame_bgr = cv2.resize(frame_bgr, size)
    index += 1

    with log.Tick():
        predictions = coco_demo.compute_prediction(frame_bgr)
        top_predictions = coco_demo.select_top_predictions(predictions)

        result = frame_bgr.copy()
        result = coco_demo.overlay_mask(result, top_predictions)
        result = coco_demo.overlay_boxes(result, top_predictions)
        result = coco_demo.overlay_class_names(result, top_predictions)

    cv2.imshow('result', result)
    out.write(result)

    if 32 == cv2.waitKey(1):
        break
out.release()
cap.release()
cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()

    # total_no_sets and current_set are used to split the data so that multiple processes can be run
    # manually on different GPUs or machines.
    # If total_no_sets is 1, current_set can only be 0 and the script runs normally (default mode).
    # If total_no_sets is 2, run this script once with current_set=0 and once with current_set=1;
    # the data is split into two parts that can be processed separately.
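    # e.g. with 100 movies and total_no_sets=2: current_set=0 processes items [0, 50) and
    # current_set=1 processes items [50, 100), per the start_idx/end_idx computation below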
    parser.add_argument('-t',
                        '--total_no_sets',
                        type=int,
                        required=False,
                        default=1)
    parser.add_argument('-c',
                        '--current_set',
                        type=int,
                        required=False,
                        default=0)
    parser.add_argument('-g', '--gpu', type=str, required=False, default="0")
    # NO_GPUS = 4
    # CUR_GPU = 0 # zero based
    #parser.add_argument('-g', '--gpu', type=str, required=True)

    args = parser.parse_args()

    gpu = args.gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    total_no_sets = args.total_no_sets
    current_set = args.current_set

    print('SET no %i (0 based) of %i SETS' % (current_set, total_no_sets))

    with open(SEGMENT_ANN_FILE) as fp:
        annotations = json.load(fp)

    segment_keys = sorted(annotations.keys())
    # -5KQ66BBWC4.0902

    movie_timestamp_mapping = {}
    #for segment_key in current_segment_keys:
    for segment_key in segment_keys:
        movie_name, timestamp = segment_key.split('.')
        if movie_name in movie_timestamp_mapping:
            movie_timestamp_mapping[movie_name].append(segment_key)
        else:
            movie_timestamp_mapping[movie_name] = [segment_key]

    movies = sorted(movie_timestamp_mapping.keys())

    no_segments = len(movies)
    vid_per_set = no_segments / total_no_sets
    start_idx = current_set * vid_per_set
    start_idx = np.floor(start_idx).astype(int)
    end_idx = (current_set + 1) * vid_per_set
    end_idx = no_segments if (
        current_set + 1 == total_no_sets) else np.ceil(end_idx).astype(int)

    print('Working on movies: [%i - %i)' % (start_idx, end_idx))
    cur_movies = movies[start_idx:end_idx]

    #config_file = "../configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml"
    config_file = "e2e_faster_rcnn_X_101_32x8d_FPN_1x_ava.yaml"

    # update the config options with the config file
    cfg.merge_from_file(config_file)

    #### if you are changing the weights, change the name of the output folder!!!!
    #cfg.merge_from_list(["MODEL.WEIGHT", "e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth"]) ### original COCO weights from facebook
    cfg.merge_from_list(["MODEL.WEIGHT", "faster_rcnn_ava_model_0255000.pth"])  ### finetuned on AVA actors

    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=DETECTION_TH,
    )

    for mm, movie in enumerate(tqdm(cur_movies)):
        print('\n\n Working on %s, %i/%i \n\n' % (movie, mm, len(cur_movies)))
        for segment_key in movie_timestamp_mapping[movie]:
            midframe = read_keyframe(segment_key)
            pred_boxes = coco_demo.compute_prediction(midframe)

            H, W, C = midframe.shape

            boxes = pred_boxes.bbox / torch.tensor([W, H, W, H],
                                                   dtype=torch.float)
            box_list = boxes.tolist()
            scores = pred_boxes.get_field("scores")
            classes = pred_boxes.get_field("labels")
            num_boxes = len(box_list)
            # clean up organize
            segment_detections = []
            for bb in range(num_boxes):
                left, top, right, bottom = [
                    get_3_decimal_float(coord) for coord in box_list[bb]
                ]  # xyxy : left top right bottom
                cur_box = [top, left, bottom, right]

                cur_score = get_3_decimal_float(scores[bb])
                cur_class_no = int(classes[bb])
                cur_class_str = coco_demo.CATEGORIES[cur_class_no]
                cur_detection = {
                    'box': cur_box,
                    'score': cur_score,
                    'class_str': cur_class_str,
                    'class_no': cur_class_no
                }
                segment_detections.append(cur_detection)

            movie_name, timestamp = segment_key.split('.')
            #cur_detections = object_detections

            results_dict = {
                'movie_name': movie_name,
                'timestamp': timestamp,
                'detections': segment_detections,
                'height': H,
                'width': W,
            }

            save_results_json(results_dict)
            #print('Timestamp done : %s' %timestamp)
            tqdm.write('Timestamp done : %s' % timestamp)

        #print('\n\nMovie done %s\n\n' % movie)
        tqdm.write('\n\nMovie done %s\n\n' % movie)
def main():
    parser = argparse.ArgumentParser()

    # total_no_sets and current_set are used to split the data so that multiple processes can be run
    # manually on different GPUs or machines.
    # If total_no_sets is 1, current_set can only be 0 and the script runs normally (default mode).
    # If total_no_sets is 2, run this script once with current_set=0 and once with current_set=1;
    # the data is split into two parts that can be processed separately.
    parser.add_argument('-t',
                        '--total_no_sets',
                        type=int,
                        required=False,
                        default=1)
    parser.add_argument('-c',
                        '--current_set',
                        type=int,
                        required=False,
                        default=0)
    parser.add_argument('-g', '--gpu', type=str, required=False, default='0')
    # NO_GPUS = 4
    # CUR_GPU = 0 # zero based
    #parser.add_argument('-g', '--gpu', type=str, required=True)

    args = parser.parse_args()

    gpu = args.gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    total_no_sets = args.total_no_sets
    current_set = args.current_set

    print('SET no %i (0 based) of %i SETS' % (current_set, total_no_sets))

    all_files = os.listdir(IMAGES_FOLDER)
    all_files.sort()

    no_segments = len(all_files)
    vid_per_set = no_segments / total_no_sets
    start_idx = current_set * vid_per_set
    start_idx = np.floor(start_idx).astype(int)
    end_idx = (current_set + 1) * vid_per_set
    end_idx = no_segments if (
        current_set + 1 == total_no_sets) else np.ceil(end_idx).astype(int)

    print('Working on images: [%i - %i)' % (start_idx, end_idx))
    cur_files = all_files[start_idx:end_idx]

    config_file = "../configs/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml"

    # update the config options with the config file
    cfg.merge_from_file(config_file)
    cfg.merge_from_list(
        ["MODEL.WEIGHT", "e2e_faster_rcnn_X_101_32x8d_FPN_1x.pth"])
    #cfg.merge_from_list(["MODEL.WEIGHT", "faster_rcnn_ava_model_0255000.pth"])

    coco_demo = COCODemo(
        cfg,
        min_image_size=800,
        confidence_threshold=DETECTION_TH,
    )

    for mm, img_file in enumerate(tqdm(cur_files)):
        print('\n\n Working on %s, %i/%i \n\n' %
              (img_file, mm, len(cur_files)))
        img_path = os.path.join(IMAGES_FOLDER, img_file)
        img = cv2.imread(img_path)
        pred_boxes = coco_demo.compute_prediction(img)

        H, W, C = img.shape

        boxes = pred_boxes.bbox / torch.tensor([W, H, W, H], dtype=torch.float)
        box_list = boxes.tolist()
        scores = pred_boxes.get_field("scores")
        classes = pred_boxes.get_field("labels")
        num_boxes = len(box_list)
        # clean up organize
        segment_detections = []
        for bb in range(num_boxes):
            left, top, right, bottom = [
                get_3_decimal_float(coord) for coord in box_list[bb]
            ]  # xyxy : left top right bottom
            cur_box = [top, left, bottom, right]

            cur_score = get_3_decimal_float(scores[bb])
            cur_class_no = int(classes[bb])
            cur_class_str = coco_demo.CATEGORIES[cur_class_no]
            cur_detection = {
                'box': cur_box,
                'score': cur_score,
                'class_str': cur_class_str,
                'class_no': cur_class_no
            }
            segment_detections.append(cur_detection)

        #cur_detections = object_detections

        results_dict = {
            'segment_name': img_file,
            'detections': segment_detections,
            'height': H,
            'width': W,
        }

        save_results_json(results_dict)
        #print('Timestamp done : %s' %timestamp)

        #print('\n\nMovie done %s\n\n' % movie)
        tqdm.write('\n\nImage done %s\n\n' % img_file)
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--predictions-out",
        default="./test.json",
        metavar="FILE",
        help="path to file to output labels",
    )
    parser.add_argument(
        "--test-image-dir",
        default="/n/pana/scratch/ravi/bdd/bdd100k/images/100k/val/",
        metavar="FILE",
        help="path to test image directory",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.5,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    image_paths = glob.glob(os.path.join(args.test_image_dir, '*.jpg'))
    pred_list = []
    coco_cat_to_bdd_cat = {
        "person": "person",
        "car": "car",
        "traffic light": "traffic light",
        "stop sign": "traffic sign",
        "bus": "bus",
        "truck": "truck",
        "bicycle": "bike",
        "motorcycle": "motor",
        "train": "train"
    }

    for i in tqdm(image_paths):
        img = cv2.imread(i)
        image_id = i.split('/')[-1].split('.')[0]

        start = time.time()
        predictions = coco_demo.compute_prediction(img)
        end = time.time()
        scores = predictions.get_field('scores')
        #high_conf_idx = scores > args.confidence_threshold
        #predictions = predictions[high_conf_idx]
        #scores = predictions.get_field('scores')
        boxes = predictions.bbox
        labels = predictions.get_field('labels')
        labels = [coco_demo.CATEGORIES[l] for l in labels]

        for b in range(len(labels)):
            if labels[b] in coco_cat_to_bdd_cat:
                label = coco_cat_to_bdd_cat[labels[b]]
                obj_dict = {
                    'name': image_id,
                    'bbox': [
                        float(boxes[b][0]),
                        float(boxes[b][1]),
                        float(boxes[b][2]),
                        float(boxes[b][3])
                    ],
                    'category': label,
                    'score': float(scores[b])
                }
                pred_list.append(obj_dict)

    with open(args.predictions_out, 'w') as fp:
        json.dump(pred_list, fp)
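
For reference, each record appended to pred_list (and hence each entry in the output JSON) has the shape of obj_dict above, e.g. (values illustrative only): {"name": "<image id>", "bbox": [x1, y1, x2, y2], "category": "car", "score": 0.87}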