Example #1
def main(args):
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda
    np.random.seed(cfg.RNG_SEED)

    pascal_classes = np.asarray(['__background__', 'targetobject', 'hand'])
    args.set_cfgs = [
        'ANCHOR_SCALES', '[8, 16, 32, 64]', 'ANCHOR_RATIOS', '[0.5, 1, 2]'
    ]
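    # NOTE: 4 scales x 3 ratios = 12 anchors per feature-map location.
    # This list is assigned after cfg_from_list() has already run above, so
    # within this snippet it is never applied to cfg; presumably the loaded
    # checkpoint was trained with this anchor configuration.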

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
        raise Exception(f"network {args.net} is not defined")

    fasterRCNN.create_architecture()

    load_name = 'models/res101_handobj_100K/pascal_voc/faster_rcnn_1_8_132028.pth'

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))

    fasterRCNN.load_state_dict(checkpoint['model'])

    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('loaded model successfully!')

    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
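    # lr, momentum and weight_decay are training hyperparameters; they are
    # read here but not used in the rest of this inference snippet (lr is
    # later overwritten with the hand-side tensor).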

    def _get_image_blob(im):
        """Converts an image into a network input.
        Arguments:
          im (ndarray): a color image in BGR order
        Returns:
          blob (ndarray): a data blob holding an image pyramid
          im_scale_factors (list): list of image scales (relative to im) used
            in the image pyramid
        """
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS

        im_shape = im_orig.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])

        processed_ims = []
        im_scale_factors = []

        for target_size in cfg.TEST.SCALES:
            im_scale = float(target_size) / float(im_size_min)
            # Prevent the biggest axis from being more than MAX_SIZE
            if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
                im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
            im = cv2.resize(im_orig,
                            None,
                            None,
                            fx=im_scale,
                            fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
            im_scale_factors.append(im_scale)
            processed_ims.append(im)

        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)

        return blob, np.array(im_scale_factors)
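    # Hypothetical example of _get_image_blob: with cfg.TEST.SCALES = (600,)
    # and cfg.TEST.MAX_SIZE = 1000, a 480x640 BGR frame is scaled by
    # 600/480 = 1.25 (1.25 * 640 = 800 <= MAX_SIZE), so blob.shape is
    # (1, 600, 800, 3) and im_scale_factors == [1.25].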

    # initialize the tensor holders here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    box_info = torch.FloatTensor(1)
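    # These one-element tensors are reusable input buffers: each frame's
    # inputs are later written into them in place via resize_()/copy_()
    # (or zero_()), avoiding per-frame reallocation.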

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    if args.cuda > 0:
        cfg.CUDA = True
        fasterRCNN.cuda()

    fasterRCNN.eval()

    with torch.no_grad():
        start = time.time()
        max_per_image = 100
        thresh_hand = args.thresh_hand
        thresh_obj = args.thresh_obj
        vis = args.vis

        # print(f'thresh_hand = {thresh_hand}')
        # print(f'thresh_obj = {thresh_obj}')

        webcam_num = args.webcam_num
        # Set up webcam or get image directories
        if webcam_num >= 0:
            cap = cv2.VideoCapture(webcam_num)
            num_images = 0
        else:
            print(f'image dir = {args.image_dir}')
            print(f'save dir = {args.save_dir}')
            imglist = os.listdir(args.image_dir)
            num_images = len(imglist)

        print('Loaded {} images.'.format(num_images))

        while num_images >= 0:
            total_tic = time.time()
            if webcam_num == -1:
                num_images -= 1

            # Get image from the webcam
            if webcam_num >= 0:
                if not cap.isOpened():
                    raise RuntimeError(
                        "Webcam could not be opened. Please check the connection.")
                ret, frame = cap.read()
                im_in = np.array(frame)
            # Load the demo image
            else:
                im_file = os.path.join(args.image_dir, imglist[num_images])
                im_in = np.array(imread(im_file))
                # resize
                # im_in = np.array(Image.fromarray(im_in).resize((640, 360)))
            if len(im_in.shape) == 2:
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr (imread returns RGB; note that webcam frames from
            # cv2 are already BGR, so this reversal swaps them to RGB)
            im = im_in[:, :, ::-1]

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            with torch.no_grad():
                im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
                im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
                gt_boxes.resize_(1, 1, 5).zero_()
                num_boxes.resize_(1).zero_()
                box_info.resize_(1, 1, 5).zero_()
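                # gt_boxes, num_boxes and box_info stay zeroed here: at
                # inference the network appears to use them only as shape
                # placeholders.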

                # pdb.set_trace()
            det_tic = time.time()
            print(im_data.shape)
            (rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box,
             RCNN_loss_cls, RCNN_loss_bbox, rois_label,
             loss_list) = fasterRCNN(im_data, im_info, gt_boxes, num_boxes,
                                     box_info)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            # extract predicted params
            contact_vector = loss_list[0][0]  # hand contact state info
            # offset vector (factored into a unit vector and a magnitude)
            offset_vector = loss_list[1][0].detach()
            lr_vector = loss_list[2][0].detach()  # hand side info (left/right)

            # get hand contact
            _, contact_indices = torch.max(contact_vector, 2)
            contact_indices = contact_indices.squeeze(0).unsqueeze(-1).float()
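            # argmax over the contact logits picks one contact state per box;
            # in the 100DOH hand_object_detector this is typically one of 5
            # states (no contact / self / other person / portable object /
            # stationary object).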

            # get hand side
            lr = torch.sigmoid(lr_vector) > 0.5
            lr = lr.squeeze(0).float()
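            # thresholding the sigmoid at 0.5 gives a binary hand-side label
            # per box (by the usual convention in this codebase, 0 = left and
            # 1 = right).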

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally denormalize deltas with the precomputed
                    # mean and stdev used during training
                    stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
                    means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    if args.cuda > 0:
                        stds, means = stds.cuda(), means.cuda()
                    box_deltas = box_deltas.view(-1, 4) * stds + means
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(
                            1, -1, 4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
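                # Standard Faster R-CNN decoding: each delta (dx, dy, dw, dh)
                # shifts an RoI center by (dx*w, dy*h) and rescales it by
                # (exp(dw), exp(dh)); clip_boxes then clamps the result to
                # the image boundary.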
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= im_scales[0]
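            # boxes were predicted on the resized image, so dividing by the
            # blob scale maps them back to original-image coordinates.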

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()
            print(detect_time)
            if vis:
                im2show = np.copy(im)
            obj_dets, hand_dets = None, None
            for j in range(1, len(pascal_classes)):
                # inds = torch.nonzero(scores[:,j] > thresh).view(-1)
                if pascal_classes[j] == 'hand':
                    inds = torch.nonzero(scores[:, j] > thresh_hand,
                                         as_tuple=False).view(-1)
                elif pascal_classes[j] == 'targetobject':
                    inds = torch.nonzero(scores[:, j] > thresh_obj,
                                         as_tuple=False).view(-1)

                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat(
                        (cls_boxes,
                         cls_scores.unsqueeze(1), contact_indices[inds],
                         offset_vector.squeeze(0)[inds], lr[inds]), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
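                    # NMS keeps the highest-scoring boxes and drops any with
                    # IoU above cfg.TEST.NMS; indexing cls_dets with the same
                    # keep indices keeps the extra contact/offset/side columns
                    # aligned with the surviving boxes.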
                    if pascal_classes[j] == 'targetobject':
                        obj_dets = cls_dets.cpu().numpy()
                    if pascal_classes[j] == 'hand':
                        hand_dets = cls_dets.cpu().numpy()

            if vis:
                # visualization
                im2show = vis_detections_filtered_objects_PIL(
                    im2show, obj_dets, hand_dets, thresh_hand, thresh_obj)

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            if webcam_num == -1:
                sys.stdout.write(
                    'im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r'.format(
                        num_images + 1, len(imglist), detect_time, nms_time))
                sys.stdout.flush()

            if vis and webcam_num == -1:

                folder_name = args.save_dir
                os.makedirs(folder_name, exist_ok=True)
                result_path = os.path.join(
                    folder_name, imglist[num_images][:-4] + "_det.png")
                im2show.save(result_path)
            else:
                im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
                cv2.imshow("frame", im2showRGB)
                total_toc = time.time()
                total_time = total_toc - total_tic
                frame_rate = 1 / total_time
                print('Frame rate:', frame_rate)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        if webcam_num >= 0:
            cap.release()
            cv2.destroyAllWindows()
Example #2
              if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
              else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
              
              cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1), contact_indices[inds], offset_vector.squeeze(0)[inds], lr[inds]), 1)
              cls_dets = cls_dets[order]
              keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
              cls_dets = cls_dets[keep.view(-1).long()]
              if pascal_classes[j] == 'targetobject':
                obj_dets = cls_dets.cpu().numpy()
              if pascal_classes[j] == 'hand':
                hand_dets = cls_dets.cpu().numpy()
              
        if vis:
          # visualization
          im2show = vis_detections_filtered_objects_PIL(im2show, obj_dets, hand_dets, thresh_hand, thresh_obj)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                            .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            
            folder_name = args.save_dir
            os.makedirs(folder_name, exist_ok=True)
            result_path = os.path.join(folder_name, imglist[num_images][:-4] + "_det.png")
            im2show.save(result_path)
Example #3
                        cls_dets = torch.cat(
                            (cls_boxes,
                             cls_scores.unsqueeze(1), contact_indices[inds],
                             offset_vector.squeeze(0)[inds], lr[inds]), 1)
                        cls_dets = cls_dets[order]
                        keep = nms(cls_boxes[order, :], cls_scores[order],
                                   cfg.TEST.NMS)
                        cls_dets = cls_dets[keep.view(-1).long()]
                        if pascal_classes[j] == 'targetobject':
                            obj_dets = cls_dets.cpu().numpy()
                        if pascal_classes[j] == 'hand':
                            hand_dets = cls_dets.cpu().numpy()

                if vis:
                    # visualization
                    cvimg = vis_detections_filtered_objects_PIL(
                        frame, im2show, obj_dets, hand_dets, thresh_hand,
                        thresh_obj)

                misc_toc = time.time()
                nms_time = misc_toc - misc_tic

                print(nms_time + detect_time)

                #cv2.imwrite("detectedvideo/detected{}.png".format(c),cvimg)
                cv2.imshow('detection', cvimg)
                if args.output:
                    out.write(cvimg)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                # if webcam_num == -1:
Example #4
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat(
                    (cls_boxes, cls_scores.unsqueeze(1), indices[inds, :],
                     hand_vector.squeeze(0)[inds, :], lr[inds, :],
                     nc_prob[inds, :]), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if args.vis:
                    im2show = vis_detections_filtered_objects_PIL(
                        im2show, imdb.classes[j],
                        cls_dets.cpu().numpy(), 0.1)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, 4] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, 4] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
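        # The cap above pools scores across all foreground classes, takes the
        # max_per_image-th highest score as a cutoff, and filters each class's
        # detections down to that threshold.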