Example #1
0
def test_rotation(opt):
    """Run text-box detection on every image of a folder and save the results.

    One SSD network is built per entry of ``opt.model_prefix_list`` and its
    weights are restored through ``util.load_latest_model``.  Every image
    matching ``*.<opt.extension>`` in ``opt.test_dataset_root`` is resized so
    that its longer side equals ``opt.test_size``, padded to a square, passed
    through every network in half precision, and the detections of all
    networks are merged with ``combine_boxes``.  The merged boxes are mapped
    back to the original image with ``augment_back``.

    Outputs, both below ``args.path/args.code_name``:
        * ``result+<prefixes>/<image name>.txt``: one line per box holding
          the four corners as eight comma separated integers.
        * ``val+<prefixes>/<index>.jpg``: the input image with the boxes
          drawn, resized to a width of ``opt.test_size``.

    The function relies on the module level ``args`` and ``cfg`` objects.

    Args:
        opt: option namespace.  The attributes read here are
            ``model_prefix_list``, ``device_id``, ``nth_best_model``,
            ``detector_top_k``, ``detector_conf_threshold``,
            ``detector_nms_threshold``, ``test_dataset_root``, ``extension``
            and ``test_size``.

    Raises:
        AssertionError: if there are more model prefixes than CUDA devices.
        RuntimeError: if a tensor stored on disk has a different shape than
            the matching tensor of the freshly built network.
        FileNotFoundError: if ``opt.test_dataset_root`` does not exist.
    """
    prefix_tag = "-".join(opt.model_prefix_list)
    result_dir = os.path.join(args.path, args.code_name,
                              "result+" + prefix_tag)
    os.makedirs(result_dir, exist_ok=True)

    # Load
    assert len(opt.model_prefix_list) <= torch.cuda.device_count(), \
        "number of models should not exceed the device numbers"
    single_model = len(opt.model_prefix_list) == 1
    nets = []
    for net_idx, prefix in enumerate(opt.model_prefix_list):
        net = model.SSD(cfg,
                        connect_loc_to_conf=True,
                        fix_size=False,
                        incep_conf=True,
                        incep_loc=True)
        # A single model honours opt.device_id, several models are spread
        # over the devices in list order.
        device_id = opt.device_id if single_model else net_idx
        net = net.to("cuda:%d" % (device_id))
        net_dict = net.state_dict()
        weight_dict = util.load_latest_model(args,
                                             net,
                                             prefix=prefix,
                                             return_state_dict=True,
                                             nth=opt.nth_best_model)
        # Tensors that are absent from the checkpoint are zero-filled.
        for key in net_dict:
            if "module." + key not in weight_dict:
                net_dict[key] = torch.zeros(net_dict[key].shape)
        loading_fail_signal = False
        for key, weight in weight_dict.items():
            # The first 7 characters are dropped; the lookup above suggests
            # this is the "module." prefix of the checkpoint keys.
            target_key = key[7:]
            if target_key not in net_dict:
                print("Key: %s does not exist in net_dict" % (target_key))
            elif net_dict[target_key].shape == weight.shape:
                net_dict[target_key] = weight
            else:
                print(
                    "Key: %s from disk has shape %s copy to the model with shape %s"
                    % (target_key, str(weight.shape),
                       str(net_dict[target_key].shape)))
                loading_fail_signal = True
        if loading_fail_signal:
            raise RuntimeError(
                'Shape Error happens, remove "%s" from your -mpl settings.' %
                (prefix))

        net.load_state_dict(net_dict)
        net.eval()
        nets.append(net.half())
        print("Above model loaded with out a problem")
    detector = model.Detect(num_classes=2,
                            bkg_label=0,
                            top_k=opt.detector_top_k,
                            conf_thresh=opt.detector_conf_threshold,
                            nms_thresh=opt.detector_nms_threshold)

    # Enumerate test folder
    root_path = os.path.expanduser(opt.test_dataset_root)
    if not os.path.exists(root_path):
        raise FileNotFoundError(
            "%s does not exists, please check your -tdr/--test_dataset_root settings"
            % (root_path))
    img_list = glob.glob(root_path + "/*.%s" % (opt.extension))

    # The output folder does not depend on the image, create it only once.
    img_save_directory = os.path.join(args.path, args.code_name,
                                      "val+" + prefix_tag)
    os.makedirs(img_save_directory, exist_ok=True)

    for i, img_file in enumerate(sorted(img_list)):
        start = time.time()
        # splitext copes with extensions of any length (e.g. "jpeg").
        name = os.path.splitext(os.path.basename(img_file))[0]
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("Cannot read image %s, skipped" % (img_file))
            continue
        height_ori, width_ori = img.shape[0], img.shape[1]

        # No rotation is applied here; augment_back still expects the entry.
        transform_det = {"rotation": 0}
        # Resize the longer side to a certain length
        if height_ori >= width_ori:
            resize_to = {
                "height": opt.test_size,
                "width": "keep-aspect-ratio"
            }
        else:
            resize_to = {
                "height": "keep-aspect-ratio",
                "width": opt.test_size
            }
        resize_aug = augmenters.Sequential(
            [augmenters.Resize(size=resize_to)]).to_deterministic()
        image = resize_aug.augment_image(img)
        h_re, w_re = image.shape[0], image.shape[1]
        # Pad the image into a square image
        pad_aug = augmenters.Sequential(
            augmenters.PadToFixedSize(width=opt.test_size,
                                      height=opt.test_size,
                                      pad_cval=255,
                                      position="center")).to_deterministic()
        image = pad_aug.augment_image(image)
        h_final, w_final = image.shape[0], image.shape[1]

        # Prepare image tensor and test
        image_t = torch.Tensor(util.normalize_image(args, image)).unsqueeze(0)
        image_t = image_t.permute(0, 3, 1, 2)

        text_boxes = []
        for net_idx, net in enumerate(nets):
            device_id = opt.device_id if len(nets) == 1 else net_idx
            device = "cuda:%d" % (device_id)
            image_t = image_t.to(device).half()
            loc_data, conf_data, prior_data = net(image_t, is_train=False)
            prior_data = prior_data.to(device)
            det_result = detector(loc_data, conf_data, prior_data)
            # Extract the predicted bboxes with a score of at least 0.1
            keep = det_result.data[0, 1, :, 0] >= 0.1
            text_boxes.append(det_result.data[0, 1, keep, 1:])
        # NOTE(review): with several models the per-model results may live
        # on different CUDA devices; confirm torch.cat accepts them.
        text_boxes = torch.cat(text_boxes, dim=0)
        text_boxes = combine_boxes(text_boxes, img=image_t)
        pred = [[float(coor) for coor in area] for area in text_boxes]
        # Predictions are multiplied by the padded image size to get pixels.
        boxes_on_image = imgaug.augmentables.bbs.BoundingBoxesOnImage(
            [
                imgaug.augmentables.bbs.BoundingBox(box[0] * w_final,
                                                    box[1] * h_final,
                                                    box[2] * w_final,
                                                    box[3] * h_final)
                for box in pred
            ],
            shape=image.shape)
        return_aug = augment_back(transform_det, height_ori, width_ori,
                                  (h_final - h_re) / 2, (w_final - w_re) / 2)
        return_aug = return_aug.to_deterministic()
        # NOTE(review): the restored image itself is not used afterwards;
        # the call is kept so the augmenter is driven exactly as before.
        return_aug.augment_image(image)
        bbox = return_aug.augment_bounding_boxes([boxes_on_image])[0]

        # The context manager closes the file even if drawing fails.
        with open(os.path.join(result_dir, name + ".txt"), "w") as f:
            for box in bbox.bounding_boxes:
                x1, y1 = int(round(box.x1)), int(round(box.y1))
                x2, y2 = int(round(box.x2)), int(round(box.y2))
                # 4-point to 8-point: x1, y1, x2, y1, x2, y2, x1, y2
                f.write("%d,%d,%d,%d,%d,%d,%d,%d\n" %
                        (x1, y1, x2, y1, x2, y2, x1, y2))
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 105, 65), 2)

        # Save the annotated image, resized to a width of opt.test_size.
        _imgh, _imgw, _imgc = img.shape
        _imgh = _imgh * opt.test_size / _imgw
        img = cv2.resize(img, (opt.test_size, int(_imgh)))
        cv2.imwrite(os.path.join(img_save_directory, "%04d.jpg" % i), img)
        print("%d th image cost %.2f seconds" % (i, time.time() - start))
Example #2
0
def test_rotation(opt):
    """Detect text boxes on a labelled test folder and report precision/recall.

    One SSD network is built per entry of ``opt.model_prefix_list`` and its
    weights are restored through ``util.load_latest_model``.  Every image
    matching ``*.<opt.extension>`` in ``opt.test_dataset_root`` is resized,
    padded to a square, passed through every network, and the detections of
    all networks are merged with ``combine_boxes``.  The merged boxes are
    mapped back to the original image with ``augment_back`` and compared by
    ``measure`` against the ground truth read from
    ``<image name>.<opt.ground_truth_extension>``.

    Outputs, both below ``args.path/args.code_name``:
        * ``result+<prefixes>/<image name>.txt``: one line per predicted box
          holding the four corners as eight comma separated integers.
        * ``val+<prefixes>/<index>_<precision>_<recall>.jpg``: the image with
          ground truth and predicted boxes drawn.

    The averaged precision and recall are printed at the end.  The function
    relies on the module level ``args`` and ``cfg`` objects.
    NOTE(review): ``square`` is not defined inside this function; it is
    presumably a module level constant with the network input size.

    Args:
        opt: option namespace.  ``opt.model_prefix_list`` may be a single
            string, in which case it is replaced in place by a one element
            list.  Further attributes read here: ``loc_to_conf``,
            ``conf_incep``, ``loc_incep``, ``loc_preconv``, ``conf_preconv``,
            ``feature_pyramid_net``, ``self_attention``, ``inner_filters``,
            ``inner_m_factor``, ``device_id``, ``nth_best_model``,
            ``detector_top_k``, ``detector_conf_threshold``,
            ``detector_nms_threshold``, ``test_dataset_root``, ``extension``,
            ``ground_truth_extension`` and ``verbose``.

    Raises:
        AssertionError: if there are more model prefixes than CUDA devices.
        RuntimeError: if a tensor stored on disk has a different shape than
            the matching tensor of the freshly built network.
        FileNotFoundError: if ``opt.test_dataset_root`` does not exist.
    """
    # Normalise BEFORE the prefixes are joined: joining a bare string would
    # put a dash between every character of the prefix.
    if isinstance(opt.model_prefix_list, str):
        opt.model_prefix_list = [opt.model_prefix_list]
    prefix_tag = "-".join(opt.model_prefix_list)
    result_dir = os.path.join(args.path, args.code_name,
                              "result+" + prefix_tag)
    os.makedirs(result_dir, exist_ok=True)

    # Load
    assert len(opt.model_prefix_list) <= torch.cuda.device_count(), \
        "number of models should not exceed the device numbers"
    single_model = len(opt.model_prefix_list) == 1
    nets = []
    for net_idx, prefix in enumerate(opt.model_prefix_list):
        net = model.SSD(cfg,
                        connect_loc_to_conf=opt.loc_to_conf,
                        fix_size=False,
                        conf_incep=opt.conf_incep,
                        loc_incep=opt.loc_incep,
                        loc_preconv=opt.loc_preconv,
                        conf_preconv=opt.conf_preconv,
                        FPN=opt.feature_pyramid_net,
                        SA=opt.self_attention,
                        in_wid=opt.inner_filters,
                        m_factor=opt.inner_m_factor)
        # A single model honours opt.device_id, several models are spread
        # over the devices in list order.
        device_id = opt.device_id if single_model else net_idx
        net = net.to("cuda:%d" % (device_id))
        net_dict = net.state_dict()
        weight_dict = util.load_latest_model(args,
                                             net,
                                             prefix=prefix,
                                             return_state_dict=True,
                                             nth=opt.nth_best_model)
        # Tensors that are absent from the checkpoint are zero-filled.
        for key in net_dict:
            if "module." + key not in weight_dict:
                net_dict[key] = torch.zeros(net_dict[key].shape)
        loading_fail_signal = False
        for key, weight in weight_dict.items():
            # The first 7 characters are dropped; the lookup above suggests
            # this is the "module." prefix of the checkpoint keys.
            target_key = key[7:]
            if target_key not in net_dict:
                print("Key: %s does not exist in net_dict" % (target_key))
            elif net_dict[target_key].shape == weight.shape:
                net_dict[target_key] = weight
            else:
                print(
                    "Key: %s from disk has shape %s copy to the model with shape %s"
                    % (target_key, str(weight.shape),
                       str(net_dict[target_key].shape)))
                loading_fail_signal = True
        if loading_fail_signal:
            raise RuntimeError(
                'Shape Error happens, remove "%s" from your -mpl settings.' %
                (prefix))

        net.load_state_dict(net_dict)
        net.eval()
        nets.append(net)
        print("Above model loaded with out a problem")
    detector = model.Detect(num_classes=2,
                            bkg_label=0,
                            top_k=opt.detector_top_k,
                            conf_thresh=opt.detector_conf_threshold,
                            nms_thresh=opt.detector_nms_threshold)

    # Enumerate test folder
    root_path = os.path.expanduser(opt.test_dataset_root)
    if not os.path.exists(root_path):
        raise FileNotFoundError(
            "%s does not exists, please check your -tdr/--test_dataset_root settings"
            % (root_path))
    img_list = glob.glob(root_path + "/*.%s" % (opt.extension))

    # The output folder does not depend on the image, create it only once.
    img_save_directory = os.path.join(args.path, args.code_name,
                                      "val+" + prefix_tag)
    os.makedirs(img_save_directory, exist_ok=True)

    precisions, recalls = [], []
    for i, img_file in enumerate(sorted(img_list)):
        start = time.time()
        # splitext copes with extensions of any length (e.g. "jpeg").
        name = os.path.splitext(os.path.basename(img_file))[0]
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("Cannot read image %s, skipped" % (img_file))
            continue
        height_ori, width_ori = img.shape[0], img.shape[1]

        # Rotation handling is switched off; the branch is kept so it can be
        # re-enabled by flipping the flag.
        do_rotation = False
        if do_rotation:
            # detect rotation for returning the image back
            img, transform_det = estimate_angle(img, args, None, None, None)
            transform_det["rotation"] = 0
            if transform_det["rotation"] != 0:
                rot_aug = augmenters.Sequential(
                    augmenters.Affine(rotate=transform_det["rotation"],
                                      cval=args.aug_bg_color))
                image = rot_aug.augment_image(img)
            else:
                image = img
        else:
            image = img
        # Resize the longer side to a certain length
        if height_ori >= width_ori:
            resize_to = {"height": square, "width": "keep-aspect-ratio"}
        else:
            resize_to = {"height": "keep-aspect-ratio", "width": square}
        resize_aug = augmenters.Sequential(
            [augmenters.Resize(size=resize_to)]).to_deterministic()
        image = resize_aug.augment_image(image)
        h_re, w_re = image.shape[0], image.shape[1]
        # Pad the image into a square image
        pad_aug = augmenters.Sequential(
            augmenters.PadToFixedSize(width=square,
                                      height=square,
                                      pad_cval=255,
                                      position="center")).to_deterministic()
        image = pad_aug.augment_image(image)
        h_final, w_final = image.shape[0], image.shape[1]

        # Prepare image tensor and test
        image_t = torch.Tensor(util.normalize_image(args, image)).unsqueeze(0)
        image_t = image_t.permute(0, 3, 1, 2)

        text_boxes = []
        for net_idx, net in enumerate(nets):
            device_id = opt.device_id if len(nets) == 1 else net_idx
            device = "cuda:%d" % (device_id)
            image_t = image_t.to(device)
            loc_data, conf_data, prior_data = net(image_t, is_train=False)
            prior_data = prior_data.to(device)
            det_result = detector(loc_data, conf_data, prior_data)
            # Extract the predicted bboxes with a score of at least 0.1
            keep = det_result.data[0, 1, :, 0] >= 0.1
            text_boxes.append(det_result.data[0, 1, keep, 1:])
        # NOTE(review): with several models the per-model results may live
        # on different CUDA devices; confirm torch.cat accepts them.
        text_boxes = torch.cat(text_boxes, dim=0)
        text_boxes = combine_boxes(text_boxes, img=image_t)
        pred = [[float(coor) for coor in area] for area in text_boxes]
        # Predictions are multiplied by the padded image size to get pixels.
        boxes_on_image = imgaug.augmentables.bbs.BoundingBoxesOnImage(
            [
                imgaug.augmentables.bbs.BoundingBox(box[0] * w_final,
                                                    box[1] * h_final,
                                                    box[2] * w_final,
                                                    box[3] * h_final)
                for box in pred
            ],
            shape=image.shape)
        return_aug = augment_back(height_ori, width_ori, (h_final - h_re) / 2,
                                  (w_final - w_re) / 2)
        return_aug = return_aug.to_deterministic()
        # NOTE(review): the restored image itself is not used afterwards;
        # the call is kept so the augmenter is driven exactly as before.
        return_aug.augment_image(image)
        bbox = return_aug.augment_bounding_boxes([boxes_on_image])[0]

        # Ground truth: reduce every polygon to its axis aligned bounding box.
        gt_box_file = os.path.join(opt.test_dataset_root,
                                   name + "." + opt.ground_truth_extension)
        coords = tb_data.parse_file(os.path.expanduser(gt_box_file))
        gt_coords = []
        for coord in coords:
            x1, x2 = min(coord[::2]), max(coord[::2])
            y1, y2 = min(coord[1::2]), max(coord[1::2])
            gt_coords.append([x1, y1, x2, y2])
            cv2.rectangle(img, (x1, y1), (x2, y2), (70, 67, 238), 2)

        pred_final = []
        # The context manager closes the file even if drawing fails.
        with open(os.path.join(result_dir, name + ".txt"), "w") as f:
            for box in bbox.bounding_boxes:
                x1, y1 = int(round(box.x1)), int(round(box.y1))
                x2, y2 = int(round(box.x2)), int(round(box.y2))
                pred_final.append([x1, y1, x2, y2])
                # 4-point to 8-point: x1, y1, x2, y1, x2, y2, x1, y2
                f.write("%d,%d,%d,%d,%d,%d,%d,%d\n" %
                        (x1, y1, x2, y1, x2, y2, x1, y2))
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 105, 65), 2)

        accu, precision, recall = measure(torch.Tensor(pred_final).cuda(),
                                          torch.Tensor(gt_coords).cuda(),
                                          width=img.shape[1],
                                          height=img.shape[0])
        precisions.append(precision)
        recalls.append(recall)

        # The annotated image carries its index and scores in the file name.
        out_name = "%s_%.2f_%.2f" % (str(i).zfill(4), precision, recall)
        cv2.imwrite(os.path.join(img_save_directory, out_name + ".jpg"), img)
        if opt.verbose:
            print(
                "%d th image cost %.2f seconds, precision: %.2f, recall: %.2f"
                % (i, time.time() - start, precision, recall))
    print("Precision: %.2f, Recall: %.2f" % (avg(precisions), avg(recalls)))
Example #3
0
def to_tensor(args, image, seed=None, size=None):
    """Normalise ``image`` and convert it with ``T.ToTensor``.

    The image is passed through ``util.normalize_image(args, image)``, cast
    to ``float32`` and handed to a ``T.Compose`` pipeline whose only step is
    ``T.ToTensor()``.

    Args:
        args: forwarded unchanged to ``util.normalize_image``.
        image: the image to convert.
        seed: not used by this function.
        size: not used by this function.

    Returns:
        Whatever the ``T.ToTensor`` transform produces for the normalised
        ``float32`` image.
    """
    normalized = util.normalize_image(args, image)
    pipeline = T.Compose([T.ToTensor()])
    as_float = normalized.astype("float32")
    return pipeline(as_float)
def estimate_angle_and_crop_area(signal, args, path, seed, size, device=None):
    """Pre-process function for SROIE: estimate rotation and crop margins.

    Removes the white surrounding areas of input images by describing how
    they should be rotated and cropped; the image itself is not modified.

    Steps:
        1. Enhance the contrast with ``clahe_inv`` and estimate the skew with
           ``detect_angle``; if ``abs(angle) * 90`` exceeds 1 the working
           copy is rotated with ``rotate_image``.
        2. Normalise the working copy, sum its channels and collapse it into
           one profile per axis, each rescaled to [0, 1] and inverted.
        3. Smooth every profile with a 5-tap kernel and convolve it with an
           ascending and a descending ramp; the first ascending and the last
           descending response of at least 0.15 mark the content bounds,
           widened by 5% of the profile length.

    Args:
        signal: input image, either (H, W) or (H, W, C).
        args: forwarded to ``clahe_inv`` and ``util.normalize_image``.
        path, seed, size: forwarded to ``clahe_inv``.
        device: torch device used for the 1-D convolutions; ``"cpu"`` when
            left as ``None``.

    Returns:
        tuple: ``(img, transform_det)`` where ``img`` is the untouched input
        and ``transform_det`` is a dict that may contain
        ``"rotation"`` (``angle * 90``) and ``"crop"``
        (distances to top, right, bottom, left).
    """
    def _rescale_unit(values):
        # Min-max rescaling to the [0, 1] range.
        lowest = torch.min(values)
        return (values - lowest) / (torch.max(values) - lowest)

    def _as_kernel(weights):
        # Shape (1, 1, K) as expected by F.conv1d for a single channel.
        return torch.tensor(weights).unsqueeze(0).unsqueeze(0).to(device)

    def _as_profile(summed, count):
        # Average along one axis, shaped (1, 1, L), rescaled and inverted.
        averaged = (summed / count).unsqueeze(0).unsqueeze(0).to(device)
        return 1 - _rescale_unit(averaged)

    img = signal
    transform_det = {}
    threshold = 0.15
    if device is None:
        device = "cpu"

    # Use CLAHE to enhance the contrast
    enhanced, _ = clahe_inv(signal, args, path, seed, size)
    angle = detect_angle(enhanced)
    needs_rotation = angle is not None and abs(angle) * 90 > 1
    if needs_rotation:
        enhanced = rotate_image(enhanced, angle)
    # After rotation, the image size will change
    original_size = enhanced.shape

    if len(enhanced.shape) == 2:
        enhanced = np.expand_dims(enhanced, -1)
    enhanced = enhanced.astype(np.float32)
    enhanced = util.normalize_image(args, enhanced)
    # Transform (H, W, C) nd.array into (C, H, W) Tensor, then sum the
    # channels to get a single grey plane
    plane = torch.sum(torch.Tensor(enhanced).float().permute(2, 0, 1), 0)

    # Horizontal and vertical signal strength
    profile_x = _as_profile(torch.sum(plane, dim=0), plane.size(0))
    profile_y = _as_profile(torch.sum(plane, dim=1), plane.size(1))

    # Define the kernels
    smoothing = _as_kernel((0.1, 0.2, 0.4, 0.2, 0.1))
    rise_detector = _as_kernel((0.0, 0.25, 0.5, 0.75, 1.0))
    fall_detector = _as_kernel((1.0, 0.75, 0.5, 0.25, 0.0))

    bounds = []
    for profile in (profile_x, profile_y):
        length = profile.size(-1)
        # Due to the size is very big, we do not need zero-padding
        smoothed = F.conv1d(profile, smoothing, stride=1, padding=0)
        # Calculate first derivative
        rising = F.conv1d(smoothed, rise_detector, stride=1,
                          padding=0).squeeze()
        falling = F.conv1d(smoothed, fall_detector, stride=1,
                           padding=0).squeeze()

        # safe distance is 5% of current signal length
        margin = int(0.05 * length)
        rising_hits = (rising >= threshold).nonzero().squeeze(1)
        if rising_hits.nelement() != 0:
            lower = max(0, int(rising_hits[0]) - margin)
        else:
            # Cannot find a ascend signal stronger than threshold
            lower = 0
        falling_hits = (falling >= threshold).nonzero().squeeze(1)
        if falling_hits.nelement() != 0:
            upper = min(length, int(falling_hits[-1]) + margin)
        else:
            upper = length

        if upper > lower + 300:
            bounds.append((lower, upper))
        else:
            # A window of 300 samples or less is treated as a failure and
            # the whole axis is kept.
            print("assume some error happens in smart crop")
            bounds.append((0, length))

    if needs_rotation:
        print("angle: %s" % (angle))
        transform_det["rotation"] = angle * 90

    (x_start, x_end), (y_start, y_end) = bounds
    # 4 dimension means distance to top, right, bottom, left
    crop_area = (y_start, int(original_size[1] - x_end),
                 int(original_size[0] - y_end), int(x_start))
    if crop_area != (0, 0, 0, 0):
        transform_det["crop"] = crop_area
    return img, transform_det