Example #1
0
def decode(preds, scale):
    """Post-process PSENet predictions into a label map and text boxes.

    Args:
        preds: network output tensor; the last channel is the text map.
        scale: down-sampling factor of the prediction w.r.t. the image.

    Returns:
        (pred, boxes): the pse label map and an array of 4-corner boxes.
    """
    # Confidence from the text channel; binarise all channels at 1.0
    # via the sign trick, then mask kernels by the text region.
    score = torch.sigmoid(preds[-1])
    binarized = (torch.sign(preds - 1) + 1) / 2

    text_mask = binarized[-1]
    kernel_maps = binarized * text_mask

    score = score.detach().cpu().numpy().astype(np.float32)
    kernel_maps = kernel_maps.detach().cpu().numpy()

    # Progressive scale expansion; the area threshold shrinks with scale.
    pred, label_values = pse(kernel_maps.astype(np.uint8), 5 / (scale * scale))

    min_points = 800 / (scale * scale)
    bbox_list = []
    for lv in label_values:
        # (row, col) -> (x, y) for OpenCV.
        pts = np.array(np.where(pred == lv)).transpose((1, 0))[:, ::-1]

        # Reject tiny components first, then low-confidence ones.
        if pts.shape[0] < min_points:
            continue
        if np.mean(score[pred == lv]) < 0.93:
            continue

        quad = cv2.boxPoints(cv2.minAreaRect(pts))
        bbox_list.append([quad[1], quad[2], quad[3], quad[0]])

    return pred, np.array(bbox_list)
def detect(seg_maps, min_area_thresh=10, seg_map_thresh=0.9, ratio=1):
    """Recover text boxes from the stacked segmentation maps.

    Args:
        seg_maps: segmentation maps from the network, channels last.
        min_area_thresh: minimum component area kept by pse.
        seg_map_thresh: threshold applied to the largest kernel map.
        ratio: multiplier giving the threshold for the remaining kernels.

    Returns:
        np.ndarray of detected quadrilateral boxes.
    """
    one = np.ones_like(seg_maps[..., 0], dtype=np.uint8)
    zero = np.zeros_like(seg_maps[..., 0], dtype=np.uint8)

    # Binarise channels from last (largest kernel) down to first.
    # Only the first uses seg_map_thresh; the rest use the scaled value.
    kernals = []
    for pos, ch in enumerate(range(seg_maps.shape[-1] - 1, -1, -1)):
        t = seg_map_thresh if pos == 0 else seg_map_thresh * ratio
        kernals.append(np.where(seg_maps[..., ch] > t, one, zero))

    labels, label_values = pse(kernals, min_area_thresh)
    # Predictions are at 1/4 resolution; scale the label map back up.
    labels_up = cv2.resize(labels, (labels.shape[1] * 4, labels.shape[0] * 4),
                           interpolation=cv2.INTER_NEAREST)

    boxes = []
    for lv in label_values:
        # argwhere yields (row, col); flip columns to (x, y) for OpenCV.
        pts = np.argwhere(labels_up == lv)[:, (1, 0)]
        boxes.append(cv2.boxPoints(cv2.minAreaRect(pts)))

    return np.array(boxes)
Example #3
0
def detect(seg_maps,
           timer,
           image_w,
           image_h,
           min_area_thresh=10,
           seg_map_thresh=0.9,
           ratio=1):
    """Recover text boxes from the stacked segmentation maps.

    :param seg_maps: segmentation maps, channels last (optionally batched).
    :param timer: dict receiving the pse runtime under key 'pse'.
    :param image_w: output width for the resized label map.
    :param image_h: output height for the resized label map.
    :param min_area_thresh: minimum component area kept by pse.
    :param seg_map_thresh: threshold for the largest kernel map.
    :param ratio: multiplier giving the threshold for the other kernels.
    :return: (boxes, kernals, timer)
    """
    # Drop the batch dimension if present.
    if len(seg_maps.shape) == 4:
        seg_maps = seg_maps[0, :, :, ]
    one = np.ones_like(seg_maps[..., 0], dtype=np.uint8)
    zero = np.zeros_like(seg_maps[..., 0], dtype=np.uint8)

    # Binarise channels from last (largest kernel) down to first.
    kernals = []
    for pos, ch in enumerate(range(seg_maps.shape[-1] - 1, -1, -1)):
        t = seg_map_thresh if pos == 0 else seg_map_thresh * ratio
        kernals.append(np.where(seg_maps[..., ch] > t, one, zero))

    start = time.time()
    mask_res, label_values = pse(kernals, min_area_thresh)
    timer['pse'] = time.time() - start

    mask_res_resized = cv2.resize(np.array(mask_res), (image_w, image_h),
                                  interpolation=cv2.INTER_NEAREST)

    boxes = []
    for lv in label_values:
        # argwhere yields (row, col); flip columns to (x, y) for OpenCV.
        pts = np.argwhere(mask_res_resized == lv)[:, (1, 0)]
        quad = cv2.boxPoints(cv2.minAreaRect(pts)).tolist()
        quad.sort()
        # Same corner reorder as the original swap: [0, 2, 3, 1].
        boxes.append(np.array([quad[0], quad[2], quad[3], quad[1]]))

    boxes = merge(np.array(boxes))

    # The boxes were shrunk by merge(); the kernels are returned untouched.
    return boxes, kernals, timer
Example #4
0
def detect_pse(seg_maps, threshold=0.5, threshold_k=0.55, boxes_thres=0.01):
    """Post-process segmentation maps with pse.

    Args:
        seg_maps: network output, batched; channel 0 is the text map.
        threshold: binarisation threshold for the text map.
        threshold_k: binarisation threshold for the masked kernel maps.
        boxes_thres: minimum score for a box to be kept.

    Returns:
        (bboxes, scores) from mask_to_boxes_pse.
    """
    seg_maps = seg_maps[0, :, :, :]
    # Mask everything outside the binarised text region, then binarise
    # the masked kernels at threshold_k.
    # (Removed the unused `image_size` local from the original.)
    mask = np.where(seg_maps[0, :, :] > threshold, 1., 0.)
    seg_maps = (seg_maps * mask > threshold_k)

    result_map = pse(seg_maps, 5)
    bboxes, scores = mask_to_boxes_pse(result_map,
                                       seg_maps[0, :, :],
                                       min_score=boxes_thres)
    return bboxes, scores
Example #5
0
    def inference(self, model, image):
        """Run PSENet on one image and expand kernels with pse.

        NOTE(review): this snippet appears truncated — no return statement
        is visible after `label_num`; confirm against the full source.
        """
        model.eval()
        # Resize/normalise the input; `scale` is the resize factor applied.
        image_preprocessed, scale = self._preprocess_image(image)
        image_preprocessed = image_preprocessed.to(self.device)
        with torch.no_grad():
            outputs = model(image_preprocessed)
            # Channel 0 is the full text map; its sigmoid is the confidence.
            score = torch.sigmoid(outputs[:, 0, ...])
            # Binarise every channel at binary_th via the sign trick.
            outputs = (torch.sign(outputs - self.args.binary_th) + 1) / 2
            text = outputs[:, 0, ...]
            # Kernels are masked by the binarised text region.
            kernels = outputs[:, 0:self.args.kernel_num, ...] * text

            score = score.data.cpu().numpy()[0].astype(np.float32)
            text = text.data.cpu().numpy()[0].astype(np.uint8)
            kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

            # Progressive scale expansion; the minimum kernel area is
            # shrunk by the squared prediction scale.
            pred = pse(
                kernels, self.args.min_kernel_area /
                (self.args.scale * self.args.scale))
            label_num = np.max(pred) + 1
Example #6
0
def decode(preds, threshold=0.5):
    """Decode network predictions into a label map and rotated boxes.

    Args:
        preds: prediction tensor; the last channel is the text map.
        threshold: binarisation threshold.

    Returns:
        (pred, bbox_list): pse label map and an array of 4-corner boxes.
    """
    # Mask all kernel channels by the binarised text region.
    mask = (preds[-1] > threshold).detach().float()
    preds = (preds * mask).detach().cpu().numpy()
    pred, label_num = pse(preds >= threshold, 100)
    h, w = pred.shape[-2:]
    bbox_list = []
    for label_idx in range(1, label_num + 1):
        result = (pred == label_idx).astype(np.uint8)
        # findContours returns 2 values on OpenCV 4.x and 3 on 3.x; the
        # original 3-tuple unpack raises ValueError on 4.x. ret[-2] is the
        # contour list on both versions.
        ret = cv2.findContours(result, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours = ret[-2]
        for contour in contours:
            rect = cv2.minAreaRect(contour)
            point = cv2.boxPoints(rect)
            # Clip the corners to the prediction bounds.
            point[:, 0] = np.clip(point[:, 0], 0, w - 1)
            point[:, 1] = np.clip(point[:, 1], 0, h - 1)
            bbox_list.append([point[1], point[2], point[3], point[0]])

    return pred, np.array(bbox_list)
Example #7
0
def detect(seg_maps,
           image_w,
           image_h,
           min_area_thresh=10,
           seg_map_thresh=0.9,
           ratio=1):
    """Recover text boxes from the stacked segmentation maps.

    :param seg_maps: segmentation maps, channels last (optionally batched).
    :param image_w: output width for the resized label map.
    :param image_h: output height for the resized label map.
    :param min_area_thresh: minimum component area kept by pse.
    :param seg_map_thresh: threshold for the largest kernel map.
    :param ratio: multiplier giving the threshold for the other kernels.
    :return: (boxes, kernals)
    """
    # Drop the batch dimension if present.
    if len(seg_maps.shape) == 4:
        seg_maps = seg_maps[0, :, :, ]
    one = np.ones_like(seg_maps[..., 0], dtype=np.uint8)
    zero = np.zeros_like(seg_maps[..., 0], dtype=np.uint8)

    # Binarise channels from last (largest kernel) down to first.
    # Only the first uses seg_map_thresh; the rest use the scaled value.
    kernals = []
    for pos, ch in enumerate(range(seg_maps.shape[-1] - 1, -1, -1)):
        t = seg_map_thresh if pos == 0 else seg_map_thresh * ratio
        kernals.append(np.where(seg_maps[..., ch] > t, one, zero))

    mask_res, label_values = pse(kernals, min_area_thresh)
    mask_res_resized = cv2.resize(np.array(mask_res), (image_w, image_h),
                                  interpolation=cv2.INTER_NEAREST)

    boxes = []
    for lv in label_values:
        # argwhere yields (row, col); flip columns to (x, y) for OpenCV.
        pts = np.argwhere(mask_res_resized == lv)[:, (1, 0)]
        boxes.append(cv2.boxPoints(cv2.minAreaRect(pts)))

    return np.array(boxes), kernals
Example #8
0
def run_PSENet(args, model, img, org_shape, out_type='rect', return_score=False):
    """Run PSENet on a preprocessed image tensor and collect text boxes.

    Args:
        args: namespace with binary_th, kernel_num, scale, min_kernel_area,
            min_area and min_score.
        model: the PSENet network.
        img: preprocessed input tensor.
        org_shape: (height, width) of the original image.
        out_type: 'rect' (axis-aligned), 'rbox' (rotated) or 'contour'.
        return_score: also return the score map when True.

    Returns:
        List of {'type': out_type, 'bbox': flat int32 ndarray} dicts, plus
        the score map when return_score is True.

    Raises:
        ValueError: if out_type is not one of the supported values.
    """
    if out_type not in ('rect', 'rbox', 'contour'):
        # Fail early: previously an unknown out_type crashed later with an
        # UnboundLocalError on `bbox`.
        raise ValueError("unknown out_type: %r" % (out_type,))

    outputs = model(img)

    # Channel 0 is the full text map; its sigmoid is the confidence.
    score = torch.sigmoid(outputs[:, 0, :, :])
    # Binarise every channel at binary_th via the sign trick.
    outputs = (torch.sign(outputs - args.binary_th) + 1) / 2

    text = outputs[:, 0, :, :]
    kernels = outputs[:, 0:args.kernel_num, :, :] * text

    score = score.data.cpu().numpy()[0].astype(np.float32)
    kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

    # c++ version pse (a python pypse fallback exists with the same args).
    pred = pse(kernels, args.min_kernel_area / (args.scale * args.scale))

    # (x, y) factors mapping prediction coords back to the original image.
    scale = (org_shape[1] * 1.0 / pred.shape[1], org_shape[0] * 1.0 / pred.shape[0])
    label = pred
    label_num = np.max(label) + 1
    bboxes = []
    for i in range(1, label_num):
        points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]

        # Reject components that are too small at the prediction scale.
        if points.shape[0] < args.min_area / (args.scale * args.scale):
            continue

        # Reject low-confidence components.
        score_i = np.mean(score[label == i])
        if score_i < args.min_score:
            continue

        if out_type == 'rect':
            rect = cv2.boundingRect(points)
            x1, y1 = rect[0], rect[1]
            x2, y2 = x1 + rect[2] - 1, y1 + rect[3] - 1
            pts = [x1, y1, x2, y1, x2, y2, x1, y2]
            bbox = np.array(pts).reshape(-1, 2) * scale
            bbox = bbox.astype('int32')
        elif out_type == 'rbox':
            rect = cv2.minAreaRect(points)
            bbox = cv2.boxPoints(rect) * scale
            bbox = bbox.astype('int32')
        else:  # 'contour'
            binary = np.zeros(label.shape, dtype='uint8')
            binary[label == i] = 1
            # ret[-2] is the contour list on both OpenCV 3.x and 4.x.
            ret = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            contours = ret[-2]
            contour = contours[0]
            bbox = contour

            # Degenerate contours cannot form a polygon.
            if bbox.shape[0] <= 2:
                continue

            bbox = bbox * scale
            bbox = bbox.astype('int32')

        bboxes.append({'type': out_type, 'bbox': bbox.reshape(-1)})
    if return_score:
        return bboxes, score
    return bboxes
def test(args, file=None):
    """Detect text with PSENet and recognise it with CRNN on the test set.

    Args:
        args: namespace with arch, scale, resume, long_size, binary_th,
            min_kernel_area, min_area and min_score.
        file: optional input forwarded to DataLoader.

    Returns:
        The formatted recognition result of the last processed image
        (the initial empty list if the loader yields nothing).
    """
    result = []
    data_loader = DataLoader(long_size=args.long_size, file=file)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Channel index of the text map; renamed from `slice`, which shadowed
    # the builtin. 0 for the resnet variants, -1 for mobilenet.
    score_ch = 0
    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True,
                                num_classes=7,
                                scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True,
                                 num_classes=7,
                                 scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True,
                                 num_classes=7,
                                 scale=args.scale)
    elif args.arch == "mobilenet":
        model = models.Mobilenet(pretrained=True,
                                 num_classes=6,
                                 scale=args.scale)
        score_ch = -1

    # Inference only: freeze all parameters.
    for param in model.parameters():
        param.requires_grad = False

    # model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)

            # Checkpoints saved from DataParallel prefix keys with
            # 'module.'; strip the first 7 characters to fit a bare model,
            # falling back to the raw state dict.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value

            try:
                model.load_state_dict(d)
            except Exception:  # was a bare except; keep interrupts raisable
                model.load_state_dict(checkpoint['state_dict'])

            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()

    total_frame = 0.0
    total_time = 0.0
    for idx, (org_img, img) in enumerate(test_loader):
        print('progress: %d / %d' % (idx, len(test_loader)))
        sys.stdout.flush()

        # img = Variable(img.cuda(), volatile=True)
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()

        # torch.cuda.synchronize()
        start = time.time()

        # angle detection
        # org_img, angle = detect_angle(org_img)
        outputs = model(img)

        # The text-map channel's sigmoid is the confidence.
        score = torch.sigmoid(outputs[:, score_ch, :, :])
        # Binarise every channel at binary_th via the sign trick.
        outputs = (torch.sign(outputs - args.binary_th) + 1) / 2

        text = outputs[:, score_ch, :, :]
        kernels = outputs
        # kernels = outputs[:, 0:args.kernel_num, :, :] * text

        score = score.data.cpu().numpy()[0].astype(np.float32)
        text = text.data.cpu().numpy()[0].astype(np.uint8)
        kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

        if args.arch == 'mobilenet':
            pred = pse2(kernels,
                        args.min_kernel_area / (args.scale * args.scale))
        else:
            # c++ version pse
            pred = pse(kernels,
                       args.min_kernel_area / (args.scale * args.scale))

        # (x, y) factors mapping prediction coords back to the original image.
        scale = (org_img.shape[1] * 1.0 / pred.shape[1],
                 org_img.shape[0] * 1.0 / pred.shape[0])
        label = pred
        label_num = np.max(label) + 1
        bboxes = []
        rects = []
        for i in range(1, label_num):
            points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]

            # Reject components that are too small at the prediction scale.
            if points.shape[0] < args.min_area / (args.scale * args.scale):
                continue

            # Reject low-confidence components.
            score_i = np.mean(score[label == i])
            if score_i < args.min_score:
                continue

            rect = cv2.minAreaRect(points)
            bbox = cv2.boxPoints(rect) * scale
            bbox = bbox.astype('int32')
            bbox = order_point(bbox)
            bboxes.append(bbox.reshape(-1))

            # NOTE(review): presumably [angle, h, w, cx, cy] in original-image
            # coordinates — confirm against what crnnRec expects.
            rec = []
            rec.append(rect[-1])
            rec.append(rect[1][1] * scale[1])
            rec.append(rect[1][0] * scale[0])
            rec.append(rect[0][0] * scale[0])
            rec.append(rect[0][1] * scale[1])
            rects.append(rec)

        # torch.cuda.synchronize()
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        print('fps: %.2f' % (total_frame / total_time))
        sys.stdout.flush()

        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0),
                             2)

        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        write_result_as_txt(image_name, bboxes, 'outputs/submit_invoice/')

        text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
        debug(idx, data_loader.img_paths, [[text_box]], 'data/images/tmp/')

        result = crnnRec(cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB), rects)
        result = formatResult(result)

    return result
Example #10
0
def test(args):
    """Run PSENet over the demo dataset and write boxes + visualisations.

    NOTE(review): requires CUDA — the model and inputs are moved to the
    GPU unconditionally.
    """
    data_loader = DemoDataLoader(long_size=args.long_size,
                                 input_path=args.input_dir)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True,
                                num_classes=7,
                                scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True,
                                 num_classes=7,
                                 scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True,
                                 num_classes=7,
                                 scale=args.scale)

    # Inference only: freeze all parameters.
    for param in model.parameters():
        param.requires_grad = False

    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print(("Loading model and optimizer from checkpoint '{}'".format(
                args.resume)))
            checkpoint = torch.load(args.resume)

            # Checkpoints saved from DataParallel prefix keys with
            # 'module.'; strip the first 7 characters before loading.
            # model.load_state_dict(checkpoint['state_dict'])
            d = collections.OrderedDict()
            for key, value in list(checkpoint['state_dict'].items()):
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)

            print(("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
            sys.stdout.flush()
        else:
            print(("No checkpoint found at '{}'".format(args.resume)))
            sys.stdout.flush()

    model.eval()

    total_frame = 0.0
    total_time = 0.0
    with torch.no_grad():
        for idx, (org_img, img) in enumerate(test_loader):
            print(('progress: %d / %d' % (idx, len(test_loader))))
            sys.stdout.flush()

            img = Variable(img.cuda())
            org_img = org_img.numpy().astype('uint8')[0]
            text_box = org_img.copy()

            # Synchronise so the timing below measures the GPU work.
            torch.cuda.synchronize()
            start = time.time()

            outputs = model(img)

            # Channel 0 is the full text map; its sigmoid is the confidence.
            score = torch.sigmoid(outputs[:, 0, :, :])
            # Binarise every channel at binary_th via the sign trick.
            outputs = (torch.sign(outputs - args.binary_th) + 1) / 2

            text = outputs[:, 0, :, :]
            # Kernels are masked by the binarised text region.
            kernels = outputs[:, 0:args.kernel_num, :, :] * text

            score = score.data.cpu().numpy()[0].astype(np.float32)
            text = text.data.cpu().numpy()[0].astype(np.uint8)
            kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

            # c++ version pse
            pred = pse(kernels,
                       args.min_kernel_area / (args.scale * args.scale))

            # (x, y) factors mapping prediction coords back to the image.
            scale = (org_img.shape[1] * 1.0 / pred.shape[1],
                     org_img.shape[0] * 1.0 / pred.shape[0])
            label = pred
            label_num = np.max(label) + 1
            bboxes = []
            for i in range(1, label_num):
                points = np.array(np.where(label == i)).transpose(
                    (1, 0))[:, ::-1]

                # Reject components too small at the prediction scale.
                if points.shape[0] < args.min_area / (args.scale * args.scale):
                    continue

                # Reject low-confidence components.
                score_i = np.mean(score[label == i])
                if score_i < args.min_score:
                    continue

                rect = cv2.minAreaRect(points)
                bbox = cv2.boxPoints(rect) * scale
                bbox = bbox.astype('int32')
                bboxes.append(bbox.reshape(-1))

            torch.cuda.synchronize()
            end = time.time()
            total_frame += 1
            total_time += (end - start)
            print(('fps: %.2f' % (total_frame / total_time)))
            sys.stdout.flush()

            for bbox in bboxes:
                cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1,
                                 (0, 255, 0), 10)

            image_name = data_loader.img_paths[idx].split('/')[-1].split(
                '.')[0]
            write_result_as_txt(image_name, bboxes, 'outputs/demo/')

            text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
            debug(idx, data_loader.img_paths, [[text_box]], 'outputs/demo/')
Example #11
0
		
        else:
		    img = Variable(scaled_img)
	        outputs = model(img)
	        score = torch.sigmoid(outputs[:, 0, :, :])
	        outputs = (torch.sign(outputs - 1) + 1) / 2
	        text = outputs[:, 0, :, :]
	        kernels = outputs[:, 0:kernel_num, :, :] * text
	        score = score.data.numpy()[0].astype(np.float32)
	        text = text.data.numpy()[0].astype(np.uint8)
	        kernels = kernels.data.numpy()[0].astype(np.uint8)



    # c++ version pse
    pred = pse(kernels, min_kernel_area)
    # python version pse
    # pred = pypse(kernels, min_kernel_area)

    scale = (org_img.shape[1] * 1.0 / pred.shape[1], org_img.shape[0] * 1.0 / pred.shape[0])
    label = pred
    label_num = np.max(label) + 1

    bboxes = []
    for i in range(1, label_num):
        points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]

        if points.shape[0] < min_area:
            continue

        score_i = np.mean(score[label == i])
def Test():
    """TF inference driver: run PSENet, expand kernels with pse, and save
    a visualisation and a box file for every test image."""
    test_data_load = data_pre.DataTest_load_pre(long_size=320)
    print("Data num: ", len(test_data_load))

    tf_image = tf.placeholder(dtype=tf.float32,
                              shape=[1, None, None, 3],
                              name="image")

    #############################################################################################
    ###  Model logits and checkpoint path
    ###  Self model (alternative, kept for reference):
    #resnet = PM.ResNet(PM.BottleBlock(), FLAGS.kernal_num, True, 1.0)
    #logites = resnet(tf_image)  ## (batch, 7, size, size)
    #model_path = "./checkpoints/old/PSENet_BC-32_k3_2020-03-02-19-31-31.ckpt-192500"
    ### Model two
    logites, _ = model_v1.model(tf_image, FLAGS.kernal_num)  ## [1,3,?,?]
    model_path = "./checkpoints/0302/PSENet_BC-32_k3_2020-02-26-17-06-44.ckpt-192500"
    #############################################################################################

    saver = tf.train.Saver()
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    saver.restore(sess=sess, save_path=model_path)
    print("################ load model down! ##########################")

    for i in range(len(test_data_load)):
        ori_img, scaled_img = test_data_load[i]  # (h, w, 3)

        text_box = ori_img.copy()
        scaled_img = np.expand_dims(scaled_img, axis=0)  # (1, h, w, 3)

        train_pred = sess.run([logites],
                              feed_dict={tf_image:
                                         scaled_img})  # [(1, 7, size, size)]
        train_pred = train_pred[0]  # (1, k, h, w), values in [0, 1]

        # Use the first kernel map as the text mask, then mask every kernel.
        mask = train_pred[:, 0, :, :]  # [1, 512, 512]
        kernels = train_pred[:,
                             0:, :, :] * mask  # [1, 3, 512, 512]
        kernels = np.squeeze(kernels, 0).astype(np.uint8)  # [3, 512, 512]

        # Progressive scale expansion.
        pred = pse(kernels,
                   FLAGS.min_kernel_area / (FLAGS.scale * FLAGS.scale))

        # (x, y) factors mapping prediction coords back to the original image.
        scale = (ori_img.shape[1] * 1.0 / pred.shape[1],
                 ori_img.shape[0] * 1.0 / pred.shape[0])
        label = pred
        label_num = np.max(label) + 1
        bboxes = []
        for j in range(1, label_num):
            try:
                points = np.array(np.where(label == j)).transpose(
                    (1, 0))[:, ::-1]
            except Exception:  # was a bare except; keep interrupts raisable
                continue

            # Reject components that are too small at the prediction scale.
            if points.shape[0] < FLAGS.min_area / (FLAGS.scale * FLAGS.scale):
                continue
            rect = cv2.minAreaRect(points)
            bbox = cv2.boxPoints(rect) * scale
            bbox = bbox.astype('int32')
            bboxes.append(bbox.reshape(-1))

        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0),
                             1)

        text_box = cv2.resize(text_box, (ori_img.shape[1], ori_img.shape[0]))

        cv2.imwrite("./Images/Image_OUT/img_{}.jpg".format(i), text_box)
        pre_tools.write_result_as_txt(str(i), bboxes, './Images/Text_OUT/')
        print("Finish {} image!".format(i + 1))
Example #13
0
    bbox_list = []
    for label_idx in range(1, label_num + 1):
        result = (pred == label_idx).astype(np.uint8)
        _, contours, hierarchy = cv2.findContours(result, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            rect = cv2.minAreaRect(contour)
            point = cv2.boxPoints(rect)
            point[:, 0] = np.clip(point[:, 0], 0, w - 1)
            point[:, 1] = np.clip(point[:, 1], 0, h - 1)
            bbox_list.append([point[1], point[2], point[3], point[0]])

    return pred, np.array(bbox_list)


if __name__ == '__main__':
    # Small hand-built kernel fixtures (kept from the original experiments;
    # unused by the timing run below).
    x = np.zeros((3, 3, 3))
    y = np.ones((3, 3, 3))
    s1 = np.zeros((5, 5))
    s2 = np.zeros((5, 5))
    s3 = np.zeros((5, 5))
    s1[0, 0:4] = 1      # top-row strip
    s2[2:4, 0:3] = 1    # middle 2x3 block
    s3[1, 0:4] = 1      # second-row strip
    # Real kernels dumped by a previous run.
    kernels = np.load('/data1/zj/PSENet.pytorch/result.npy')

    # Time a single pse call on the loaded kernels.
    tic = time.time()
    pred = pse(kernels, 100)
    print(time.time() - tic)
Example #14
0
def pred(args):
    """One-off demo: run PSENet on a single hard-coded image and save results.

    Reads './data/test/1.png', detects text boxes, writes them under
    'data/test/output' and saves a visualisation.
    """
    from PIL import Image
    import torchvision.transforms as transforms

    def get_img(img_path):
        # Load with OpenCV (BGR) and flip to RGB channel order.
        try:
            img = cv2.imread(img_path)
            img = img[:, :, [2, 1, 0]]
        except Exception as e:
            print(img_path)
            raise
        return img

    def scale(img, long_size=2240):
        # Resize so the longer side equals long_size, keeping aspect ratio.
        h, w = img.shape[0:2]
        scale = long_size * 1.0 / max(h, w)
        img = cv2.resize(img, dsize=None, fx=scale, fy=scale)
        return img

    imgPath = './data/test/1.png'
    imgLoad = get_img(imgPath)

    # Normalise with ImageNet statistics.
    scaled_img = scale(imgLoad, long_size=1120)
    scaled_img = Image.fromarray(scaled_img)
    scaled_img = scaled_img.convert('RGB')
    scaled_img = transforms.ToTensor()(scaled_img)
    img = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])(scaled_img)
    org_img = imgLoad[:, :, [2, 1, 0]]

    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True,
                                num_classes=7,
                                scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True,
                                 num_classes=7,
                                 scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True,
                                 num_classes=7,
                                 scale=args.scale)

    # Inference only: freeze all parameters.
    for param in model.parameters():
        param.requires_grad = False

    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)

            # Checkpoints saved from DataParallel prefix keys with
            # 'module.'; strip the first 7 characters before loading.
            # model.load_state_dict(checkpoint['state_dict'])
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)

            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()

    total_frame = 0.0
    total_time = 0.0

    sys.stdout.flush()

    # NOTE(review): Variable(..., volatile=True) is a legacy (pre-0.4)
    # PyTorch API — confirm the installed torch version still accepts it.
    img = Variable(img[None, :, :, :].cuda(), volatile=True)
    #org_img = org_img.astype('uint8')[0]
    text_box = org_img.copy()

    # Synchronise so the timing below measures the GPU work.
    torch.cuda.synchronize()
    start = time.time()

    outputs = model(img)

    # Channel 0 is the full text map; its sigmoid is the confidence.
    score = torch.sigmoid(outputs[:, 0, :, :])
    # Binarise every channel at binary_th via the sign trick.
    outputs = (torch.sign(outputs - args.binary_th) + 1) / 2

    text = outputs[:, 0, :, :]
    # Kernels are masked by the binarised text region.
    kernels = outputs[:, 0:args.kernel_num, :, :] * text

    score = score.data.cpu().numpy()[0].astype(np.float32)
    text = text.data.cpu().numpy()[0].astype(np.uint8)
    kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

    # c++ version pse
    pred = pse(kernels, args.min_kernel_area / (args.scale * args.scale))
    # python version pse
    # pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))

    # (x, y) factors mapping prediction coords back to the original image.
    scale = (org_img.shape[1] * 1.0 / pred.shape[1],
             org_img.shape[0] * 1.0 / pred.shape[0])
    label = pred
    label_num = np.max(label) + 1
    bboxes = []
    for i in range(1, label_num):
        points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]

        # Reject components that are too small at the prediction scale.
        if points.shape[0] < args.min_area / (args.scale * args.scale):
            continue

        # Reject low-confidence components.
        score_i = np.mean(score[label == i])
        if score_i < args.min_score:
            continue

        rect = cv2.minAreaRect(points)
        bbox = cv2.boxPoints(rect) * scale
        bbox = bbox.astype('int32')
        bboxes.append(bbox.reshape(-1))

    torch.cuda.synchronize()
    end = time.time()
    total_frame += 1
    total_time += (end - start)
    print('fps: %.2f' % (total_frame / total_time))
    sys.stdout.flush()

    for bbox in bboxes:
        cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2)

    write_result_as_txt('1', bboxes, 'data/test/output')

    text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
    debug(0, [imgPath], [[text_box]], 'data/test/output')
Example #15
0
def use_psenet(img,
               model,
               precession=960,
               kernel_num=7,
               min_kernel_area=5.0,
               min_area=800,
               min_score=0.93):
    """Detect text boxes in a BGR image with a PSENet model.

    :param img: BGR image of shape (H, W, 3).
    :param model: PSENet network resident on the GPU.
    :param precession: target long-side size for the resized input.
    :param kernel_num: number of kernel channels to keep.
    :param min_kernel_area: minimum kernel area passed to pse.
    :param min_area: minimum pixel count for a kept component.
    :param min_score: minimum mean confidence for a kept component.
    :return: list of flattened int32 boxes in original-image coordinates.
    """
    # BGR -> RGB, then resize so the longer side equals `precession`.
    org_img = img[:, :, [2, 1, 0]]
    h, w = org_img.shape[0:2]
    resize_factor = precession * 1.0 / max(h, w)
    resized = cv2.resize(org_img, dsize=None, fx=resize_factor, fy=resize_factor)

    # Normalise with ImageNet statistics.
    pil_img = Image.fromarray(resized).convert('RGB')
    tensor = transforms.ToTensor()(pil_img)
    tensor = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])(tensor)
    tensor = tensor.unsqueeze(0)
    text_box = org_img.copy()

    with torch.no_grad():
        net_in = Variable(tensor.cuda())
        torch.cuda.synchronize()
        outputs = model(net_in)
        # Channel 0 is the text map; binarise all channels at 1.0.
        score = torch.sigmoid(outputs[:, 0, :, :])
        outputs = (torch.sign(outputs - 1) + 1) / 2

        text = outputs[:, 0, :, :]
        kernels = outputs[:, 0:kernel_num, :, :] * text

    score = score.data.cpu().numpy()[0].astype(np.float32)
    text = text.data.cpu().numpy()[0].astype(np.uint8)
    kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

    # Progressive scale expansion (C++ implementation).
    pred = pse(kernels, min_kernel_area)

    # (x, y) factors mapping prediction coords back to the original image.
    box_scale = (org_img.shape[1] * 1.0 / pred.shape[1],
                 org_img.shape[0] * 1.0 / pred.shape[0])
    label = pred
    label_num = np.max(label) + 1

    bboxes = []
    for i in range(1, label_num):
        points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]

        # Reject tiny components, then low-confidence ones.
        if points.shape[0] < min_area:
            continue
        if np.mean(score[label == i]) < min_score:
            continue

        quad = cv2.boxPoints(cv2.minAreaRect(points)) * box_scale
        bboxes.append(quad.astype('int32').reshape(-1))

    torch.cuda.synchronize()
    return bboxes
Example #16
0
def test(args):
    """Run PSENet inference over the IC15 test set and dump detected boxes.

    Args:
        args: parsed CLI namespace; reads long_size, arch, scale, resume,
            binary_th, kernel_num, min_kernel_area, min_area, min_score.

    Raises:
        ValueError: if ``args.arch`` is not one of the supported backbones.

    Side effects: writes per-image result files to outputs/submit_LSVT/,
    visualisations to outputs/vis_LSVT/, and progress/fps lines to stdout.
    """
    data_loader = IC15TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=False,
                                num_classes=7,
                                scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True,
                                 num_classes=7,
                                 scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True,
                                 num_classes=7,
                                 scale=args.scale)
    else:
        # Fail fast: previously an unknown arch left `model` unbound and
        # crashed later with UnboundLocalError.
        raise ValueError("unsupported arch: {}".format(args.arch))

    # Inference only: freeze every weight.
    for param in model.parameters():
        param.requires_grad = False

    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)

            # Checkpoint was saved from an nn.DataParallel wrapper: strip the
            # leading 'module.' (7 chars) prefix from every state-dict key.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)

            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()

    total_frame = 0.0
    total_time = 0.0
    with torch.no_grad():
        for idx, (org_img, img) in enumerate(test_loader):
            print('progress: %d / %d' % (idx, len(test_loader)))
            sys.stdout.flush()

            img = Variable(img.cuda())
            org_img = org_img.numpy().astype('uint8')[0]
            text_box = org_img.copy()

            torch.cuda.synchronize()
            start = time.time()

            outputs = model(img)

            # Channel 0 is the full-text map; sigmoid gives per-pixel score.
            score = torch.sigmoid(outputs[:, 0, :, :])
            # Binarize all maps at binary_th: sign(x - th) maps to {-1, 0, 1},
            # the affine transform maps those to {0, 0.5, 1}.
            outputs = (torch.sign(outputs - args.binary_th) + 1) / 2

            text = outputs[:, 0, :, :]
            # Kernel maps are only meaningful inside the full-text region.
            kernels = outputs[:, 0:args.kernel_num, :, :] * text

            score = score.data.cpu().numpy()[0].astype(np.float32)
            text = text.data.cpu().numpy()[0].astype(np.uint8)
            kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

            # c++ version pse: progressive scale expansion — returns a label
            # map where each text instance has a distinct positive id.
            pred = pse(kernels,
                       args.min_kernel_area / (args.scale * args.scale))
            # python version pse
            #pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale)) #pred contains the connected components whose value is different label

            # (x_ratio, y_ratio) mapping prediction coords back onto the
            # original image. findContours points are (x, y), so the width
            # ratio must come first — the tuple was previously (y, x), which
            # scaled x by the height ratio and y by the width ratio.
            scale = (org_img.shape[1] * 1.0 / pred.shape[1],
                     org_img.shape[0] * 1.0 / pred.shape[0])
            label = pred
            label_num = np.max(label) + 1  # instance count + background
            bboxes = []
            for i in range(1, label_num):
                points = np.array(np.where(label == i)).transpose(
                    (1,
                     0))[:, ::-1]  #the pixels belong to connected components i

                # Drop instances whose area (at prediction scale) is too small.
                if points.shape[0] < args.min_area / (args.scale * args.scale):
                    continue

                # Drop low-confidence instances.
                score_i = np.mean(score[label == i])
                if score_i < args.min_score:  #score_threshold
                    continue

                # Trace the instance outline as a polygon contour.
                binary = np.zeros(label.shape, dtype='uint8')
                binary[label == i] = 1
                # NOTE(review): assumes cv2.findContours returns two values
                # (OpenCV 2.x/4.x); OpenCV 3.x returns three — confirm the
                # installed version.
                contours, _ = cv2.findContours(binary, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)
                contour = contours[0]
                bbox = contour

                # A polygon needs at least 3 vertices.
                if bbox.shape[0] <= 2:
                    continue

                bbox = bbox * scale
                bbox = bbox.astype('int32')
                bboxes.append(bbox.reshape(-1))

            torch.cuda.synchronize()
            end = time.time()
            total_frame += 1
            total_time += (end - start)
            print('fps: %.2f' % (total_frame / total_time))
            sys.stdout.flush()

            for bbox in bboxes:
                # '//' (integer division): with '/' the shape argument is a
                # float and reshape raises TypeError under Python 3.
                cv2.drawContours(text_box,
                                 [bbox.reshape(bbox.shape[0] // 2, 2)], -1,
                                 (0, 255, 0), 2)

            image_name = data_loader.img_paths[idx].split('/')[-1].split(
                '.')[0]
            write_result_as_txt(image_name, bboxes, 'outputs/submit_LSVT/')

            text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
            debug(idx, data_loader.img_paths, [[text_box]],
                  'outputs/vis_LSVT/')