Beispiel #1
0
def test_rect_synth800k():
    """Show the word boxes of one SynthText image and print their rotated rects.

    Loads ``gt.mat`` from the hard-coded SynthText root, picks the first image
    whose name contains ``desert_78_83`` and, for every word box, prints the
    raw corner coordinates, the ``cv2.minAreaRect`` result and the value
    ``8 * long_side / short_side`` before passing the box and its transcript
    to ``show_box``.

    Raises:
        IndexError: if no image name in the ground truth contains the key.
        FileNotFoundError: if the selected image cannot be read from disk.
    """
    data_root = '/Users/luning/Dev/data/SynthText/SynthText/'
    img_key = 'desert_78_83'
    gt_fn = data_root + 'gt.mat'
    targets = sio.loadmat(gt_fn,
                          squeeze_me=True,
                          struct_as_record=False,
                          variable_names=['imnames', 'wordBB', 'txt'])

    image_names = targets['imnames']
    word_bboxes = targets['wordBB']
    all_transcripts = targets['txt']

    # Take the first image whose name contains the key.
    matches = [i for i, name in enumerate(image_names) if img_key in name]
    if not matches:
        raise IndexError(
            'no image name containing %r found in %s' % (img_key, gt_fn))
    index = matches[0]
    print(index)

    img_fn = data_root + image_names[index]
    img = cv2.imread(img_fn)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError('could not read image: %s' % img_fn)

    # squeeze_me=True collapses a single text entry to a bare string; wrap it
    # back into a 1-d array so we iterate over lines, not over characters.
    lines = np.atleast_1d(all_transcripts[index])
    words = [word for line in lines for word in line.split()]

    # squeeze_me=True also drops the word axis when the image has one word.
    boxes = word_bboxes[index]
    if boxes.ndim == 2:
        boxes = np.expand_dims(boxes, axis=2)
    _, _, num_words = boxes.shape
    boxes = boxes.reshape([8, num_words], order='F').T  # num_words * 8

    for i, box in enumerate(boxes):
        corners = box.reshape(4, 2)
        print(box)
        # minAreaRect only accepts 32-bit float or int point sets.
        rotated_rect = cv2.minAreaRect(corners.astype(np.float32))
        print(rotated_rect)

        # minAreaRect returns ((cx, cy), (w, h), angle): the size is element
        # 1, element 0 is the centre.
        box_w, box_h = rotated_rect[1]
        long_side, short_side = max(box_w, box_h), min(box_w, box_h)

        if short_side > 0:
            width = 8 * long_side / short_side
            print(width)
        else:
            # Degenerate (zero-height) box: the ratio is undefined.
            print('degenerate box, skipping width')

        show_box(img, corners, words[i])
Beispiel #2
0
def test_rect_icdar():
    """Show the word boxes of one ICDAR 2015 image and print their rotated rects.

    Reads the hard-coded image and its ground-truth text file, where each
    non-empty line is parsed as eight comma-separated integer coordinates
    followed by the transcription. For every box, prints the coordinates and
    the ``cv2.minAreaRect`` result, then passes the box and its transcript to
    ``show_box``.

    Raises:
        FileNotFoundError: if the image or the ground-truth file cannot be read.
    """
    img_fn = '/Users/luning/Dev/data/icdar/icdar2015/4.4/training/ch4_training_images/img_283.jpg'
    img_gt = '/Users/luning/Dev/data/icdar/icdar2015/4.4/training/ch4_training_localization_transcription_gt/gt_img_283.txt'
    img = cv2.imread(img_fn)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError('could not read image: %s' % img_fn)

    boxes = []
    transcripts = []
    # utf-8-sig strips a leading BOM if the file has one.
    with open(img_gt, mode='r', encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            # Split at most 8 times so a transcription that itself contains
            # commas stays intact in the last field.
            fields = line.split(',', 8)
            boxes.append([int(x) for x in fields[:8]])
            transcripts.append(fields[8])

    boxes = np.array(boxes, dtype=np.float32)

    for box, transcript in zip(boxes, transcripts):
        corners = box.reshape(4, 2)
        print(box)
        rotated_rect = cv2.minAreaRect(corners)
        print(rotated_rect)
        show_box(img, corners, transcript)