Example #1
def text_detect(text_detector, image_path, img_type):
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print "Image: %s"%image_path
    if img_type == "others":
        return [],0

    im=cv2.imread(image_path)
    im_small, f=resize_im(im, Config.SCALE, Config.MAX_SCALE)

    timer = Timer()
    timer.tic()
    text_lines = text_detector.detect(im_small)
    text_lines = text_lines / f # project back to size of original image
    text_lines = refine_boxes(im, text_lines, expand_pixel_len = Config.DILATE_PIXEL,
                              pixel_blank = Config.BREATH_PIXEL, binary_thresh=Config.BINARY_THRESH)
    text_area_ratio = calc_area_ratio(text_lines, im.shape)
    print "Number of the detected text lines: %s" % len(text_lines)
    print "Detection Time: %f" % timer.toc()
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

    if Config.DEBUG_SAVE_BOX_IMG:
        im_with_text_lines = draw_boxes(im, text_lines, is_display=False, caption=image_path, wait=False)
        if im_with_text_lines is not None:
            cv2.imwrite(image_path+'_boxes.jpg', im_with_text_lines)

    return text_lines, text_area_ratio
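
A minimal driver sketch (not part of the example above): it assumes the CTPN imports and model files used in the other examples on this page; the image path and img_type value are placeholders.

# Hedged usage sketch: builds a detector the same way later examples do and
# calls text_detect from Example #1. Paths and img_type are illustrative only.
from detectors import TextProposalDetector, TextDetector
from other import CaffeModel

NET_DEF_FILE = "../models/deploy.prototxt"                 # assumed model paths
MODEL_FILE = "../models/ctpn_trained_model.caffemodel"
text_detector = TextDetector(TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE)))
text_lines, text_area_ratio = text_detect(text_detector, "sample.jpg", img_type="document")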
Example #2
def textPredict(input_path):

    # CPU mode setting
    if len(sys.argv)>1 and sys.argv[1]=="--no-gpu":
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)

    model_path = "../models/"

    # initialize the detectors
    NET_DEF_FILE = model_path + "deploy.prototxt"
    MODEL_FILE = model_path + "ctpn_trained_model.caffemodel"

    text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)

    im = cv2.imread(input_path)
    #h = im.shape[0]
    #w = im.shape[1]
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    return text_lines, f
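
Since textPredict returns boxes in the resized image's coordinate frame together with the resize factor f, a caller would divide by f to project them back onto the original image, as the other examples on this page do. A minimal, hypothetical caller:

text_lines, f = textPredict("sample.jpg")   # "sample.jpg" is a placeholder path
text_lines = text_lines / f                 # project back to original image size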
Example #3
def detect_text_boxes(video_pk, cpu_mode=False):
    """
    Detect Text Boxes in frames for a video using CTPN, must be run in dva_ctpn container
    :param detector_pk
    :param video_pk
    :return:
    """
    setup_django()
    from dvaapp.models import Region, Frame
    from django.conf import settings
    from PIL import Image
    import sys
    video_pk = int(video_pk)
    sys.path.append('/opt/ctpn/CTPN/tools/')
    sys.path.append('/opt/ctpn/CTPN/src/')
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    import cv2, caffe
    from detectors import TextProposalDetector, TextDetector
    NET_DEF_FILE = "/opt/ctpn/CTPN/models/deploy.prototxt"
    MODEL_FILE = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
    if cpu_mode:  # Set this to true for CPU only mode
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    for f in Frame.objects.all().filter(video_id=video_pk):
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, video_pk,
                                            f.frame_index)
        im = cv2.imread(path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = text_detector.detect(im)
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = int(left * mul_w), int(
                top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = Region()
            r.region_type = r.DETECTION
            r.confidence = int(100.0 * score)
            r.object_name = "CTPN_TEXTBOX"
            r.y = top
            r.x = left
            r.w = right - left
            r.h = bottom - top
            r.frame_id = f.pk
            r.video_id = video_pk
            r.save()
            right = r.w + r.x
            bottom = r.h + r.y
            img = Image.open(path)
            img2 = img.crop((left, top, right, bottom))
            img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT,
                                                       video_pk, r.pk))
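
The mul_h / mul_w arithmetic above maps detections from the resized frame back to the original frame. A small, self-contained sketch with illustrative numbers:

# Illustrative numbers only: a 1200x1600 frame resized to 600x800.
old_h, old_w = 1200, 1600
new_h, new_w = 600, 800
mul_h, mul_w = old_h / float(new_h), old_w / float(new_w)   # 2.0, 2.0
left, top, right, bottom = 50, 30, 110, 60                  # box on the resized frame
print((int(left * mul_w), int(top * mul_h), int(right * mul_w), int(bottom * mul_h)))
# -> (100, 60, 220, 120) on the original frame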
Example #4
def text_detect(args, text_detector, image_path):
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print "Image: %s" % image_path

    im = cv2.imread(image_path)
    im_small, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)

    timer = Timer()
    timer.tic()
    text_lines = text_detector.detect(im_small)
    text_lines = text_lines / f  # project back to size of original image
    text_lines = refine_boxes(im, text_lines)
    text_lines = rank_boxes(text_lines)
    print "Number of the detected text lines: %s" % len(text_lines)
    print "Detection Time: %f" % timer.toc()
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

    if args.SAVE_IMAGE_WITH_BOX:
        im_with_text_lines = draw_boxes(im,
                                        text_lines,
                                        is_display=False,
                                        caption=image_path,
                                        wait=False)
        if im_with_text_lines is not None:
            cv2.imwrite(image_path + '_boxes.jpg', im_with_text_lines)

    return im, text_lines
Example #5
def ctpn(img):
    """
    text box detect
    """
    scale, max_scale = Config.SCALE,Config.MAX_SCALE
    img,f = resize_im(img,scale=scale,max_scale=max_scale)
    scores, boxes = test_ctpn(sess, net, img)
    return scores, boxes,img
Example #6
    def get_text_lines(self, im, NET_DEF_FILE, MODEL_FILE):
        # initialize the detectors
        text_proposals_detector = TextProposalDetector(
            CaffeModel(NET_DEF_FILE, MODEL_FILE))
        text_detector = TextDetector(text_proposals_detector)
        im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        text_lines = text_detector.detect(im)
        return text_lines / f
Example #7
    def process(self, image, bbox):
        im_crop = image[int(bbox[1]):int(bbox[3]),
                        int(bbox[0]):int(bbox[2]), :]

        im_scale, f = resize_im(im_crop, cfg.SCALE, cfg.MAX_SCALE)

        # print(np.array(im).shape)
        text_lines = self.text_detector.detect(im_scale)
        return im_scale, text_lines
Example #8
def getTextRec(text_detector, im):
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    # keep a copy of the resized image for drawing
    tmp = im.copy()
    text_lines = text_detector.detect(im)
    imsrc, text_recs = draw_boxes(tmp,
                                  text_lines,
                                  caption='im_name',
                                  wait=True)
    return tmp, imsrc, text_recs
Example #9
    def execute(self, data, batch_size):
        ret = []
        for i in range(batch_size):
            img_array = np.asarray(bytearray(data[i].read()), dtype=np.uint8)
            im = cv2.imdecode(img_array, -1)

            im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
            text_lines = self.text_detector.detect(im)

            ret_val = str(text_lines) + '\n'
            ret.append(ret_val)
        return ret
Example #10
def getCharBlock(text_detector, im):
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    tmp = im.copy()
    #timer=Timer()
    #timer.tic()
    text_lines = text_detector.detect(im)

    #print "Number of the detected text lines: %s"%len(text_lines)
    #print "Time: %f"%timer.toc()

    text_recs = draw_boxes(tmp, text_lines, caption='im_name', wait=True)
    return tmp, text_recs
Example #11
def getCharBlock(text_detector, im):
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    cv2.imshow("src", im)
    tmp = im.copy()
    #timer=Timer()
    #timer.tic()
    text_lines = text_detector.detect(im)

    #print "Number of the detected text lines: %s"%len(text_lines)
    #print "Time: %f"%timer.toc()

    text_recs = draw_boxes(tmp, text_lines, caption='im_name', wait=True)
    return tmp, text_recs
Example #12
    def detectText(self, image):
        """
        Detects text from the image given its path
        Returns a list of bounding boxes
        """
        if os.path.exists(image):
            img = cv2.imread(image)

            self.timer.tic()
            im, f = resize_im(img, cfg.SCALE, cfg.MAX_SCALE)
            text_lines = self.text_detector.detect(im)
            print("Time: %f" % self.timer.toc())
            return text_lines, f
        else:
            print("Image not found")
Example #13
    def detect(self, image_path):
        if self.session is None:
            self.load()
        regions = []
        im = cv2.imread(image_path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = self.session.detect(im)
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = int(left * mul_w), int(top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = {'score': float(score), 'y': top, 'x': left, 'w': right - left, 'h': bottom - top}
            regions.append(r)
        return regions
Example #14
def text_detec(img_url):
    caffe.set_mode_gpu()
    caffe.set_device(cfg.TEST_GPU_ID)

    # initialize the detectors
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    im = cv2.imread(img_url)
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)
    obj_num = len(text_lines)
    print "Number of the detected text lines: %s" % len(text_lines)
    print "Time: %f" % timer.toc()

    boxstr = u''

    count = 0
    #http://192.168.7.37:8393/static/jz66f1d49d97d048fe9e4a62004199d0b2_1_for_trail.jpg
    print text_lines
    for bbox in text_lines:
        print bbox
        count += 1
        boxstr += "text[%d]:[%f,%f,%f,%f]<br/>" % (count, bbox[0], bbox[1],
                                                   bbox[2], bbox[3])
    im_name = img_url.split('/')[-1]
    im_name = im_name.replace("?", '_')
    im_name = im_name.replace("%", '_')
    im_name = im_name.replace("&", '_')
    im_name = im_name.replace("=", '_')
    local_url = img_url
    write_path = "/data1/mingmingzhao/data_sets/test/text_detect/text_detect_%s" % (
        local_url.split('/')[-1])
    print "write_path:" + write_path
    im_with_text_lines = draw_boxes_zmm(im,
                                        text_lines,
                                        caption=write_path,
                                        wait=False)
    server_url = "http://192.168.7.37:8393/static/text_detect/%s" % (
        write_path.split('/')[-1])
    print "server_url:" + server_url
    return boxstr, server_url, count
Example #15
    def detect(self, image_path):
        if self.session is None:
            self.load()
        regions = []
        im = cv2.imread(image_path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = self.session.detect(im)
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = int(left * mul_w), int(
                top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = {
                'score': float(score),
                'y': top,
                'x': left,
                'w': right - left,
                'h': bottom - top,
            }
            regions.append(r)
        return regions
Example #16
    def process(self, image, bbox):
        def resize_im(im, scale, max_scale=None):
            f = float(scale) / min(im.shape[0], im.shape[1])
            if max_scale is not None and f * max(im.shape[0],
                                                 im.shape[1]) > max_scale:
                f = float(max_scale) / max(im.shape[0], im.shape[1])
            return cv2.resize(im,
                              None,
                              None,
                              fx=f,
                              fy=f,
                              interpolation=cv2.INTER_LINEAR), f

        im_crop = image[int(bbox[1]):int(bbox[3]),
                        int(bbox[0]):int(bbox[2]), :]
        img, scale = resize_im(im_crop,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        scores, boxes = test_ctpn(self.session, self.net, img)

        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])
        return img, boxes, scale
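
The inline resize_im above computes a single scale factor f = scale / min(h, w), then caps it so the longer side never exceeds max_scale. A short standalone sketch with assumed values for SCALE and MAX_SCALE:

# Assumed values: SCALE=600, MAX_SCALE=1000 (typical CTPN settings).
h, w = 480, 1920
SCALE, MAX_SCALE = 600, 1000
f = float(SCALE) / min(h, w)              # 600 / 480 = 1.25
if MAX_SCALE is not None and f * max(h, w) > MAX_SCALE:
    f = float(MAX_SCALE) / max(h, w)      # 1000 / 1920 ~= 0.52; the cap wins
print(f)                                  # the image would be resized by this factor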
Example #17
    def detect(self, filepath):
        im = cv2.imread(filepath)
        im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        self.text_lines = self.text_detector.detect(im)
        return self.text_lines
Example #18
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

demo_imnames = os.listdir(DEMO_IMAGE_DIR)
timer = Timer()

for im_name in demo_imnames:
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print "Image: %s" % im_name

    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)

    timer.tic()

    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    print "Number of the detected text lines: %s" % len(text_lines)
    print "Time: %f" % timer.toc()

    im_with_text_lines = draw_boxes(im,
                                    text_lines,
                                    caption=im_name,
                                    wait=False)

print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
print "Thank you for trying our demo. Press any key to exit..."
cv2.waitKey(0)
Example #19
    def forward(self, bottom, top):
        while True:
            name = self._imnames[self._image_index].split('.')[0]
            self._image_index += 1
            if self._image_index == len(self._imnames):
                self._image_index = 0

            image = cv2.imread(name + '.jpg')
            image, scale = resize_im(image, cfg.SCALE, cfg.MAX_SCALE)
            data = prepare_img(image, cfg.MEAN)
            im_info = np.array([[data.shape[1], data.shape[2]]], np.float32)
            data = data[np.newaxis, :]

            gt_boxes_list = []
            with open(name + '.txt', 'r') as f:
                for line in f:
                    line_data = line.split(',')
                    gt_boxes_list.append([
                        int(line_data[0]),
                        int(line_data[1]),
                        int(line_data[2]),
                        int(line_data[3])
                    ])
            gt_boxes_list = [[int(x * scale) for x in box]
                             for box in gt_boxes_list]

            divide_gt_boxes_list = []
            side_pos = []
            for box in gt_boxes_list:
                x1 = box[0]
                y1 = box[1]
                x2 = box[2] + box[0] - 1
                y2 = box[3] + box[1] - 1
                if y2 - y1 + 1 <= cfg.TEXT_PROPOSALS_WIDTH / 2:
                    continue
                if x2 - x1 + 1 < cfg.TEXT_PROPOSALS_WIDTH:
                    continue
                start = x1
                while start % cfg.TEXT_PROPOSALS_WIDTH != 0:
                    start += 1
                end = x2 + 1
                while end % cfg.TEXT_PROPOSALS_WIDTH != 0:
                    end -= 1
                begin_flag = 1
                tmp_side_pos = []
                while start < end:
                    if inside_image(start, y1, im_info[0, :]) and inside_image(
                            start + cfg.TEXT_PROPOSALS_WIDTH - 1, y2,
                            im_info[0, :]):
                        divide_gt_boxes_list.append([
                            start, y1, start + cfg.TEXT_PROPOSALS_WIDTH - 1, y2
                        ])
                        if begin_flag:
                            begin_flag = 0
                            tmp_side_pos.append(x1)
                        else:
                            if start + cfg.TEXT_PROPOSALS_WIDTH == end:
                                tmp_side_pos.append(x2)
                            else:
                                tmp_side_pos.append(-1)
                    start += cfg.TEXT_PROPOSALS_WIDTH
                for p in tmp_side_pos:
                    side_pos.append(p)
            gt_boxes = np.array(divide_gt_boxes_list)
            side_pos = np.array(side_pos)
            if len(divide_gt_boxes_list):
                break

        top[0].reshape(*(data.shape))
        top[0].data[...] = data.astype(np.float32, copy=False)

        top[1].reshape(*(im_info.shape))
        top[1].data[...] = im_info.astype(np.float32, copy=False)

        top[2].reshape(*(gt_boxes.shape))
        top[2].data[...] = gt_boxes.astype(np.float32, copy=False)

        top[3].reshape(*(side_pos.shape))
        top[3].data[...] = side_pos.astype(np.float32, copy=False)
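
The loop above slices each ground-truth box into fixed-width strips aligned to a cfg.TEXT_PROPOSALS_WIDTH grid. A standalone illustration of that slicing, assuming a width of 16 (in the real layer the value comes from cfg):

# Assumed strip width; in the layer above it is cfg.TEXT_PROPOSALS_WIDTH.
TEXT_PROPOSALS_WIDTH = 16
x1, x2 = 35, 114                          # a ground-truth box from x=35 to x=114
start = x1
while start % TEXT_PROPOSALS_WIDTH != 0:  # first grid-aligned column inside the box
    start += 1                            # -> 48
end = x2 + 1
while end % TEXT_PROPOSALS_WIDTH != 0:    # last grid boundary inside the box
    end -= 1                              # -> 112
strips = [(s, s + TEXT_PROPOSALS_WIDTH - 1)
          for s in range(start, end, TEXT_PROPOSALS_WIDTH)]
print(strips)                             # [(48, 63), (64, 79), (80, 95), (96, 111)]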
Example #20
text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))

text_detector = TextDetector(text_proposals_detector)

path = os.path.abspath(os.curdir)
timer = Timer()
print "\ninput exit break\n"
while True:
    im_name = raw_input("\nplease input file name:")
    if im_name == "exit":
        break
    im_path = path + "/demo_images/" + im_name

    im = cv2.imread(im_path)
    if im is None:
        continue

    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    cv2.imshow("src", im)
    tmp = im.copy()
    timer.tic()
    text_lines = text_detector.detect(im)

    print "Number of the detected text lines: %s" % len(text_lines)
    print "Time: %f" % timer.toc()

    im_with_text_lines = draw_boxes(tmp, text_lines, caption=im_name, wait=True)