Code Example #1
import logging

import caffe
import cv2

# CTPN-repo imports, with the module paths used in Code Example #2
from cfg import Config as cfg
from detectors import TextProposalDetector, TextDetector
from other import resize_im, CaffeModel


class TextBoxDetector(object):

    def __init__(self):
        self.session = None

    def load(self):
        logging.info('Creating networks and loading parameters')
        NET_DEF_FILE = "/opt/ctpn/CTPN/models/deploy.prototxt"
        MODEL_FILE = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
        text_proposals_detector = TextProposalDetector(
            CaffeModel(NET_DEF_FILE, MODEL_FILE))
        self.session = TextDetector(text_proposals_detector)

    def detect(self, image_path):
        if self.session is None:
            self.load()
        regions = []
        im = cv2.imread(image_path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        # Per-axis multipliers to map boxes back to the original image.
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = self.session.detect(im)
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = (int(left * mul_w), int(top * mul_h),
                                        int(right * mul_w), int(bottom * mul_h))
            r = {'score': float(score), 'y': top, 'x': left,
                 'w': right - left, 'h': bottom - top}
            regions.append(r)
        return regions
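detect() maps boxes from the resized image back to the original resolution with per-axis multipliers. A minimal, self-contained sketch of just that rescaling step; all sizes and box values below are made up:

# Sketch: map one detection from resized-image coordinates back to the
# original image, as detect() does with mul_w / mul_h. Values are made up.
old_w, old_h = 1280, 720          # original frame size
new_w, new_h = 900, 506           # size after resize_im
mul_w = float(old_w) / float(new_w)
mul_h = float(old_h) / float(new_h)

left, top, right, bottom = 100.0, 50.0, 300.0, 90.0   # box on resized image
region = {'x': int(left * mul_w), 'y': int(top * mul_h),
          'w': int(right * mul_w) - int(left * mul_w),
          'h': int(bottom * mul_h) - int(top * mul_h)}
print(region)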
Code Example #2
def detect_text_boxes(video_pk, cpu_mode=False):
    """
    Detect Text Boxes in frames for a video using CTPN, must be run in dva_ctpn container
    :param detector_pk
    :param video_pk
    :return:
    """
    setup_django()
    from dvaapp.models import Region, Frame
    from django.conf import settings
    from PIL import Image
    import sys
    video_pk = int(video_pk)
    sys.path.append('/opt/ctpn/CTPN/tools/')
    sys.path.append('/opt/ctpn/CTPN/src/')
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    import cv2, caffe
    from detectors import TextProposalDetector, TextDetector
    NET_DEF_FILE = "/opt/ctpn/CTPN/models/deploy.prototxt"
    MODEL_FILE = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
    if cpu_mode:  # Set this to true for CPU only mode
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    for f in Frame.objects.all().filter(video_id=video_pk):
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, video_pk,
                                            f.frame_index)
        im = cv2.imread(path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = text_detector.detect(im)
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = int(left * mul_w), int(
                top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = Region()
            r.region_type = r.DETECTION
            r.confidence = int(100.0 * score)
            r.object_name = "CTPN_TEXTBOX"
            r.y = top
            r.x = left
            r.w = right - left
            r.h = bottom - top
            r.frame_id = f.pk
            r.video_id = video_pk
            r.save()
            # left/top/right/bottom already hold the box in original-image
            # pixels, so the crop can use them directly.
            img = Image.open(path)
            img2 = img.crop((left, top, right, bottom))
            img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT,
                                                       video_pk, r.pk))
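The tail of the loop crops each saved detection out of the frame with PIL. A minimal sketch of just that crop-and-save step, using a synthetic image instead of a real frame; the box values and output path are hypothetical:

from PIL import Image

# Sketch: crop a detected region (left, top, right, bottom) and save it,
# mirroring the img.crop(...) call above. The image here is synthetic.
img = Image.new('RGB', (640, 480), color=(200, 200, 200))
left, top, right, bottom = 50, 40, 250, 90   # hypothetical detection box
crop = img.crop((left, top, right, bottom))
crop.save('/tmp/detection_example.jpg')      # hypothetical output path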
Code Example #3
File: detector.py  Project: pribadihcr/DLL-RAPI
class bibnumber_ctpn_caffe(object):
    def __init__(self, dir_model, gpu_id):
        NET_DEF_FILE = dir_model + "/bib_number/CTPN/deploy.prototxt"
        MODEL_FILE = dir_model + "/bib_number/CTPN/ctpn_trained_model.caffemodel"
        use_cpu = False  # set True for CPU-only mode
        if use_cpu:
            caffe.set_mode_cpu()
        else:
            caffe.set_mode_gpu()
            caffe.set_device(int(gpu_id))

        text_proposals_detector = TextProposalDetector(
            CaffeModel(NET_DEF_FILE, MODEL_FILE))
        self.text_detector = TextDetector(text_proposals_detector)

        # Raw string so the \d escapes are not interpreted by Python.
        length_regexp = r'Duration: (\d{2}):(\d{2}):(\d{2})\.\d+,'
        self.re_length = re.compile(length_regexp)

    def process(self, image, bbox):
        im_crop = image[int(bbox[1]):int(bbox[3]),
                        int(bbox[0]):int(bbox[2]), :]

        im_scale, f = resize_im(im_crop, cfg.SCALE, cfg.MAX_SCALE)

        # print(np.array(im).shape)
        text_lines = self.text_detector.detect(im_scale)
        return im_scale, text_lines
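process() crops the requested region out of the image by NumPy slicing before detection. A self-contained sketch of that crop; the array size and bbox are arbitrary:

import numpy as np

# Sketch: crop bbox = (x1, y1, x2, y2) out of an H x W x 3 image by slicing,
# as process() does before handing the crop to resize_im / the detector.
image = np.zeros((480, 640, 3), dtype=np.uint8)
bbox = (100, 50, 300, 200)  # hypothetical (x1, y1, x2, y2)
im_crop = image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2]), :]
print(im_crop.shape)  # (150, 200, 3)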
Code Example #4
def textPredict(input_path):

    #CPU mode setting
    if len(sys.argv)>1 and sys.argv[1]=="--no-gpu":
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)

    model_path = "../models/"

    # initialize the detectors
    NET_DEF_FILE = model_path + "deploy.prototxt"
    MODEL_FILE = model_path + "ctpn_trained_model.caffemodel"

    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)

    im = cv2.imread(input_path)
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    # Boxes are in resized-image coordinates; f lets the caller map back.
    return text_lines, f
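textPredict returns the boxes together with the resize factor f, leaving the caller to map coordinates back to the original image (as Code Example #5 attempts with text_lines / f). A minimal NumPy sketch of that mapping, dividing only the coordinate columns so the score column is untouched; the values are hypothetical:

import numpy as np

# Sketch: text_lines rows are (left, top, right, bottom, score) on the
# resized image; dividing the first four columns by f maps them back to
# the original image without scaling the score.
f = 1.5
text_lines = np.array([[150.0, 75.0, 450.0, 135.0, 0.97]])
text_lines[:, :4] /= f
print(text_lines)  # coords rescaled, score still 0.97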
Code Example #5
    def get_text_lines(self, im, NET_DEF_FILE, MODEL_FILE):
        # initialize the detectors
        text_proposals_detector = TextProposalDetector(
            CaffeModel(NET_DEF_FILE, MODEL_FILE))
        text_detector = TextDetector(text_proposals_detector)
        im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        text_lines = text_detector.detect(im)
        # Divide only the coordinate columns by f; the original
        # `text_lines / f` also scaled the score column.
        text_lines[:, :4] = text_lines[:, :4] / f
        return text_lines
Code Example #6
class TextBoxDetector(object):
    def __init__(self, model_path):
        self.session = None
        # str(path.encode('utf-8')) yields "b'...'" on Python 3; keep the
        # paths as plain strings instead.
        self.model_path = str(model_path)
        self.network_def = str(model_path.replace('.caffemodel', '.prototxt'))

    def load(self):
        logging.info('Creating networks and loading parameters')
        if os.environ.get('GPU_AVAILABLE'):  # any non-empty value enables GPU
            caffe.set_mode_gpu()
            caffe.set_device(cfg.TEST_GPU_ID)
            logging.info("GPU mode")
        else:
            caffe.set_mode_cpu()
            logging.info("CPU mode")
        text_proposals_detector = TextProposalDetector(
            CaffeModel(self.network_def, self.model_path))
        self.session = TextDetector(text_proposals_detector)
        logging.info('model loaded!')

    def detect(self, image_path):
        if self.session is None:
            self.load()
        regions = []
        im = cv2.imread(image_path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = self.session.detect(im)
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = int(left * mul_w), int(
                top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = {
                'score': float(score),
                'y': top,
                'x': left,
                'w': right - left,
                'h': bottom - top,
            }
            regions.append(r)
        return regions
Code Example #7
class CTPNDetector:
    def __init__(self):
        '''
        Constructor for the CTPN text detector: loads the Caffe model and
        initializes the proposal and text-line detectors on the GPU.
        '''
        self.NET_DEF_FILE = "models/deploy.prototxt"
        self.MODEL_FILE = "models/ctpn_trained_model.caffemodel"
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
        self.text_proposals_detector = TextProposalDetector(CaffeModel(self.NET_DEF_FILE, self.MODEL_FILE))
        self.text_detector = TextDetector(self.text_proposals_detector)

    def detect(self, filepath):
        im = cv2.imread(filepath)
        # f is discarded here, so the returned boxes are in resized-image
        # coordinates rather than those of the original file.
        im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        self.text_lines = self.text_detector.detect(im)
        return self.text_lines
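Because f is discarded, a caller who needs original-image coordinates has to rescale. A hedged sketch of a wrapper that keeps f, assuming the same cv2 / cfg / resize_im environment as the class above and that detect() yields an N x 5 NumPy array of (left, top, right, bottom, score):

# Sketch: wrapper that maps boxes back to the original image
# (assumptions as stated above; not part of the original class).
def detect_original_coords(detector, filepath):
    im = cv2.imread(filepath)
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = detector.text_detector.detect(im)
    text_lines[:, :4] /= f  # coordinate columns back to original pixels
    return text_lines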
Code Example #8
File: text_detect.py  Project: anjiang2016/CTPN
def text_detect(img_url):
    caffe.set_mode_gpu()
    caffe.set_device(cfg.TEST_GPU_ID)

    # initialize the detectors
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    im = cv2.imread(img_url)
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)
    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    boxstr = u''
    count = 0
    for bbox in text_lines:
        count += 1
        boxstr += "text[%d]:[%f,%f,%f,%f]<br/>" % (count, bbox[0], bbox[1],
                                                   bbox[2], bbox[3])

    # str.replace returns a new string, so the results must be reassigned
    # (the original discarded them) and the sanitized name used below.
    im_name = img_url.split('/')[-1]
    for ch in ("?", "%", "&", "="):
        im_name = im_name.replace(ch, '_')
    write_path = "/data1/mingmingzhao/data_sets/test/text_detect/text_detect_%s" % im_name
    print("write_path:" + write_path)
    im_with_text_lines = draw_boxes_zmm(im,
                                        text_lines,
                                        caption=write_path,
                                        wait=False)
    server_url = "http://192.168.7.37:8393/static/text_detect/%s" % (
        write_path.split('/')[-1])
    print("server_url:" + server_url)
    return boxstr, server_url, count
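The chained replace() calls sanitize the file name one character at a time; a single regular-expression pass does the same in one step. A small self-contained sketch:

import re

# Sketch: replace all URL-unsafe characters handled above (? % & =)
# with underscores in one pass. The file name is hypothetical.
im_name = "jz66f1d49d97d048fe9e4a62004199d0b2_1.jpg?x=1&y=2"
im_name = re.sub(r"[?%&=]", "_", im_name)
print(im_name)  # jz66f1d49d97d048fe9e4a62004199d0b2_1.jpg_x_1_y_2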
Code Example #9
File: detector.py  Project: pribadihcr/DLL-RAPI
    def process(self, image, bbox):
        def resize_im(im, scale, max_scale=None):
            f = float(scale) / min(im.shape[0], im.shape[1])
            if max_scale is not None and f * max(im.shape[0],
                                                 im.shape[1]) > max_scale:
                f = float(max_scale) / max(im.shape[0], im.shape[1])
            return cv2.resize(im, None, None, fx=f, fy=f,
                              interpolation=cv2.INTER_LINEAR), f

        im_crop = image[int(bbox[1]):int(bbox[3]),
                        int(bbox[0]):int(bbox[2]), :]
        img, scale = resize_im(im_crop,
                               scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        scores, boxes = test_ctpn(self.session, self.net, img)

        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis],
                                    img.shape[:2])
        return img, boxes, scale
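resize_im scales the short side up to scale, capped so the long side does not exceed max_scale. A quick self-contained check of that arithmetic for a hypothetical 1280x720 image:

# Sketch: recompute resize_im's scale factor for a hypothetical image.
h, w = 720, 1280
scale, max_scale = 600, 1000
f = float(scale) / min(h, w)            # 600 / 720 = 0.833...
if max_scale is not None and f * max(h, w) > max_scale:
    f = float(max_scale) / max(h, w)    # cap: 1000 / 1280 = 0.78125
print(f, int(h * f), int(w * f))        # 0.78125 562 1000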
Code Example #10
class CTPNModel(CaffeAiUcloudModel):
    """CTPN text-detection model.
    """
    def __init__(self, conf):
        super(CTPNModel, self).__init__(conf)

    def load_model(self):
        caffe.set_mode_cpu()
        text_proposals_detector = TextProposalDetector(
            CaffeModel(NET_DEF_FILE, MODEL_FILE))
        self.text_detector = TextDetector(text_proposals_detector)

    def execute(self, data, batch_size):
        ret = []
        for i in range(batch_size):
            img_array = np.asarray(bytearray(data[i].read()), dtype=np.uint8)
            im = cv2.imdecode(img_array, -1)

            im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
            text_lines = self.text_detector.detect(im)

            ret_val = str(text_lines) + '\n'
            ret.append(ret_val)
        return ret
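execute() decodes each upload from raw bytes with cv2.imdecode. A self-contained roundtrip sketch of that step, encoding a synthetic image first so no files are needed:

import cv2
import numpy as np

# Sketch: encode a synthetic image to JPEG bytes, then decode them the
# way execute() does with the uploaded data.
img = np.full((64, 64, 3), 128, dtype=np.uint8)
ok, buf = cv2.imencode('.jpg', img)
img_array = np.asarray(bytearray(buf.tobytes()), dtype=np.uint8)
decoded = cv2.imdecode(img_array, -1)
print(decoded.shape)  # (64, 64, 3)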
Code Example #11
File: demo.py  Project: vicident/CTPN
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

demo_imnames = os.listdir(DEMO_IMAGE_DIR)
timer = Timer()

for im_name in demo_imnames:
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("Image: %s" % im_name)

    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)

    timer.tic()

    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(im,
                                    text_lines,
                                    caption=im_name,
                                    wait=False)

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("Thank you for trying our demo. Press any key to exit...")
cv2.waitKey(0)
Code Example #12
File: demo.py  Project: et0803/sceneReco
text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

path = os.path.abspath(os.curdir)
timer = Timer()
print("\ninput exit break\n")
while True:
    im_name = raw_input("\nplease input file name:")  # use input() on Python 3
    if im_name == "exit":
        break
    im_path = path + "/demo_images/" + im_name

    im = cv2.imread(im_path)
    if im is None:
        continue

    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    cv2.imshow("src", im)
    tmp = im.copy()
    timer.tic()
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(tmp, text_lines, caption=im_name, wait=True)
Code Example #13
class TextRecognizer():
    """
    Recognizes text from a given image
    """
    def __init__(self, mode):
        if mode == "GPU":
            caffe.set_mode_gpu()
            caffe.set_device(cfg.TEST_GPU_ID)
        else:
            caffe.set_mode_cpu()

        netfile = cfg.NET_FILE
        modelfile = cfg.MODEL_FILE

        # initialize the detectors
        self.text_proposals_detector = TextProposalDetector(
            CaffeModel(netfile, modelfile))
        self.text_detector = TextDetector(self.text_proposals_detector)
        self.timer = Timer()

        self.char_classifier = caffe.Classifier(
            cfg.FONT_PROTO,
            cfg.FONT_MODEL,
            mean=np.load(cfg.FONT_MEANFILE).mean(1).mean(1),
            channel_swap=(2, 1, 0),
            raw_scale=255,
            image_dims=(cfg.FONT_DIMS, cfg.FONT_DIMS))
        with open(cfg.FONT_LBLFILE, 'r') as f:
            self.fontLabels = [x.strip() for x in f]

    def detectText(self, image):
        """
        Detects text from the image given its path
        Returns a list of bounding boxes
        """
        if os.path.exists(image):
            img = cv2.imread(image)

            self.timer.tic()
            im, f = resize_im(img, cfg.SCALE, cfg.MAX_SCALE)
            text_lines = self.text_detector.detect(im)
            print("Time: %f" % self.timer.toc())
            return text_lines, f
        else:
            print("Image not found")
            return [], 1.0  # keep the (boxes, scale) shape for callers

    def extractText(self, image, boundingBoxes):
        """
        Extracts the text from a given image using the bounding boxes
        Input - image name and the bounding boxes list
        Output - extracted text images
        """
        extractedText = []
        if os.path.exists(image):
            img = cv2.imread(image)
            for box in boundingBoxes:
                text = img[int(box[1]):int(box[3]), int(box[0]):int(box[2])]
                extractedText.append(text)
        else:
            print("Image not found")
        return extractedText

    def extractCharacters(self, image):
        """
        Extracts characters from a given "text" image
        Input - "image" opencv image
        """
        extractedChars = []
        if image.ndim == 3 and image.shape[2] == 3:
            imgGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            imgGray = image

        # Otsu's Thresholding
        newRet, binaryThreshold = cv2.threshold(
            imgGray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # dilation
        rectkernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        rectdilation = cv2.dilate(binaryThreshold, rectkernel, iterations=1)
        outputImage = image.copy()
        npaContours, npaHierarchy = cv2.findContours(rectdilation.copy(),
                                                     cv2.RETR_EXTERNAL,
                                                     cv2.CHAIN_APPROX_SIMPLE)

        for npaContour in npaContours:
            if cv2.contourArea(npaContour) > cfg.MIN_CONTOUR_AREA:
                [intX, intY, intW, intH] = cv2.boundingRect(npaContour)
                cv2.rectangle(outputImage, (intX, intY),
                              (intX + intW, intY + intH), (0, 0, 255), 2)
                # Get subimage of word and find contours of that word
                imgROI = binaryThreshold[intY:intY + intH, intX:intX + intW]
                subContours, subHierarchy = cv2.findContours(
                    imgROI.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                for subContour in subContours:
                    [pointX, pointY, width,
                     height] = cv2.boundingRect(subContour)
                    imr = image[intY + pointY:intY + pointY + height,
                                intX + pointX:intX + pointX + width]
                    extractedChars.append(imr)
        return extractedChars

    def recognizeCharacters(self, charList):
        """
        Character classification module
        charList - list of character images
        """
        pred = self.char_classifier.predict(charList)
        # argmax over the class scores picks the most likely font label.
        detectedChars = [self.fontLabels[p.argmax()] for p in pred]
        return detectedChars
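extractCharacters leans on Otsu thresholding plus rectangular dilation before contour extraction. A minimal self-contained sketch of the same pipeline on a synthetic image (it assumes the two-value cv2.findContours signature of OpenCV 4.x, as the class does):

import cv2
import numpy as np

# Sketch: Otsu threshold + rectangular dilation + external contours,
# mirroring extractCharacters on a synthetic white-on-black blob.
img = np.zeros((100, 200), dtype=np.uint8)
cv2.rectangle(img, (30, 30), (80, 70), 255, -1)   # fake "character"
_, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
dilated = cv2.dilate(binary, kernel, iterations=1)
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL,
                               cv2.CHAIN_APPROX_SIMPLE)
print(len(contours), cv2.boundingRect(contours[0]))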
Code Example #14
File: ctpnport.py  Project: eglrp/scene-text-recog
class CTPNDetector:
    def __init__(self, NET_DEF_FILE, MODEL_FILE, caffe_path):
        # Temporarily put the bundled pycaffe on sys.path for the imports.
        sys.path.insert(0, "%s/python" % caffe_path)
        import caffe
        from other import draw_boxes, resize_im, CaffeModel
        from detectors import TextProposalDetector, TextDetector
        sys.path.remove("%s/python" % caffe_path)
        self.caffe = caffe

        # initialize the detectors
        text_proposals_detector = TextProposalDetector(
            CaffeModel(NET_DEF_FILE, MODEL_FILE))
        self.text_detector = TextDetector(text_proposals_detector)
        self.resize_im = resize_im
        self.draw_boxes = draw_boxes

    def getCharBlock(self, im, gpu_id=0):
        if gpu_id < 0:
            self.caffe.set_mode_cpu()
        else:
            self.caffe.set_mode_gpu()
            self.caffe.set_device(gpu_id)

        resized_im, resize_ratio = self.resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        text_lines = self.text_detector.detect(resized_im.copy())
        return text_lines, resized_im, resize_ratio

    # this is deprecated
    def convert_bbox(self, bboxes):
        text_recs = np.zeros((len(bboxes), 8), int)  # np.int was removed in NumPy 1.24
        index = 0
        for box in bboxes:
            b1 = box[6] - box[7] / 2
            b2 = box[6] + box[7] / 2
            x1 = box[0]
            y1 = box[5] * box[0] + b1
            x2 = box[2]
            y2 = box[5] * box[2] + b1
            x3 = box[0]
            y3 = box[5] * box[0] + b2
            x4 = box[2]
            y4 = box[5] * box[2] + b2

            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)
            fTmp0 = y3 - y1
            fTmp1 = fTmp0 * disY / width
            x = np.fabs(fTmp1 * disX / width)
            y = np.fabs(fTmp1 * disY / width)
            if box[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            index = index + 1
        return text_recs

    def draw_boxes8(self,
                    im,
                    bboxes,
                    is_display=True,
                    color=None,
                    caption="Image",
                    wait=True):
        """
            boxes: bounding boxes
        """
        text_recs = np.zeros((len(bboxes), 8), int)  # np.int was removed in NumPy 1.24

        im = im.copy()
        index = 0
        for box in bboxes:
            if color is None:
                if len(box) == 8 or len(box) == 9:
                    c = tuple(cm.jet([box[-1]])[0, 2::-1] * 255)
                else:
                    c = tuple(np.random.randint(0, 256, 3))
            else:
                c = color

            b1 = box[6] - box[7] / 2
            b2 = box[6] + box[7] / 2
            x1 = box[0]
            y1 = box[5] * box[0] + b1
            x2 = box[2]
            y2 = box[5] * box[2] + b1
            x3 = box[0]
            y3 = box[5] * box[0] + b2
            x4 = box[2]
            y4 = box[5] * box[2] + b2

            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)
            fTmp0 = y3 - y1
            fTmp1 = fTmp0 * disY / width
            x = np.fabs(fTmp1 * disX / width)
            y = np.fabs(fTmp1 * disY / width)
            if box[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            cv2.line(im, (int(x1), int(y1)), (int(x2), int(y2)), c, 2)
            cv2.line(im, (int(x1), int(y1)), (int(x3), int(y3)), c, 2)
            cv2.line(im, (int(x4), int(y4)), (int(x2), int(y2)), c, 2)
            cv2.line(im, (int(x3), int(y3)), (int(x4), int(y4)), c, 2)
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            index = index + 1
            #cv2.rectangle(im, tuple(box[:2]), tuple(box[2:4]), c,2)
        if is_display:
            cv2.imshow('result', im)
            if wait:  # honor the otherwise-unused wait parameter
                cv2.waitKey(0)
        return im, text_recs
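convert_bbox and draw_boxes8 rebuild the four quad corners from a line parameterization: x extents in box[0]/box[2], slope in box[5], center-line intercept in box[6], and line height in box[7]. A worked, self-contained sketch for one hypothetical box:

# Sketch: corners of one text line from the parameterization used above.
# The numbers are hypothetical.
box = [100.0, 0, 300.0, 0, 0, 0.05, 60.0, 20.0]
b1 = box[6] - box[7] / 2                 # top edge intercept:    50.0
b2 = box[6] + box[7] / 2                 # bottom edge intercept: 70.0
x1, y1 = box[0], box[5] * box[0] + b1    # top-left     (100, 55)
x2, y2 = box[2], box[5] * box[2] + b1    # top-right    (300, 65)
x3, y3 = box[0], box[5] * box[0] + b2    # bottom-left  (100, 75)
x4, y4 = box[2], box[5] * box[2] + b2    # bottom-right (300, 85)
print((x1, y1), (x2, y2), (x3, y3), (x4, y4))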
Code Example #15
File: detector.py  Project: zhy1/DeepVideoAnalytics
class TextBoxDetector():
    def __init__(self, model_path, gpu_fraction=None):
        self.session = None
        if gpu_fraction:
            self.gpu_fraction = gpu_fraction
        else:
            self.gpu_fraction = float(os.environ.get('GPU_MEMORY', 0.20))
        # str(bytes) yields "b'...'" on Python 3; use the path directly.
        self.model_path = os.path.dirname(str(model_path))

    def load(self):
        logging.info('Creating networks and loading parameters')
        cfg_from_file(os.path.join(os.path.dirname(__file__), 'ctpn/text.yml'))
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self.gpu_fraction)
        config = tf.ConfigProto(allow_soft_placement=True,
                                gpu_options=gpu_options)
        self.session = tf.Session(config=config)
        self.net = get_network("VGGnet_test")
        self.textdetector = TextDetector()
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        saver.restore(self.session, ckpt.model_checkpoint_path)

    def detect(self, image_path):
        if self.session is None:
            self.load()
        regions = []
        img = cv2.imread(image_path)
        old_h, old_w, channels = img.shape
        img, scale = self.resize_im(img,
                                    scale=TextLineCfg.SCALE,
                                    max_scale=TextLineCfg.MAX_SCALE)
        new_h, new_w, channels = img.shape
        mul_h, mul_w = float(old_h) / float(new_h), float(old_w) / float(new_w)
        scores, boxes = test_ctpn(self.session, self.net, img)
        boxes = self.textdetector.detect(boxes, scores[:, np.newaxis],
                                         img.shape[:2])
        for box in boxes:
            left, top = int(box[0]), int(box[1])
            right, bottom = int(box[6]), int(box[7])
            score = float(box[8])
            left, top, right, bottom = int(left * mul_w), int(
                top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = {
                'score': float(score),
                'y': top,
                'x': left,
                'w': right - left,
                'h': bottom - top,
            }
            regions.append(r)
        return regions

    def resize_im(self, im, scale, max_scale=None):
        f = float(scale) / min(im.shape[0], im.shape[1])
        if max_scale is not None and f * max(im.shape[0], im.shape[1]) > max_scale:
            f = float(max_scale) / max(im.shape[0], im.shape[1])
        return cv2.resize(im, None, None, fx=f, fy=f,
                          interpolation=cv2.INTER_LINEAR), f
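Because resize_im scales both axes by the same f, the per-axis multipliers detect() recomputes from the shapes are simply 1/f, up to the integer rounding of the resized dimensions. A quick self-contained check with hypothetical sizes:

# Sketch: mul_h and mul_w recovered from shapes are ~1/f, differing only
# by the rounding applied to the resized output dimensions.
old_h, old_w = 720, 1280
f = 0.78125
new_h, new_w = int(round(old_h * f)), int(round(old_w * f))
mul_h, mul_w = old_h / float(new_h), old_w / float(new_w)
print(mul_h, mul_w, 1.0 / f)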