class TextBoxDetector():
    """CTPN text-box detector whose Caffe network is built on first use."""

    def __init__(self):
        # Stays None until load() has built the TextDetector.
        self.session = None

    def load(self):
        """Select GPU mode and build the TextDetector from the fixed model files."""
        logging.info('Creating networks and loading parameters')
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
        model = CaffeModel(
            "/opt/ctpn/CTPN/models/deploy.prototxt",
            "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel")
        self.session = TextDetector(TextProposalDetector(model))

    def detect(self, image_path):
        """Detect text lines in the image at ``image_path``.

        The image is resized with ``resize_im`` before detection and every
        box is scaled back to the pixel grid of the image as read from disk.

        :param image_path: path handed to ``cv2.imread``
        :return: list of dicts with keys ``score``, ``y``, ``x``, ``w``, ``h``
        """
        if self.session is None:
            self.load()

        source = cv2.imread(image_path)
        old_h, old_w, _ = source.shape
        resized, _ = resize_im(source, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, _ = resized.shape

        # Factors that map resized coordinates back to the source image.
        scale_y = float(old_h) / float(new_h)
        scale_x = float(old_w) / float(new_w)

        def to_region(line):
            left, top, right, bottom, score = line
            x0, y0 = int(left * scale_x), int(top * scale_y)
            x1, y1 = int(right * scale_x), int(bottom * scale_y)
            return {
                'score': float(score),
                'y': y0,
                'x': x0,
                'w': x1 - x0,
                'h': y1 - y0,
            }

        regions = []
        for line in self.session.detect(resized):
            regions.append(to_region(line))
        return regions
def detect_text_boxes(video_pk, cpu_mode=False):
    """
    Detect text boxes in the frames of a video using CTPN and store them.

    Must be run in the dva_ctpn container. For every ``Frame`` of the video a
    ``Region`` row is saved per detected text line, and the matching crop of
    the frame is written to ``<MEDIA_ROOT>/<video_pk>/detections/<region pk>.jpg``.

    :param video_pk: primary key of the video (coerced with ``int``)
    :param cpu_mode: run Caffe on the CPU when true, otherwise on GPU
        ``cfg.TEST_GPU_ID``
    :return: None
    """
    setup_django()
    from dvaapp.models import Region, Frame
    from django.conf import settings
    from PIL import Image
    import sys
    video_pk = int(video_pk)
    # The CTPN sources are not installed as a package; expose them first.
    sys.path.append('/opt/ctpn/CTPN/tools/')
    sys.path.append('/opt/ctpn/CTPN/src/')
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    import cv2, caffe
    from detectors import TextProposalDetector, TextDetector
    NET_DEF_FILE = "/opt/ctpn/CTPN/models/deploy.prototxt"
    MODEL_FILE = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
    if cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    for f in Frame.objects.all().filter(video_id=video_pk):
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, video_pk,
                                            f.frame_index)
        im = cv2.imread(path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        # Factors that map detector coordinates back to the stored frame.
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = text_detector.detect(im)
        if len(text_lines) == 0:
            # Nothing to crop, so the frame is not opened with PIL at all.
            continue
        # Open the frame once for all of its regions and always close it.
        with Image.open(path) as img:
            for k in text_lines:
                left, top, right, bottom, score = k
                left, top = int(left * mul_w), int(top * mul_h)
                right, bottom = int(right * mul_w), int(bottom * mul_h)
                r = Region()
                r.region_type = r.DETECTION
                r.confidence = int(100.0 * score)
                r.object_name = "CTPN_TEXTBOX"
                r.y = top
                r.x = left
                r.w = right - left
                r.h = bottom - top
                r.frame_id = f.pk
                r.video_id = video_pk
                # Saved before cropping because the crop is named after r.pk.
                r.save()
                crop = img.crop((left, top, right, bottom))
                crop.save("{}/{}/detections/{}.jpg".format(
                    settings.MEDIA_ROOT, video_pk, r.pk))
class bibnumber_ctpn_caffe(object):
    """CTPN text detector that runs on a cropped region of an image."""

    def __init__(self, dir_model, gpu_id):
        """Build the detector on the given GPU.

        :param dir_model: directory containing ``bib_number/CTPN/`` with the
            prototxt and caffemodel files
        :param gpu_id: GPU device id (coerced with ``int``)
        """
        net_def_file = dir_model + "/bib_number/CTPN/deploy.prototxt"
        model_file = dir_model + "/bib_number/CTPN/ctpn_trained_model.caffemodel"
        caffe.set_mode_gpu()
        caffe.set_device(int(gpu_id))
        text_proposals_detector = TextProposalDetector(
            CaffeModel(net_def_file, model_file))
        self.text_detector = TextDetector(text_proposals_detector)
        # Raw string: same pattern, without invalid string escapes.
        length_regexp = r'Duration: (\d{2}):(\d{2}):(\d{2})\.\d+,'
        self.re_length = re.compile(length_regexp)

    def process(self, image, bbox):
        """Detect text lines inside ``bbox`` of ``image``.

        :param image: array indexed as ``[row, column, channel]``
        :param bbox: sequence whose first four items are used as
            ``(x0, y0, x1, y1)`` after ``int`` conversion
        :return: ``(im_scale, text_lines)`` - the resized crop and the
            detector output for that crop (coordinates are in the resized
            crop, not in ``image``)
        """
        x0, y0, x1, y1 = (int(v) for v in bbox[:4])
        im_crop = image[y0:y1, x0:x1, :]
        im_scale, _ = resize_im(im_crop, cfg.SCALE, cfg.MAX_SCALE)
        text_lines = self.text_detector.detect(im_scale)
        return im_scale, text_lines
def textPredict(input_path):
    """Run CTPN text detection on the image at ``input_path``.

    Caffe runs on the CPU when the first command-line argument is
    ``--no-gpu``; otherwise on GPU ``cfg.TEST_GPU_ID``. The model is loaded
    from ``../models/`` on every call.

    :return: ``(text_lines, f)`` - detector output for the resized image and
        the scale factor returned by ``resize_im``
    """
    cpu_requested = len(sys.argv) > 1 and sys.argv[1] == "--no-gpu"
    if cpu_requested:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)

    model_dir = "../models/"
    net = CaffeModel(model_dir + "deploy.prototxt",
                     model_dir + "ctpn_trained_model.caffemodel")
    detector = TextDetector(TextProposalDetector(net))

    resized, f = resize_im(cv2.imread(input_path), cfg.SCALE, cfg.MAX_SCALE)
    return detector.detect(resized), f
def get_text_lines(self, im, NET_DEF_FILE, MODEL_FILE):
    """Detect text lines in ``im`` and divide the result by the resize factor.

    A new detector is built from ``NET_DEF_FILE`` / ``MODEL_FILE`` on every
    call.

    :param im: image passed to ``resize_im``
    :return: detector output divided by the factor returned by ``resize_im``
    """
    net = CaffeModel(NET_DEF_FILE, MODEL_FILE)
    detector = TextDetector(TextProposalDetector(net))
    resized, factor = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    detections = detector.detect(resized)
    # NOTE(review): the division applies to every column of the result; if
    # rows carry a trailing score it is divided as well - confirm intended.
    return detections / factor
class TextBoxDetector():
    """CTPN text-box detector whose Caffe network is built on first use."""

    def __init__(self, model_path):
        """Remember where the model lives; nothing is loaded yet.

        :param model_path: path of the ``.caffemodel`` file; the network
            definition is expected next to it with a ``.prototxt`` suffix
        """
        self.session = None
        native_path = self._native_str(model_path)
        self.model_path = native_path
        self.network_def = native_path.replace('.caffemodel', '.prototxt')

    @staticmethod
    def _native_str(path):
        """Return ``path`` as the interpreter's native ``str`` type.

        ``str(path.encode('utf-8'))`` produces ``"b'...'"`` on Python 3, so
        the conversion is done per type instead.
        """
        if isinstance(path, str):
            return path
        if isinstance(path, bytes):
            # Python 3 bytes.
            return path.decode('utf-8')
        # Python 2 unicode.
        return path.encode('utf-8')

    def load(self):
        """Select the Caffe device and build the TextDetector."""
        logging.info('Creating networks and loading parameters')
        # Any non-empty GPU_AVAILABLE value enables GPU mode.
        if os.environ.get('GPU_AVAILABLE', False):
            caffe.set_mode_gpu()
            caffe.set_device(cfg.TEST_GPU_ID)
            logging.info("GPU mode")
        else:
            caffe.set_mode_cpu()
            logging.info("CPU mode")
        text_proposals_detector = TextProposalDetector(
            CaffeModel(self.network_def, self.model_path))
        self.session = TextDetector(text_proposals_detector)
        logging.info('model loaded!')

    def detect(self, image_path):
        """Detect text lines in the image at ``image_path``.

        The image is resized with ``resize_im`` before detection and every
        box is scaled back to the pixel grid of the image as read from disk.

        :return: list of dicts with keys ``score``, ``y``, ``x``, ``w``, ``h``
        """
        if self.session is None:
            self.load()
        regions = []
        im = cv2.imread(image_path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = self.session.detect(im)
        for left, top, right, bottom, score in text_lines:
            left, top = int(left * mul_w), int(top * mul_h)
            right, bottom = int(right * mul_w), int(bottom * mul_h)
            regions.append({
                'score': float(score),
                'y': top,
                'x': left,
                'w': right - left,
                'h': bottom - top,
            })
        return regions
class CTPNDetector:
    """CTPN text detector bound to the model files under ``models/``."""

    def __init__(self):
        '''
        Select GPU mode and build the proposal detector and text detector
        from ``models/deploy.prototxt`` and
        ``models/ctpn_trained_model.caffemodel``.
        '''
        self.NET_DEF_FILE = "models/deploy.prototxt"
        self.MODEL_FILE = "models/ctpn_trained_model.caffemodel"

        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)

        net = CaffeModel(self.NET_DEF_FILE, self.MODEL_FILE)
        self.text_proposals_detector = TextProposalDetector(net)
        self.text_detector = TextDetector(self.text_proposals_detector)

    def detect(self, filepath):
        '''
        Read the image at ``filepath``, resize it and run the detector.

        The result is also kept on ``self.text_lines``. Coordinates refer to
        the resized image; the resize factor is not returned.
        '''
        resized, _ = resize_im(cv2.imread(filepath), cfg.SCALE, cfg.MAX_SCALE)
        self.text_lines = self.text_detector.detect(resized)
        return self.text_lines
def text_detec(img_url):
    """Detect text lines in an image and render them to a served file.

    :param img_url: location handed to ``cv2.imread``; its last ``/``
        separated component names the output file
    :return: ``(boxstr, server_url, count)`` - an HTML snippet listing the
        boxes, the URL under which the annotated image is served, and the
        number of detected text lines
    """
    caffe.set_mode_gpu()
    caffe.set_device(cfg.TEST_GPU_ID)
    # Initialize the detectors.
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)

    im = cv2.imread(img_url)
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)
    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())
    print(text_lines)

    box_parts = []
    count = 0
    for bbox in text_lines:
        print(bbox)
        count += 1
        box_parts.append("text[%d]:[%f,%f,%f,%f]<br/>" %
                         (count, bbox[0], bbox[1], bbox[2], bbox[3]))
    boxstr = u''.join(box_parts)

    # str.replace returns a new string, so the result must be kept; the
    # sanitised name is what ends up in the file name and the served URL.
    im_name = img_url.split('/')[-1]
    for unsafe in "?%&=":
        im_name = im_name.replace(unsafe, '_')

    write_path = "/data1/mingmingzhao/data_sets/test/text_detect/text_detect_%s" % (
        im_name)
    print("write_path:" + write_path)
    # The output path is passed as the caption.
    draw_boxes_zmm(im, text_lines, caption=write_path, wait=False)
    server_url = "http://192.168.7.37:8393/static/text_detect/%s" % (
        write_path.split('/')[-1])
    print("server_url:" + server_url)
    return boxstr, server_url, count
class TextBoxDetector():
    """Wraps a CTPN ``TextDetector`` and creates it lazily."""

    def __init__(self):
        # No network yet; detect() triggers load() on first call.
        self.session = None

    def load(self):
        """Put Caffe in GPU mode and instantiate the detector."""
        logging.info('Creating networks and loading parameters')
        net_def_path = "/opt/ctpn/CTPN/models/deploy.prototxt"
        weights_path = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
        proposals = TextProposalDetector(CaffeModel(net_def_path, weights_path))
        self.session = TextDetector(proposals)

    def detect(self, image_path):
        """Return the text regions found in the image at ``image_path``.

        Detection runs on a copy resized by ``resize_im``; the boxes are
        scaled back so they refer to the image as read from disk.

        :return: list of ``{'score', 'y', 'x', 'w', 'h'}`` dicts
        """
        if self.session is None:
            self.load()

        image = cv2.imread(image_path)
        old_h, old_w, _channels = image.shape
        image, _ = resize_im(image, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, _channels = image.shape
        ratio_h = float(old_h) / float(new_h)
        ratio_w = float(old_w) / float(new_w)

        found = []
        for x_min, y_min, x_max, y_max, score in self.session.detect(image):
            x_min = int(x_min * ratio_w)
            y_min = int(y_min * ratio_h)
            x_max = int(x_max * ratio_w)
            y_max = int(y_max * ratio_h)
            found.append({
                'score': float(score),
                'y': y_min,
                'x': x_min,
                'w': x_max - x_min,
                'h': y_max - y_min,
            })
        return found
def process(self, image, bbox):
    """Detect text boxes inside ``bbox`` of ``image``.

    :param image: array indexed as ``[row, column, channel]``
    :param bbox: sequence whose first four items are used as
        ``(x0, y0, x1, y1)`` after ``int`` conversion
    :return: ``(img, boxes, scale)`` - the resized crop, the detected boxes
        for that crop, and the factor the crop was resized by
    """
    def resize_im(im, scale, max_scale=None):
        # Scale the shorter side to `scale`, unless that would push the
        # longer side past `max_scale`; then fit the longer side instead.
        shorter = min(im.shape[0], im.shape[1])
        longer = max(im.shape[0], im.shape[1])
        f = float(scale) / shorter
        if max_scale is not None and f * longer > max_scale:
            f = float(max_scale) / longer
        resized = cv2.resize(im, None, None, fx=f, fy=f,
                             interpolation=cv2.INTER_LINEAR)
        return resized, f

    im_crop = image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2]), :]
    img, scale = resize_im(im_crop, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(self.session, self.net, img)
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    return img, boxes, scale
class CTPNModel(CaffeAiUcloudModel):
    """CTPN text-detection model served through ``CaffeAiUcloudModel``."""

    def __init__(self, conf):
        super(CTPNModel, self).__init__(conf)

    def load_model(self):
        """Put Caffe in CPU mode and build ``self.text_detector``."""
        caffe.set_mode_cpu()
        net = CaffeModel(NET_DEF_FILE, MODEL_FILE)
        self.text_detector = TextDetector(TextProposalDetector(net))

    def execute(self, data, batch_size):
        """Run detection on the first ``batch_size`` items of ``data``.

        :param data: indexable collection of objects with a ``read()`` method
            returning encoded image bytes
        :param batch_size: number of items to process
        :return: list with one string per item: ``str()`` of the detector
            output followed by a newline
        """
        results = []
        index = 0
        while index < batch_size:
            raw = bytearray(data[index].read())
            encoded = np.asarray(raw, dtype=np.uint8)
            # Flag -1 is passed through to cv2.imdecode unchanged.
            decoded = cv2.imdecode(encoded, -1)
            resized, _ = resize_im(decoded, cfg.SCALE, cfg.MAX_SCALE)
            detections = self.text_detector.detect(resized)
            results.append(str(detections) + '\n')
            index += 1
        return results
# Batch demo: run CTPN on every file in DEMO_IMAGE_DIR and draw the boxes.
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)
demo_imnames = os.listdir(DEMO_IMAGE_DIR)
timer = Timer()
for im_name in demo_imnames:
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("Image: %s" % im_name)
    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)
    if im is None:
        # os.listdir returns every entry; cv2.imread yields None for
        # anything it cannot decode, which would crash resize_im below.
        print("Could not read image, skipping.")
        continue
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)
    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())
    im_with_text_lines = draw_boxes(im, text_lines, caption=im_name,
                                    wait=False)
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("Thank you for trying our demo. Press any key to exit...")
cv2.waitKey(0)
# Interactive demo: prompt for file names under ./demo_images until "exit".
text_detector = TextDetector(
    TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE)))
text_proposals_detector = text_detector.text_proposal_detector \
    if False else TextProposalDetector if False else None
class TextRecognizer():
    """
    Recognizes text from a given image
    """

    def __init__(self, mode):
        """Build the CTPN detector and the character classifier.

        :param mode: ``"GPU"`` selects GPU ``cfg.TEST_GPU_ID``; any other
            value selects CPU mode
        """
        if mode == "GPU":
            caffe.set_mode_gpu()
            caffe.set_device(cfg.TEST_GPU_ID)
        else:
            caffe.set_mode_cpu()

        # Initialize the detectors.
        self.text_proposals_detector = TextProposalDetector(
            CaffeModel(cfg.NET_FILE, cfg.MODEL_FILE))
        self.text_detector = TextDetector(self.text_proposals_detector)
        self.timer = Timer()

        self.char_classifier = caffe.Classifier(
            cfg.FONT_PROTO,
            cfg.FONT_MODEL,
            mean=np.load(cfg.FONT_MEANFILE).mean(1).mean(1),
            channel_swap=(2, 1, 0),
            raw_scale=255,
            image_dims=(cfg.FONT_DIMS, cfg.FONT_DIMS))
        # One label per line of the label file.
        with open(cfg.FONT_LBLFILE, 'r') as f:
            self.fontLabels = [x.strip() for x in f]

    def detectText(self, image):
        """
        Detects text from the image given its path

        Returns ``(text_lines, f)`` - the detector output for the resized
        image and the resize factor - or ``None`` (after printing a message)
        when the path does not exist.
        """
        if not os.path.exists(image):
            print("Image not found")
            return None
        img = cv2.imread(image)
        self.timer.tic()
        im, f = resize_im(img, cfg.SCALE, cfg.MAX_SCALE)
        text_lines = self.text_detector.detect(im)
        print("Time: %f" % self.timer.toc())
        return text_lines, f

    def extractText(self, image, boundingBoxes):
        """
        Extracts the text from a given image using the bounding boxes

        Input - image path and the bounding boxes list; the first four items
        of each box are used as (x0, y0, x1, y1)
        Output - list of cropped images; empty when the path does not exist
        """
        if not os.path.exists(image):
            print("Image not found")
            return []
        img = cv2.imread(image)
        return [
            img[int(box[1]):int(box[3]), int(box[0]):int(box[2])]
            for box in boundingBoxes
        ]

    def extractCharacters(self, image):
        """
        Extracts characters from a given "text" image

        Input - "image" opencv image (3-channel colour or grayscale)
        Output - list of sub-images of ``image``, one per inner contour
        """
        extractedChars = []
        # A 2-D grayscale array has no third axis, so check ndim first.
        if image.ndim == 3 and image.shape[2] == 3:
            imgGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            imgGray = image

        # Otsu's thresholding.
        _, binaryThreshold = cv2.threshold(
            imgGray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Dilation.
        rectkernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        rectdilation = cv2.dilate(binaryThreshold, rectkernel, iterations=1)

        # findContours returns 2 or 3 values depending on the OpenCV major
        # version; the contour list is always second from the end.
        npaContours = cv2.findContours(rectdilation.copy(),
                                       cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)[-2]

        for npaContour in npaContours:
            if cv2.contourArea(npaContour) <= cfg.MIN_CONTOUR_AREA:
                continue
            intX, intY, intW, intH = cv2.boundingRect(npaContour)

            # Get subimage of word and find contours of that word.
            imgROI = binaryThreshold[intY:intY + intH, intX:intX + intW]
            subContours = cv2.findContours(imgROI.copy(),
                                           cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)[-2]
            for subContour in subContours:
                pointX, pointY, width, height = cv2.boundingRect(subContour)
                # Sub-contour coordinates are relative to the ROI origin.
                top = intY + pointY
                left = intX + pointX
                extractedChars.append(
                    image[top:top + height, left:left + width])

        return extractedChars

    def recognizeCharacters(self, charList):
        """
        Character classification module

        charList - list of character images
        Returns the label with the highest prediction for each image.
        """
        predictions = self.char_classifier.predict(charList)
        return [self.fontLabels[p.argmax()] for p in predictions]
class CTPNDetector:
    """CTPN text detector that imports Caffe from an explicit location."""

    def __init__(self, NET_DEF_FILE, MODEL_FILE, caffe_path):
        """Import Caffe from ``caffe_path`` and build the detector.

        :param NET_DEF_FILE: network definition passed to ``CaffeModel``
        :param MODEL_FILE: weights file passed to ``CaffeModel``
        :param caffe_path: Caffe root; ``<caffe_path>/python`` is put at the
            front of ``sys.path`` only for the duration of the imports
        """
        sys.path.insert(0, "%s/python" % caffe_path)
        import caffe
        from other import draw_boxes, resize_im, CaffeModel
        from detectors import TextProposalDetector, TextDetector
        sys.path.remove("%s/python" % caffe_path)

        # Kept so getCharBlock can select the device per call.
        self.caffe = caffe

        # Initialize the detectors.
        text_proposals_detector = TextProposalDetector(
            CaffeModel(NET_DEF_FILE, MODEL_FILE))
        self.text_detector = TextDetector(text_proposals_detector)
        self.resize_im = resize_im
        self.draw_boxes = draw_boxes

    def getCharBlock(self, im, gpu_id=0):
        """Resize ``im`` and detect text lines on the selected device.

        :param gpu_id: negative selects CPU mode, otherwise the GPU device id
        :return: ``(text_lines, resized image, resize ratio)``
        """
        if gpu_id < 0:
            self.caffe.set_mode_cpu()
        else:
            self.caffe.set_mode_gpu()
            self.caffe.set_device(gpu_id)

        resized, resize_ratio = self.resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        # The detector gets a copy; the returned image is the resized one.
        text_lines = self.text_detector.detect(resized.copy())
        return text_lines, resized, resize_ratio

    @staticmethod
    def _box_corners(box):
        """Return ``(x1, y1, x2, y2, x3, y3, x4, y4)`` for one box.

        Reads ``box[0]`` and ``box[2]`` as the two x positions, ``box[5]`` as
        the multiplier applied to x, and ``box[6]`` / ``box[7]`` as the
        offset and the span between the two lines built from it.
        """
        b1 = box[6] - box[7] / 2
        b2 = box[6] + box[7] / 2
        x1 = box[0]
        y1 = box[5] * box[0] + b1
        x2 = box[2]
        y2 = box[5] * box[2] + b1
        x3 = box[0]
        y3 = box[5] * box[0] + b2
        x4 = box[2]
        y4 = box[5] * box[2] + b2

        disX = x2 - x1
        disY = y2 - y1
        width = np.sqrt(disX * disX + disY * disY)
        fTmp0 = y3 - y1
        fTmp1 = fTmp0 * disY / width
        x = np.fabs(fTmp1 * disX / width)
        y = np.fabs(fTmp1 * disY / width)
        if box[5] < 0:
            x1 -= x
            y1 += y
            x4 += x
            y4 -= y
        else:
            x2 += x
            y2 += y
            x3 -= x
            y3 -= y
        return x1, y1, x2, y2, x3, y3, x4, y4

    # this is deprecated
    def convert_bbox(self, bboxes):
        """Convert each box to eight integer corner coordinates.

        :return: integer array of shape ``(len(bboxes), 8)`` with columns
            ``x1, y1, x2, y2, x3, y3, x4, y4``
        """
        # Builtin int replaces the np.int alias removed from NumPy.
        text_recs = np.zeros((len(bboxes), 8), int)
        for index, box in enumerate(bboxes):
            for col, value in enumerate(self._box_corners(box)):
                text_recs[index, col] = value
        return text_recs

    def draw_boxes8(self,
                    im,
                    bboxes,
                    is_display=True,
                    color=None,
                    caption="Image",
                    wait=True):
        """
        Draw the four edges of every box on a copy of ``im``.

        boxes: bounding boxes
        color: line colour; when None it is taken from the jet colour map
            (boxes of length 8 or 9, keyed on the last item) or drawn at
            random
        caption, wait: accepted but not used
        Returns ``(annotated image, integer corner array of shape (n, 8))``.
        """
        text_recs = np.zeros((len(bboxes), 8), int)
        im = im.copy()
        for index, box in enumerate(bboxes):
            if color is None:
                if len(box) == 8 or len(box) == 9:
                    c = tuple(cm.jet([box[-1]])[0, 2::-1] * 255)
                else:
                    c = tuple(np.random.randint(0, 256, 3))
            else:
                c = color

            corners = self._box_corners(box)
            x1, y1, x2, y2, x3, y3, x4, y4 = corners
            cv2.line(im, (int(x1), int(y1)), (int(x2), int(y2)), c, 2)
            cv2.line(im, (int(x1), int(y1)), (int(x3), int(y3)), c, 2)
            cv2.line(im, (int(x4), int(y4)), (int(x2), int(y2)), c, 2)
            cv2.line(im, (int(x3), int(y3)), (int(x4), int(y4)), c, 2)
            for col, value in enumerate(corners):
                text_recs[index, col] = value

        if is_display:
            cv2.imshow('result', im)
        return im, text_recs
class TextBoxDetector():
    """TensorFlow CTPN text-box detector whose session is built on first use."""

    def __init__(self, model_path, gpu_fraction=None):
        """Remember the checkpoint directory and the GPU memory fraction.

        :param model_path: path inside the checkpoint directory; only its
            directory part is kept
        :param gpu_fraction: per-process GPU memory fraction; when falsy the
            ``GPU_MEMORY`` environment variable is used (default 0.20)
        """
        self.session = None
        if gpu_fraction:
            self.gpu_fraction = gpu_fraction
        else:
            self.gpu_fraction = float(os.environ.get('GPU_MEMORY', 0.20))
        self.model_path = os.path.dirname(self._native_str(model_path))

    @staticmethod
    def _native_str(path):
        """Return ``path`` as the interpreter's native ``str`` type.

        ``str(path.encode('utf-8'))`` produces ``"b'...'"`` on Python 3, so
        the conversion is done per type instead.
        """
        if isinstance(path, str):
            return path
        if isinstance(path, bytes):
            # Python 3 bytes.
            return path.decode('utf-8')
        # Python 2 unicode.
        return path.encode('utf-8')

    def load(self):
        """Create the session and network, then restore the checkpoint."""
        logging.info('Creating networks and loading parameters')
        cfg_from_file(os.path.join(os.path.dirname(__file__), 'ctpn/text.yml'))
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self.gpu_fraction)
        config = tf.ConfigProto(allow_soft_placement=True,
                                gpu_options=gpu_options)
        self.session = tf.Session(config=config)
        self.net = get_network("VGGnet_test")
        self.textdetector = TextDetector()
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        saver.restore(self.session, ckpt.model_checkpoint_path)

    def detect(self, image_path):
        """Detect text boxes in the image at ``image_path``.

        Detection runs on a resized copy; the boxes are scaled back to the
        pixel grid of the image as read from disk.

        :return: list of dicts with keys ``score``, ``y``, ``x``, ``w``, ``h``
        """
        if self.session is None:
            self.load()
        regions = []
        img = cv2.imread(image_path)
        old_h, old_w, channels = img.shape
        img, scale = self.resize_im(img, scale=TextLineCfg.SCALE,
                                    max_scale=TextLineCfg.MAX_SCALE)
        new_h, new_w, channels = img.shape
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        scores, boxes = test_ctpn(self.session, self.net, img)
        boxes = self.textdetector.detect(boxes, scores[:, np.newaxis],
                                         img.shape[:2])
        for box in boxes:
            # Items 0/1 and 6/7 are used as the two opposite corners and
            # item 8 as the score.
            left, top = int(box[0]), int(box[1])
            right, bottom = int(box[6]), int(box[7])
            score = float(box[8])
            left, top = int(left * mul_w), int(top * mul_h)
            right, bottom = int(right * mul_w), int(bottom * mul_h)
            regions.append({
                'score': score,
                'y': top,
                'x': left,
                'w': right - left,
                'h': bottom - top,
            })
        return regions

    def resize_im(self, im, scale, max_scale=None):
        """Resize ``im`` so its shorter side equals ``scale``.

        When ``max_scale`` is given and the longer side would exceed it, the
        longer side is fitted to ``max_scale`` instead.

        :return: ``(resized image, factor applied to both axes)``
        """
        shorter = min(im.shape[0], im.shape[1])
        longer = max(im.shape[0], im.shape[1])
        f = float(scale) / shorter
        if max_scale is not None and f * longer > max_scale:
            f = float(max_scale) / longer
        resized = cv2.resize(im, None, None, fx=f, fy=f,
                             interpolation=cv2.INTER_LINEAR)
        return resized, f