Python model 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: ctpn.nets.model_train

메소드/함수: model

hotexamples.com에서의 예제들: 5

Python model - 예제 5개를 찾았습니다. 아래는 오픈 소스 프로젝트에서 추출한, Python ctpn.nets.model_train.model의 실제 사용 예제 가운데 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: detect.py 프로젝트: jack139/ocr-with-ctpn

def load_tf_model():
    """Build the CTPN inference graph and restore its checkpointed weights.

    Side effects: ``FLAGS.output_path`` is deleted (if present) and recreated
    empty, and ``CUDA_VISIBLE_DEVICES`` is set to ``FLAGS.gpu``.

    Returns:
        Tuple ``(sess, input_image, input_im_info, bbox_pred, cls_pred,
        cls_prob)``. The session is left open; the caller must close it.

    Raises:
        IOError: if ``FLAGS.checkpoint_path`` contains no checkpoint state.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    input_image = tf.placeholder(tf.float32,
                                 shape=[None, None, None, 3],
                                 name='input_image')
    input_im_info = tf.placeholder(tf.float32,
                                   shape=[None, 3],
                                   name='input_im_info')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    bbox_pred, cls_pred, cls_prob = model.model(input_image)

    # Restore the exponential-moving-average shadow values of the variables.
    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())

    # get_checkpoint_state() returns None when the directory has no valid
    # checkpoint; fail with a clear message before a session is opened.
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
    if ckpt_state is None or not ckpt_state.model_checkpoint_path:
        raise IOError(
            'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
    model_path = os.path.join(
        FLAGS.checkpoint_path,
        os.path.basename(ckpt_state.model_checkpoint_path))

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    print('Restore from {}'.format(model_path))
    saver.restore(sess, model_path)

    return sess, input_image, input_im_info, bbox_pred, cls_pred, cls_prob

예제 #2

파일 보기

def ctpn_recognition(test_images_path, app):
    """Detect text lines in one image with CTPN and crop the widest one.

    Side effects: ``params.middle_path`` is deleted and recreated, the
    ``CUDA_VISIBLE_DEVICES`` environment variable is set to ``params.gpu``,
    and an annotated copy of the image is written into ``params.middle_path``
    under the input file's base name.

    Args:
        test_images_path: path of the image file to analyse.
        app: object exposing ``app.logger.info`` (used for one log line).

    Returns:
        A ``PIL.Image`` built from the first channel of the widest detected
        box when that box is taller than 20 px; otherwise the whole resized
        image (this is also the result when nothing is detected).

    Raises:
        IOError: if the image cannot be read.
    """
    if os.path.exists(params.middle_path):
        shutil.rmtree(params.middle_path)
    os.makedirs(params.middle_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = params.gpu

    # Build into a fresh graph on every call. Re-using the process-wide
    # default graph makes the second call fail because the variable
    # 'global_step' (and the model variables) already exist in it.
    with tf.Graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable(
            'global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(params.checkpoint_path)
            model_path = os.path.join(params.checkpoint_path, os.path.basename(
                ckpt_state.model_checkpoint_path))
            app.logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            # cv2.imread signals failure by returning None, not by raising.
            raw = cv2.imread(test_images_path)
            if raw is None:
                raise IOError('Cannot read image {}'.format(test_images_path))
            im = raw[:, :, ::-1]  # reverse the channel axis

            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={input_image: [img],
                                                              input_im_info: im_info})

            textsegs, _ = proposal_layer(
                cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(
                textsegs, scores[:, np.newaxis], img.shape[:2])
            # np.int was removed in NumPy 1.24; the builtin int is equivalent.
            boxes_array = np.array(boxes, dtype=int)

            img_copy = img.copy()

            # Map width -> [box index, height]; boxes sharing a width
            # overwrite each other, so the last one wins.
            widths = {}
            for i, box in enumerate(boxes_array):
                width, height = get_wh(box[:8].tolist())  # box width and height
                widths[width] = [i, height]

            # [index, height] of the widest box, or None when nothing was found.
            widest = widths[max(widths)] if widths else None
            part_img = img.copy()

            for i, box in enumerate(boxes_array):
                color = (0, 255, 0)

                if widest is not None and i == widest[0] and widest[1] > 20:
                    color = (255, 0, 0)
                    # Pad the box horizontally by 5 px. Clamp the left edge at
                    # 0: a negative start index would wrap around in the slice.
                    box[0] = max(box[0] - 5, 0)
                    box[2] = box[2] + 5
                    part_img = img[box[1]:box[5], box[0]:box[2]][:, :, 0]

                cv2.polylines(img_copy, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=color,
                              thickness=2)

            # NOTE(review): fx is paired with rh and fy with rw. If
            # resize_image returns (height_ratio, width_ratio) these look
            # swapped -- confirm against resize_image before changing.
            img_copy = cv2.resize(
                img_copy, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
            cv2.imwrite(os.path.join(params.middle_path,
                                     os.path.basename(test_images_path)), img_copy[:, :, ::-1])

            part_img = Image.fromarray(part_img.astype('uint8'))

            return part_img

예제 #3

파일 보기

def ctpn_pred(input_path, output_path,textloc_output_path, checkpoint_path,gpu):
    """Run CTPN text detection on a set of images and recognise each crop.

    For every file returned by ``get_images(input_path)`` the image is
    processed twice -- as is ('ORIG') and after a transpose + horizontal flip
    ('ROT'). Detected boxes are scaled back to the raw image, boxes that
    overlap already accepted ones by more than 20% are skipped, and each
    accepted box is cropped, saved and passed to ``txt_recog``.

    Side effects: ``output_path`` is deleted and recreated;
    ``CUDA_VISIBLE_DEVICES`` is set to ``gpu``; the default TF graph is reset;
    crops and one annotated image per input are written to ``output_path``;
    results are appended to ``<textloc_output_path>/<name>_loc.txt`` after its
    previous content has been filtered through ``txtremove``.

    Args:
        input_path: passed to ``get_images`` to obtain the image file names.
        output_path: directory for the cropped and annotated images.
        textloc_output_path: directory holding the ``*_loc.txt`` files.
        checkpoint_path: directory containing the CTPN checkpoint.
        gpu: value assigned to ``CUDA_VISIBLE_DEVICES``.
    """
    print("========== detect text using ctpn ==============")
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu
    tf.reset_default_graph()
    with tf.get_default_graph().as_default():

        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images(input_path)
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)

                base_name = os.path.splitext(os.path.basename(im_fn))[0]

                # Rewrite the existing location file with the text returned
                # by txtremove for the 'ORIG' and 'ROT' tags.
                txtfileloc = os.path.join(textloc_output_path, base_name) + "_loc.txt"
                if os.path.isfile(txtfileloc):
                    with open(txtfileloc, 'r+') as f:
                        textlines = txtremove(f, ['ORIG', 'ROT'])
                        f.seek(0)
                        f.write(textlines)
                        f.truncate()

                start = time.time()
                # cv2.imread reports an unreadable file by returning None
                # rather than raising, so the result must be checked.
                try:
                    img_raw = cv2.imread(im_fn)
                except Exception:
                    img_raw = None
                if img_raw is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                H, W, _ = img_raw.shape
                # White mask; accepted boxes are painted black on it so later
                # boxes can be tested for overlap with earlier ones.
                img_blank = np.ones(shape=[H, W], dtype=np.uint8) * 255
                # Image used to draw the bounding boxes.
                img_draw = img_raw.copy()

                img, (rh, rw) = resize_image(img_raw)
                h, w, c = img.shape
                res = []
                for ifrot in ['ORIG', 'ROT']:
                    im = img.copy()

                    if ifrot == 'ROT':
                        im = cv2.transpose(im)
                        im = cv2.flip(im, 1)
                        bbox_color = (255, 0, 0)
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        bbox_color = (0, 255, 0)
                        im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                           feed_dict={input_image: [im],
                                                                      input_im_info: im_info})

                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(textsegs, scores[:, np.newaxis], im.shape[:2])
                    # np.int was removed in NumPy 1.24; builtin int is equivalent.
                    boxes = np.array(boxes, dtype=int)
                    print("Find number of text:",len(boxes))
                    # Measured from the start of this image, so the 'ROT'
                    # figure includes the 'ORIG' pass.
                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))
                    fx = 1.0 / rw
                    fy = 1.0 / rh

                    for i, box in enumerate(boxes):
                        if ifrot == 'ROT':
                            # Map the corners found in the rotated image back
                            # to the un-rotated, resized image.
                            box = np.array([box[3],h-box[2],box[5],h-box[4],box[7],h-box[6],box[1],h-box[0],box[8]])

                        # Scale x (even indices) and y (odd indices) back to
                        # the raw image size.
                        box[:8:2] = (box[:8:2]*fx).astype(np.int32)
                        box[1::2] = (box[1::2]*fy).astype(np.int32)
                        loc = [int(v) for v in box[0:-1]]

                        # Fraction of this box's bounding rectangle that is
                        # already covered by previously accepted boxes.
                        x0, y0, w0, h0 = cv2.boundingRect(np.array(loc[:8]).astype(np.int32).reshape((-1, 2)))
                        img_crop = img_blank[y0:y0+h0, x0:x0+w0].copy()
                        hc, wc = img_crop.shape[:2]
                        if hc * wc == 0:
                            # Rectangle lies outside the image: nothing to
                            # crop, and the ratio below would divide by zero.
                            continue
                        countzero = hc*wc - cv2.countNonZero(img_crop)

                        if countzero * 1.0 / (hc*wc) <= 0.2:
                            # At most 20% overlap with earlier boxes: accept.
                            cv2.drawContours(img_blank, [np.array(loc).reshape((-1,1,2))], 0, (0), thickness = -1, lineType=8)
                            cv2.polylines(img_draw, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=bbox_color, thickness=2)
                            # Crop the raw image with the bounding rectangle and save it.
                            x0, y0, w0, h0 = cv2.boundingRect(box[:8].astype(np.int32).reshape((-1, 2)))
                            img_crop = img_raw[y0:y0+h0, x0:x0+w0].copy()
                            txtrecog = txt_recog(img_crop)
                            res.append([ifrot]+loc+[txtrecog])
                            cv2.imwrite(os.path.join(output_path, base_name)+"_"+ifrot+"_"+str(format(i, "04"))+".jpg", img_crop)
                            # Plain ints: some OpenCV builds reject NumPy scalars here.
                            cv2.putText(img_draw, str(i), (int(box[0]), int(box[1])), cv2.FONT_HERSHEY_SIMPLEX ,1.0, bbox_color, 2, cv2.LINE_AA)
                # Written once per image, after both passes; ifrot still holds
                # the last loop value, so the file carries the "_ROT" suffix.
                cv2.imwrite(os.path.join(output_path, base_name)+"_"+ifrot+".jpg", img_draw)

                # Each record is [tag, 8 coordinates, recognised text].
                with open(txtfileloc, "a") as f:
                    for ir in res:
                        line = "\t".join(str(field) for field in ir)
                        line += "\n"
                        print(line)
                        f.write(line)

예제 #4

파일 보기

def main(argv=None):
    """Run CTPN text detection on every image from ``get_images()``.

    Each image is processed as is ('orig') and after a transpose + horizontal
    flip ('rot90'). For each pass an annotated image
    ``<pass>-<image name>`` is written to ``FLAGS.output_path`` and the boxes
    of that pass are appended to ``<image stem>.txt`` in the same directory.

    Side effects: ``FLAGS.output_path`` is deleted and recreated,
    ``CUDA_VISIBLE_DEVICES`` is set to ``FLAGS.gpu`` and the default TF graph
    is reset.

    Args:
        argv: unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    tf.reset_default_graph()
    with tf.get_default_graph().as_default():

        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                # cv2.imread returns None for an unreadable file; check that
                # explicitly instead of relying on a bare except (which would
                # also swallow KeyboardInterrupt).
                try:
                    raw = cv2.imread(im_fn)
                except Exception:
                    raw = None
                if raw is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = raw[:, :, ::-1]  # reverse the channel axis

                im, (rh, rw) = resize_image(im)
                h, w, c = im.shape
                img = im
                for im_rot in ['orig', 'rot90']:
                    if im_rot == 'rot90':
                        img = cv2.transpose(img)
                        img = cv2.flip(img, 1)
                        bbox_color = (255, 0, 0)
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        bbox_color = (0, 255, 0)
                        im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={
                            input_image: [img],
                            input_im_info: im_info
                        })

                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                                 im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(textsegs, scores[:,
                                                                 np.newaxis],
                                                img.shape[:2])
                    # np.int was removed in NumPy 1.24; builtin int is equivalent.
                    boxes = np.array(boxes, dtype=int)

                    # Measured from the start of this image, so the 'rot90'
                    # figure includes the 'orig' pass.
                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for i, box in enumerate(boxes):
                        if im_rot == 'rot90':
                            # Map corners found in the rotated image back to
                            # the un-rotated image before drawing.
                            box = np.array([
                                box[3], h - box[2], box[5], h - box[4], box[7],
                                h - box[6], box[1], h - box[0], box[8]
                            ])

                        cv2.polylines(
                            im, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                            True,
                            color=bbox_color,
                            thickness=2)

                    # Boxes accumulate on `im`, so the 'rot90' image also
                    # shows the 'orig' boxes.
                    #im = cv2.resize(im, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     im_rot + "-" + os.path.basename(im_fn)),
                        im[:, :, ::-1])

                    with open(
                            os.path.join(
                                FLAGS.output_path,
                                os.path.splitext(os.path.basename(im_fn))[0]) +
                            ".txt", "a") as f:
                        f.write("\n")
                        for i, box in enumerate(boxes):
                            line = ",".join(str(box[k]) for k in range(8))
                            # NOTE(review): `scores` comes from proposal_layer
                            # while `boxes` comes from TextDetector.detect;
                            # they are paired here purely by index -- confirm
                            # this is the intended score.
                            line += "," + str(scores[i]) + "\r\n"
                            f.write(line)

예제 #5

파일 보기

파일: main.py 프로젝트: ericlferguson/AB-PID-recog

def ctpn():
    """Run CTPN text detection on the images from ``FLAGS.ctpn_input_path``.

    Each image is processed as is ('orig') and after a transpose + horizontal
    flip ('rot'). Every detected box is scaled back to the raw image, cropped
    and saved; per pass, an annotated image and a text file with the boxes
    are written as well. All output goes to ``FLAGS.ctpn_output_path``.

    Side effects: ``FLAGS.ctpn_output_path`` is deleted and recreated,
    ``CUDA_VISIBLE_DEVICES`` is set to ``FLAGS.gpu`` and the default TF graph
    is reset.
    """
    if os.path.exists(FLAGS.ctpn_output_path):
        shutil.rmtree(FLAGS.ctpn_output_path)
    os.makedirs(FLAGS.ctpn_output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    tf.reset_default_graph()
    with tf.get_default_graph().as_default():

        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images(FLAGS.ctpn_input_path)
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                # cv2.imread reports an unreadable file by returning None
                # rather than raising, so the result must be checked.
                try:
                    img_raw = cv2.imread(im_fn)
                except Exception:
                    img_raw = None
                if img_raw is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                # Disabled preprocessing experiments, kept for reference:
                #(thresh, img_raw) = cv2.threshold(img_raw, 127, 255, cv2.THRESH_BINARY)
                # Create kernel
                #kernel = np.array([[-1, -1, -1],
                #                   [-1, 9,-1],
                #                   [-1, -1, -1]])
                # Sharpen image
                #img_raw = cv2.filter2D(img_raw, -1, kernel)

                # Image used to draw the bounding boxes.
                img_draw = img_raw.copy()

                img, (rh, rw) = resize_image(img_raw)

                h, w, c = img.shape
                for ifrot in ['orig', 'rot']:
                    im = img.copy()

                    if ifrot == 'rot':
                        im = cv2.transpose(im)
                        im = cv2.flip(im, 1)
                        bbox_color = (255, 0, 0)
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        bbox_color = (0, 255, 0)
                        im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                           feed_dict={input_image: [im],
                                                                      input_im_info: im_info})

                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(textsegs, scores[:, np.newaxis], im.shape[:2])
                    # np.int was removed in NumPy 1.24; builtin int is equivalent.
                    boxes = np.array(boxes, dtype=int)
                    print(len(boxes))
                    # Measured from the start of this image, so the 'rot'
                    # figure includes the 'orig' pass.
                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))
                    fx = 1.0 / rw
                    fy = 1.0 / rh
                    for i, box in enumerate(boxes):
                        if ifrot == 'rot':
                            # Map corners found in the rotated image back to
                            # the un-rotated, resized image.
                            box = np.array([box[3],h-box[2],box[5],h-box[4],box[7],h-box[6],box[1],h-box[0],box[8]])
                        # Scale x (even indices) and y (odd indices) back to
                        # the raw image size.
                        box[:8:2] = (box[:8:2]*fx).astype(np.int32)
                        box[1::2] = (box[1::2]*fy).astype(np.int32)

                        cv2.polylines(img_draw, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=bbox_color, thickness=2)
                        # Crop the raw image with the bounding rectangle and save it.
                        x0, y0, w0, h0 = cv2.boundingRect(box[:8].astype(np.int32).reshape((-1, 2)))
                        img_crop = img_raw[y0:y0+h0, x0:x0+w0].copy()

                        cv2.imwrite(os.path.join(FLAGS.ctpn_output_path, ifrot+str(format(i, "04"))+"-"+os.path.basename(im_fn)), img_crop)
                        # Plain ints: some OpenCV builds reject NumPy scalars here.
                        cv2.putText(img_draw, str(i), (int(box[0]), int(box[1])), cv2.FONT_HERSHEY_SIMPLEX ,1.0, bbox_color, 2, cv2.LINE_AA)

                    # Boxes accumulate on img_draw, so the 'rot' image also
                    # shows the 'orig' boxes.
                    #im = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(os.path.join(FLAGS.ctpn_output_path, ifrot+"-"+os.path.basename(im_fn)),img_draw[:, :, ::-1])

                    with open(os.path.join(FLAGS.ctpn_output_path, ifrot+"-"+os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                                "a") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(str(box[k]) for k in range(8))
                            line += "," + str(scores[i]) + "\r\n"
                            f.write(line)