Example #1
0
class dyOCR():
    """Wrapper around ``CnOcr`` that returns the first recognised line as text.

    Both entry points join the characters of the first recognised line and
    replace the letters "o" and "O" with the digit "0". When nothing is
    recognised they return the sentinel string ``"-1 暂时不能识别"``.
    """

    # Sentinel handed back to callers when no text could be recognised.
    _UNRECOGNISED = "-1 暂时不能识别"
    # Translation table turning both spellings of the letter "o" into "0".
    _O_TO_ZERO = str.maketrans("oO", "00")

    def __init__(self):
        # cnocr is imported when an instance is built, not at module import.
        from cnocr import CnOcr
        self.ocr = CnOcr()

    def _first_line(self, lines):
        """Return the normalised first line of ``lines`` or the sentinel.

        :param lines: result of ``self.ocr.ocr`` (a sequence of lines, each a
            sequence of characters).
        """
        if not lines:
            return self._UNRECOGNISED
        return "".join(lines[0]).translate(self._O_TO_ZERO)

    def resFromFilePath(self, fp):
        """Recognise the image at path ``fp``.

        :param fp: path of the image file, passed straight to ``CnOcr.ocr``.
        :return: the normalised first line, or the sentinel string.
        """
        return self._first_line(self.ocr.ocr(fp))

    def resFromB64(self, b64):
        """Recognise an image supplied as base64-encoded bytes.

        :param b64: base64 text/bytes of an encoded image file.
        :return: the normalised first line, or the sentinel string when the
            payload cannot be decoded as an image or nothing is recognised.
        """
        img_data = base64.b64decode(b64)
        nparr = np.frombuffer(img_data, np.uint8)
        img_np = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img_np is None:
            # cv2.imdecode yields None for bytes that are not a valid image;
            # report that the same way as "nothing recognised".
            return self._UNRECOGNISED
        return self._first_line(self.ocr.ocr(img_np))
Example #2
0
def test_cand_alphabet2():
    """Check that ``set_cand_alphabet(NUMBERS)`` changes the prediction.

    The same image is recognised twice with one ``CnOcr`` instance: first
    without restriction (expected ``'o12345678'``), then after restricting
    the candidate alphabet to ``NUMBERS`` (expected ``'012345678'``).
    """
    image_path = os.path.join(example_dir, 'hybrid.png')
    recognizer = CnOcr(name='instance1')

    # Unrestricted alphabet.
    lines = list(map(''.join, recognizer.ocr(image_path)))
    print("Predicted Chars:", lines)
    assert len(lines) == 1 and lines[0] == 'o12345678'

    # Same instance, candidates limited to NUMBERS.
    recognizer.set_cand_alphabet(NUMBERS)
    lines = list(map(''.join, recognizer.ocr(image_path)))
    print("Predicted Chars:", lines)
    assert len(lines) == 1 and lines[0] == '012345678'
Example #3
0
def test_multiple_instances(model_name):
    """Create two ``CnOcr`` instances for ``model_name`` and print both predictions.

    Each instance gets a unique name derived from the module-level
    ``INSTANCE_ID`` counter, which is incremented once per instance. The
    second instance is restricted to the ``NUMBERS`` candidate alphabet.
    """
    def _next_instance_name():
        """Bump the global counter, print it and return the instance name."""
        global INSTANCE_ID
        INSTANCE_ID += 1
        print('instance id: %d' % INSTANCE_ID)
        return 'instance-%d' % INSTANCE_ID

    print('test multiple instances for model_name: %s' % model_name)
    img_fp = os.path.join(example_dir, 'hybrid.png')

    first = CnOcr(model_name, name=_next_instance_name())
    print_preds(first.ocr(img_fp))

    second = CnOcr(model_name,
                   name=_next_instance_name(),
                   cand_alphabet=NUMBERS)
    print_preds(second.ocr(img_fp))
Example #4
0
def getOCR(img):
    """Recognise the text in ``img`` with CnOcr and copy it to the clipboard.

    ``img`` is saved as PNG into a ``QBuffer``, reopened with PIL, converted
    to a numpy array and passed to ``CnOcr.ocr``. The recognised lines are
    stored in the module-level ``result_str`` (list of lines) and ``result``
    (lines joined with newlines). A non-empty ``result`` is copied to the
    clipboard; otherwise the user is told that nothing could be read.

    :param img: object exposing ``save(buffer, "PNG")``.
    """
    global result_str, result
    # Reset the shared results first so a failure below can neither reuse the
    # text of a previous call nor hit an undefined name on the first call.
    result_str = []
    result = ""

    buffer = QtCore.QBuffer()
    buffer.open(QtCore.QBuffer.ReadWrite)
    try:
        img.save(buffer, "PNG")
        pil_img = Image.open(io.BytesIO(buffer.data()))
        image_array = np.array(pil_img)
    finally:
        # Close the buffer even when saving or decoding raises.
        buffer.close()

    ocr = CnOcr()
    res = ocr.ocr(image_array)

    try:
        # Join the characters of each line and keep the lines in result_str.
        result_str = [''.join(iter_item) for iter_item in res]
        result = "\n".join(result_str)
    except Exception:
        # Best effort: an unexpected OCR result shape counts as "no text".
        # The single failure message is emitted by the branch below.
        result_str = []
        result = ""

    if result:
        pyperclip.copy(result)
        print(f'INFO: Copied "{result}" to the clipboard')
        notify(f'Copied "{result}" to the clipboard')
    else:
        print("INFO: Unable to read text from image, did not copy")
        notify("Unable to read text from image, did not copy")
Example #5
0
def pathCallBack():
    """Let the user pick an image, OCR it and show the text in ``txtResult``.

    Nothing happens when the file dialog returns an empty string. Otherwise
    the chosen path is stored in ``strPath``, the image is read with mxnet,
    recognised with ``CnOcr`` and every recognised line is written to the
    text widget followed by a newline.
    """
    chosen = filedialog.askopenfilename(
        title='Select picture to OCR',
        filetypes=[('PNG', '*.png'), ('JPG', '*.jpg'), ('JPEG', '*.jpeg'), ('bmp', '*.bmp')],
    )
    if chosen == '':
        return

    strPath.set(chosen)

    recognizer = CnOcr()
    picture = mx.image.imread(chosen, 1)
    recognized = recognizer.ocr(picture)

    # One output line per recognised line, each terminated by "\n".
    text = ''.join(
        ''.join('%s' % ch for ch in line) + "\n" for line in recognized
    )

    # Replace the previous widget content with the new text.
    txtResult.delete(0.0, tk.END)
    txtResult.insert(tk.INSERT, text)
    txtResult.update()
Example #6
0
def test_cand_alphabet():
    """Check that the ``cand_alphabet`` constructor argument changes the prediction.

    A default ``CnOcr`` is expected to read ``'o12345678'`` from the sample
    image; one built with ``cand_alphabet=NUMBERS`` is expected to read
    ``'012345678'``.
    """
    from cnocr import NUMBERS

    image_path = os.path.join(example_dir, 'hybrid.png')

    # Default recogniser, no alphabet restriction.
    recognizer = CnOcr()
    lines = list(map(''.join, recognizer.ocr(image_path)))
    print("Predicted Chars:", lines)
    assert len(lines) == 1 and lines[0] == 'o12345678'

    # Fresh recogniser limited to NUMBERS.
    recognizer = CnOcr(cand_alphabet=NUMBERS)
    lines = list(map(''.join, recognizer.ocr(image_path)))
    print("Predicted Chars:", lines)
    assert len(lines) == 1 and lines[0] == '012345678'
Example #7
0
    def get_list(self):
        """Detect text boxes in ``self.filepath``, OCR each crop and show the annotated image.

        The whole image is recognised once with ``CnOcr.ocr`` (printed at the
        end). ``CnStd.detect`` then supplies the boxes; each box's cropped
        image is recognised with ``ocr_for_single_line``, its outline is drawn
        in red and the recognised text is written at the box's fourth corner.

        :return: the list returned by ``CnStd.detect``.
        """
        detector = CnStd()

        recognizer = CnOcr()
        # NOTE(review): this only overwrites a private attribute after
        # construction; whether it affects the loaded model is not visible
        # here - confirm.
        recognizer._model_name = 'conv-lite-fc'
        print(recognizer._model_name)
        whole_image_result = recognizer.ocr(img_fp=self.filepath)
        boxes = detector.detect(
            self.filepath,
            pse_threshold=0.7,
            pse_min_area=150,
            context='gpu',
            height_border=0.10,
        )
        canvas = Image.open(self.filepath)
        label_font = ImageFont.truetype('./simhei.ttf', 13, encoding='utf-8')
        painter = ImageDraw.Draw(canvas)
        for entry in boxes:
            # Debug output kept as in the original implementation.
            for _ in range(2):
                print('a')
            print(entry)
            quad = entry['box']
            line_text = recognizer.ocr_for_single_line(entry['cropped_img'])

            print('result: %s' % ''.join(str(line_text)))
            # The four (x, y) corners, read row by row from the box array.
            corners = [(quad[row, 0], quad[row, 1]) for row in range(4)]

            painter.polygon(
                [corners[0], corners[3], corners[2], corners[1]],
                outline=(255, 0, 0),
            )
            painter.text(corners[3], str(line_text), (200, 0, 0), font=label_font)
        canvas.show()
        print(whole_image_result)
        return boxes
Example #8
0
class FTC(object):
    """Font-to-Chinese helper: renders text with a font and OCRs the rendering."""

    def __init__(self, font='HYZiKuTangChangLinTiW-2.ttf'):
        """Load ``font`` at size 60, create the recogniser and the ``jpg`` directory.

        :param font: path of the font file handed to ``ImageFont.truetype``.
        """
        self.img_font = ImageFont.truetype(font, 60)
        self.fill_color = "#000000"
        self.ocr = CnOcr()
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('jpg', exist_ok=True)

    def make_all_font_img(self):
        """Render the lowercase alphabet with the font, show it and save it to ``jpg/1.jpg``."""
        img = Image.new('RGB', (70 * 20, 80), color='white')
        draw = ImageDraw.Draw(img)
        # The original literal read "...hig­klm...", i.e. "g" where "j"
        # belongs; the full alphabet is rendered here.
        draw.text((0, 0), u'abcdefghijklmnopqrstuvwxyz', font=self.img_font, fill=self.fill_color)
        img.show()
        img.save('jpg/1.jpg')

    def make_img(self):
        """Render the single code point U+E41B onto a fresh image (result is discarded)."""
        img = Image.new('RGB', (140, 80), color='white')
        draw = ImageDraw.Draw(img)
        draw.text((0, 0), chr(0xe41b), font=self.img_font, fill=self.fill_color)

    def ret_key_font(self):
        """Render the alphabet image and return the raw ``CnOcr.ocr`` result for it."""
        self.make_all_font_img()
        res = self.ocr.ocr('jpg/1.jpg')
        return res
Example #9
0
def main():
    """Command-line entry point: run CnOcr on one image and log the prediction.

    Options:
        --model_name      model name (default ``conv-lite-fc``)
        --model_epoch     model epoch (default ``None``)
        -f / --file       path to the image file (required)
        -s / --single-line
                          treat the image as a single line of text; may be
                          given bare (``-s``) or with a value (``-s false``)
    """
    def _str2bool(value):
        """Interpret a command-line token such as "true" or "0" as a boolean."""
        return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y', 'on')

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name",
                        help="model name",
                        type=str,
                        default='conv-lite-fc')
    parser.add_argument("--model_epoch",
                        type=int,
                        default=None,
                        help="model epoch")
    # Without a file there is nothing to recognise, so let argparse report it.
    parser.add_argument("-f", "--file", required=True,
                        help="Path to the image file")
    # Previously any supplied value (even "False") was a non-empty string and
    # therefore truthy. nargs='?' keeps "-s VALUE" working and also allows a
    # bare "-s"; the value is now parsed as a real boolean.
    parser.add_argument(
        "-s",
        "--single-line",
        nargs='?',
        const=True,
        default=False,
        type=_str2bool,
        help="Whether the image only includes one-line characters",
    )
    args = parser.parse_args()

    ocr = CnOcr(model_name=args.model_name, model_epoch=args.model_epoch)
    if args.single_line:
        res = ocr.ocr_for_single_line(args.file)
    else:
        res = ocr.ocr(args.file)
    logger.info("Predicted Chars: %s", res)
Example #10
0
def OCR():
    """Run CnOcr on the image from ``getScreenImage``, print the result and show it in a message box."""
    recognizer = CnOcr()
    prediction = recognizer.ocr(getScreenImage())
    print("Predicted Chars:", prediction)
    easygui.msgbox(prediction)
Example #11
0
def extract_img(path: str):
    """OCR the image at ``path``, delete the file and return the recognised text.

    For every entry of the OCR result the first element is joined into one
    line; the lines are then joined with newlines.

    :param path: image file to recognise; removed with ``os_remove`` afterwards.
    :return: the recognised lines joined by ``"\\n"``.
    """
    recognized = CnOcr().ocr(path)
    data = "\n".join(["".join(entry[0]) for entry in recognized])
    os_remove(path)
    return data
Example #12
0
def cnocr_ocr(pic_path):
    """OCR ``pic_path`` and print the recognised lines separated by newlines.

    The first element of each result entry is printed character by
    character; no newline follows the last line. An empty OCR result prints
    nothing (the previous ``res[-1]`` lookup raised ``IndexError``).

    :param pic_path: image passed to ``CnOcr.ocr``.
    """
    from cnocr import CnOcr
    ocr = CnOcr()
    res = ocr.ocr(pic_path)
    # str() mirrors what print() did to each character in the original loop.
    lines = [''.join(str(ch) for ch in entry[0]) for entry in res]
    print('\n'.join(lines), end='')
Example #13
0
def cnocr_ocr(pic_path):
    """OCR ``pic_path`` and print the recognised lines separated by newlines.

    Each result entry is printed character by character; no newline follows
    the last line. An empty OCR result prints nothing (the previous
    ``res[-1]`` lookup raised ``IndexError``).

    :param pic_path: image passed to ``CnOcr.ocr``.
    """
    from cnocr import CnOcr
    ocr = CnOcr()
    res = ocr.ocr(pic_path)
    # str() mirrors what print() did to each character in the original loop.
    lines = [''.join(str(ch) for ch in entry) for entry in res]
    print('\n'.join(lines), end='')
Example #14
0
def ocr_image(fname):
    """Recognise ``fname`` with the shared ``CnOcr`` instance and return its text.

    The module-level ``_cnocr`` is created on first use and reused by later
    calls.

    :param fname: image passed to ``CnOcr.ocr``.
    :return: recognised lines joined by newlines.
    """
    global _cnocr

    print('OCR %s' % fname)

    # Build the recogniser only once per process.
    if _cnocr is None:
        _cnocr = CnOcr()

    recognized_lines = _cnocr.ocr(fname)
    return '\n'.join(list(map(''.join, recognized_lines)))
Example #15
0
class OCR_cnocr(object):
    """OCR backend that recognises text with a ``CnOcr`` instance."""

    def __init__(self):
        self.ocr = CnOcr()

    def run(self, img):
        """Recognise ``img`` and return all recognised characters as one string.

        :param img: image data accepted by ``numpy.asarray``.
        :return: every character of every recognised line, concatenated in
            order without separators.
        """
        nary = np.asarray(img)
        res = self.ocr.ocr(nary)
        # A single join replaces the repeated "content = content + x".
        return "".join(ch for line in res for ch in line)
def main():
    """Command-line entry point: run the default CnOcr model on one image and print the result.

    Options:
        -f / --file         path to the image file (required)
        -s / --single-line  treat the image as a single line of text; may be
                            given bare (``-s``) or with a value (``-s false``)
    """
    def _str2bool(value):
        """Interpret a command-line token such as "true" or "0" as a boolean."""
        return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y', 'on')

    parser = argparse.ArgumentParser()
    # Without a file there is nothing to recognise, so let argparse report it.
    parser.add_argument("-f", "--file", required=True,
                        help="Path to the image file")
    # Previously any supplied value (even "False") was a non-empty string and
    # therefore truthy. nargs='?' keeps "-s VALUE" working and also allows a
    # bare "-s"; the value is now parsed as a real boolean.
    parser.add_argument(
        "-s",
        "--single-line",
        nargs='?',
        const=True,
        default=False,
        type=_str2bool,
        help="Whether the image only includes one-line characters")
    args = parser.parse_args()

    ocr = CnOcr()
    if args.single_line:
        res = ocr.ocr_for_single_line(args.file)
    else:
        res = ocr.ocr(args.file)
    print("Predicted Chars:", res)
Example #17
0
def txtOcr():
    """Handle the upload form: save the posted image, OCR it and render the result.

    On a POST with an allowed file the upload is stored under
    ``app.config['UPLOAD_FOLDER']``, read with mxnet and recognised with
    ``CnOcr``. The response is the base page ``html`` followed by the
    uploaded image and the prediction. Any other request returns ``html``
    unchanged.
    """
    if request.method == 'POST':
        file = request.files['file']
        if file and allowed_file(file.filename):
            filename = secure_filename(''.join(lazy_pinyin(file.filename)))
            # Build the storage path once and use it for saving and reading.
            img_fp = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(img_fp)
            file_url = url_for('uploaded_file', filename=filename)
            import mxnet as mx
            from cnocr import CnOcr
            # Only ``escape`` is imported: the name ``html`` already refers to
            # the page template used below and must not be rebound.
            from html import escape
            ocr = CnOcr()
            img = mx.image.imread(img_fp, 1)
            res = ocr.ocr(img)
            print("Predicted Chars:", res)
            # The prediction comes from a user-supplied image, so it is
            # escaped before being embedded in the page.
            return html + '<br><img src=' + file_url + '><p>预测结果如下:' + escape(
                str(res)) + '</p>'
    return html
Example #18
0
def multLineOcr(request):
    """Recognise the base64 image posted as ``img`` and return the result as JSON.

    Spaces in the posted value are turned back into ``+`` before decoding.
    The image is decoded with OpenCV, converted to grayscale and passed to
    ``CnOcr.ocr``.

    :param request: request object exposing ``POST.get``.
    :return: ``HttpResponse`` whose body is ``json.dumps`` of the OCR result.
    """
    imgs = request.POST.get("img")
    # Form decoding turns "+" into " "; restore it so the base64 is valid.
    b64_text = imgs.replace(" ", "+")

    raw = base64.b64decode(b64_text)
    # np.fromstring is deprecated for binary input; frombuffer is the
    # replacement and avoids a copy.
    nparr = np.frombuffer(raw, np.uint8)
    # imdecode takes an IMREAD_* flag. The colour-conversion code passed
    # before (COLOR_BGR2RGB) only worked through its numeric value and could
    # yield a single-channel image, which breaks cvtColor below.
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    # OpenCV decodes to BGR channel order, so convert from BGR.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    ocr = CnOcr()
    res = ocr.ocr(img)

    return HttpResponse(
        json.dumps(res)
    )
Example #19
0
    def cn_ocr(self, _range, delay_time=5):
        """
        Recognise the six-digit verification code with CnOCR.

        A screenshot of ``_range`` is taken and recognised repeatedly until
        exactly one six-digit code is found that differs from the
        module-level ``sms_code``; that code is stored in ``sms_code`` and
        returned. Between attempts the method sleeps ``delay_time`` seconds.

        :param delay_time: seconds to wait before retrying
        :param _range: screenshot region of the code (left x, left y, right x, right y)
        :return: the recognised six-digit code
        """
        global sms_code

        # Build the recogniser once instead of once per retry.
        cn_ocr = CnOcr(model_name="conv-lite-fc",
                       context="cpu",
                       root="conv-lite-fc")
        # Tried in order; each captures only the six digits. The last pattern
        # previously captured the Chinese prefix together with the digits.
        patterns = (
            r'\'([\d]{6})\'',
            r'([\d]{6})[\u3002]',
            r'您的验证码为([\d]{6})',
        )

        # A loop replaces the former self-recursion, so a long wait cannot
        # exhaust the recursion limit.
        while True:
            self.get_code_pic(_range)
            ret = cn_ocr.ocr("ios_code_pic.png")
            result = "".join("".join(v) for v in ret)

            find_all = []
            for pattern in patterns:
                find_all = re.findall(pattern, result)
                if len(find_all) == 1:
                    break

            # Recognition result
            self.logger.info("CnOCR识别结果:" + result)

            if len(find_all) == 1 and find_all[0] != sms_code:
                sms_code = find_all[0]
                return sms_code

            # No code, an ambiguous match, or the same code as last time.
            self.logger.info("暂未获取到最新验证码,%d秒后重试" % delay_time)
            time.sleep(delay_time)
Example #20
0
class FTC(object):
    """Font-to-Chinese helper: maps the ``uni*`` glyph names of a font to OCR'd characters."""

    def __init__(self, font='shaoq.woff'):
        """Load ``font`` for rendering and glyph lookup and create the ``jpg`` directory.

        :param font: path of the font file; used both for rendering
            (``ImageFont.truetype``) and for reading glyph names (``TTFont``).
        """
        self.img_font = ImageFont.truetype(font, 60)
        self.fill_color = "#000000"
        # Use the requested font here as well; the path was hard-coded
        # before, so a non-default ``font`` was rendered with one file and
        # looked up in another.
        self.font = TTFont(font)
        self.keys = [key for key in self.font['glyf'].keys()
                     if key.startswith('uni')]
        self.ocr = CnOcr()
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('jpg', exist_ok=True)

    def make_all_font_img(self):
        """Render every ``uni*`` glyph side by side and save the image to ``jpg/1.jpg``."""
        img = Image.new('RGB', (70 * len(self.keys), 80), color='white')
        draw = ImageDraw.Draw(img)
        # Drop the "uni" prefix by slicing: str.strip("uni") removes any of
        # the characters u/n/i from both ends rather than the prefix.
        text = ''.join(chr(int(key[len('uni'):], 16)) for key in self.keys)
        draw.text((0, 0),
                  text,
                  font=self.img_font,
                  fill=self.fill_color)
        img.save('jpg/1.jpg')

    def make_img(self):
        """Render the single code point U+E41B onto a fresh image (result is discarded)."""
        img = Image.new('RGB', (140, 80), color='white')
        draw = ImageDraw.Draw(img)
        draw.text((0, 0),
                  chr(0xe41b),
                  font=self.img_font,
                  fill=self.fill_color)

    def ret_key_font(self):
        """Render all glyphs, OCR the image and pair glyph names with recognised characters.

        :return: dict mapping each key in ``self.keys`` to the character at
            the same position in the first recognised line; empty when the
            OCR returns no line.
        """
        self.make_all_font_img()
        res = self.ocr.ocr('jpg/1.jpg')
        if not res:
            return {}
        return dict(zip(self.keys, res[0]))
Example #21
0
 def POST(self):
     """Store the uploaded ``myfile``, OCR it and return the result as a JSON string.

     The file is written to ``./upload_file`` under its base name, recognised
     with a fresh ``CnOcr`` instance and the result is serialised with
     ``NumpyEncoder``. When no file was uploaded an empty JSON list is
     returned.
     """
     x = web.input(myfile={})
     filedir = './upload_file'  # change this to the directory you want to store the file in.
     # Default answer, so the return below never hits an unbound name.
     jsonStr = json.dumps([])
     # ``myfile`` defaults to {} above, which has no ``filename``; getattr
     # keeps a request without an upload from raising AttributeError.
     upload_name = getattr(x.myfile, 'filename', None) if 'myfile' in x else None
     if upload_name:
         # Replace the windows-style slashes with linux ones.
         filepath = upload_name.replace('\\', '/')
         # Keep only the last path component (the filename with extension).
         filename = filepath.split('/')[-1]
         target = filedir + '/' + filename
         # ``with`` closes the file even when reading or writing fails.
         with open(target, 'wb') as fout:
             fout.write(x.myfile.file.read())
         myOcr = CnOcr()
         resultData = myOcr.ocr(target)
         # Drop the recogniser and collect right away, as the original did.
         del myOcr
         gc.collect()
         jsonStr = json.dumps(resultData, cls=NumpyEncoder)
     return jsonStr
Example #22
0
from cnocr import CnOcr

# Recognise one screenshot with a default CnOcr instance; the result stays in ``res``.
ocr = CnOcr()
image_path = r'E:\企业微信截图_15927938103453.png'
res = ocr.ocr(image_path)
Example #23
0
import sys
from cnocr import CnOcr
# Recognise the image named by the first command-line argument and print the result.
ocr = CnOcr()
image_path = sys.argv[1]
res = ocr.ocr(image_path)
print("Predicted Chars:", res)
Example #24
0
from cnocr import CnOcr
import cnocr
from cnstd import CnStd

# Detect text boxes in one picture, recognise another picture as a whole,
# then recognise and print every detected crop line by line.
std = CnStd()
ocr = CnOcr()
box_info_list = std.detect('E:\\Work Place\\pocr\\pic\\2.png')
res = ocr.ocr('E:\\Work Place\\pocr\\pic\\1.png')
for box_info in box_info_list:
    crp_img = box_info['cropped_img']
    ocr_res = ocr.ocr_for_single_line(crp_img)
    line_text = ''.join(ocr_res)
    print('result: ' + line_text)
Example #25
0
import mxnet as mx
import sys
import json
from cnocr import CnOcr


class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to plain Python values."""

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        NumPy integers become ``int``, NumPy floats become ``float``, NumPy
        booleans become ``bool`` and arrays become nested lists. Anything
        else is passed to the base class, which raises ``TypeError``.
        """
        # The abstract scalar bases cover every sized integer/float type and
        # avoid aliases such as np.float_, which NumPy 2.0 removed.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


# Recognise the image named by the first command-line argument and print the
# result as JSON (NumPy values are converted by NumpyEncoder).
ocr = CnOcr()
image_path = str(sys.argv[1])
img = mx.image.imread(image_path, 1)
res = ocr.ocr(img)
jsonStr = json.dumps(res, cls=NumpyEncoder)
print(jsonStr)
Example #26
0
def conn_USTC(user):
    '''Log in to the USTC academic affairs system through unified identity authentication.

    Steps performed:
      1. GET the login page and extract the authentication link from it.
      2. Open a session and GET that link (comment in the original: it
         redirects to the unified authentication page).
      3. Extract the captcha image URL, download the image and run CnOcr on it.
      4. POST the login form through the session.

    :param user: mapping whose ``'user'`` entry holds ``'username'`` and
        ``'password'``.
    :return: the ``requests`` session used for the login.
    '''
    # Implemented with requests, see
    # https://blog.csdn.net/xc_zhou/article/details/81021496?utm_medium=distribute.pc_relevant_t0.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-1.control&dist_request_id=&depth_1-utm_source=distribute.pc_relevant_t0.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-1.control,https://blog.csdn.net/qq_37616069/article/details/80376776
    # https://www.jianshu.com/p/8cd6e9bc2680 : analyse the login flow with the
    # Chrome developer tools first, then imitate it.
    # A webdriver based approach is also possible, see
    # https://blog.csdn.net/Haven200/article/details/103208795
    user = user['user']
    # First visit: obtain the unified authentication address.
    login_url = 'https://jw.ustc.edu.cn/login'
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding':
        'gzip, deflate, br',
        'Accept-Language':
        'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja-JP;q=0.6,ja;q=0.5',
        'cache-control':
        'max-age=0',
        'sec-ch-ua':
        '"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"',
        'sec-ch-ua-mobile':
        '?0',
        'sec-ch-ua-platform':
        '"Windows"',
        'sec-fetch-dest':
        'document',
        'sec-fetch-mode':
        'navigate',
        'sec-fetch-site':
        'none',
        'sec-fetch-user':
        '******',
        'upgrade-insecure-requests':
        '1',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
    }
    print('connecting to %s ...' % login_url)
    res = requests.get(url=login_url, headers=headers)
    parser = Parser_loginurl()
    parser.feed(res.text)
    parser.close()
    print(res.text)
    print("获取的统一身份认证地址:{0}".format(parser.links[0]))
    # Build the next URL from the scheme/host of the login URL and the link.
    urlparser = urlparse(login_url)
    host = urlparser.netloc
    ucaslogin_url = urlparser.scheme + '://' + host + parser.links[0]

    # Visit the bridge between the academic system and unified authentication.
    session = requests.session()
    print('connecting to %s ...' % ucaslogin_url)
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding':
        'gzip, deflate, br',
        'Accept-Language':
        'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja-JP;q=0.6,ja;q=0.5',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'
    }
    # Kept under its own name: the Referer, the session cookie and the POST
    # target below must come from this response, not from the captcha image
    # response that used to overwrite ``res``.
    login_page = session.get(url=ucaslogin_url, headers=headers)

    # Fetch the captcha image and recognise it.
    # NOTE(review): the submitted code is still this placeholder; the OCR
    # result below is only printed - confirm the intended wiring.
    validate_code = '1234'

    # Parse the login page to get the captcha image link.
    parser = Parser_validateimg()
    parser.feed(login_page.text)
    parser.close()
    print("验证码图片地址:{0}".format(parser.links[0]))
    img_url = parser.links[0]
    path = './validatecode.jpg'

    # Download and save the captcha image.
    # NOTE(review): fetched outside ``session`` as in the original; a captcha
    # presumably has to be requested within the login session - confirm.
    img_res = requests.get(img_url)
    with open(path, 'wb') as fp:
        fp.write(img_res.content)

    # Recognise the captcha.
    ocr = CnOcr()
    ocr_res = ocr.ocr(path)
    print(ocr_res)

    # Establish the unified authentication connection and keep it.
    # The form parameters imitate the request seen in the Chrome developer tools.
    headers['Referer'] = login_page.url
    headers['Cookie'] = 'JSESSIONID=' + login_page.cookies['JSESSIONID']
    params = {
        'model': 'uplogin.jsp',
        'service': '',
        'warn': '',
        'showCode': '1',
        'username': user['username'],
        'password': user['password'],
        'LT': validate_code,
        'button': ''
    }
    # The password is deliberately no longer printed.
    print('connecting to {0},{1} ...'.format(login_page.url, user['username']))
    session.post(url=login_page.url, data=params, headers=headers)
    return session
Example #27
0
from cnocr import CnOcr


def convert(s):
    """Concatenate the strings in ``s`` into a single string with no separator."""
    return "".join(s)


# Full Screen 2880 x 1920

# Recognise one image and print the recognised lines as a list of strings.
imgfile = "e:\\bwin\\tmp\\code3.jpg"

ocr = CnOcr()
res = ocr.ocr(imgfile)
txt = list(map(''.join, res))

print("Content:\n", txt)
Example #28
0
import mxnet as mx
import cv2

ocr = CnOcr(name='img-tarn', model_name='densenet-lite-gru')

# Example: download an image, binarise it and run OCR on the binarised result.
response = requests.get(
    'http://8.210.115.9/img.php?num=NjMyMzgy&x=MzM5NDgwNTgzNTk=&s=77600120640')
# Stop on an HTTP error instead of handing an error page to PIL.
response.raise_for_status()
# Normalise to three-channel RGB so cvtColor below always receives the
# channel layout it is told about, whatever mode the source image has.
img_fp = Image.open(BytesIO(response.content)).convert('RGB')
np_image = np.array(img_fp)

# Arrays built from PIL images are in RGB order, so convert from RGB
# (the BGR code used before swaps the red and blue weights).
gray = cv2.cvtColor(np_image, cv2.COLOR_RGB2GRAY)

# Inverse binary threshold: pixels above 168 become 0, all others 255.
t, rst = cv2.threshold(gray, 168, 255, cv2.THRESH_BINARY_INV)

res = ocr.ocr(rst)

text = '\n'.join([''.join(a) for a in res])
print("Predicted Chars:", text)
Example #29
0
# -*- coding: utf-8 -*-
#@Time : 2020/12/30 下午2:13
#@Author : 江啸栋19262010049
#@File : run.py
#@Software : PyCharm

from cnocr import CnOcr
# Recognise the bundled sample image and print the raw result.
ocr = CnOcr()
sample_image = 'examples/00010991.jpg'
res = ocr.ocr(sample_image)
print("Predicted Chars:", res)

Example #30
0
# NOTE(review): ``new_img`` and the first ``left``/``top`` values are defined
# before this excerpt.
right = 450
bottom = 470
image_obj2 = new_img.crop((left, top, right, bottom))

# Taxpayer identification number
left = 155
top = 470
right = 450
bottom = 490
image_text3 = new_img.crop((left, top, right, bottom))
# Show the image
image_text3.show()

# Invoice issuer
left = 528
top = 550
right = 670
bottom = 600
image_obj4 = new_img.crop((left, top, right, bottom))
image_obj4.show()

# One recogniser serves both crops; it was constructed twice before.
ocr = CnOcr()
for crop in (image_obj2, image_obj4):
    # Each crop goes through the same temporary file, as before.
    crop.save("tmp.jpg")
    res = ocr.ocr("tmp.jpg")
    # An empty result prints an empty line instead of raising IndexError.
    print("".join(res[0]) if res else "")