Ejemplo n.º 1
0
def ocr(pic):
    """Run Tencent Cloud handwriting OCR on a local image file.

    Args:
        pic: Path of the image file to recognise.

    Returns:
        A dict with ``status`` (bool) and ``msg`` (str). On success it also
        carries ``data``: the ``DetectedText`` strings in the order the
        service returned them. A missing file or an SDK error produces a
        ``status: False`` dict instead of an exception.
    """
    try:
        with open(pic, 'rb') as image_file:
            raw_bytes = image_file.read()
        encoded_image = base64.b64encode(raw_bytes).decode('utf-8')

        # NOTE(review): the API key pair is embedded in the source; it
        # should be rotated and loaded from configuration instead.
        cred = credential.Credential("AKID96rMeho9uQiqjRvCI8C3f6esstjMjFZi",
                                     "3toZ7RmtlSv9EA0f8LNJ6i6MOYXHoQr5")
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile('TC3-HMAC-SHA256')
        client_profile.httpProfile = http_profile
        client = ocr_client.OcrClient(cred, "ap-guangzhou", client_profile)

        request = models.GeneralHandwritingOCRRequest()
        request.from_json_string('{"ImageBase64":"' + encoded_image + '"}')

        reply = client.GeneralHandwritingOCR(request)
        payload = json.loads(reply.to_json_string())
        detections = payload.get('TextDetections')
        if not detections:
            return {'status': False, 'msg': '识别失败'}
        return {
            'status': True,
            'msg': 'success',
            'data': [entry['DetectedText'] for entry in detections],
        }
    except FileNotFoundError as err_file:
        return {'status': False, 'msg': err_file.strerror}
    except TencentCloudSDKException as err:
        return {'status': False, 'msg': err.get_message()}
Ejemplo n.º 2
0
def OcrBase64(Base64, SecretId, SecretKey):
    """Recognise text in a Base64-encoded image with Tencent Cloud OCR.

    Args:
        Base64: The image content as a Base64 string.
        SecretId: Tencent Cloud secret id.
        SecretKey: Tencent Cloud secret key.

    Returns:
        All ``DetectedText`` values concatenated into one string, or
        ``None`` when the SDK raises (the error is printed).
    """
    try:
        cred = credential.Credential(SecretId, SecretKey)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)

        req = models.GeneralBasicOCRRequest()
        # The previous hand-built string was not valid Python (the pieces
        # were juxtaposed without "+"); let json do the quoting instead.
        req.from_json_string(json.dumps({"ImageBase64": Base64}))

        resp = client.GeneralBasicOCR(req)
        detections = json.loads(resp.to_json_string()).get('TextDetections') or []
        return ''.join(item['DetectedText'] for item in detections)
    except TencentCloudSDKException as err:
        print(err)
Ejemplo n.º 3
0
def Tencent_OCR_IDCard(pic_str, flag_side, flag_piccut, flag_porcut):
    """Call Tencent Cloud ID-card OCR and return the reply as JSON text.

    Args:
        pic_str: The image as a Base64 string; it is inserted verbatim into
            the request JSON.
        flag_side: Truthy selects ``CardSide`` "BACK", falsy "FRONT".
        flag_piccut: Truthy sends ``CropIdCard`` false, falsy sends true.
        flag_porcut: Truthy sends ``CropPortrait`` false, falsy sends true.

    Returns:
        The string produced by the response's ``to_json_string()``.
    """
    # NOTE(review): a truthy crop flag switches the matching option OFF;
    # confirm that this inversion is what callers expect.
    cred = credential.Credential("Your ID", "Your Secret")
    http_profile = HttpProfile()
    http_profile.endpoint = "ocr.tencentcloudapi.com"
    client_profile = ClientProfile()
    client_profile.httpProfile = http_profile
    client = ocr_client.OcrClient(cred, "ap-beijing", client_profile)

    card_side = "BACK" if flag_side else "FRONT"
    crop_id_card = "false" if flag_piccut else "true"
    crop_portrait = "false" if flag_porcut else "true"

    # "Config" is itself a JSON document embedded as a string, hence the
    # escaped inner quotes.
    params = (
        r'{"ImageBase64":"' + pic_str
        + r'","CardSide":"' + card_side
        + r'","Config":"{\"CropIdCard\":' + crop_id_card
        + r',\"CropPortrait\":' + crop_portrait
        + r'}"}'
    )

    request = models.IDCardOCRRequest()
    request.from_json_string(params)
    return client.IDCardOCR(request).to_json_string()
Ejemplo n.º 4
0
def tx_ocr(filePath, fileName):
    """Recognise a table in an image and save the result as an .xlsx file.

    Args:
        filePath: Path of the image file to send.
        fileName: Base name (without extension) of the workbook written to
            ``./res_xlsx/``.

    Returns:
        None. The workbook is written to disk and a message is printed.
    """
    import os

    # Build the credential object from the Tencent Cloud secretId/secretKey.
    # NOTE(review): the API key pair is embedded in the source; it should
    # be rotated and loaded from configuration instead.
    cred = credential.Credential("AKIDoPltlNQxwfdPxFP5h7qwFLCojSvDC5f3",
                                 "5wsZ5L1Vq0zlHYeV6yfGdkcFdmjTxCG6")

    httpProfile = HttpProfile()
    httpProfile.endpoint = "ocr.tencentcloudapi.com"
    clientProfile = ClientProfile()
    clientProfile.httpProfile = httpProfile
    client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)

    req = models.TableOCRRequest()
    with open(filePath, "rb") as image_file:
        # Base64 is a text encoding of the raw bytes, not encryption.
        req.ImageBase64 = base64.b64encode(image_file.read()).decode('utf-8')

    resp = client.TableOCR(req)
    workbook_bytes = base64.b64decode(resp.Data)

    output_dir = './res_xlsx/'
    # Writing used to fail when the output directory had not been created.
    os.makedirs(output_dir, exist_ok=True)
    output_path = output_dir + fileName + '.xlsx'
    with open(output_path, "wb") as workbook_file:
        workbook_file.write(workbook_bytes)
    print('xlsx successed')
Ejemplo n.º 5
0
def get_tencent_biz(img_file_dir):
    """Read name and address from a business-licence image.

    Args:
        img_file_dir: Path of the image file to send.

    Returns:
        A ``(Name, Address)`` tuple taken from the SDK response, or
        ``('', '')`` when the SDK raises (the error is printed).
    """
    # NOTE(review): the API key pair is embedded in the source; it should
    # be rotated and loaded from configuration instead.
    secret_id = "AKIDXjfVfZDDySfx1OHKt63lCNIwUBhxhceR"
    secret_key = "g5LPDUVdV8XfYHju87oEOJyMTfcJkupW"
    try:
        with open(img_file_dir, 'rb') as image_file:
            encoded = base64.b64encode(image_file.read()).decode('utf-8')
        payload = '{"ImageBase64":"' + encoded + '"}'

        cred = credential.Credential(secret_id, secret_key)
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        client = ocr_client.OcrClient(cred, "ap-shanghai", client_profile)

        request = models.BizLicenseOCRRequest()
        request.from_json_string(payload)
        licence = client.BizLicenseOCR(request)
    except TencentCloudSDKException as err:
        print(err)
        return '', ''
    else:
        return licence.Name, licence.Address
Ejemplo n.º 6
0
def ocr(file):
    """Run Tencent Cloud high-accuracy OCR on raw image bytes.

    Args:
        file: The image content as a bytes-like object.

    Returns:
        A dict with ``status`` (bool) and ``msg`` (str). On success it also
        carries ``data``: the ``DetectedText`` strings in the order the
        service returned them.
    """
    try:
        encoded_image = base64.b64encode(file).decode('utf-8')

        tencent_settings = config.tencent['general']
        cred = credential.Credential(tencent_settings['secretid'],
                                     tencent_settings['secretkey'])
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile('TC3-HMAC-SHA256')
        client_profile.httpProfile = http_profile
        client = ocr_client.OcrClient(cred, "ap-guangzhou", client_profile)

        request = models.GeneralAccurateOCRRequest()
        request.from_json_string(json.dumps({'ImageBase64': encoded_image}))

        reply = client.GeneralAccurateOCR(request)
        payload = json.loads(reply.to_json_string())
        detections = payload.get('TextDetections')
        if not detections:
            return {'status': False, 'msg': '识别失败'}
        return {
            'status': True,
            'msg': 'success',
            'data': [entry['DetectedText'] for entry in detections],
        }
    except FileNotFoundError as err_file:
        return {'status': False, 'msg': err_file.strerror}
    except TencentCloudSDKException as err:
        return {'status': False, 'msg': err.get_message()}
Ejemplo n.º 7
0
    def img_to_excel(self, image_path, secret_id, secret_key):
        """Convert a table image to an Excel file via Tencent Cloud TableOCR.

        Args:
            image_path: Path of the image file to send.
            secret_id: Tencent Cloud secret id.
            secret_key: Tencent Cloud secret key.

        Returns:
            The path of the written workbook, ``image_path + ".xlsx"``.
        """
        # Build the credential object from the account's secretId/secretKey.
        cred = credential.Credential(secret_id, secret_key)

        # Build the client.
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"
        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        clientProfile.signMethod = "TC3-HMAC-SHA256"
        client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)

        # The request type has to match the API being called: TableOCR was
        # previously handed a GeneralFastOCRRequest.
        req = models.TableOCRRequest()

        # Read the image and send it as Base64 text.
        with open(image_path, 'rb') as f:
            image = f.read()
        req.ImageBase64 = str(base64.b64encode(image), encoding='utf-8')

        # Call the API through the client.
        resp = client.TableOCR(req)

        # Data holds the Base64-encoded Excel content.
        data = resp.Data

        # Decode it into the workbook file.
        path_excel = image_path + ".xlsx"
        with open(path_excel, 'wb') as f:
            f.write(base64.b64decode(data))
        return path_excel
Ejemplo n.º 8
0
def get_json(path):
    """Send a local image to Tencent Cloud QR-code OCR and print the reply.

    Args:
        path: Path of the image file to read.

    Returns:
        None. The JSON reply is printed; SDK errors are printed as well.
    """
    try:
        # NOTE(review): both credential strings are empty here; real values
        # have to be supplied before this can succeed.
        cred = credential.Credential("", "")
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        client = ocr_client.OcrClient(cred, "ap-beijing", client_profile)

        request = models.QrcodeOCRRequest()
        # A local image is sent as the Base64 text of its bytes.
        with open(path, 'rb') as image_file:
            encoded = base64.b64encode(image_file.read()).decode('utf-8')
        request.from_json_string('{"ImageBase64":"' + encoded + '"}')

        reply = client.QrcodeOCR(request)
        print(f"腾讯值:{reply.to_json_string()}")
    except TencentCloudSDKException as err:
        print(err)
Ejemplo n.º 9
0
def tencentOCR(src, format):
    """Recognise text in a Base64 image with Tencent Cloud general OCR.

    Args:
        src: The image content as a Base64 string.
        format: Image subtype placed in the ``data:image/<format>`` prefix.

    Returns:
        The detected lines, each followed by a newline, joined into one
        string. An empty string is returned when the SDK raises (the error
        is printed) or when nothing was detected.
    """
    try:
        # NOTE(review): the API key pair is embedded in the source; it
        # should be rotated and loaded from configuration instead.
        cred = credential.Credential("AKIDqUBfvwgZCan9Ppq93kEVgcHM4QY24Z9U",
                                     "IqSTZUQ7nJSPzsxnOo34rQOFsIlx1tcI")
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)

        req = models.GeneralBasicOCRRequest()
        # json.dumps quotes the caller-supplied pieces correctly.
        image_uri = "data:image/" + format + ";base64," + src
        req.from_json_string(json.dumps({"ImageBase64": image_uri}))

        resp = client.GeneralBasicOCR(req)
        recv = resp.to_json_string()
    except TencentCloudSDKException as err:
        print(err)
        # Without a reply there is nothing to parse; falling through used
        # to fail on the unbound "recv".
        return ""

    parsed = json.loads(recv)
    # jsonpath returns False (not an empty list) when nothing matches.
    detected_texts = jsonpath.jsonpath(parsed, "$..DetectedText") or []
    for msg in detected_texts:
        print(msg)
    return "".join(msg + '\n' for msg in detected_texts)
Ejemplo n.º 10
0
    def picocr(self):
        """Grab a screen region, OCR it and write the text lines to a file.

        The region given by ``p`` is captured and saved to ``fp.ocr_area``,
        sent to Tencent Cloud general OCR, and each ``DetectedText`` is
        written on its own line to ``fp.txt_ocr_path``. The raw JSON reply
        is stored in ``Orecognition.ocrresult``. SDK errors are printed.
        """
        ocrarea = ImageGrab.grab(bbox=(p.left, p.top, p.right, p.bottom))
        ocrarea.save(fp.ocr_area)
        with open(fp.ocr_area, 'rb') as f:
            encoded = base64.b64encode(f.read()).decode()
        ImageBase64_value = 'data:image/jpeg;base64,%s' % encoded

        try:
            cred = credential.Credential(k.SECRET_ID, k.SECRET_KEY)
            httpProfile = HttpProfile()
            httpProfile.endpoint = "ocr.ap-guangzhou.tencentcloudapi.com"

            clientProfile = ClientProfile()
            clientProfile.httpProfile = httpProfile
            client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)

            req = models.GeneralBasicOCRRequest()
            req.from_json_string(json.dumps({"ImageBase64": ImageBase64_value}))

            resp = client.GeneralBasicOCR(req)
            ocr_json = resp.to_json_string()
            Orecognition.ocrresult = ocr_json

            # Parse the reply that was just received (the bare name
            # "ocrresult" was never bound in this method), and do it before
            # opening the output so a bad reply does not truncate the file.
            detections = json.loads(ocr_json)['TextDetections']
            with open(fp.txt_ocr_path, "w", encoding='utf-8') as f:
                for item in detections:
                    f.write("%s\n" % item['DetectedText'])

        except TencentCloudSDKException as err:
            print(err)
Ejemplo n.º 11
0
    def englishOCR(cls, bs, action="ImageBase64", region="ap-shanghai"):
        """Run Tencent Cloud English OCR and collect the non-blank lines.

        Args:
            bs: UTF-8 encoded bytes; decoded and sent as the value of
                ``action``.
            action: Name of the request field that carries ``bs``.
            region: Region passed to the OCR client.

        Returns:
            ``(result, None)`` on success, where ``result`` has ``data``
            (stripped non-empty texts), ``total`` (number of detections)
            and ``actual`` (number of kept texts); ``(None, error)`` when
            anything inside the request/response handling raises.
        """
        # Decoding happens outside the try block, so a failure here is
        # raised to the caller rather than returned.
        payload_text = str(bs, encoding="utf-8")
        try:
            auth = credential.Credential(cls._secretId, cls._secretKey)
            http_profile = HttpProfile()
            http_profile.endpoint = cls._endpoint
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile
            client = ocr_client.OcrClient(auth, region, client_profile)

            request = models.EnglishOCRRequest()
            request.from_json_string(json.dumps({action: payload_text}))

            detections = client.EnglishOCR(request).TextDetections
            total = len(detections)
            stripped = [item.DetectedText.strip() for item in detections]
            kept = [text for text in stripped if text]
            return {"data": kept, "total": total, "actual": len(kept)}, None
        except Exception as e:
            return None, e
Ejemplo n.º 12
0
    def getCodeFromImg(res, imgUrl, max_attempts=20):
        """Download a captcha image and read its 4-character code via OCR.

        The image is fetched again on every attempt. Attempts are repeated
        in a loop (previously by unbounded recursion, which rebuilt the
        client each time and could end in ``RecursionError``).

        Args:
            res: Session-like object whose ``get(url, verify=...)`` returns
                a response with a ``content`` attribute.
            imgUrl: URL of the captcha image.
            max_attempts: Upper bound on download+OCR attempts.

        Returns:
            The recognised text with spaces removed, once it is exactly
            four characters long.

        Raises:
            Exception: When the SDK reports an error, or when no attempt
                produced a 4-character code.
        """
        try:
            settings = Utils.getYmlConfig()
            cred = credential.Credential(settings['SecretId'],
                                         settings['SecretKey'])
            httpProfile = HttpProfile()
            httpProfile.endpoint = "ocr.tencentcloudapi.com"

            clientProfile = ClientProfile()
            clientProfile.httpProfile = httpProfile
            client = ocr_client.OcrClient(cred, "ap-beijing", clientProfile)

            for _ in range(max_attempts):
                # NOTE(review): verify=False disables TLS certificate
                # checks for this download.
                response = res.get(imgUrl, verify=False)
                # Base64 text of the downloaded image bytes.
                imgCode = str(base64.b64encode(response.content),
                              encoding='utf-8')

                req = models.GeneralBasicOCRRequest()
                req.from_json_string(json.dumps({"ImageBase64": imgCode}))
                resp = client.GeneralBasicOCR(req)
                codeArray = json.loads(resp.to_json_string())['TextDetections']
                code = ''.join(
                    item['DetectedText'].replace(' ', '') for item in codeArray)
                if len(code) == 4:
                    return code
        except TencentCloudSDKException as err:
            raise Exception('验证码识别出现问题了' + str(err.message)) from err
        raise Exception('验证码识别出现问题了'
                        + 'no 4-character code after %d attempts' % max_attempts)
Ejemplo n.º 13
0
def run(SecretId, SecretKey, files):
    """Table recognition for local images, sent as ImageBase64.

    Each file is encoded with ``encode_image``, sent to TableOCR, and the
    returned Base64 Excel data is handed to ``write_res2file`` together
    with a destination under ``output/`` that reuses the image's name with
    an ``.xlsx`` extension.

    Args:
        SecretId: Tencent Cloud secret id.
        SecretKey: Tencent Cloud secret key.
        files: Iterable of image file paths.

    Returns:
        None. SDK errors are printed and stop the remaining files.
    """
    try:
        cred = credential.Credential(SecretId, SecretKey)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        # NOTE(review): the region is an empty string - confirm that the
        # API accepts requests without one.
        client = ocr_client.OcrClient(cred, "", clientProfile)

        for img_file in files:
            image_base64 = encode_image(img_file)
            if isinstance(image_base64, bytes):
                image_base64 = image_base64.decode('utf-8')

            # Set the field directly: str.format() on a template that
            # contains literal JSON braces raised before any request was
            # sent.
            req = models.TableOCRRequest()
            req.ImageBase64 = image_base64
            resp = client.TableOCR(req)

            # Base64-encoded Excel data; the SDK response exposes it as an
            # attribute, it is not subscriptable.
            base64_res = resp.Data
            destine_file = 'output/' + ".".join(
                img_file.split(".")[0:-1]) + ".xlsx"
            write_res2file(base64_res, destine_file)

    except TencentCloudSDKException as err:
        print(err)
Ejemplo n.º 14
0
def recognition():
    """Run Tencent Cloud high-accuracy OCR on ``../images/t_2.jpeg``.

    Returns:
        The ``TextDetections`` attribute of the SDK response, or ``None``
        when the SDK raises (the error is printed).
    """
    try:
        # Build the credential object.
        # NOTE(review): the API key pair is embedded in the source; it
        # should be rotated and loaded from configuration instead.
        cred = credential.Credential("AKIDwdGfopwkR6VYO04bXCbia4IJD43wOCH7", "XcXAWcSWaFl0yhYDdnLdeQXquiHYT8Yl")
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        # Initialise the client profile. The HTTP profile has to be
        # attached to it, otherwise the endpoint set above is never used.
        clientProfile = ClientProfile("TC3-HMAC-SHA256")
        clientProfile.httpProfile = httpProfile
        # Use the nearest region; ap-shanghai here.
        client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

        req = models.GeneralAccurateOCRRequest()

        with open("../images/t_2.jpeg", 'rb') as f:
            encoded = base64.b64encode(f.read())
        params = '{"ImageBase64":"' + str(encoded, 'utf-8') + '"}'
        req.from_json_string(params)

        resp = client.GeneralAccurateOCR(req)
        # The detections are returned as SDK objects, not as a dict.
        return resp.TextDetections

    except TencentCloudSDKException as err:
        print(err)
Ejemplo n.º 15
0
def postToOCR(image_path):
    """Send an image to Tencent Cloud general OCR and persist the reply.

    Args:
        image_path: Passed to ``ct.convertToBase64`` to obtain the Base64
            image content.

    Returns:
        The reply parsed into Python objects. It is also written to
        ``static/respondJson/respond.json``. ``None`` is returned when the
        SDK raises (the error is printed).
    """
    try:
        # Supply your own SecretID and SecretKey from Tencent Cloud.
        auth = credential.Credential("", "")
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile

        # Preferred region.
        client = ocr_client.OcrClient(auth, "na-toronto", client_profile)

        request = models.GeneralBasicOCRRequest()
        encoded_image = ct.convertToBase64(image_path)
        request.from_json_string(json.dumps({"ImageBase64": encoded_image}))

        # Turn the SDK reply into plain Python objects.
        reply = json.loads(client.GeneralBasicOCR(request).to_json_string())
        with open('static/respondJson/respond.json', 'w') as outfile:
            json.dump(reply, outfile)

        print(type(reply))
        return reply

    except TencentCloudSDKException as err:
        print(err)
Ejemplo n.º 16
0
def get_tencent_reg(img_file_dir):
    """Return the text lines Tencent Cloud general OCR finds in an image.

    Args:
        img_file_dir: Path of the image file to send.

    Returns:
        A list with the ``DetectedText`` of every detection, or an empty
        list when the SDK raises (the error is printed).
    """
    # NOTE(review): the API key pair is embedded in the source; it should
    # be rotated and loaded from configuration instead.
    secret_id = "AKIDXjfVfZDDySfx1OHKt63lCNIwUBhxhceR"
    secret_key = "g5LPDUVdV8XfYHju87oEOJyMTfcJkupW"
    try:
        with open(img_file_dir, 'rb') as f:
            img_data = f.read()
        img_base64 = base64.b64encode(img_data)
        params = '{"ImageBase64":"' + str(img_base64, 'utf-8') + '"}'

        cred = credential.Credential(secret_id, secret_key)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile

        client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

        req = models.GeneralBasicOCRRequest()
        req.from_json_string(params)
        resp = client.GeneralBasicOCR(req)
        # Read the texts from the response objects. Scraping the JSON text
        # with a regex cut values at escaped quotes and returned escape
        # sequences instead of the real characters.
        return [item.DetectedText for item in (resp.TextDetections or [])]

    except TencentCloudSDKException as err:
        print(err)
        return []
Ejemplo n.º 17
0
Archivo: ocr.py Proyecto: hao1032/adbui
    def __init__(self, secret_id, secret_key):
        """Create the Tencent Cloud OCR client stored on ``self.client``.

        Args:
            secret_id: Tencent Cloud secret id.
            secret_key: Tencent Cloud secret key.
        """
        auth = credential.Credential(secret_id, secret_key)

        endpoint_settings = HttpProfile()
        endpoint_settings.endpoint = "ocr.tencentcloudapi.com"
        profile = ClientProfile()
        profile.httpProfile = endpoint_settings

        self.client = ocr_client.OcrClient(auth, "ap-guangzhou", profile)
Ejemplo n.º 18
0
def ocr(file, side='front'):
    """Read one side of an ID card with Tencent Cloud IDCardOCR.

    Args:
        file: The image content as a bytes-like object.
        side: ``'front'`` requests the front side; any other value
            requests the back side.

    Returns:
        A dict with ``status`` and ``msg``. ``msg`` ends with the warnings
        from ``get_warns`` joined by ``;``. On success ``data`` holds the
        front-side fields (name, gender, nation, birth, address, idnum) or
        the back-side fields (authority, validity). SDK errors produce a
        ``status: False`` dict carrying the SDK message.
    """
    image_b64 = base64.b64encode(file).decode('utf-8')
    try:
        api_config = global_dict.get_value("api_config")
        auth = credential.Credential(api_config['appid'],
                                     api_config['appsecret'])
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile('TC3-HMAC-SHA256')
        client_profile.httpProfile = http_profile
        client = ocr_client.OcrClient(auth, "ap-guangzhou", client_profile)

        # "Config" is a JSON document passed as a string value.
        request_fields = {
            'ImageBase64': image_b64,
            'CardSide': 'FRONT' if side == 'front' else 'BACK',
            'Config': '{"CopyWarn":true,"BorderCheckWarn":true,"ReshootWarn":true,"DetectPsWarn":true,"TempIdWarn":true,"InvalidDateWarn":true}',
        }
        request = models.IDCardOCRRequest()
        request.from_json_string(json.dumps(request_fields))

        card = json.loads(client.IDCardOCR(request).to_json_string())
        warns = ";".join(get_warns(card))

        # Neither a front-side nor a back-side key field was recognised.
        if not (card.get('Name') or card.get('Authority')):
            return {'status': False, 'msg': "身份证识别错误;" + warns}

        if side == 'front':
            fields = {
                'name': card['Name'],
                'gender': card['Sex'],
                'nation': card['Nation'],
                'birth': card['Birth'].replace('/', '-'),
                'address': card['Address'],
                'idnum': card['IdNum'],
            }
        else:
            fields = {
                'authority': card['Authority'],
                'validity': card['ValidDate'],
            }
        return {'status': True, 'msg': '读取成功;' + warns, 'data': fields}
    except TencentCloudSDKException as err:
        return {'status': False, 'msg': err.get_message()}
Ejemplo n.º 19
0
def Tencent_car_api(img_base64):
    """Recognise a licence plate with Tencent Cloud LicensePlateOCR.

    Credentials and region are read from the ``tencent_sdk`` section of
    the ``APP_config_file`` configuration.

    Args:
        img_base64: The image content as a Base64 string.

    Returns:
        A dict that always contains ``code`` (1 on success, 0 on SDK
        error), ``data``, ``error`` and ``car_code_service``. On success it
        also contains ``number``, ``confidence`` and ``requestId``.
    """
    # Substring of the SDK error text -> message reported in car['error'].
    known_errors = (
        ('FailedOperation.DownLoadError', '文件下载失败'),
        ('FailedOperation.ImageDecodeFailed', '图片解码失败'),
        ('FailedOperation.OcrFailed', 'OCR识别失败'),
        ('FailedOperation.UnKnowError', '未知错误'),
        ('FailedOperation.UnOpenError', '服务未开通'),
        ('LimitExceeded.TooLargeFileError', '文件内容太大'),
        ('ResourcesSoldOut.ChargeStatusException', '云识别系统计费状态异常'),
        ('secret id should not be none', '云识别系统未配置'),
    )

    APP_config_ini = ConfigObj(APP_config_file, encoding='UTF8')
    car = dict()
    try:
        cred = credential.Credential(APP_config_ini['tencent_sdk']['api_id'],
                                     APP_config_ini['tencent_sdk']['api_key'])
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(
            cred, APP_config_ini['tencent_sdk']['api_area'], clientProfile)

        req = models.LicensePlateOCRRequest()
        req.from_json_string(json.dumps({"ImageBase64": img_base64}))

        resp = client.LicensePlateOCR(req)
        # Example reply:
        # {"Number": "渝AN7968", "Confidence": 99, "RequestId": "bc9f8509-..."}
        reply = json.loads(resp.to_json_string())

        car['number'] = reply['Number']
        car['confidence'] = reply['Confidence']
        car['requestId'] = reply['RequestId']

        car['code'] = 1
        car['data'] = '识别成功'
        car['error'] = ''
        # The E4A app checks this field to validate the returned JSON.
        car['car_code_service'] = '1.0'
    except TencentCloudSDKException as err:
        error = str(err)
        print(error)
        car['code'] = 0
        car['data'] = '识别出错'
        # The E4A app checks this field to validate the returned JSON.
        car['car_code_service'] = '1.0'
        # Fall back to the raw SDK text so 'error' is always present, even
        # for codes that are not in the table.
        car['error'] = error
        for marker, message in known_errors:
            if marker in error:
                car['error'] = message
    return car
Ejemplo n.º 20
0
def ocrFiles():
    """Yield an OCR result dict for every JPG-named file in ./resources.

    For each file, the detections are scanned for a line containing
    '投票地址' or 'senbatsu'. The line after it becomes ``postKey``, unless
    ``isContainChinese`` accepts that line, in which case the line after
    that one is used. Spaces are removed from the key.

    Yields:
        Dicts with ``filename``, ``ocrStatus`` (True when a key was found)
        and ``postKey``. A failure on one file is printed and that file is
        still yielded with whatever was found so far.
    """
    try:
        resourceList = [
            name for name in os.listdir(r"./resources")
            if 'JPG' in name.upper()
        ]
        print("总图片数: ", len(resourceList))

        cred = credential.Credential(os.environ.get("TENCENTCLOUD_SECRET_ID"),
                                     os.environ.get("TENCENTCLOUD_SECRET_KEY"))
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

        for filename in resourceList:
            postDict = {
                'filename': filename,
                'ocrStatus': False,
                'postKey': ''
            }
            try:
                with open("./resources/%s" % filename, 'rb') as file:
                    content = file.read()
                req = models.GeneralEfficientOCRRequest()
                req.ImageBase64 = str(base64.b64encode(content), 'utf-8')
                resp = client.GeneralEfficientOCR(req)
                data = json.loads(resp.to_json_string())
                texts = [
                    item['DetectedText']
                    for item in (data['TextDetections'] or [])
                ]
                for index, text in enumerate(texts):
                    if '投票地址' not in text and 'senbatsu' not in text:
                        continue
                    # Look only as far ahead as needed: reading index + 2
                    # unconditionally used to discard a valid key that sat
                    # on the very last line.
                    following = [
                        candidate.replace(' ', '')
                        for candidate in texts[index + 1:index + 3]
                    ]
                    if not following:
                        continue
                    if not isContainChinese(following[0]):
                        postDict['postKey'] = following[0]
                    elif len(following) > 1:
                        postDict['postKey'] = following[1]
            except Exception as e:
                # Best effort per file: report the problem and move on
                # instead of hiding it.
                print("OCR failed for %s: %s" % (filename, e))
            if postDict['postKey']:
                postDict['ocrStatus'] = True
            print(postDict)
            yield (postDict)

    except TencentCloudSDKException as err:
        print(err)
Ejemplo n.º 21
0
def main_handler(event, context):
    """Cloud-function entry point: OCR an image posted via API gateway.

    Args:
        event: Invocation event. It must contain ``requestContext``; the
            Base64 image is read from ``event['body']``.
        context: Invocation context (unused).

    Returns:
        An API-gateway style response dict. ``body`` holds the concatenated
        detected text with status 200. When the SDK raises, ``body`` holds
        the error text and the status is 500. An event without
        ``requestContext`` yields ``{"code": 410, ...}``.
    """
    logger.info("start main handler")
    if "requestContext" not in event:
        return {"code": 410, "errorMsg": "event is not come from api gateway"}
    if "body" not in event:
        return {
            "isBase64Encoded": False,
            "statusCode": 200,
            "headers": {
                "Content-Type": "text",
                "Access-Control-Allow-Origin": "*"
            },
            "body": "there is no file from api gateway"
        }

    # The gateway already delivers the image as Base64 in event['body'].
    logger.info("Start to detection")
    status_code = 200
    try:
        secret_id = os.environ.get('TENCENTCLOUD_SECRETID')
        secret_key = os.environ.get('TENCENTCLOUD_SECRETKEY')
        token = os.environ.get('TENCENTCLOUD_SESSIONTOKEN')
        cred = credential.Credential(secret_id, secret_key, token)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-beijing", clientProfile)
        req = models.GeneralBasicOCRRequest()
        # json.dumps quotes the externally supplied body correctly.
        req.from_json_string(json.dumps({"ImageBase64": event['body']}))
        resp = client.GeneralBasicOCR(req)
        res_ai = json.loads(resp.to_json_string())
        detections = res_ai["TextDetections"]
        print(len(detections))
        res_text = " " + "".join(
            str(item["DetectedText"]) for item in detections)

    except TencentCloudSDKException as err:
        print(err)
        # Answer with the error instead of failing on an unbound res_text.
        status_code = 500
        res_text = str(err)

    print(res_text)
    response = {
        "isBase64Encoded": False,
        "statusCode": status_code,
        "headers": {
            "Content-Type": "text",
            "Access-Control-Allow-Origin": "*"
        },
        "body": res_text
    }

    return response
Ejemplo n.º 22
0
def dataFromPictures(picture, SecretId, SecretKey):
    """Recognise a table image and lay the cell texts out in a DataFrame.

    Args:
        picture: Path of the image file to send.
        SecretId: Tencent Cloud secret id.
        SecretKey: Tencent Cloud secret key.

    Returns:
        ``(result1, data)``: the reply parsed from JSON, and a DataFrame
        indexed by the sorted unique ``RowTl`` values with the sorted
        unique ``ColTl`` values as columns, each cell holding the
        detection's ``Text`` with spaces removed.
    """
    with open(picture, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read())

    auth = credential.Credential(SecretId, SecretKey)  # secret id and key
    http_profile = HttpProfile()
    http_profile.endpoint = "ocr.tencentcloudapi.com"
    client_profile = ClientProfile()
    client_profile.httpProfile = http_profile
    client = ocr_client.OcrClient(auth, "ap-shanghai", client_profile)

    request = models.TableOCRRequest()
    request.from_json_string(
        '{"ImageBase64":"' + str(encoded_image, 'utf-8') + '"}')
    reply = client.TableOCR(request)

    # Work on the reply as plain Python objects.
    result1 = json.loads(reply.to_json_string())

    row_labels = []
    col_labels = []
    cell_texts = []
    for cell in result1['TextDetections']:
        row_labels.append(cell['RowTl'])
        col_labels.append(cell['ColTl'])
        cell_texts.append(cell['Text'])

    row_labels = pd.Series(row_labels)
    col_labels = pd.Series(col_labels)

    index = row_labels.unique()
    index.sort()
    columns = col_labels.unique()
    columns.sort()

    data = pd.DataFrame(index=index, columns=columns)
    for row, col, text in zip(row_labels, col_labels, cell_texts):
        data.loc[row, col] = re.sub(" ", "", text)

    return result1, data
Ejemplo n.º 23
0
def excelFromPictures(picture, SecretId, SecretKey):
    """Recognise a table image and export the cells to an Excel workbook.

    The workbook path is ``"."`` + the picture path up to and including
    its last dot + ``"xlsx"``. When the SDK raises, the error is printed
    and an empty sheet is written.

    Args:
        picture: Path of the image file to send; it must contain a dot.
        SecretId: Tencent Cloud secret id.
        SecretKey: Tencent Cloud secret key.

    Returns:
        None.
    """
    rowIndex = []
    colIndex = []
    content = []
    try:
        with open(picture, "rb") as f:
            img_data = f.read()
        img_base64 = base64.b64encode(img_data)
        # The id and secret are issued by Tencent Cloud.
        cred = credential.Credential(SecretId, SecretKey)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

        req = models.TableOCRRequest()
        params = '{"ImageBase64":"' + str(img_base64, 'utf-8') + '"}'
        req.from_json_string(params)
        resp = client.TableOCR(req)
        # Collect the position and text of every recognised cell.
        result1 = json.loads(resp.to_json_string())
        for item in result1['TextDetections']:
            rowIndex.append(item['RowTl'])
            colIndex.append(item['ColTl'])
            content.append(item['Text'])
    except TencentCloudSDKException as err:
        print(err)

    # Build the grid and export it to Excel.
    rowIndex = pd.Series(rowIndex)
    colIndex = pd.Series(colIndex)

    index = rowIndex.unique()
    index.sort()

    columns = colIndex.unique()
    columns.sort()

    data = pd.DataFrame(index=index, columns=columns)
    for row, col, text in zip(rowIndex, colIndex, content):
        data.loc[row, col] = re.sub(" ", "", text)

    # NOTE(review): the leading "." is prepended to the whole path, so
    # "img.png" becomes ".img.xlsx" - confirm this is intended.
    excel_path = "." + re.match(r".*\.", picture).group() + "xlsx"
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.x.
    with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
        data.to_excel(writer, sheet_name='Sheet1', index=False, header=False)
Ejemplo n.º 24
0
    def tencentcloud(self, img_and_txt_paths):
        """OCR each image with Tencent Cloud and save the text to a file.

        Args:
            img_and_txt_paths: Iterable of ``(image_path, txt_save_path)``
                pairs. If the text path already exists, it is replaced via
                ``self.eliminate_dup_name`` until it is unused.

        Returns:
            The directory of the last text path that was processed, or an
            empty string when no pairs were given.
        """
        # Keeps the final return valid when the input is empty.
        txt_save_path = ""
        for path in img_and_txt_paths:
            img_path = path[0]
            txt_save_path = path[1]
            save_dir = os.path.split(txt_save_path)[0]
            # Keep renaming until the text file name no longer clashes.
            while (os.path.exists(txt_save_path)):
                txt_save_path = self.eliminate_dup_name(
                    txt_save_path, save_dir)
            try:
                cred = credential.Credential(
                    os.environ.get("TENCENTCLOUD_SECRET_ID"),
                    os.environ.get("TENCENTCLOUD_SECRET_KEY"))
                httpProfile = HttpProfile()
                httpProfile.endpoint = "ocr.tencentcloudapi.com"

                clientProfile = ClientProfile()
                clientProfile.httpProfile = httpProfile
                client = ocr_client.OcrClient(cred, "ap-shanghai",
                                              clientProfile)

                # Request for the high-accuracy recognition API.
                req = models.GeneralAccurateOCRRequest()

                # Send the image as Base64 text.
                with open(img_path, "rb") as rf:
                    base64_data = base64.b64encode(rf.read())
                    req.ImageBase64 = str(base64_data, 'utf-8')

                resp = client.GeneralAccurateOCR(req)

                results = resp.TextDetections

                if results is not None:
                    with open(txt_save_path, "wt", encoding="gbk") as wf:
                        for line in results:
                            wf.write(str(line.DetectedText))
            except TencentCloudSDKException as err:
                print(err.get_code(), err.get_message())

        return os.path.dirname(txt_save_path)
Ejemplo n.º 25
0
def tencent_image2str_url(uuid_url_dict, types="characters"):
    """OCR a batch of image URLs with Tencent Cloud and collect the text.

    Args:
        uuid_url_dict: mapping of an identifier to the image URL to recognise.
        types: ``"characters"`` uses GeneralBasicOCR and keeps detections
            whose ``Confidence`` is at least 85; ``"questions"`` uses
            EduPaperOCR and keeps every question text. Any other value
            selects no request model, so processing a non-empty mapping
            fails on the first URL.

    Returns:
        A dict mapping each identifier to the recognised text, one detection
        per line (each followed by a newline). An identifier whose request
        raised ``TencentCloudSDKException`` maps to ``''``. If the SDK fails
        outside the per-URL handling, whatever was collected so far is
        returned.
    """
    import json

    from tencentcloud.common import credential
    from tencentcloud.common.profile.client_profile import ClientProfile
    from tencentcloud.common.profile.http_profile import HttpProfile
    from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
    from tencentcloud.ocr.v20181119 import ocr_client, models
    uuid_text_dict = {}
    try:
        cred = credential.Credential(Tencent_API_KEY, Tencent_SECRET_KEY)
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.ap-chengdu.tencentcloudapi.com"

        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile

        client = ocr_client.OcrClient(cred, "ap-beijing",
                                      client_profile)  # create a connection
        req = None
        if types == "questions":
            req = models.EduPaperOCRRequest()  # questions model
        elif types == "characters":
            req = models.GeneralBasicOCRRequest()  # characters model
        for uuid, url in uuid_url_dict.items():
            # json.dumps escapes quotes and backslashes in the URL, which the
            # previous string substitution did not.
            params = json.dumps({"ImageUrl": url})
            print("OCR resolving %s" % url)
            req.from_json_string(params)
            lines = []
            try:
                if types == "questions":
                    resp = client.EduPaperOCR(req)
                    # Treat a missing list as "nothing detected".
                    for questionsblock in resp.QuestionBlockInfos or []:
                        for textblock in questionsblock.QuestionArr or []:
                            lines.append(textblock.QuestionText)
                elif types == "characters":
                    resp = client.GeneralBasicOCR(req)
                    for textblock in resp.TextDetections or []:
                        if textblock.Confidence >= 85:
                            lines.append(textblock.DetectedText)
                uuid_text_dict[uuid] = ''.join(line + '\n' for line in lines)
            except TencentCloudSDKException as err:
                print(err)
                print("OCR failed! URL:%s, set text empty" % url)
                uuid_text_dict[uuid] = ''
        return uuid_text_dict

    except TencentCloudSDKException as err:
        print(err)
        return uuid_text_dict
Ejemplo n.º 26
0
def Tencent_OCR_Summary(pic_str, mode):
    """Build an OCR client and hand the picture to the handler for ``mode``.

    Args:
        pic_str: picture payload, passed unchanged to the selected handler.
        mode: mode code compared against the string codes in the route
            table below. Codes '21', '24' and '44' are retired and yield
            ``None``; any unlisted code falls back to general printed-text
            recognition.

    Returns:
        Whatever the selected handler returns, or ``None`` for a retired
        mode or after a ``TencentCloudSDKException`` (which is printed).
    """
    try:
        credentials = credential.Credential("Your ID", "Your Secret")
        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        client = ocr_client.OcrClient(credentials, "ap-beijing",
                                      client_profile)

        # Ordered (code, handler) pairs; a handler of None marks a retired mode.
        routes = (
            ('11', Tencent_OCR_Basic_Print),  # general printed text
            ('12', Tencent_OCR_Basic_Print_Fast),  # printed text, fast
            ('13', Tencent_OCR_Basic_Print_HighAccurate),  # printed text, high accuracy
            ('14', Tencent_OCR_Handwriting),  # general handwriting
            ('15', Tencent_OCR_English),  # English
            ('21', None),  # ID card (retired)
            ('22', Tencent_OCR_BizLicense),  # business license
            ('23', Tencent_OCR_BankCard),  # bank card
            ('24', None),  # business card (retired)
            ('31', Tencent_OCR_VatInvoice),  # VAT invoice
            ('32', Tencent_OCR_Waybill),  # waybill
            ('41', Tencent_OCR_DriverLicense),  # driver's license
            ('42', Tencent_OCR_LicensePlate),  # license plate
            ('43', Tencent_OCR_Vin),  # vehicle VIN
            ('44', None),  # vehicle license (retired)
            ('51', Tencent_OCR_Arithmetic),  # arithmetic expressions
            ('52', Tencent_OCR_Table),  # tables
        )
        for code, handler in routes:
            if mode == code:
                if handler is None:
                    return None
                return handler(pic_str, client)

        # Unknown code: default to general printed-text recognition.
        return Tencent_OCR_Basic_Print(pic_str, client)

    except TencentCloudSDKException as err:
        print(err)
Ejemplo n.º 27
0
def excelFromPictures(path, picture):
    """Recognise a VAT invoice image and return its key fields.

    Args:
        path: not used by the current implementation; kept so existing
            callers keep working.
        picture: path of the invoice image file to read.

    Returns:
        A one-row DataFrame with the columns 发票代码, 发票号码, 开票日期,
        合计金额 and 小写金额. Columns the service did not report keep their
        initial value of 0.0. ``None`` is returned when the OCR request
        fails with ``TencentCloudSDKException`` (the error is printed).
    """
    # ID and Secret are issued by Tencent Cloud.
    SecretId = ""
    SecretKey = ""
    columns = ["发票代码", "发票号码", "开票日期", "合计金额", "小写金额"]

    with open(picture, "rb") as f:
        img_data = f.read()
    img_base64 = b64encode(img_data)

    try:
        cred = credential.Credential(SecretId, SecretKey)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-shanghai", clientProfile)

        req = models.VatInvoiceOCRRequest()
        params = '{"ImageBase64":"' + str(img_base64, 'utf-8') + '"}'
        req.from_json_string(params)

        resp = client.VatInvoiceOCR(req)
    except TencentCloudSDKException as err:
        print("识别", picture, "错误[", err, "]\n可重试")
        # Without a response there is nothing to parse; previously execution
        # fell through and crashed on the unbound ``resp``.
        return None

    # Turn the response into plain dicts/lists for field extraction.
    result1 = loads(resp.to_json_string())

    invoicedf = DataFrame(zeros(5).reshape(1, 5), columns=columns)
    # A missing or null list simply leaves every column at its initial value.
    for item in result1.get('VatInvoiceInfos') or []:
        if item["Name"] in columns:
            invoicedf[item["Name"]] = item["Value"]

    print("已经完成[" + f.name + "]的识别")
    return invoicedf
Ejemplo n.º 28
0
def ocr(file):
    """Recognise a VAT invoice from raw image bytes.

    Args:
        file: image content as a bytes-like object; it is base64-encoded
            before being sent.

    Returns:
        ``{'status': True, 'data': {...}}`` on success, where ``data`` holds
        the recognised fields under the keys listed in ``field_keys``.
        ``{'status': False, 'msg': ...}`` when the response carries no
        invoice items or the SDK raises ``TencentCloudSDKException``.
    """
    # Invoice item name reported by the service -> key used in the result.
    field_keys = {
        '货物或应税劳务、服务名称': 'goods_name',
        '购买方名称': 'payer_name',
        '数量': 'goods_num',
        '单位': 'goods_unit',
        '开票日期': 'issue_date',
        '发票号码': 'invoice_number',
        '金额': 'invoice_amount',
        '单价': 'invoice_unit_price',
    }
    try:
        api_config = global_dict.get_value("api_config")
        encoded_image = base64.b64encode(file).decode('utf-8')
        cred = credential.Credential(api_config['appid'],
                                     api_config['appsecret'])

        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile('TC3-HMAC-SHA256')
        client_profile.httpProfile = http_profile
        client = ocr_client.OcrClient(cred, "ap-guangzhou", client_profile)

        req = models.VatInvoiceOCRRequest()
        req.from_json_string(json.dumps({'ImageBase64': encoded_image}))

        payload = json.loads(client.VatInvoiceOCR(req).to_json_string())
        if not payload.get('VatInvoiceInfos'):
            return {'status': False, 'msg': '发票识别失败'}

        data = {}
        for entry in payload['VatInvoiceInfos']:
            key = field_keys.get(entry['Name'])
            if key is not None:
                data[key] = entry['Value']
        return {'status': True, 'data': data}
    except TencentCloudSDKException as err:
        return {'status': False, 'msg': str(err)}
Ejemplo n.º 29
0
def get_text(image_code):
    """Send a base64 image string to GeneralBasicOCR and return the reply.

    Args:
        image_code: the image as base64 text, placed verbatim in the
            ``ImageBase64`` field of the request.

    Returns:
        The response serialised as a JSON string, or ``None`` after a
        ``TencentCloudSDKException`` (which is printed).
    """
    try:
        credentials = credential.Credential("XXXXX", "XXXXX")

        http_profile = HttpProfile()
        http_profile.endpoint = "ocr.tencentcloudapi.com"
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        client = ocr_client.OcrClient(credentials, "ap-guangzhou",
                                      client_profile)

        request = models.GeneralBasicOCRRequest()
        request.from_json_string(
            "".join(['{"ImageBase64":"', image_code, '"}']))

        response = client.GeneralBasicOCR(request)
        return response.to_json_string()
    except TencentCloudSDKException as err:
        print(err)
        return None
Ejemplo n.º 30
0
 def ocr_text(img_base):
     """Recognise handwriting in a base64-encoded image.

     Credentials are read from the ``TENCENTCLOUD_SECRET_ID`` and
     ``TENCENTCLOUD_SECRET_KEY`` environment variables rather than being
     embedded in the source.

     Args:
         img_base: the image as base64 text, sent as ``ImageBase64``.

     Returns:
         The ``TextDetections`` attribute of the response, or ``None`` after
         a ``TencentCloudSDKException`` (which is printed).
     """
     import os

     try:
         # Secrets must not live in source control; any key that was
         # previously committed here should be rotated.
         cred = credential.Credential(
             os.environ.get("TENCENTCLOUD_SECRET_ID"),
             os.environ.get("TENCENTCLOUD_SECRET_KEY"))
         http_profile = HttpProfile()
         http_profile.endpoint = "ocr.tencentcloudapi.com"
         client_profile = ClientProfile()
         client_profile.httpProfile = http_profile
         client = ocr_client.OcrClient(cred, "ap-beijing", client_profile)
         req = models.GeneralHandwritingOCRRequest()
         params = {
             "ImageBase64": img_base
         }
         req.from_json_string(json.dumps(params))
         resp = client.GeneralHandwritingOCR(req)
         print("read_text========", resp.to_json_string())
         return resp.TextDetections
     except TencentCloudSDKException as err:
         print("read_text=======", err)
         return None