Exemple #1
0
def baidu_image2str_url(uuid_url_dict=None, types="characters"):
    """Recognise text in a batch of remote images with Baidu general OCR.

    Args:
        uuid_url_dict: Mapping of identifier -> image URL. ``None`` (the
            default) is treated as an empty mapping.
        types: Not used by this function; kept so existing callers keep
            working.

    Returns:
        A dict mapping every identifier to its recognised text. Only
        result lines whose average probability is above 0.85 are kept and
        they are concatenated without a separator. When the API reports
        "url response invalid" or "image size error" the image is
        downloaded, converted and passed to ``baidu_image2str_local``;
        for any other API error the text is the empty string.
    """
    from aip import AipOcr

    if uuid_url_dict is None:
        # Build a fresh dict per call instead of sharing a mutable default.
        uuid_url_dict = {}

    client = AipOcr(Baidu_APP_ID, Baidu_API_KEY,
                    Baidu_SECRET_KEY)  # create a connection
    options = {"probability": "true"}
    # API errors for which recognition is retried on a locally converted copy.
    local_retry_errors = ("url response invalid", "image size error")

    uuid_text_dict = {}
    for uuid, url in uuid_url_dict.items():
        resp = client.basicGeneralUrl(url, options)
        if "error_msg" in resp:
            print("url recognition failed! Using local model.  url: " + url)
            if resp["error_msg"] in local_retry_errors:
                # Download the image behind the URL and convert it to a
                # format the API accepts before recognising it locally.
                image_path = image_transform(url_img_download(url))
                print(image_path)
                uuid_text_dict[uuid] = baidu_image2str_local(image_path)
            else:
                uuid_text_dict[uuid] = ""
        else:
            # A response without "words_result" yields an empty text
            # rather than a KeyError.
            uuid_text_dict[uuid] = "".join(
                tex["words"] for tex in resp.get("words_result", [])
                if tex["probability"]["average"] > 0.85)
    return uuid_text_dict
Exemple #2
0
class BaiduAIP(object):
    """Wrapper around the Baidu OCR client that reads one code from an image URL."""

    def __init__(self):
        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    def get_file_content(self, file_path):
        """Read the file at *file_path* and return its binary content."""
        with open(file_path, 'rb') as fp:
            return fp.read()

    def get_code(self):
        """Call Baidu OCR on ``self.picture_path`` and parse the response.

        ``self.picture_path`` is not assigned in ``__init__``; it must be
        set on the instance before this method is called.

        Returns:
            The recognised text when the response holds exactly one result
            line whose average probability is above 0.7; otherwise the
            string ``'do again'`` (several lines, no lines, low
            probability, or a response without ``words_result`` such as an
            API error payload).
        """
        # Optional request parameters.
        options = {
            "language_type": "ENG",
            "detect_direction": "true",
            "detect_language": "true",
            "probability": "true"
        }
        response = self.client.basicGeneralUrl(self.picture_path, options)
        print(response, self.picture_path)

        # Error responses carry no 'words_result'; treat them as a retry
        # instead of raising KeyError.
        words_result = response.get('words_result')
        if not isinstance(words_result, list) or len(words_result) != 1:
            return 'do again'

        entry = words_result[0]
        if entry['probability']['average'] > 0.7:  # accuracy above 0.7
            return entry['words']
        return 'do again'
Exemple #3
0
 def getOCRResult(self):
     """Run Baidu general OCR on ``self.imgURL`` and return ``words_result``.

     A new client is built from ``self.APP_ID``, ``self.API_KEY`` and
     ``self.SECRET_KEY`` on every call. The image is passed as a URL.
     """
     ocr_client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
     # Optional request parameters.
     request_options = {
         "language_type": "CHN_ENG",
         "detect_direction": "true",
         "detect_language": "true",
         "probability": "false",
     }
     # General text recognition with options.
     response = ocr_client.basicGeneralUrl(self.imgURL, request_options)
     return response['words_result']
def pic_to_word(urls):
    """OCR the remote image at *urls* and return its lines as one string.

    The raw API response is printed. Each recognised line is followed by
    a newline in the returned string.
    """
    # NOTE(review): credentials are hard-coded in the source.
    app_id = '16459310'
    api_key = 'XMntwgKcwzsuuLUIqhBWw9uZ'
    secret_key = 'iWESOZKlmB5vw3hL5T3MniNTdZCfAgL4'

    ocr_client = AipOcr(app_id, api_key, secret_key)

    # General text recognition; the image is given as a remote URL.
    text = ocr_client.basicGeneralUrl(urls)
    print(text)
    pieces = [
        str(text['words_result'][idx]['words']) + '\n'
        for idx in range(text['words_result_num'])
    ]
    return ''.join(pieces)
Exemple #5
0
def parse_url_pdf(url):
    """Run Baidu general OCR on *url* and print the raw response.

    Nothing is returned.
    """
    # NOTE(review): credentials are hard-coded in the source.
    app_id = '14964808'
    api_key = 'AVWLHd7wAOxf4kijuImGZzVH'
    secret_key = 'SEZTAAYH92VFTFXEvc75Vyi4nROfE0I0'

    request_options = {
        "language_type": "CHN_ENG",
        "detect_direction": "true",
        "detect_language": "true",
        "probability": "true",
    }

    # General text recognition.
    ocr_client = AipOcr(app_id, api_key, secret_key)
    print(ocr_client.basicGeneralUrl(url, request_options))
Exemple #6
0
def verifying_code(
        url='https://paulzhangcc.oss-cn-beijing.aliyuncs.com/1.png'):
    """Run Baidu general OCR on the image at *url* and return the raw response.

    The response is also printed.
    """
    # NOTE(review): credentials are hard-coded in the source.
    app_id = '11086206'
    api_key = 'YTDMZxqm63fokY7UTOxSVowX'
    secret_key = 'HkTXYq0BzQ85kkvTAxemQi4SVNwaNUDf'
    ocr_client = AipOcr(app_id, api_key, secret_key)

    # Optional request parameters.
    request_options = {
        "language_type": "CHN_ENG",
        "detect_direction": "false",
        "detect_language": "true",
    }
    # General text recognition with options; the image is a remote URL.
    result = ocr_client.basicGeneralUrl(url, request_options)
    print("识别图片百度云返回的结果:", result)
    return result
Exemple #7
0
def get_BaiDuPicExtract(url=""):
    """Return the first text line Baidu general OCR finds in the image at *url*.

    The URL and the raw API response are printed.

    Args:
        url: Remote image URL passed to the OCR service.

    Returns:
        The ``words`` value of the first ``words_result`` entry (``""`` if
        that entry has no ``words`` key), or ``None`` when the response has
        no ``words_result`` or it is empty.
    """
    from aip import AipOcr
    print(url)
    # NOTE(review): credentials are hard-coded in the source.
    APP_ID = '14709450'
    API_KEY = '0zw6HYjooiU1kmkynNGqSn4T'
    SECRET_KEY = 'u2Qag7hW2DnUkFz96on3FrDzLtVmwCDH'

    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    # Optional request parameters.
    options = {
        "language_type": "CHN_ENG",
        "detect_direction": "true",
        "detect_language": "true",
        "probability": "true",
    }
    # General text recognition with options; the image is a remote URL.
    r_json = client.basicGeneralUrl(url, options)
    # Example of a successful response:
    # {'log_id': 1551369869026631207,
    #  'direction': 0,
    #  'words_result_num': 1,
    #  'words_result':
    #      [{'words': '82-8=',
    #        'probability': {'variance': 0.007439, 'average': 0.91145,
    #                        'min': 0.806046}}],
    #  'language': -1}
    print(r_json)
    words_result = r_json.get("words_result", [])
    if not words_result:
        return None
    return words_result[0].get("words", "")
Exemple #8
0
# -*- coding: utf-8 -*-
# @Time     : 2018/4/11 16:14
# @Author   : Shark
# @File     : verification_code.py
# @Software : PyCharm

from aip import AipOcr

# Baidu OCR application credentials, hard-coded in this script.
APP_ID = '10769158'
API_KEY = 'C2Uo8OGGY1O1RVwGOkrb68it'
SECRET_KEY = '1ldb72IgGBVhrmrjtC3jTi8iyWnU70OM'

# Module-level OCR client built from the credentials above.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


# 读取图片
def get_file_content(filePath):
    """Return the full binary content of the file at *filePath*."""
    with open(filePath, mode='rb') as handle:
        data = handle.read()
    return data


# options = {}
# options["language_type"] = "ENG"

# Read a local captcha image; `image` is not referenced again in the
# lines shown here.
image = get_file_content('picture/captcha.jpg')

url = 'https://www.douban.com/misc/captcha?id=U5ivIQP8K6C4lUTSJY9DaeJg:en&size=s'

# Recognise the remote captcha by URL (no options) and print the raw response.
print(client.basicGeneralUrl(url))
Exemple #9
0
#此为百度Ai开发平台的云打码,识别图片正确率极低,仅为展示调用接口。
from aip import AipOcr
""" 你的 APPID AK SK """
APP_ID = '你的APP_ID'
API_KEY = '你的API_KEY'
SECRET_KEY = '你的SECRET_KEY '

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

url = "https://sam.huat.edu.cn:8443/selfservice/common/web/verifycode.jsp"
# General text recognition on a remote URL, no options; the result is
# discarded.
client.basicGeneralUrl(url)
# Optional request parameters.
options = {
    "language_type": "CHN_ENG",
    "detect_direction": "true",
    "detect_language": "true",
    "probability": "true",
}
# General text recognition on the same remote URL, this time with options.
res = client.basicGeneralUrl(url, options)

print(res)
Exemple #10
0
""" 如果有可选参数 """
options = {
    "language_type": "CHN_ENG",
    "detect_direction": "true",
    "detect_language": "true",
    "probability": "true",
}

# General text recognition on local image bytes with options; the result
# is discarded.
client.basicGeneral(image, options)

# url = "http//www.x.com/sample.jpg"
url = "http://www.lmth2013.com/validatecode.aspx"

# General text recognition on a remote URL, no options.
web_acc = client.basicGeneralUrl(url)
print(web_acc)

# """ 如果有可选参数 """
# options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"
#
# """ 带参数调用通用文字识别, 图片参数为远程url图片 """
# client.basicGeneralUrl(url, options)


def main():
    """Collect pig-market prices and write them into the calling Excel workbook.

    Steps, in order:

    1. Crawl the bulletin list pages under ``www.scs.moa.gov.cn/scxxfb/``
       and regex the pork price out of the bulletins for today (or the
       latest earlier one), the first bulletin of this month and the first
       bulletin of this year.
    2. Find the latest sow-price announcement through ``sousuo.gov.cn``,
       OCR the image in it with Baidu OCR and take one recognised line as
       the price.
    3. Download piglet, hog and corn series from Wind.
    4. Write everything to the first sheet of the workbook returned by
       ``xw.Book.caller()``.

    Relies on module-level names that are not defined in this function:
    ``xw``, ``datetime``, ``requests``, ``etree``, ``re``, ``AipOcr``,
    ``w``, ``pd`` and ``np``.
    """
    book = xw.Book.caller()

    # ---- Scrape Ministry of Agriculture data ----
    today = datetime.date.today()
    # First day of the current month.
    firstmonthday = datetime.datetime(today.year, today.month, 1)
    # First day of the current year.
    firstday = datetime.datetime(today.year, 1, 1)
    oneday = datetime.timedelta(days=1)
    # Every scraped bulletin link: key is the date string, value the link.
    all_links = {}
    base_url = 'http://www.scs.moa.gov.cn/scxxfb/'

    def collect_links(page_url):
        """Store every bulletin link found on *page_url* in ``all_links``."""
        response = requests.get(page_url)
        page = etree.HTML(response.content)
        data = page.find('.//div[@class="sj_e_tonzhi_list"]')
        for node in data:
            for info in node.findall('.//li'):
                anchor = info.find('.//a')
                link = base_url + str(anchor.get('href'))
                date = re.findall(r'.\w+.t(\d+)\w+', link)
                all_links[date[0]] = str(link)

    # Front page first, then the paginated index pages 1..12.
    collect_links(base_url)
    for page_no in range(1, 13):
        collect_links(base_url + 'index_' + str(page_no) + '.htm')

    # Fetch a bulletin page and regex the pork price out of its first
    # paragraph. Returns the list of matches.
    def price_get(link):
        response = requests.get(link)
        content = response.content
        page = etree.HTML(content)
        info = page.find('.//div[@class="TRS_Editor"]')
        text = info.find('.//p').text
        price = re.findall(r'猪肉\D+(\d+.\d+)元', text)
        return price

    price1 = {}  # pork prices: key is the date string, value the price list

    # NOTE(review): the three searches below loop until a matching date is
    # found in all_links; if none exists they do not terminate normally.

    # Today's price; if not published yet, fall back to the previous day.
    while today.strftime('%Y%m%d') not in all_links:
        today -= oneday
    d_p_price = price_get(str(all_links[today.strftime('%Y%m%d')]))
    price1[today.strftime('%Y%m%d')] = d_p_price

    # Price at the start of the month: first bulletin date on or after the 1st.
    while firstmonthday.strftime('%Y%m%d') not in all_links:
        firstmonthday += oneday
    m_p_price = price_get(str(all_links[firstmonthday.strftime('%Y%m%d')]))
    price1[firstmonthday.strftime('%Y%m%d')] = m_p_price

    # Price at the start of the year: first bulletin date on or after Jan 1.
    while firstday.strftime('%Y%m%d') not in all_links:
        firstday += oneday
    y_p_price = price_get(str(all_links[firstday.strftime('%Y%m%d')]))
    price1[firstday.strftime('%Y%m%d')] = y_p_price

    # price1 now holds three entries keyed by date string:
    #   price1[today.strftime('%Y%m%d')]          -> today's price
    #   price1[firstmonthday.strftime('%Y%m%d')]  -> start of month
    #   price1[firstday.strftime('%Y%m%d')]       -> start of year
    print(price1)

    # ---- Scrape the sow price announcement ----
    # Baidu cloud account (redacted in the source).
    APP_ID = '#####'
    API_KEY = '########'
    SECRECT_KEY = '########'
    client = AipOcr(APP_ID, API_KEY, SECRECT_KEY)

    # Search page: take the link of the first result.
    url = 'http://sousuo.gov.cn/s.htm?q=%E4%BA%8C%E5%85%83%E6%AF%8D%E7%8C%AA%E9%94%80%E5%94%AE%E4%BB%B7%E6%A0%BC&t=govall&timetype=timeqb&mintime=&maxtime=&sort=pubtime&sortType=1&nocorrect='
    response = requests.get(url)
    content = response.content
    page = etree.HTML(content)
    table = page.find('.//h3[@class="res-title"]')
    channels = table.find('.//a')
    link = channels.get('href')

    # Title of the latest announcement.
    html = requests.get(link)
    html.encoding = 'utf-8'
    text = html.text
    page1 = etree.HTML(text)
    info = page1.find('.//div[@class="article oneColumn pub_border"]')
    t = info.find('.//h1')
    title = t.text

    # Extract the date the data refers to from a fixed position in the title.
    # NOTE(review): the year is hard-coded to 2020 — confirm whether it
    # should follow the current year.
    datestr = title[len(title) - 14:len(title) - 9]
    date = '2020年' + datestr
    date1 = datetime.datetime.strptime(date, '%Y年%m月%d日')

    # Image embedded in the announcement; its src is appended to the
    # announcement link with the last 19 characters removed.
    content1 = page1.find('.//div[@class="pages_content"]')
    channels1 = content1.find('.//img')
    link_img = channels1.get('src')
    links = str(link)
    pic_urls = links[:len(links) - 19] + link_img

    # OCR the image with Baidu and take the sixth recognised line as the price.
    prices = client.basicGeneralUrl(pic_urls)
    r = prices['words_result']
    info = r[5]
    price = info['words']

    # Weekly sow price: key is the week label, value the price.
    pork_price = {}
    week = date1.strftime("%W")
    pork_price[week + '周'] = price
    print(pork_price)

    # ---- Pull Wind data ----
    # Connect to the Wind database.
    w.start()
    w.isconnected()

    # Piglet data download (codes and labels are redacted in the source).
    pig_baby_codes = ['############']
    pig_baby = w.edb(pig_baby_codes,
                     datetime.date.today() + datetime.timedelta(days=-5),
                     datetime.date.today(),
                     usedf=True,
                     ShowBlank=0)
    pig_baby = pig_baby[1]
    pig_baby.columns = ['###########']

    # Piglet statistics per region: mean of each adjacent column pair.
    pig_baby_mean = pd.DataFrame([])
    pig_baby_mean_names = ['##########']
    for i in range(1, 13, 2):
        pig_baby_mean[pig_baby_mean_names[int(
            (i - 1) /
            2)]] = (pig_baby.iloc[:, i - 1] + pig_baby.iloc[:, i]) / 2
    print(pig_baby_mean)

    # Hog data download (codes and labels are redacted in the source).
    pig_codes = ["###############"]
    pig = w.edb(pig_codes,
                datetime.date.today() + datetime.timedelta(days=-4),
                datetime.date.today(),
                usedf=True,
                ShowBlank=0)
    pig = pig[1]
    pig.columns = ["###############"]

    # Hog statistics per region: average all columns whose label contains
    # the region name.
    pig_mean = pd.DataFrame(np.zeros((4, 5)))
    pig_mean_names = ["###########"]
    pig_mean.columns = pig_mean_names
    print(pig_mean)
    pig_mean.index = pig.index[1:]
    for name in pig_mean_names:
        i = 0
        for n in list(pig.columns):
            if name in n:
                pig_mean[name] = pig_mean[name] + pig[n]
                i += 1
        pig_mean[name] = pig_mean[name] / i

    print(pig_baby_mean)

    # Corn price download.
    corn_codes = ['S5005793']
    corn = w.edb(corn_codes,
                 datetime.date.today() + datetime.timedelta(days=-5),
                 datetime.date.today(),
                 usedf=True,
                 ShowBlank=0)
    corn = corn[1]
    corn.columns = ['现货价:玉米:平均价']
    corn = corn.T
    print(corn)

    # Close the Wind interface.
    w.stop()

    # Combine piglet, hog, corn and pork prices into one column of values.
    pig_baby_mean = pig_baby_mean.T
    pig_mean = pig_mean.T
    pig_baby_data = list(pig_baby_mean[pig_baby_mean.columns[-1]])
    pig_baby_data.append(np.mean(pig_baby_data))
    pig_data = list(pig_mean[pig_mean.columns[-1]])
    pig_data.append(np.mean(pig_data))
    corn_data = list(corn[corn.columns[-1]])
    pig_baby_data.extend(pig_data)
    pig_baby_data.extend(corn_data)
    pig_baby_data.append(float(price1[today.strftime('%Y%m%d')][0]))
    alldata = pig_baby_data
    print(alldata)

    # Last five days, oldest first: `days` holds datetimes, `days1` the
    # same dates formatted as 'YYYY-MM-DD'.
    days = [
        datetime.datetime.today() + datetime.timedelta(days=-i)
        for i in range(5)
    ]
    days1 = [day.strftime('%Y-%m-%d') for day in days]
    days.reverse()
    days1.reverse()
    print(days)

    # Last five week labels, oldest first. The week number is converted
    # to int, so labels carry no leading zero.
    today = datetime.date.today()
    week_n = int(today.strftime("%W"))
    week_nows = [str(week_n - i) + '周' for i in range(5)]
    week_nows.reverse()
    print(week_nows)

    # Target worksheet.
    sht = book.sheets[0]

    # Decide whether the yearly / monthly sow figures need updating.
    # %W is zero-padded, so it is normalised through int() to match the
    # unpadded labels in week_nows; otherwise weeks below 10 never match.
    firstday_week = str(
        int(
            datetime.datetime(datetime.date.today().year,
                              datetime.date.today().month,
                              1).strftime("%W"))) + '周'
    if week_nows[-1] == '1周':
        sht.range('Q8').value = float(price)
    if week_nows[-1] == firstday_week:
        sht.range('P8').value = float(price)

    # Decide whether the yearly / monthly piglet, hog, pork and corn
    # figures need updating. In 'YYYY-MM-DD' the month-day part starts at
    # index 5 and the day part at index 8.
    if days1[-1][5:] == '01-01':
        sht.range('Q11:Q25').options(transpose=True).value = alldata
    if days1[-1][8:] == '01':
        sht.range('P11:P25').options(transpose=True).value = alldata

    # Update the main data, skipping rows that are already current.
    # Sow price: shift the previous four values left, then write the new one.
    if sht.range('K7').value != week_nows[-1]:
        sht.range('G8:J8').value = sht.range('H8:K8').value
        sht.range('K8').value = float(price)

    # Piglet, hog, pork and corn.
    if sht.range('K9').value.date() != days[-1].date():
        sht.range('G7:K7').value = week_nows
        sht.range('G9:K9').value = days1
        sht.range('G11:J25').value = sht.range('H11:K25').value
        sht.range('K11:K25').options(transpose=True).value = alldata
Exemple #12
0
class Ocr_tool(object):
    """Baidu OCR helper with small image loading, cropping and encoding utilities."""

    def __init__(self):
        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
        # Result lines whose average probability is below this are dropped.
        self.p_thres = 0.5

    def get_file_content(self, filepath):
        """Return the binary content of the file at *filepath*."""
        with open(filepath, 'rb') as handle:
            data = handle.read()
        return data

    def read_image(self, filepath):
        """Open *filepath* with ``Image.open`` and return the image."""
        picture = Image.open(filepath)
        return picture

    def crop_image(self, im):
        """Crop *im* to a fixed box given as fractions of its width and height."""
        w_px, h_px = im.size
        left = int(w_px * 627 / 2160)
        upper = int(h_px * 531 / 1080)
        right = int(w_px * 1421 / 2160)
        lower = int(h_px * 737 / 1080)
        return im.crop((left, upper, right, lower))

    def show_im(self, im):
        """Display *im* through its ``show`` method."""
        im.show()

    def image2byte(self, im):
        """Serialise *im* as PNG and return the encoded bytes."""
        sink = BytesIO()
        im.save(sink, format='PNG')
        return sink.getvalue()

    def ocr(self, image=None, url=None):
        """Recognise text in *image* (preferred) or at *url*.

        Returns the list of recognised lines whose average probability is
        at least ``self.p_thres``. Returns an empty list when neither
        argument is given or the response has no ``words_result``.
        """
        request_options = {"language_type": "CHN_ENG", "probability": "true"}
        if image is not None:
            res = self.client.basicGeneral(self.image2byte(image),
                                           request_options)
        elif url is not None:
            res = self.client.basicGeneralUrl(url, request_options)
        else:
            return []

        if "words_result" not in res:
            return []
        return [
            entry["words"] for entry in res["words_result"]
            if entry["probability"]["average"] >= self.p_thres
        ]

    def get_im_from_url(self, imgurl):
        """Download *imgurl* (30 s timeout) and open the payload as an image."""
        payload = requests.get(imgurl, timeout=30).content
        return Image.open(BytesIO(payload))

    def get_tags_from_url(self, imgurl, crop=False):
        """Return the OCR lines for *imgurl*.

        With ``crop=True`` the image is downloaded, cropped and sent as
        bytes; images in mode ``'P'`` are skipped and yield an empty list.
        Otherwise the URL itself is sent to the OCR service.
        """
        if not crop:
            return self.ocr(url=imgurl)

        im = self.get_im_from_url(imgurl)
        if im.mode == 'P':
            print("MYDEBUG mode:P")
            return []
        return self.ocr(image=self.crop_image(im))
Exemple #13
0
# """ 调用通用文字识别, 图片参数为本地图片 """
# client.basicGeneral(image);
#
# """ 如果有可选参数 """
# options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"
#
# """ 带参数调用通用文字识别, 图片参数为本地图片 """
# client.basicGeneral(image, options)

url = "https://wx2.sinaimg.cn/mw690/006NGRWIly1fwz7o2gugij30u00ez419.jpg"
# General text recognition on a remote URL, no options; the result is
# discarded.
client.basicGeneralUrl(url)
# Optional request parameters.
options = {
    "language_type": "CHN_ENG",
    "detect_direction": "true",
    "detect_language": "true",
    "probability": "true",
}
# General text recognition on the same remote URL, this time with options.
ocr_result = client.basicGeneralUrl(url, options)

words_result = ocr_result['words_result']
result = ''

# Concatenate the recognised lines, each preceded by a newline.
for words in words_result:
    result += '\n' + words['words']
""" 调用通用文字识别, 图片参数为本地图片 """
resp = client.basicGeneral(image)
print('resp1', resp)
print('---------------------------------------------')

# Optional request parameters.
options = {
    "language_type": "CHN_ENG",
    "detect_direction": "true",
    "detect_language": "true",
    "probability": "true",
}

# General text recognition on local image bytes with options; the result
# is discarded.
client.basicGeneral(image, options)

url = "https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy/it/u=2844325179,1671562938&fm=26&gp=0.jpg"

# General text recognition on a remote URL, no options; the result is
# discarded.
client.basicGeneralUrl(url)

# Optional request parameters (rebuilt with the same values).
options = {
    "language_type": "CHN_ENG",
    "detect_direction": "true",
    "detect_language": "true",
    "probability": "true",
}

# General text recognition on the remote URL with options.
resp = client.basicGeneralUrl(url, options)
print('resp2', resp)
""" 你的 APPID AK SK """
# NOTE(review): credentials are hard-coded in the source.
APP_ID_OCR = '22855479'
APP_ID_SPEECH = '22844737'

API_KEY_OCR = 'dEbfdWGaDhu7yG4h07OMaSU3'
API_KEY_SPEECH = 'Gc4Vtsvw3dpjxjuEpCrFlq8d'

SECRET_KEY_OCR = 'V0hD45LqGugfCnZe9eNb6ih5cp5d7Xj4'
SECRET_KEY_SPEECH = 'mBbpvR3tA7wm561dtmchP5MMjPsVnGt4'

client_ocr = AipOcr(APP_ID_OCR, API_KEY_OCR, SECRET_KEY_OCR)
client_speech = AipSpeech(APP_ID_SPEECH, API_KEY_SPEECH, SECRET_KEY_SPEECH)

url = "https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy/it/u=2844325179,1671562938&fm=26&gp=0.jpg"
# General text recognition on a remote URL, no options; the result is
# discarded.
client_ocr.basicGeneralUrl(url)
# Optional request parameters.
options = {
    "language_type": "CHN_ENG",
    "detect_direction": "true",
    "detect_language": "true",
    "probability": "true",
}
# General text recognition on the same remote URL, this time with options.
resp = client_ocr.basicGeneralUrl(url, options)
words_result = resp['words_result']
words = [record['words'] for record in words_result]

# Text to audio: the recognised lines are joined with commas.
result = client_speech.synthesis(','.join(words), 'zh', 1, {'vol': 5})
Exemple #16
0
# encoding:utf-8
import requests
from aip import AipNlp
from aip import AipOcr
""" 你的 APPID AK SK """
# NOTE(review): credentials are hard-coded in the source.
APP_ID = '17925688'
API_KEY = 'Ra8Mg2MLx2a8E9hs6BKrBp3D'
SECRET_KEY = 'VPeAIciWGBEqQcXs1lDNdY5rdubRoBaG'

clientOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)
clientNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

url = "https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1575441285116&di=54ac097a7c11ff5c211ad13d788211a4&imgtype=0&src=http%3A%2F%2Fphotocdn.sohu.com%2F20131025%2FImg388882088.jpg"
# Optional request parameters.
options = {
    "language_type": "CHN_ENG",
    "detect_direction": "true",
    "detect_language": "true",
    "probability": "true",
}
# General text recognition with options; the image is a remote URL.
text = clientOcr.basicGeneralUrl(url, options)
print(text)
""" 调用词法分析 """

#print(clientNlp.lexer(text))