Example #1
0
    def get_com_id(self):
        """Pick one random company row from `com_info`.

        The query selects `com_id` and `com_name` for a row whose `origin` is
        not NULL, whose `com_id` is longer than 5 characters and whose
        `status_credit_execued` is NULL.

        Returns:
            The first row of the result, or ``[None, None]`` when the result
            equals the empty tuple.
        """
        query = """
        SELECT `com_id`,`com_name`
        FROM `com_info`
        WHERE `origin`
        IS NOT NULL AND LENGTH(`com_id`) > 5 AND `status_credit_execued` IS NULL
        ORDER BY RAND() LIMIT 1;
        """

        # Debug query for one fixed company (kept disabled):
        # query = """
        # SELECT `com_id`, `com_name`
        # FROM `com_info`
        # WHERE com_id = '299eee201318f0283f086b4847d69fc7';
        # """

        rows = db().selsts(query)
        return [None, None] if rows == () else rows[0]
Example #2
0
    def get_page_info(self):
        """Scrape every patent of one company and store the records in `com_patent`.

        The company id, company name, page count and site root URL are taken
        from ``PatentInfo().get_page_count()``. For each listing page every
        table row is read, the linked patent detail page is fetched and parsed,
        the record is printed and inserted into `com_patent`, and `status` of
        the company in `com_info` is set to 1. Nothing is done when the company
        id is ``None``.

        Returns:
            None.

        Raises:
            IndexError: when a listing row or detail page lacks a cell that has
                no fallback (everything except applicant link, agency link and
                abstract image).
        """
        pt = PatentInfo()
        value = pt.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]
        if com_id is None:
            # No company available: nothing to scrape.
            return

        def detail_field(tree, label, tail='/text()'):
            """Return the stripped first XPath hit in the cell following the *label* cell."""
            return tree.xpath(
                f'//table[@class="ntable"]/tbody/tr/td[contains(text(),"{label}")]'
                f'/following-sibling::td[1]{tail}')[0].strip()

        def linked_field(tree, label):
            """Prefer the text of a link inside the cell, else the cell's own text."""
            try:
                return detail_field(tree, label, '/a/text()')
            except IndexError:
                # The fallback must select text(): an element node has no
                # .strip(), which made the old applicant fallback always fail.
                return detail_field(tree, label)

        key = pt.search_key(com_name)
        index_url = value[3]

        # XPath expressions that do not depend on the record are built once.
        claim_xpath = ('//table[@class="ntable"]/tr'
                       '/td[@class="ea_instructions" and position()=1]/p/text()')
        spec_cell = ('//div[@class="tcaption"]/h3[text()="说明书"]/parent::div'
                     '/following-sibling::table[@class="ntable"]/tr'
                     '/td[@class="ea_instructions"]')
        instructions_xpath = '|'.join(
            f'{spec_cell}/{tag}/text()' for tag in ('h1', 'h2', 'p'))

        upd = f"""
        UPDATE
        `com_info`
        SET
        `status` = 1
        WHERE
        `com_id` = "{com_id}" ;
        """

        count = 0  # running number of records over all pages
        for page in range(1, count_page + 1):
            page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=assets&box=zhuanli'
            hds = gh().header()
            hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
            time.sleep(random.randint(1, 2))  # throttle requests
            res_pg = requests.get(page_url, headers=hds).text
            tree_pg = etree.HTML(res_pg)
            # Skip the header row of the listing table.
            for content in tree_pg.xpath('//table/tr[position()>1]'):
                count += 1
                patent_num = content.xpath('td[1]/text()')[0]
                patent_type = content.xpath('td[2]/text()')[0]
                patent_pub_num = content.xpath('td[3]/text()')[0]
                patent_pub_date = content.xpath('td[4]/text()')[0]
                patent_name = content.xpath('td[5]/a/text()')[0].strip()
                patent_link = content.xpath('td[5]/a/@href')[0]
                patent_url = ''.join((index_url, patent_link))

                time.sleep(random.randint(1, 3))
                res_dt = requests.get(patent_url, headers=hds).text
                tree_dt = etree.HTML(res_dt)

                app_num = detail_field(tree_dt, '申请号')
                app_date = detail_field(tree_dt, '申请日')
                prio_date = detail_field(tree_dt, '优先权日')
                prio_num = detail_field(tree_dt, '优先权号')
                inventor = detail_field(tree_dt, '发明人')
                applicant = linked_field(tree_dt, '申请(专利权)人')
                agency = linked_field(tree_dt, '代理机构')
                agent = detail_field(tree_dt, '代理人')
                ipc = detail_field(tree_dt, 'IPC分类号').replace(
                    ' ', '').replace('\n', '')
                cpc = detail_field(tree_dt, 'CPC分类号')
                app_address = detail_field(tree_dt, '申请人地址')
                app_zip_code = detail_field(tree_dt, '申请人邮编')
                abstract = detail_field(tree_dt, '摘要')
                try:
                    abstract_photo = detail_field(
                        tree_dt, '摘要附图', '/img/@src')
                except IndexError:
                    abstract_photo = '-'  # no abstract image on the page
                # Joining an empty node list yields '', so no fallback needed.
                claim = ''.join(tree_dt.xpath(claim_xpath))
                instructions = ''.join(tree_dt.xpath(instructions_xpath))

                print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                    '-' * 9, count, page, count_page))
                localtime = tm().get_localtime()  # current time
                print(f'公司ID:{com_id} 当前时间:{localtime}')
                print(
                    f'序号:{patent_num}\n专利类型:{patent_type}\n公开(公告)号:{patent_pub_num}\n公开(公告)日期:{patent_pub_date}\n专利名称:{patent_name}\n'
                    f'专利页URL:{patent_url}\n申请号:{app_num}\n申请日期:{app_date}\n优先权日:{prio_date}\n优先权号:{prio_num}\n'
                    f'发明人:{inventor}\n申请(专利权)人:{applicant}\n代理机构:{agency}\n代理人:{agent}\nIPC分类号:{ipc}\n'
                    f'CPC分类号:{cpc}\n申请人地址:{app_address}\n申请人邮编:{app_zip_code}\n摘要:{abstract}\n摘要附图:{abstract_photo}\n'
                    f'权利要求:{claim}\n说明书:{instructions}')

                # NOTE(review): scraped text is interpolated directly into the
                # SQL below; a value containing a double quote breaks the
                # statement. Switch to a parameterised query if the db helper
                # offers one - its interface is not visible from here.
                ins = f"""
                INSERT INTO
                `com_patent`
                (`com_id`,`patent_num`,`patent_type`,`patent_pub_num`,`patent_pub_date`,
                `patent_name`,`patent_url`,`app_num`,`app_date`,`prio_date`,
                `prio_num`,`inventor`,`applicant`,`agency`,`agent`,
                `ipc`,`cpc`,`app_address`,`app_zip_code`,`abstract`,`abstract_photo`,
                `claim`,`instructions`)
                VALUES
                ("{com_id}","{patent_num}","{patent_type}","{patent_pub_num}","{patent_pub_date}",
                "{patent_name}","{patent_url}","{app_num}","{app_date}","{prio_date}",
                "{prio_num}","{inventor}","{applicant}","{agency}","{agent}",
                "{ipc}","{cpc}","{app_address}","{app_zip_code}","{abstract}","{abstract_photo}",
                "{claim}","{instructions}");
                """
                db().inssts(ins)

                # Mark the company as processed after each stored record.
                db().updsts(upd)

        localtime = tm().get_localtime()  # current time
        print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
        print(f'当前时间:{localtime}')
Example #3
0
 def get_com_name(self,sql):
     """Execute *sql* via ``db().inssts`` and return whatever that call returns.

     NOTE(review): the method name suggests a lookup, yet the statement is
     sent through ``inssts`` - confirm this is the intended db helper.
     """
     return db().inssts(sql)
    def get_page_info(self):
        """Scrape the enforcement records of one company into `com_credit_execued`.

        Company id, company name and page count come from
        ``Credit().get_page_count()``. Each listing page is fetched; for every
        table row the case detail page is fetched as well, the record is
        printed and inserted, and `status_credit_execued` of the company in
        `com_info` is set to 1. When a response looks like an anti-scraping
        page, a message is printed and the method waits for the operator to
        press Enter before moving on.

        Returns:
            None.
        """
        cd = Credit()
        value = cd.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]
        key = dk().search_key(com_name)
        index_url = 'https://www.qichacha.com'

        def is_blocked(html):
            """Report an anti-scraping response, pause for the operator, return True."""
            if '<script>window.location.href' in html:
                print('访问频繁,需验证!{get_page_info}')
                input('暂停')
            elif '<script>location.href="/user_login"</script>' in html:
                print('Cookie失效,需更换!{get_page_info}')
                input('程序暂停运行!')
            elif '您的账号访问超频,请稍后访问或联系客服人员' in html:
                print('账号访问超频,请更换账号!{get_page_info}')
                input('程序暂停运行!')
            else:
                return False
            return True

        upd = f"""
        UPDATE
        `com_info`
        SET
        `status_credit_execued` = 1
        WHERE
        `com_id` = "{com_id}" ;
        """

        count = 0  # running number of records over all pages
        for page in range(1, count_page + 1):
            page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=susong&box=zhixing'
            hds = gh().header()
            hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
            time.sleep(random.randint(1, 2))  # throttle requests
            res_pg = requests.get(page_url, headers=hds).text
            if is_blocked(res_pg):
                continue

            tree_pg = etree.HTML(res_pg)
            content_li = tree_pg.xpath(
                '//table[@class="ntable ntable-odd"]/tr[position()>2]')
            for content in content_li:
                count += 1
                # Reset per record: these stay None when the detail page is
                # blocked, instead of being unbound or carrying over the
                # values of the previous record.
                exec_person = None
                occ = None
                try:
                    exec_num = content.xpath('td[1]/text()')[0]
                    case_num = content.xpath('td[2]/a/text()')[0]
                    case_id = content.xpath(
                        'td[2]/a[contains(@onclick,"showRelatModal")]/@onclick'
                    )[0].split('zhixing",')[1].split('"')[1]
                    case_url = f'{index_url}/company_zhixingRelat?id={case_id}'
                    filing_time = content.xpath('td[3]/text()')[0]
                    court_of_exec = content.xpath('td[4]/text()')[0]
                    exec_obj = content.xpath('td[5]/text()')[0]
                    time.sleep(random.randint(1, 2))
                    res_info = requests.get(case_url, headers=hds).text
                    if not is_blocked(res_info):
                        tree_info = etree.HTML(res_info)
                        exec_person = tree_info.xpath(
                            '//table/tbody/tr[1]/td[2]/text()')[0]
                        occ = tree_info.xpath(
                            '//table/tbody/tr[1]/td[4]/text()')[0]
                except Exception:
                    # Best effort: any parsing or request failure turns the
                    # record into an all-None row. Exception (not a bare
                    # except) lets Ctrl-C / SystemExit through.
                    exec_num = None
                    case_num = None
                    case_id = None
                    case_url = None
                    filing_time = None
                    court_of_exec = None
                    exec_obj = None
                    exec_person = None
                    occ = None

                print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                    '-' * 9, count, page, count_page))
                create_time = tm().get_localtime()  # current time
                print(f'当前时间:{create_time}')
                print(
                    f'公司ID:{com_id}\n序号:{exec_num}\n案号:{case_num}\n案例ID:{case_id}\n案例链接:{case_url}\n'
                    f'立案时间:{filing_time}\n执行法院:{court_of_exec}\n执行标的:{exec_obj}\n被执行人:{exec_person}\n身份证号/组织机构代码:{occ}\n'
                )
                if exec_num is None:
                    # Ten columns need ten values; the earlier statement
                    # supplied only nine NULLs and could not execute.
                    ins = """
                    INSERT INTO
                    `com_credit_execued`
                    (`com_id`,`exec_num`,`case_num`,`case_id`,`filing_time`,
                    `court_of_exec`,`exec_obj`,`exec_person`,`occ`,`create_time`)
                    VALUES
                    (NULL,NULL,NULL,NULL,NULL,
                    NULL,NULL,NULL,NULL,NULL);
                    """
                else:
                    # NOTE(review): scraped text is interpolated directly into
                    # the SQL; a value containing a double quote breaks the
                    # statement. Use a parameterised query if the db helper
                    # offers one - its interface is not visible from here.
                    ins = f"""
                    INSERT INTO
                    `com_credit_execued`
                    (`com_id`,`exec_num`,`case_num`,`case_id`,`filing_time`,
                    `court_of_exec`,`exec_obj`,`exec_person`,`occ`,`create_time`)
                    VALUES
                    ("{com_id}","{exec_num}","{case_num}","{case_id}","{filing_time}",
                    "{court_of_exec}","{exec_obj}","{exec_person}","{occ}","{create_time}");
                    """
                db().inssts(ins)

                # Mark the company as processed after each stored record.
                db().updsts(upd)

        localtime = tm().get_localtime()  # current time
        print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
        print(f'当前时间:{localtime}\n')
        time.sleep(3)
Example #5
0
 def __init__(self) -> None:
     """Create the helper objects and the base URL stored on this instance."""
     # rc holds a RecruitInfo instance; db/gh/gm are names defined outside
     # this snippet - presumably helper classes or aliases, verify at the
     # import site.
     self.rc = RecruitInfo()
     self.db = db()
     self.gh = gh()
     self.gm = gm()
     # Root URL of the site.
     self.index_url = 'https://www.qichacha.com'
Example #6
0
 def __init__(self) -> None:
     """Create the helper objects and the base URL stored on this instance."""
     # db/gh/tm/gm are names defined outside this snippet - presumably helper
     # classes or aliases, verify at the import site.
     self.db = db()
     self.gh = gh()
     self.tm = tm()
     self.gm = gm()
     # Root URL of the site.
     self.index_url = 'https://www.qichacha.com'
Example #7
0
 def __init__(self) -> None:
     """Store one ``db()`` object on the instance."""
     # db is defined outside this snippet - presumably the database helper;
     # verify at the import site.
     self.db = db()
Example #8
0
    def get_page_info(self):
        """Scrape the patents of one company and store the records in `com_patent`.

        Values are first read from ``PatentInfo().get_page_count()``, but the
        company id, company name, page count and first page are currently
        overridden by hard-coded values (temporary backfill code, tag 001).
        For each listing page every table row is read, the linked patent detail
        page is fetched and parsed, the record is printed and inserted into
        `com_patent`, and `status_patent` of the company in `com_info` is set
        to 1. When a response looks like an anti-scraping page, a message is
        printed and the method waits for the operator to press Enter before
        moving on.

        Returns:
            None.

        Raises:
            IndexError: when a listing row or detail page lacks a cell that has
                no fallback (everything except applicant link, agency link and
                abstract image), or a patent link has no ``_com_`` part.
        """
        pt = PatentInfo()
        value = pt.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]

        # FIXME: temporary override for a one-off backfill run [001]; it
        # replaces the values read above and should be removed afterwards.
        com_id = '6129f29192de208800c7b5d23486a154'
        com_name = '乐融致新电子科技(天津)有限公司'
        count_page = 298
        first_page = 148  # regular runs start at page 1
        # End of temporary override [001].

        if com_id is None:
            # No company available: nothing to scrape.
            return

        def is_blocked(html, stage):
            """Report an anti-scraping response, pause for the operator, return True.

            *stage* is the number shown in the message tag (1 = listing page,
            2 = detail page).
            """
            tag = '{get_page_info[%d]}' % stage
            if '<script>window.location.href' in html:
                print('访问频繁,需验证!' + tag)
                input('暂停')
            elif '<script>location.href="/user_login"</script>' in html:
                print('Cookie失效,需更换!' + tag)
                input('程序暂停运行!')
            elif '您的账号访问超频,请稍后访问或联系客服人员' in html:
                print('账号访问超频,请更换账号!' + tag)
                input('程序暂停运行!')
            else:
                return False
            return True

        def detail_field(tree, label, tail='/text()'):
            """Return the stripped first XPath hit in the cell following the *label* cell."""
            return tree.xpath(
                f'//table[@class="ntable"]/tbody/tr/td[contains(text(),"{label}")]'
                f'/following-sibling::td[1]{tail}')[0].strip()

        def linked_field(tree, label):
            """Prefer the text of a link inside the cell, else the cell's own text."""
            try:
                return detail_field(tree, label, '/a/text()')
            except IndexError:
                # The fallback must select text(): an element node has no
                # .strip(), which made the old applicant fallback always fail.
                return detail_field(tree, label)

        key = dk().search_key(com_name)
        index_url = value[3]

        # XPath expressions that do not depend on the record are built once.
        claim_xpath = ('//table[@class="ntable"]/tr'
                       '/td[@class="ea_instructions" and position()=1]/p/text()')
        spec_cell = ('//div[@class="tcaption"]/h3[text()="说明书"]/parent::div'
                     '/following-sibling::table[@class="ntable"]/tr'
                     '/td[@class="ea_instructions"]')
        instructions_xpath = '|'.join(
            f'{spec_cell}/{tag}/text()' for tag in ('h1', 'h2', 'p'))

        upd = f"""
        UPDATE
        `com_info`
        SET
        `status_patent` = 1
        WHERE
        `com_id` = "{com_id}" ;
        """

        count = 0  # running number of records over all pages
        for page in range(first_page, count_page + 1):
            page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=assets&box=zhuanli'
            hds = gh().header()
            hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
            time.sleep(random.randint(1, 2))  # throttle requests
            res_pg = requests.get(page_url, headers=hds).text
            if is_blocked(res_pg, 1):
                continue

            tree_pg = etree.HTML(res_pg)
            # Skip the header row of the listing table.
            for content in tree_pg.xpath('//table/tr[position()>1]'):
                count += 1
                patent_num = content.xpath('td[1]/text()')[0]
                patent_type = content.xpath('td[2]/text()')[0]
                patent_pub_num = content.xpath('td[3]/text()')[0]
                patent_pub_date = content.xpath('td[4]/text()')[0]
                patent_name = content.xpath('td[5]/a/text()')[0].strip()
                patent_link = content.xpath('td[5]/a/@href')[0]
                patent_id = patent_link.split('_com_')[1]
                patent_url = ''.join((index_url, patent_link))

                time.sleep(random.randint(1, 3))
                res_dt = requests.get(patent_url, headers=hds).text
                if is_blocked(res_dt, 2):
                    continue

                tree_dt = etree.HTML(res_dt)
                app_num = detail_field(tree_dt, '申请号')
                app_date = detail_field(tree_dt, '申请日')
                prio_date = detail_field(tree_dt, '优先权日')
                prio_num = detail_field(tree_dt, '优先权号')
                inventor = detail_field(tree_dt, '发明人')
                applicant = linked_field(tree_dt, '申请(专利权)人')
                agency = linked_field(tree_dt, '代理机构')
                agent = detail_field(tree_dt, '代理人')
                ipc = detail_field(tree_dt, 'IPC分类号').replace(
                    ' ', '').replace('\n', '')
                cpc = detail_field(tree_dt, 'CPC分类号')
                app_address = detail_field(tree_dt, '申请人地址')
                app_zip_code = detail_field(tree_dt, '申请人邮编')
                abstract = detail_field(tree_dt, '摘要')
                try:
                    abstract_photo = detail_field(
                        tree_dt, '摘要附图', '/img/@src')
                except IndexError:
                    abstract_photo = '-'  # no abstract image on the page
                # Joining an empty node list yields '', so no fallback needed.
                claim = ''.join(tree_dt.xpath(claim_xpath))
                instructions = ''.join(tree_dt.xpath(instructions_xpath))

                print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                    '-' * 9, count, page, count_page))
                localtime = tm().get_localtime()  # current time
                create_time = localtime
                print(f'公司ID:{com_id} 当前时间:{localtime}')
                print(f'公司名称:{com_name}\n专利ID:{patent_id}')
                print(
                    f'序号:{patent_num}\n专利类型:{patent_type}\n公开(公告)号:{patent_pub_num}\n公开(公告)日期:{patent_pub_date}\n专利名称:{patent_name}\n'
                    f'专利页URL:{patent_url}\n申请号:{app_num}\n申请日期:{app_date}\n优先权日:{prio_date}\n优先权号:{prio_num}\n'
                    f'发明人:{inventor}\n申请(专利权)人:{applicant}\n代理机构:{agency}\n代理人:{agent}\nIPC分类号:{ipc}\n'
                    f'CPC分类号:{cpc}\n申请人地址:{app_address}\n申请人邮编:{app_zip_code}\n摘要:{abstract}\n摘要附图:{abstract_photo}\n'
                    f'权利要求:{claim}\n说明书:{instructions}\n')

                # NOTE(review): scraped text is interpolated directly into the
                # SQL below; a value containing a double quote breaks the
                # statement. Switch to a parameterised query if the db helper
                # offers one - its interface is not visible from here.
                ins = f"""
                INSERT INTO
                `com_patent`
                (`com_id`,`patent_num`,`patent_type`,`patent_pub_num`,`patent_pub_date`,
                `patent_name`,`patent_url`,`app_num`,`app_date`,`prio_date`,
                `prio_num`,`inventor`,`applicant`,`agency`,`agent`,
                `ipc`,`cpc`,`app_address`,`app_zip_code`,`abstract`,`abstract_photo`,
                `claim`,`instructions`,`create_time`,`patent_id`)
                VALUES
                ("{com_id}","{patent_num}","{patent_type}","{patent_pub_num}","{patent_pub_date}",
                "{patent_name}","{patent_url}","{app_num}","{app_date}","{prio_date}",
                "{prio_num}","{inventor}","{applicant}","{agency}","{agent}",
                "{ipc}","{cpc}","{app_address}","{app_zip_code}","{abstract}","{abstract_photo}",
                "{claim}","{instructions}","{create_time}","{patent_id}");
                """
                db().inssts(ins)

                # Mark the company as processed after each stored record.
                db().updsts(upd)

        localtime = tm().get_localtime()  # current time
        print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
        print(f'当前时间:{localtime}\n')
        time.sleep(3)
Example #9
0
    def get_page_info(self):
        """Scrape the licence records of one company into `com_credit_adm_license_bc`.

        Company id, company name and page count come from
        ``AdmLicenseBc().get_page_count()``. Each listing page is fetched,
        every table row is read, printed and inserted, and
        `status_credit_adm_license_bc` of the company in `com_info` is set
        to 1. A row with a missing cell is stored as an all-NULL record. When a
        response looks like an anti-scraping page, a message is printed and the
        method waits for the operator to press Enter before moving on.

        Returns:
            None.
        """
        alb = AdmLicenseBc()
        value = alb.get_page_count()
        com_id = value[0]
        com_name = value[1]
        count_page = value[2]
        key = dk().search_key(com_name)
        index_url = 'https://www.qichacha.com'

        def is_blocked(html):
            """Report an anti-scraping response, pause for the operator, return True."""
            if '<script>window.location.href' in html:
                print('访问频繁,需验证!{get_page_info[2]}')
                input('暂停')
            elif '<script>location.href="/user_login"</script>' in html:
                print('Cookie失效,需更换!{get_page_info[2]}')
                input('程序暂停运行!')
            elif '您的账号访问超频,请稍后访问或联系客服人员' in html:
                print('账号访问超频,请更换账号!{get_page_info[2]}')
                input('程序暂停运行!')
            else:
                return False
            return True

        upd = f"""
        UPDATE
        `com_info`
        SET
        `status_credit_adm_license_bc` = 1
        WHERE
        `com_id` = "{com_id}" ;
        """

        count = 0  # running number of records over all pages
        for page in range(1, count_page + 1):
            page_url = f'{index_url}/company_getinfos?unique={com_id}&companyname={key}&p={page}&tab=run&box=licens'
            hds = gh().header()
            hds.update({'Referer': f'{index_url}/firm_{com_id}.html'})
            time.sleep(random.randint(1, 2))  # throttle requests
            res = requests.get(page_url, headers=hds).text
            if is_blocked(res):
                continue

            tree = etree.HTML(res)
            content_li = tree.xpath(
                '//table[@class="ntable ntable-odd"]/tr[position()>2]')
            for content in content_li:
                count += 1
                try:
                    # Columns 1..7 of the row, in table order.
                    (license_num, license_doc_num, license_doc_name,
                     valid_period_from, valid_period_to, license_office,
                     license_content) = [
                         content.xpath(f'td[{col}]/text()')[0]
                         for col in range(1, 8)
                     ]
                except IndexError:
                    # A cell without text: treat the whole row as empty.
                    license_num = None
                    license_doc_num = None
                    license_doc_name = None
                    valid_period_from = None
                    valid_period_to = None
                    license_office = None
                    license_content = None

                print('\n{0}--总第{1}条----{2}/{3}页--{0}\n'.format(
                    '-' * 9, count, page, count_page))
                create_time = tm().get_localtime()  # current time
                print(f'当前时间:{create_time}')
                print(
                    f'公司ID:{com_id}\n序号:{license_num}\n许可文件编号:{license_doc_num}\n许可文件名称:{license_doc_name}\n有效期自:{valid_period_from}\n'
                    f'有效期至:{valid_period_to}\n许可机关:{license_office}\n许可内容:{license_content}'
                )
                if license_num is None:
                    ins = """
                    INSERT INTO
                    `com_credit_adm_license_bc`
                    (`com_id`,`license_num`,`license_doc_num`,`license_doc_name`,`valid_period_from`,
                    `valid_period_to`,`license_office`,`license_content`,`create_time`)
                    VALUES
                    (NULL,NULL,NULL,NULL,NULL,
                    NULL,NULL,NULL,NULL);
                    """
                else:
                    # NOTE(review): scraped text is interpolated directly into
                    # the SQL; a value containing a double quote breaks the
                    # statement. Use a parameterised query if the db helper
                    # offers one - its interface is not visible from here.
                    ins = f"""
                    INSERT INTO
                    `com_credit_adm_license_bc`
                    (`com_id`,`license_num`,`license_doc_num`,`license_doc_name`,`valid_period_from`,
                    `valid_period_to`,`license_office`,`license_content`,`create_time`)
                    VALUES
                    ("{com_id}","{license_num}","{license_doc_num}","{license_doc_name}","{valid_period_from}",
                    "{valid_period_to}","{license_office}","{license_content}","{create_time}");
                    """
                db().inssts(ins)

                # Mark the company as processed after each stored record.
                db().updsts(upd)

        localtime = tm().get_localtime()  # current time
        print('\n{1}\n{0}数据采集完成!{0}\n{1}'.format('+' * 7, '+' * 25))
        print(f'当前时间:{localtime}\n')
        time.sleep(3)