def main_table_punishoff_subinfo(date):
    """Fetch punishment records for the selected rows and store them.

    Every row returned by ``func_select_punishoff_f_subinfo`` whose second
    field is non-empty is processed: the first result page is requested,
    parsed and written to the punishoff table, then any further pages
    reported by ``func_re_main_nums`` are requested via ``func_post``.
    Afterwards the ``nums`` value from ``func_re_main_nums`` (0 when the
    pager could not be unpacked) is written to the main table.

    Args:
        date: Passed unchanged to the ``db.table_*`` constructors.
    """
    main_table = db.table_main(date)
    client = rq.request_punishoff_subinfo()
    punish_table = db.table_punishoff(date)

    def store_page(key, html):
        # Parse one result page and write every record found on it.
        for record in client.func_re_punishoff(html):
            punish_table.func_write_table(
                (key, key + '_' + record[0]) + record)

    for selection in punish_table.func_select_punishoff_f_subinfo():
        if len(selection[1]) == 0:
            continue

        key = selection[0]
        client.func_request_header(key)
        first_page = client.func_get(key).text

        # The marker text means "no information at all"; such pages are
        # skipped without touching either table.
        if '没有任何信息' in first_page:
            continue

        store_page(key, first_page)

        try:
            nums, pages, _ = client.func_re_main_nums(first_page)
        except ValueError:
            # Pager could not be unpacked: treat as a single page with a
            # zero count and remember the key for later inspection.
            pages = 1
            nums = 0
            with open("punishoff.txt", "a+", encoding='utf-8') as failed:
                failed.write(key + '\n')

        # Page 1 is already stored; walk the remaining pages.
        for page_no in range(2, int(pages) + 1):
            store_page(key, client.func_post(key, page_no).text)

        # NOTE(review): the column name below looks like a misspelling of
        # an overseas-branch column and is used here for punishment data;
        # it is kept verbatim - confirm against the main table schema.
        main_table.func_update(key, 'pages_overseasbrach', nums)
def main_table_basic(date):
    """Fetch the detail page of every selected row and store its sections.

    For each row returned by ``func_select_swsbm_web`` the two quoted
    values embedded in the row's second field are extracted and passed to
    ``func_get``.  Pages containing the marker ``'分所编号'`` are handled by
    ``func_parse_sub``; all other pages go through the basic parser followed
    by the subsidiaries, qualifications and overseas-CPA parsers, with the
    number of rows of each written to the main table.

    Args:
        date: Passed unchanged to the ``db.table_*`` constructors.
    """
    client = rq.request_basic()
    main_table = db.table_main(date)
    basic_table = db.table_basic(date)
    qualifications_table = db.table_qualifications(date)
    overseascpa_table = db.table_overseascpa(date)
    subsidiaries_table = db.table_subsidiaries(date)
    subinfo_table = db.table_subinfo(date)

    # (parser, destination table, main-table counter column); the parsers
    # are applied in this order, each receiving the text returned by the
    # previous one.
    child_sections = (
        (client.func_parse_basic_p_subsidiaries, subsidiaries_table,
         'no_subsidiaries'),
        (client.func_parse_basic_p_qualifications, qualifications_table,
         'no_qualifications'),
        (client.func_parse_basic_p_overseascpa, overseascpa_table,
         'no_overseasCPA'),
    )

    for selection in main_table.func_select_swsbm_web():
        # The second field holds single-quoted arguments; the first and
        # second quoted values are items 1 and 3 of the split.
        quoted = selection[1].split("'")
        guid, code = quoted[1], quoted[3]
        key = selection[0]
        remainder = client.func_get(guid, code).text

        # Marker text means "branch office number".
        if '分所编号' in remainder:
            sub_info, remainder = client.func_parse_sub(remainder)
            subinfo_table.func_write_table((key,) + sub_info)
        else:
            info, remainder = client.func_parse_basic_p_basic(remainder)
            basic_table.func_write_table(info)
            for parse, table, counter_column in child_sections:
                rows, remainder = parse(remainder)
                for row in rows:
                    table.func_write_table((key, key + '_' + row[0]) + row)
                main_table.func_update(key, counter_column, len(rows))

        # Dump whatever text the parsers handed back if it is longer than
        # 30 characters (presumably unparsed content worth inspecting).
        # NOTE(review): assumed to apply to both branches above - confirm.
        if len(remainder) > 30:
            with open(key + ".txt", "w", encoding='utf-8') as dump:
                dump.write(remainder)
def main_table_main(date):
    """Populate the main table from the per-location listing pages.

    For every entry of ``cicpa.cicpa().locations`` except ``'全部'`` the
    first listing page is requested and its totals are read with
    ``func_re_main_nums``.  When the count already stored for that location
    (``func_count_by_sf``) differs from the reported total, every listing
    page is fetched and each parsed record is written to the main table,
    extended with the location key, the page number and 36 zero-filled
    columns.

    Args:
        date: Passed unchanged to ``db.table_main``.
    """
    firms = cicpa.cicpa()
    client = rq.request_main()
    main_table = db.table_main(date)

    # Repeat the whole sweep to check for omissions.
    # NOTE(review): the original comment said four passes, but
    # range(1, 4) yields three; the code is kept as is.
    for _ in range(1, 4):
        for region in firms.locations:
            # '全部' means "all"; that entry is not queried.
            if region == '全部':
                continue

            first_page = client.func_request_main_post(
                firms.locations[region], 1)
            nums, pages, _ = client.func_re_main_nums(first_page.text)

            stored = main_table.func_count_by_sf(region)
            if stored == int(nums):
                continue

            for page_no in range(1, int(pages) + 1):
                listing = client.func_request_main_post(
                    firms.locations[region], page_no)
                for record in client.func_re_main(listing.text):
                    main_table.func_write_table(
                        tuple(record) + (region, page_no) + (0,) * 36)
                # Random 1-9 second pause between page requests.
                # NOTE(review): assumed to run once per fetched page -
                # confirm the intended placement.
                time.sleep(random.randrange(1, 10))
def main_table_overseasbranch(date):
    """Fetch overseas-branch records for the selected rows and store them.

    Every row returned by ``func_select_overseasbranch`` whose second field
    is non-empty is processed: the two quoted values in that field are
    extracted, the first result page is requested, parsed and written to
    the overseas-branch table, and any further pages reported by
    ``func_re_main_nums`` are requested via ``func_post``.  The ``nums``
    value (0 when the pager could not be unpacked) is then written to the
    ``'no_overseasbranch'`` column of the main table.

    Args:
        date: Passed unchanged to the ``db.table_*`` constructors.
    """
    main_table = db.table_main(date)
    client = rq.request_overseasbranch()
    branch_table = db.table_overseasbranch(date)

    def store_page(key, html):
        # Parse one result page and write every record found on it.
        for record in client.func_re_overseasbranch(html):
            branch_table.func_write_table(
                (key, key + '_' + record[0]) + record)

    for selection in branch_table.func_select_overseasbranch():
        if len(selection[1]) == 0:
            continue

        # The first and second single-quoted values are items 1 and 3 of
        # the split.
        quoted = selection[1].split("'")
        guid, code = quoted[1], quoted[3]
        key = selection[0]

        client.func_request_header(guid, code)
        first_page = client.func_get(guid, code).text
        store_page(key, first_page)

        try:
            nums, pages, _ = client.func_re_main_nums(first_page)
        except ValueError:
            # Pager could not be unpacked: treat as a single page with a
            # zero count and remember the key for later inspection.
            pages = 1
            nums = 0
            with open("overseasbranch.txt", "a+", encoding='utf-8') as failed:
                failed.write(key + '\n')

        # Page 1 is already stored; walk the remaining pages.
        for page_no in range(2, int(pages) + 1):
            store_page(key, client.func_post(guid, code, page_no).text)

        main_table.func_update(key, 'no_overseasbranch', nums)
def main_table_cpainfo(date, start=90000):
    """Fetch each selected CPA detail page and store all of its sections.

    For every row of ``func_select_cpainfo`` from index ``start`` onward,
    the two quoted values in the row's second field are extracted and
    passed to ``func_get``.  The page text is run through the basic parser
    and then through seven section parsers in a fixed order, each receiving
    the text returned by the previous one.  The rows of each section are
    written to their own table and the row count is written to a column of
    the main table.  If more than 2200 characters remain after the last
    parser, the remaining text is dumped to a file under ``fl.html_path``.

    Args:
        date: Passed unchanged to the ``db.table_*`` constructors.
        start: Index of the first selection to process.  Defaults to
            90000, the offset that used to be hard-coded in the loop, so
            existing callers behave as before; pass 0 to process every
            selection.

    Raises:
        ValueError: If ``start`` is negative (a negative index would
            silently wrap around to the end of the selection list).
    """
    if start < 0:
        raise ValueError("start must be non-negative, got %r" % (start,))

    main_table = db.table_main(date)
    client = rq.request_cpainfo()
    cpainfo_table = db.table_cpainfo(date)
    otherqualis_table = db.table_cpainfo_otherqualis(date)
    overseasqualis_table = db.table_cpainfo_overseasqualis(date)
    rewards_table = db.table_cpainfo_rewards(date)
    cpaorgan_table = db.table_cpainfo_cpaorgan(date)
    otherorgan_table = db.table_cpainfo_otherorgan(date)
    congress_table = db.table_cpainfo_congress(date)
    otherparty_table = db.table_cpainfo_otherparty(date)

    # (parser, destination table, main-table counter column), applied in
    # this order.
    # NOTE(review): the counter column names do not obviously match the
    # section being counted, and 'no_overseasbranch' is also written by
    # main_table_overseasbranch; they are kept verbatim - confirm against
    # the main table schema.
    sections = (
        (client.func_parse_cpa_otherqualis, otherqualis_table,
         'no_subinfo'),
        (client.func_parse_cpa_overseasqualis, overseasqualis_table,
         'pages_subinfo'),
        (client.func_parse_cpa_rewards, rewards_table,
         'subinfo_NY'),
        (client.func_parse_cpa_cpaorgan, cpaorgan_table,
         'cpainfo'),
        (client.func_parse_cpa_otherorgan, otherorgan_table,
         'pages_cpainfo'),
        (client.func_parse_cpa_congress, congress_table,
         'cpainfo_NY'),
        (client.func_parse_cpa_otherparty, otherparty_table,
         'no_overseasbranch'),
    )

    selections = cpainfo_table.func_select_cpainfo()
    for index in range(start, len(selections)):
        selection = selections[index]
        key = selection[0]

        # The first and second single-quoted values are items 1 and 3 of
        # the split.
        quoted = selection[1].split("'")
        guid, code = quoted[1], quoted[3]

        remainder = client.func_get(guid, code).text

        basic, remainder = client.func_parse_cpa_basic(remainder)
        # Field 12 of the basic record is reused as a key component for
        # every child row and as the dump file name below.
        person_ref = basic[12]
        cpainfo_table.func_write_table(
            (key, key + '_' + basic[0] + '_' + person_ref) + basic)

        for parse, table, counter_column in sections:
            rows, remainder = parse(remainder)
            for row in rows:
                table.func_write_table(
                    (key, person_ref,
                     key + '_' + person_ref + '_' + row[0], code) + row)
            main_table.func_update(key, counter_column, len(rows))

        # Keep the leftover text for inspection when it is longer than
        # 2200 characters (presumably content no parser recognised).
        if len(remainder) > 2200:
            with open(fl.html_path + person_ref + ".txt", "w",
                      encoding='utf-8') as dump:
                dump.write(remainder)