def get_gu_dong_detail(self, inv_id):
    """Fetch the subscribed / paid-in contribution details of one shareholder.

    :param inv_id: shareholder id (unicode); it is UTF-8 encoded and
        URL-quoted, with ``|`` kept literal, before being put into the URL.
    :return: dict with two lists, u'认缴明细' (subscribed) and u'实缴明细'
        (paid-in). Both lists are empty when the response cannot be decoded
        or parsed, or when it has no 'data' entry.
    """
    quoted_id = urllib.quote(inv_id.encode('utf-8')).replace('%7C', '|')
    url = "http://www.gsxt.gov.cn/corp-query-entprise-info-shareholderDetail-%s.html" % quoted_id
    try:
        payload = json.loads(self.get_request(url).text)
    except (requests.exceptions.ContentDecodingError, ValueError):
        return {u'认缴明细': [], u'实缴明细': []}

    subscribed = []
    paid_in = []
    if 'data' in payload:
        # data[0] holds the paid-in records, data[1] the subscribed ones.
        paid_records = payload['data'][0]
        subscribed_records = payload['data'][1]
        for record in paid_records:
            amount = record['acConAm']
            form = record['conForm_CN']
            date = TimeUtils.get_date(record['conDate'])
            paid_in.append({
                u'实缴出资方式': form,
                u'实缴出资额(万元)': amount,
                u'实缴出资日期': date,
            })
        for record in subscribed_records:
            amount = record['subConAm']
            form = record['conForm_CN']
            date = TimeUtils.get_date(record['conDate'])
            subscribed.append({
                u'认缴出资方式': form,
                u'认缴出资额(万元)': amount,
                u'认缴出资日期': date,
            })
    return {u'认缴明细': subscribed, u'实缴明细': paid_in}
def get_dong_chan_di_ya_detail(self, dy_id, output_data):
    """Fill ``output_data`` with the detail pages of one chattel mortgage.

    Three endpoints are queried for ``dy_id`` (mortgagee, secured principal
    claim, collateral) and the mapped fields of every returned record are
    written into ``output_data``; later records overwrite earlier ones.

    :param dy_id: mortgage identifier inserted into the detail URLs.
    :param output_data: dict that is updated in place.
    :return: None
    """
    # Mortgagee information.
    dyqrxx_url = 'http://www.gsxt.gov.cn/corp-query-entprise-info-mortregpersoninfo-%s.html' % dy_id
    dyqrxx_res = self.get_request(dyqrxx_url)
    dyqrxx_data = json.loads(dyqrxx_res.text)['data']
    dyqrxx_dict = {
        'bLicType_CN': 'dyqr_zzlx',  # mortgagee licence type
        'more': 'dyqr_mc',  # mortgagee name
        'bLicNo': 'dyqr_zzhm',  # licence number
    }

    # Secured principal claim information.
    bdbzzqxx_url = 'http://www.gsxt.gov.cn/corp-query-entprise-info-mortCreditorRightInfo-%s.html' % dy_id
    bdbzzqxx_res = self.get_request(bdbzzqxx_url)
    bdbzzqxx_data = json.loads(bdbzzqxx_res.text)['data']
    bdbzzqxx_dict = {
        'priClaSecKind_CN': 'dbzq_zl',  # kind
        'priClaSecAm': 'dbzq_sl',  # amount
        'warCov': 'dbzq_fw',  # scope of the guarantee
        'remark': 'dbzq_bz',  # remark
    }

    # Collateral information.
    dywxx_url = 'http://www.gsxt.gov.cn/corp-query-entprise-info-mortGuaranteeInfo-%s.html' % dy_id
    dywxx_res = self.get_request(dywxx_url)
    dywxx_data = json.loads(dywxx_res.text)['data']
    dywxx_dict = {
        'guaDes': 'dyw_xq',  # quantity, quality, condition, location, ...
        'own': 'dyw_gs',  # ownership / right of use
        'guaName': 'dyw_mc',  # collateral name
    }

    for record in dyqrxx_data:
        for key, col in dyqrxx_dict.items():
            if key in record:
                output_data[col] = record[key]

    for record in bdbzzqxx_data:
        for key, col in bdbzzqxx_dict.items():
            if key not in record:
                continue
            val = record[key]
            # The amount gets its currency appended. The original compared the
            # mapped column name against the raw key (never true) and indexed
            # the record *list* with a string, so the unit was never added.
            if key == 'priClaSecAm' and val not in (None, u''):
                val = u'%s%s' % (val, record.get('regCapCur_CN') or u'')
            output_data[col] = val
        # Guarantee period, "<from>至<to>".
        output_data['dbzq_qx'] = TimeUtils.get_date(
            record['pefPerForm']) + u'至' + TimeUtils.get_date(record['pefPerTo'])

    for record in dywxx_data:
        for key, col in dywxx_dict.items():
            if key in record:
                output_data[col] = record[key]
def get_gt_challenge(self, t=1):
    """Fetch a geetest ``gt``/``challenge`` pair, retrying on bad replies.

    On success the values are stored (as ``str``) on ``self.challenge`` and
    ``self.gt``.

    :param t: number of the first attempt; attempts run from ``t`` up to 14.
    :raises Exception: when no attempt below 15 produced a usable reply.
    """
    for attempt in range(t, 15):
        self.info(u'第%d次获取challenge。。。' % attempt)
        ts_1 = TimeUtils.get_cur_ts_mil()
        url_1 = "http://sd.gsxt.gov.cn/pub/geetest/register/%s?_=%s" % (TimeUtils.get_cur_ts_mil(), ts_1)
        r_1 = self.get_request(url_1)
        try:
            # Parse once; both fields must be present before anything is stored.
            payload = json.loads(r_1.text)
            challenge = str(payload['challenge'])
            gt = str(payload['gt'])
        except (ValueError, KeyError, TypeError, AttributeError):
            # Malformed or incomplete reply: try again. A bare ``except`` used
            # to sit here and also swallowed KeyboardInterrupt/SystemExit.
            continue
        self.challenge = challenge
        self.gt = gt
        return
    raise Exception(u'获取不到challenge')
def get_chou_cha_jian_cha(self):
    """Fetch the spot-check / inspection records and store them.

    Pages of five records are POSTed for until the reply's ``draw`` reaches
    ``totalPage``. The mapped, cleaned rows are then written to
    ``self.json_result['Spot_Check']``.

    :return: None
    """
    page_no = 1
    records = []
    while True:
        query = {
            'draw': page_no,
            'length': 5,
            'start': 5 * (page_no - 1),
        }
        reply = json.loads(self.post_request(self.chou_cha_jian_cha_url, params=query).text)
        for entry in reply['data']:
            record = {}
            records.append(record)
            for key in entry:
                if key not in TianJinConfig.chou_cha_jian_cha_dict:
                    continue
                col = TianJinConfig.chou_cha_jian_cha_dict[key]
                val = entry[key]
                if type(val) is unicode:
                    # Drop embedded <span>/<div> fragments and all whitespace.
                    val = re.sub('<span[^<]*</span>', '', val)
                    val = re.sub('<div[^<]*</div>', '', val)
                    val = re.sub(r'\s', '', val)
                if col == 'check_type':
                    # '1' and '2' are translated; anything else is kept as is.
                    val = {'1': u'抽查', '2': u'检查'}.get(str(val), val)
                elif col == 'check_date' and val:
                    val = TimeUtils.get_date(val)
                record[col] = val
        if int(reply['draw']) >= int(reply['totalPage']):
            break
        page_no += 1

    family = 'Spot_Check'
    table_id = '16'
    self.json_result[family] = []
    for seq, row in enumerate(records, 1):
        out = {}
        self.json_result[family].append(out)
        out['rowkey'] = '%s_%s_%s_%s%d' % (self.cur_mc, table_id, self.reg_code, self.today, seq)
        out[family + ':registrationno'] = self.reg_code
        out[family + ':enterprisename'] = self.cur_mc
        for name in row:
            out[family + ':' + name] = row[name]
def break_params(self):
    """Build the request parameters that answer the geetest slide captcha.

    Downloads the background and slice images, loads the full background
    from ``../data/fullbg/`` next to this file (downloading and saving it
    first when it is not there yet), computes the slide distance and a
    trail for it, and assembles the parameter dict.

    :return: tuple ``(params, trail_arr[-1][2])``.
    """
    bg_img = self.down_geetest_img(self.bg_url)
    slice_img = self.down_geetest_img(self.slice_url)

    fullbg_name = self.fullbg_url.split('/')[-1]
    fullbg_path = os.path.join(os.path.dirname(__file__), '../data/fullbg/' + fullbg_name)
    if not os.path.exists(fullbg_path):
        print(u'New fullbg image')
        self.down_geetest_img(self.fullbg_url).save(fullbg_path)
    fullbg_img = Image.open(fullbg_path)

    distance = GeetestDistanceV2.get_distance(bg_img, fullbg_img, slice_img)
    trail_arr = BreakTrail.get_trali_arr(distance)
    last_point = trail_arr[-1]
    pass_time = last_point[-1]
    params = {
        'challenge': str(self.challenge),
        'gt': str(self.gt),
        'userresponse': GeetestParamsBroker.imitate_userrespose(distance, self.challenge),
        'passtime': pass_time,
        'imgload': random.randint(70, 150),
        'aa': GeetestParamsBroker.imitate_aa(trail_arr, self.c, self.s),
        'callback': 'geetest_%s' % TimeUtils.get_cur_ts_mil(),
    }
    return params, last_point[2]
def get_gt_challenge(self):
    """Fetch a geetest pair from the bj.gsxt.gov.cn register endpoint.

    Stores the reply's 'challenge' and 'gt' values, converted to ``str``,
    on ``self.challenge`` and ``self.gt``.
    """
    register_url = 'http://bj.gsxt.gov.cn/pc-geetest/register?t=%s' % TimeUtils.get_cur_ts_mil()
    payload = json.loads(self.get_request(register_url).text)
    self.challenge = str(payload['challenge'])
    self.gt = str(payload['gt'])
def get_dong_chan_di_ya(self):
    """Fetch the chattel-mortgage records and store them.

    Pages of five records are POSTed for until the reply's ``draw`` reaches
    ``totalPage``. The mapped, cleaned rows are then written to
    ``self.json_result['Chattel_Mortgage']``.

    :return: None
    """
    draw = 1
    dong_chan_di_ya_data = []
    while True:
        params = {
            'draw': draw,
            'length': 5,
            'start': 5 * (draw - 1),
        }
        r = self.post_request(self.dong_chan_di_ya_url, params=params)
        json_obj = json.loads(r.text)
        for item in json_obj['data']:
            row = {}
            dong_chan_di_ya_data.append(row)
            for k in item:
                if k not in ZongJuConfig.dong_chan_di_ya_dict:
                    continue
                col = ZongJuConfig.dong_chan_di_ya_dict[k]
                val = item[k]
                if isinstance(val, unicode):
                    # Drop embedded <span>/<div> fragments and all whitespace.
                    val = re.sub('<span[^<]*</span>', '', val)
                    val = re.sub('<div[^<]*</div>', '', val)
                    val = re.sub(r'\s', '', val)
                if col == 'chattelmortgage_status':
                    val = u'有效' if str(val) == '1' else u'无效'
                elif col == 'chattelmortgage_guaranteedamount':
                    # Only decorate a real amount: a null amount used to become
                    # the string "None<currency>", and a null/missing currency
                    # raised instead of being treated as empty.
                    if val not in (None, u''):
                        currency = item.get('regCapCur_Cn') or u''
                        val = u'%s%s' % (val, re.sub(r'\s', '', currency))
                elif col == 'chattelmortgage_registrationdate' and val:
                    val = TimeUtils.get_date(val)
                row[col] = val
        if int(json_obj['draw']) >= int(json_obj['totalPage']):
            break
        draw += 1

    family = 'Chattel_Mortgage'
    table_id = '11'
    self.json_result[family] = []
    for seq, row in enumerate(dong_chan_di_ya_data, 1):
        out = {
            'rowkey': '%s_%s_%s_%s%d' % (self.input_company_name, table_id,
                                         self.reg_code, self.today, seq),
            family + ':registrationno': self.reg_code,
            family + ':enterprisename': self.input_company_name,
        }
        for k in row:
            out[family + ':' + k] = row[k]
        self.json_result[family].append(out)
def get_gt_challenge(self):
    """Fetch a geetest pair from the zj.gsxt.gov.cn register endpoint.

    Stores the reply's 'gt' and 'challenge' values on ``self.gt`` and
    ``self.challenge``.
    """
    stamp = TimeUtils.get_cur_ts_mil()
    register_url = "http://zj.gsxt.gov.cn/pc-geetest/register?t=%s" % stamp
    self.info(u'获取gt和challenge...')
    reply = json.loads(self.get_request(register_url).text)
    self.gt = reply['gt']
    self.challenge = reply['challenge']
def get_gt_challenge(self):
    """Fetch a geetest pair from the www.gsxt.gov.cn SearchItemCaptcha endpoint.

    Stores the reply's 'gt' and 'challenge' values on ``self.gt`` and
    ``self.challenge``.
    """
    stamp = TimeUtils.get_cur_ts_mil()
    captcha_url = "http://www.gsxt.gov.cn/SearchItemCaptcha?v=%s" % stamp
    self.info(u'**获取gt和challenge...')
    reply = json.loads(self.get(captcha_url).text)
    self.gt = reply['gt']
    self.challenge = reply['challenge']
def get_jing_ying_yi_chang(self):
    """Fetch the abnormal-operation records and store them.

    Pages of five records are POSTed for until the reply's ``draw`` reaches
    ``totalPage``. The mapped, cleaned rows are then written to
    ``self.json_result['Business_Abnormal']``.

    :return: None
    """
    date_columns = ('abnormal_datesin', 'abnormal_datesout')
    draw = 1
    jing_ying_yi_chang_data = []
    while True:
        params = {
            'draw': draw,
            'length': 5,
            'start': 5 * (draw - 1),
        }
        r = self.post_request(self.jing_ying_yi_chang_url, params=params)
        json_obj = json.loads(r.text)
        for item in json_obj['data']:
            row = {}
            jing_ying_yi_chang_data.append(row)
            for k in item:
                if k not in TianJinConfig.jing_ying_yi_chang_dict:
                    continue
                col = TianJinConfig.jing_ying_yi_chang_dict[k]
                val = item[k]
                if isinstance(val, unicode):
                    # Drop embedded <span>/<div> fragments and all whitespace.
                    val = re.sub('<span[^<]*</span>', '', val)
                    val = re.sub('<div[^<]*</div>', '', val)
                    val = re.sub(r'\s', '', val)
                # Convert either date column, but only when it has a value.
                # The original `a or b and val` parsed as `a or (b and val)`,
                # so an empty 'abnormal_datesin' was still sent to get_date.
                if col in date_columns and val:
                    val = TimeUtils.get_date(val)
                row[col] = val
        if int(json_obj['draw']) >= int(json_obj['totalPage']):
            break
        draw += 1

    family = 'Business_Abnormal'
    table_id = '14'
    self.json_result[family] = []
    for seq, row in enumerate(jing_ying_yi_chang_data, 1):
        out = {
            'rowkey': '%s_%s_%s_%s%d' % (self.cur_mc, table_id, self.reg_code, self.today, seq),
            family + ':registrationno': self.reg_code,
            family + ':enterprisename': self.cur_mc,
        }
        for k in row:
            out[family + ':' + k] = row[k]
        self.json_result[family].append(out)
def submit_search_request(self, keyword, flags=True, account_id='null', task_id='null'):
    """Run a detail query for ``keyword`` and publish the parsed result.

    :param keyword: text to search for.
    :param flags: documented as "company name vs. credit code"; it does not
        influence anything this method does.
    :param account_id: account id copied into the result.
    :param task_id: task id copied into the result.
    :return: 1 when a result page was found and processed, otherwise 0.
    """
    self.json_result = {}
    self.today = TimeUtils.get_today()
    # NOTE(review): as written each parenthesis is replaced by itself;
    # presumably this was meant to normalise full-width parentheses - confirm.
    keyword = keyword.replace('(', u'(').replace(')', u')').replace(' ', '')

    self.tag_a = self.get_tag_a_from_page(keyword)
    if not self.tag_a:
        return 0

    self.json_result['inputCompanyName'] = keyword
    self.json_result['taskId'] = task_id
    self.json_result['accountId'] = account_id
    self.get_ying_ye_zhi_zhao()
    try:
        self.get_zhu_yao_ren_yuan()
    except Exception as e:
        # Best effort: a failure of this table is logged and the remaining
        # tables are still collected.
        self.info(u'主要人员表失败:%s' % e)
    self.get_bian_geng()
    self.get_gu_dong()
    serialized = json.dumps(self.json_result, ensure_ascii=False)
    self.send_msg_to_kafka(serialized)
    return 1
def refresh_api(self):
    """Ask geetest for a new set of captcha images for the current challenge.

    Calls ``refresh.php`` with the current ``challenge`` and ``gt``, unwraps
    the JSONP reply and updates ``self.bg_url``, ``self.fullbg_url``,
    ``self.slice_url`` (all prefixed with ``self.static_server``) and
    ``self.challenge``.
    """
    self.info(u'轨迹有误,刷新极验api...')
    # The second parameter must be "&gt=<gt>". The source had ">=%s" (an
    # HTML-entity-mangled "&gt"), which glued the gt value onto the challenge
    # and never sent a gt parameter at all.
    url_2 = "http://api.geetest.com/refresh.php?" \
            "challenge=%s" \
            "&gt=%s" \
            "&callback=geetest_%s" % (self.challenge, self.gt, TimeUtils.get_cur_ts_mil())
    r_2 = self.get(url_2)
    # Keep only the JSON object inside the JSONP callback wrapper.
    body = r_2.text
    json_2 = json.loads(body[body.find('{'):body.rfind('}') + 1])
    self.bg_url = 'http://' + self.static_server + json_2['bg']
    self.fullbg_url = 'http://' + self.static_server + json_2['fullbg']
    self.slice_url = 'http://' + self.static_server + json_2['slice']
    self.challenge = json_2['challenge']
def get_bian_geng(self):
    """Fetch the change-announcement records and store them.

    Pages of five records are POSTed for until the reply's ``draw`` reaches
    ``totalPage``. The mapped, cleaned rows are then written to
    ``self.json_result['Changed_Announcement']``.

    :return: None
    """
    draw = 1
    bian_geng_data = []
    while True:
        params = {
            'draw': draw,
            'length': 5,
            'start': 5 * (draw - 1),
        }
        r = self.post_request(self.bian_geng_url, params=params)
        json_obj = json.loads(r.text)
        for item in json_obj['data']:
            row = {}
            bian_geng_data.append(row)
            for k in item:
                # Skip fields without a column mapping, as the other table
                # readers do; the unguarded lookup raised KeyError on any
                # extra field in the reply.
                if k not in ZongJuConfig.bian_geng_dict:
                    continue
                col = ZongJuConfig.bian_geng_dict[k]
                val = item[k]
                if isinstance(val, unicode):
                    # Drop embedded <span>/<div> fragments and all whitespace.
                    val = re.sub('<span[^<]*</span>', '', val)
                    val = re.sub('<div[^<]*</div>', '', val)
                    val = re.sub(r'\s', '', val)
                if col == 'changedannouncement_date' and val:
                    val = TimeUtils.get_date(val)
                row[col] = val
        if int(json_obj['draw']) >= int(json_obj['totalPage']):
            break
        draw += 1

    family = 'Changed_Announcement'
    table_id = '05'
    self.json_result[family] = []
    for seq, row in enumerate(bian_geng_data, 1):
        out = {
            'rowkey': '%s_%s_%s_%s%d' % (self.input_company_name, table_id,
                                         self.reg_code, self.today, seq),
            family + ':registrationno': self.reg_code,
            family + ':enterprisename': self.input_company_name,
        }
        for k in row:
            out[family + ':' + k] = row[k]
        self.json_result[family].append(out)
def get_gt_challenge(self, t=0):
    """Fetch a geetest pair from hb.gsxt.gov.cn, retrying on non-JSON replies.

    A reply counts as usable when its stripped text starts with ``{`` and
    ends with ``}``; its 'challenge' and 'gt' values are then stored (as
    ``str``) on ``self.challenge`` and ``self.gt``. Otherwise the method
    sleeps one second, resets the session and tries again.

    :param t: current attempt counter.
    :raises Exception: when the attempt counter reaches 15.
    """
    attempt = t
    while True:
        if attempt == 15:
            raise Exception(u'获取gt和challenge失败')
        register_url = 'http://hb.gsxt.gov.cn/registerValidate.jspx?t=%s' % TimeUtils.get_cur_ts_mil()
        reply = self.get_request(register_url)
        stripped = reply.text.strip()
        if stripped.startswith('{') and stripped.endswith('}'):
            payload = json.loads(reply.text)
            self.challenge = str(payload['challenge'])
            self.gt = str(payload['gt'])
            return
        time.sleep(1)
        self.reset_session()
        attempt += 1
def get_tag_a_from_page(self, keyword):
    """Solve the arithmetic captcha and submit a search for ``keyword``.

    The captcha answer is verified with the checkCode endpoint, then the
    entity-list endpoint is queried. The intermediate URLs, the replies and
    the session cookies are printed for debugging.

    NOTE(review): the reply is only printed; nothing is parsed or returned,
    so callers always receive ``None`` - confirm whether this is unfinished.

    :param keyword: company name / code to search for.
    :return: None
    """
    import urllib  # function scope: only needed here to quote the query string

    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 Firefox/52.0",
        "Host": "qyxy.baic.gov.cn",
    }

    cur_ts_mil = TimeUtils.get_cur_ts_mil()
    yzm_url = "http://qyxy.baic.gov.cn/CheckCodeYunSuan?currentTimeMillis=%s" % cur_ts_mil
    yzm = self.get_yzm(yzm_url)
    print('yzm %s' % (yzm,))

    # The second parameter is "&currentTimeMillis"; the source carried the
    # HTML-entity-mangled form "¤tTimeMillis", which merged it into check_code.
    check_url = "http://qyxy.baic.gov.cn/login/loginAction!checkCode.dhtml?check_code=%s&currentTimeMillis=%s&random=%s" \
                % (yzm, cur_ts_mil, random.randint(10000, 100000))
    print(check_url)
    r = self.post_request(check_url, headers=dict(headers))
    print(r.text)

    # Search for the caller's keyword. The query string used to be a
    # hard-coded, pre-quoted company name, so ``keyword`` was ignored.
    query = keyword.encode('utf-8') if isinstance(keyword, unicode) else keyword
    search_url = "http://qyxy.baic.gov.cn/es/esAction!entlist.dhtml?currentTimeMillis=%s&credit_ticket=%s&check_code=%s&idFlag=qyxy&module=&queryStr=%s" % (
        cur_ts_mil, self.credit_ticket, yzm, urllib.quote(query))
    print(search_url)
    r = self.post_request(search_url, headers=dict(headers))
    r.encoding = 'utf-8'
    print(r.text)
    print(self.session.cookies)
def run(self):
    """Process pending companies from ``self.src_table`` until none are left.

    Each round picks one random row among (up to) 30 rows with
    ``update_status=-1``, marks it ``-2`` while it is being worked on, runs
    the search and writes the resulting status back. A search that raises is
    logged and the row is put back to ``-1``. Running totals are logged
    after every row.

    NOTE(review): ``mc`` is interpolated into the SQL text unescaped, so a
    name containing a quote breaks the statement. Switch to parameterised
    queries if the ``MySQL`` helper supports them.
    """
    cnt_0 = 0  # search returned status 0 ("no result")
    cnt_1 = 0  # search returned status 1 ("has result")
    cnt_2 = 0  # any other status, including failures (-1)
    sql_3_tpl = "update %s set update_status=%d, last_update_time=now() " \
                "where mc='%s'"
    while True:
        sql_1 = "select mc,province from " \
                "(" \
                "select * from %s where update_status=-1 limit 30 " \
                ") t " \
                "order by RAND() limit 1" % self.src_table
        res_1 = MySQL.execute_query(sql_1)
        if not len(res_1) > 0:
            self.info(u'更新完毕')
            break

        mc = res_1[0][0]
        sql_2 = "update %s set update_status=-2 " \
                "where mc='%s'" \
                % (self.src_table, mc)
        MySQL.execute_update(sql_2)
        try:
            update_status = self.searcher.submit_search_request(keyword=mc)
            # Formatted inside the try on purpose: a non-numeric status is
            # treated like any other search failure.
            sql_3 = sql_3_tpl % (self.src_table, update_status, mc)
        except Exception as e:
            # format_exc() takes an optional *limit*, not the exception; the
            # exception used to be passed in that position.
            self.info(traceback.format_exc())
            update_status = -1
            self.info(str(e))
            sql_3 = sql_3_tpl % (self.src_table, update_status, mc)
        MySQL.execute_update(sql_3)

        if update_status == 0:
            cnt_0 += 1
        elif update_status == 1:
            cnt_1 += 1
        else:
            cnt_2 += 1
        self.info(u'查询有结果: %d, 查询无结果: %d, 查询失败:%d' % (cnt_1, cnt_0, cnt_2))
def submit_search_request(self, keyword, account_id='null', task_id='null'):
    """
    Search for ``keyword``, collect the detail tables and publish the result.

    NOTE(review): the block nesting below is reconstructed from a collapsed
    source line - confirm that ``process_mc`` belongs to the first ``else``
    and that the final ``else`` pairs with ``get_ying_ye_zhi_zhao()``.

    :param keyword: company name or credit code; ``check(keyword)`` decides
        which one it is treated as.
    :param account_id: account id copied into the result.
    :param task_id: task id copied into the result.
    :return: status code - 0 when no result page was found, 1 when the
        keyword was a credit code or equals ``self.input_company_name``,
        888 otherwise, and -1 when ``get_ying_ye_zhi_zhao()`` returned a
        falsy value.
    """
    # Only a keyword that is not recognised by check() is passed through
    # process_mc().
    if check(keyword):
        is_xydm = True
    else:
        is_xydm = False
        keyword = self.process_mc(keyword)
    update_status = 0
    self.json_result.clear()
    # keyword = self.process_mc(keyword)
    self.json_result['accountId'] = account_id
    self.json_result['taskId'] = task_id
    self.today = TimeUtils.get_today()
    self.tag_a = self.get_tag_a_from_page(keyword)
    # presumably get_tag_a_from_page() sets self.input_company_name - verify.
    self.json_result['inputCompanyName'] = self.input_company_name
    # print self.tag_a
    if self.tag_a:
        if is_xydm or self.input_company_name == keyword:
            update_status = 1
        else:
            # The company that was found is named differently from the keyword.
            update_status = 888
        if self.get_ying_ye_zhi_zhao():
            self.get_zhu_yao_ren_yuan()
            self.get_bian_geng()
            self.get_gu_dong()
            # self.get_gu_quan_chu_zhi()
            # self.get_dong_chan_di_ya()
            # self.get_jing_ying_yi_chang()
            # self.get_chou_cha_jian_cha()
            # self.get_xing_zheng_chu_fa()
            self.send_msg_to_kafka(
                json.dumps(self.json_result, ensure_ascii=False))
        else:
            update_status = -1
    return update_status
def load_api(self):
    """Load the geetest slide-captcha parameters for the current challenge.

    Queries ``gettype.php`` for the script path, then ``get.php`` for the
    captcha description, and stores ``s``, ``c``, the static server, the
    three image URLs and the new challenge on ``self``. When the reply
    names no static server, the geetest cookies are cleared, a new
    gt/challenge pair is fetched and the whole procedure starts over.
    """
    self.info(u'获取极验api...')
    ts = TimeUtils.get_cur_ts_mil()
    type_url = 'http://api.geetest.com/gettype.php?gt=%s&callback=geetest_%s' % (self.gt, ts)
    type_reply = self.get(type_url)
    # Strip the JSONP wrapper "geetest_<ts>(" ... ")".
    type_info = json.loads(type_reply.text[len('geetest_%s(' % ts):-1])
    if 'data' in type_info:
        path = type_info['data']['path']
    else:
        path = type_info['path']

    get_url = "http://api.geetest.com/get.php?" \
              "gt=%s" \
              "&challenge=%s" \
              "&product=%s" \
              "&offline=false" \
              "&protocol=" \
              "&type=slide" \
              "&path=%s" \
              "&callback=geetest_%s" % (self.gt, self.challenge, self.geetest_product, path,
                                        TimeUtils.get_cur_ts_mil())
    get_reply = self.get(get_url, headers={})
    body = get_reply.text
    captcha = json.loads(body[body.find('{'):body.rfind('}') + 1])
    self.s = captcha['s']
    self.c = captcha['c']

    # The server list is published under either of two key spellings.
    for servers_key in ('staticservers', 'static_servers'):
        if servers_key in captcha:
            self.static_server = captcha[servers_key][0]
            prefix = 'http://' + self.static_server
            self.bg_url = prefix + captcha['bg']
            self.fullbg_url = prefix + captcha['fullbg']
            self.slice_url = prefix + captcha['slice']
            self.challenge = captcha['challenge']
            return

    self.session.cookies.clear(domain='api.geetest.com')
    self.get_gt_challenge()
    self.load_api()
def info(msg):
    """Print ``msg`` prefixed with the current time and ' ->'.

    :param msg: the message to print.
    """
    try:
        print TimeUtils.get_cur_time() + ' ->', msg
    except UnicodeEncodeError:
        # Best effort: a message that cannot be encoded for the output
        # stream is dropped silently.
        pass
def get_xing_zheng_chu_fa(self):
    """Fetch the administrative-penalty records and store them.

    Pages of five records are POSTed for until the reply's ``draw`` reaches
    ``totalPage``. Records whose five core fields are all empty are
    skipped. The mapped, cleaned rows are then written to
    ``self.json_result['Administrative_Penalty']``.

    :return: None
    """
    core_fields = ('illegActType', 'penAuth_CN', 'penContent', 'penDecIssDate', 'penDecNo')
    page_no = 1
    penalties = []
    while True:
        query = {
            'draw': page_no,
            'length': 5,
            'start': 5 * (page_no - 1),
        }
        reply = json.loads(self.post_request(self.xing_zheng_chu_fa_url, params=query).text)
        for entry in reply['data']:
            if not any(entry[name] for name in core_fields):
                continue
            penalty = {}
            penalties.append(penalty)
            for key in entry:
                if key not in TianJinConfig.xing_zheng_chu_fa_dict:
                    continue
                col = TianJinConfig.xing_zheng_chu_fa_dict[key]
                val = entry[key]
                if type(val) is unicode:
                    # Drop embedded <span>/<div> fragments and all whitespace.
                    val = re.sub('<span[^<]*</span>', '', val)
                    val = re.sub('<div[^<]*</div>', '', val)
                    val = re.sub(r'\s', '', val)
                if val and col in ('penalty_decisiondate', 'penalty_announcedate'):
                    val = TimeUtils.get_date(val)
                penalty[col] = val
            # A PDF decision document is exposed as a download link.
            decision = entry['vPunishmentDecision']
            if decision and decision['fileName'].endswith('.pdf'):
                penalty['penalty_details'] = 'http://tj.gsxt.gov.cn/doc/%s/casefiles/%s' % (
                    entry['nodeNum'], decision['fileName'])
        if int(reply['draw']) >= int(reply['totalPage']):
            break
        page_no += 1

    family = 'Administrative_Penalty'
    table_id = '13'
    self.json_result[family] = []
    for seq, row in enumerate(penalties, 1):
        out = {}
        self.json_result[family].append(out)
        out['rowkey'] = '%s_%s_%s_%s%d' % (self.cur_mc, table_id, self.reg_code, self.today, seq)
        out[family + ':registrationno'] = self.reg_code
        out[family + ':enterprisename'] = self.cur_mc
        for name in row:
            out[family + ':' + name] = row[name]
def get_gu_quan_chu_zhi(self):
    """Fetch the equity-pledge records and store them.

    Pages of five records are POSTed for until the reply's ``draw`` reaches
    ``totalPage``. The mapped, cleaned rows are then written to
    ``self.json_result['Equity_Pledge']``.

    :return: None
    """
    page_no = 1
    pledges = []
    while True:
        query = {
            'draw': page_no,
            'length': 5,
            'start': 5 * (page_no - 1),
        }
        reply = json.loads(self.post_request(self.gu_quan_chu_zhi_url, params=query).text)
        for entry in reply['data']:
            pledge = {}
            pledges.append(pledge)
            for key in entry:
                if key not in TianJinConfig.gu_quan_chu_zhi_dict:
                    continue
                col = TianJinConfig.gu_quan_chu_zhi_dict[key]
                val = entry[key]
                if type(val) is unicode:
                    # Drop embedded <span>/<div> fragments and all whitespace.
                    val = re.sub('<span[^<]*</span>', '', val)
                    val = re.sub('<div[^<]*</div>', '', val)
                    val = re.sub(r'\s', '', val)
                if col == 'equitypledge_status':
                    val = u'有效' if str(val) == '1' else u'无效'
                elif col == 'equitypledge_amount' and val:
                    # Append the record's own unit, or u'万元' when it has none.
                    unit = entry['pledAmUnit']
                    amount = str(val)
                    if unit:
                        val = amount + re.sub(r'\s', '', unit)
                    else:
                        val = amount + u'万元'
                elif col == 'equitypledge_registrationdate' and val:
                    val = TimeUtils.get_date(val)
                pledge[col] = val
        if int(reply['draw']) >= int(reply['totalPage']):
            break
        page_no += 1

    family = 'Equity_Pledge'
    table_id = '12'
    self.json_result[family] = []
    for seq, row in enumerate(pledges, 1):
        out = {}
        self.json_result[family].append(out)
        out['rowkey'] = '%s_%s_%s_%s%d' % (self.cur_mc, table_id, self.reg_code, self.today, seq)
        out[family + ':registrationno'] = self.reg_code
        out[family + ':enterprisename'] = self.cur_mc
        for name in row:
            out[family + ':' + name] = row[name]
def get_gt_challenge(self):
    """Fetch a geetest challenge from the gd.gsxt.gov.cn verify endpoint.

    Stores the reply's 'challenge' value, converted to ``str``, on
    ``self.challenge``.
    """
    stamp = TimeUtils.get_cur_ts_mil()
    start_url = 'http://gd.gsxt.gov.cn/aiccips//verify/start.html?t=%s' % stamp
    payload = json.loads(self.get_request(start_url).text)
    self.challenge = str(payload['challenge'])
def get_gt_challenge(self):
    """Fetch a geetest pair from the www.jsgsj.gov.cn validation servlet.

    Stores the reply's 'gt' and 'challenge' values on ``self.gt`` and
    ``self.challenge``.
    """
    stamp = TimeUtils.get_cur_ts_mil()
    register_url = "http://www.jsgsj.gov.cn:58888/province/geetestViladateServlet.json?register=true&t=%s" % stamp
    self.info(u'获取gt和challenge...')
    reply = json.loads(self.get_request(register_url).text)
    self.gt = reply['gt']
    self.challenge = reply['challenge']