def parse(self, response):
    """Qualification certificates -- company basic info (从业资格证书--公司基本信息).

    Maps each JSON record through ``configs1`` into a SacItem, then issues
    two follow-up train-line-register searches per company (one per form in
    ``asc_data``), handled by :meth:`cctparse`.
    """
    request = checkTimeError(response)
    if request:
        # This callback is a generator, so a plain ``return request`` would be
        # swallowed by Scrapy; the retry request must be yielded to be scheduled.
        yield request
        return
    item = SacItem()
    js = json.loads(response.text)
    configs = configs1
    for json_ in js:
        result = dict()
        for config in configs['data']:
            result[config['En']] = json_[config['v']]
            result[config['En']] = S.replace_invalid_char(result[config['En']])
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        CropRowID = result['CropRowID']
        datas = asc_data(CropRowID)
        headers = {'User-Agent': generate_user_agent()}
        # Two searches (datas[0] and datas[1]) -- presumably one per
        # registration category returned by asc_data; TODO confirm.
        yield scrapy.FormRequest(
            "http://person.sac.net.cn/pages/registration/train-line-register!search.action",
            formdata=datas[0],
            headers=headers,
            meta={'CropRowID': CropRowID},
            priority=0,
            callback=self.cctparse)
        yield scrapy.FormRequest(
            "http://person.sac.net.cn/pages/registration/train-line-register!search.action",
            formdata=datas[1],
            headers=headers,
            meta={'CropRowID': CropRowID},
            priority=0,
            callback=self.cctparse)
        yield item
def senior_executiveParse(self, response):
    """Securities companies -- senior executive info (证券公司--高管信息).

    One SacItem is yielded per executive record, keyed for upsert into
    ``dbo.SAC_executive``.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    orgid = response.meta['orgid']
    js = json.loads(response.text)
    configs = {
        'list': {'v': '', 't': '',
                 # Original listed 'OFFICE_DATE' twice; the duplicate is redundant
                 # for a key list and has been removed.
                 'keys': ['orgid', 'name', 'OFFICE_DATE'],
                 'db': 'dbo.SAC_executive'},
        'data': [
            {'n': '现任职务', 'En': 'CURRENT_POSITION', 't': 'json', 'v': 'EI_CURRENT_POSITION', 'dt': ''},
            {'n': '姓名', 'En': 'name', 't': 'json', 'v': 'EI_NAME', 'dt': ''},
            {'n': '任职起始时间', 'En': 'OFFICE_DATE', 't': 'json', 'v': 'EI_OFFICE_DATE', 'dt': ''},
            {'n': '性别', 'En': 'gender', 't': 'json', 'v': 'GC_ID', 'dt': ''},
        ],
    }
    for js_ in js:
        result = dict()
        result['orgid'] = orgid
        for config in configs['data']:
            k = config['En']
            result[k] = S.select_content(js_, config)
            result[k] = S.replace_invalid_char(result[k])
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        yield item
def otcInfoParse4(self, response):
    """Securities rating agencies -- senior managers (证券评级机构--高管人员信息).

    One SacItem per manager record, destined for ``dbo.SAC_otcseniorExecutive``.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    orgid = response.meta['otcid']
    js = json.loads(response.text)
    configs = {
        'list': {'v': '', 't': '',
                 'keys': ['NAME', 'orgid', 'PRACTITIONERS_START_DATE'],
                 'db': 'dbo.SAC_otcseniorExecutive'},
        'data': [
            {'n': '中国注册会计师资格证书号码', 'En': 'ACCOUNTANTS_NO', 't': 'json', 'v': 'EI_ACCOUNTANTS_NO', 'dt': ''},
            {'n': '现任职务', 'En': 'CURRENT_POSITION', 't': 'json', 'v': 'EI_CURRENT_POSITION', 'dt': ''},
            {'n': '是否通过证券评级业务高级管理人员资质测试', 'En': 'ISPASS_SENIOR_MANAGEMENT', 't': 'json', 'v': 'EI_ISPASS_SENIOR_MANAGEMENT', 'dt': ''},
            {'n': '姓名', 'En': 'NAME', 't': 'json', 'v': 'EI_NAME', 'dt': ''},
            {'n': '任职起始时间', 'En': 'PRACTITIONERS_START_DATE', 't': 'json', 'v': 'EI_PRACTITIONERS_START_DATE', 'dt': ''},
            {'n': '证券从业人员证书号码', 'En': 'SECURITIES_PROFESSIONALS', 't': 'json', 'v': 'EI_SECURITIES_PROFESSIONALS', 'dt': ''},
            {'n': '性别', 'En': 'Gender', 't': 'json', 'v': 'GC_ID', 'dt': ''}
        ],
    }
    for js_ in js:
        result = dict()
        result['orgid'] = orgid
        for config in configs['data']:
            k = config['En']
            result[k] = S.select_content(js_, config)
            result[k] = S.replace_invalid_char(result[k])
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        yield item
def otcInfoParse3(self, response):
    """Securities rating agencies -- license images (证券评级机构--执照图片).

    Builds a download URL for each license file and yields one SacItem per
    file, destined for ``dbo.SAC_otclicenseCopy``.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    orgid = response.meta['orgid']
    js = json.loads(response.text)
    configs = {
        'list': {'v': '', 't': '',
                 'keys': ['REG_ID', 'ZRNI_NAME'],
                 'db': 'dbo.SAC_otclicenseCopy'},
        'data': [
            {'n': 'REGID', 'En': 'REG_ID', 't': 'json', 'v': 'MRI_REG_ID', 'dt': ''},
            {'n': '证书ID', 'En': 'ZRNI_ID', 't': 'json', 'v': 'ZRNI_ID', 'dt': ''},
            {'n': '证书name', 'En': 'ZRNI_NAME', 't': 'json', 'v': 'ZRNI_NAME', 'dt': ''},
            {'n': '证书path', 'En': 'ZRNI_PATH', 't': 'json', 'v': 'ZRNI_PATH', 'dt': ''},
            {'n': '证书类型', 'En': 'ZRNI_TYPE', 't': 'json', 'v': 'ZRNI_TYPE', 'dt': ''},
        ],
    }
    for js_ in js:
        result = dict()
        result['orgid'] = orgid
        for config in configs['data']:
            k = config['En']
            result[k] = S.select_content(js_, config)
            result[k] = S.replace_invalid_char(result[k])
        # The template previously lacked the {filename} placeholder, so the
        # computed filename kwarg was silently ignored by str.format and the
        # URL carried no fileName value; restored here.
        formtxt = 'http://jg.sac.net.cn/pages/publicity/train-line-register!writeFile.action?inputPath={path}&fileName={filename}'
        # Double percent-encoding mirrors how the server decodes the name twice.
        filename = urllib.parse.quote(urllib.parse.quote(result['ZRNI_NAME'].encode('utf-8')).encode('utf-8'))
        result['url'] = formtxt.format(path=result['ZRNI_PATH'], filename=filename)
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        yield item
def otcInfoParse2(self, response):
    """Securities rating agencies -- basic info, part 2 (证券评级机构--基本信息2).

    Enriches the ``result`` dict carried in ``response.meta`` with the fields
    of ``otcInfoBaseconfigs2``, yields the item, then requests the agency's
    license-file list (handled by :meth:`otcInfoParse3`).
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    js = json.loads(response.text)
    configs = otcInfoBaseconfigs2
    for js_ in js:
        # NOTE(review): the same meta dict is reused for every record --
        # presumably the response contains a single record; verify upstream.
        result = response.meta['result']
        for config in configs['data']:
            k = config['En']
            result[k] = S.select_content(js_, config)
            result[k] = S.replace_invalid_char(result[k])
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        yield item
        yield scrapy.FormRequest(
            'http://jg.sac.net.cn/pages/publicity/resource!search.action',
            formdata={
                'filter_EQS_mri_reg_id': str(result['REG_ID']),
                'sqlkey': 'info',
                'sqlval': 'GET_FILES_BY_REG_ID'},
            callback=self.otcInfoParse3,
            meta={'orgid': result['orgid']},
            headers={
                'User-Agent': generate_user_agent(os=('win', 'mac', 'linux')),
                'Referer': 'http://jg.sac.net.cn/pages/publicity/credit_rating_reg.html?aoi_id={orgid}&is_org_search=no'.format(orgid=result['orgid']),
                'Content-Type': 'application/x-www-form-urlencoded',
                'Connection': 'keep-alive'},
        )
def Employee_Change(self, response):
    """Securities qualification -- individual change records (证券从业资格-个人变更信息)."""
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    js = json.loads(response.text)
    configs = Employee_ChangeConfigs
    for json_ in js:
        # A fresh dict per record: the original created ``result`` once, so
        # every yielded item aliased the same mutable dict (sibling parsers
        # such as senior_executiveParse create it per record).
        result = dict()
        for config in configs['data']:
            result[config['En']] = S.select_content(json_, config, response)
            result[config['En']] = S.replace_invalid_char(result[config['En']])
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        yield item
def orgInfoParse2(self, response):
    """Securities company info -- business scope (证券公司信息获取经营范围).

    Joins all PTSC_NAME values from the response into ``result['ptsc']`` and
    yields the completed item for ``dbo.SAC_securitiesInfo``.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    result = response.meta['result']
    result['orgid'] = response.meta['orgid']
    js = json.loads(response.text)
    # Comprehension replaces the manual append loop; behavior unchanged.
    PTSC_NAME = [i['PTSC_NAME'] for i in js]
    result['ptsc'] = ','.join(PTSC_NAME)
    result['ptsc'] = S.replace_invalid_char(result['ptsc'])
    item['result'] = result
    item['keys'] = ['orgid']
    item['db'] = 'dbo.SAC_securitiesInfo'
    yield item
def EQS_sacInfoParse2(self, response):
    """Investment consulting institutions -- basic info, part 2 (证券投资咨询机构--基本信息2).

    Enriches the ``result`` dict from ``response.meta`` with the fields of
    ``EQS_sacInfoParse2Configs`` and yields the item.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    js = json.loads(response.text)
    item = SacItem()
    configs = EQS_sacInfoParse2Configs
    for js_ in js:
        # NOTE(review): the meta dict is shared across iterations --
        # presumably the response holds a single record; verify upstream.
        result = response.meta['result']
        for config in configs['data']:
            k = config['En']
            result[k] = S.select_content(js_, config)
            result[k] = S.replace_invalid_char(result[k])
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        yield item
def Employee_InFo(self, response):
    """Practitioner info: attach photo URL and ADI fields to the carried result.

    Best effort: on any parsing failure the response is logged and skipped.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    try:
        js = json.loads(response.text)
        result = response.meta['result']
        for json_ in js:
            result['image'] = 'http://photo.sac.net.cn/sacmp/images/' + json_['RPI_PHOTO_PATH']
            result['ADI_NAME'] = json_['ADI_NAME']
            result['ADI_ID'] = json_['ADI_ID']
            item['result'] = result
            item['keys'] = cctconfigs['list']['keys']
            item['db'] = cctconfigs['list']['db']
            yield item
    except Exception:
        # Narrowed from a bare ``except:`` (which also swallowed
        # KeyboardInterrupt/SystemExit); deliberate best-effort logging kept.
        msg = '%s%s' % (response.url, response.text)
        scrapy.log.msg(msg)
def BRANCH_OrgParse(self, response):
    """Securities companies -- branch info (证券公司--分公司信息), paginated.

    Parses one result page into items and re-requests itself for the next
    page until ``totalPages`` is reached.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    orgid = response.meta['orgid']
    page = response.meta['page']
    js = json.loads(response.text)
    if page == 1:
        totalPage = js['totalPages']
    else:
        totalPage = response.meta['totalPage']
    configs = BRANCH_OrgConfigs
    for js_ in js['result']:
        result = dict()
        result['orgid'] = orgid
        for config in configs['data']:
            k = config['En']
            result[k] = S.select_content(js_, config)
            result[k] = S.replace_invalid_char(result[k])
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        yield item
    # Strict '<': the original '<=' requested one page past totalPage
    # (sibling otcInfoParse5 already uses '<').
    if page < totalPage:
        page += 1
        yield scrapy.FormRequest(
            'http://jg.sac.net.cn/pages/publicity/resource!list.action',
            formdata={
                'filter_LIKES_mboi_branch_full_name': '',
                'filter_LIKES_mboi_off_address': '',
                'filter_EQS_aoi_id': str(orgid),
                'page.searchFileName': 'publicity',
                'page.sqlKey': 'PAG_BRANCH_ORG',
                'page.sqlCKey': 'SIZE_BRANCH_ORG',
                '_search': 'false',
                'nd': str(int(time.time() * 1000)),
                'page.pageSize': '15',
                'page.pageNo': str(page),
                'page.orderBy': 'MATO_UPDATE_DATE',
                'page.order': 'desc'},
            meta={'orgid': orgid, 'page': page, 'totalPage': totalPage},
            callback=self.BRANCH_OrgParse,
            headers={'User-Agent': generate_user_agent(os=('win', 'mac', 'linux'))},
        )
def otcInfoParse5(self, response):
    """Securities rating agencies -- practitioners (证券评级机构--执业人员信息), paginated.

    Parses one result page into items and re-requests itself until
    ``totalPages`` is reached.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    page = response.meta['page']
    orgid = response.meta['otcid']
    js = json.loads(response.text)
    if page == 1:
        totalPage = js['totalPages']
    else:
        totalPage = response.meta['totalPage']
    configs = otcInfoConfigs
    for js_ in js['result']:
        result = dict()
        result['orgid'] = orgid
        for config in configs['data']:
            k = config['En']
            result[k] = S.select_content(js_, config)
            result[k] = S.replace_invalid_char(result[k])
        item['result'] = result
        item['keys'] = configs['list']['keys']
        item['db'] = configs['list']['db']
        yield item
    if page < totalPage:
        page += 1
        yield scrapy.FormRequest(
            'http://jg.sac.net.cn/pages/publicity/resource!list.action',
            formdata={
                'filter_EQS_aoi_id': str(orgid),
                'page.searchFileName': 'publicity',
                # 'SIZE_PRACTITONERS' is the server-side key as-is (sic);
                # do not "correct" the spelling.
                'page.sqlKey': 'PAG_PRACTITIONERS',
                'page.sqlCKey': 'SIZE_PRACTITONERS',
                '_search': 'false',
                'nd': str(int(time.time() * 1000)),
                'page.pageSize': '15',
                'page.pageNo': str(page),
                'page.orderBy': 'MATO_UPDATE_DATE',
                'page.order': 'desc'},
            callback=self.otcInfoParse5,
            meta={'otcid': orgid, 'page': page, 'totalPage': totalPage},
            headers={'User-Agent': generate_user_agent(os=('win', 'mac', 'linux')),
                     'Connection': 'keep-alive'},
        )
def orgInfoParse1(self, response):
    """Securities company basic info (证券公司信息基本信息).

    Builds ``result`` from the response and forwards it via meta to
    :meth:`orgInfoParse2`, which adds the business scope.
    """
    request = checkTimeError(response)
    if request:
        # Yield (not return) so the retry request is actually scheduled.
        yield request
        return
    item = SacItem()
    orgid = response.meta['orgid']
    js = json.loads(response.text)
    configs = orgInfoparse1configs
    result = dict()
    # NOTE(review): one shared result across records -- presumably the
    # response holds a single record; verify against the endpoint.
    for js_ in js:
        for config in configs['data']:
            k = config['En']
            result[k] = S.select_content(js_, config, response)
            result[k] = S.replace_invalid_char(result[k])
    data = {
        'filter_EQS_aoi_id': str(orgid),
        'sqlkey': 'publicity',
        'sqlval': 'SEARCH_ZQGS_QUALIFATION'}
    yield scrapy.FormRequest(
        'http://jg.sac.net.cn/pages/publicity/resource!search.action',
        formdata=data,
        headers={'User-Agent': generate_user_agent(os=('win', 'mac', 'linux'))},
        callback=self.orgInfoParse2,
        meta={'orgid': orgid, 'result': result},
    )