def tendering_info(self, response):
    tr = Selector(response=response).xpath('//tr[@class="row"]')
    print('招标信息', '---------', response.meta['company_name'])
    for t in tr:
        td = t.xpath('./td')
        tendering_type = td[2].xpath('text()').extract_first()
        tendering_method = td[3].xpath('text()').extract_first()
        winning_bidder = td[4].xpath('text()').extract_first()
        winning_number = td[5].xpath('text()').extract_first()
        winning_date = td[6].xpath('text()').extract_first()
        tendering = templates.Mark(companyName=response.meta['company_name'],
                                   code=response.meta['code'],
                                   tenderClass=tendering_type,
                                   tenderType=tendering_method,
                                   tenderNum=winning_number,
                                   provinceTenderNum=winning_number,
                                   tenderResultDate=winning_date,
                                   tenderCorpName=winning_bidder,
                                   tenderMoney=response.meta['tenderMoney'],
                                   )
        t_data = tendering.data()
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             body=json.dumps(t_data),
                             callback=self.project_zz,
                             meta={'type': '招标'}
                             )
        print('招标信息', t_data, '---------', response.meta['company_name'])
def bid_info(self, response):
    attrs = [
        {'that': '', 'attr': '//table[@width="100%"]/tr[3]/td[2]/text()', 'name': 'code'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[10]/td[2]/text()', 'name': 'tenderNum'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[10]/td[2]/text()', 'name': 'provinceTenderNum'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[11]/td[2]/text()', 'name': 'tenderClass'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[11]/td[4]/text()', 'name': 'tenderType'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[12]/td[2]/text()', 'name': 'tenderResultDate'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[12]/td[4]/text()', 'name': 'tenderMoney'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[13]/td[2]/text()', 'name': 'prjSize'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[13]/td[4]/text()', 'name': 'area'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[14]/td[2]/a/@title', 'name': 'agencyCorpName'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[14]/td[4]/text()', 'name': 'agencyCorpCode'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[15]/td[2]/a/text()', 'name': 'tenderCorpName'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[15]/td[4]/text()', 'name': 'tenderCorpCode'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[16]/td[2]/a/text()', 'name': 'constructorName'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[16]/td[4]/text()', 'name': 'constructorIDCard'},
        {'that': '', 'attr': '//table[@width="100%"]/tr[17]/td[2]/a/text()', 'name': 'createDate'},
    ]
    bid = templates.Projects('Mark')
    bid_zz = bid.html_analysis(response=response, attrs=attrs)
    bid_zz['companyName'] = response.meta['companyName']
    bid_data = templates.Mark(**bid_zz)
    bid_data = bid_data.data()
    print(bid_data, '招标信息')
    yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
                         body=json.dumps(bid_data),
                         callback=self.project_zz,
                         headers={'Content-Type': 'application/json'},
                         method='POST',
                         meta={'type': '招标信息'}
                         )
def bid(self, response):
    data_line = response.text
    # Strip the JSONP wrapper (jQueryXXX_XXX(...)) to recover the JSON payload.
    data_dict = re.split(r'jQuery\d+_\d+\(', data_line)[1]
    data_dict = data_dict.replace(')', '')
    json_data = json.loads(data_dict)
    data = json_data['data']['TenderInfoList']
    for d in data:
        bid = templates.Mark(companyName=response.meta['companyName'],
                             tenderNum=d['TenderNum'],
                             code=d['PrjNum'],
                             provinceTenderNum=d['TenderNum'],
                             tenderResultDate=d['TenderResultDate'],
                             tenderType=d['TenderType'],
                             tenderClass=d['TenderClass'],
                             tenderMoney=d['TenderMoney'],
                             )
        bid_data = bid.data()
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
                             body=json.dumps(bid_data),
                             callback=self.project_zz,
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             meta={'type': '招标'}
                             )
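# Note: bid() above removes every ')' from the JSONP payload, which would corrupt
# any field value that itself contains a closing bracket. Below is a minimal sketch
# of a more targeted unwrap; _strip_jsonp is a hypothetical helper (not referenced
# elsewhere in this spider) and relies on the module-level `re` import already used
# by bid().
def _strip_jsonp(text):
    """Return the JSON body of a jQueryXXX_XXX(...) JSONP response."""
    match = re.search(r'jQuery\d+_\d+\((.*)\)\s*;?\s*$', text, re.S)
    return match.group(1) if match else text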
def win_bid(self, response):
    tr = Selector(response=response).xpath('//table[@class="Tab"]/tr')
    agencyCorpName = tr[5].xpath('./td[2]/span/text()').extract_first()
    agencyCorpCode = tr[5].xpath('./td[4]/span/text()').extract_first()
    tenderCorpName = tr[6].xpath('./td[2]/span/text()').extract_first()
    tenderCorpCode = tr[6].xpath('./td[4]/span/text()').extract_first()
    tenderType = tr[7].xpath('./td[2]/span/text()').extract_first()
    tenderClass = tr[7].xpath('./td[4]/span/text()').extract_first()
    tenderResultDate = tr[8].xpath('./td[2]/span/text()').extract_first()
    tenderMoney = tr[8].xpath('./td[4]/span/text()').extract_first()
    prjSize = tr[9].xpath('./td[2]/span/text()').extract_first()
    area = tr[9].xpath('./td[4]/span/text()').extract_first()
    constructorName = tr[10].xpath('./td[2]/text()').extract_first()
    tenderNum = Selector(response=response).xpath(
        '//span[@id="ctl00_ContentPlaceHolder1_FormView1_Label3"]/text()'
    ).extract_first()
    bid = templates.Mark(companyName=response.meta['company_name'],
                         code=response.meta['code'],
                         tenderNum=tenderNum,
                         provinceTenderNum=tenderNum,
                         agencyCorpName=agencyCorpName,
                         agencyCorpCode=agencyCorpCode,
                         tenderCorpName=tenderCorpName,
                         tenderCorpCode=tenderCorpCode,
                         tenderType=tenderType,
                         tenderClass=tenderClass,
                         tenderResultDate=tenderResultDate,
                         tenderMoney=tenderMoney,
                         area=area,
                         prjSize=prjSize,
                         constructorName=constructorName)
    b_data = bid.data()
    yield scrapy.Request(url='http://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
                         headers={'Content-Type': 'application/json'},
                         callback=self.project_zz,
                         body=json.dumps(b_data),
                         method='POST',
                         meta={'type': '招标信息'})
    print(bid.data(), '招标信息BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB')
def project(self, response):
    basic = templates.Projects('Project')
    attrs = [
        {'that': '', 'attr': '//td[@id="lblPrjName"]/text()', 'name': 'name'},
        {'that': '', 'attr': '//td[@id="lblPrjNum"]/text()', 'name': 'code'},
        {'that': '', 'attr': '//td[@id="lblPrjNum"]/text()', 'name': 'provinceCode'},
        {'that': '', 'attr': '//td[@id="lblPrjTypeNum"]/text()', 'name': 'catalog'},
        {'that': '', 'attr': '//td[@id="lblBuildCorpName"]/text()', 'name': 'unit'},
        {'that': '', 'attr': '//td[@id="lblBuildCorpCode"]/text()', 'name': 'unitLicenseNum'},
        {'that': '', 'attr': '//td[@id="lblCountyNum"]/text()', 'name': 'area'},
        {'that': '', 'attr': '//td[@id="lblPrjApprovalNum"]/text()', 'name': 'docuCode'},
        {'that': '', 'attr': '//td[@id="lblPrjApprovalLevelNum"]/text()', 'name': 'level'},
        {'that': '', 'attr': '//td[@id="lblAllInvest"]/text()', 'name': 'money'},
        {'that': '', 'attr': '//td[@id="lblAllArea"]/text()', 'name': 'acreage'},
        {'that': '', 'attr': '//td[@id="lblPrjPropertyNum"]/text()', 'name': 'trait'},
        {'that': '', 'attr': '//td[@id="lblPrjFunctionNum"]/text()', 'name': 'purpose'},
    ]
    basic_text = basic.html_analysis(response=response, attrs=attrs)
    basic_text['companyName'] = response.meta['companyName']
    if basic_text['level'] == '暂无':
        basic_text['level'] = ''
    basic_data = templates.Project(**basic_text)
    basic_data = basic_data.data()
    print('基本信息', '*******************************', basic_data)
    yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
                         body=json.dumps(basic_data),
                         callback=self.project_zz,
                         headers={'Content-Type': 'application/json'},
                         method='POST',
                         meta={'type': '基本信息', 'company_name': basic_data['companyName']}
                         )

    # tender records (project_step1)
    bid_list = Selector(response=response).xpath('//div[@id="project_step1"]/table/tbody/tr')
    for b in bid_list:
        tenderClass = b.xpath('./td[2]/text()').extract_first()
        tenderType = b.xpath('./td[3]/text()').extract_first()
        tenderCorpName = b.xpath('./td[4]/a/text()').extract_first()
        tenderResultDate = b.xpath('./td[5]/text()').extract_first()
        tenderMoney = b.xpath('./td[6]/text()').extract_first()
        tenderNum = b.xpath('./td[7]/a/text()').extract_first()
        bid_data = templates.Mark(tenderClass=tenderClass,
                                  tenderType=tenderType,
                                  tenderCorpName=tenderCorpName,
                                  tenderResultDate=tenderResultDate,
                                  tenderMoney=tenderMoney,
                                  tenderNum=tenderNum,
                                  provinceTenderNum=tenderNum,
                                  code=basic_data['code'],
                                  companyName=response.meta['companyName']
                                  )
        bid_data = bid_data.data()
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
                             body=json.dumps(bid_data),
                             callback=self.project_zz,
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             meta={'type': '招标信息'}
                             )
        # print('招标信息', '*******************************', bid_data)

    # construction-drawing censor records (project_step2)
    drawing_list = Selector(response=response).xpath('//div[@id="project_step2"]/table/tbody/tr')
    for d in drawing_list:
        censorNum = d.xpath('./td[2]/text()').extract_first()
        provinceCensorNum = d.xpath('./td[3]/text()').extract_first()
        surveyCorpName = d.xpath('./td[4]/a/text()').extract_first()
        designCorpName = d.xpath('./td[5]/a/text()').extract_first()
        censorCorpName = d.xpath('./td[6]/a/text()').extract_first()
        censorEDate = d.xpath('./td[7]/a/text()').extract_first()
        drawing_data = templates.MakeDrawing(censorNum=censorNum,
                                             provinceCensorNum=provinceCensorNum,
                                             surveyCorpName=surveyCorpName,
                                             designCorpName=designCorpName,
                                             censorCorpName=censorCorpName,
                                             censorEDate=censorEDate,
                                             code=basic_data['code'],
                                             companyName=response.meta['companyName']
                                             )
        drawing_data = drawing_data.data()
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm',
                             body=json.dumps(drawing_data),
                             callback=self.project_zz,
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             meta={'type': '施工图纸审查'}
                             )
        print('施工图纸审查', '*******************************', drawing_data)

    # contract filing records (project_step3)
    contract_list = Selector(response=response).xpath('//div[@id="project_step3"]/table/tbody/tr')
    for d in contract_list:
        contractType = d.xpath('./td[2]/text()').extract_first()
        recordNum = d.xpath('./td[3]/text()').extract_first()
        provinceRecordNum = d.xpath('./td[4]/text()').extract_first()
        contractMoney = d.xpath('./td[5]/text()').extract_first()
        contractDate = d.xpath('./td[6]/text()').extract_first()
        contract_data = templates.Contract(contractType=contractType,
                                           recordNum=recordNum,
                                           provinceRecordNum=provinceRecordNum,
                                           contractMoney=contractMoney,
                                           contractDate=contractDate,
                                           code=basic_data['code'],
                                           companyName=response.meta['companyName']
                                           )
        contract_data = contract_data.data()
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectContract.htm',
                             body=json.dumps(contract_data),
                             callback=self.project_zz,
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             meta={'type': '合同备案'}
                             )
        print('合同备案', '*******************************', contract_data)

    print(response.url, 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')

    # construction permit records (project_step4)
    construction_list = Selector(response=response).xpath('//div[@id="project_step4"]/table/tbody/tr')
    for d in construction_list:
        builderLicenceNum = d.xpath('./td[2]/text()').extract_first()
        consCorpName = d.xpath('./td[3]/a/text()').extract_first()
        contractMoney = d.xpath('./td[4]/text()').extract_first()
        area = d.xpath('./td[5]/text()').extract_first()
        createDate = d.xpath('./td[6]/text()').extract_first()
        construction_data = templates.ConstructionPermit(builderLicenceNum=builderLicenceNum,
                                                         provinceBuilderLicenceNum=builderLicenceNum,
                                                         consCorpName=consCorpName,
                                                         contractMoney=contractMoney,
                                                         area=area,
                                                         code=basic_data['code'],
                                                         createDate=createDate,
                                                         companyName=response.meta['companyName']
                                                         )
        construction_data = construction_data.data()
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectBuilderLicence.htm',
                             body=json.dumps(construction_data),
                             callback=self.project_zz,
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             meta={'type': '施工许可'}
                             )
        print('施工许可', '*******************************', construction_data)

    # completion acceptance records
    completion_list = Selector(response=response).xpath('//div[@id="project_step4"]/table/tbody/tr')
    for c in completion_list:
        prjFinishNum = c.xpath('./td[2]/text()').extract_first()
        provincePrjFinishNum = c.xpath('./td[3]/a/text()').extract_first()
        factCost = c.xpath('./td[4]/text()').extract_first()
        factArea = c.xpath('./td[5]/text()').extract_first()
        factBeginDate = c.xpath('./td[6]/text()').extract_first()
        factEndDate = c.xpath('./td[6]/text()').extract_first()
        completion_data = templates.Completion(prjFinishNum=prjFinishNum,
                                               provincePrjFinishNum=provincePrjFinishNum,
                                               factCost=factCost,
                                               factArea=factArea,
                                               factBeginDate=factBeginDate,
                                               code=basic_data['code'],
                                               factEndDate=factEndDate,
                                               companyName=response.meta['companyName']
                                               )
        completion_data = completion_data.data()
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectFinish.htm',
                             body=json.dumps(completion_data),
                             callback=self.project_zz,
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             meta={'type': '竣工验收'}
                             )
        print('竣工验收', '*******************************', completion_data)
def company_project(self, response):
    # project basics (first detailTable)
    basic = Selector(response=response).xpath('//table[@class="detailTable"]')[0]
    basic_tr = basic.xpath('./tr')
    project_name = basic_tr[1].xpath('./td')[3].xpath('text()').extract_first()
    project_number = basic_tr[1].xpath('./td')[1].xpath('text()').extract_first()
    area = basic_tr[3].xpath('./td')[3].xpath('text()').extract_first()
    # collapse any internal whitespace in the area string
    area_data = ''.join(area.split()) if area is not None else ''
    unit = basic_tr[2].xpath('./td')[3].xpath('text()').extract_first()
    unitLicenseNum = basic_tr[3].xpath('./td')[1].xpath('text()').extract_first()
    catalog = basic_tr[2].xpath('./td')[1].xpath('text()').extract_first()
    traits = basic_tr[7].xpath('./td')[3].xpath('text()').extract_first()
    purpose = basic_tr[8].xpath('./td')[1].xpath('text()').extract_first()
    money = basic_tr[6].xpath('./td')[1].xpath('text()').extract_first()
    acreage = basic_tr[6].xpath('./td')[3].xpath('text()').extract_first()
    level = basic_tr[4].xpath('./td')[3].xpath('text()').extract_first()
    docuCode = basic_tr[4].xpath('./td')[1].xpath('text()').extract_first()
    ccc = templates.Project(name=project_name,
                            companyName=response.meta['company_name'],
                            area=area_data,
                            provinceCode=project_number,
                            unit=unit,
                            unitLicenseNum=unitLicenseNum,
                            catalog=catalog,
                            trait=traits,
                            purpose=purpose,
                            money=money,
                            acreage=acreage,
                            level=level,
                            docuCode=docuCode,
                            code=project_number
                            )
    basic_data = ccc.data()
    print('基本信息', basic_data)
    yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
                         headers={'Content-Type': 'application/json'},
                         method='POST',
                         body=json.dumps(basic_data),
                         callback=self.project_zz,
                         meta={'type': '基本信息', 'company_name': basic_data['companyName']}
                         )
    # print(data)

    # tender records (second detailTable)
    mark = Selector(response=response).xpath('//table[@class="detailTable"]')[1]
    mark_tr = mark.xpath('./tr')
    del mark_tr[0]
    mark_list = [k for index, k in enumerate(mark_tr) if (index % 2 != 0)]
    print(len(mark_list), 'mmmmmmmmmmmmmmmmmmmm')
    for m in mark_list:
        td = m.xpath('./td')
        if len(td) == 1:
            pass
            # print(len(td), '没有招标信息的', project_name)
        else:
            project_code = td[0].xpath('./a/text()').extract_first()
            build_size = td[1].xpath('text()').extract_first()
            mark_name = td[2].xpath('text()').extract_first()
            have_project = td[3].xpath('text()').extract_first()
            mark_data = templates.Mark(companyName=response.meta['company_name'],
                                       tenderNum=project_code,
                                       prjSize=build_size,
                                       provinceTenderNum=project_code,
                                       agencyCorpName=mark_name,
                                       tenderCorpName=have_project,
                                       code=project_number)
            make_zz_data = mark_data.data()
            print(project_code, build_size, mark_name, have_project, '招标信息')
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
                                 method='POST',
                                 headers={'Content-Type': 'application/json'},
                                 body=json.dumps(make_zz_data),
                                 callback=self.project_zz,
                                 meta={'type': '招标信息'}
                                 )

    # contract records (third detailTable)
    contract = Selector(response=response).xpath('//table[@class="detailTable"]')[2]
    contract = contract.xpath('./tr')
    del contract[0]
    contract_list = [k for index, k in enumerate(contract) if (index % 2 != 0)]
    for m in contract_list:
        td = m.xpath('./td')
        if len(td) == 1:
            print(len(td), '没有合同信息的', project_name)
        else:
            contract_mark_number = td[0].xpath('text()').extract_first()
            contract_number = td[1].xpath('text()').extract_first()
            send_company = td[2].xpath('text()').extract_first()
            make_company = td[3].xpath('text()').extract_first()
            union_company = td[4].xpath('text()').extract_first()
            contract_object = templates.Contract(companyName=response.meta['company_name'],
                                                 code=project_number,
                                                 recordNum=contract_mark_number,
                                                 contractNum=contract_number,
                                                 proprietorCorpName=send_company,
                                                 contractorCorpName=make_company,
                                                 unionCorpName=union_company,
                                                 provinceRecordNum=contract_mark_number
                                                 )
            contract_data = contract_object.data()
            print('合同信息', contract_data)
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectContract.htm',
                                 method='POST',
                                 headers={'Content-Type': 'application/json'},
                                 body=json.dumps(contract_data),
                                 callback=self.project_zz,
                                 meta={'type': '合同信息'}
                                 )

    # construction-drawing censor record (fourth detailTable)
    make_drawing = Selector(response=response).xpath('//table[@class="detailTable"]')[3]
    make_drawing = make_drawing.xpath('./tr')
    make_see_number = make_drawing[1].xpath('./td')[1].xpath('text()').extract_first()
    make_see_name = make_drawing[1].xpath('./td')[3].xpath('text()').extract_first()
    drawing_num = make_drawing[2].xpath('./td')[1].xpath('text()').extract_first()
    see_date = make_drawing[2].xpath('./td')[3].xpath('text()').extract_first()
    see_name = make_drawing[3].xpath('./td')[1].xpath('text()').extract_first()
    see_num = make_drawing[3].xpath('./td')[3].xpath('text()').extract_first()
    design_name = make_drawing[4].xpath('./td')[1].xpath('text()').extract_first()
    design_num = make_drawing[4].xpath('./td')[3].xpath('text()').extract_first()
    make_size = make_drawing[5].xpath('./td')[1].xpath('text()').extract_first()
    ok_pass = make_drawing[5].xpath('./td')[3].xpath('text()').extract_first()
    see_error = make_drawing[6].xpath('./td')[1].xpath('text()').extract_first()
    see_number = make_drawing[6].xpath('./td')[3].xpath('text()').extract_first()
    drawing = templates.MakeDrawing(
        censorNum=make_see_number,
        censorCorpName=make_see_name,
        censorCorpCode=drawing_num,
        censorEDate=see_date,
        surveyCorpName=see_name,
        surveyCorpCode=see_num,
        designCorpName=design_name,
        designCorpCode=design_num,
        companyName=response.meta['company_name'],
        code=project_number,
        prjSize=make_size,
        engineers=[],
        provinceCensorNum=make_see_number
    )
    drawing_data = drawing.data()
    if drawing_data['censorNum']:
        print(drawing_data, '施工图纸信息')
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm',
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             body=json.dumps(drawing_data),
                             callback=self.project_zz,
                             meta={'type': '施工图纸信息'}
                             )

    # construction permit record (sixth detailTable)
    construction = Selector(response=response).xpath('//table[@class="detailTable"]')[5]
    construction_tr = construction.xpath('./tr')
    # construction permit system record number
    construction_num = construction_tr[1].xpath('./td')[1].xpath('text()').extract_first()
    # not used for now
    usr_plan_land_num = construction_tr[1].xpath('./td')[3].xpath('text()').extract_first()
    # not used for now
    usr_make_land_num = construction_tr[2].xpath('./td')[1].xpath('text()').extract_first()
    # drawing-censor approval certificate number
    qualified_num = construction_tr[2].xpath('./td')[3].xpath('text()').extract_first()
    # contract amount (10,000 CNY)
    money_capital = construction_tr[3].xpath('./td')[1].xpath('text()').extract_first()
    # area (square meters)
    construction_area = construction_tr[3].xpath('./td')[3].xpath('text()').extract_first()
    # construction scale, not used for now
    construction_size = construction_tr[4].xpath('./td')[1].xpath('text()').extract_first()
    # permit issue date
    construction_date = construction_tr[4].xpath('./td')[3].xpath('text()').extract_first()
    # survey firm name
    c_survey_name = construction_tr[5].xpath('./td')[1].xpath('text()').extract_first()
    # survey firm organization code
    c_survey_num = construction_tr[5].xpath('./td')[3].xpath('text()').extract_first()
    # design firm name
    c_design_name = construction_tr[6].xpath('./td')[1].xpath('text()').extract_first()
    # design firm organization code
    c_design_num = construction_tr[6].xpath('./td')[3].xpath('text()').extract_first()
    # construction firm name
    c_make_name = construction_tr[7].xpath('./td')[1].xpath('text()').extract_first()
    # construction firm organization code
    c_make_num = construction_tr[7].xpath('./td')[3].xpath('text()').extract_first()
    # construction firm work-safety licence number
    c_make_safe_num = construction_tr[8].xpath('./td')[1].xpath('text()').extract_first()
    # supervision firm name
    c_supervisor_name = construction_tr[8].xpath('./td')[3].xpath('text()').extract_first()
    # supervision firm organization code
    c_supervisor_num = construction_tr[9].xpath('./td')[1].xpath('text()').extract_first()
    # project manager name
    c_project_person_name = construction_tr[9].xpath('./td')[3].xpath('text()').extract_first()
    # project manager ID type
    c_name_person_idctype = construction_tr[10].xpath('./td')[1].xpath('text()').extract_first()
    # project manager ID card number
    c_name_person_idcard = construction_tr[10].xpath('./td')[3].xpath('text()').extract_first()
    # chief supervising engineer name
    c_chief_name = construction_tr[11].xpath('./td')[1].xpath('text()').extract_first()
    # chief supervising engineer ID type
    c_chief_idtype = construction_tr[11].xpath('./td')[3].xpath('text()').extract_first()
    # chief supervising engineer ID number
    c_chief_idcard = construction_tr[12].xpath('./td')[1].xpath('text()').extract_first()
    # work-safety manager name
    c_safe_manager = construction_tr[12].xpath('./td')[3].xpath('text()').extract_first()
    # work-safety manager ID type
    c_safe_idtype = construction_tr[13].xpath('./td')[1].xpath('text()').extract_first()
    # work-safety manager ID number
    c_safe_idcard = construction_tr[13].xpath('./td')[3].xpath('text()').extract_first()
    # work-safety assessment certificate number
    c_safe_assessment_num = construction_tr[14].xpath('./td')[1].xpath('text()').extract_first()
    # work-safety manager type
    c_safe_assessment_type = construction_tr[14].xpath('./td')[3].xpath('text()').extract_first()
    construction_model = templates.ConstructionPermit(builderLicenceNum=construction_num,
                                                      censorNum=qualified_num,
                                                      contractMoney=money_capital,
                                                      area=construction_area,
                                                      econCorpName=c_survey_name,
                                                      econCorpCode=c_survey_num,
                                                      designCorpName=c_design_name,
                                                      designCorpCode=c_design_num,
                                                      consCorpName=c_make_name,
                                                      consCorpCode=c_make_num,
                                                      superCorpName=c_supervisor_name,
                                                      superCorpCode=c_supervisor_num,
                                                      constructorName=c_project_person_name,
                                                      constructorIDCard=c_name_person_idcard,
                                                      supervisionName=c_chief_name,
                                                      supervisionIDCard=c_chief_idcard,
                                                      companyName=response.meta['company_name'],
                                                      code=project_number,
                                                      provinceBuilderLicenceNum=construction_num
                                                      )
    construction_make_data = construction_model.data()
    print('施工许可信息', construction_make_data)
    if construction_make_data['builderLicenceNum']:
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectBuilderLicence.htm',
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             body=json.dumps(construction_make_data),
                             callback=self.project_zz,
                             meta={'type': '施工许可信息'}
                             )

    # completion filing record (ninth detailTable)
    completion_info = Selector(response=response).xpath('//table[@class="detailTable"]')[8]
    completion_tr = completion_info.xpath('./tr')
    # completion filing number
    completion_num = completion_tr[1].xpath('./td')[1].xpath('text()').extract_first()
    # construction permit number
    completion_make_number = completion_tr[1].xpath('./td')[3].xpath('text()').extract_first()
    # quality inspection agency name
    test_name = completion_tr[2].xpath('./td')[1].xpath('text()').extract_first()
    # quality inspection agency organization code
    test_number = completion_tr[2].xpath('./td')[3].xpath('text()').extract_first()
    # actual cost (10,000 CNY)
    actual_capital = completion_tr[3].xpath('./td')[1].xpath('text()').extract_first()
    # actual area (square meters)
    actual_area = completion_tr[3].xpath('./td')[3].xpath('text()').extract_first()
    # actual construction scale
    actual_size = completion_tr[4].xpath('./td')[1].xpath('text()').extract_first()
    # structural system
    c_body = completion_tr[4].xpath('./td')[3].xpath('text()').extract_first()
    # remarks
    remarks = completion_tr[5].xpath('./td')[1].xpath('text()').extract_first()
    completion_model = templates.Completion(
        companyName=response.meta['company_name'],
        code=project_number,
        prjFinishNum=completion_num,
        factCost=actual_capital,
        factArea=actual_area,
        factSize=actual_size,
        prjStructureType=c_body,
        mark=remarks,
        provincePrjFinishNum=completion_num
    )
    completion_zz = completion_model.data()
    if completion_zz['prjFinishNum']:
        print('当前公司----%s---%s--竣工数据' % (project_name, completion_zz))
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectFinish.htm',
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             body=json.dumps(completion_zz),
                             callback=self.project_zz,
                             meta={'type': '竣工验收'}
                             )
def bid_info(self, response):
    attrs = [
        {'that': '', 'attr': '//td[@class="name_level3 col_01_value"]/span/text()', 'name': 'code'},
        {'that': '', 'attr': '//span[@id="lblTenderMoney"]/text()', 'name': 'tenderMoney'},
        {'that': '', 'attr': '//span[@id="lblArea"]/text()', 'name': 'area'},
        {'that': '', 'attr': '//span[@id="lblTenderNum"]/text()', 'name': 'tenderNum'},
        {'that': '', 'attr': '//span[@id="lblTenderNum"]/text()', 'name': 'provinceTenderNum'},
        {'that': '', 'attr': '//span[@id="lblTenderClassNum"]/text()', 'name': 'tenderClass'},
        {'that': '', 'attr': '//span[@id="lblTenderTypeNum"]/text()', 'name': 'tenderType'},
        {'that': '', 'attr': '//span[@id="lblTenderResultDate"]/text()', 'name': 'tenderResultDate'},
        {'that': '', 'attr': '//span[@id="lblPrjSize"]/text()', 'name': 'prjSize'},
        {'that': '', 'attr': '//span[@id="lblAgencyCorpName"]/text()', 'name': 'agencyCorpName'},
        {'that': '', 'attr': '//span[@id="lblAgencyCorpCode"]/text()', 'name': 'agencyCorpCode'},
        {'that': '', 'attr': '//span[@id="lblTenderCorpName"]/text()', 'name': 'tenderCorpName'},
        {'that': '', 'attr': '//span[@id="lblTenderCorpCode"]/text()', 'name': 'tenderCorpCode'},
        {'that': '', 'attr': '//span[@id="lblCreateDate"]/text()', 'name': 'createDate'},
        {'that': '', 'attr': '//span[@id="lblConstructorName"]/text()', 'name': 'constructorName'},
        {'that': '', 'attr': '//span[@id="lblConstructorIDCard"]/text()', 'name': 'constructorIDCard'},
    ]
    bid_object = templates.Projects('Mark')
    bid_data = bid_object.html_analysis(response, attrs)
    bid_data['companyName'] = response.meta['companyName']
    bid_zz = templates.Mark(**bid_data)
    bid_zz = bid_zz.data()
    bid_zz['code'] = response.meta['code']
    print(bid_zz, '招标信息')
    yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
                         body=json.dumps(bid_zz),
                         callback=self.project_zz,
                         headers={'Content-Type': 'application/json'},
                         method='POST',
                         meta={'type': '招标信息'})
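# The POST calls above all repeat the same boilerplate (JSON body, Content-Type
# header, project_zz callback). A minimal sketch of a shared helper is shown below;
# post_json is a hypothetical name, and the existing methods have not been rewritten
# to call it.
def post_json(self, url, payload, type_label):
    """Build a JSON POST Request routed to the common project_zz callback."""
    return scrapy.Request(url=url,
                          method='POST',
                          headers={'Content-Type': 'application/json'},
                          body=json.dumps(payload),
                          callback=self.project_zz,
                          meta={'type': type_label})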