def finish_info(self, response):
    """Parse a completion ('竣工') detail page and forward the record.

    Builds the XPath field map for the table inside the first ``detail``
    block, lets ``templates.Projects('Completion').html_analysis`` extract
    the values, attaches the company name carried in ``response.meta`` and
    yields one JSON POST request to the ``addCompanyProjectFinish`` endpoint.

    :param response: response of the detail page; ``response.meta`` must
        contain ``companyName``.
    :return: generator yielding a single ``scrapy.Request``.
    """
    table = '//div[@class="detail"][1]/div[@class="detail_list"]/table'
    # (row/cell inside the table, target field name)
    cells = (
        ('tr[3]/td[2]', 'code'),
        ('tr[10]/td[2]', 'prjFinishNum'),
        ('tr[10]/td[2]', 'provincePrjFinishNum'),
        ('tr[11]/td[2]', 'factCost'),
        ('tr[11]/td[4]', 'factArea'),
        ('tr[12]/td[2]', 'factSize'),
        ('tr[13]/td[2]', 'factBeginDate'),
        ('tr[13]/td[4]', 'factEndDate'),
        ('tr[14]/td[2]', 'createDate'),
        ('tr[14]/td[4]', 'mark'),
    )
    attrs = [
        {'that': '', 'attr': '%s/%s/text()' % (table, cell), 'name': field}
        for cell, field in cells
    ]

    parser = templates.Projects('Completion')
    fields = parser.html_analysis(response=response, attrs=attrs)
    fields['companyName'] = response.meta['companyName']
    record = templates.Completion(**fields)

    # data() is invoked once for the debug print and once for the payload.
    print(record.data(), '竣工')
    payload = record.data()

    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectFinish.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(payload),
        callback=self.project_zz,
        meta={'type': '竣工'},
    )
def contract_info(self, response):
    """Parse a contract-record ('合同信息') detail page and forward it.

    The field values are located by XPath inside ``table[@width="100%"]``,
    extracted through ``templates.Projects('Contract').html_analysis``,
    completed with the company name from ``response.meta`` and sent as a
    JSON POST to the ``addCompanyProjectContract`` endpoint.

    :param response: response of the detail page; ``response.meta`` must
        contain ``companyName``.
    :return: generator yielding a single ``scrapy.Request``.
    """
    table = '//table[@width="100%"]'
    # (path below the table, target field name)
    cells = (
        ('tr[3]/td[2]/text()', 'code'),
        ('tr[10]/td[2]/text()', 'recordNum'),
        ('tr[10]/td[2]/text()', 'provinceRecordNum'),
        ('tr[12]/td[2]/text()', 'contractType'),
        ('tr[12]/td[4]/text()', 'contractMoney'),
        ('tr[13]/td[2]/text()', 'prjSize'),
        ('tr[13]/td[4]/text()', 'contractDate'),
        ('tr[14]/td[2]/a/@title', 'proprietorCorpName'),
        ('tr[14]/td[4]/text()', 'proprietorCorpCode'),
        ('tr[15]/td[2]/a/text()', 'contractorCorpName'),
        ('tr[15]/td[4]/text()', 'contractorCorpCode'),
        ('tr[16]/td[2]/a/text()', 'unionCorpName'),
        ('tr[16]/td[4]/text()', 'unionCorpCode'),
        ('tr[17]/td[2]/a/text()', 'createDate'),
    )
    attrs = [
        {'that': '', 'attr': '%s/%s' % (table, leaf), 'name': field}
        for leaf, field in cells
    ]

    parser = templates.Projects('Contract')
    fields = parser.html_analysis(response=response, attrs=attrs)
    fields['companyName'] = response.meta['companyName']
    payload = templates.Contract(**fields).data()
    print(payload, '合同信息')

    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectContract.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(payload),
        callback=self.project_zz,
        meta={'type': '合同信息'},
    )
def bid_info(self, response):
    """Parse a tender ('招标信息') detail page and forward the record.

    The field values are located by XPath inside ``table[@width="100%"]``,
    extracted through ``templates.Projects('Mark').html_analysis``,
    completed with the company name from ``response.meta`` and sent as a
    JSON POST to the tender endpoint.

    :param response: response of the detail page; ``response.meta`` must
        contain ``companyName``.
    :return: generator yielding a single ``scrapy.Request``.
    """
    table = '//table[@width="100%"]'
    # (path below the table, target field name)
    cells = (
        ('tr[3]/td[2]/text()', 'code'),
        ('tr[10]/td[2]/text()', 'tenderNum'),
        ('tr[10]/td[2]/text()', 'provinceTenderNum'),
        ('tr[11]/td[2]/text()', 'tenderClass'),
        ('tr[11]/td[4]/text()', 'tenderType'),
        ('tr[12]/td[2]/text()', 'tenderResultDate'),
        ('tr[12]/td[4]/text()', 'tenderMoney'),
        ('tr[13]/td[2]/text()', 'prjSize'),
        ('tr[13]/td[4]/text()', 'area'),
        ('tr[14]/td[2]/a/@title', 'agencyCorpName'),
        ('tr[14]/td[4]/text()', 'agencyCorpCode'),
        ('tr[15]/td[2]/a/text()', 'tenderCorpName'),
        ('tr[15]/td[4]/text()', 'tenderCorpCode'),
        ('tr[16]/td[2]/a/text()', 'constructorName'),
        ('tr[16]/td[4]/text()', 'constructorIDCard'),
        ('tr[17]/td[2]/a/text()', 'createDate'),
    )
    attrs = [
        {'that': '', 'attr': '%s/%s' % (table, leaf), 'name': field}
        for leaf, field in cells
    ]

    parser = templates.Projects('Mark')
    fields = parser.html_analysis(response=response, attrs=attrs)
    fields['companyName'] = response.meta['companyName']
    bid_data = templates.Mark(**fields).data()
    print(bid_data, '招标信息')

    # Tender payloads belong on the tender endpoint, as used by the other
    # places in this file that post ``templates.Mark`` data; this handler
    # used to post them to addCompanyProject.htm (the basic-project endpoint).
    # NOTE(review): confirm against the API before deploying.
    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(bid_data),
        callback=self.project_zz,
        meta={'type': '招标信息'},
    )
def construction_info(self, response):
    """Parse a construction-permit ('施工许可信息录入') detail page.

    Two tables are read: the ``detail_list`` table with the permit fields
    and the ``classContent t1`` table whose rows 2-5 supply name, code and
    area for the design, econ, cons and super parties. The values are
    extracted through ``templates.Projects('ConstructionPermit')``,
    completed with the company name from ``response.meta`` and sent as a
    JSON POST to the ``addCompanyProjectBuilderLicence`` endpoint.

    :param response: response of the detail page; ``response.meta`` must
        contain ``companyName``.
    :return: generator yielding a single ``scrapy.Request``.
    """
    permit_table = '//div[@class="detail_list"]/table'
    parties_table = '//div[@class="classContent t1"]/table'

    # (path below the permit table, target field name)
    permit_cells = (
        ('tr[3]/td[2]/text()', 'code'),
        ('tr[10]/td[2]/text()', 'builderLicenceNum'),
        ('tr[10]/td[2]/text()', 'provinceBuilderLicenceNum'),
        ('tr[11]/td[2]/text()', 'censorNum'),
        ('tr[11]/td[4]/text()', 'contractMoney'),
        ('tr[12]/td[2]/a/@title', 'constructorName'),
        ('tr[12]/td[4]/text()', 'constructorIDCard'),
        ('tr[13]/td[2]/a/text()', 'supervisionName'),
        ('tr[13]/td[4]/text()', 'supervisionIDCard'),
        ('tr[14]/td[2]/text()', 'area'),
        ('tr[14]/td[4]/text()', 'createDate'),
    )
    attrs = [
        {'that': '', 'attr': '%s/%s' % (permit_table, leaf), 'name': field}
        for leaf, field in permit_cells
    ]

    # Each party occupies one row; the three columns map to
    # <role>CorpName, <role>CorpCode and <role>CorpArea.
    party_columns = (
        ('td[3]/a/text()', 'Name'),
        ('td[4]/text()', 'Code'),
        ('td[5]/text()', 'Area'),
    )
    for row_no, role in enumerate(('design', 'econ', 'cons', 'super'), start=2):
        for leaf, suffix in party_columns:
            attrs.append({
                'that': '',
                'attr': '%s/tr[%d]/%s' % (parties_table, row_no, leaf),
                'name': '%sCorp%s' % (role, suffix),
            })

    parser = templates.Projects('ConstructionPermit')
    fields = parser.html_analysis(response=response, attrs=attrs)
    fields['companyName'] = response.meta['companyName']
    record = templates.ConstructionPermit(**fields)

    # data() is invoked once for the debug print and once for the payload.
    print(record.data(), '施工许可信息录入')
    payload = record.data()

    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectBuilderLicence.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(payload),
        callback=self.project_zz,
        meta={'type': '施工许可信息录入'},
    )
def project(self, response):
    """Parse a project page, post its basic data and fan out to sub-pages.

    Steps, in order:

    1. Extract the basic project fields from the ``detail_list`` table and
       POST them to ``addCompanyProject``.
    2. For every data row of the ``classContent t1`` table, request the
       linked tender page (handled by ``bid_info``).
    3. For every data row of the ``classContent t2`` table, build a
       drawing-review record directly from the row and POST it to
       ``addCompanyProjectCensor``.
    4. For every data row of ``classContent t3`` request the linked
       contract page (``contract_info``); for every link in ``t4`` and
       ``t5`` request the construction-permit / completion pages
       (``construction_info`` / ``finish_info``).

    Rows that carry no link are skipped instead of aborting the whole
    callback.

    :param response: response of the project page; ``response.meta`` must
        contain ``companyName``.
    :return: generator of ``scrapy.Request`` objects.
    """
    site_root = 'http://115.29.2.37:8080/'
    api_root = 'https://api.maotouin.com/rest/companyInfo/'
    company_name = response.meta['companyName']
    page = Selector(response=response)

    basic_info = templates.Projects('Project')
    table = '//div[@class="detail_list"]/table'
    # (row/cell inside the table, target field name)
    cells = (
        ('tr[1]/td[2]', 'name'),
        ('tr[2]/td[2]', 'code'),
        ('tr[2]/td[2]', 'provinceCode'),
        ('tr[3]/td[2]', 'unit'),
        ('tr[3]/td[4]', 'catalog'),
        ('tr[4]/td[2]', 'unitLicenseNum'),
        ('tr[4]/td[4]', 'area'),
        ('tr[6]/td[2]', 'docuCode'),
        ('tr[6]/td[4]', 'level'),
        ('tr[7]/td[2]', 'money'),
        ('tr[7]/td[4]', 'acreage'),
        ('tr[8]/td[2]', 'trait'),
        ('tr[8]/td[4]', 'purpose'),
    )
    attrs = [
        {'that': '', 'attr': '%s/%s/text()' % (table, cell), 'name': field}
        for cell, field in cells
    ]

    basic_data = basic_info.html_analysis(response, attrs)
    basic_data['companyName'] = company_name
    b_data = templates.Project(**basic_data).data()
    print(b_data, '基本信息', b_data['companyName'])
    yield scrapy.Request(
        url=api_root + 'addCompanyProject.htm',
        body=json.dumps(b_data),
        callback=self.project_zz,
        headers={'Content-Type': 'application/json'},
        method='POST',
        meta={'type': '基本信息', 'company_name': b_data['companyName']},
    )

    # Tender rows: the first <tr> is skipped ([1:]), the link sits in column 7.
    for row in page.xpath('//div[@class="classContent t1"]/table/tr')[1:]:
        href = row.xpath('./td[7]/a/@href').extract_first()
        if not href:
            # extract_first() yields None when the cell has no link;
            # concatenating it would raise TypeError and stop the generator.
            continue
        yield scrapy.Request(
            url=site_root + href,
            callback=self.bid_info,
            headers=heads,
            meta={'companyName': company_name},
        )

    # Drawing-review rows are posted straight from the list table.
    drawing_rows = page.xpath('//div[@class="classContent t2"]/table/tr')[1:]
    print(len(drawing_rows), '施工图纸审查----bbbbbbbbbbbbbbbbbbbbbbbbbbbbb', response.url)
    for row in drawing_rows:
        drawing_data = templates.MakeDrawing(
            censorNum=row.xpath('./td[2]/text()').extract_first(),
            surveyCorpName=row.xpath('./td[3]/text()').extract_first(),
            designCorpName=row.xpath('./td[4]/text()').extract_first(),
            censorCorpName=row.xpath('./td[5]/text()').extract_first(),
            censorEDate=row.xpath('./td[6]/text()').extract_first(),
            # Tie the record to its project and company, as the other
            # MakeDrawing producers in this file do.
            # NOTE(review): assumes b_data exposes 'code' — confirm.
            code=b_data['code'],
            companyName=company_name,
        ).data()
        print(drawing_data, '施工图纸审查')
        yield scrapy.Request(
            url=api_root + 'addCompanyProjectCensor.htm',
            body=json.dumps(drawing_data),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'},
            method='POST',
            meta={'type': '施工图纸审查'},
        )

    # Contract rows: link in column 6.
    for row in page.xpath('//div[@class="classContent t3"]/table/tr')[1:]:
        href = row.xpath('./td[6]/a/@href').extract_first()
        print(href, '合同备案url')
        if not href:
            continue
        yield scrapy.Request(
            url=site_root + href,
            callback=self.contract_info,
            meta={'companyName': company_name},
        )

    # Construction-permit and completion tables: follow every link found.
    for href in page.xpath('//div[@class="classContent t4"]/table/tr/td/a/@href'):
        yield scrapy.Request(
            url=site_root + href.extract(),
            callback=self.construction_info,
            meta={'companyName': company_name},
        )

    for href in page.xpath('//div[@class="classContent t5"]/table/tr/td/a/@href'):
        yield scrapy.Request(
            url=site_root + href.extract(),
            callback=self.finish_info,
            meta={'companyName': company_name},
        )
def project(self, response):
    """Parse a project page and post the project plus all listed records.

    The basic project fields are read from ``td`` cells addressed by id and
    posted to ``addCompanyProject``. Afterwards the rows of the
    ``project_step1`` .. ``project_step5`` tables are turned into tender,
    drawing-review, contract, construction-permit and completion records
    respectively, each posted to its own endpoint. Every record carries the
    project ``code`` and the company name from ``response.meta``.

    A ``level`` of ``'暂无'`` is replaced by an empty string before the
    basic record is built.

    :param response: response of the project page; ``response.meta`` must
        contain ``companyName``.
    :return: generator of ``scrapy.Request`` objects.
    """
    company_name = response.meta['companyName']
    page = Selector(response=response)

    def cell(row, path):
        """Return the first match of ``path`` below ``row`` (or None)."""
        return row.xpath(path).extract_first()

    def post(endpoint, payload, kind, **extra_meta):
        """Build the JSON POST request for one record."""
        meta = {'type': kind}
        meta.update(extra_meta)
        return scrapy.Request(
            url='https://api.maotouin.com/rest/companyInfo/' + endpoint,
            body=json.dumps(payload),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'},
            method='POST',
            meta=meta,
        )

    basic = templates.Projects('Project')
    # (id of the <td> holding the value, target field name)
    labelled = (
        ('lblPrjName', 'name'),
        ('lblPrjNum', 'code'),
        ('lblPrjNum', 'provinceCode'),
        ('lblPrjTypeNum', 'catalog'),
        ('lblBuildCorpName', 'unit'),
        ('lblBuildCorpCode', 'unitLicenseNum'),
        ('lblCountyNum', 'area'),
        ('lblPrjApprovalNum', 'docuCode'),
        ('lblPrjApprovalLevelNum', 'level'),
        ('lblAllInvest', 'money'),
        ('lblAllArea', 'acreage'),
        ('lblPrjPropertyNum', 'trait'),
        ('lblPrjFunctionNum', 'purpose'),
    )
    attrs = [
        {'that': '', 'attr': '//td[@id="%s"]/text()' % label, 'name': field}
        for label, field in labelled
    ]

    basic_text = basic.html_analysis(response=response, attrs=attrs)
    basic_text['companyName'] = company_name
    if basic_text['level'] == '暂无':
        basic_text['level'] = ''
    basic_data = templates.Project(**basic_text).data()
    print('基本信息', '*******************************', basic_data)
    yield post('addCompanyProject.htm', basic_data, '基本信息',
               company_name=basic_data['companyName'])

    project_code = basic_data['code']

    # --- step 1: tender rows ---------------------------------------------
    for row in page.xpath('//div[@id="project_step1"]/table/tbody/tr'):
        tender_num = cell(row, './td[7]/a/text()')
        bid_data = templates.Mark(
            tenderClass=cell(row, './td[2]/text()'),
            tenderType=cell(row, './td[3]/text()'),
            tenderCorpName=cell(row, './td[4]/a/text()'),
            tenderResultDate=cell(row, './td[5]/text()'),
            tenderMoney=cell(row, './td[6]/text()'),
            tenderNum=tender_num,
            provinceTenderNum=tender_num,
            code=project_code,
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectTender.htm', bid_data, '招标信息')

    # --- step 2: drawing-review rows -------------------------------------
    for row in page.xpath('//div[@id="project_step2"]/table/tbody/tr'):
        drawing_data = templates.MakeDrawing(
            censorNum=cell(row, './td[2]/text()'),
            provinceCensorNum=cell(row, './td[3]/text()'),
            surveyCorpName=cell(row, './td[4]/a/text()'),
            designCorpName=cell(row, './td[5]/a/text()'),
            censorCorpName=cell(row, './td[6]/a/text()'),
            censorEDate=cell(row, './td[7]/a/text()'),
            code=project_code,
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectCensor.htm', drawing_data, '施工图纸审查')
        print('施工图纸审查', '*******************************', drawing_data)

    # --- step 3: contract rows -------------------------------------------
    for row in page.xpath('//div[@id="project_step3"]/table/tbody/tr'):
        contract_data = templates.Contract(
            contractType=cell(row, './td[2]/text()'),
            recordNum=cell(row, './td[3]/text()'),
            provinceRecordNum=cell(row, './td[4]/text()'),
            contractMoney=cell(row, './td[5]/text()'),
            contractDate=cell(row, './td[6]/text()'),
            code=project_code,
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectContract.htm', contract_data, '合同备案')
        print('合同备案', '*******************************', contract_data)
    print(response.url, 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')

    # --- step 4: construction-permit rows --------------------------------
    for row in page.xpath('//div[@id="project_step4"]/table/tbody/tr'):
        licence_num = cell(row, './td[2]/text()')
        construction_data = templates.ConstructionPermit(
            builderLicenceNum=licence_num,
            provinceBuilderLicenceNum=licence_num,
            consCorpName=cell(row, './td[3]/a/text()'),
            contractMoney=cell(row, './td[4]/text()'),
            area=cell(row, './td[5]/text()'),
            code=project_code,
            createDate=cell(row, './td[6]/text()'),
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectBuilderLicence.htm', construction_data, '施工许可')
        print('施工许可', '*******************************', construction_data)

    # --- step 5: completion rows -----------------------------------------
    # This section used to re-select project_step4, which re-posted the
    # construction-permit rows as completion records.
    # NOTE(review): project_step5 is inferred from the step1..step4
    # numbering — confirm the container id against the live page.
    for row in page.xpath('//div[@id="project_step5"]/table/tbody/tr'):
        completion_data = templates.Completion(
            prjFinishNum=cell(row, './td[2]/text()'),
            provincePrjFinishNum=cell(row, './td[3]/a/text()'),
            factCost=cell(row, './td[4]/text()'),
            factArea=cell(row, './td[5]/text()'),
            factBeginDate=cell(row, './td[6]/text()'),
            code=project_code,
            # The end date used to be read from td[6] as well, duplicating
            # the begin date.
            # NOTE(review): td[7] assumed to be the next column — confirm.
            factEndDate=cell(row, './td[7]/text()'),
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectFinish.htm', completion_data, '竣工验收')
        print('竣工验收', '*******************************', completion_data)
def construction_info(self, response):
    """Parse a construction-permit ('施工许可信息') detail page.

    Permit fields are read from ``span`` elements addressed by id; name,
    code and area of the design, cons, super and econ parties come from
    rows 2-5 of the second ``rpd_basic_table``. The extracted record gets
    the company name from ``response.meta``; after building the payload its
    ``code`` is overwritten with ``response.meta['code']`` and the result is
    sent as a JSON POST to ``addCompanyProjectBuilderLicence``.

    :param response: response of the detail page; ``response.meta`` must
        contain ``companyName`` and ``code``.
    :return: generator yielding a single ``scrapy.Request``.
    """
    # (id of the <span> holding the value, target field name)
    labelled = (
        ('lblPrjNum', 'code'),
        ('lblBuilderLicenceNum', 'builderLicenceNum'),
        ('lblBuilderLicenceNum', 'provinceBuilderLicenceNum'),
        ('lblCensorNum', 'censorNum'),
        ('lblContractMoney', 'contractMoney'),
        ('lblArea', 'area'),
        ('lblConstructorName', 'constructorName'),
        ('lblConstructorIDCard', 'constructorIDCard'),
        ('lblSupervisionName', 'supervisionName'),
        ('lblSupervisionIDCard', 'supervisionIDCard'),
        ('lblCreateDate', 'createDate'),
    )
    attrs = [
        {'that': '', 'attr': '//span[@id="%s"]/text()' % label, 'name': field}
        for label, field in labelled
    ]

    parties_table = '//table[@class="rpd_basic_table"][2]'
    party_columns = (
        ('td[2]/a/text()', 'Name'),
        ('td[3]/text()', 'Code'),
        ('td[4]/text()', 'Area'),
    )
    for row_no, role in enumerate(('design', 'cons', 'super', 'econ'), start=2):
        for leaf, suffix in party_columns:
            attrs.append({
                'that': '',
                'attr': '%s/tr[%d]/%s' % (parties_table, row_no, leaf),
                'name': '%sCorp%s' % (role, suffix),
            })

    parser = templates.Projects('ConstructionPermit')
    fields = parser.html_analysis(response, attrs)
    fields['companyName'] = response.meta['companyName']
    payload = templates.ConstructionPermit(**fields).data()
    print(payload, '施工许可信息')
    # The project code handed down by the caller wins over the scraped one.
    payload['code'] = response.meta['code']

    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectBuilderLicence.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(payload),
        callback=self.project_zz,
        meta={'type': '施工许可信息'},
    )
def contract_info(self, response):
    """Parse a contract-record ('合同备案信息') detail page and forward it.

    All fields are read from ``span`` elements addressed by id, extracted
    through ``templates.Projects('Contract').html_analysis`` and completed
    with the company name from ``response.meta``. After the payload is
    built its ``code`` is overwritten with ``response.meta['code']`` and it
    is sent as a JSON POST to ``addCompanyProjectContract``.

    :param response: response of the detail page; ``response.meta`` must
        contain ``companyName`` and ``code``.
    :return: generator yielding a single ``scrapy.Request``.
    """
    # (id of the <span> holding the value, target field name)
    labelled = (
        ('lblPrjNum', 'code'),
        ('lblRecordNum', 'recordNum'),
        ('lblRecordNum', 'provinceRecordNum'),
        ('lblContractNum', 'contractNum'),
        ('lblcontractclassnum', 'contractClassify'),
        ('lblContractTypeNum', 'contractType'),
        ('lblContractMoney', 'contractMoney'),
        ('lblPrjSize', 'prjSize'),
        ('lblContractDate', 'contractDate'),
        ('lblPropietorCorpName', 'proprietorCorpName'),
        ('lblPropietorCorpCode', 'proprietorCorpCode'),
        ('lblContractorCorpName', 'contractorCorpName'),
        ('lblContractorCorpCode', 'contractorCorpCode'),
        ('lblUnionCorpName', 'unionCorpName'),
        ('lblUnionCorpCode', 'unionCorpCode'),
        ('lblCreateDate', 'createDate'),
    )
    attrs = [
        {'that': '', 'attr': '//span[@id="%s"]/text()' % label, 'name': field}
        for label, field in labelled
    ]

    parser = templates.Projects('Contract')
    fields = parser.html_analysis(response, attrs)
    fields['companyName'] = response.meta['companyName']
    payload = templates.Contract(**fields).data()
    print(payload, '合同备案信息')
    # The project code handed down by the caller wins over the scraped one.
    payload['code'] = response.meta['code']

    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectContract.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(payload),
        callback=self.project_zz,
        meta={'type': '合同备案信息'},
    )
def drawing_info(self, response):
    """Parse a drawing-review ('施工图纸审查') detail page and forward it.

    Review fields come from ``span`` elements addressed by id; survey and
    design party data from rows 2-3 of the second ``rpd_basic_table``.
    When the extracted ``censorNum`` is truthy, the rows of the third
    ``rpd_basic_table`` (without its first and last row) are collected as
    ``engineers`` - only rows with a non-empty name are kept - and the
    record is sent as a JSON POST to ``addCompanyProjectCensor`` with
    ``code`` taken from ``response.meta``. Nothing is yielded otherwise.

    :param response: response of the detail page; ``response.meta`` must
        contain ``companyName`` and ``code``.
    :return: generator yielding at most one ``scrapy.Request``.
    """
    # (id of the <span> holding the value, target field name)
    labelled = (
        ('lblPrjNum', 'code'),
        ('lblCensorCorpName', 'censorCorpName'),
        ('lblCensorCorpCode', 'censorCorpCode'),
        ('lblCensorNum', 'censorNum'),
        ('lblCensorNum', 'provinceCensorNum'),
        ('lblCensorEDate', 'censorEDate'),
        ('lblPrjSize', 'prjSize'),
    )
    attrs = [
        {'that': '', 'attr': '//span[@id="%s"]/text()' % label, 'name': field}
        for label, field in labelled
    ]

    parties_table = '//table[@class="rpd_basic_table"][2]'
    # (path below the parties table, target field name)
    party_cells = (
        ('tr[2]//td[2]/a/text()', 'surveyCorpName'),
        ('tr[2]/td[3]/text()', 'surveyCorpCode'),
        ('tr[2]/td[4]/text()', 'surveyCorpArea'),
        ('tr[3]/td[2]/a/text()', 'designCorpName'),
        ('tr[3]/td[3]/text()', 'designCorpCode'),
        ('tr[3]/td[4]/text()', 'designCorpArea'),
    )
    attrs.extend(
        {'that': '', 'attr': '%s/%s' % (parties_table, leaf), 'name': field}
        for leaf, field in party_cells
    )

    parser = templates.Projects('MakeDrawing')
    fields = parser.html_analysis(response, attrs)
    if not fields['censorNum']:
        return

    # Column order of the staff table; missing cells become ''.
    person_fields = ('companyName', 'tradeName', 'prjDuty', 'name', 'card', 'specialty')
    staff_rows = Selector(response=response).xpath(
        '//table[@class="rpd_basic_table"][3]/tr')[1:-1]
    engineers = []
    for row in staff_rows:
        person = {
            key: row.xpath('./td[%d]/text()' % col).extract_first() or ''
            for col, key in enumerate(person_fields, start=1)
        }
        if person['name']:
            engineers.append(person)

    fields['engineers'] = engineers
    fields['companyName'] = response.meta['companyName']
    record = templates.MakeDrawing(**fields)

    # data() is invoked once for the debug print and once for the payload.
    print(record.data(), '施工图纸审查')
    payload = record.data()
    payload['code'] = response.meta['code']

    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(payload),
        callback=self.project_zz,
        meta={'type': '施工图纸审查'},
    )
def bid_info(self, response):
    """Parse a tender ('招标信息') detail page and forward the record.

    ``code`` is read from the ``name_level3 col_01_value`` cell, all other
    fields from ``span`` elements addressed by id. The values are extracted
    through ``templates.Projects('Mark').html_analysis`` and completed with
    the company name from ``response.meta``. The payload's ``code`` is then
    overwritten with ``response.meta['code']`` and the record is sent as a
    JSON POST to ``addCompanyProjectTender``.

    :param response: response of the detail page; ``response.meta`` must
        contain ``companyName`` and ``code``.
    :return: generator yielding a single ``scrapy.Request``.
    """
    attrs = [{
        'that': '',
        'attr': '//td[@class="name_level3 col_01_value"]/span/text()',
        'name': 'code',
    }]
    # (id of the <span> holding the value, target field name)
    labelled = (
        ('lblTenderMoney', 'tenderMoney'),
        ('lblArea', 'area'),
        ('lblTenderNum', 'tenderNum'),
        ('lblTenderNum', 'provinceTenderNum'),
        ('lblTenderClassNum', 'tenderClass'),
        ('lblTenderTypeNum', 'tenderType'),
        ('lblTenderResultDate', 'tenderResultDate'),
        ('lblPrjSize', 'prjSize'),
        ('lblAgencyCorpName', 'agencyCorpName'),
        ('lblAgencyCorpCode', 'agencyCorpCode'),
        ('lblTenderCorpName', 'tenderCorpName'),
        ('lblTenderCorpCode', 'tenderCorpCode'),
        ('lblCreateDate', 'createDate'),
        ('lblConstructorName', 'constructorName'),
        ('lblConstructorIDCard', 'constructorIDCard'),
    )
    attrs.extend(
        {'that': '', 'attr': '//span[@id="%s"]/text()' % label, 'name': field}
        for label, field in labelled
    )

    parser = templates.Projects('Mark')
    fields = parser.html_analysis(response, attrs)
    fields['companyName'] = response.meta['companyName']
    payload = templates.Mark(**fields).data()
    # The project code handed down by the caller wins over the scraped one.
    payload['code'] = response.meta['code']
    print(payload, '招标信息')

    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(payload),
        callback=self.project_zz,
        meta={'type': '招标信息'},
    )
def project_basic(self, response):
    """Post a project's basic data and request its four record lists.

    The basic fields are extracted through
    ``templates.Projects('Project').html_analysis`` and posted to
    ``addCompanyProject``. Then the ``PRJNUM`` value is taken from
    ``response.url`` and used to request the tender (``ztb``),
    drawing-review (``sgtsc``), contract (``htba``) and construction-permit
    (``sgxk``) lists from ``ProjectHandler.ashx``. Every list request
    carries the company name and the project code in ``meta``; the
    construction request additionally carries the project name.

    :param response: response of the project page; ``response.meta`` must
        contain ``companyName`` and ``response.url`` must contain
        ``PRJNUM=``.
    :return: generator of ``scrapy.Request`` objects.
    :raises IndexError: after the basic-data request has been yielded, if
        ``response.url`` has no ``PRJNUM=`` part.
    """
    company_name = response.meta['companyName']
    page = Selector(response=response)

    parser = templates.Projects('Project')
    attrs = [
        {'that': '', 'attr': '//td[@colspan="3"]/text()', 'name': 'name'},
        {'that': '', 'attr': '//td[@class="name_level3 col_01_value"]/text()', 'name': 'code'},
    ]
    # (index handed over as 'that', class of the <td>, target field name)
    indexed_cells = (
        (0, 'col_02_value', 'provinceCode'),
        (1, 'col_01_value', 'unit'),
        (1, 'col_02_value', 'catalog'),
        (2, 'col_01_value', 'unitLicenseNum'),
        (2, 'col_02_value', 'area'),
        (4, 'col_01_value', 'docuCode'),
        (4, 'col_02_value', 'level'),
        (5, 'col_01_value', 'money'),
        (5, 'col_02_value', 'acreage'),
        (6, 'col_01_value', 'trait'),
        (6, 'col_02_value', 'purpose'),
    )
    attrs.extend(
        {'that': position, 'attr': '//td[@class="%s"]' % css_class,
         'name': field, 'then': 'text()'}
        for position, css_class, field in indexed_cells
    )

    code = page.xpath(
        '//td[@class="name_level3 col_01_value"]/text()').extract_first()
    name = page.xpath('//td[@colspan="3"]/text()').extract_first()
    # Keep only the first whitespace-separated token of the code cell.
    code = code.split()[0]

    basic_d = parser.html_analysis(response, attrs)
    basic_d['companyName'] = company_name
    basic_data = templates.Project(**basic_d).data()
    print(basic_data, '基本信息')
    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
        body=json.dumps(basic_data),
        callback=self.project_zz,
        headers={'Content-Type': 'application/json'},
        method='POST',
        meta={
            'type': '基本信息',
            'company_name': basic_data['companyName'],
        },
    )

    # Extracted once, after the first yield, so the basic data is still
    # posted when the URL carries no PRJNUM.
    prj_num = re.findall('PRJNUM=(.*)', response.url)[0]
    list_url = ('http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx'
                '?method=%s&PRJNUM=%s&_=1558598717869')

    bid_url = list_url % ('ztb', prj_num)
    yield scrapy.Request(
        url=bid_url,
        callback=self.project_bid_list,
        meta={'companyName': company_name, 'code': code},
    )

    drawing_url = list_url % ('sgtsc', prj_num)
    print(response.url, '施工图纸审查')
    yield scrapy.Request(
        url=drawing_url,
        callback=self.project_drawing_list,
        meta={'companyName': company_name, 'code': code},
    )

    contract_url = list_url % ('htba', prj_num)
    print(contract_url, '合同备案全部')
    yield scrapy.Request(
        url=contract_url,
        callback=self.project_contract_list,
        meta={'companyName': company_name, 'code': code},
    )

    construction_url = list_url % ('sgxk', prj_num)
    print(construction_url, '施工许可详list')
    yield scrapy.Request(
        url=construction_url,
        callback=self.project_construction_list,
        # 'code' is passed like for the other three lists, because the
        # detail callbacks read response.meta['code']; 'name' is kept for
        # the list callback.
        # NOTE(review): project_construction_list is not visible here —
        # confirm it forwards 'code' to construction_info.
        meta={'companyName': company_name, 'name': name, 'code': code},
    )