def finish_info(self, response):
    """Scrapy callback: parse a completion ("竣工") detail page and POST it.

    The fields listed in ``cells`` are extracted through
    ``templates.Projects('Completion').html_analysis``. The company name
    carried in ``response.meta['companyName']`` is added, the result is
    wrapped in ``templates.Completion`` and one JSON POST request is
    yielded; its response is handled by ``self.project_zz``.
    """
    table = '//div[@class="detail"][1]/div[@class="detail_list"]/table/'
    # (payload field, cell position inside the detail table)
    cells = (
        ('code', 'tr[3]/td[2]'),
        # Both completion numbers are read from the same cell.
        ('prjFinishNum', 'tr[10]/td[2]'),
        ('provincePrjFinishNum', 'tr[10]/td[2]'),
        ('factCost', 'tr[11]/td[2]'),
        ('factArea', 'tr[11]/td[4]'),
        ('factSize', 'tr[12]/td[2]'),
        ('factBeginDate', 'tr[13]/td[2]'),
        ('factEndDate', 'tr[13]/td[4]'),
        ('createDate', 'tr[14]/td[2]'),
        ('mark', 'tr[14]/td[4]'),
    )
    attrs = [
        {'that': '', 'attr': table + cell + '/text()', 'name': field}
        for field, cell in cells
    ]

    parser = templates.Projects('Completion')
    parsed = parser.html_analysis(response=response, attrs=attrs)
    parsed['companyName'] = response.meta['companyName']
    finish_data = templates.Completion(**parsed)

    print(finish_data.data(), '竣工')
    payload = finish_data.data()
    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectFinish.htm',
        method='POST',
        headers={'Content-Type': 'application/json'},
        body=json.dumps(payload),
        callback=self.project_zz,
        meta={'type': '竣工'},
    )
    def contract_info(self, response):
        """Scrapy callback: parse a contract ("合同信息") detail page and POST it.

        The cells listed in ``cells`` are extracted through
        ``templates.Projects('Contract').html_analysis``. The company name
        from ``response.meta['companyName']`` is added, the result is wrapped
        in ``templates.Contract`` and one JSON POST request is yielded; its
        response is handled by ``self.project_zz``.
        """
        table = '//table[@width="100%"]/'
        # (payload field, path of the value relative to the table)
        cells = (
            ('code', 'tr[3]/td[2]/text()'),
            # Both record numbers are read from the same cell.
            ('recordNum', 'tr[10]/td[2]/text()'),
            ('provinceRecordNum', 'tr[10]/td[2]/text()'),
            ('contractType', 'tr[12]/td[2]/text()'),
            ('contractMoney', 'tr[12]/td[4]/text()'),
            ('prjSize', 'tr[13]/td[2]/text()'),
            ('contractDate', 'tr[13]/td[4]/text()'),
            ('proprietorCorpName', 'tr[14]/td[2]/a/@title'),
            ('proprietorCorpCode', 'tr[14]/td[4]/text()'),
            ('contractorCorpName', 'tr[15]/td[2]/a/text()'),
            ('contractorCorpCode', 'tr[15]/td[4]/text()'),
            ('unionCorpName', 'tr[16]/td[2]/a/text()'),
            ('unionCorpCode', 'tr[16]/td[4]/text()'),
            ('createDate', 'tr[17]/td[2]/a/text()'),
        )
        attrs = [
            {'that': '', 'attr': table + path, 'name': field}
            for field, path in cells
        ]

        parser = templates.Projects('Contract')
        parsed = parser.html_analysis(response=response, attrs=attrs)
        parsed['companyName'] = response.meta['companyName']
        payload = templates.Contract(**parsed).data()
        print(payload, '合同信息')

        yield scrapy.Request(
            url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectContract.htm',
            method='POST',
            headers={'Content-Type': 'application/json'},
            body=json.dumps(payload),
            callback=self.project_zz,
            meta={'type': '合同信息'},
        )
    def bid_info(self, response):
        """Scrapy callback: parse a tender ("招标信息") detail page and POST it.

        The cells listed in ``cells`` are extracted through
        ``templates.Projects('Mark').html_analysis``. The company name from
        ``response.meta['companyName']`` is added, the result is wrapped in
        ``templates.Mark`` and one JSON POST request is yielded; its response
        is handled by ``self.project_zz``.
        """
        table = '//table[@width="100%"]/'
        # (payload field, path of the value relative to the table)
        cells = (
            ('code', 'tr[3]/td[2]/text()'),
            # Both tender numbers are read from the same cell.
            ('tenderNum', 'tr[10]/td[2]/text()'),
            ('provinceTenderNum', 'tr[10]/td[2]/text()'),
            ('tenderClass', 'tr[11]/td[2]/text()'),
            ('tenderType', 'tr[11]/td[4]/text()'),
            ('tenderResultDate', 'tr[12]/td[2]/text()'),
            ('tenderMoney', 'tr[12]/td[4]/text()'),
            ('prjSize', 'tr[13]/td[2]/text()'),
            ('area', 'tr[13]/td[4]/text()'),
            ('agencyCorpName', 'tr[14]/td[2]/a/@title'),
            ('agencyCorpCode', 'tr[14]/td[4]/text()'),
            ('tenderCorpName', 'tr[15]/td[2]/a/text()'),
            ('tenderCorpCode', 'tr[15]/td[4]/text()'),
            ('constructorName', 'tr[16]/td[2]/a/text()'),
            ('constructorIDCard', 'tr[16]/td[4]/text()'),
            ('createDate', 'tr[17]/td[2]/a/text()'),
        )
        attrs = [
            {'that': '', 'attr': table + path, 'name': field}
            for field, path in cells
        ]

        parser = templates.Projects('Mark')
        parsed = parser.html_analysis(response=response, attrs=attrs)
        parsed['companyName'] = response.meta['companyName']
        payload = templates.Mark(**parsed).data()
        print(payload, '招标信息')

        # Tender records go to the tender endpoint. This used to post to
        # addCompanyProject.htm, which is the endpoint used for the basic
        # project record, while every other record type has its own endpoint.
        # NOTE(review): endpoint chosen from how Mark payloads are posted
        # elsewhere in this code base - confirm against the API.
        yield scrapy.Request(
            url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
            method='POST',
            headers={'Content-Type': 'application/json'},
            body=json.dumps(payload),
            callback=self.project_zz,
            meta={'type': '招标信息'},
        )
    def construction_info(self, response):
        """Scrapy callback: parse a construction-permit detail page and POST it.

        Permit fields come from the ``detail_list`` table and the
        participating-company fields from the ``classContent t1`` table. Both
        sets are extracted through
        ``templates.Projects('ConstructionPermit').html_analysis``. The
        company name from ``response.meta['companyName']`` is added, the
        result is wrapped in ``templates.ConstructionPermit`` and one JSON
        POST request is yielded; its response is handled by
        ``self.project_zz``.
        """
        detail = '//div[@class="detail_list"]/table/'
        # (payload field, path of the value relative to the detail table)
        detail_cells = (
            ('code', 'tr[3]/td[2]/text()'),
            # Both licence numbers are read from the same cell.
            ('builderLicenceNum', 'tr[10]/td[2]/text()'),
            ('provinceBuilderLicenceNum', 'tr[10]/td[2]/text()'),
            ('censorNum', 'tr[11]/td[2]/text()'),
            ('contractMoney', 'tr[11]/td[4]/text()'),
            ('constructorName', 'tr[12]/td[2]/a/@title'),
            ('constructorIDCard', 'tr[12]/td[4]/text()'),
            ('supervisionName', 'tr[13]/td[2]/a/text()'),
            ('supervisionIDCard', 'tr[13]/td[4]/text()'),
            ('area', 'tr[14]/td[2]/text()'),
            ('createDate', 'tr[14]/td[4]/text()'),
        )
        attrs = [
            {'that': '', 'attr': detail + path, 'name': field}
            for field, path in detail_cells
        ]

        # Rows 2-5 of the company table each supply name / code / area for
        # one field prefix, in this order.
        for row_no, role in enumerate(('design', 'econ', 'cons', 'super'), start=2):
            row = '//div[@class="classContent t1"]/table/tr[%d]' % row_no
            attrs.append({'that': '', 'attr': row + '/td[3]/a/text()', 'name': role + 'CorpName'})
            attrs.append({'that': '', 'attr': row + '/td[4]/text()', 'name': role + 'CorpCode'})
            attrs.append({'that': '', 'attr': row + '/td[5]/text()', 'name': role + 'CorpArea'})

        parser = templates.Projects('ConstructionPermit')
        parsed = parser.html_analysis(response=response, attrs=attrs)
        parsed['companyName'] = response.meta['companyName']
        construction_data = templates.ConstructionPermit(**parsed)

        print(construction_data.data(), '施工许可信息录入')
        payload = construction_data.data()
        yield scrapy.Request(
            url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectBuilderLicence.htm',
            method='POST',
            headers={'Content-Type': 'application/json'},
            body=json.dumps(payload),
            callback=self.project_zz,
            meta={'type': '施工许可信息录入'},
        )
    def project(self, response):
        """Scrapy callback: parse a project page and fan out to its sub-records.

        Yields, in order:

        * one JSON POST with the basic project information;
        * one GET per tender row (``classContent t1``), handled by
          ``self.bid_info``;
        * one JSON POST per drawing-review row (``classContent t2``);
        * one GET per contract row (``classContent t3``), handled by
          ``self.contract_info``;
        * one GET per link in ``classContent t4`` (``self.construction_info``)
          and in ``classContent t5`` (``self.finish_info``).

        ``response.meta['companyName']`` must be set; it is copied into the
        basic payload and forwarded to every detail request. Rows of the
        tender and contract tables that have no detail link are skipped.
        """
        api = 'https://api.maotouin.com/rest/companyInfo/'
        site = 'http://115.29.2.37:8080/'
        json_headers = {'Content-Type': 'application/json'}
        company_name = response.meta['companyName']

        # ---- basic project information -------------------------------
        table = '//div[@class="detail_list"]/table/'
        # (payload field, cell position inside the detail table)
        cells = (
            ('name', 'tr[1]/td[2]'),
            # Both project codes are read from the same cell.
            ('code', 'tr[2]/td[2]'),
            ('provinceCode', 'tr[2]/td[2]'),
            ('unit', 'tr[3]/td[2]'),
            ('catalog', 'tr[3]/td[4]'),
            ('unitLicenseNum', 'tr[4]/td[2]'),
            ('area', 'tr[4]/td[4]'),
            ('docuCode', 'tr[6]/td[2]'),
            ('level', 'tr[6]/td[4]'),
            ('money', 'tr[7]/td[2]'),
            ('acreage', 'tr[7]/td[4]'),
            ('trait', 'tr[8]/td[2]'),
            ('purpose', 'tr[8]/td[4]'),
        )
        attrs = [
            {'that': '', 'attr': table + cell + '/text()', 'name': field}
            for field, cell in cells
        ]
        basic_data = templates.Projects('Project').html_analysis(response, attrs)
        basic_data['companyName'] = company_name
        b_data = templates.Project(**basic_data).data()
        print(b_data, '基本信息', b_data['companyName'])
        yield scrapy.Request(
            url=api + 'addCompanyProject.htm',
            body=json.dumps(b_data),
            callback=self.project_zz,
            headers=json_headers, method='POST',
            meta={'type': '基本信息', 'company_name': b_data['companyName']}
        )

        page = Selector(response=response)

        # ---- tender rows: follow each detail link --------------------
        # [1:] skips the first row of every listing table.
        for row in page.xpath('//div[@class="classContent t1"]/table/tr')[1:]:
            href = row.xpath('./td[7]/a/@href').extract_first()
            if not href:
                # extract_first() returns None when the row has no link;
                # concatenating it would raise TypeError and abort every
                # request still to be yielded below.
                continue
            yield scrapy.Request(url=site + href, callback=self.bid_info, headers=heads,
                                 meta={'companyName': company_name})

        # ---- drawing-review rows: posted straight from the listing ---
        drawing_rows = page.xpath('//div[@class="classContent t2"]/table/tr')[1:]
        print(len(drawing_rows), '施工图纸审查----bbbbbbbbbbbbbbbbbbbbbbbbbbbbb', response.url)
        for row in drawing_rows:
            # NOTE(review): only these five fields are passed in; no project
            # code or company name - confirm the API can still associate the
            # record with its project.
            drawing_data = templates.MakeDrawing(
                censorNum=row.xpath('./td[2]/text()').extract_first(),
                surveyCorpName=row.xpath('./td[3]/text()').extract_first(),
                designCorpName=row.xpath('./td[4]/text()').extract_first(),
                censorCorpName=row.xpath('./td[5]/text()').extract_first(),
                censorEDate=row.xpath('./td[6]/text()').extract_first(),
            ).data()
            print(drawing_data, '施工图纸审查')
            yield scrapy.Request(url=api + 'addCompanyProjectCensor.htm',
                                 body=json.dumps(drawing_data),
                                 callback=self.project_zz,
                                 headers=json_headers, method='POST',
                                 meta={'type': '施工图纸审查'})

        # ---- contract rows: follow each detail link ------------------
        for row in page.xpath('//div[@class="classContent t3"]/table/tr')[1:]:
            href = row.xpath('./td[6]/a/@href').extract_first()
            print(href, '合同备案url')
            if not href:
                # Same guard as for tender rows: no link, nothing to follow.
                continue
            yield scrapy.Request(url=site + href, callback=self.contract_info,
                                 meta={'companyName': company_name})

        # ---- construction permits and completion records -------------
        # These XPaths select the href attributes directly, so every match
        # is a string and needs no None guard.
        for link in page.xpath('//div[@class="classContent t4"]/table/tr/td/a/@href'):
            yield scrapy.Request(url=site + link.extract(), callback=self.construction_info,
                                 meta={'companyName': company_name})

        for link in page.xpath('//div[@class="classContent t5"]/table/tr/td/a/@href'):
            yield scrapy.Request(url=site + link.extract(), callback=self.finish_info,
                                 meta={'companyName': company_name})
# Exemple #6
# 0
    def project(self, response):
        """Scrapy callback: parse a project page and POST all of its records.

        Yields, in order, one JSON POST for the basic project information
        and then one JSON POST per row of each listing table: tenders
        (``project_step1``), drawing reviews (``project_step2``), contracts
        (``project_step3``), construction permits (``project_step4``) and
        completion records (``project_step5``). Every response is handled by
        ``self.project_zz``.

        ``response.meta['companyName']`` must be set; it is copied into every
        payload. Row payloads also carry the project code taken from the
        basic information.
        """
        api = 'https://api.maotouin.com/rest/companyInfo/'
        json_headers = {'Content-Type': 'application/json'}
        banner = '*******************************'
        company_name = response.meta['companyName']

        # ---- basic project information -------------------------------
        # (payload field, id of the <td> holding the value)
        labels = (
            ('name', 'lblPrjName'),
            # Both project codes are read from the same cell.
            ('code', 'lblPrjNum'),
            ('provinceCode', 'lblPrjNum'),
            ('catalog', 'lblPrjTypeNum'),
            ('unit', 'lblBuildCorpName'),
            ('unitLicenseNum', 'lblBuildCorpCode'),
            ('area', 'lblCountyNum'),
            ('docuCode', 'lblPrjApprovalNum'),
            ('level', 'lblPrjApprovalLevelNum'),
            ('money', 'lblAllInvest'),
            ('acreage', 'lblAllArea'),
            ('trait', 'lblPrjPropertyNum'),
            ('purpose', 'lblPrjFunctionNum'),
        )
        attrs = [
            {'that': '', 'attr': '//td[@id="%s"]/text()' % label, 'name': field}
            for field, label in labels
        ]
        basic_text = templates.Projects('Project').html_analysis(response=response, attrs=attrs)
        basic_text['companyName'] = company_name
        # The page shows '暂无' as a placeholder level; send an empty value.
        if basic_text['level'] == '暂无':
            basic_text['level'] = ''
        basic_data = templates.Project(**basic_text).data()
        print('基本信息', banner, basic_data)
        yield scrapy.Request(url=api + 'addCompanyProject.htm',
                             body=json.dumps(basic_data), callback=self.project_zz,
                             headers=json_headers, method='POST',
                             meta={'type': '基本信息', 'company_name': basic_data['companyName']})

        page = Selector(response=response)
        code = basic_data['code']

        # ---- tenders -------------------------------------------------
        for row in page.xpath('//div[@id="project_step1"]/table/tbody/tr'):
            tender_num = row.xpath('./td[7]/a/text()').extract_first()
            bid_data = templates.Mark(
                tenderClass=row.xpath('./td[2]/text()').extract_first(),
                tenderType=row.xpath('./td[3]/text()').extract_first(),
                tenderCorpName=row.xpath('./td[4]/a/text()').extract_first(),
                tenderResultDate=row.xpath('./td[5]/text()').extract_first(),
                tenderMoney=row.xpath('./td[6]/text()').extract_first(),
                tenderNum=tender_num,
                provinceTenderNum=tender_num,
                code=code,
                companyName=company_name,
            ).data()
            yield scrapy.Request(url=api + 'addCompanyProjectTender.htm',
                                 body=json.dumps(bid_data), callback=self.project_zz,
                                 headers=json_headers, method='POST',
                                 meta={'type': '招标信息'})

        # ---- drawing reviews -----------------------------------------
        for row in page.xpath('//div[@id="project_step2"]/table/tbody/tr'):
            drawing_data = templates.MakeDrawing(
                censorNum=row.xpath('./td[2]/text()').extract_first(),
                provinceCensorNum=row.xpath('./td[3]/text()').extract_first(),
                surveyCorpName=row.xpath('./td[4]/a/text()').extract_first(),
                designCorpName=row.xpath('./td[5]/a/text()').extract_first(),
                censorCorpName=row.xpath('./td[6]/a/text()').extract_first(),
                censorEDate=row.xpath('./td[7]/a/text()').extract_first(),
                code=code,
                companyName=company_name,
            ).data()
            yield scrapy.Request(url=api + 'addCompanyProjectCensor.htm',
                                 body=json.dumps(drawing_data), callback=self.project_zz,
                                 headers=json_headers, method='POST',
                                 meta={'type': '施工图纸审查'})
            print('施工图纸审查', banner, drawing_data)

        # ---- contracts -----------------------------------------------
        for row in page.xpath('//div[@id="project_step3"]/table/tbody/tr'):
            contract_data = templates.Contract(
                contractType=row.xpath('./td[2]/text()').extract_first(),
                recordNum=row.xpath('./td[3]/text()').extract_first(),
                provinceRecordNum=row.xpath('./td[4]/text()').extract_first(),
                contractMoney=row.xpath('./td[5]/text()').extract_first(),
                contractDate=row.xpath('./td[6]/text()').extract_first(),
                code=code,
                companyName=company_name,
            ).data()
            yield scrapy.Request(url=api + 'addCompanyProjectContract.htm',
                                 body=json.dumps(contract_data), callback=self.project_zz,
                                 headers=json_headers, method='POST',
                                 meta={'type': '合同备案'})
            print('合同备案', banner, contract_data)
        print(response.url, 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')

        # ---- construction permits ------------------------------------
        for row in page.xpath('//div[@id="project_step4"]/table/tbody/tr'):
            licence_num = row.xpath('./td[2]/text()').extract_first()
            construction_data = templates.ConstructionPermit(
                builderLicenceNum=licence_num,
                provinceBuilderLicenceNum=licence_num,
                consCorpName=row.xpath('./td[3]/a/text()').extract_first(),
                contractMoney=row.xpath('./td[4]/text()').extract_first(),
                area=row.xpath('./td[5]/text()').extract_first(),
                code=code,
                createDate=row.xpath('./td[6]/text()').extract_first(),
                companyName=company_name,
            ).data()
            yield scrapy.Request(url=api + 'addCompanyProjectBuilderLicence.htm',
                                 body=json.dumps(construction_data), callback=self.project_zz,
                                 headers=json_headers, method='POST',
                                 meta={'type': '施工许可'})
            print('施工许可', banner, construction_data)

        # ---- completion records --------------------------------------
        # This used to select project_step4 again, which re-posted the
        # construction-permit rows as completion records.
        # NOTE(review): project_step5 follows the step1..step4 numbering of
        # the tables above - confirm against the live page.
        for row in page.xpath('//div[@id="project_step5"]/table/tbody/tr'):
            completion_data = templates.Completion(
                prjFinishNum=row.xpath('./td[2]/text()').extract_first(),
                provincePrjFinishNum=row.xpath('./td[3]/a/text()').extract_first(),
                factCost=row.xpath('./td[4]/text()').extract_first(),
                factArea=row.xpath('./td[5]/text()').extract_first(),
                factBeginDate=row.xpath('./td[6]/text()').extract_first(),
                code=code,
                # The end date used to be read from td[6] as well, so it
                # always equalled the begin date.
                # NOTE(review): td[7] assumed as the next column - confirm.
                factEndDate=row.xpath('./td[7]/text()').extract_first(),
                companyName=company_name,
            ).data()
            yield scrapy.Request(
                url=api + 'addCompanyProjectFinish.htm',
                body=json.dumps(completion_data), callback=self.project_zz,
                headers=json_headers, method='POST',
                meta={'type': '竣工验收'}
            )
            print('竣工验收', banner, completion_data)
 def construction_info(self, response):
     """Scrapy callback: parse a construction-permit detail page and POST it.

     Permit fields are read from ``<span id="lbl...">`` elements and the
     participating-company fields from the second ``rpd_basic_table``. Both
     sets are extracted through
     ``templates.Projects('ConstructionPermit').html_analysis``. The company
     name from ``response.meta['companyName']`` is added, and the payload's
     ``code`` is overwritten with ``response.meta['code']`` before one JSON
     POST request is yielded; its response is handled by
     ``self.project_zz``.
     """
     # (payload field, id of the <span> holding the value)
     labels = (
         ('code', 'lblPrjNum'),
         # Both licence numbers are read from the same span.
         ('builderLicenceNum', 'lblBuilderLicenceNum'),
         ('provinceBuilderLicenceNum', 'lblBuilderLicenceNum'),
         ('censorNum', 'lblCensorNum'),
         ('contractMoney', 'lblContractMoney'),
         ('area', 'lblArea'),
         ('constructorName', 'lblConstructorName'),
         ('constructorIDCard', 'lblConstructorIDCard'),
         ('supervisionName', 'lblSupervisionName'),
         ('supervisionIDCard', 'lblSupervisionIDCard'),
         ('createDate', 'lblCreateDate'),
     )
     attrs = [
         {'that': '', 'attr': '//span[@id="%s"]/text()' % label, 'name': field}
         for field, label in labels
     ]

     # Rows 2-5 of the company table each supply name / code / area for one
     # field prefix, in this order.
     for row_no, role in enumerate(('design', 'cons', 'super', 'econ'), start=2):
         row = '//table[@class="rpd_basic_table"][2]/tr[%d]' % row_no
         attrs.append({'that': '', 'attr': row + '/td[2]/a/text()', 'name': role + 'CorpName'})
         attrs.append({'that': '', 'attr': row + '/td[3]/text()', 'name': role + 'CorpCode'})
         attrs.append({'that': '', 'attr': row + '/td[4]/text()', 'name': role + 'CorpArea'})

     parser = templates.Projects('ConstructionPermit')
     parsed = parser.html_analysis(response, attrs)
     parsed['companyName'] = response.meta['companyName']
     payload = templates.ConstructionPermit(**parsed).data()
     print(payload, '施工许可信息')
     # The project code handed over by the caller replaces the scraped one.
     payload['code'] = response.meta['code']

     yield scrapy.Request(
         url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectBuilderLicence.htm',
         method='POST',
         headers={'Content-Type': 'application/json'},
         body=json.dumps(payload),
         callback=self.project_zz,
         meta={'type': '施工许可信息'},
     )
 def contract_info(self, response):
     """Scrapy callback: parse a contract-record detail page and POST it.

     Each field is read from a ``<span id="lbl...">`` element through
     ``templates.Projects('Contract').html_analysis``. The company name from
     ``response.meta['companyName']`` is added, and the payload's ``code`` is
     overwritten with ``response.meta['code']`` before one JSON POST request
     is yielded; its response is handled by ``self.project_zz``.
     """
     # (payload field, id of the <span> holding the value)
     labels = (
         ('code', 'lblPrjNum'),
         # Both record numbers are read from the same span.
         ('recordNum', 'lblRecordNum'),
         ('provinceRecordNum', 'lblRecordNum'),
         ('contractNum', 'lblContractNum'),
         ('contractClassify', 'lblcontractclassnum'),
         ('contractType', 'lblContractTypeNum'),
         ('contractMoney', 'lblContractMoney'),
         ('prjSize', 'lblPrjSize'),
         ('contractDate', 'lblContractDate'),
         ('proprietorCorpName', 'lblPropietorCorpName'),
         ('proprietorCorpCode', 'lblPropietorCorpCode'),
         ('contractorCorpName', 'lblContractorCorpName'),
         ('contractorCorpCode', 'lblContractorCorpCode'),
         ('unionCorpName', 'lblUnionCorpName'),
         ('unionCorpCode', 'lblUnionCorpCode'),
         ('createDate', 'lblCreateDate'),
     )
     attrs = [
         {'that': '', 'attr': '//span[@id="%s"]/text()' % label, 'name': field}
         for field, label in labels
     ]

     parser = templates.Projects('Contract')
     parsed = parser.html_analysis(response, attrs)
     parsed['companyName'] = response.meta['companyName']
     payload = templates.Contract(**parsed).data()
     print(payload, '合同备案信息')
     # The project code handed over by the caller replaces the scraped one.
     payload['code'] = response.meta['code']

     yield scrapy.Request(
         url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectContract.htm',
         method='POST',
         headers={'Content-Type': 'application/json'},
         body=json.dumps(payload),
         callback=self.project_zz,
         meta={'type': '合同备案信息'},
     )
    def drawing_info(self, response):
        """Parse a construction-drawing review page and forward it to the API.

        The scalar fields are extracted through
        ``templates.Projects('MakeDrawing').html_analysis``. Nothing is sent
        when the extracted ``censorNum`` is empty. Otherwise the rows of the
        third ``rpd_basic_table`` (first and last row excluded) are turned
        into an ``engineers`` list, keeping only rows that have a name, and
        the record is POSTed as JSON.

        :param response: response of the detail page; its ``meta`` must
            contain ``companyName`` and ``code``.
        :return: generator yielding at most one POST ``scrapy.Request``
            handled by ``self.project_zz``.
        """
        # (payload field name, XPath of the text node holding the value)
        field_xpaths = (
            ('code', '//span[@id="lblPrjNum"]/text()'),
            ('censorCorpName', '//span[@id="lblCensorCorpName"]/text()'),
            ('censorCorpCode', '//span[@id="lblCensorCorpCode"]/text()'),
            ('censorNum', '//span[@id="lblCensorNum"]/text()'),
            ('provinceCensorNum', '//span[@id="lblCensorNum"]/text()'),
            ('censorEDate', '//span[@id="lblCensorEDate"]/text()'),
            ('prjSize', '//span[@id="lblPrjSize"]/text()'),
            ('surveyCorpName',
             '//table[@class="rpd_basic_table"][2]/tr[2]//td[2]/a/text()'),
            ('surveyCorpCode',
             '//table[@class="rpd_basic_table"][2]/tr[2]/td[3]/text()'),
            ('surveyCorpArea',
             '//table[@class="rpd_basic_table"][2]/tr[2]/td[4]/text()'),
            ('designCorpName',
             '//table[@class="rpd_basic_table"][2]/tr[3]/td[2]/a/text()'),
            ('designCorpCode',
             '//table[@class="rpd_basic_table"][2]/tr[3]/td[3]/text()'),
            ('designCorpArea',
             '//table[@class="rpd_basic_table"][2]/tr[3]/td[4]/text()'),
        )
        attrs = [
            {'that': '', 'attr': xpath, 'name': field}
            for field, xpath in field_xpaths
        ]

        record = templates.Projects('MakeDrawing').html_analysis(
            response, attrs)
        if not record['censorNum']:
            # Without a review number there is nothing to submit.
            return

        rows = Selector(response=response).xpath(
            '//table[@class="rpd_basic_table"][3]/tr')
        # Column order of the personnel table: td[1] .. td[6].
        person_keys = ('companyName', 'tradeName', 'prjDuty', 'name', 'card',
                       'specialty')
        engineers = []
        # The first and last rows are skipped (presumably header and footer).
        for row in rows[1:-1]:
            person = {
                key: row.xpath('./td[%d]/text()' % column).extract_first() or ''
                for column, key in enumerate(person_keys, start=1)
            }
            # Rows without a person name are dropped.
            if person['name']:
                engineers.append(person)

        record['engineers'] = engineers
        record['companyName'] = response.meta['companyName']
        review = templates.MakeDrawing(**record)
        print(review.data(), '施工图纸审查')
        payload = review.data()
        payload['code'] = response.meta['code']

        endpoint = 'https://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm'
        yield scrapy.Request(
            url=endpoint,
            body=json.dumps(payload),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'},
            method='POST',
            meta={'type': '施工图纸审查'},
        )
 def bid_info(self, response):
     """Parse a tender (bid) detail page and forward the record to the API.

     The project code is read from the ``name_level3 col_01_value`` table
     cell; every other field comes from the text of a ``<span>`` selected by
     its ``id``. The extracted mapping is tagged with the company name, run
     through ``templates.Mark`` and sent as a JSON POST body, with ``code``
     set from ``response.meta['code']``.

     :param response: response of the detail page; its ``meta`` must contain
         ``companyName`` and ``code``.
     :return: generator yielding a single POST ``scrapy.Request`` handled by
         ``self.project_zz``.
     """
     span_text = '//span[@id="%s"]/text()'
     # (payload field name, XPath of the text node holding the value)
     field_xpaths = [
         ('code', '//td[@class="name_level3 col_01_value"]/span/text()'),
         ('tenderMoney', span_text % 'lblTenderMoney'),
         ('area', span_text % 'lblArea'),
         ('tenderNum', span_text % 'lblTenderNum'),
         ('provinceTenderNum', span_text % 'lblTenderNum'),
         ('tenderClass', span_text % 'lblTenderClassNum'),
         ('tenderType', span_text % 'lblTenderTypeNum'),
         ('tenderResultDate', span_text % 'lblTenderResultDate'),
         ('prjSize', span_text % 'lblPrjSize'),
         ('agencyCorpName', span_text % 'lblAgencyCorpName'),
         ('agencyCorpCode', span_text % 'lblAgencyCorpCode'),
         ('tenderCorpName', span_text % 'lblTenderCorpName'),
         ('tenderCorpCode', span_text % 'lblTenderCorpCode'),
         ('createDate', span_text % 'lblCreateDate'),
         ('constructorName', span_text % 'lblConstructorName'),
         ('constructorIDCard', span_text % 'lblConstructorIDCard'),
     ]
     attrs = []
     for field, xpath in field_xpaths:
         attrs.append({'that': '', 'attr': xpath, 'name': field})

     extracted = templates.Projects('Mark').html_analysis(response, attrs)
     extracted['companyName'] = response.meta['companyName']
     payload = templates.Mark(**extracted).data()
     # The code handed over by the previous request is what gets sent.
     payload['code'] = response.meta['code']
     print(payload, '招标信息')

     endpoint = 'https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm'
     yield scrapy.Request(
         url=endpoint,
         body=json.dumps(payload),
         callback=self.project_zz,
         headers={'Content-Type': 'application/json'},
         method='POST',
         meta={'type': '招标信息'},
     )
Exemple #11
0
    def project_basic(self, response):
        """Parse a project's basic-information page and fan out follow-ups.

        The basic record is extracted with
        ``templates.Projects('Project').html_analysis``, tagged with
        ``response.meta['companyName']``, converted through
        ``templates.Project`` and POSTed as JSON to ``addCompanyProject.htm``.
        After that, list requests for tenders, drawing reviews, contract
        filings and construction permits are scheduled, each keyed by the
        ``PRJNUM`` value taken from ``response.url``.

        :param response: response of the project page; its ``meta`` must
            contain ``companyName`` and its URL must contain ``PRJNUM=``.
        :return: generator of ``scrapy.Request`` objects.
        """
        basic = templates.Projects('Project')
        # Each entry describes one field for html_analysis. Entries with an
        # integer 'that' also carry a 'then' XPath -- presumably 'that' picks
        # the n-th matched node and 'then' is evaluated relative to it; confirm
        # in templates.Projects.html_analysis.
        attrs = [
            {
                'that': '',
                'attr': '//td[@colspan="3"]/text()',
                'name': 'name'
            },
            {
                'that': '',
                'attr': '//td[@class="name_level3 col_01_value"]/text()',
                'name': 'code'
            },
            {
                'that': 0,
                'attr': '//td[@class="col_02_value"]',
                'name': 'provinceCode',
                'then': 'text()'
            },
            {
                'that': 1,
                'attr': '//td[@class="col_01_value"]',
                'name': 'unit',
                'then': 'text()'
            },
            {
                'that': 1,
                'attr': '//td[@class="col_02_value"]',
                'name': 'catalog',
                'then': 'text()'
            },
            {
                'that': 2,
                'attr': '//td[@class="col_01_value"]',
                'name': 'unitLicenseNum',
                'then': 'text()'
            },
            {
                'that': 2,
                'attr': '//td[@class="col_02_value"]',
                'name': 'area',
                'then': 'text()'
            },
            {
                'that': 4,
                'attr': '//td[@class="col_01_value"]',
                'name': 'docuCode',
                'then': 'text()'
            },
            {
                'that': 4,
                'attr': '//td[@class="col_02_value"]',
                'name': 'level',
                'then': 'text()'
            },
            {
                'that': 5,
                'attr': '//td[@class="col_01_value"]',
                'name': 'money',
                'then': 'text()'
            },
            {
                'that': 5,
                'attr': '//td[@class="col_02_value"]',
                'name': 'acreage',
                'then': 'text()'
            },
            {
                'that': 6,
                'attr': '//td[@class="col_01_value"]',
                'name': 'trait',
                'then': 'text()'
            },
            {
                'that': 6,
                'attr': '//td[@class="col_02_value"]',
                'name': 'purpose',
                'then': 'text()'
            },
        ]
        # Project code and name are also read directly so they can be passed
        # to the follow-up requests through meta.
        code = Selector(response=response).xpath(
            '//td[@class="name_level3 col_01_value"]/text()').extract_first()
        name = Selector(response=response).xpath(
            '//td[@colspan="3"]/text()').extract_first()
        # Keep only the first whitespace-separated token of the code cell.
        # NOTE(review): extract_first() yields None when nothing matches, in
        # which case this line raises AttributeError -- confirm whether the
        # cell is always present.
        code = code.split()[0]
        # Pattern used below to pull the PRJNUM query value out of response.url.
        # NOTE(review): '(.*)' is greedy and captures everything after
        # 'PRJNUM=', including any further query parameters.
        xx = 'PRJNUM=(.*)'
        basic_d = basic.html_analysis(response, attrs)
        basic_d['companyName'] = response.meta['companyName']

        # 'basic' is rebound here from the Projects parser to the Project record.
        basic = templates.Project(**basic_d)
        basic_data = basic.data()
        print(basic_data, '基本信息')
        # Submit the basic project record to the API as JSON.
        yield scrapy.Request(
            url=
            'https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
            body=json.dumps(basic_data),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'},
            method='POST',
            meta={
                'type': '基本信息',
                'company_name': basic_data['companyName']
            },
        )

        # Tender list (method=ztb). The trailing '_=' value is hard-coded,
        # presumably a cache-busting timestamp -- TODO confirm.
        bid_url = 'http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx?method=ztb&PRJNUM=%s&_=1558598717869' \
                  % re.findall(xx, response.url)[0]
        yield scrapy.Request(url=bid_url,
                             callback=self.project_bid_list,
                             meta={
                                 'companyName': response.meta['companyName'],
                                 'code': code
                             })

        # Construction-drawing review list (method=sgtsc).
        drawing_url = 'http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx?method=sgtsc&PRJNUM=%s&_=1558598717869' % \
                      re.findall(xx, response.url)[0]
        # NOTE(review): this logs response.url, not drawing_url, unlike the
        # contract and construction prints below.
        print(response.url, '施工图纸审查')
        yield scrapy.Request(url=drawing_url,
                             callback=self.project_drawing_list,
                             meta={
                                 'companyName': response.meta['companyName'],
                                 'code': code
                             })

        # Contract filing list (method=htba).
        contract_url = 'http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx?method=htba&PRJNUM=%s&_=1558598717869' % \
                       re.findall(xx, response.url)[0]
        print(contract_url, '合同备案全部')
        yield scrapy.Request(url=contract_url,
                             callback=self.project_contract_list,
                             meta={
                                 'companyName': response.meta['companyName'],
                                 'code': code
                             })

        # Construction permit list (method=sgxk). Unlike the requests above,
        # this one forwards the project name instead of the code.
        construction_url = 'http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx?method=sgxk&PRJNUM=%s&_=1558598717869' % \
                           re.findall(xx, response.url)[0]
        print(construction_url, '施工许可详list')
        yield scrapy.Request(url=construction_url,
                             callback=self.project_construction_list,
                             meta={
                                 'companyName': response.meta['companyName'],
                                 'name': name
                             })