Beispiel #1
0
    def parse(self, response):
        """Parse one listing page of filing records and request the next page.

        For every ``<tr>`` under ``#publicInformationForm`` the fifth cell is
        read as a ``%Y/%m/%d`` date:

        * rows dated today are skipped;
        * rows dated before yesterday are skipped and stop pagination once
          the current page has been processed;
        * every other row is yielded as a ``GovinvestJiangsuItem`` whose
          ``dic`` field maps the Chinese column names to the row's values.

        Rows without a fifth cell, or whose date cannot be parsed, are
        skipped instead of aborting the whole page (and with it the request
        for the next page).

        The module-level ``count`` is used as the page counter; the next page
        is requested only while it stays below 100.
        """
        global count
        endFlag = '0'
        nextUrl = 'http://222.190.131.17:8075/portalopenPublicInformation.do?method=querybeianExamineAll'
        print('$$$$$$$$$$$$$$$$$$' + str(count) + '$$$$$$$$$$$$$$$$$$')

        # Midnight of today / yesterday; identical for every row, so compute
        # them once per page instead of once per row.
        currDate = datetime.now().replace(hour=0, minute=0, second=0,
                                          microsecond=0)
        yesterday = currDate - timedelta(days=1)

        for each in response.xpath("//*[@id='publicInformationForm']/tr"):
            dateCells = each.xpath("./td[5]/text()").extract()
            if not dateCells:
                # No date cell in this row: nothing to compare, skip it
                # rather than fail with IndexError.
                continue
            try:
                recordDate = datetime.strptime(dateCells[0].strip(),
                                               "%Y/%m/%d")
            except ValueError:
                print('skip row with unparseable date: ' + repr(dateCells[0]))
                continue

            if currDate == recordDate:
                print('currDate == recordDate')
                continue
            if yesterday > recordDate:
                # Older than yesterday: remember to stop paginating, but keep
                # scanning the remaining rows of this page.
                endFlag = '1'
                print('yesterday > recordDate')
                continue

            title = tool.returnNotNull(each.xpath("./td[1]/@title").extract())
            name = tool.returnNotNull(each.xpath("./td[2]/text()").extract())
            department = tool.returnNotNull(
                each.xpath("./td[3]/text()").extract())
            code = tool.returnNotNull(each.xpath("./td[4]/text()").extract())
            # Reuse the list extracted above instead of querying td[5] again.
            date = tool.returnNotNull(dateCells)

            investDict = {}
            investDict[u'备案时间'] = date  # filing date
            investDict[u'项目名称'] = title  # project name
            investDict[u'申报单位名称'] = name  # applicant organisation name
            investDict[u'备案机关'] = department  # filing authority
            investDict[u'备案证号'] = code  # filing certificate number

            item = GovinvestJiangsuItem()
            item['dic'] = investDict
            yield item

        count += 1
        if count < 100 and endFlag == '0':
            print('go next page ------------------------------' + str(count))
            yield scrapy.FormRequest(nextUrl,
                                     formdata={'pageNo': str(count)},
                                     callback=self.parse)
Beispiel #2
0
 def get_detail(self,response,applyDate):
     """Build a ``GovinvestFujianItem`` from a project detail page.

     Field labels are scraped from the page and used as dictionary keys;
     the matching values are passed through ``self.localStrip``. The
     ``applyDate`` argument is stored under the fixed key ``'申请时间'``.

     Raises ``IndexError`` when any cell read with ``extract()[0]`` is
     missing from the page.
     """
     def required_text(path):
         # First text node at `path`, stripped; IndexError when absent.
         return response.xpath(path).extract()[0].strip()

     def optional_value(path):
         # Delegates missing-node handling to commonTool.returnNotNull.
         return commonTool.returnNotNull(response.xpath(path).extract())

     record = GovinvestFujianItem()
     fields = {}

     basicCell = "//*[@id='step_2']/fieldset/table/tbody/tr[%d]/td[%d]/text()"
     headerCell = "//*[@id='itemInfos']/fieldset/table/tbody/tr/th[%d]/text()"
     resultCell = "//*[@id='itemInfos']/fieldset/div[2]/table/tbody/tr/td[%d]/text()"

     # Basic project information: label/value pairs laid out side by side.
     codeLabel = required_text(basicCell % (1, 1))
     codeText = required_text(basicCell % (1, 2))
     nameLabel = required_text(basicCell % (1, 3))
     nameText = required_text(basicCell % (1, 4))

     typeLabel = required_text(basicCell % (2, 1))
     typeText = required_text(basicCell % (2, 2))
     legalLabel = required_text(basicCell % (2, 3))
     # The legal-entity value is read from a different container than its label.
     legalText = required_text("//*[@id='aout_div']/fieldset/div/table/tbody/tr[1]/td[1]/text()")

     applyLabel = required_text(basicCell % (3, 1))
     applyText = required_text(basicCell % (3, 2))

     # Approval table: five header cells, then five value cells.
     deptLabel, matterLabel, noLabel, resultLabel, timeLabel = [
         required_text(headerCell % col) for col in range(1, 6)
     ]
     deptText, matterText, noText, resultText, timeText = [
         optional_value(resultCell % col) for col in range(1, 6)
     ]

     fields['申请时间'] = applyDate  # application time
     # Insertion order below is the order the fields appear in the item.
     orderedPairs = (
         (nameLabel, nameText),      # project name
         (legalLabel, legalText),    # project legal entity
         (timeLabel, timeText),      # approval time
         (applyLabel, applyText),    # construction application number
         (deptLabel, deptText),      # approving department
         (codeLabel, codeText),      # project code
         (typeLabel, typeText),      # project type
         (matterLabel, matterText),  # approval matter
         (resultLabel, resultText),  # approval result
         (noLabel, noText),          # approval document number
     )
     for label, rawValue in orderedPairs:
         fields[label] = self.localStrip(rawValue)

     record['dic'] = fields
     return record
Beispiel #3
0
 def parse(self, response):
     """Parse one listing page and schedule a detail request per row.

     Every ``<tr>`` under ``#tb`` for which ``commonTool.returnNotNull``
     returns ``None`` for its ``@id`` is treated as a data row; all other
     rows are skipped. For each data row a request to the project's detail
     page is yielded, with the row's date forwarded to ``self.get_detail``
     as the ``applyDate`` keyword argument.

     The today / older-than-yesterday comparisons are only logged: the
     skip-and-stop logic that used them is disabled, so ``endFlag`` never
     changes and pagination is bounded only by ``self.count < 100``.

     Raises ``IndexError`` if a data row lacks one of the expected cells and
     ``ValueError`` if its date is not in ``%Y-%m-%d`` format.
     """
     endFlag = '0'
     print('$$$$$$$$$$$$$$$$$$' + str(self.count) + '$$$$$$$$$$$$$$$$$$')

     # Midnight of today / yesterday; the same for every row of the page.
     currDate = datetime.now().replace(hour=0, minute=0, second=0,
                                       microsecond=0)
     yesterday = currDate - timedelta(days=1)

     for each in response.xpath("//*[@id='tb']/tbody/tr"):
         trid = commonTool.returnNotNull(each.xpath("./@id"))
         if trid is not None:
             # Rows carrying an id are not data rows (presumably headers or
             # pager rows -- confirm against the page markup).
             continue

         name = each.xpath("./td[1]/text()").extract()[0].strip()
         print(name)
         date = each.xpath("./td[4]/text()").extract()[0].strip()
         print(date)

         recordDate = datetime.strptime(date, "%Y-%m-%d")
         if currDate == recordDate:
             print('currDate == recordDate')
         if yesterday > recordDate:
             print('yesterday > recordDate')

         projectcode = each.xpath("./td[2]/text()").extract()[0].strip()
         detailUrl = ('https://fj.tzxm.gov.cn/eap/credit.publicShow?projectcode='
                      + projectcode + '&biaoji=0')
         yield scrapy.Request(detailUrl,
                              callback=self.get_detail,
                              headers=self.headers,
                              cb_kwargs={'applyDate': date})

     self.count += 1
     print('go next page ------------------------------' + str(self.count))
     if self.count < 100 and endFlag == '0':
         yield scrapy.FormRequest(self.start_urls[0],
                                  formdata={'page': str(self.count)},
                                  headers=self.headers,
                                  callback=self.parse)
Beispiel #4
0
    def get_detail(self, response):
        """Build a ``GovinvestGuangdongItem`` from the ``#hytab`` detail table.

        Rows 2 through 12 of the table each hold a label in the first cell
        and a value in the second. The label text becomes the dictionary key
        and the value (via ``commonTool.returnNotNull``) the entry. The
        resulting dictionary is printed and stored in the item's ``dic``
        field.

        Raises ``IndexError`` when a label cell is missing.
        """
        record = GovinvestGuangdongItem()
        cellPath = "//*[@id='hytab']/tr[%d]/td[%d]/text()"

        # Read label and value of every row, top to bottom.
        scraped = {}
        for rowNo in range(2, 13):
            caption = response.xpath(cellPath % (rowNo, 1)).extract()[0]
            content = commonTool.returnNotNull(
                response.xpath(cellPath % (rowNo, 2)).extract())
            scraped[rowNo] = (caption, content)

        # Row numbers in the order the fields are inserted into the result.
        # Per the original author's notes the rows are expected to be:
        #   9 filing application date, 3 project name, 7 construction unit,
        #   2 filing project number, 4 project location, 5 total investment,
        #   6 project scale and content, 8 filing authority,
        #   10 review approval date, 11 project start/end years,
        #   12 current project status.
        insertionOrder = (9, 3, 7, 2, 4, 5, 6, 8, 10, 11, 12)

        details = {}
        for rowNo in insertionOrder:
            caption, content = scraped[rowNo]
            details[caption] = content

        print(details)
        record['dic'] = details
        return record
Beispiel #5
0
    def get_detail(self, response):
        """Build a ``GovinvestHebeiItem`` from a project detail page.

        The second ``<table>`` of the page holds the basic project data
        (code and name are required; type and legal entity are optional),
        the fourth holds one approval record as a header row plus a value
        row. Label texts scraped from the page are used as dictionary keys.

        The optional "project type" and "legal entity" fields are stored
        only when both their label and a non-empty value are present.

        Raises ``IndexError`` when a required cell is missing.
        """
        def required_text(path):
            # First text node at `path`, stripped; IndexError when absent.
            return response.xpath(path).extract()[0].strip()

        def optional_text(path, default):
            # Same presence check as before (returnNotNull on the selector
            # list), but the XPath is evaluated only once.
            nodes = response.xpath(path)
            if not commonTool.returnNotNull(nodes):
                return default
            return nodes.extract()[0].strip()

        def approval_value(path):
            return commonTool.returnNotNull(
                response.xpath(path).extract()).strip()

        item = GovinvestHebeiItem()
        investDict = {}

        projectCode = required_text("/html/body/table[2]/tr[1]/td[1]/b/text()")
        projectCodeValue = required_text(
            "/html/body/table[2]/tr[1]/td[2]/text()")
        projectName = required_text("/html/body/table[2]/tr[1]/td[3]/b/text()")
        projectNameValue = required_text(
            "/html/body/table[2]/tr[1]/td[4]/text()")

        # Optional second row. Labels default to None so a missing label can
        # never surface later as an unbound local.
        projectType = optional_text(
            "/html/body/table[2]/tr[2]/td[1]/b/text()", None)
        projectTypeValue = optional_text(
            "/html/body/table[2]/tr[2]/td[2]/text()", '')
        projectLegelPerson = optional_text(
            "/html/body/table[2]/tr[2]/td[3]/b/text()", None)
        # Previously this value was assigned to a throwaway variable, so the
        # legal-entity field was never stored.
        projectLegelPersonValue = optional_text(
            "/html/body/table[2]/tr[2]/td[4]/text()", '')

        # Approval table: header cells (row 1) and value cells (row 2).
        approveDepartment = required_text(
            "/html/body/table[4]/tr[1]/td[1]/b/text()")
        approveMatter = required_text(
            "/html/body/table[4]/tr[1]/td[2]/b/text()")
        approveResult = required_text(
            "/html/body/table[4]/tr[1]/td[3]/b/text()")
        approveTime = required_text(
            "/html/body/table[4]/tr[1]/td[4]/b/text()")
        approveNo = required_text("/html/body/table[4]/tr[1]/td[5]/b/text()")

        approveDepartmentValue = approval_value(
            "/html/body/table[4]/tr[2]/td[1]/text()")
        approveMatterValue = approval_value(
            "/html/body/table[4]/tr[2]/td[2]/text()")
        approveResultValue = approval_value(
            "/html/body/table[4]/tr[2]/td[3]/text()")
        approveTimeValue = approval_value(
            "/html/body/table[4]/tr[2]/td[4]/text()")
        approveNoValue = approval_value(
            "/html/body/table[4]/tr[2]/td[5]/text()")

        investDict[approveTime] = approveTimeValue  # approval time
        investDict[projectName] = self.localStrip(
            projectNameValue)  # project name
        if projectLegelPersonValue and projectLegelPerson is not None:
            investDict[projectLegelPerson] = self.localStrip(
                projectLegelPersonValue)  # project legal entity
        investDict[approveDepartment] = self.localStrip(
            approveDepartmentValue)  # approving department
        investDict[projectCode] = self.localStrip(
            projectCodeValue)  # project code
        if projectTypeValue and projectType is not None:
            investDict[projectType] = self.localStrip(
                projectTypeValue)  # project type
        investDict[approveMatter] = self.localStrip(
            approveMatterValue)  # approval matter
        investDict[approveResult] = self.localStrip(
            approveResultValue)  # approval result
        investDict[approveNo] = self.localStrip(
            approveNoValue)  # approval document number
        item['dic'] = investDict
        return item
Beispiel #6
0
    def get_detail(self, response):
        """Build a ``GovinvestAnhuiItem`` from the ``#tab00`` detail panel.

        The first table inside the panel holds the basic project data as
        label/value cells; the second holds one approval record as a header
        row plus a value row. Label texts scraped from the page are used as
        dictionary keys; values go through ``commonTool.returnNotNull``.

        Raises ``IndexError`` when a label cell is missing.
        """
        def heading(path):
            # First text node at `path`, unstripped; IndexError when absent.
            return response.xpath(path).extract()[0]

        def content(path):
            return commonTool.returnNotNull(response.xpath(path).extract())

        record = GovinvestAnhuiItem()

        basicCell = "//*[@id='tab00']/div[1]/table/tr[%d]/td[%d]/text()"
        approvalCell = "//*[@id='tab00']/div[2]/div[2]/table/tr[%d]/td[%d]/text()"

        # Basic information: label in odd cells, value in the following cell.
        codeKey = heading(basicCell % (1, 1))
        codeVal = content(basicCell % (1, 2))
        nameKey = heading(basicCell % (1, 3))
        nameVal = content(basicCell % (1, 4))

        typeKey = heading(basicCell % (2, 1))
        typeVal = content(basicCell % (2, 2))
        legalKey = heading(basicCell % (2, 3))
        legalVal = content(basicCell % (2, 4))

        # Approval record: five headers in row 1, five values in row 2.
        deptKey, matterKey, resultKey, timeKey, noKey = [
            heading(approvalCell % (1, col)) for col in range(1, 6)
        ]
        deptVal, matterVal, resultVal, timeVal = [
            content(approvalCell % (2, col)) for col in range(1, 5)
        ]
        # The fifth value sits inside a <span> rather than directly in the cell.
        noVal = content(
            "//*[@id='tab00']/div[2]/div[2]/table/tr[2]/td[5]/span[1]/text()")

        details = {
            timeKey: timeVal,      # approval time
            nameKey: nameVal,      # project name
            legalKey: legalVal,    # project legal entity
            deptKey: deptVal,      # approving department
            codeKey: codeVal,      # project code
            typeKey: typeVal,      # project type
            matterKey: matterVal,  # approval matter
            resultKey: resultVal,  # approval result
            noKey: noVal,          # approval document number
        }
        record['dic'] = details
        return record