Exemple #1
0
                                reg = r'doOpen\(\'(.*?)\'\)'
                                pattern = re.compile(reg)
                                pri = pattern.findall(str(priName))[0]
                                regID = regIDlist[i].contents[0]
                                entdict = dict(Name=Name,
                                               regID=regID,
                                               Date=cdate,
                                               pri=pri)
                                self.PrintInfo(entdict)
                    except Exception:
                        self.printitemerror(pageNos, i)
                        continue
            if br == 1: break

    def PrintInfo(self, ent):
        """Fetch the detail page for one enterprise and pass it to ``gendown``.

        ``ent`` is a dict-like record.  Its ``'pri'`` value is given to
        ``self.getinfopostdata`` to build the POST body.  From the parsed
        response, the result of ``find('tr', attrs={'name': 'yc'})`` is
        searched for ``td`` elements, and those are forwarded together with
        ``ent`` to ``self.gendown``.

        Nothing is caught here: a missing row makes ``find`` return a value
        without ``findAll`` and the resulting error propagates to the caller.
        """
        post_body = self.getinfopostdata(ent.get('pri'))
        request = Request(url='http://gsxt.scaic.gov.cn/ztxy.do',
                          data=post_body,
                          headers={'User-Agent': 'Magic Browser'})
        page = self.gethtml(request)
        yc_row = page.find('tr', attrs={'name': 'yc'})
        self.gendown(ent, yc_row.findAll('td'))


if __name__ == '__main__':
    # Script entry point: run GetYC for location '四川' (Sichuan) with a
    # date window from 1900-11-01 up to today.
    location = '四川'
    YCParser = GetYCParser()
    run_options = dict(
        startdate=date(1900, 11, 1),
        # Subtracting zero days leaves today's date unchanged.
        enddate=date.today() - timedelta(days=0),
        fmode='a',
    )
    YCParser.GetYC(location, **run_options)
            info = infolist[i]
            reg = r'"specause":"(.*?)"'
            pattern = re.compile(reg)
            inreason = pattern.findall(info)
            f.write(inreason[0] + '|')
            reg = r'"abntime":"(.*?)"'
            pattern = re.compile(reg)
            intime = pattern.findall(info)
            f.write(intime[0] + '|')
            reg = r'"remexcpres":"(.*?)"'
            pattern = re.compile(reg)
            outreason = pattern.findall(info)
            if outreason: f.write(outreason[0])
            f.write('|')
            reg = r'"remdate":"(.*?)"'
            pattern = re.compile(reg)
            outtime = pattern.findall(info)
            if outtime: f.write(outtime[0])
            f.write('|')
            reg = r'"decorg":"(.*?)"'
            pattern = re.compile(reg)
            org = pattern.findall(info)
            f.write(org[0] + '|')
            f.write('\n')


if __name__ == '__main__':
    # Script entry point: run GetYC for location '重庆' (Chongqing) with a
    # date window from 1900-10-09 up to today.
    location = '重庆'
    YCParser = GetYCParser()
    date_window = dict(startdate=date(1900, 10, 9), enddate=date.today())
    YCParser.GetYC(location, **date_window)
Exemple #3
0
                if br==1:break

    def PrintInfo(self, ent, f):
        """Request the detail table for one enterprise and write it to ``f``.

        The method pauses for two seconds, POSTs the body built by
        ``self.getinfopostdata(ent)`` and collects every ``td`` element of
        the parsed response.  The cells are consumed six at a time; trailing
        cells that do not fill a group of six are ignored.  Each group is
        written as one ``|``-separated line, prefixed with the ``'Name'`` and
        stripped ``'reg'`` values of ``ent``.  Every field, including the
        last, is followed by ``|``.
        """
        time.sleep(2)
        post_body = self.getinfopostdata(ent)
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
            'Content-Length': '71',
            'Cookie': self.gen_cookie(),
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        request = Request(
            url='http://aic.hainan.gov.cn:1888/aiccips/GSpublicity/GSpublicityList.html?service=cipUnuDirInfo',
            data=post_body,
            headers=request_headers)
        cells = self.gethtml(request).findAll('td')
        row_count = len(cells) // 6
        for row_no in range(row_count):
            f.write(self.to_utf8(ent.get('Name')) + '|')
            f.write(self.to_utf8(ent.get('reg').strip()) + '|')
            first = row_no * 6
            for cell in cells[first:first + 6]:
                parts = cell.contents
                # An empty cell still contributes its separator.
                if parts:
                    text = parts[0].replace('\n', '').strip()
                    f.write(self.to_utf8(text))
                f.write('|')
            f.write('\n')

if __name__ == '__main__':
    # Script entry point: run GetYC for location '海南' (Hainan) with a
    # date window from 2015-08-10 up to today.
    location = '海南'
    YCParser = GetYCParser()
    date_window = dict(
        startdate=date(2015, 8, 10),
        # Subtracting zero days leaves today's date unchanged.
        enddate=date.today() - timedelta(days=0),
    )
    YCParser.GetYC(location, **date_window)
Exemple #4
0
                                if self.checkname(Name) == False: continue
                                regID = regIDlist[i].contents[0]
                                regID = self.dealID(regID)
                                href = infolist[i].get('href')
                                entdict = dict(Name=Name,
                                               regID=regID,
                                               Date=cdate,
                                               href=href)
                                self.PrintInfo(entdict)
                    except Exception as e:
                        print e
                        print traceback.print_exc()
                        self.printitemerror(pageNos, i)
                        continue
            if br == 1: break

    def PrintInfo(self, ent):
        """Fetch the detail page for one enterprise and pass it to ``gendown``.

        The request URL is the site root plus the ``'href'`` value of the
        dict-like ``ent``.  From the parsed response, the result of
        ``find('table', attrs={'id': 'excTab'})`` is searched for ``td``
        elements, and those are forwarded together with ``ent`` to
        ``self.gendown``.

        Nothing is caught here; lookup failures propagate to the caller.
        """
        detail_url = 'http://www.ahcredit.gov.cn' + ent.get('href')
        request = Request(url=detail_url,
                          headers={'User-Agent': 'Magic Browser'})
        page = self.gethtml(request)
        exc_table = page.find('table', attrs={'id': 'excTab'})
        self.gendown(ent, exc_table.findAll('td'))


if __name__ == '__main__':
    # Script entry point: run GetYC for location '安徽' (Anhui) with a date
    # window from 2015-01-01 up to today.
    location = '安徽'
    YCParser = GetYCParser()
    # The month is written as plain `1`: a leading-zero literal such as `01`
    # is octal on Python 2 and a SyntaxError on Python 3.
    YCParser.GetYC(location, startdate=date(2015, 1, 1), enddate=date.today())
Exemple #5
0
            if k==1:break

    def PrintInfo(self,ent,f):
        """Fetch one enterprise's records and write them to ``f``.

        Two pages are requested through ``self.gethtml``, both keyed by the
        ``'entId'`` value of the dict-like ``ent``:

        * the base page, whose second ``span`` supplies the registration
          number (the first five characters of its text are dropped,
          presumably a label prefix -- confirm against the live page);
        * the record page, whose ``td`` cells with an empty ``class`` are
          consumed six at a time, one group per record.

        Each record becomes one ``|``-separated line: the ``'Name'`` of
        ``ent``, the registration number, then the six cells.  Every field,
        including the last, is followed by ``|``.  The third cell of every
        record is passed through ``self.changedate``; other cells are written
        with newlines removed and surrounding whitespace stripped.  Trailing
        cells that do not fill a group of six are ignored.
        """
        ent_id=ent.get('entId')
        # Registration number.
        base_url='http://tjcredit.gov.cn/platform/saic/viewBaseExc.ftl?entId='+ent_id
        base_page=self.gethtml(base_url)
        reg_no=base_page.findAll('span')[1].contents[0][5:]
        # Record table.
        record_url='http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId='+ent_id+'&departmentId=scjgw&infoClassId=qyjyycmlxx'
        record_page=self.gethtml(record_url)
        infolist=record_page.findAll('td',attrs={'class':''})
        for j in range(len(infolist)//6):
            f.write(ent.get('Name')+'|')
            f.write(reg_no+'|')
            for k in range(6):
                contents=infolist[j*6+k].contents
                if contents:
                    text=contents[0]
                    # Test the column index, not the flat cell index, so the
                    # date column is converted for every record rather than
                    # only the first one.
                    if k==2:
                        f.write(str(self.changedate(str(text))))
                    else:
                        f.write(text.replace('\n','').strip())
                f.write('|')
            f.write('\n')


if __name__ == '__main__':
    # Script entry point: run GetYC for location '天津' (Tianjin) with a
    # date window from 1900-10-08 up to today.
    location = '天津'
    YCParser = GetYCParser()
    date_window = dict(
        startdate=date(1900, 10, 8),
        # Subtracting zero days leaves today's date unchanged.
        enddate=date.today() - timedelta(days=0),
    )
    YCParser.GetYC(location, **date_window)
Exemple #6
0
        day=int(cdate[k2+1:k3-1])
    else:
        k1=cdate.find('-')
        if k1!=-1:
            k2=cdate[5:].find('-')+5
            month=int(cdate[5:k2])
            day=int(cdate[k2+1:k3])
        else:
            print(cdate+'\n')
            year=input('year=')
            month=input('month=')
            day=input('day=')
    return date(year,month,day)

# Script entry point: walk every file in the result directory and read it
# line by line.  Only the start of the per-line loop is visible here.
if __name__=='__main__':
    yc=YCParser()
    rs='D:/GSXT/GSXTresult/'
    dirlist=os.listdir(rs)
    # Output file opened for writing; its use is outside this excerpt.
    frecord=open('D:/GSXT/GSXT整理temp.txt','w')
    total=0     # running count of lines read across all files
    ntotal=0
    idlist={}
    for dirr in dirlist:
        f=open(rs+dirr,'r')
        # NOTE(review): if the name has no '.', find() returns -1 and the
        # slice below drops the last character instead.
        k=dirr.find('.')
        prov=dirr[:k]   # province: the file name up to its first '.'
        recordlist={}   # record dictionary
        yclist={}
        for line in f.readlines():
            total+=1
            # Progress indicator every 10000 lines.
            if total % 10000==0:print(total)
Exemple #7
0
    else:
        k1 = cdate.find('-')
        if k1 != -1:
            k2 = cdate[5:].find('-') + 5
            month = int(cdate[5:k2])
            day = int(cdate[k2 + 1:k3])
        else:
            print(cdate + '\n')
            year = input('year=')
            month = input('month=')
            day = input('day=')
    return date(year, month, day)


# Script entry point: walk every file in the result directory and read it
# line by line.  Only the start of the per-line loop is visible here.
if __name__ == '__main__':
    yc = YCParser()
    rs = 'D:/GSXT/GSXTresult/'
    dirlist = os.listdir(rs)
    # Output file opened for writing; its use is outside this excerpt.
    frecord = open('D:/GSXT/GSXT整理temp.txt', 'w')
    total = 0  # running count of lines read across all files
    ntotal = 0
    idlist = {}
    for dirr in dirlist:
        f = open(rs + dirr, 'r')
        # NOTE(review): if the name has no '.', find() returns -1 and the
        # slice below drops the last character instead.
        k = dirr.find('.')
        prov = dirr[:k]  # province: the file name up to its first '.'
        recordlist = {}  # record dictionary
        yclist = {}
        for line in f.readlines():
            total += 1
            # Progress indicator every 10000 lines.
            if total % 10000 == 0: print(total)
Exemple #8
0
                                               href=href)
                                self.PrintInfo(entdict)
                    except Exception:
                        self.printitemerror(pageNos, i)
                        continue
                if br == 1: break

    def PrintInfo(self, ent):
        """Fetch the detail page for one enterprise and pass it to ``gendown``.

        The request URL is the host address plus the ``'href'`` value of the
        dict-like ``ent``.  From the parsed response, the result of
        ``find('table', attrs={'id': 'excTab'})`` is searched for ``td``
        elements, and those are forwarded together with ``ent`` to
        ``self.gendown``.

        Nothing is caught here; lookup failures propagate to the caller.
        """
        browser_headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'
        }
        detail_url = 'http://222.143.24.157' + ent.get('href')
        request = urllib.request.Request(url=detail_url,
                                         headers=browser_headers)
        page = self.gethtml(request)
        exc_table = page.find('table', attrs={'id': 'excTab'})
        self.gendown(ent, exc_table.findAll('td'))


if __name__ == '__main__':
    # Script entry point: run GetYC for location '河南' (Henan) with a date
    # window from 1900-10-08 up to today; all three mode options are 'a'.
    location = '河南'
    YCParser = GetYCParser()
    run_options = dict(
        startdate=date(1900, 10, 8),
        # Subtracting zero days leaves today's date unchanged.
        enddate=date.today() - timedelta(days=0),
        fmode='a',
        pagemode='a',
        itemmode='a',
    )
    YCParser.GetYC(location, **run_options)