def linkAnalysis(self, link):
    """Analyse one news-list entry and record it when its article matches a tag.

    The first ``<a>`` found in *link* supplies the article URL (``href``) and
    headline (``title``); the element whose class is ``date`` supplies the
    publication time.  Entries older than ``self.updateTime`` are rejected
    before any page is fetched.  Otherwise the article page is fetched with
    ``self.getChildSoup``, the children of its ``art_main`` element are
    stringified and concatenated, and the text is passed to
    ``self.searchTag``.  When tags come back, an ``Info`` carrying the link
    and the tags is handed to ``self.addInfo``.

    Args:
        link: Parsed list-entry node exposing ``find`` (presumably a
            BeautifulSoup tag -- confirm against the caller).

    Returns:
        0 when the entry is older than ``self.updateTime``; 1 in every other
        case (article body missing, no tag matched, or entry recorded).
    """
    anchor = link.find("a")
    address = anchor["href"]
    brief = anchor["title"]
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('link address is :  %s' % (address,))

    # The date text is sliced by fixed position: year at 0-3, month at 5-6,
    # day at 8-9 and HH:MM:SS at 12-19; the characters in between are ignored.
    stamp = link.find(attrs={"class": "date"}).string
    normalized = (stamp[0:4] + '-' + stamp[5:7] + '-' + stamp[8:10]
                  + ' ' + stamp[12:20])
    published = time.strptime(normalized, '%Y-%m-%d %H:%M:%S')
    if time.mktime(published) < time.mktime(self.updateTime):
        return 0

    childSoup = self.getChildSoup(address)
    try:
        newsContent = childSoup.find(attrs={"class": "art_main"})
    except Exception:
        # Best effort: an unusable page object (presumably None after a failed
        # fetch) is treated as "no article body".  Unlike a bare ``except:``
        # this lets KeyboardInterrupt and SystemExit propagate.
        newsContent = None
    if not newsContent:
        return 1

    articleText = "".join(str(piece) for piece in newsContent)
    tags = self.searchTag(articleText)
    if tags:
        newin = Info()
        newin.addLink(address, brief)
        for tag in tags:
            newin.addTag(tag)
            print('in this news find  %s' % (tag,))
        self.addInfo(newin)
    print('------------------------- one link analysis end  ------------------')
    return 1
def linkAnalysis(self, link):
    """Analyse one news-list entry and record it when its article matches a tag.

    The first ``<a>`` found in *link* supplies the article URL (``href``) and
    headline (``title``); the element whose class is ``date`` supplies the
    publication time.  Entries older than ``self.updateTime`` are rejected
    before any page is fetched.  Otherwise the article page is fetched with
    ``self.getChildSoup``, the children of its ``art_main`` element are
    stringified and concatenated, and the text is passed to
    ``self.searchTag``.  When tags come back, an ``Info`` carrying the link
    and the tags is handed to ``self.addInfo``.

    Args:
        link: Parsed list-entry node exposing ``find`` (presumably a
            BeautifulSoup tag -- confirm against the caller).

    Returns:
        0 when the entry is older than ``self.updateTime``; 1 in every other
        case (article body missing, no tag matched, or entry recorded).
    """
    anchor = link.find("a")
    address = anchor["href"]
    brief = anchor["title"]
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('link address is :  %s' % (address,))

    # The date text is sliced by fixed position: year at 0-3, month at 5-6,
    # day at 8-9 and HH:MM:SS at 12-19; the characters in between are ignored.
    stamp = link.find(attrs={"class": "date"}).string
    normalized = (stamp[0:4] + '-' + stamp[5:7] + '-' + stamp[8:10]
                  + ' ' + stamp[12:20])
    published = time.strptime(normalized, '%Y-%m-%d %H:%M:%S')
    if time.mktime(published) < time.mktime(self.updateTime):
        return 0

    childSoup = self.getChildSoup(address)
    try:
        newsContent = childSoup.find(attrs={"class": "art_main"})
    except Exception:
        # Best effort: an unusable page object (presumably None after a failed
        # fetch) is treated as "no article body".  Unlike a bare ``except:``
        # this lets KeyboardInterrupt and SystemExit propagate.
        newsContent = None
    if not newsContent:
        return 1

    articleText = "".join(str(piece) for piece in newsContent)
    tags = self.searchTag(articleText)
    if tags:
        newin = Info()
        newin.addLink(address, brief)
        for tag in tags:
            newin.addTag(tag)
            print('in this news find  %s' % (tag,))
        self.addInfo(newin)
    print('------------------------- one link analysis end  ------------------')
    return 1
def linkAnalysis(self, link):
    """Scan a news-list node and record every fresh article that matches a tag.

    *link* holds parallel sequences of ``<a>`` and ``<span>`` elements: the
    n-th span carries the clock time (``HH:MM``) of the n-th anchor.  For each
    pair the article URL is formed by prefixing ``http://stock.cfi.cn/`` to
    the anchor's ``href``, and the publication time is today's local date
    plus the span text plus ``:59``.

    The scan stops as soon as a span has no ``:`` in its text (or has no
    text at all), or as soon as an entry is older than ``self.updateTime``.
    An article whose page has no ``tdcontent`` element is skipped and the
    scan moves on to the next entry; the sibling single-entry analysers
    return 1 rather than 0 in that situation, which suggests the caller is
    meant to keep going (NOTE(review): confirm against the caller).

    Args:
        link: Parsed list node exposing ``findAll`` (presumably a
            BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.
    """
    anchors = link.findAll("a")
    spans = link.findAll("span")
    for index, span in enumerate(spans):
        clock = span.string
        if clock is None or ':' not in clock:
            return

        anchor = anchors[index]
        address = 'http://stock.cfi.cn/' + anchor["href"]
        brief = anchor.string
        # The span only has the time of day, so today's date is assumed and
        # the seconds are padded to :59.
        stamp = time.strftime('%Y-%m-%d ', time.localtime(time.time())) + clock + ':59'
        published = time.strptime(stamp, '%Y-%m-%d %H:%M:%S')
        if time.mktime(published) < time.mktime(self.updateTime):
            return

        # Single-argument print(...) produces the same output on Python 2 and 3.
        print('link address is :  %s' % (address,))
        childSoup = self.getChildSoup(address)
        try:
            newsContent = childSoup.find(attrs={"id": "tdcontent"})
        except Exception:
            # Best effort: an unusable page object (presumably None after a
            # failed fetch) counts as "no article body".  Unlike a bare
            # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
            newsContent = None
        if not newsContent:
            continue

        articleText = "".join(str(piece) for piece in newsContent)
        tags = self.searchTag(articleText)
        if tags:
            newin = Info()
            newin.addLink(address, brief)
            for tag in tags:
                newin.addTag(tag)
                print('in this news find  %s' % (tag,))
            self.addInfo(newin)
        print('------------------------- one link analysis end  ------------------')
def linkAnalysis(self, link):
    """Scan a news-list node and record every fresh article that matches a tag.

    *link* holds parallel sequences of ``<a>`` and ``<span>`` elements: the
    n-th span carries the clock time (``HH:MM``) of the n-th anchor.  For each
    pair the article URL is formed by prefixing ``http://stock.cfi.cn/`` to
    the anchor's ``href``, and the publication time is today's local date
    plus the span text plus ``:59``.

    The scan stops as soon as a span has no ``:`` in its text (or has no
    text at all), or as soon as an entry is older than ``self.updateTime``.
    An article whose page has no ``tdcontent`` element is skipped and the
    scan moves on to the next entry; the sibling single-entry analysers
    return 1 rather than 0 in that situation, which suggests the caller is
    meant to keep going (NOTE(review): confirm against the caller).

    Args:
        link: Parsed list node exposing ``findAll`` (presumably a
            BeautifulSoup tag -- confirm against the caller).

    Returns:
        None.
    """
    anchors = link.findAll("a")
    spans = link.findAll("span")
    for index, span in enumerate(spans):
        clock = span.string
        if clock is None or ':' not in clock:
            return

        anchor = anchors[index]
        address = 'http://stock.cfi.cn/' + anchor["href"]
        brief = anchor.string
        # The span only has the time of day, so today's date is assumed and
        # the seconds are padded to :59.
        stamp = time.strftime('%Y-%m-%d ', time.localtime(time.time())) + clock + ':59'
        published = time.strptime(stamp, '%Y-%m-%d %H:%M:%S')
        if time.mktime(published) < time.mktime(self.updateTime):
            return

        # Single-argument print(...) produces the same output on Python 2 and 3.
        print('link address is :  %s' % (address,))
        childSoup = self.getChildSoup(address)
        try:
            newsContent = childSoup.find(attrs={"id": "tdcontent"})
        except Exception:
            # Best effort: an unusable page object (presumably None after a
            # failed fetch) counts as "no article body".  Unlike a bare
            # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
            newsContent = None
        if not newsContent:
            continue

        articleText = "".join(str(piece) for piece in newsContent)
        tags = self.searchTag(articleText)
        if tags:
            newin = Info()
            newin.addLink(address, brief)
            for tag in tags:
                newin.addTag(tag)
                print('in this news find  %s' % (tag,))
            self.addInfo(newin)
        print('------------------------- one link analysis end  ------------------')
def linkAnalysis(self, link):
    """Analyse one news-list entry and record it when its article matches a tag.

    The first ``<a>`` found in *link* supplies the article URL (``href``) and
    headline (``title``); the first ``<span>`` supplies the publication time,
    to which ``:59`` is appended before it is parsed as
    ``%Y-%m-%d %H:%M:%S``.  Entries older than ``self.updateTime`` are
    rejected before any page is fetched.  Otherwise the article page is
    fetched with ``self.getChildSoup``, the children of its element with
    class ``Body`` and id ``ContentBody`` are stringified and concatenated,
    and the text is passed to ``self.searchTag``.  When tags come back, an
    ``Info`` carrying the link and the tags is handed to ``self.addInfo``.

    Args:
        link: Parsed list-entry node exposing ``find`` (presumably a
            BeautifulSoup tag -- confirm against the caller).

    Returns:
        0 when the entry is older than ``self.updateTime``; 1 in every other
        case (article body missing, no tag matched, or entry recorded).
    """
    anchor = link.find("a")
    address = anchor["href"]
    brief = anchor["title"]

    # The span text has minute resolution; pad the seconds to :59.
    stamp = link.find("span").string + ":59"
    published = time.strptime(stamp, '%Y-%m-%d %H:%M:%S')
    if time.mktime(published) < time.mktime(self.updateTime):
        return 0

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('link address is :  %s' % (address,))
    childSoup = self.getChildSoup(address)
    try:
        newsContent = childSoup.find(attrs={"class": "Body", "id": "ContentBody"})
    except Exception:
        # Best effort: an unusable page object (presumably None after a failed
        # fetch) is treated as "no article body".  Unlike a bare ``except:``
        # this lets KeyboardInterrupt and SystemExit propagate.
        newsContent = None
    # Identity test: ``== None`` would go through the node's own __eq__.
    if newsContent is None:
        return 1

    articleText = "".join(str(piece) for piece in newsContent)
    tags = self.searchTag(articleText)
    if tags:
        newin = Info()
        newin.addLink(address, brief)
        for tag in tags:
            newin.addTag(tag)
            print('in this news find  %s' % (tag,))
        self.addInfo(newin)
    print('------------------------- one link analysis end -------------------------')
    return 1
def linkAnalysis(self, link):
    """Analyse one news-list entry and record it when its article matches a tag.

    The first ``<a>`` found in *link* supplies the article URL (``href``) and
    headline (``title``); the first ``<span>`` supplies the publication time,
    to which ``:59`` is appended before it is parsed as
    ``%Y-%m-%d %H:%M:%S``.  Entries older than ``self.updateTime`` are
    rejected before any page is fetched.  Otherwise the article page is
    fetched with ``self.getChildSoup``, the children of its element with
    class ``Body`` and id ``ContentBody`` are stringified and concatenated,
    and the text is passed to ``self.searchTag``.  When tags come back, an
    ``Info`` carrying the link and the tags is handed to ``self.addInfo``.

    Args:
        link: Parsed list-entry node exposing ``find`` (presumably a
            BeautifulSoup tag -- confirm against the caller).

    Returns:
        0 when the entry is older than ``self.updateTime``; 1 in every other
        case (article body missing, no tag matched, or entry recorded).
    """
    anchor = link.find("a")
    address = anchor["href"]
    brief = anchor["title"]

    # The span text has minute resolution; pad the seconds to :59.
    stamp = link.find("span").string + ":59"
    published = time.strptime(stamp, '%Y-%m-%d %H:%M:%S')
    if time.mktime(published) < time.mktime(self.updateTime):
        return 0

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('link address is :  %s' % (address,))
    childSoup = self.getChildSoup(address)
    try:
        newsContent = childSoup.find(attrs={"class": "Body", "id": "ContentBody"})
    except Exception:
        # Best effort: an unusable page object (presumably None after a failed
        # fetch) is treated as "no article body".  Unlike a bare ``except:``
        # this lets KeyboardInterrupt and SystemExit propagate.
        newsContent = None
    # Identity test: ``== None`` would go through the node's own __eq__.
    if newsContent is None:
        return 1

    articleText = "".join(str(piece) for piece in newsContent)
    tags = self.searchTag(articleText)
    if tags:
        newin = Info()
        newin.addLink(address, brief)
        for tag in tags:
            newin.addTag(tag)
            print('in this news find  %s' % (tag,))
        self.addInfo(newin)
    print('------------------------- one link analysis end -------------------------')
    return 1