Beispiel #1
0
    def linkAnalysis(self, link):
        """Analyse one list entry: fetch its article and record matching tags.

        The first ``<a>`` found in *link* supplies the article address
        (``href``) and headline (``title``); the element with class ``date``
        supplies the publication time.

        Args:
            link: parsed element exposing ``find`` (presumably a
                BeautifulSoup tag -- confirm against the caller).

        Returns:
            0 when the entry's timestamp is earlier than ``self.updateTime``
            (nothing is fetched in that case); 1 in every other case,
            whether or not article content or tags were found.
        """
        anchor = link.find("a")
        address = anchor["href"]
        brief = anchor["title"]
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('link address is :  %s' % (address,))

        # Only the digit groups at fixed offsets are kept; the characters at
        # offsets 4, 7, 10 and 11 are dropped (presumably separators --
        # verify against the page's date format).
        raw = link.find(attrs={"class": "date"}).string
        timenow = '%s-%s-%s %s' % (raw[0:4], raw[5:7], raw[8:10], raw[12:20])
        now = time.strptime(timenow, '%Y-%m-%d %H:%M:%S')
        if time.mktime(now) < time.mktime(self.updateTime):
            return 0

        childSoup = self.getChildSoup(address)
        try:
            newsContent = childSoup.find(attrs={"class": "art_main"})
        except Exception:
            # Any lookup failure (e.g. childSoup has no ``find``) means "no
            # content". ``Exception`` instead of a bare ``except`` lets
            # KeyboardInterrupt / SystemExit propagate.
            newsContent = None
        if not newsContent:
            return 1

        # join() avoids the quadratic cost of repeated ``+=`` on a string.
        stringTemp = "".join(str(content) for content in newsContent)

        tags = self.searchTag(stringTemp)
        if tags:
            newin = Info()
            newin.addLink(address, brief)
            for tag in tags:
                newin.addTag(tag)
                print('in this news find  %s' % (tag,))
            self.addInfo(newin)

        print('------------------------- one link analysis end  ------------------')
        return 1
Beispiel #2
0
    def linkAnalysis(self, link):
        """Analyse one list entry: fetch its article and record matching tags.

        The first ``<a>`` found in *link* supplies the article address
        (``href``) and headline (``title``); the element with class ``date``
        supplies the publication time.

        Args:
            link: parsed element exposing ``find`` (presumably a
                BeautifulSoup tag -- confirm against the caller).

        Returns:
            0 when the entry's timestamp is earlier than ``self.updateTime``
            (nothing is fetched in that case); 1 in every other case,
            whether or not article content or tags were found.
        """
        temp = link.find("a")
        address = temp["href"]
        brief = temp["title"]
        # One pre-formatted argument keeps the printed text identical under
        # the Python 2 print statement and the Python 3 print function.
        print('link address is :  %s' % (address,))

        # Keep only the digit groups at fixed offsets; offsets 4, 7, 10 and
        # 11 are skipped (presumably separator characters -- verify against
        # the page's date format).
        dateText = link.find(attrs={"class": "date"}).string
        timenow = '%s-%s-%s %s' % (
            dateText[0:4], dateText[5:7], dateText[8:10], dateText[12:20])
        now = time.strptime(timenow, '%Y-%m-%d %H:%M:%S')
        if time.mktime(now) < time.mktime(self.updateTime):
            return 0

        childSoup = self.getChildSoup(address)
        try:
            newsContent = childSoup.find(attrs={"class": "art_main"})
        except Exception:
            # Treat any lookup failure as "no content", but do not swallow
            # KeyboardInterrupt / SystemExit the way a bare ``except`` does.
            newsContent = None
        if not newsContent:
            return 1

        # Build the text in one pass instead of repeated ``+=``.
        stringTemp = "".join(str(content) for content in newsContent)

        tags = self.searchTag(stringTemp)
        if tags:
            newin = Info()
            newin.addLink(address, brief)
            for tag in tags:
                newin.addTag(tag)
                print('in this news find  %s' % (tag,))
            self.addInfo(newin)

        print('------------------------- one link analysis end  ------------------')
        return 1
Beispiel #3
0
    def linkAnalysis(self, link):
        """Walk the entries in *link*, fetching each recent article for tags.

        The ``<span>`` elements of *link* are read as time-of-day stamps and
        paired by position with its ``<a>`` elements. Each stamp is combined
        with today's local date and ``:59`` seconds to form the entry time.

        The scan stops (returns) at the first entry whose span has no text
        or no ``:`` in it, whose time is earlier than ``self.updateTime``,
        or whose article content cannot be found.
        NOTE(review): a missing article body ends the whole scan rather
        than skipping one entry -- confirm this is intended.

        Args:
            link: parsed element exposing ``findAll`` (presumably a
                BeautifulSoup tag -- confirm against the caller).

        Returns:
            None in all cases.
        """
        anchors = link.findAll("a")
        spans = link.findAll("span")
        for i, span in enumerate(spans):
            strtemp = span.string
            # ``.string`` can be None (e.g. a span with nested markup); the
            # original crashed on that, so treat it like a span with no time.
            if strtemp is None or ':' not in strtemp:
                return

            address = 'http://stock.cfi.cn/' + anchors[i]["href"]
            brief = anchors[i].string
            # The page only carries a time of day, so today's date is used.
            today = time.strftime('%Y-%m-%d', time.localtime())
            truetime = today + ' ' + strtemp + ':59'
            now = time.strptime(truetime, '%Y-%m-%d %H:%M:%S')
            if time.mktime(now) < time.mktime(self.updateTime):
                return
            # A single pre-formatted argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('link address is :  %s' % (address,))

            childSoup = self.getChildSoup(address)
            try:
                newsContent = childSoup.find(attrs={"id": "tdcontent"})
            except Exception:
                # Any lookup failure counts as "no content"; unlike a bare
                # ``except`` this lets KeyboardInterrupt / SystemExit through.
                newsContent = None
            if not newsContent:
                return

            # join() instead of repeated ``+=`` on a growing string.
            stringTemp = "".join(str(content) for content in newsContent)

            tags = self.searchTag(stringTemp)
            if tags:
                newin = Info()
                newin.addLink(address, brief)
                for tag in tags:
                    newin.addTag(tag)
                    print('in this news find  %s' % (tag,))
                self.addInfo(newin)
            print('------------------------- one link analysis end  ------------------')
Beispiel #4
0
    def linkAnalysis(self, link):
        """Walk the entries in *link*, fetching each recent article for tags.

        The ``<span>`` elements of *link* are read as time-of-day stamps and
        paired by position with its ``<a>`` elements. Each stamp is combined
        with today's local date and ``:59`` seconds to form the entry time.

        The scan stops (returns) at the first entry whose span has no text
        or no ``:`` in it, whose time is earlier than ``self.updateTime``,
        or whose article content cannot be found.
        NOTE(review): a missing article body ends the whole scan rather
        than skipping one entry -- confirm this is intended.

        Args:
            link: parsed element exposing ``findAll`` (presumably a
                BeautifulSoup tag -- confirm against the caller).

        Returns:
            None in all cases.
        """
        # Indentation is spaces only: the original mixed tabs and spaces,
        # which Python 3 rejects and Python 2 only accepts by treating a tab
        # as eight columns.
        temp = link.findAll("a")
        dat = link.findAll("span")
        for i, da in enumerate(dat):
            strtemp = da.string
            # ``.string`` can be None (e.g. a span with nested markup); the
            # original crashed on that, so treat it like a span with no time.
            if strtemp is None or ':' not in strtemp:
                return

            address = 'http://stock.cfi.cn/' + temp[i]["href"]
            brief = temp[i].string
            # Only a time of day is available, so today's date is prepended.
            today = time.strftime('%Y-%m-%d', time.localtime())
            truetime = today + ' ' + strtemp + ':59'
            now = time.strptime(truetime, '%Y-%m-%d %H:%M:%S')
            if time.mktime(now) < time.mktime(self.updateTime):
                return
            # One pre-formatted argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('link address is :  %s' % (address,))

            childSoup = self.getChildSoup(address)
            try:
                newsContent = childSoup.find(attrs={"id": "tdcontent"})
            except Exception:
                # Lookup failure means "no content"; KeyboardInterrupt and
                # SystemExit are no longer swallowed as with bare ``except``.
                newsContent = None
            if not newsContent:
                return

            # Single-pass join instead of quadratic ``+=`` concatenation.
            stringTemp = "".join(str(content) for content in newsContent)

            tags = self.searchTag(stringTemp)
            if tags:
                newin = Info()
                newin.addLink(address, brief)
                for tag in tags:
                    newin.addTag(tag)
                    print('in this news find  %s' % (tag,))
                self.addInfo(newin)
            print('------------------------- one link analysis end  ------------------')
Beispiel #5
0
    def linkAnalysis(self, link):
        """Analyse one list entry: fetch its article and record matching tags.

        The first ``<a>`` in *link* supplies the article address (``href``)
        and headline (``title``); the first ``<span>`` supplies the
        publication time, to which ``:59`` seconds are appended before
        parsing.

        Args:
            link: parsed element exposing ``find`` (presumably a
                BeautifulSoup tag -- confirm against the caller).

        Returns:
            0 when the entry's timestamp is earlier than ``self.updateTime``
            (nothing is fetched in that case); 1 in every other case,
            whether or not article content or tags were found.
        """
        anchor = link.find("a")
        address = anchor["href"]
        brief = anchor["title"]

        # The span text lacks seconds; ':59' completes it for strptime.
        timenow = link.find("span").string + ":59"
        now = time.strptime(timenow, '%Y-%m-%d %H:%M:%S')
        if time.mktime(now) < time.mktime(self.updateTime):
            return 0

        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('link address is :  %s' % (address,))
        childSoup = self.getChildSoup(address)
        try:
            newsContent = childSoup.find(attrs={
                "class": "Body",
                "id": "ContentBody"
            })
        except Exception:
            # Any lookup failure means "no content". ``Exception`` instead
            # of a bare ``except`` lets KeyboardInterrupt / SystemExit
            # propagate.
            newsContent = None

        # Identity test: ``== None`` would go through the element's own
        # ``__eq__`` instead of checking for the None singleton.
        if newsContent is None:
            return 1

        # join() avoids the quadratic cost of repeated ``+=`` on a string.
        stringTemp = "".join(str(content) for content in newsContent)

        tags = self.searchTag(stringTemp)
        if tags:
            newin = Info()
            newin.addLink(address, brief)
            for tag in tags:
                newin.addTag(tag)
                print('in this news find  %s' % (tag,))
            self.addInfo(newin)
        print('------------------------- one link analysis end -------------------------')
        return 1
Beispiel #6
0
    def linkAnalysis(self, link):
        """Analyse one list entry: fetch its article and record matching tags.

        The first ``<a>`` in *link* supplies the article address (``href``)
        and headline (``title``); the first ``<span>`` supplies the
        publication time, to which ``:59`` seconds are appended before
        parsing.

        Args:
            link: parsed element exposing ``find`` (presumably a
                BeautifulSoup tag -- confirm against the caller).

        Returns:
            0 when the entry's timestamp is earlier than ``self.updateTime``
            (nothing is fetched in that case); 1 in every other case,
            whether or not article content or tags were found.
        """
        temp = link.find("a")
        address = temp["href"]
        brief = temp["title"]

        # The span text carries no seconds; ':59' completes it for strptime.
        timenow = link.find("span").string + ":59"
        now = time.strptime(timenow, '%Y-%m-%d %H:%M:%S')
        if time.mktime(now) < time.mktime(self.updateTime):
            return 0

        # One pre-formatted argument keeps the printed text identical under
        # the Python 2 print statement and the Python 3 print function.
        print('link address is :  %s' % (address,))
        childSoup = self.getChildSoup(address)
        try:
            newsContent = childSoup.find(
                attrs={"class": "Body", "id": "ContentBody"})
        except Exception:
            # Treat any lookup failure as "no content", but do not swallow
            # KeyboardInterrupt / SystemExit the way a bare ``except`` does.
            newsContent = None

        # ``is None`` checks identity; ``== None`` would dispatch to the
        # element's own ``__eq__``.
        if newsContent is None:
            return 1

        # Build the text in one pass instead of repeated ``+=``.
        stringTemp = "".join(str(content) for content in newsContent)

        tags = self.searchTag(stringTemp)
        if tags:
            newin = Info()
            newin.addLink(address, brief)
            for tag in tags:
                newin.addTag(tag)
                print('in this news find  %s' % (tag,))
            self.addInfo(newin)
        print('------------------------- one link analysis end -------------------------')
        return 1