Esempio n. 1
0
 def UpdateLastDelivered(self, title, num):
     """Store ``num`` as the last delivered chapter of ``title`` for this user.

     Looks up the LastDelivered record matching the current user name and
     ``title``; updates it when found, otherwise creates a new one. The
     display label is also kept on ``self.last_delivered_volume``.

     Args:
         title: Book name used as the ``bookname`` field of the record.
         num: Chapter number to record.
     """
     userName = self.UserName()
     # The record is selected by both user name and book name.
     dbItem = (LastDelivered.all()
               .filter('username = ', userName)
               .filter('bookname = ', title)
               .get())
     self.last_delivered_volume = u' 第%d话' % num
     # Timestamp is UTC shifted by TIMEZONE hours (naive datetime).
     now = datetime.datetime.utcnow() + datetime.timedelta(hours=TIMEZONE)
     if dbItem:
         dbItem.num = num
         dbItem.record = self.last_delivered_volume
         dbItem.datetime = now
     else:
         dbItem = LastDelivered(username=userName, bookname=title, num=num,
                                record=self.last_delivered_volume,
                                datetime=now)
     dbItem.put()
Esempio n. 2
0
    def ParseFeedUrls(self):
        """Collect image URLs for the next undelivered chapters of each feed.

        For every entry in ``self.feeds`` (``item[0]`` is the title,
        ``item[1]`` the URL) the comic id is read from the URL path segment
        that follows ``id``. The chapter list is then fetched and up to five
        chapters, starting at the last delivered position, are expanded into
        image URLs. The delivery record is advanced after each chapter.

        Returns:
            A list of ``(title, img, img, None)`` tuples.
        """
        urls = []  # accumulated result returned to the caller

        userName = self.UserName()
        for item in self.feeds:
            title, url = item[0], item[1]
            comic_id = ""

            lastCount = (LastDelivered.all()
                         .filter('username = ', userName)
                         .filter('bookname = ', title)
                         .get())
            if not lastCount:
                self.log.info('These is no log in db LastDelivered for name: %s, set to 0' % title)
                oldNum = 0
            else:
                oldNum = lastCount.num

            # The comic id is the path segment right after "id".
            urlpaths = urlparse.urlsplit(url.lower()).path.split("/")
            if u"id" in urlpaths:
                idPos = urlpaths.index(u"id") + 1
                if idPos < len(urlpaths):
                    comic_id = urlpaths[idPos]

            # "".isdigit() is False, so a missing id is rejected here too.
            if not comic_id.isdigit():
                self.log.warn('can not get comic id: %s' % url)
                # Skip only this feed; the remaining feeds are still processed.
                continue

            chapterList = self.getChapterList(comic_id)
            for deliverCount in range(5):
                newNum = oldNum + deliverCount
                if newNum >= len(chapterList):
                    # Later iterations only increase newNum, so nothing is left.
                    break
                imgList = self.getImgList(chapterList[newNum], comic_id)
                for img in imgList:
                    urls.append((title, img, img, None))
                self.UpdateLastDelivered(title, newNum + 1)
                # On the very first delivery send a single chapter only.
                if newNum == 0:
                    break

        return urls
Esempio n. 3
0
 def GetNewComic(self):
     """Find, for each feed, the first volume newer than the last delivered one.

     Each feed's index page is fetched and decoded, then every
     ``<table width="688">`` is scanned for ``<a target="_blank">`` links.
     A link counts as a volume when its text, split on spaces, has more than
     two parts and the second part is a number. At most one volume is
     returned per feed.

     Returns:
         A list of ``(title, num, href)`` tuples; empty when there are no
         feeds or nothing new was found.
     """
     urls = []

     if not self.feeds:
         return []

     userName = self.UserName()
     decoder = AutoDecoder(isfeed=False)
     for item in self.feeds:
         title, url = item[0], item[1]

         lastCount = (LastDelivered.all()
                      .filter('username = ', userName)
                      .filter('bookname = ', title)
                      .get())
         if not lastCount:
             self.log.info('These is no log in db LastDelivered for name: %s, set to 0' % title)
             oldNum = 0
         else:
             oldNum = lastCount.num

         opener = URLOpener(self.host, timeout=60)
         result = opener.open(url)
         if result.status_code != 200:
             self.log.warn('fetch index page for %s failed[%s] : %s' % (title, URLOpener.CodeMap(result.status_code), url))
             continue
         content = result.content
         content = self.AutoDecodeContent(content, decoder, self.feed_encoding, opener.realurl, result.headers)

         soup = BeautifulSoup(content, 'lxml')

         allComicTable = soup.find_all('table', {'width': '688'})
         addedForThisComic = False
         for comicTable in allComicTable:
             comicVolumes = comicTable.find_all('a', {'target': '_blank'})
             for volume in comicVolumes:
                 texts = volume.text.split(' ')
                 if len(texts) > 2 and texts[1].isdigit() and volume.get('href'):
                     num = int(texts[1])
                     if num > oldNum:
                         href = self.urljoin(self.host, volume.get('href'))
                         urls.append((title, num, href))
                         addedForThisComic = True
                         # Push only one volume at a time (a single volume
                         # may already contain a lot of images).
                         break

             if addedForThisComic:
                 break

     return urls