Example #1
    def ParseFeedUrls(self):
        urls = [] # list of urls to return
        
        userName = self.UserName()
        for item in self.feeds:
            title, url = item[0], item[1]
            comic_id = ""
            
            lastCount = LastDelivered.all().filter('username = ', userName).filter('bookname = ', title).get()
            if not lastCount:
                self.log.info('There is no log in db LastDelivered for name: %s, set to 0' % title)
                oldNum = 0
            else:
                oldNum = lastCount.num

            urlpaths = urlparse.urlsplit(url.lower()).path.split("/")
            if ( (u"id" in urlpaths) and (urlpaths.index(u"id")+1 < len(urlpaths)) ):
                comic_id = urlpaths[urlpaths.index(u"id")+1]

            if ( (not comic_id.isdigit()) or (comic_id=="") ):
                self.log.warn('can not get comic id: %s' % url)
                break

            chapterList = self.getChapterList(comic_id)
            for deliverCount in range(5):
                newNum = oldNum + deliverCount
                if newNum < len(chapterList):
                    imgList = self.getImgList(chapterList[newNum], comic_id)
                    for img in imgList:
                        urls.append((title, img, img, None))
                    self.UpdateLastDelivered(title, newNum+1)
                    if newNum == 0:
                        break

        return urls
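
All of the snippets on this page read and write a LastDelivered datastore entity to remember how far each comic has already been pushed. The model itself is not shown in any example, so the following is only a rough sketch of what it might look like: the field names are taken from Example #3, while the property types are assumptions.

    # Assumed sketch of the LastDelivered model (Google App Engine db API).
    # Field names come from Example #3; the property types are guesses.
    from google.appengine.ext import db

    class LastDelivered(db.Model):
        username = db.StringProperty()        # account the delivery belongs to
        bookname = db.StringProperty()        # comic title
        num = db.IntegerProperty(default=0)   # last delivered chapter/volume number
        record = db.StringProperty()          # human-readable label, e.g. u' 第12话'
        datetime = db.DateTimeProperty()      # time of the last delivery
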
Example #2
    def GetNewComic(self):
        urls = []

        if not self.feeds:
            return []

        userName = self.UserName()
        decoder = AutoDecoder(isfeed=False)
        for item in self.feeds:
            title, url = item[0], item[1]

            lastCount = LastDelivered.all().filter(
                'username = ', userName).filter('bookname = ', title).get()
            if not lastCount:
                self.log.info(
                    'There is no log in db LastDelivered for name: %s, set to 0'
                    % title)
                oldNum = 0
            else:
                oldNum = lastCount.num

            opener = URLOpener(self.host, timeout=60)
            result = opener.open(url)
            if result.status_code != 200:
                self.log.warn(
                    'fetch index page for %s failed[%s] : %s' %
                    (title, URLOpener.CodeMap(result.status_code), url))
                continue
            content = result.content
            content = self.AutoDecodeContent(content, decoder,
                                             self.feed_encoding,
                                             opener.realurl, result.headers)

            soup = BeautifulSoup(content, 'lxml')

            allComicTable = soup.find_all('table', {'width': '688'})
            addedForThisComic = False
            for comicTable in allComicTable:
                comicVolumes = comicTable.find_all('a', {'target': '_blank'})
                for volume in comicVolumes:
                    texts = volume.text.split(' ')
                    if len(texts) > 2 and texts[1].isdigit() and volume.get(
                            'href'):
                        num = int(texts[1])
                        if num > oldNum:
                            oldNum = num
                            href = self.urljoin(self.host, volume.get('href'))
                            urls.append((title, num, href))
                            addedForThisComic = True
                            break  # push only one volume at a time (a single volume can already contain many images)

                if addedForThisComic:
                    break

        return urls
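
Example #2 assumes a very specific index-page layout: volume links live inside <table width="688"> elements, and the volume number is the second whitespace-separated token of the link text. The fragment below is made-up markup that would satisfy that parser, purely to illustrate the assumption; the real site's HTML may differ.

    # Made-up sample markup matching what the parser above expects.
    from bs4 import BeautifulSoup

    sample = u'''
    <table width="688">
      <tr><td><a target="_blank" href="/comic/123/vol12.html">Vol 12 (end)</a></td></tr>
    </table>
    '''
    soup = BeautifulSoup(sample, 'lxml')
    link = soup.find('a', {'target': '_blank'})
    texts = link.text.split(' ')                      # ['Vol', '12', '(end)']
    num = int(texts[1]) if texts[1].isdigit() else 0  # -> 12
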
Example #3
 def UpdateLastDelivered(self, title, num):
     userName = self.UserName()
     dbItem = LastDelivered.all().filter('username = ', userName).filter('bookname = ', title).get()
     self.last_delivered_volume = u' 第%d话' % num  # e.g. u' 第12话' ("Chapter 12")
     if dbItem:
         dbItem.num = num
         dbItem.record = self.last_delivered_volume
         dbItem.datetime = datetime.datetime.utcnow() + datetime.timedelta(hours=TIMEZONE)
     else:
         dbItem = LastDelivered(username=userName, bookname=title, num=num, record=self.last_delivered_volume,
             datetime=datetime.datetime.utcnow() + datetime.timedelta(hours=TIMEZONE))
     dbItem.put()
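
UpdateLastDelivered is the write side of the lookup used in the other snippets: it updates the existing LastDelivered entity for this user and title, or creates one if none exists. A hypothetical call, assuming book is an instance of the comic class:

    # Hypothetical usage: record that chapter 12 of a title was delivered,
    # so the next run starts from chapter 13.
    book.UpdateLastDelivered(u'SomeComicTitle', 12)
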
Example #4
 def GetNewComic(self):
     urls = []
     
     if not self.feeds:
         return []
     
     userName = self.UserName()
     decoder = AutoDecoder(isfeed=False)
     for item in self.feeds:
         title, url = item[0], item[1]
         
         lastCount = LastDelivered.all().filter('username = ', userName).filter('bookname = ', title).get()
         if not lastCount:
             self.log.info('There is no log in db LastDelivered for name: %s, set to 0' % title)
             oldNum = 0
         else:
             oldNum = lastCount.num
             
         opener = URLOpener(self.host, timeout=60)
         result = opener.open(url)
         if result.status_code != 200:
             self.log.warn('fetch index page for %s failed[%s] : %s' % (title, URLOpener.CodeMap(result.status_code), url))
             continue
         content = result.content
         content = self.AutoDecodeContent(content, decoder, self.feed_encoding, opener.realurl, result.headers)
         
         soup = BeautifulSoup(content, 'lxml')
         
         allComicTable = soup.find_all('table', {'width': '688'})
         addedForThisComic = False
         for comicTable in allComicTable:
             comicVolumes = comicTable.find_all('a', {'target': '_blank'})
             for volume in comicVolumes:
                 texts = volume.text.split(' ')
                 if len(texts) > 2 and texts[1].isdigit() and volume.get('href'):
                     num = int(texts[1])
                     if num > oldNum:
                         oldNum = num
                         href = self.urljoin(self.host, volume.get('href'))
                         urls.append((title, num, href))
                         addedForThisComic = True
                         break # push only one volume at a time (a single volume can already contain many images)
                         
             if addedForThisComic:
                 break
                 
     return urls
Example #5
    def ParseFeedUrls(self):
        urls = []
        userName = self.UserName()
        decoder = AutoDecoder(isfeed=False)

        lastCount = LastDelivered.all().filter('username = ', userName).filter('bookname = ', self.title).get()
        if not lastCount:
            oldNum = 0
            oldChapterTitle = ''
        else:
            oldNum = lastCount.num
            oldChapterTitle = lastCount.record

        opener = URLOpener(self.host, timeout=60)
        result = opener.open(self.feeds)
        if result.status_code != 200:
            self.log.warn('fetch index page for %s failed[%s] : %s' %
                          (self.title, URLOpener.CodeMap(
                              result.status_code), self.feeds))
            return []

        # Get the chapter list from the page
        content = self.AutoDecodeContent(result.content, decoder,
                                         self.feed_encoding, opener.realurl,
                                         result.headers)
        soup = BeautifulSoup(content, 'lxml')
        chapterList = self.GetChapterList(soup)

        chapterNum = 0
        for chapter in chapterList:
            if chapterNum >= self.limit:
                break
            url = chapter.get('href')
            num = self.GetChapterNum(url)
            if num > oldNum:
                oldNum = num
                oldChapterTitle = chapter.text
                chapterNum += 1
                urls.append(
                    (self.title, oldChapterTitle, self.urljoin(self.host,
                                                               url), ''))

        self.UpdateLastDelivered(self.title, oldNum, oldChapterTitle)
        return urls
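
Example #5 relies on a GetChapterNum helper that is not shown here. A plausible, purely illustrative implementation would take the last run of digits in the chapter URL as the chapter number; the real parser is site-specific and may differ.

    import re

    def GetChapterNum(self, url):
        # Illustrative guess only: use the last group of digits in the URL,
        # e.g. '/comic/12345/ch_678.html' -> 678.
        digits = re.findall(r'\d+', url or '')
        return int(digits[-1]) if digits else 0
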