예제 #1
0
def pushwork2(f_id, feeds):
    """Fetch every item of a feed set and store it for feed ``f_id``.

    Images yielded by the book are written into ``temp/feed_<f_id>`` next to
    this module; every other item is stored through
    ``model.put_section_article``.  The old articles of ``f_id`` are deleted
    first and the update time is refreshed at the end.

    Args:
        f_id: Identifier of the feed; used in the output directory name and
            passed to the ``model`` calls.
        feeds: Non-empty sequence of feed entries.  ``feeds[0][1]`` selects
            the mode: if it starts with ``http`` (case-insensitive) all
            entries are handed to a ``BaseFeedBook``; otherwise element
            ``[1]`` of each entry is compared with the ``__name__`` of the
            classes returned by ``FeedClasses()``.

    Raises:
        ValueError: In manual mode, when no class from ``FeedClasses()``
            matches any entry.  Raised before anything is deleted.
    """
    log = logging.getLogger()

    # Decide between automatic (URL feeds) and manual (named book class) mode.
    redbook = None
    if feeds[0][1][0:4].lower() == 'http':
        # Automatic
        redbook = BaseFeedBook(log)
        redbook.feeds = feeds
    else:
        # Manual: when several classes match, the last one wins.
        for mfeed in FeedClasses():
            if any(mfeed.__name__ == my_mfeed[1] for my_mfeed in feeds):
                redbook = mfeed(log)

    # Fail early: without a book there is nothing to fetch, and continuing
    # would wipe the existing directory and articles for no result.
    if redbook is None:
        raise ValueError('no feed class matches the feeds of feed %s' %
                         (f_id,))

    ROOT = path.dirname(path.abspath(__file__))
    # Start from an empty per-feed output directory.
    output_dir = path.join(ROOT, 'temp', 'feed_%s' % f_id)
    if path.exists(output_dir):
        rmtree(output_dir)
        # NOTE(review): presumably gives the filesystem time to settle.
        time.sleep(3)

    if not path.exists(output_dir):
        os.makedirs(output_dir)
        print('mkdir')
        time.sleep(1)

    # Clear the corresponding MySQL table rows.
    model.delete_old_article(f_id)
    time.sleep(0.5)

    for sec_or_media, url, title, content, brief in redbook.Items():
        if sec_or_media.startswith(r'image/'):
            # Image item: ``title`` is used as the file name.
            filename = path.join(output_dir, title)
            with open(filename, "wb") as fout:
                fout.write(content)
        else:
            model.put_section_article(f_id, sec_or_media, url, title, content,
                                      brief)

    # Record the new update time.
    model.update_article_update_time(f_id)
    print('-=end grasp=-')
예제 #2
0
    def GET(self):
        """Build an e-book from the requested books and send it to the user.

        Query parameters (read through ``web.input()``):
            u: name of the ``KeUser`` to deliver to.
            id: one book id, or several ids separated by commas.

        Returns:
            A short status string: an error text when the user or the books
            cannot be resolved, ``"No new feeds."`` when no article was
            collected, otherwise ``"<title>(<local time>).<type> Sent!"``.
        """
        username = web.input().get("u")
        bookid = web.input().get("id")

        user = KeUser.all().filter("name = ", username).get()
        if not user:
            return "User not exist!<br />"

        # Several recipients may be given, separated by ',' or ';'.
        to = user.kindle_email
        if (';' in to) or (',' in to):
            to = to.replace(',', ';').replace(' ', '').split(';')

        booktype = user.book_type  # mobi, epub
        bookmode = user.book_mode or 'periodical'  # periodical, comic
        titlefmt = user.titlefmt
        tz = user.timezone

        # A missing or empty "id" parameter simply yields no books.
        bookid = bookid.split(',') if bookid else []
        bks = []
        for id_ in bookid:
            try:
                bk = Book.get_by_id(int(id_))
            except Exception:
                # Invalid id: skip it and keep going with the others.
                continue
            # An unknown id yields None; skipping it avoids a crash later on.
            if bk is not None:
                bks.append(bk)

        book4meta = None
        if not bks:
            return "No have book to push!"
        elif len(bks) == 1:
            if bks[0].builtin:
                book4meta = BookClass(bks[0].title)
                # Check before touching attributes of a missing book class.
                if not book4meta:
                    return "No have book to push.<br />"
                mhfile = book4meta.mastheadfile
                coverfile = book4meta.coverfile
                # Pushing a single book derived from BaseComicBook switches
                # to comic mode automatically.
                if issubclass(book4meta, BaseComicBook):
                    bookmode = 'comic'
            else:  # a single custom-RSS book
                book4meta = bks[0]
                mhfile = DEFAULT_MASTHEAD
                coverfile = DEFAULT_COVER
        else:
            # When several books are merged, use the metadata of the
            # user's "custom RSS" book.
            book4meta = user.ownfeeds
            mhfile = DEFAULT_MASTHEAD
            coverfile = DEFAULT_COVER_BV if user.merge_books else DEFAULT_COVER

        if not book4meta:
            return "No have book to push.<br />"

        # Create the OEB
        opts = getOpts(user.device, bookmode)
        oeb = CreateOeb(main.log, None, opts)
        bookTitle = "%s %s" % (book4meta.title, local_time(
            titlefmt, tz)) if titlefmt else book4meta.title

        if bookmode == 'comic':
            pubtype = 'book:book:KindleEar'
        else:
            pubtype = 'periodical:magazine:KindleEar'

        setMetaData(oeb,
                    bookTitle,
                    book4meta.language,
                    local_time("%Y-%m-%d", tz),
                    pubtype=pubtype)
        oeb.container = ServerContainer(main.log)

        # guide
        if mhfile:
            id_, href = oeb.manifest.generate('masthead',
                                              mhfile)  # size:600*60
            oeb.manifest.add(id_, href, MimeFromFilename(mhfile))
            oeb.guide.add('masthead', 'Masthead Image', href)

        if coverfile:
            imgData = None
            imgMime = ''
            # Use the cover uploaded by the user and stored in the database.
            if coverfile == DEFAULT_COVER and user.cover:
                imgData = user.cover
                imgMime = 'image/jpeg'  # only JPEG is stored in the database
            elif callable(coverfile):  # the cover is provided by a callback
                try:
                    imgData = book4meta().coverfile()
                    if imgData:
                        imgType = imghdr.what(None, imgData)
                        if imgType:  # a valid image
                            imgMime = r"image/" + imgType
                        else:
                            main.log.warn(
                                'content of cover is invalid : [%s].' %
                                bookTitle)
                            imgData = None
                except Exception as e:
                    main.log.warn(
                        'Failed to fetch cover for book [%s]. [Error: %s]' %
                        (bookTitle, str(e)))
                    # Fall back to the default cover file.
                    coverfile = DEFAULT_COVER
                    imgData = None
                    imgMime = ''

            if imgData and imgMime:
                id_, href = oeb.manifest.generate('cover', 'cover.jpg')
                oeb.manifest.add(id_, href, imgMime, data=imgData)
            else:
                id_, href = oeb.manifest.generate('cover', coverfile)
                oeb.manifest.add(id_, href, MimeFromFilename(coverfile))
            oeb.guide.add('cover', 'Cover', href)
            oeb.metadata.add('cover', id_)
        elif len(bks) > 1 and DEFAULT_COVER:
            # Merge the covers of all books into a single image.
            # DEFAULT_COVER=None means the user does not want a cover.
            id_, href = oeb.manifest.generate('cover', 'cover.jpg')
            oeb.manifest.add(id_,
                             href,
                             'image/jpeg',
                             data=self.MergeCovers(bks, opts, user))
            oeb.guide.add('cover', 'Cover', href)
            oeb.metadata.add('cover', id_)

        itemcnt, imgindex = 0, 0
        sections = OrderedDict()
        toc_thumbnails = {}  # map img-url -> manifest-href
        for bk in bks:
            if bk.builtin:
                cbook = BookClass(bk.title)
                if not cbook:
                    main.log.warn('not exist book <%s>' % bk.title)
                    continue
                book = cbook(imgindex=imgindex, opts=opts, user=user)
                book.url_filters = [flt.url for flt in user.urlfilter]
                if bk.needs_subscription:  # login required
                    subs_info = user.subscription_info(bk.title)
                    if subs_info:
                        book.account = subs_info.account
                        book.password = subs_info.password
            else:  # custom RSS
                if bk.feedscount == 0:
                    continue  # the book has no feed

                book = BaseFeedBook(imgindex=imgindex, opts=opts, user=user)
                book.title = bk.title
                book.description = bk.description
                book.language = bk.language
                book.keep_image = bk.keep_image
                book.oldest_article = bk.oldest_article
                book.fulltext_by_readability = True
                feeds = bk.feeds
                book.feeds = []
                for feed in feeds:
                    # Feeds from these comic sites are handled separately
                    # and are not added to this book.
                    if feed.url.startswith(
                        ("http://www.cartoonmad.com", "http://ac.qq.com",
                         "http://m.ac.qq.com")):
                        self.ProcessComicRSS(username, user, feed)
                    else:
                        book.feeds.append(
                            (feed.title, feed.url, feed.isfulltext))
                book.url_filters = [flt.url for flt in user.urlfilter]

            # For HTML items the variable names are self-documenting;
            # thumbnail is the URL of the first img of the article.
            # For image items, section is the image MIME type, url the
            # original link, title the file name and content the binary
            # data; thumbnail is True only for the first img of an article.
            #
            # Book quality varies: a failure in one book must not affect
            # the delivery of the other books.
            try:
                for (sec_or_media, url, title, content, brief,
                     thumbnail) in book.Items():
                    if not sec_or_media or not title or not content:
                        continue

                    if sec_or_media.startswith(r'image/'):
                        id_, href = oeb.manifest.generate(id='img', href=title)
                        oeb.manifest.add(id_,
                                         href,
                                         sec_or_media,
                                         data=content)
                        if thumbnail:
                            toc_thumbnails[url] = href
                        imgindex += 1
                    else:
                        sections.setdefault(sec_or_media, [])
                        sections[sec_or_media].append(
                            (title, brief, thumbnail, content))
                        itemcnt += 1
            except Exception as e:
                excFileName, excFuncName, excLineNo = get_exc_location()
                main.log.warn(
                    "Failed to push <%s> : %s, in file '%s', %s (line %d)" %
                    (book.title, str(e), excFileName, excFuncName, excLineNo))
                continue

        volumeTitle = ''
        if itemcnt > 0:
            # Comic mode needs neither a TOC nor thumbnails.
            if bookmode == 'comic':
                insertHtmlToc = False
                insertThumbnail = False
                # Comic mode has no TOC, so the volume number is appended
                # to the title instead.
                if len(bks) == 1 and book:
                    volumeTitle = book.LastDeliveredVolume()
                    oeb.metadata.clear('title')
                    oeb.metadata.add('title', bookTitle + volumeTitle)
            else:
                insertHtmlToc = GENERATE_HTML_TOC
                insertThumbnail = GENERATE_TOC_THUMBNAIL

            InsertToc(oeb, sections, toc_thumbnails, insertHtmlToc,
                      insertThumbnail)
            oIO = byteStringIO()
            o = EPUBOutput() if booktype == "epub" else MOBIOutput()
            o.convert(oeb, oIO, opts, main.log)
            try:
                ultima_log = DeliverLog.all().order('-time').get()
            except Exception:
                # Fall back to sorting in memory when the ordered query fails.
                ultima_log = sorted(DeliverLog.all(),
                                    key=attrgetter('time'),
                                    reverse=True)
                ultima_log = ultima_log[0] if ultima_log else None
            if ultima_log:
                # If the latest delivery was logged less than 10 seconds
                # ago, wait 8 seconds before sending this one.
                diff = datetime.datetime.utcnow() - ultima_log.datetime
                if diff.days * 86400 + diff.seconds < 10:
                    time.sleep(8)
            self.SendToKindle(username, to, book4meta.title + volumeTitle,
                              booktype, str(oIO.getvalue()), tz)
            rs = "%s(%s).%s Sent!" % (book4meta.title, local_time(tz=tz),
                                      booktype)
            main.log.info(rs)
            return rs
        else:
            # Nothing collected: only record a 'nonews' delivery log entry.
            self.deliverlog(username,
                            str(to),
                            book4meta.title + volumeTitle,
                            0,
                            status='nonews',
                            tz=tz)
            rs = "No new feeds."
            main.log.info(rs)
            return rs
예제 #3
0
import os, re, urllib, urlparse, datetime, logging
from config import *
from books.base import BaseFeedBook, BaseUrlBook, WebpageBook

# Ad-hoc script: fetch the feeds below with BaseFeedBook and dump every
# yielded item into the local "image" directory.
log = logging.getLogger()

feeds = [[u'163easynet', "http://www.xinhuanet.com/ent/news_ent.xml"],
         [u'XXXzzhXXX', "http://www.sciencenet.cn/xml/news.aspx?news=0"]]

# Alternative inputs kept for manual experiments; not used below.
feeds1 = [[
    u'=-=-=asff', 'http://tech.sina.com.cn/i/2014-01-08/08039077686.shtml'
]]
#feeds2=[['324','http://blog.csdn.net/b2b160/article/details/4030702']]
zzh = BaseFeedBook(log, 2)
zzh.feeds = feeds

# Make sure the output directory exists before the first write.
if not os.path.isdir('image'):
    os.makedirs('image')

i = 0  # running number used to name the saved HTML articles
for sec_or_media, url, title, content, brief in zzh.Items():
    if sec_or_media.startswith(r'image/'):
        # Image item: ``title`` is the file name, ``content`` the raw bytes.
        filename = 'image/' + title
        with open(filename, "wb") as fout:
            fout.write(content)
    else:
        # Article item: saved as UTF-8 encoded HTML named by its counter.
        i += 1
        filename = 'image/' + str(i) + '.html'
        with open(filename, "wb") as fout:
            fout.write(content.encode('utf-8'))