Esempio n. 1
0
def download_pdf(url, category):
    # add domain
    url = "https://www.geekbooks.me" + url
    # if category directory is not exist, create
    if os.path.isdir(conf_books_dir + category):
        pass
    else:
        os.makedirs(conf_books_dir + category)
    # create instance of MozillaCookieJar
    cookie = cookielib.MozillaCookieJar()
    # get cookie from file
    cookie.load('../data/cookie4geek.data',
                ignore_discard=True,
                ignore_expires=True)
    handler = urllib2.HTTPCookieProcessor(cookie)
    opener = urllib2.build_opener(handler)
    # add header
    opener.addheaders = [('User-agent', 'Mozilla/5.0'), ("Referer", url)]
    for url in list_pdf_url_by_book_detail_url(url):
        logger.info("PageDetailPage> {url}".format(
            url=("https://www.geekbooks.me" + url)))
        file_name = url.split('/')[-1]
        u = opener.open("https://www.geekbooks.me" + url)
        print "Preparing to download..."
        # f with directory
        if os.path.exists(conf_books_dir + category + "/" +
                          file_name) and detect_book(
                              (conf_books_dir + category + "/" + file_name)):
            continue
        f = open(conf_books_dir + category + "/" + file_name, 'wb')
        meta = u.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        print "Downloading: %s Bytes: %s" % (file_name, file_size)
        file_size_dl = 0
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            file_size_dl += len(buffer)
            f.write(buffer)
            status = r"%10d  [%3.2f%%]" % (file_size_dl,
                                           file_size_dl * 100. / file_size)
            status = status + chr(8) * (len(status) + 1)
            print status,
        f.close()
Esempio n. 2
0
def download_pdf(url, category):
    # add domain
    url = "https://www.geekbooks.me" + url
    # if category directory is not exist, create
    if os.path.isdir(conf_books_dir + category):
        pass
    else:
        os.makedirs(conf_books_dir + category)
    # create instance of MozillaCookieJar
    cookie = cookielib.MozillaCookieJar()
    # get cookie from file
    cookie.load('../data/cookie4geek.data', ignore_discard=True, ignore_expires=True)
    handler = urllib2.HTTPCookieProcessor(cookie)
    opener = urllib2.build_opener(handler)
    # add header
    opener.addheaders = [('User-agent', 'Mozilla/5.0'), ("Referer", url)]
    for url in list_pdf_url_by_book_detail_url(url):
        logger.info("PageDetailPage> {url}".format(url=("https://www.geekbooks.me" + url)))
        file_name = url.split('/')[-1]
        u = opener.open("https://www.geekbooks.me" + url)
        print "Preparing to download..."
        # f with directory
        if os.path.exists(conf_books_dir + category + "/" + file_name) and detect_book(
                (conf_books_dir + category + "/" + file_name)):
            continue
        f = open(conf_books_dir + category + "/" + file_name, 'wb')
        meta = u.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        print "Downloading: %s Bytes: %s" % (file_name, file_size)
        file_size_dl = 0
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            file_size_dl += len(buffer)
            f.write(buffer)
            status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
            status = status + chr(8) * (len(status) + 1)
            print status,
        f.close()
Esempio n. 3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from GeekBook.util.log_util import logger
if __name__ == '__main__':
    # Manual smoke test: emit one record through each logger method, using
    # the method's own name as the message text.
    for level_name in ("info", "debug", "warn", "error"):
        getattr(logger, level_name)(level_name)