def download_book(url, page_start=0, page_end=None):
    """Yield tuples (info, page, image_data) for each page of the book <url>
    from <page_start> to <page_end>.

    Pages whose HTML yields no image URL are silently skipped.
    """
    # Delegate cover fetching/parsing to the shared helper instead of
    # duplicating it here (consistent with the other download_book variants).
    info = get_info_from_url(url)
    opener = lib.get_cookies_opener()
    # Lazily take only the requested page range; page numbers keep their
    # absolute position by starting enumeration at page_start.
    page_ids = itertools.islice(info["page_ids"], page_start, page_end)
    for page, page_id in enumerate(page_ids, page_start):
        page_url = get_page_url(info["prefix"], page_id)
        page_html = download(page_url, opener=opener)
        image_url = get_image_url_from_page(page_html)
        if image_url:
            image_data = download(image_url, opener=opener)
            yield info, page, image_data
def download_book(url, page_start=0, page_end=None):
    """Yield tuples (info, page, image_data) for each page of the book <url>
    from <page_start> to <page_end>.

    Pages whose HTML yields no image URL are silently skipped.
    """
    info = get_info_from_url(url)
    opener = lib.get_cookies_opener()
    # Lazily take only the requested page range.
    page_ids = itertools.islice(info["page_ids"], page_start, page_end)
    # enumerate's start parameter replaces the manual page0 + page_start
    # offset of the original.
    for page, page_id in enumerate(page_ids, page_start):
        page_url = get_page_url(info["prefix"], page_id)
        page_html = download(page_url, opener=opener)
        image_url = get_image_url_from_page(page_html)
        if image_url:
            image_data = download(image_url, opener=opener)
            yield info, page, image_data
def download_book(url, page_start=0, page_end=None):
    """Yield tuples (info, page, image_data) for each page of the book <url>
    from <page_start> to <page_end>, requesting each image at the book's
    maximum available width.

    Pages whose HTML yields no image URL are silently skipped.
    """
    info = get_info_from_url(url)
    opener = lib.get_cookies_opener()
    # Lazily take only the requested page range; enumerate's start parameter
    # replaces the manual page0 + page_start offset of the original.
    page_ids = itertools.islice(info["page_ids"], page_start, page_end)
    for page, page_id in enumerate(page_ids, page_start):
        page_url = get_page_url(info["prefix"], page_id)
        page_html = download(page_url, opener=opener)
        image_url0 = get_image_url_from_page(page_html)
        if image_url0:
            # Only the width is rewritten in the URL; presumably the server
            # scales the height to preserve the aspect ratio -- TODO confirm.
            width, _height = info["max_resolution"]
            # Raw string for the pattern: "\d" in a plain literal is an
            # invalid escape (SyntaxWarning on Python 3.12+, future error).
            image_url = re.sub(r"w=(\d+)", "w=" + str(width), image_url0)
            image_data = download(image_url, opener=opener)
            yield info, page, image_data
def download_book(url, page_start=0, page_end=None):
    """Yield tuples (info, page, image_data) for each page of the book <url>
    from <page_start> to <page_end>.

    Pages whose HTML yields no image URL are silently skipped.
    """
    # Delegate cover fetching/parsing to the shared helper instead of
    # duplicating it here (consistent with the other download_book variants).
    info = get_info_from_url(url)
    opener = lib.get_cookies_opener()
    # Lazily take only the requested page range; enumerate's start parameter
    # replaces the manual page0 + page_start offset of the original.
    page_ids = itertools.islice(info["page_ids"], page_start, page_end)
    for page, page_id in enumerate(page_ids, page_start):
        page_url = get_page_url(info["prefix"], page_id)
        page_html = download(page_url, opener=opener)
        image_url = get_image_url_from_page(page_html)
        if image_url:
            image_data = download(image_url, opener=opener)
            yield info, page, image_data
def get_info_from_url(url):
    """Download the cover page for the book at <url> and return its parsed
    info dictionary."""
    book_id = get_id_from_string(url)
    html = download(get_cover_url(book_id), opener=lib.get_cookies_opener())
    return get_info(html)
def get_title(url):
    """Return the entity-decoded title of the book at <url>."""
    # Reuse get_info_from_url instead of duplicating the cover-fetch steps
    # (opener creation, cover URL, download, parse) inline.
    info = get_info_from_url(url)
    return htmlentity2unicode(info["title"].encode("utf-8"))