def get_info(widgets, url, opener):
    """Download `url` and parse the book info from its HTML."""
    debug = widgets.debug
    html = yield asyncjobs.ProgressDownloadThreadedTask(
        url, opener, headers=HEADERS,
        elapsed_cb=functools.partial(on_elapsed, widgets, "info"))
    try:
        info = pysheng.get_info(html)
    except ValueError, detail:
        debug("Error parsing page HTML: %s" % str(detail))
        raise
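# The `on_elapsed` callback passed via functools.partial above is defined
# elsewhere in this module. A minimal hypothetical sketch, assuming the task
# ticks the callback periodically while the download runs (the real signature
# is whatever asyncjobs.ProgressDownloadThreadedTask actually uses):
#
#     def on_elapsed(widgets, name, *args):
#         # Pulse the per-item bar; set_pulse_step() is configured by callers.
#         widgets.progress_current.pulse()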
def download_book(widgets, state, url, page_start=0, page_end=None):
    """Yield (info, page, image_data) for pages from page_start to page_end"""
    try:
        set_sensitivity(widgets, start=False, pause=True, cancel=True,
                        browse_destdir=False, page_start=False, page_end=False)
        destdir = widgets.destdir.get_text()
        debug = widgets.debug
        set_sensitivity(widgets, check=False, savepdf=False)
        debug("Output directory: %s" % destdir)
        debug("Page start: %s, page end: %s" %
              (adj_int(page_start, +1, 1), adj_int(page_end, +1, "last")))
        opener = lib.get_cookies_opener()
        book_id = pysheng.get_id_from_string(url)
        debug("Book ID: %s" % book_id)
        cover_url = pysheng.get_cover_url(book_id)
        widgets.progress_all.set_fraction(0.0)
        widgets.progress_all.set_text('')
        widgets.progress_current.set_pulse_step(0.04)
        state.downloaded_images = None

        info = yield _from(get_info(widgets, cover_url, opener))
        if not widgets.page_start.get_text():
            widgets.page_start.set_text(str(1))
        if not widgets.page_end.get_text():
            widgets.page_end.set_text(str(len(info["page_ids"])))
        page_ids = info["page_ids"][page_start:adj_int(page_end, +1)]
        namespace = dict(title=info["title"], attribution=info["attribution"])
        images = []
        for page, page_id in enumerate(page_ids):
            page += page_start
            if namespace["attribution"]:
                image_file_template = "%(attribution)s - %(title)s - %(page)03d"
            else:
                image_file_template = "%(title)s - %(page)03d"
            filename0 = image_file_template % dict(namespace, page=page + 1)
            filename = string_to_valid_filename(filename0.encode("utf-8"), 240)
            output_path = os.path.join(destdir, filename)
            existing_files = glob.glob(escape_glob(output_path) + ".*")
            if existing_files:
                debug("Skip existing image: %s" % existing_files[0])
                images.append(output_path)
                continue
            relative_page = page - page_start + 1
            widgets.progress_all.set_fraction(
                float(relative_page - 1) / len(page_ids))
            widgets.progress_all.set_text("Total: %d%%" %
                int(100 * float(relative_page - 1) / len(page_ids)))
            header = "[%d/%d] " % (relative_page, len(page_ids))
            debug(header + "Start page: %d (page_id: %s)" % (page + 1, page_id))
            page_url = pysheng.get_page_url(info["prefix"], page_id)
            debug(header + "Download page contents: %s" % page_url)
            widgets.progress_current.set_fraction(0.0)
            page_html = yield asyncjobs.ProgressDownloadThreadedTask(
                page_url, opener, headers=HEADERS,
                elapsed_cb=functools.partial(on_elapsed, widgets, "page"))
            image_url0 = pysheng.get_image_url_from_page(page_html)
            # Check before re.sub: a missing URL would otherwise raise a
            # TypeError instead of being reported and skipped.
            if not image_url0:
                debug(header + "No image for this page, "
                      "access is probably restricted")
                continue
            # Request the image at the maximum available resolution.
            width, height = info["max_resolution"]
            image_url = re.sub(r"w=(\d+)", "w=" + str(width), image_url0)
            debug(header + "Download page image: %s" % image_url)
            widgets.progress_current.set_fraction(0.0)
            image_data = yield asyncjobs.ProgressDownloadThreadedTask(
                image_url, opener, headers=HEADERS,
                elapsed_cb=functools.partial(on_elapsed, widgets, "image"))
            format = imghdr.what(StringIO.StringIO(image_data)) or "png"
            debug(header + "Image downloaded (size=%d, format=%s)" %
                  (len(image_data), format))
            output_path_with_extension = output_path + "." + format
            createfile(output_path_with_extension, image_data)
            debug(header + "Image written: %s" % output_path_with_extension)
            images.append(output_path_with_extension)

        widgets.progress_all.set_fraction(1.0)
        widgets.progress_all.set_text("Done")
        debug("Done!")
        restart_buttons(widgets)
        state.downloaded_images = images
        if namespace["attribution"]:
            state.pdf_filename = "%(attribution)s - %(title)s.pdf" % namespace
        else:
            state.pdf_filename = "%(title)s.pdf" % namespace
        set_sensitivity(widgets, savepdf=True)
    except asyncjobs.JobCancelled:
        return
    except Exception, detail:
        traceback.print_exc()
        debug("job error: %s" % detail)
        restart_buttons(widgets)
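# Helpers such as adj_int, escape_glob, createfile and
# string_to_valid_filename are defined elsewhere in this module. Plausible
# minimal sketches inferred from their call sites above (hypothetical, not
# the project's confirmed implementations):
#
#     def adj_int(value, delta, default=None):
#         # 0-based index -> 1-based label (or slice end); None -> default.
#         return (value + delta) if value is not None else default
#
#     def escape_glob(path):
#         # Escape glob metacharacters so a literal path can be globbed.
#         return re.sub(r"([\[\]\*\?])", r"[\1]", path)
#
#     def createfile(path, data):
#         # Write binary data to a file.
#         open(path, "wb").write(data)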
def threaded_task(state, loop, url):
    state.result = None
    cb = functools.partial(elapsed_cb, state)
    state.callback = []
    state.result = \
        yield asyncjobs.ProgressDownloadThreadedTask(url, elapsed_cb=cb)
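# `elapsed_cb` is defined elsewhere; given the functools.partial(elapsed_cb,
# state) usage and the state.callback list initialised above, a hypothetical
# sketch could simply record each progress notification on the shared state:
#
#     def elapsed_cb(state, *args):
#         state.callback.append(args)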