# Example #1
# 0
def check_book(widgets, url):
    set_sensitivity(widgets, url=False, check=False, start=False, cancel=True)
    debug = widgets.debug
    debug("Checking book: %s" % url)
    try:
        opener = lib.get_cookies_opener()
        book_id = pysheng.get_id_from_string(url)
        debug("Book ID: %s" % book_id)
        cover_url = pysheng.get_cover_url(book_id)
        set_book_info(widgets, None)
        info = yield _from(get_info(widgets, cover_url, opener))
        widgets.page_start.set_text(str(1))
        widgets.page_end.set_text(str(len(info["page_ids"])))
        debug("Check book done")
        restart_buttons(widgets)
    except asyncjobs.JobCancelled:
        return
    except Exception, detail:
        traceback.print_exc()
        debug(Exception(detail))
        debug("Check book error")
        restart_buttons(widgets)
# Example #2
# 0
def check_book(widgets, url):    
    set_sensitivity(widgets, url=False, check=False, start=False, cancel=True)
    debug = widgets.debug
    debug("Checking book: %s" % url)
    try: 
        opener = lib.get_cookies_opener()
        book_id = pysheng.get_id_from_string(url)
        debug("Book ID: %s" % book_id)
        cover_url = pysheng.get_cover_url(book_id)
        set_book_info(widgets, None)
        info = yield _from(get_info(widgets, cover_url, opener))
        widgets.page_start.set_text(str(1))
        widgets.page_end.set_text(str(len(info["page_ids"])))
        debug("Check book done")
        restart_buttons(widgets)
    except asyncjobs.JobCancelled:
        return
    except Exception, detail:
        traceback.print_exc()
        debug(Exception(detail))
        debug("Check book error")
        restart_buttons(widgets)
# Example #3
# 0
def download_book(widgets, state, url, page_start=0, page_end=None):
    """Yield (info, page, image_data) for pages from page_start to page_end"""
    try:
        set_sensitivity(widgets,
                        start=False,
                        pause=True,
                        cancel=True,
                        browse_destdir=False,
                        page_start=False,
                        page_end=False)
        destdir = widgets.destdir.get_text()
        debug = widgets.debug
        set_sensitivity(widgets, check=False, savepdf=False)

        debug("Output directory: %s" % destdir)
        debug("Page_start: %s, Page end: %s" %
              (adj_int(page_start, +1, 1), adj_int(page_end, +1, "last")))
        opener = lib.get_cookies_opener()
        book_id = pysheng.get_id_from_string(url)
        debug("Book ID: %s" % book_id)
        cover_url = pysheng.get_cover_url(book_id)
        widgets.progress_all.set_fraction(0.0)
        widgets.progress_all.set_text('')
        widgets.progress_current.set_pulse_step(0.04)
        state.downloaded_images = None
        info = yield _from(get_info(widgets, cover_url, opener))

        if not widgets.page_start.get_text():
            widgets.page_start.set_text(str(1))
        if not widgets.page_end.get_text():
            widgets.page_end.set_text(str(len(info["page_ids"])))
        page_ids = info["page_ids"][page_start:adj_int(page_end, +1)]
        namespace = dict(title=info["title"], attribution=info["attribution"])
        images = []

        for page, page_id in enumerate(page_ids):
            page += page_start
            if namespace["attribution"]:
                image_file_template = "%(attribution)s - %(title)s - %(page)03d"
            else:
                image_file_template = "%(title)s - %(page)03d"
            filename0 = image_file_template % dict(namespace, page=page + 1)
            filename = string_to_valid_filename(filename0.encode("utf-8"), 240)
            output_path = os.path.join(destdir, filename)
            existing_files = glob.glob(escape_glob(output_path) + ".*")
            if existing_files:
                debug("Skip existing image: %s" % existing_files[0])
                images.append(output_path)
                continue
            relative_page = page - page_start + 1
            widgets.progress_all.set_fraction(
                float(relative_page - 1) / len(page_ids))
            widgets.progress_all.set_text(
                "Total: %d%%" %
                (int(100 * float(relative_page - 1) / len(page_ids))))
            header = "[%d/%d] " % (relative_page, len(page_ids))
            debug(header + "Start page: %d (page_id: %s)" %
                  (page + 1, page_id))
            page_url = pysheng.get_page_url(info["prefix"], page_id)
            debug(header + "Download page contents: %s" % (page_url))
            widgets.progress_current.set_fraction(0.0)
            page_html = yield asyncjobs.ProgressDownloadThreadedTask(
                page_url,
                opener,
                headers=HEADERS,
                elapsed_cb=functools.partial(on_elapsed, widgets, "page"))

            image_url0 = pysheng.get_image_url_from_page(page_html)
            width, height = info["max_resolution"]
            image_url = re.sub("w=(\d+)", "w=" + str(width), image_url0)
            if not image_url:
                debug("No image for this page, probably access is restricted")
                continue
            debug(header + "Download page image: %s" % image_url)
            widgets.progress_current.set_fraction(0.0)
            image_data = yield asyncjobs.ProgressDownloadThreadedTask(
                image_url,
                opener,
                headers=HEADERS,
                elapsed_cb=functools.partial(on_elapsed, widgets, "image"))
            format = imghdr.what(StringIO.StringIO(image_data)) or "png"
            debug(header + "Image downloaded (size=%d, format=%s)" %
                  (len(image_data), format))
            output_path_with_extension = output_path + "." + format
            createfile(output_path_with_extension, image_data)
            debug(header + "Image written: %s" % output_path_with_extension)
            images.append(output_path_with_extension)

        widgets.progress_all.set_fraction(1.0)
        widgets.progress_all.set_text("Done")
        debug("Done!")
        restart_buttons(widgets)
        state.downloaded_images = images

        if namespace["attribution"]:
            state.pdf_filename = "%(attribution)s - %(title)s.pdf" % namespace
        else:
            state.pdf_filename = "%(title)s.pdf" % namespace
        set_sensitivity(widgets, savepdf=True)
    except asyncjobs.JobCancelled:
        return
    except Exception, detail:
        traceback.print_exc()
        debug("job error: %s" % detail)
        restart_buttons(widgets)
# Example #4
# 0
def download_book(widgets, state, url, page_start=0, page_end=None):
    """Yield (info, page, image_data) for pages from page_start to page_end

    Generator task for asyncjobs: downloads the page images of the book
    at *url* into the destination directory from the UI, skipping images
    already on disk, then records the written image paths and a suggested
    PDF filename on *state*.  page_start/page_end are 0-based indexes;
    page_end=None means "download through the last page".
    """
    try:
        # Lock down the UI: only pause/cancel stay usable during the job.
        set_sensitivity(widgets, start=False, pause=True, cancel=True,
            browse_destdir=False, page_start=False, page_end=False)
        destdir = widgets.destdir.get_text()
        debug = widgets.debug
        set_sensitivity(widgets, check=False, savepdf=False)

        debug("Output directory: %s" % destdir)
        debug("Page_start: %s, Page end: %s" %
            (adj_int(page_start, +1, 1), adj_int(page_end, +1, "last")))
        opener = lib.get_cookies_opener()
        book_id = pysheng.get_id_from_string(url)
        debug("Book ID: %s" % book_id)
        cover_url = pysheng.get_cover_url(book_id)
        widgets.progress_all.set_fraction(0.0)
        widgets.progress_all.set_text('')
        widgets.progress_current.set_pulse_step(0.04)
        # Reset: only set again once the whole run finishes successfully.
        state.downloaded_images = None
        info = yield _from(get_info(widgets, cover_url, opener))

        # Fill in the page-range entries if the user left them blank.
        if not widgets.page_start.get_text():
            widgets.page_start.set_text(str(1))
        if not widgets.page_end.get_text():
            widgets.page_end.set_text(str(len(info["page_ids"])))
        page_ids = info["page_ids"][page_start:adj_int(page_end, +1)]
        namespace = dict(title=info["title"], attribution=info["attribution"])
        images = []

        for page, page_id in enumerate(page_ids):
            # 'page' becomes the absolute 0-based page index in the book.
            page += page_start
            if namespace["attribution"]:
                image_file_template = "%(attribution)s - %(title)s - %(page)03d"
            else:
                image_file_template = "%(title)s - %(page)03d"
            filename0 = image_file_template % dict(namespace, page=page+1)
            filename = string_to_valid_filename(filename0.encode("utf-8"), 240)
            output_path = os.path.join(destdir, filename)
            # Skip pages whose image (any extension) already exists on disk.
            existing_files = glob.glob(escape_glob(output_path) + ".*")
            if existing_files:
                debug("Skip existing image: %s" % existing_files[0])
                images.append(output_path)
                continue
            relative_page = page - page_start + 1
            widgets.progress_all.set_fraction(float(relative_page-1)/len(page_ids))
            widgets.progress_all.set_text(
                "Total: %d%%" % (int(100*float(relative_page-1) / len(page_ids))))
            header = "[%d/%d] " % (relative_page, len(page_ids))
            debug(header + "Start page: %d (page_id: %s)" % (page+1, page_id))
            page_url = pysheng.get_page_url(info["prefix"], page_id)
            debug(header + "Download page contents: %s" % (page_url))
            widgets.progress_current.set_fraction(0.0)
            page_html = yield asyncjobs.ProgressDownloadThreadedTask(
                page_url, opener, headers=HEADERS,
                elapsed_cb=functools.partial(on_elapsed, widgets, "page"))

            image_url0 = pysheng.get_image_url_from_page(page_html)
            if not image_url0:
                # No image URL in the page HTML — presumably a
                # access-restricted page; skip it.
                debug("No image for this page, probably access is restricted")
            else:
                width, height = info["max_resolution"]
                # Rewrite the "w=" query parameter to request max width.
                image_url = re.sub("w=(\d+)", "w=" + str(width), image_url0)
                debug(header + "Download page image: %s" % image_url)
                widgets.progress_current.set_fraction(0.0)
                image_data = yield asyncjobs.ProgressDownloadThreadedTask(
                    image_url, opener, headers=HEADERS,
                    elapsed_cb=functools.partial(on_elapsed, widgets, "image"))
                # Sniff the actual image format from its bytes; png default.
                format = imghdr.what(StringIO.StringIO(image_data)) or "png"
                debug(header + "Image downloaded (size=%d, format=%s)" % (len(image_data), format))
                output_path_with_extension = output_path + "." + format
                createfile(output_path_with_extension, image_data)
                debug(header + "Image written: %s" % output_path_with_extension)
                images.append(output_path_with_extension)

        widgets.progress_all.set_fraction(1.0)
        widgets.progress_all.set_text("Done")
        debug("Done!")
        restart_buttons(widgets)
        state.downloaded_images = images

        if namespace["attribution"]:
            state.pdf_filename = "%(attribution)s - %(title)s.pdf" % namespace
        else:
            state.pdf_filename = "%(title)s.pdf" % namespace
        set_sensitivity(widgets, savepdf=True)
    except asyncjobs.JobCancelled:
        # User cancelled: exit quietly without touching the UI further.
        return
    except Exception, detail:
        traceback.print_exc()
        debug("job error: %s" % detail)
        restart_buttons(widgets)