Example #1
0
def download_one(tdir, timeout, progress_report, url):
    """Copy the resource at *url* into a new temporary file inside *tdir*.

    ``file:`` URLs are opened from the path component of the URL; every other
    scheme is fetched with ``urlopen`` using *timeout*. The data is streamed
    through a ``ProgressTracker`` wrapping the temporary file.

    :param tdir: Directory in which the temporary file is created.
    :param timeout: Timeout passed to ``urlopen`` for non-file URLs.
    :param progress_report: Callable invoked as ``progress_report(url, 0, sz)``
        before copying starts and also handed to ``ProgressTracker``.
    :param url: The URL to download.
    :return: ``(True, (url, filename, dest.name, mime_type))`` on success or
        ``(False, (url, error_text))`` if any exception was raised.
    """
    try:
        purl = urlparse(url)
        with NamedTemporaryFile(dir=tdir, delete=False) as df:
            is_local = purl.scheme == 'file'
            if is_local:
                src = lopen(purl.path, 'rb')
            else:
                src = urlopen(url, timeout=timeout)
            # Enter the closing() context immediately so that src is released
            # even when gathering the metadata below raises.
            with closing(src):
                if is_local:
                    # basename() needs the path string, not the open file
                    # object (passing the object raises TypeError).
                    filename = os.path.basename(purl.path)
                    # Seek to the end to learn the size, then rewind.
                    sz = (src.seek(0, os.SEEK_END), src.tell(), src.seek(0))[1]
                else:
                    filename = get_filename(purl, src)
                    sz = get_content_length(src)
                progress_report(url, 0, sz)
                dest = ProgressTracker(df, url, sz, progress_report)
                shutil.copyfileobj(src, dest)
            filename = sanitize_file_name2(filename)
            mt = guess_type(filename)
            if mt in OEB_DOCS:
                raise ValueError('The external resource {} looks like a HTML document ({})'.format(url, filename))
            if not mt or mt == 'application/octet-stream' or '.' not in filename:
                raise ValueError('The external resource {} is not of a known type'.format(url))
            return True, (url, sanitize_file_name2(filename), dest.name, mt)
    except Exception as err:
        # Failures are reported to the caller rather than propagated.
        return False, (url, as_unicode(err))
Example #2
0
def download_one(tdir, timeout, progress_report, url):
    """Copy the resource at *url* into a new temporary file inside *tdir*.

    ``file:`` URLs are opened from the path component of the URL; every other
    scheme is fetched with ``urlopen`` using *timeout*. The data is streamed
    through a ``ProgressTracker`` wrapping the temporary file.

    :param tdir: Directory in which the temporary file is created.
    :param timeout: Timeout passed to ``urlopen`` for non-file URLs.
    :param progress_report: Callable handed to ``ProgressTracker``.
    :param url: The URL to download.
    :return: ``(True, (url, filename, dest.name, mime_type))`` on success.
        On any exception a flat three-element tuple
        ``(False, url, error_text)`` is returned instead.
    """
    try:
        purl = urlparse(url)
        with NamedTemporaryFile(dir=tdir, delete=False) as df:
            is_local = purl.scheme == 'file'
            if is_local:
                src = lopen(purl.path, 'rb')
            else:
                src = urlopen(url, timeout=timeout)
            # Enter the context right away so that src is closed even when
            # gathering the metadata below raises.
            with src:
                if is_local:
                    # basename() needs the path string, not the open file
                    # object (passing the object raises TypeError).
                    filename = os.path.basename(purl.path)
                    # Seek to the end to learn the size, then rewind.
                    sz = (src.seek(0, os.SEEK_END), src.tell(), src.seek(0))[1]
                else:
                    filename = get_filename(purl, src)
                    sz = get_content_length(src)
                dest = ProgressTracker(df, url, sz, progress_report)
                shutil.copyfileobj(src, dest)
            filename = sanitize_file_name2(filename)
            mt = guess_type(filename)
            if mt in OEB_DOCS:
                raise ValueError(
                    'The external resource {} looks like a HTML document ({})'.
                    format(url, filename))
            if not mt or mt == 'application/octet-stream' or '.' not in filename:
                raise ValueError(
                    'The external resource {} is not of a known type'.format(
                        url))
            return True, (url, sanitize_file_name2(filename), dest.name, mt)
    except Exception as err:
        # NOTE(review): the failure result is a flat 3-tuple while success is
        # a 2-tuple; kept as is because callers may unpack it this way.
        return False, url, as_unicode(err)
Example #3
0
    def browse_icon(self, name="blank.png"):
        """Return PNG data for the icon called *name*, caching the result.

        Names starting with ``_`` are read from the ``tb_icons`` folder inside
        ``config_dir`` (the rest of the name is sanitized first); all other
        names are loaded with ``I(name, data=True)``. The image is resized to
        the dimensions computed by ``fit_image(width, height, 48, 48)`` when
        that call reports that scaling is needed.

        :param name: Icon name as requested by the client.
        :return: The value of ``img.export("png")`` for the icon.
        :raises cherrypy.HTTPError: 404 when the icon data cannot be loaded.
        """
        cherrypy.response.headers["Content-Type"] = "image/png"
        cherrypy.response.headers["Last-Modified"] = self.last_modified(self.build_time)

        if not hasattr(self, "__browse_icon_cache__"):
            self.__browse_icon_cache__ = {}
        # Cache under the name exactly as requested. ``name`` is rewritten
        # below for "_"-prefixed icons; using the rewritten value as the key
        # would make "_x.png" collide with the built-in "x.png" and would
        # never produce a cache hit for the prefixed form.
        cache_key = name
        if cache_key not in self.__browse_icon_cache__:
            if name.startswith("_"):
                name = sanitize_file_name2(name[1:])
                try:
                    with open(os.path.join(config_dir, "tb_icons", name), "rb") as f:
                        data = f.read()
                except Exception:
                    raise cherrypy.HTTPError(404, "no icon named: %r" % name)
            else:
                try:
                    data = I(name, data=True)
                except Exception:
                    raise cherrypy.HTTPError(404, "no icon named: %r" % name)
            img = Image()
            img.load(data)
            width, height = img.size
            scaled, width, height = fit_image(width, height, 48, 48)
            if scaled:
                img.size = (width, height)

            self.__browse_icon_cache__[cache_key] = img.export("png")
        return self.__browse_icon_cache__[cache_key]
Example #4
0
    def browse_icon(self, name='blank.png'):
        '''
        Return PNG data for the icon called *name*, caching the result.

        Names starting with ``_`` are read from the ``tb_icons`` folder inside
        ``config_dir`` (the rest of the name is sanitized first); all other
        names are loaded with ``I(name, data=True)``. The image is resized to
        the dimensions computed by ``fit_image(width, height, 48, 48)`` when
        that call reports that scaling is needed.

        :param name: Icon name as requested by the client.
        :return: The value of ``img.export('png')`` for the icon.
        :raises cherrypy.HTTPError: 404 when the icon data cannot be loaded.
        '''
        cherrypy.response.headers['Content-Type'] = 'image/png'
        cherrypy.response.headers['Last-Modified'] = self.last_modified(
            self.build_time)

        if not hasattr(self, '__browse_icon_cache__'):
            self.__browse_icon_cache__ = {}
        # Remember the requested name before it is rewritten below: keying the
        # cache on the sanitized name would let '_x.png' overwrite the entry
        # of the built-in 'x.png' and would never hit for the prefixed form.
        requested = name
        if requested not in self.__browse_icon_cache__:
            if name.startswith('_'):
                name = sanitize_file_name2(name[1:])
                try:
                    with open(os.path.join(config_dir, 'tb_icons', name),
                              'rb') as f:
                        data = f.read()
                except Exception:
                    raise cherrypy.HTTPError(404, 'no icon named: %r' % name)
            else:
                try:
                    data = I(name, data=True)
                except Exception:
                    raise cherrypy.HTTPError(404, 'no icon named: %r' % name)
            img = Image()
            img.load(data)
            width, height = img.size
            scaled, width, height = fit_image(width, height, 48, 48)
            if scaled:
                img.size = (width, height)

            self.__browse_icon_cache__[requested] = img.export('png')
        return self.__browse_icon_cache__[requested]
Example #5
0
    def browse_icon(self, name='blank.png'):
        '''
        Return PNG data for the icon called *name*, caching the result.

        A leading ``_`` selects an icon file from the ``tb_icons`` folder
        inside ``config_dir`` (the remainder of the name is sanitized first);
        any other name is loaded with ``I(name, data=True)``. The image is
        resized to the dimensions computed by
        ``fit_image(width, height, 48, 48)`` when that call reports that
        scaling is needed.

        :param name: Icon name as requested by the client.
        :return: The value of ``img.export('png')`` for the icon.
        :raises cherrypy.HTTPError: 404 when the icon data cannot be loaded.
        '''
        cherrypy.response.headers['Content-Type'] = 'image/png'
        cherrypy.response.headers['Last-Modified'] = self.last_modified(self.build_time)

        if not hasattr(self, '__browse_icon_cache__'):
            self.__browse_icon_cache__ = {}
        # The cache is keyed on the name as received. ``name`` itself is
        # replaced by its sanitized, prefix-less form further down, which
        # must not be used as the key: it would clash with the built-in icon
        # of the same name and the prefixed request would never be a hit.
        key = name
        if key not in self.__browse_icon_cache__:
            if name.startswith('_'):
                name = sanitize_file_name2(name[1:])
                try:
                    with open(os.path.join(config_dir, 'tb_icons', name), 'rb') as f:
                        data = f.read()
                except Exception:
                    raise cherrypy.HTTPError(404, 'no icon named: %r'%name)
            else:
                try:
                    data = I(name, data=True)
                except Exception:
                    raise cherrypy.HTTPError(404, 'no icon named: %r'%name)
            img = Image()
            img.load(data)
            width, height = img.size
            scaled, width, height = fit_image(width, height, 48, 48)
            if scaled:
                img.size = (width, height)

            self.__browse_icon_cache__[key] = img.export('png')
        return self.__browse_icon_cache__[key]
Example #6
0
def js(ctx, rd, which):
    '''
    Open the ``content-server`` resource named *which* for binary reading.

    *which* is sanitized before being appended to ``content-server/``.
    *ctx* and *rd* are accepted but not used here.

    :raises HTTPNotFound: if opening fails with ``errno.ENOENT``. Any other
        ``EnvironmentError`` is re-raised unchanged.
    '''
    try:
        resource = P('content-server/' + sanitize_file_name2(which))
        return lopen(resource, 'rb')
    except EnvironmentError as err:
        if err.errno != errno.ENOENT:
            raise
        raise HTTPNotFound('No js with name: %r' % which)
Example #7
0
def js(ctx, rd, which):
    '''
    Return an open binary file object for the ``content-server`` resource
    whose (sanitized) name is *which*.

    Neither *ctx* nor *rd* is used by this function.

    :raises HTTPNotFound: when the failure is a missing file
        (``errno.ENOENT``); every other ``EnvironmentError`` propagates.
    '''
    try:
        location = P('content-server/' + sanitize_file_name2(which))
        stream = lopen(location, 'rb')
    except EnvironmentError as exc:
        missing = exc.errno == errno.ENOENT
        if missing:
            raise HTTPNotFound('No js with name: %r' % which)
        raise
    return stream
Example #8
0
def sanitize_file_name(x):
    """Return *x* cleaned up for use as a file name.

    The name is first passed through ``sanitize_file_name2``, then every
    occurrence of ``..`` is collapsed until none remain, and finally the
    result is handed to ``make_filename_safe``.
    """
    from calibre.ebooks.oeb.polish.check.parsing import make_filename_safe

    cleaned = sanitize_file_name2(x)
    # A single replace() pass can leave ".." behind (e.g. "...." -> ".."),
    # so keep going until the text is stable.
    while True:
        if ".." not in cleaned:
            break
        cleaned = cleaned.replace("..", ".")
    return make_filename_safe(cleaned)
Example #9
0
 def __init__(self, book_title, parent=None, prefs=vprefs):
     '''
     Set up the state for the dialog, then run ``Dialog.__init__``.

     :param book_title: Title of the book; its first 75 characters plus
         ``.pdf`` (sanitized) become ``default_file_name``.
     :param parent: Forwarded to ``Dialog.__init__``.
     :param prefs: Forwarded to ``Dialog.__init__``.
     '''
     self.book_title = book_title
     suggested_name = book_title[:75] + '.pdf'
     self.default_file_name = sanitize_file_name2(suggested_name)
     # Map every name in PAPER_SIZES to the QPageSize attribute with the
     # capitalized form of that name.
     self.paper_size_map = dict(
         (size, getattr(QPageSize, size.capitalize()))
         for size in PAPER_SIZES)
     # The attributes above are assigned before the base initializer runs.
     Dialog.__init__(
         self, _('Print to PDF'), 'print-to-pdf', parent=parent, prefs=prefs)
Example #10
0
 def data(self):
     '''
     Collect the current widget values into a dict.

     The directory part of the entered path is kept as typed while the final
     path component is sanitized. The returned dict has the keys ``output``,
     ``paper_size``, ``page_numbers``, ``show_file`` and one ``margin_<edge>``
     entry for each of left, top, right and bottom.
     '''
     entered = self.file_name.text().strip()
     folder, base = os.path.split(entered)
     base = sanitize_file_name2(base)
     # Only re-join when there actually was a directory component.
     output_path = os.path.join(folder, base) if folder else base
     settings = dict(
         output=output_path,
         paper_size=self.paper_size.currentText().lower(),
         page_numbers=self.pnum.isChecked(),
         show_file=self.show_file.isChecked(),
     )
     for edge in 'left top right bottom'.split():
         spinner = getattr(self, '%s_margin' % edge)
         settings['margin_' + edge] = spinner.value()
     return settings
Example #11
0
 def data(self):
     '''
     Return the values currently entered in the widgets as a dict.

     Only the last component of the entered file path is sanitized; a leading
     directory, if present, is re-attached unchanged. Keys of the result:
     ``output``, ``paper_size``, ``page_numbers``, ``show_file`` and
     ``margin_left`` / ``margin_top`` / ``margin_right`` / ``margin_bottom``.
     '''
     head, tail = os.path.split(self.file_name.text().strip())
     tail = sanitize_file_name2(tail)
     if head:
         target = os.path.join(head, tail)
     else:
         target = tail
     ans = {}
     ans['output'] = target
     ans['paper_size'] = self.paper_size.currentText().lower()
     ans['page_numbers'] = self.pnum.isChecked()
     ans['show_file'] = self.show_file.isChecked()
     # Each edge has a matching "<edge>_margin" attribute exposing value().
     for edge in 'left top right bottom'.split():
         margin_widget = getattr(self, '%s_margin' % edge)
         ans['margin_' + edge] = margin_widget.value()
     return ans
Example #12
0
def sanitize_file_name(x):
    '''
    Sanitize *x* with ``sanitize_file_name2``, collapse every ``..`` into a
    single ``.`` until none are left, and return the result of passing that
    through ``make_filename_safe``.
    '''
    from calibre.ebooks.oeb.polish.check.parsing import make_filename_safe
    name = sanitize_file_name2(x)
    # Repeat: one replace() pass over a long run of dots still leaves '..'.
    has_double_dot = '..' in name
    while has_double_dot:
        name = name.replace('..', '.')
        has_double_dot = '..' in name
    safe_name = make_filename_safe(name)
    return safe_name
Example #13
0
def test_identify_plugin(name, tests, modify_plugin=lambda plugin:None,
        fail_missing_meta=True):  # {{{
    '''
    Run the identify (and, if supported, cover download) function of a
    metadata plugin against a list of test cases.

    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    :param modify_plugin: Callable invoked with the plugin object before any
                  test is run.
    :param fail_missing_meta: If True, exit with an error when none of the
                  passing results satisfies ``plugin.test_fields`` or when
                  the cover download yields no results.
    :raises ValueError: If no plugin called ``name`` with the ``identify``
                  capability is found.
    :raises SystemExit: With status 1 when a test fails.
    '''
    plugin = None
    for x in all_metadata_plugins():
        if x.name == name and 'identify' in x.capabilities:
            plugin = x
            break
    if plugin is None:
        # Fail with a clear message instead of an AttributeError on
        # ``plugin.name`` below.
        raise ValueError('No identify plugin named %r found' % name)
    modify_plugin(plugin)
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)

    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []
    for kwargs, test_funcs in tests:
        prints('Running test with:', kwargs)
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        plugin.running_a_test = True
        try:
            err = plugin.identify(*args, **kwargs)
        finally:
            plugin.running_a_test = False
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
            prints('identify returned an error for args', args)
            prints(err)
            break

        # Drain everything the plugin put on the result queue.
        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        results.sort(key=plugin.identify_results_keygen(
            title=kwargs.get('title', None), authors=kwargs.get('authors',
                None), identifiers=kwargs.get('identifiers', {})))

        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URL    :',
                    plugin.get_cached_cover_url(mi.identifiers))
            prints('*'*75, '\n\n')

        # Keep only the results for which every test function returns True.
        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)

        good = [x for x in possibles if plugin.test_fields(x) is
                None]
        if not good:
            prints('Failed to find', plugin.test_fields(possibles[0]))
            if fail_missing_meta:
                raise SystemExit(1)

        # The best ranked result must itself be one that passed the tests.
        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        if 'cover' in plugin.capabilities:
            rq = Queue()
            mi = results[0]
            plugin.download_cover(log, rq, abort, title=mi.title,
                    authors=mi.authors, identifiers=mi.identifiers)
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            if not results and fail_missing_meta:
                prints('Cover download failed')
                raise SystemExit(1)
            elif results:
                cdata = results[0]
                cover = os.path.join(tdir, plugin.name.replace(' ',
                    '')+'-%s-cover.jpg'%sanitize_file_name2(mi.title.replace(' ',
                        '_')))
                with open(cover, 'wb') as f:
                    f.write(cdata[-1])

                prints('Cover downloaded to:', cover)

                if len(cdata[-1]) < 10240:
                    prints('Downloaded cover too small')
                    raise SystemExit(1)

    # ``times`` is empty when ``tests`` is empty; avoid dividing by zero.
    if times:
        prints('Average time per query', sum(times)/len(times))

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
Example #14
0
def test_identify_plugin(
        name,
        tests,
        modify_plugin=lambda plugin: None,  # {{{
        fail_missing_meta=True):
    '''
    Run the identify (and, if supported, cover download) function of a
    metadata plugin against a list of test cases.

    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    :param modify_plugin: Callable invoked with the plugin object before any
                  test is run.
    :param fail_missing_meta: If True, exit with an error when none of the
                  passing results satisfies ``plugin.test_fields`` or when
                  the cover download yields no results.
    :raises ValueError: If no plugin called ``name`` with the ``identify``
                  capability is found.
    :raises SystemExit: With status 1 when a test fails.
    '''
    plugin = None
    for x in all_metadata_plugins():
        if x.name == name and 'identify' in x.capabilities:
            plugin = x
            break
    if plugin is None:
        # Fail with a clear message instead of an AttributeError on
        # ``plugin.name`` below.
        raise ValueError('No identify plugin named %r found' % name)
    modify_plugin(plugin)
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)

    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []
    for kwargs, test_funcs in tests:
        # Write a banner to the log so each test's output is easy to find.
        log('')
        log('#' * 80)
        log('### Running test with:', kwargs)
        log('#' * 80)
        prints('Running test with:', kwargs)
        rq = Queue()
        args = (log, rq, abort)
        start_time = time.time()
        plugin.running_a_test = True
        try:
            err = plugin.identify(*args, **kwargs)
        finally:
            plugin.running_a_test = False
        total_time = time.time() - start_time
        times.append(total_time)
        if err is not None:
            prints('identify returned an error for args', args)
            prints(err)
            break

        # Drain everything the plugin put on the result queue.
        results = []
        while True:
            try:
                results.append(rq.get_nowait())
            except Empty:
                break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        results.sort(key=plugin.identify_results_keygen(
            title=kwargs.get('title', None),
            authors=kwargs.get('authors', None),
            identifiers=kwargs.get('identifiers', {})))

        for i, mi in enumerate(results):
            prints('*' * 30, 'Relevance:', i, '*' * 30)
            # NOTE(review): this doubles the rating on the result object
            # itself before it is printed and tested; the reason is not
            # visible here -- confirm the intended rating scale.
            if mi.rating:
                mi.rating *= 2
            prints(mi)
            prints('\nCached cover URL    :',
                   plugin.get_cached_cover_url(mi.identifiers))
            prints('*' * 75, '\n\n')

        # Keep only the results for which every test function returns True.
        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            log.close()
            dump_log(lf)
            raise SystemExit(1)

        good = [x for x in possibles if plugin.test_fields(x) is None]
        if not good:
            prints('Failed to find', plugin.test_fields(possibles[0]))
            if fail_missing_meta:
                raise SystemExit(1)

        # The best ranked result must itself be one that passed the tests.
        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        if 'cover' in plugin.capabilities:
            rq = Queue()
            mi = results[0]
            plugin.download_cover(log,
                                  rq,
                                  abort,
                                  title=mi.title,
                                  authors=mi.authors,
                                  identifiers=mi.identifiers)
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            if not results and fail_missing_meta:
                prints('Cover download failed')
                raise SystemExit(1)
            elif results:
                cdata = results[0]
                cover = os.path.join(
                    tdir,
                    plugin.name.replace(' ', '') + '-%s-cover.jpg' %
                    sanitize_file_name2(mi.title.replace(' ', '_')))
                with open(cover, 'wb') as f:
                    f.write(cdata[-1])

                prints('Cover downloaded to:', cover)

                if len(cdata[-1]) < 10240:
                    prints('Downloaded cover too small')
                    raise SystemExit(1)

    # ``times`` is empty when ``tests`` is empty; avoid dividing by zero.
    if times:
        prints('Average time per query', sum(times) / len(times))

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
Example #15
0
 def __init__(self, book_title, parent=None, prefs=vprefs):
     '''
     Store the book title, derive the default output file name and the paper
     size lookup table, then initialize the underlying ``Dialog``.

     :param book_title: Title of the book; truncated to 75 characters and
         given a ``.pdf`` suffix to form ``default_file_name``.
     :param parent: Passed through to ``Dialog.__init__``.
     :param prefs: Passed through to ``Dialog.__init__``.
     '''
     self.book_title = book_title
     truncated_title = book_title[:75]
     self.default_file_name = sanitize_file_name2(truncated_title + '.pdf')
     # Look up, for each entry of PAPER_SIZES, the QPageSize attribute named
     # by its capitalized form.
     size_map = {}
     for size_name in PAPER_SIZES:
         size_map[size_name] = getattr(QPageSize, size_name.capitalize())
     self.paper_size_map = size_map
     Dialog.__init__(
         self,
         _('Print to PDF'),
         'print-to-pdf',
         prefs=prefs,
         parent=parent,
     )