Ejemplo n.º 1
0
    def test_thumbnail_cache(self):  # {{{
        ' Test the operation of the thumbnail cache '
        # A freshly created cache must not expose total_size yet; the test
        # treats the presence of that attribute as proof the index was read.
        c = self.init_tc()
        self.assertFalse(hasattr(c, 'total_size'),
                         'index read on initialization')
        c.invalidate(666)
        self.assertFalse(hasattr(c, 'total_size'), 'index read on invalidate')

        # The value returned by basic_fill() must match the cache's
        # total_size, and the fill must leave exactly five entries.
        self.assertEqual(self.basic_fill(c), c.total_size)
        self.assertEqual(5, len(c))

        # Lookup returns a (data, timestamp) pair: entry i is expected to have
        # timestamp i and a payload of the digit i repeated i * 1000 times.
        for i in (3, 4, 2, 5, 1):
            data, ts = c[i]
            self.assertEqual(i, ts, 'timestamp not correct')
            self.assertEqual((('%d' % i) * (i * 1000)).encode('ascii'), data)
        # Persistence: the size and the item order recorded before shutdown()
        # must be reported again by a newly initialized cache.
        c.set_group_id('a')
        self.basic_fill(c)
        order = tuple(c.items)
        ts = c.current_size
        c.shutdown()
        c = self.init_tc()
        self.assertEqual(c.current_size, ts,
                         'size not preserved after restart')
        self.assertEqual(order, tuple(c.items),
                         'order not preserved after restart')
        c.shutdown()
        # Invalidation with a tuple of ids: the timestamp half of the lookup
        # result must be None afterwards. Per the assertion messages the first
        # call happens before load_index() and the second one after it.
        c = self.init_tc()
        c.invalidate((1, ))
        self.assertIsNone(c[1][1], 'invalidate before load_index() failed')
        c.invalidate((2, ))
        self.assertIsNone(c[2][1], 'invalidate after load_index() failed')
        # Shrinking: after touching entry 1 and calling set_size(0.001) the
        # cache must hold at most 1024 bytes and only entry 1.
        # NOTE(review): set_size() presumably takes megabytes - confirm.
        c.set_group_id('a')
        c[1]
        c.set_size(0.001)
        self.assertLessEqual(c.current_size, 1024, 'set_size() failed')
        self.assertEqual(len(c), 1)
        self.assertIn(1, c)
        # Inserting a payload one byte smaller than max_size must push out the
        # existing entry so that only the new entry (9) remains.
        c.insert(9, 9, b'x' * (c.max_size - 1))
        self.assertEqual(len(c), 1)
        self.assertLessEqual(c.current_size, c.max_size,
                             'insert() did not prune')
        self.assertIn(9, c)
        # empty() must leave no entries and no files under c.location.
        c.empty()
        self.assertEqual(c.total_size, 0)
        self.assertEqual(len(c), 0)
        self.assertEqual(tuple(walk(c.location)), ())
        # Changing the thumbnail size must drop every entry and every file.
        c = self.init_tc()
        self.basic_fill(c)
        self.assertEqual(len(c), 5)
        c.set_thumbnail_size(200, 201)
        self.assertIsNone(c[1][0])
        self.assertEqual(len(c), 0)
        self.assertEqual(tuple(walk(c.location)), ())
Ejemplo n.º 2
0
    def test_thumbnail_cache(self):  # {{{
        ' Test the operation of the thumbnail cache '
        # A freshly created cache must not expose total_size yet; the test
        # treats the presence of that attribute as proof the index was read.
        c = self.init_tc()
        self.assertFalse(hasattr(c, 'total_size'), 'index read on initialization')
        c.invalidate(666)
        self.assertFalse(hasattr(c, 'total_size'), 'index read on invalidate')

        # The value returned by basic_fill() must match the cache's
        # total_size, and the fill must leave exactly five entries.
        self.assertEqual(self.basic_fill(c), c.total_size)
        self.assertEqual(5, len(c))

        # Lookup returns a (data, timestamp) pair: entry i is expected to have
        # timestamp i and a payload of the digit i repeated i * 1000 times.
        for i in (3, 4, 2, 5, 1):
            data, ts = c[i]
            self.assertEqual(i, ts, 'timestamp not correct')
            self.assertEqual((('%d'%i) * (i*1000)).encode('ascii'), data)
        # Persistence: the size and the item order recorded before shutdown()
        # must be reported again by a newly initialized cache.
        c.set_group_id('a')
        self.basic_fill(c)
        order = tuple(c.items)
        ts = c.current_size
        c.shutdown()
        c = self.init_tc()
        self.assertEqual(c.current_size, ts, 'size not preserved after restart')
        self.assertEqual(order, tuple(c.items), 'order not preserved after restart')
        c.shutdown()
        # Invalidation with a tuple of ids: the timestamp half of the lookup
        # result must be None afterwards. Per the assertion messages the first
        # call happens before load_index() and the second one after it.
        c = self.init_tc()
        c.invalidate((1,))
        self.assertIsNone(c[1][1], 'invalidate before load_index() failed')
        c.invalidate((2,))
        self.assertIsNone(c[2][1], 'invalidate after load_index() failed')
        # Shrinking: after touching entry 1 and calling set_size(0.001) the
        # cache must hold at most 1024 bytes and only entry 1.
        # NOTE(review): set_size() presumably takes megabytes - confirm.
        c.set_group_id('a')
        c[1]
        c.set_size(0.001)
        self.assertLessEqual(c.current_size, 1024, 'set_size() failed')
        self.assertEqual(len(c), 1)
        self.assertIn(1, c)
        # Inserting a payload one byte smaller than max_size must push out the
        # existing entry so that only the new entry (9) remains.
        c.insert(9, 9, b'x' * (c.max_size-1))
        self.assertEqual(len(c), 1)
        self.assertLessEqual(c.current_size, c.max_size, 'insert() did not prune')
        self.assertIn(9, c)
        # empty() must leave no entries and no files under c.location.
        c.empty()
        self.assertEqual(c.total_size, 0)
        self.assertEqual(len(c), 0)
        self.assertEqual(tuple(walk(c.location)), ())
        # Changing the thumbnail size must drop every entry and every file.
        c = self.init_tc()
        self.basic_fill(c)
        self.assertEqual(len(c), 5)
        c.set_thumbnail_size(200, 201)
        self.assertIsNone(c[1][0])
        self.assertEqual(len(c), 0)
        self.assertEqual(tuple(walk(c.location)), ())
Ejemplo n.º 3
0
def find_programs(extensions):
    """Return the desktop applications that can open files with the given extensions.

    The XDG data directories (XDG_DATA_HOME first, then XDG_DATA_DIRS) are
    searched for ``applications/*.desktop`` files. Each file is parsed with
    parse_desktop_file() and kept when its ``MimeType`` entry intersects the
    MIME types guessed for ``extensions``.

    :param extensions: iterable of file extensions without the leading dot;
        they are lower-cased before use.
    :return: list of process_desktop_file() results, sorted by ``Name``.
    """
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')]
    data_dirs += (os.environ.get('XDG_DATA_DIRS') or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # The dict is keyed by basename, so the duplicate check must
                # use the basename as well. This lets a file from an earlier
                # (higher priority) data directory shadow same-named files in
                # later directories instead of being overwritten by them.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in iteritems(desktop_files):
        try:
            data = parse_desktop_file(path)
        except Exception:
            # A single broken .desktop file must not abort the whole scan;
            # report it and carry on with the remaining files.
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            ans.append(process_desktop_file(data))
    ans.sort(key=lambda d:sort_key(d.get('Name')))
    return ans
Ejemplo n.º 4
0
def do_convert(path, temp_path, key, instance):
    """Render the book at ``path`` in a pipe worker and record the output size.

    The worker writes into ``temp_path/instance['path']``; its stdout and
    stderr go to a temporary log file. A non-zero exit status raises
    ConversionFailure carrying the log contents. On success the summed size
    of all files produced is stored in ``instance['cache_size']``.
    """
    out_dir = os.path.join(temp_path, instance['path'])
    worker = None
    try:
        with TemporaryFile('log.txt') as log_path:
            with open(log_path, 'w+b') as log_stream:
                worker = start_pipe_worker(
                    'from calibre.srv.render_book import viewer_main; viewer_main()',
                    stdout=log_stream, stderr=log_stream)
                running_workers.append(worker)
                book_info = {
                    'size': instance['file_size'],
                    'mtime': instance['file_mtime'],
                    'hash': key,
                }
                request = msgpack_dumps((path, out_dir, book_info))
                worker.stdin.write(request)
                worker.stdin.close()
            # The log stream is closed before waiting so that the log can be
            # reopened for reading if the worker failed.
            exit_code = worker.wait()
            if exit_code != 0:
                with lopen(log_path, 'rb') as log_stream:
                    worker_output = log_stream.read().decode('utf-8', 'replace')
                raise ConversionFailure(path, worker_output)
    finally:
        # Best effort: the worker may never have been started or registered.
        try:
            running_workers.remove(worker)
        except Exception:
            pass
    instance['cache_size'] = sum(os.path.getsize(f) for f in walk(out_dir))
Ejemplo n.º 5
0
def write_oebbook(oeb, path):
    """Write ``oeb`` to ``path`` with OEBWriter and locate the resulting OPF.

    Returns the first file yielded by walk(path) whose name ends with
    '.opf', or None when there is no such file.
    """
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    writer = OEBWriter()
    writer(oeb, path)
    return next((name for name in walk(path) if name.endswith('.opf')), None)
Ejemplo n.º 6
0
def docx_exploder(path, tdir, question=lambda x:True):
    """Unzip the DOCX at ``path`` into ``tdir`` and pretty-print its XML.

    Returns the path of the first file named 'document.xml' found under
    ``tdir``; raises Error when there is none. ``question`` is accepted but
    not used here.
    """
    zipextract(path, tdir)
    from calibre.ebooks.docx.dump import pretty_all_xml_in_dir
    pretty_all_xml_in_dir(tdir)
    document = next(
        (entry for entry in walk(tdir)
         if os.path.basename(entry) == 'document.xml'),
        None)
    if document is None:
        raise Error('Invalid book: Could not find document.xml')
    return document
Ejemplo n.º 7
0
def write_oebbook(oeb, path):
    """Serialize ``oeb`` into ``path`` and return the path of its OPF file.

    The result is the first '.opf' file yielded by walk(path); None is
    returned if the writer produced no such file.
    """
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    write = OEBWriter()
    write(oeb, path)
    opf_files = (candidate for candidate in walk(path)
                 if candidate.endswith('.opf'))
    return next(opf_files, None)
Ejemplo n.º 8
0
 def create_vfs():
     """Build a mapping of virtual '__stdlib__/...' names to .pyj source text.

     Every '.pyj' file under rapydscript_dir is read as UTF-8 and stored
     under its path relative to rapydscript_dir, with forward slashes.
     """
     vfs = {}
     pyj_files = (p for p in walk(rapydscript_dir) if p.endswith('.pyj'))
     for src in pyj_files:
         rel = os.path.relpath(src, rapydscript_dir).replace('\\', '/')
         with open(src, 'rb') as stream:
             vfs['__stdlib__/' + rel] = stream.read().decode('utf-8')
     return vfs
Ejemplo n.º 9
0
def read_images_from_folder(path):
    """Map lower-cased relative image names to their file paths.

    Only files whose (lower-cased) extension is in IMAGE_EXTENSIONS are
    included. Keys are relative to ``path`` and use '/' as the separator.
    """
    root = os.path.abspath(path)
    found = {}
    for filepath in walk(root):
        key = os.path.relpath(filepath, root).replace(os.sep, '/').lower()
        if key.rpartition('.')[-1] not in IMAGE_EXTENSIONS:
            continue
        found[key] = filepath
    return found
Ejemplo n.º 10
0
def pretty_all_xml_in_dir(path):
    """Rewrite every non-empty .xml and .rels file under ``path`` pretty-printed.

    Files are rewritten in place as UTF-8 with an XML declaration; empty
    files are left untouched.
    """
    for name in walk(path):
        if not name.endswith(('.xml', '.rels')):
            continue
        with open(name, 'r+b') as stream:
            raw = stream.read()
            if not raw:
                continue
            root = etree.fromstring(raw)
            # Rewind and drop the old content before writing the new form.
            stream.seek(0)
            stream.truncate()
            stream.write(etree.tostring(
                root, pretty_print=True, encoding='utf-8',
                xml_declaration=True))
Ejemplo n.º 11
0
def do_convert(path, temp_path, key, instance):
    """Run render_for_viewer in a forked job and record the output size.

    Output goes to ``temp_path/instance['path']``; afterwards the summed size
    of all files found there is stored in ``instance['cache_size']``.
    """
    tdir = os.path.join(temp_path, instance['path'])
    book_info = {
        'size': instance['file_size'],
        'mtime': instance['file_mtime'],
        'hash': key,
    }
    fork_job(
        'calibre.srv.render_book', 'render_for_viewer',
        args=(path, tdir, book_info), timeout=3000, no_output=True)
    instance['cache_size'] = sum(os.path.getsize(f) for f in walk(tdir))
Ejemplo n.º 12
0
def pretty_all_xml_in_dir(path):
    """Pretty-print, in place, each non-empty .xml/.rels file below ``path``.

    Content is parsed with safe_xml_fromstring() and written back as UTF-8
    with an XML declaration; empty files are skipped.
    """
    for name in walk(path):
        if not name.endswith(('.xml', '.rels')):
            continue
        with open(name, 'r+b') as stream:
            raw = stream.read()
            if not raw:
                continue
            root = safe_xml_fromstring(raw)
            # Rewind and drop the old content before writing the new form.
            stream.seek(0)
            stream.truncate()
            stream.write(etree.tostring(
                root, pretty_print=True, encoding='utf-8',
                xml_declaration=True))
Ejemplo n.º 13
0
 def get_content_server_strings(self):
     """Generate, upload and git-add the content server translation template.

     All '.pyj' files below ``SRC/pyj`` are passed to create_pot(); the
     result is written as UTF-8 to
     ``TRANSLATIONS/content-server/content-server.pot``.
     """
     self.info('Generating translation template for content_server')
     from calibre import walk
     from calibre.utils.rapydscript import create_pot
     pyj_sources = (
         name for name in walk(self.j(self.SRC, 'pyj'))
         if name.endswith('.pyj'))
     template = create_pot(pyj_sources).encode('utf-8')
     dest = self.j(self.TRANSLATIONS, 'content-server', 'content-server.pot')
     with open(dest, 'wb') as out:
         out.write(template)
     self.upload_pot(resource='content_server')
     self.git(['add', dest])
Ejemplo n.º 14
0
 def get_content_server_strings(self):
     """Build the content server .pot file, upload it and stage it in git.

     The template is created by create_pot() from every '.pyj' file under
     ``SRC/pyj`` and saved UTF-8 encoded at
     ``TRANSLATIONS/content-server/content-server.pot``.
     """
     self.info('Generating translation template for content_server')
     from calibre import walk
     from calibre.utils.rapydscript import create_pot
     pyj_sources = (
         name for name in walk(self.j(self.SRC, 'pyj'))
         if name.endswith('.pyj'))
     template = create_pot(pyj_sources).encode('utf-8')
     dest = self.j(self.TRANSLATIONS, 'content-server', 'content-server.pot')
     with open(dest, 'wb') as out:
         out.write(template)
     self.upload_pot(resource='content_server')
     self.git(['add', dest])
Ejemplo n.º 15
0
    def do_scan(self):
        """Rescan the font folders, refresh the metadata cache and build families.

        Worker processes skip the scan and only rebuild families from the
        reloaded cache. Otherwise every file with an allowed extension is
        identified by path, size and mtime; cached metadata is reused when
        that identifier is unchanged, and read again otherwise. The cache is
        written out only when the set of identifiers changed.
        """
        self.reload_cache()

        if not isworker:
            # Font files typically don't change often enough to justify a
            # rescan in a worker process, so scanning is done only here.
            previous = self.cached_fonts.copy()
            self.cached_fonts.clear()
            for folder in self.folders:
                if not os.path.isdir(folder):
                    continue
                try:
                    found = tuple(walk(folder))
                except EnvironmentError as err:
                    if DEBUG:
                        prints('Failed to walk font folder:', folder,
                               as_unicode(err))
                    continue
                for path in found:
                    ext = path.rpartition('.')[-1].lower()
                    if ext not in self.allowed_extensions:
                        continue
                    if not os.path.isfile(path):
                        continue
                    path = os.path.normcase(os.path.abspath(path))
                    try:
                        st = os.stat(path)
                    except EnvironmentError:
                        continue
                    fileid = '{0}||{1}:{2}'.format(path, st.st_size,
                                                   st.st_mtime)
                    if fileid in previous:
                        # Size and modification time are unchanged, so the
                        # previously cached metadata is still valid.
                        self.cached_fonts[fileid] = previous[fileid]
                    else:
                        try:
                            self.read_font_metadata(path, fileid)
                        except Exception as err:
                            if DEBUG:
                                prints(
                                    'Failed to read metadata from font file:',
                                    path, as_unicode(err))

            if frozenset(previous) != frozenset(self.cached_fonts):
                # Write out the cache only if some font files have changed
                self.write_cache()

        self.build_families()
Ejemplo n.º 16
0
def render_cover(opf, opf_path, zf, reader=None):
    """Render the first spine item of a book as its cover.

    :param opf: object providing first_spine_item().
    :param opf_path: '/'-separated path of the OPF inside the archive.
    :param zf: archive object; its extractall() is called.
    :param reader: optional object whose encryption_meta.is_encrypted() is
        consulted for the cover page.
    :return: the result of render_html_svg_workaround(), or None when there
        is no first spine item, the page is encrypted, or the page file does
        not exist after extraction.
    """
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log

    cpage = opf.first_spine_item()
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        return

    with TemporaryDirectory('_epub_meta') as tdir:
        # NOTE(review): CurrentDir presumably makes tdir the working
        # directory, so extractall() and walk('.') operate inside it - confirm.
        with CurrentDir(tdir):
            zf.extractall()
            opf_path = opf_path.replace('/', os.sep)
            # The spine item is resolved relative to the OPF's directory.
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return

            if isosx:
                # On OS X trying to render a HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts as well as any @font-face
                # rules
                for f in walk('.'):
                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
                        os.remove(f)
                ffpat = re.compile(br'@font-face.*?{.*?}',
                                   re.DOTALL | re.IGNORECASE)
                # Rewrite the cover page in place without @font-face rules.
                with open(cpage, 'r+b') as f:
                    raw = f.read()
                    f.truncate(0)
                    f.seek(0)
                    raw = ffpat.sub(b'', raw)
                    f.write(raw)
                from calibre.ebooks.chardet import xml_to_unicode
                raw = xml_to_unicode(raw,
                                     strip_encoding_pats=True,
                                     resolve_entities=True)[0]
                from lxml import html
                # Apply the same @font-face removal to every existing file
                # referenced by a <link href=...> element of the cover page.
                for link in html.fromstring(raw).xpath('//link'):
                    href = link.get('href', '')
                    if href:
                        path = os.path.join(os.path.dirname(cpage), href)
                        if os.path.exists(path):
                            with open(path, 'r+b') as f:
                                raw = f.read()
                                f.truncate(0)
                                f.seek(0)
                                raw = ffpat.sub(b'', raw)
                                f.write(raw)

            return render_html_svg_workaround(cpage, default_log)
Ejemplo n.º 17
0
def render_cover(opf, opf_path, zf, reader=None):
    """Render the first spine item of a book as its cover.

    :param opf: object providing first_spine_item().
    :param opf_path: '/'-separated path of the OPF inside the archive.
    :param zf: archive object; its extractall() is called.
    :param reader: optional object whose encryption_meta.is_encrypted() is
        consulted for the cover page.
    :return: the result of render_html_svg_workaround(), or None when there
        is no first spine item, the page is encrypted, or the page file does
        not exist after extraction.
    """
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log

    cpage = opf.first_spine_item()
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        return

    with TemporaryDirectory('_epub_meta') as tdir:
        # NOTE(review): CurrentDir presumably makes tdir the working
        # directory, so extractall() and walk('.') operate inside it - confirm.
        with CurrentDir(tdir):
            zf.extractall()
            opf_path = opf_path.replace('/', os.sep)
            # The spine item is resolved relative to the OPF's directory.
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return

            if isosx:
                # On OS X trying to render a HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts as well as any @font-face
                # rules
                for f in walk('.'):
                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
                        os.remove(f)
                ffpat = re.compile(br'@font-face.*?{.*?}',
                        re.DOTALL|re.IGNORECASE)
                # Rewrite the cover page in place without @font-face rules.
                with open(cpage, 'r+b') as f:
                    raw = f.read()
                    f.truncate(0)
                    f.seek(0)
                    raw = ffpat.sub(b'', raw)
                    f.write(raw)
                from calibre.ebooks.chardet import xml_to_unicode
                raw = xml_to_unicode(raw,
                        strip_encoding_pats=True, resolve_entities=True)[0]
                from lxml import html
                # Apply the same @font-face removal to every existing file
                # referenced by a <link href=...> element of the cover page.
                for link in html.fromstring(raw).xpath('//link'):
                    href = link.get('href', '')
                    if href:
                        path = os.path.join(os.path.dirname(cpage), href)
                        if os.path.exists(path):
                            with open(path, 'r+b') as f:
                                raw = f.read()
                                f.truncate(0)
                                f.seek(0)
                                raw = ffpat.sub(b'', raw)
                                f.write(raw)

            return render_html_svg_workaround(cpage, default_log)
Ejemplo n.º 18
0
 def strip_files(self):
     """Strip the binaries of the build and report how much space was saved.

     The set of files consists of everything in ``bin_dir``, same-named files
     that exist in the parent of ``bin_dir``, and every ELF file found under
     ``lib_dir`` (after resolving symlinks).
     """
     from calibre import walk
     in_bin = {self.j(self.bin_dir, x) for x in os.listdir(self.bin_dir)}
     beside_bin = {
         self.j(self.d(self.bin_dir), x) for x in os.listdir(self.bin_dir)}
     files = in_bin | {x for x in beside_bin if os.path.exists(x)}
     for candidate in walk(self.lib_dir):
         real = os.path.realpath(candidate)
         if real in files or not is_elf(real):
             continue
         files.add(real)
     self.info('Stripping %d files...' % len(files))
     before = sum(os.path.getsize(x) for x in files)
     # This resolves to the module-level strip_files(), not this method.
     strip_files(files)
     after = sum(os.path.getsize(x) for x in files)
     saved_mb = (before - after)/(1024*1024.)
     self.info('Stripped %.1f MB' % saved_mb)
Ejemplo n.º 19
0
    def copy_libs(self):
        """Copy shared libraries, Qt plugins and ImageMagick into the build tree.

        Creates ``self.lib_dir`` and ``self.bin_dir``, copies every entry of
        ``binary_includes`` plus libstdc++, libffi and libpython, copies the
        Qt plugins (except designer, sqldrivers and codecs) and the
        ImageMagick library and config directories, and sets
        ``self.magick_base`` to the ImageMagick directory name.
        """
        self.info('Copying libs...')
        os.mkdir(self.lib_dir)
        os.mkdir(self.bin_dir)

        gcc = subprocess.Popen(["gcc-config", "-c"],
                               stdout=subprocess.PIPE).communicate()[0]
        if isinstance(gcc, bytes):
            # Pipes yield bytes; decode so that the string operations below
            # also work where bytes and text are distinct types.
            gcc = gcc.decode('utf-8')
        chost, _, gcc = gcc.rpartition('-')
        gcc_lib = '/usr/lib/gcc/%s/%s/' % (chost.strip(), gcc.strip())
        stdcpp = gcc_lib + 'libstdc++.so.?'
        stdcpp = glob.glob(stdcpp)[-1]
        ffi = gcc_lib + 'libffi.so.?'
        ffi = glob.glob(ffi)
        if ffi:
            ffi = ffi[-1]
        else:
            # Fall back to the system-wide libffi.
            ffi = glob.glob('/usr/lib/libffi.so.?')[-1]

        for x in binary_includes + [stdcpp, ffi]:
            dest = self.bin_dir if '/bin/' in x else self.lib_dir
            shutil.copy2(x, dest)
        # Name the target explicitly instead of relying on whatever value the
        # loop above left in ``dest`` (which was always lib_dir).
        shutil.copy2('/usr/lib/libpython%s.so.1.0' % self.py_ver,
                     self.lib_dir)

        base = self.j(QTDIR, 'plugins')
        dest = self.j(self.lib_dir, 'qt_plugins')
        os.mkdir(dest)
        for x in os.listdir(base):
            y = self.j(base, x)
            if x not in ('designer', 'sqldrivers', 'codecs'):
                shutil.copytree(y, self.j(dest, x))

        im = glob.glob(MAGICK_PREFIX + '/lib/ImageMagick-*')[-1]
        self.magick_base = os.path.basename(im)
        dest = self.j(self.lib_dir, self.magick_base)
        shutil.copytree(im, dest, ignore=shutil.ignore_patterns('*.a'))
        from calibre import walk
        for x in walk(dest):
            if x.endswith('.la'):
                # Blank out the libdir= lines. The file is read and written
                # as bytes, and both handles are closed deterministically.
                with open(x, 'rb') as f:
                    raw = f.read()
                raw = re.sub(br'libdir=.*', b'', raw)
                with open(x, 'wb') as f:
                    f.write(raw)

        dest = self.j(dest, 'config')
        src = self.j(MAGICK_PREFIX, 'share', self.magick_base, 'config')
        for x in glob.glob(src + '/*'):
            d = self.j(dest, os.path.basename(x))
            if os.path.isdir(x):
                shutil.copytree(x, d)
            else:
                shutil.copyfile(x, d)
Ejemplo n.º 20
0
    def do_scan(self):
        """Rescan the font folders, refresh the metadata cache and build families.

        Worker processes skip the scan and only rebuild families from the
        reloaded cache. Otherwise every file with an allowed extension is
        identified by path, size and mtime; cached metadata is reused when
        that identifier is unchanged, and read again otherwise. The cache is
        written out only when the set of identifiers changed.
        """
        self.reload_cache()

        if not isworker:
            # Font files typically don't change often enough to justify a
            # rescan in a worker process, so scanning is done only here.
            previous = self.cached_fonts.copy()
            self.cached_fonts.clear()
            for folder in self.folders:
                if not os.path.isdir(folder):
                    continue
                try:
                    found = tuple(walk(folder))
                except EnvironmentError as err:
                    if DEBUG:
                        prints('Failed to walk font folder:', folder,
                               as_unicode(err))
                    continue
                for path in found:
                    ext = path.rpartition('.')[-1].lower()
                    if ext not in self.allowed_extensions:
                        continue
                    if not os.path.isfile(path):
                        continue
                    path = os.path.normcase(os.path.abspath(path))
                    try:
                        st = os.stat(path)
                    except EnvironmentError:
                        continue
                    fileid = '{0}||{1}:{2}'.format(path, st.st_size,
                                                   st.st_mtime)
                    if fileid in previous:
                        # Size and modification time are unchanged, so the
                        # previously cached metadata is still valid.
                        self.cached_fonts[fileid] = previous[fileid]
                    else:
                        try:
                            self.read_font_metadata(path, fileid)
                        except Exception as err:
                            if DEBUG:
                                prints(
                                    'Failed to read metadata from font file:',
                                    path, as_unicode(err))

            if frozenset(previous) != frozenset(self.cached_fonts):
                # Write out the cache only if some font files have changed
                self.write_cache()

        self.build_families()
Ejemplo n.º 21
0
    def copy_libs(self):
        """Copy shared libraries, Qt plugins and ImageMagick into the build tree.

        Creates ``self.lib_dir`` and ``self.bin_dir``, copies every entry of
        ``binary_includes`` plus libstdc++, libffi and libpython, copies the
        Qt plugins (except designer, sqldrivers and codecs) and the
        ImageMagick library and config directories, and sets
        ``self.magick_base`` to the ImageMagick directory name.
        """
        self.info("Copying libs...")
        os.mkdir(self.lib_dir)
        os.mkdir(self.bin_dir)

        gcc = subprocess.Popen(["gcc-config", "-c"], stdout=subprocess.PIPE).communicate()[0]
        if isinstance(gcc, bytes):
            # Pipes yield bytes; decode so that the string operations below
            # also work where bytes and text are distinct types.
            gcc = gcc.decode("utf-8")
        chost, _, gcc = gcc.rpartition("-")
        gcc_lib = "/usr/lib/gcc/%s/%s/" % (chost.strip(), gcc.strip())
        stdcpp = gcc_lib + "libstdc++.so.?"
        stdcpp = glob.glob(stdcpp)[-1]
        ffi = gcc_lib + "libffi.so.?"
        ffi = glob.glob(ffi)
        if ffi:
            ffi = ffi[-1]
        else:
            # Fall back to the system-wide libffi.
            ffi = glob.glob("/usr/lib/libffi.so.?")[-1]

        for x in binary_includes + [stdcpp, ffi]:
            dest = self.bin_dir if "/bin/" in x else self.lib_dir
            shutil.copy2(x, dest)
        # Name the target explicitly instead of relying on whatever value the
        # loop above left in ``dest`` (which was always lib_dir).
        shutil.copy2("/usr/lib/libpython%s.so.1.0" % self.py_ver, self.lib_dir)

        base = self.j(QTDIR, "plugins")
        dest = self.j(self.lib_dir, "qt_plugins")
        os.mkdir(dest)
        for x in os.listdir(base):
            y = self.j(base, x)
            if x not in ("designer", "sqldrivers", "codecs"):
                shutil.copytree(y, self.j(dest, x))

        im = glob.glob(MAGICK_PREFIX + "/lib/ImageMagick-*")[-1]
        self.magick_base = os.path.basename(im)
        dest = self.j(self.lib_dir, self.magick_base)
        shutil.copytree(im, dest, ignore=shutil.ignore_patterns("*.a"))
        from calibre import walk

        for x in walk(dest):
            if x.endswith(".la"):
                # Blank out the libdir= lines. The file is read and written
                # as bytes, and both handles are closed deterministically.
                with open(x, "rb") as f:
                    raw = f.read()
                raw = re.sub(br"libdir=.*", b"", raw)
                with open(x, "wb") as f:
                    f.write(raw)

        dest = self.j(dest, "config")
        src = self.j(MAGICK_PREFIX, "share", self.magick_base, "config")
        for x in glob.glob(src + "/*"):
            d = self.j(dest, os.path.basename(x))
            if os.path.isdir(x):
                shutil.copytree(x, d)
            else:
                shutil.copyfile(x, d)
Ejemplo n.º 22
0
 def strip_files(self):
     """Strip build binaries and log the number of megabytes saved.

     Files considered: everything in ``bin_dir``, same-named files existing
     in the parent of ``bin_dir``, and every ELF file under ``lib_dir`` (with
     symlinks resolved).
     """
     from calibre import walk
     in_bin = {self.j(self.bin_dir, x) for x in os.listdir(self.bin_dir)}
     beside_bin = {
         self.j(self.d(self.bin_dir), x) for x in os.listdir(self.bin_dir)}
     files = in_bin | {x for x in beside_bin if os.path.exists(x)}
     for candidate in walk(self.lib_dir):
         real = os.path.realpath(candidate)
         if real in files or not is_elf(real):
             continue
         files.add(real)
     self.info('Stripping %d files...' % len(files))
     before = sum(os.path.getsize(x) for x in files)
     # This resolves to the module-level strip_files(), not this method.
     strip_files(files)
     after = sum(os.path.getsize(x) for x in files)
     saved_mb = (before - after)/(1024*1024.)
     self.info('Stripped %.1f MB' % saved_mb)
Ejemplo n.º 23
0
 def replace_with_symlinks(self, lang_dir):
     """Replace files identical to their parent-directory copy with symlinks.

     Each file under ``lang_dir`` is compared with the file at the same
     relative path one directory level up. When sizes and contents match, the
     file is replaced by a relative symlink to that copy, saving disk space
     and upload bandwidth.
     """
     from calibre import walk
     base = self.a(lang_dir)
     for dup in walk(base):
         rel = os.path.relpath(dup, base)
         orig = self.j(self.d(base), rel)
         try:
             orig_size = os.stat(orig).st_size
         except EnvironmentError:
             # No readable counterpart one level up: nothing to link to.
             continue
         if orig_size != os.stat(dup).st_size:
             continue
         if not filecmp._do_cmp(dup, orig):
             continue
         os.remove(dup)
         os.symlink(os.path.relpath(orig, self.d(dup)), dup)
Ejemplo n.º 24
0
 def replace_with_symlinks(self, lang_dir):
     """Turn files that duplicate their parent-level copy into symlinks.

     For every file under ``lang_dir`` the file at the same relative path in
     the parent directory is looked up. If both have the same size and
     content, the file is removed and recreated as a relative symlink.
     """
     from calibre import walk
     base = self.a(lang_dir)
     for dup in walk(base):
         rel = os.path.relpath(dup, base)
         orig = self.j(self.d(base), rel)
         try:
             orig_size = os.stat(orig).st_size
         except EnvironmentError:
             # No readable counterpart one level up: nothing to link to.
             continue
         if orig_size != os.stat(dup).st_size:
             continue
         if not filecmp._do_cmp(dup, orig):
             continue
         os.remove(dup)
         os.symlink(os.path.relpath(orig, self.d(dup)), dup)
Ejemplo n.º 25
0
    def copy_libs(self):
        """Copy shared libraries, Qt plugins and ImageMagick into the build tree.

        Creates ``self.lib_dir`` and ``self.bin_dir``, copies every entry of
        ``binary_includes`` plus libstdc++, libffi and libpython, copies the
        Qt plugins (except designer, sqldrivers and codecs) and the
        ImageMagick library and config directories, and sets
        ``self.magick_base`` to the ImageMagick directory name.
        """
        self.info('Copying libs...')
        os.mkdir(self.lib_dir)
        os.mkdir(self.bin_dir)

        gcc = subprocess.Popen(["gcc-config", "-c"], stdout=subprocess.PIPE).communicate()[0]
        if isinstance(gcc, bytes):
            # Pipes yield bytes; decode so that the string operations below
            # also work where bytes and text are distinct types.
            gcc = gcc.decode('utf-8')
        chost, _, gcc = gcc.rpartition('-')
        gcc_lib = '/usr/lib/gcc/%s/%s/'%(chost.strip(), gcc.strip())
        stdcpp = gcc_lib+'libstdc++.so.?'
        stdcpp = glob.glob(stdcpp)[-1]
        ffi = gcc_lib+'libffi.so.?'
        ffi = glob.glob(ffi)
        if ffi:
            ffi = ffi[-1]
        else:
            # Fall back to the system-wide libffi.
            ffi = glob.glob('/usr/lib/libffi.so.?')[-1]

        for x in binary_includes + [stdcpp, ffi]:
            dest = self.bin_dir if '/bin/' in x else self.lib_dir
            shutil.copy2(x, dest)
        # Name the target explicitly instead of relying on whatever value the
        # loop above left in ``dest`` (which was always lib_dir).
        shutil.copy2('/usr/lib/libpython%s.so.1.0'%self.py_ver, self.lib_dir)

        base = self.j(QTDIR, 'plugins')
        dest = self.j(self.lib_dir, 'qt_plugins')
        os.mkdir(dest)
        for x in os.listdir(base):
            y = self.j(base, x)
            if x not in ('designer', 'sqldrivers', 'codecs'):
                shutil.copytree(y, self.j(dest, x))

        im = glob.glob(MAGICK_PREFIX + '/lib/ImageMagick-*')[-1]
        self.magick_base = os.path.basename(im)
        dest = self.j(self.lib_dir, self.magick_base)
        shutil.copytree(im, dest, ignore=shutil.ignore_patterns('*.a'))
        from calibre import walk
        for x in walk(dest):
            if x.endswith('.la'):
                # Blank out the libdir= lines. The file is read and written
                # as bytes, and both handles are closed deterministically.
                with open(x, 'rb') as f:
                    raw = f.read()
                raw = re.sub(br'libdir=.*', b'', raw)
                with open(x, 'wb') as f:
                    f.write(raw)

        dest = self.j(dest, 'config')
        src = self.j(MAGICK_PREFIX, 'share', self.magick_base, 'config')
        for x in glob.glob(src+'/*'):
            d = self.j(dest, os.path.basename(x))
            if os.path.isdir(x):
                shutil.copytree(x, d)
            else:
                shutil.copyfile(x, d)
Ejemplo n.º 26
0
def extract_comic(path_to_comic_file):
    '''
    Unpack the comic archive into a new persistent temporary directory and
    return that directory. Every '#' in the base name of an extracted entry
    yielded by walk() is replaced with '_'.
    '''
    tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
    if not isinstance(tdir, unicode):
        # A unicode path is needed in case the archive contains wrongly
        # encoded unicode file/dir names
        tdir = tdir.decode(filesystem_encoding)
    extract(path_to_comic_file, tdir)
    for entry in walk(tdir):
        folder, old_name = os.path.split(entry)
        new_name = old_name.replace('#', '_')
        if new_name != old_name:
            os.rename(entry, os.path.join(folder, new_name))
    return tdir
Ejemplo n.º 27
0
    def extract(self, stream):
        """Extract the DOCX in ``stream`` and index the extracted files.

        The archive is unpacked into a new persistent temporary directory
        stored in ``self.tdir``. If the regular ZIP reader fails, the failure
        is logged and a more forgiving parser is tried from the start of the
        stream. ``self.names`` then maps each '/'-separated path relative to
        ``self.tdir`` to the absolute path of the extracted file.
        """
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except Exception:
            # Only ordinary errors select the fallback parser; a bare except
            # would also trap KeyboardInterrupt and SystemExit.
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f
Ejemplo n.º 28
0
    def extract(self, stream):
        """Unpack the DOCX in ``stream`` and record the extracted file names.

        Files go into a new persistent temporary directory kept in
        ``self.tdir``. When the regular ZIP reader raises, the error is
        logged and a more forgiving parser is run from the start of the
        stream. ``self.names`` maps each '/'-separated path relative to
        ``self.tdir`` to the absolute path of the extracted file.
        """
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except Exception:
            # Only ordinary errors select the fallback parser; a bare except
            # would also trap KeyboardInterrupt and SystemExit.
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f
Ejemplo n.º 29
0
def extract_comic(path_to_comic_file):
    '''
    Extract the comic archive to a fresh persistent temporary directory,
    rename extracted entries so that their base names contain '_' instead of
    '#', and return the directory.
    '''
    tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
    if not isinstance(tdir, unicode):
        # A unicode path is needed in case the archive contains wrongly
        # encoded unicode file/dir names
        tdir = tdir.decode(filesystem_encoding)
    extract(path_to_comic_file, tdir)
    for entry in walk(tdir):
        folder, old_name = os.path.split(entry)
        new_name = old_name.replace('#', '_')
        if new_name != old_name:
            os.rename(entry, os.path.join(folder, new_name))
    return tdir
Ejemplo n.º 30
0
def read_images_from_folder(path):
    """Map '/'-separated relative image names under ``path`` to file paths.

    Files whose base name starts with '.' or '_' are ignored. A file with
    the extension 'svg' is passed to render_svg() and then recorded under the
    corresponding '.png' name and path. Only extensions listed in
    IMAGE_EXTENSIONS are included.
    """
    path = os.path.abspath(path)
    images = {}
    for filepath in walk(path):
        name = os.path.relpath(filepath, path).replace(os.sep, '/')
        if os.path.basename(name).startswith(('.', '_')):
            continue
        ext = name.rpartition('.')[-1]
        if ext == 'svg':
            render_svg(filepath)
            # From here on refer to the PNG counterpart of the SVG.
            filepath, name, ext = filepath[:-4] + '.png', name[:-4] + '.png', 'png'
        if ext in IMAGE_EXTENSIONS:
            images[name] = filepath
    return images
Ejemplo n.º 31
0
    def unarchive(self, path, tdir):
        """Extract the archive at ``path`` into ``tdir`` and pick the book file.

        Returns ``(file, ext)`` for the first extracted file whose name ends
        with an available input format other than the HTML and archive
        formats; 'txt' and 'rtf' files smaller than 2048 bytes are passed
        over. When nothing matches, the result of find_html_index() is
        returned.
        """
        extract(path, tdir)
        files = [
            f if isinstance(f, unicode) else f.decode(filesystem_encoding)
            for f in walk(tdir)]
        from calibre.customize.ui import available_input_formats
        fmts = set(available_input_formats())
        fmts.difference_update({'htm', 'html', 'xhtm', 'xhtml'})
        fmts.difference_update(set(ARCHIVE_FMTS))

        small_text_fmts = ['txt', 'rtf']
        for ext in fmts:
            suffix = '.'+ext
            for f in files:
                if not f.lower().endswith(suffix):
                    continue
                if ext in small_text_fmts and os.stat(f).st_size < 2048:
                    continue
                return f, ext
        return self.find_html_index(files)
Ejemplo n.º 32
0
def read_images_from_folder(path):
    '''
    Build a map of relative image names to file paths for a folder.

    Every path yielded by ``walk`` is considered. Names starting with ``.``
    or ``_`` are skipped, ``svg`` files are handed to ``render_svg`` and
    recorded with a ``.png`` suffix, and only extensions listed in
    ``IMAGE_EXTENSIONS`` end up in the result.

    :param path: Folder to scan (converted to an absolute path).
    :return: dict of ``/``-separated relative name -> file path.
    '''
    root = os.path.abspath(path)
    found = {}
    for src in walk(root):
        rel = os.path.relpath(src, root).replace(os.sep, '/')
        base = os.path.basename(rel)
        hidden = base.startswith('.') or base.startswith('_')
        if not hidden:
            ext = rel.rpartition('.')[-1]
            if ext == 'svg':
                render_svg(src)
                # Record the entry under the .png name instead of the .svg
                src = src[:-4] + '.png'
                rel = rel[:-4] + '.png'
                ext = 'png'
            if ext in IMAGE_EXTENSIONS:
                found[rel] = src
    return found
Ejemplo n.º 33
0
 def read_icon_theme_dir(dirpath):
     '''
     Index the icons below ``dirpath`` by name, keeping one entry per name.

     Only paths whose extension is in ``exts`` and in which ``sz_pat``
     finds a size are considered; the last match is used, with
     ``scalable`` counted as 100000. For each icon name the entry that
     sorts first (largest size, then earliest seen) is kept.

     :return: dict of icon name -> ``(-size, path)`` for the kept entry.
     '''
     ans = defaultdict(list)
     for path in walk(dirpath):
         name, ext = os.path.splitext(os.path.basename(path))
         if ext not in exts:
             continue
         matches = sz_pat.findall(path)
         if not matches:
             continue
         last = matches[-1]
         sz = 100000 if last == 'scalable' else int(last.partition('x')[0])
         bucket = ans[name]
         # Negated size first so that sorting puts the largest icon in front;
         # the running index keeps ties in discovery order
         bucket.append((-sz, len(bucket), sz, path))
     best = {}
     for name, icons in ans.iteritems():
         icons.sort()
         top = icons[0]
         best[name] = (-top[2], top[3])
     return best
Ejemplo n.º 34
0
def dump(path):
    '''
    Extract the zip file ``path`` and pretty-print its XML parts in place.

    The archive is unpacked into ``<basename without extension>_extracted``
    relative to the current directory (an existing directory of that name
    is removed first). Every ``.xml`` and ``.rels`` file found there is
    re-serialised with pretty printing, UTF-8 encoding and an XML
    declaration.
    '''
    stem = os.path.splitext(os.path.basename(path))[0]
    dest = stem + '_extracted'
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as zf:
        zf.extractall(dest)

    for f in walk(dest):
        if not f.endswith(('.xml', '.rels')):
            continue
        with open(f, 'r+b') as stream:
            root = etree.fromstring(stream.read())
            # Empty the file before writing the re-serialised tree back
            stream.seek(0)
            stream.truncate()
            pretty = etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True)
            stream.write(pretty)

    print (path, 'dumped to', dest)
Ejemplo n.º 35
0
 def read_icon_theme_dir(dirpath):
     '''
     Scan ``dirpath`` for icons and pick one file per icon name.

     A path is used when its extension is in ``exts`` and ``sz_pat``
     matches it; the last match gives the size (``scalable`` is treated as
     100000). Candidates for a name are sorted by descending size and then
     by the order in which they were found, and the first one wins.

     :return: dict mapping icon name to ``(-size, path)`` of the winner.
     '''
     ans = defaultdict(list)
     for path in walk(dirpath):
         name, ext = os.path.splitext(os.path.basename(path))
         if ext in exts:
             found = sz_pat.findall(path)
             if found:
                 token = found[-1]
                 if token == 'scalable':
                     sz = 100000
                 else:
                     sz = int(token.partition('x')[0])
                 # (-size, insertion index, size, path): sorts largest first
                 ans[name].append((-sz, len(ans[name]), sz, path))
     for entries in ans.itervalues():
         entries.sort()
     result = {}
     for key, entries in ans.iteritems():
         winner = entries[0]
         result[key] = (-winner[2], winner[3])
     return result
Ejemplo n.º 36
0
def find_programs(extensions):
    '''
    Find desktop applications able to open files with the given extensions.

    ``.desktop`` files are collected from the ``applications`` folder of
    ``XDG_DATA_HOME`` (default ``~/.local/share``) followed by each entry
    of ``XDG_DATA_DIRS`` (default ``/usr/local/share/:/usr/share/``). When
    the same file name occurs in several folders, the one from the folder
    listed first is used.

    :param extensions: Iterable of file extensions (without the dot).
    :return: List of parsed desktop-file dicts whose ``MimeType`` overlaps
        the MIME types guessed for ``extensions``, sorted by ``Name``.
        ``Icon`` is kept only when it resolves to a string; ``Name``,
        ``GenericName`` and ``Comment`` are passed through
        ``localize_string``.
    '''
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')
    ]
    data_dirs += (os.environ.get('XDG_DATA_DIRS')
                  or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [
        force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs
    ]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # desktop_files is keyed by base name, so the membership test
                # must use the base name too. data_dirs is ordered from
                # highest to lowest precedence, hence the first hit wins.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in iteritems(desktop_files):
        try:
            data = parse_desktop_file(path)
        except Exception:
            # A broken desktop file must not prevent listing the others
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            icon = data.get('Icon', {}).get(None)
            if icon and not os.path.isabs(icon):
                # Resolve a themed icon name to a file path
                icon = find_icons().get(icon)
                if icon:
                    data['Icon'] = icon
                else:
                    data.pop('Icon')
            if not isinstance(data.get('Icon'), string_or_bytes):
                data.pop('Icon', None)
            for k in ('Name', 'GenericName', 'Comment'):
                val = data.get(k)
                if val:
                    data[k] = localize_string(val)
            ans.append(data)
    ans.sort(key=lambda d: sort_key(d.get('Name')))
    return ans
Ejemplo n.º 37
0
    def __call__(self, stream, odir, log):
        '''
        Convert the ODT file in ``stream`` to XHTML plus an OPF inside ``odir``.

        ``odir`` is created when missing and used as the working directory.
        The generated markup is written to ``index.xhtml``, the pictures are
        extracted from the ODT zip container, and ``metadata.opf`` is
        rendered with a manifest of every path ``walk`` finds in the
        directory and a spine holding ``index.xhtml``.

        :param stream: Seekable file-like object containing the ODT data.
        :param odir: Output directory.
        :param log: Callable logger that also provides ``exception()``.
        :return: Absolute path of the generated ``metadata.opf``.
        '''
        from xml.sax.saxutils import escape
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.odt import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator

        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
            log('Extracting ODT file...')
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            if not mi.title:
                mi.title = _('Unknown')
            if not mi.authors:
                mi.authors = [_('Unknown')]
            self.filter_load(stream, mi, log)
            html = self.xhtml()
            # A blanket img specification like this causes problems
            # with EPUB output as the containing element often has
            # an absolute height and width set that is larger than
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            # odf2xhtml creates empty title tag. The title is plain text, so
            # it has to be escaped or characters such as & and < would make
            # the document malformed.
            html = html.replace('<title></title>',
                                '<title>%s</title>' % (escape(mi.title), ))
            try:
                html = self.fix_markup(html, log)
            except Exception:
                # Best effort: carry on with the unfiltered markup
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
                f.write(as_bytes(html))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            opf = OPFCreator(os.path.abspath(getcwd()), mi)
            opf.create_manifest([(os.path.abspath(f2), None)
                                 for f2 in walk(getcwd())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
Ejemplo n.º 38
0
def find_programs(extensions):
    '''
    Find desktop applications able to open files with the given extensions.

    ``.desktop`` files are gathered from the ``applications`` folder of
    ``XDG_DATA_HOME`` (default ``~/.local/share``) and then of each entry
    in ``XDG_DATA_DIRS`` (default ``/usr/local/share/:/usr/share/``). If a
    file name appears in more than one folder, the folder listed first
    takes precedence.

    :param extensions: Iterable of file extensions (without the dot).
    :return: List of parsed desktop-file dicts whose ``MimeType`` overlaps
        the MIME types guessed for ``extensions``, sorted by ``Name``.
        ``Icon`` is kept only when it resolves to a string; ``Name``,
        ``GenericName`` and ``Comment`` are passed through
        ``localize_string``.
    '''
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')]
    data_dirs += (os.environ.get('XDG_DATA_DIRS') or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # The dict is keyed by base name, so test the base name (not
                # the full path); earlier data dirs have higher precedence
                # and must not be overwritten by later ones.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in desktop_files.iteritems():
        try:
            data = parse_desktop_file(path)
        except Exception:
            # A broken desktop file must not prevent listing the others
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            icon = data.get('Icon', {}).get(None)
            if icon and not os.path.isabs(icon):
                # Resolve a themed icon name to a file path
                icon = find_icons().get(icon)
                if icon:
                    data['Icon'] = icon
                else:
                    data.pop('Icon')
            if not isinstance(data.get('Icon'), basestring):
                data.pop('Icon', None)
            for k in ('Name', 'GenericName', 'Comment'):
                val = data.get(k)
                if val:
                    data[k] = localize_string(val)
            ans.append(data)
    ans.sort(key=lambda d:sort_key(d.get('Name')))
    return ans
Ejemplo n.º 39
0
def dump(path):
    '''
    Unzip ``path`` and rewrite the XML files it contains in readable form.

    The output folder is named after the archive's base name (extension
    removed) with ``-dumped`` appended, relative to the current directory;
    a pre-existing folder of that name is deleted. All ``.xml`` and
    ``.rels`` files found in it are parsed and written back pretty-printed
    as UTF-8 with an XML declaration.
    '''
    base_name, _ext = os.path.splitext(os.path.basename(path))
    dest = base_name + '-dumped'
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as zf:
        zf.extractall(dest)

    xml_suffixes = ('.xml', '.rels')
    for f in walk(dest):
        if f.endswith(xml_suffixes):
            with open(f, 'r+b') as stream:
                root = etree.fromstring(stream.read())
                # Discard the old content before writing the new serialisation
                stream.seek(0)
                stream.truncate()
                serialized = etree.tostring(
                    root, pretty_print=True, encoding='utf-8',
                    xml_declaration=True)
                stream.write(serialized)

    print(path, 'dumped to', dest)
Ejemplo n.º 40
0
    def __call__(self, stream, odir, log):
        '''
        Convert the ODT file in ``stream`` to XHTML plus an OPF inside ``odir``.

        ``odir`` is created when missing and used as the working directory.
        The generated markup is written UTF-8 encoded to ``index.xhtml``,
        the pictures are extracted from the ODT zip container, and
        ``metadata.opf`` is rendered with a manifest of every path ``walk``
        finds in the directory and a spine holding ``index.xhtml``.

        :param stream: Seekable file-like object containing the ODT data.
        :param odir: Output directory.
        :param log: Callable logger that also provides ``exception()``.
        :return: Absolute path of the generated ``metadata.opf``.
        '''
        from xml.sax.saxutils import escape
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.odt import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator

        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
            log('Extracting ODT file...')
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            if not mi.title:
                mi.title = _('Unknown')
            if not mi.authors:
                mi.authors = [_('Unknown')]
            self.filter_load(stream, mi, log)
            html = self.xhtml()
            # A blanket img specification like this causes problems
            # with EPUB output as the containing element often has
            # an absolute height and width set that is larger than
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            # odf2xhtml creates empty title tag. The title is plain text, so
            # it has to be escaped or characters such as & and < would make
            # the document malformed.
            html = html.replace('<title></title>','<title>%s</title>'%(escape(mi.title),))
            try:
                html = self.fix_markup(html, log)
            except Exception:
                # Best effort: carry on with the unfiltered markup
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
                f.write(html.encode('utf-8'))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
            opf.create_manifest([(os.path.abspath(f2), None) for f2 in
                walk(os.getcwdu())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
Ejemplo n.º 41
0
    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators):
        '''
        Obtain a recipe, run it when necessary and locate the resulting OPF.

        For ``file_ext == 'downloaded_recipe'`` the zip ``recipe_or_file``
        is extracted into the current directory and ``download.recipe`` is
        compiled without running a download. Otherwise the recipe source is
        taken, in this order, from the ``CALIBRE_RECIPE_URN`` environment
        variable, from ``recipe_or_file`` if it is a readable file, or from
        the builtin recipes by title (falling back to the bundled copy when
        the downloaded one cannot be used); the recipe is then downloaded.

        The recipe's ``conversion_options`` are copied onto ``opts``.

        :return: Absolute path of the first ``.opf`` file found in the
            current directory, searching the top level first and then
            recursively; ``None`` when there is none.
        :raises ValueError: If no usable recipe can be found.
        :raises RecipeDisabled: If the recipe defines ``recipe_disabled``.
        '''
        from calibre.web.feeds.recipes import compile_recipe
        opts.output_profile.flow_size = 0
        if file_ext == 'downloaded_recipe':
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(recipe_or_file, 'r')
            try:
                zf.extractall()
            finally:
                # Release the archive even when extraction fails
                zf.close()
            with lopen('download.recipe', 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            recipe.needs_subscription = False
            self.recipe_object = recipe(opts, log, self.report_progress)
        else:
            if os.environ.get('CALIBRE_RECIPE_URN'):
                from calibre.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
                urn = os.environ['CALIBRE_RECIPE_URN']
                log('Downloading recipe urn: ' + urn)
                rtype, recipe_id = urn.partition(':')[::2]
                if not recipe_id:
                    raise ValueError('Invalid recipe urn: ' + urn)
                if rtype == 'custom':
                    self.recipe_source = get_custom_recipe(recipe_id)
                else:
                    self.recipe_source = get_builtin_recipe_by_id(urn, log=log, download_recipe=True)
                if not self.recipe_source:
                    raise ValueError('Could not find recipe with urn: ' + urn)
                if not isinstance(self.recipe_source, bytes):
                    self.recipe_source = self.recipe_source.encode('utf-8')
                recipe = compile_recipe(self.recipe_source)
            elif os.access(recipe_or_file, os.R_OK):
                with lopen(recipe_or_file, 'rb') as f:
                    self.recipe_source = f.read()
                recipe = compile_recipe(self.recipe_source)
                log('Using custom recipe')
            else:
                from calibre.web.feeds.recipes.collection import (
                        get_builtin_recipe_by_title, get_builtin_recipe_titles)
                title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                title = os.path.basename(title).rpartition('.')[0]
                titles = frozenset(get_builtin_recipe_titles())
                if title not in titles:
                    # Retry with the full argument instead of its base name
                    title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                    title = title.rpartition('.')[0]

                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=not opts.dont_download_recipe)
                builtin = False
                try:
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                    if recipe.requires_version > numeric_version:
                        # The version components need not be strings, so
                        # convert them before joining; otherwise the join
                        # itself fails and is misreported below as a
                        # compilation failure.
                        log.warn(
                        'Downloaded recipe needs calibre version at least: %s' %
                        ('.'.join(map(str, recipe.requires_version))))
                        builtin = True
                except Exception:
                    log.exception('Failed to compile downloaded recipe. Falling '
                            'back to builtin one')
                    builtin = True
                if builtin:
                    log('Using bundled builtin recipe')
                    raw = get_builtin_recipe_by_title(title, log=log,
                            download_recipe=False)
                    if raw is None:
                        raise ValueError('Failed to find builtin recipe: '+title)
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                else:
                    log('Using downloaded builtin recipe')

            if recipe is None:
                raise ValueError('%r is not a valid recipe file or builtin recipe' %
                        recipe_or_file)

            disabled = getattr(recipe, 'recipe_disabled', None)
            if disabled is not None:
                raise RecipeDisabled(disabled)
            ro = recipe(opts, log, self.report_progress)
            ro.download()
            self.recipe_object = ro

        for key, val in self.recipe_object.conversion_options.items():
            setattr(opts, key, val)

        # Prefer an OPF at the top level of the working directory ...
        for f in os.listdir('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)

        # ... and only then look for one anywhere below it
        for f in walk('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)
Ejemplo n.º 42
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Unpack a KePub into the current directory and write a cleaned OPF.

        The archive is extracted (with a more forgiving unzip as fallback),
        its OPF is located, manifest and guide hrefs are re-based when the
        OPF lives in a sub-folder, the cover is rationalized, and unusable
        or duplicate spine entries are removed.

        Returns the absolute path of the written ``content.opf``. Raises
        ``ValueError`` when no OPF is found, when DTBook markup is present
        or when the spine ends up empty, and ``DRMError`` when a
        ``rights.xml`` file exists.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        def current_dir():
            # Python 2 needs getcwdu() to obtain a unicode path
            if sys.version_info.major == 2:
                return os.getcwdu()
            return os.getcwd()

        try:
            zf = ZipFile(stream)
            zf.extractall(current_dir())
        except Exception:
            log.exception("KEPUB appears to be invalid ZIP file, trying a "
                          "more forgiving ZIP parser")
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)

        opf = self.find_opf()
        if opf is None:
            # Fall back to the first OPF that is neither macOS metadata nor
            # a hidden file
            for candidate in walk("."):
                if not candidate.lower().endswith(".opf"):
                    continue
                if "__MACOSX" in candidate:
                    continue
                if os.path.basename(candidate).startswith("."):
                    continue
                opf = os.path.abspath(candidate)
                break
        path = getattr(stream, "name", "stream")

        if opf is None:
            message = _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)")
            raise ValueError(message.format(path))

        if os.path.exists(os.path.abspath("rights.xml")):
            raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, current_dir())
        opf_dir = os.path.split(opf)[0]
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self.encrypted_fonts = []

        if opf_dir:
            # The OPF is being moved to the top level, so its relative
            # hrefs must be prefixed with its former folder
            delta = opf_dir + "/"
            for iter_elements in (opf.itermanifest, opf.iterguide):
                for elem in iter_elements():
                    elem.set("href", delta + elem.get("href"))

        if opf.package_version >= 3.0:
            rationalize = self.rationalize_cover3
        else:
            rationalize = self.rationalize_cover2
        self.removed_cover = rationalize(opf, log)

        self.optimize_opf_parsing = opf
        for item in opf.itermanifest():
            if item.get("media-type", "") == "application/x-dtbook+xml":
                message = _(  # noqa: F821
                    "EPUB files with DTBook markup are not supported")
                raise ValueError(message)

        excluded_media_types = {
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        }
        not_for_spine = set()
        for item in opf.itermanifest():
            id_ = item.get("id", None)
            if id_ and item.get("media-type", None) in excluded_media_types:
                not_for_spine.add(id_)

        seen = set()
        for itemref in list(opf.iterspine()):
            ref = itemref.get("idref", None)
            if ref and ref not in not_for_spine and ref not in seen:
                seen.add(ref)
            else:
                # Missing, excluded or repeated idref: drop the entry
                itemref.getparent().remove(itemref)

        if not list(opf.iterspine()):
            message = _("No valid entries in the spine of this EPUB")  # noqa: F821
            raise ValueError(message)

        with open("content.opf", "wb") as nopf:
            nopf.write(opf.render())

        return os.path.abspath("content.opf")
Ejemplo n.º 43
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Unpack a KePub into the current directory and write a cleaned OPF.

        The archive is extracted (falling back to a more forgiving unzip),
        the OPF is located, manifest and guide hrefs are prefixed when the
        OPF lives in a sub-folder, the cover is rationalized and spine
        entries that are missing an idref, refer to page templates/text
        items, or repeat an earlier entry are removed.

        :return: Absolute path of the written ``content.opf``.
        :raises ValueError: If no OPF is found, DTBook markup is present or
            the spine ends up empty.
        :raises DRMError: If a ``rights.xml`` file exists.
        '''
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF
        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwdu())
        except Exception:
            # Only real errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are allowed to propagate
            log.exception('KEPUB appears to be invalid ZIP file, trying a '
                          'more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        opf = self.find_opf()
        if opf is None:
            # Fall back to the first OPF that is neither macOS metadata nor
            # a hidden file
            for f in walk(u'.'):
                if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                        not os.path.basename(f).startswith('.'):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError(
                _('%s is not a valid KEPUB file (could not find opf)') % path)

        encfile = os.path.abspath('rights.xml')
        if os.path.exists(encfile):
            raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self.encrypted_fonts = []

        if len(parts) > 1 and parts[0]:
            # The OPF is written to the top level below, so relative hrefs
            # need its former folder as prefix
            delta = '/'.join(parts[:-1]) + '/'
            for elem in opf.itermanifest():
                elem.set('href', delta + elem.get('href'))
            for elem in opf.iterguide():
                elem.set('href', delta + elem.get('href'))

        f = self.rationalize_cover3 if opf.package_version >= 3.0 else \
            self.rationalize_cover2
        self.removed_cover = f(opf, log)

        self.optimize_opf_parsing = opf
        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(
                    _('EPUB files with DTBook markup are not supported'))

        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_ and y.get('media-type', None) in {
                    'application/vnd.adobe-page-template+xml',
                    'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml',
                    'application/adobe.page-template+xml', 'application/text'
            }:
                not_for_spine.add(id_)

        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if len(list(opf.iterspine())) == 0:
            raise ValueError(_('No valid entries in the spine of this EPUB'))

        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')
Ejemplo n.º 44
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Convert a text (or TXTZ) document by turning it into HTML first.

        The text is read (for ``txtz`` from every ``.txt``/``.text`` file
        in the extracted archive), decoded with the user supplied or
        detected encoding, normalised according to the paragraph and
        formatting options, rendered to HTML via markdown, textile or the
        basic converter, and finally handed to the HTML input plugin
        through a temporary ``index*.html`` file.

        :param stream: File-like object with the input data.
        :param options: Conversion options; several attributes are read and
            some are overwritten by this method.
        :param file_ext: Extension of the input file, e.g. ``txt``,
            ``txtz``, ``md``, ``markdown`` or ``textile``.
        :param log: Logger providing ``debug`` and ``info``.
        :param accelerators: Not used by this method.
        :return: The object returned by the HTML input plugin's ``convert``.
        :raises ValueError: If markdown conversion fails with a RuntimeError.
        '''
        from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
        from calibre.ebooks.chardet import detect
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.txt.processor import (
            convert_basic, convert_markdown_with_metadata,
            separate_paragraphs_single_line,
            separate_paragraphs_print_formatted, preserve_spaces,
            detect_paragraph_type, detect_formatting_type,
            normalize_line_endings, convert_textile, remove_indents,
            block_to_single_line, separate_hard_scene_breaks)

        self.log = log
        txt = ''
        log.debug('Reading text from file...')
        length = 0

        # Extract content from zip archive.
        if file_ext == 'txtz':
            zf = ZipFile(stream)
            zf.extractall('.')

            for x in walk('.'):
                if os.path.splitext(x)[1].lower() in ('.txt', '.text'):
                    with open(x, 'rb') as tf:
                        txt += tf.read() + '\n\n'
        else:
            txt = stream.read()
            if file_ext in {'md', 'textile', 'markdown'}:
                options.formatting_type = {
                    'md': 'markdown'
                }.get(file_ext, file_ext)
                log.info('File extension indicates particular formatting. '
                         'Forcing formatting type to: %s' %
                         options.formatting_type)
                options.paragraph_type = 'off'

        # Get the encoding of the document.
        if options.input_encoding:
            ienc = options.input_encoding
            log.debug('Using user specified input encoding of %s' % ienc)
        else:
            det_encoding = detect(txt)
            det_encoding, confidence = det_encoding['encoding'], det_encoding[
                'confidence']
            if det_encoding and det_encoding.lower().replace(
                    '_',
                    '-').strip() in ('gb2312', 'chinese', 'csiso58gb231280',
                                     'euc-cn', 'euccn', 'eucgb2312-cn',
                                     'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
                # Microsoft Word exports to HTML with encoding incorrectly set to
                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
                det_encoding = 'gbk'
            ienc = det_encoding
            log.debug(
                'Detected input encoding as %s with a confidence of %s%%' %
                (ienc, confidence * 100))
        if not ienc:
            ienc = 'utf-8'
            log.debug(
                'No input encoding specified and could not auto detect using %s'
                % ienc)
        # Remove BOM from start of txt as its presence can confuse markdown
        import codecs
        for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8,
                    codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
            if txt.startswith(bom):
                txt = txt[len(bom):]
                break
        txt = txt.decode(ienc, 'replace')

        # Replace entities
        txt = _ent_pat.sub(xml_entity_to_unicode, txt)

        # Normalize line endings
        txt = normalize_line_endings(txt)

        # Determine the paragraph type of the document.
        if options.paragraph_type == 'auto':
            options.paragraph_type = detect_paragraph_type(txt)
            if options.paragraph_type == 'unknown':
                log.debug(
                    'Could not reliably determine paragraph type using block')
                options.paragraph_type = 'block'
            else:
                log.debug('Auto detected paragraph type as %s' %
                          options.paragraph_type)

        # Detect formatting
        if options.formatting_type == 'auto':
            options.formatting_type = detect_formatting_type(txt)
            log.debug('Auto detected formatting as %s' %
                      options.formatting_type)

        if options.formatting_type == 'heuristic':
            setattr(options, 'enable_heuristics', True)
            setattr(options, 'unwrap_lines', False)
            setattr(options, 'smarten_punctuation', True)

        # Reformat paragraphs to block formatting based on the detected type.
        # We don't check for block because the processor assumes block.
        # single and print are transformed to block for processing.
        if options.paragraph_type == 'single':
            txt = separate_paragraphs_single_line(txt)
        elif options.paragraph_type == 'print':
            txt = separate_hard_scene_breaks(txt)
            txt = separate_paragraphs_print_formatted(txt)
            txt = block_to_single_line(txt)
        elif options.paragraph_type == 'unformatted':
            from calibre.ebooks.conversion.utils import HeuristicProcessor
            # unwrap lines based on punctuation
            docanalysis = DocAnalysis('txt', txt)
            length = docanalysis.line_length(.5)
            preprocessor = HeuristicProcessor(options,
                                              log=getattr(self, 'log', None))
            txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
            txt = separate_paragraphs_single_line(txt)
        elif options.paragraph_type == 'block':
            txt = separate_hard_scene_breaks(txt)
            txt = block_to_single_line(txt)

        if getattr(options, 'enable_heuristics', False) and getattr(
                options, 'dehyphenate', False):
            docanalysis = DocAnalysis('txt', txt)
            if not length:
                length = docanalysis.line_length(.5)
            dehyphenator = Dehyphenator(options.verbose, log=self.log)
            txt = dehyphenator(txt, 'txt', length)

        # User requested transformation on the text.
        if options.txt_in_remove_indents:
            txt = remove_indents(txt)

        # Preserve spaces will replace multiple spaces to a space
        # followed by the &nbsp; entity.
        if options.preserve_spaces:
            txt = preserve_spaces(txt)

        # Process the text using the appropriate text processor.
        html = ''
        input_mi = None
        if options.formatting_type == 'markdown':
            log.debug('Running text through markdown conversion...')
            try:
                input_mi, html = convert_markdown_with_metadata(
                    txt,
                    extensions=[
                        x.strip()
                        for x in options.markdown_extensions.split(',')
                        if x.strip()
                    ])
            except RuntimeError:
                raise ValueError(
                    'This txt file has malformed markup, it cannot be'
                    ' converted by calibre. See https://daringfireball.net/projects/markdown/syntax'
                )
        elif options.formatting_type == 'textile':
            log.debug('Running text through textile conversion...')
            html = convert_textile(txt)
        else:
            log.debug('Running text through basic conversion...')
            flow_size = getattr(options, 'flow_size', 0)
            html = convert_basic(txt, epub_split_size_kb=flow_size)

        # Run the HTMLized text through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        base = os.getcwdu()
        if file_ext != 'txtz' and hasattr(stream, 'name'):
            base = os.path.dirname(stream.name)
        fname = os.path.join(base, 'index.html')
        c = 0
        while os.path.exists(fname):
            c += 1
            # Keep the candidate inside base: the existence check and the
            # write below must both refer to the directory of the source
            # document, not to the current working directory.
            fname = os.path.join(base, 'index%d.html' % c)
        with open(fname, 'wb') as htmlfile:
            htmlfile.write(html.encode('utf-8'))
        odi = options.debug_pipeline
        options.debug_pipeline = None
        try:
            # Generate oeb from html conversion. The read handle is closed
            # before the temporary file is deleted.
            with open(fname, 'rb') as html_stream:
                oeb = html_input.convert(html_stream, options, 'html',
                                         log, {})
        finally:
            # Restore the caller's setting and clean up even on failure
            options.debug_pipeline = odi
            os.remove(fname)

        # Set metadata from file.
        if input_mi is None:
            from calibre.customize.ui import get_file_type_metadata
            input_mi = get_file_type_metadata(stream, file_ext)
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        meta_info_to_oeb_metadata(input_mi, oeb.metadata, log)
        self.html_postprocess_title = input_mi.title

        return oeb
Ejemplo n.º 45
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert a text based input file into an OEB book.

        The text is read either directly from ``stream`` or, for ``txtz``
        input, from every ``.txt``/``.text`` file found after extracting the
        archive into the current directory. It is then decoded, normalised to
        block paragraphs, rendered to HTML with the markdown, textile or basic
        processor and finally handed to the HTML input plugin.

        :param stream: File-like object for the input. Its content is expected
            to be bytes, since it is decoded with the chosen encoding.
        :param options: Conversion options. This method modifies several
            attributes in place (``formatting_type``, ``paragraph_type``,
            ``input_encoding`` and every option declared by the HTML input
            plugin, among others).
        :param file_ext: Input file extension without the dot, for example
            ``'txt'``, ``'txtz'``, ``'md'``, ``'markdown'`` or ``'textile'``.
        :param log: Logger used for debug and info messages.
        :param accelerators: Not used by this method.
        :return: The OEB book returned by the HTML input plugin.
        :raises ValueError: If the markdown processor raises ``RuntimeError``.
        '''
        from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
        from calibre.ebooks.chardet import detect
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.txt.processor import (convert_basic,
                convert_markdown_with_metadata, separate_paragraphs_single_line,
                separate_paragraphs_print_formatted, preserve_spaces,
                detect_paragraph_type, detect_formatting_type,
                normalize_line_endings, convert_textile, remove_indents,
                block_to_single_line, separate_hard_scene_breaks)

        self.log = log
        txt = ''
        log.debug('Reading text from file...')
        # Typical line length of the document; 0 means "not computed yet".
        length = 0

        # Extract content from zip archive.
        if file_ext == 'txtz':
            zf = ZipFile(stream)
            zf.extractall('.')

            for x in walk('.'):
                if os.path.splitext(x)[1].lower() in ('.txt', '.text'):
                    with open(x, 'rb') as tf:
                        txt += tf.read() + '\n\n'
        else:
            txt = stream.read()
            if file_ext in {'md', 'textile', 'markdown'}:
                # The extension decides the markup; 'md' is an alias of 'markdown'.
                options.formatting_type = {'md': 'markdown'}.get(file_ext, file_ext)
                log.info('File extension indicates particular formatting. '
                        'Forcing formatting type to: %s'%options.formatting_type)
                options.paragraph_type = 'off'

        # Get the encoding of the document.
        if options.input_encoding:
            ienc = options.input_encoding
            log.debug('Using user specified input encoding of %s' % ienc)
        else:
            det_encoding = detect(txt[:4096])
            det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
            if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
                    'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
                # Microsoft Word exports to HTML with encoding incorrectly set to
                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
                det_encoding = 'gbk'
            ienc = det_encoding
            log.debug('Detected input encoding as %s with a confidence of %s%%' % (ienc, confidence * 100))
        if not ienc:
            ienc = 'utf-8'
            log.debug('No input encoding specified and could not auto detect using %s' % ienc)
        # Remove BOM from start of txt as its presence can confuse markdown
        import codecs
        for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8, codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
            if txt.startswith(bom):
                txt = txt[len(bom):]
                break
        txt = txt.decode(ienc, 'replace')

        # Replace entities
        txt = _ent_pat.sub(xml_entity_to_unicode, txt)

        # Normalize line endings
        txt = normalize_line_endings(txt)

        # Determine the paragraph type of the document.
        if options.paragraph_type == 'auto':
            options.paragraph_type = detect_paragraph_type(txt)
            if options.paragraph_type == 'unknown':
                log.debug('Could not reliably determine paragraph type using block')
                options.paragraph_type = 'block'
            else:
                log.debug('Auto detected paragraph type as %s' % options.paragraph_type)

        # Detect formatting
        if options.formatting_type == 'auto':
            options.formatting_type = detect_formatting_type(txt)
            log.debug('Auto detected formatting as %s' % options.formatting_type)

        if options.formatting_type == 'heuristic':
            options.enable_heuristics = True
            options.unwrap_lines = False
            options.smarten_punctuation = True

        # Reformat paragraphs to block formatting based on the detected type.
        # We don't check for block because the processor assumes block.
        # single and print are transformed to block for processing.
        if options.paragraph_type == 'single':
            txt = separate_paragraphs_single_line(txt)
        elif options.paragraph_type == 'print':
            txt = separate_hard_scene_breaks(txt)
            txt = separate_paragraphs_print_formatted(txt)
            txt = block_to_single_line(txt)
        elif options.paragraph_type == 'unformatted':
            from calibre.ebooks.conversion.utils import HeuristicProcessor
            # unwrap lines based on punctuation
            docanalysis = DocAnalysis('txt', txt)
            length = docanalysis.line_length(.5)
            preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
            txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
            txt = separate_paragraphs_single_line(txt)
        elif options.paragraph_type == 'block':
            txt = separate_hard_scene_breaks(txt)
            txt = block_to_single_line(txt)

        if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False):
            docanalysis = DocAnalysis('txt', txt)
            # Reuse the line length measured for 'unformatted' input, if any.
            if not length:
                length = docanalysis.line_length(.5)
            dehyphenator = Dehyphenator(options.verbose, log=self.log)
            txt = dehyphenator(txt,'txt', length)

        # User requested transformation on the text.
        if options.txt_in_remove_indents:
            txt = remove_indents(txt)

        # Preserve spaces will replace multiple spaces to a space
        # followed by the &nbsp; entity.
        if options.preserve_spaces:
            txt = preserve_spaces(txt)

        # Process the text using the appropriate text processor.
        html = ''
        input_mi = None
        if options.formatting_type == 'markdown':
            log.debug('Running text through markdown conversion...')
            try:
                input_mi, html = convert_markdown_with_metadata(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()])
            except RuntimeError:
                raise ValueError('This txt file has malformed markup, it cannot be'
                    ' converted by calibre. See https://daringfireball.net/projects/markdown/syntax')
        elif options.formatting_type == 'textile':
            log.debug('Running text through textile conversion...')
            html = convert_textile(txt)
        else:
            log.debug('Running text through basic conversion...')
            flow_size = getattr(options, 'flow_size', 0)
            html = convert_basic(txt, epub_split_size_kb=flow_size)

        # Run the HTMLized text through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        # NOTE: os.getcwdu() only exists on Python 2.
        base = os.getcwdu()
        if file_ext != 'txtz' and hasattr(stream, 'name'):
            base = os.path.dirname(stream.name)
        # Pick an unused index*.html name inside ``base``. Every candidate is
        # joined with ``base`` so a name collision cannot silently move the
        # temporary file into the current working directory.
        fname = os.path.join(base, 'index.html')
        c = 0
        while os.path.exists(fname):
            c += 1
            fname = os.path.join(base, 'index%d.html'%c)
        with open(fname, 'wb') as htmlfile:
            htmlfile.write(html.encode('utf-8'))
        # The debug pipeline is switched off while the nested HTML conversion
        # runs and restored afterwards.
        odi = options.debug_pipeline
        options.debug_pipeline = None
        # Generate oeb from html conversion. The stream is closed before the
        # temporary file is removed (an open handle blocks removal on Windows).
        with open(fname, 'rb') as html_stream:
            oeb = html_input.convert(html_stream, options, 'html', log, {})
        options.debug_pipeline = odi
        os.remove(fname)

        # Set metadata from file.
        if input_mi is None:
            from calibre.customize.ui import get_file_type_metadata
            input_mi = get_file_type_metadata(stream, file_ext)
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        meta_info_to_oeb_metadata(input_mi, oeb.metadata, log)
        self.html_postprocess_title = input_mi.title

        return oeb
Ejemplo n.º 46
0
def zip_exploder(path, tdir, question=lambda x:True):
    '''
    Run ``zipextract(path, tdir)`` and return the first path yielded by
    ``walk(tdir)`` whose lower-cased name ends in ``.opf``.

    ``question`` is accepted but not used here. ``Error`` is raised when no
    such path is found.
    '''
    zipextract(path, tdir)
    opf_candidates = (entry for entry in walk(tdir)
                      if entry.lower().endswith('.opf'))
    first_opf = next(opf_candidates, None)
    if first_opf is None:
        raise Error('Invalid book: Could not find .opf')
    return first_opf
Ejemplo n.º 47
0
    def build_index(self, data, browser):
        '''
        Download the articles described by ``data['index']`` and write the
        index pages for them below ``self.output_dir``.

        :param data: Mapping whose ``'index'`` entry holds the sections that
            are passed to ``feeds_from_index``.
        :param browser: Passed through unchanged to ``fetch_page``.
        :return: Path of the top level ``index.html`` inside
            ``self.output_dir``.
        :raises ValueError: If ``data`` has no (or an empty) ``'index'`` entry
            or if no feeds could be built from it.
        '''
        sections = data.get('index', None)
        if not sections:
            raise ValueError('No articles found, aborting')

        feeds = feeds_from_index(sections, oldest_article=self.oldest_article,
                                    max_articles_per_feed=self.max_articles_per_feed,
                                    log=self.log)
        if not feeds:
            raise ValueError('No articles found, aborting')
        if self.ignore_duplicate_articles is not None:
            feeds = self.remove_duplicate_articles(feeds)
        if self.test:
            # In test mode only the first self.test[0] feeds are kept.
            feeds = feeds[:self.test[0]]
        self.has_single_feed = len(feeds) == 1
        index = os.path.join(self.output_dir, 'index.html')

        # Write the top level index before any article is fetched.
        html = self.feeds2index(feeds)
        with open(index, 'wb') as fi:
            fi.write(html)

        if self.reverse_article_order:
            for feed in feeds:
                if hasattr(feed, 'reverse'):
                    feed.reverse()

        self.report_progress(0, _('Got feeds from index page'))
        # A single dict is handed to every fetch_page() call below.
        resource_cache = {}

        # Number of articles that will be attempted; used as the denominator
        # of the progress fraction.
        total = 0
        for feed in feeds:
            total += min(self.max_articles_per_feed, len(feed))
        num = 0

        for f, feed in enumerate(feeds):
            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
            if not os.path.isdir(feed_dir):
                os.makedirs(feed_dir)

            for a, article in enumerate(feed):
                if a >= self.max_articles_per_feed:
                    break
                # Counted even when the article is skipped or fails below.
                num += 1
                art_dir = os.path.join(feed_dir, 'article_%d'%a)
                if not os.path.isdir(art_dir):
                    os.makedirs(art_dir)
                try:
                    url = self.print_version(article.url)
                except NotImplementedError:
                    # No print version hook available: use the article URL.
                    url = article.url
                except:
                    # NOTE(review): bare except, so KeyboardInterrupt and
                    # SystemExit are logged and swallowed here as well.
                    self.log.exception('Failed to find print version for: '+article.url)
                    url = None
                if not url:
                    continue

                self.log.debug('Downloading article:', article.title, 'from', url)
                try:
                    pages = fetch_page(
                        url,
                        load_complete=self.load_complete,
                        links=self.select_links,
                        remove=self.remove_tags,
                        keep_only=self.keep_only_tags,
                        preprocess_browser=partial(self._preprocess_browser, article),
                        postprocess_html=partial(self._postprocess_html, article, f, a, len(feed)),
                        remove_before=self.remove_tags_before,
                        remove_after=self.remove_tags_after,
                        remove_javascript=self.remove_javascript,
                        delay=self.delay,
                        resource_cache=resource_cache, output_dir=art_dir, browser=browser)
                except AbortFetch:
                    self.log.exception('Fetching of article: %r aborted' % article.title)
                    continue
                except Exception:
                    self.log.exception('Fetching of article: %r failed' % article.title)
                    continue
                self.log.debug('Downloaded article:', article.title, 'from', article.url)
                # Keep the remote URL and point the article at its local copy
                # (relative to the feed directory).
                article.orig_url = article.url
                article.url = 'article_%d/index.html'%a
                article.downloaded = True
                # Everything after the first returned page is stored as sub pages.
                article.sub_pages  = pages[1:]
                self.report_progress(float(num)/total,
                    _(u'Article downloaded: %s')%force_unicode(article.title))

        # Write one index page per feed.
        for f, feed in enumerate(feeds):
            html = self.feed2index(f, feeds)
            feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
            with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
                fi.write(html)
        if self.no_stylesheets:
            # Delete every downloaded .css file below the output directory.
            for f in walk(self.output_dir):
                if f.endswith('.css'):
                    os.remove(f)
        self.create_opf(feeds)
        self.report_progress(1, _('Download finished'))
        return index
Ejemplo n.º 48
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Extract an EPUB into the current directory and write a cleaned up
        ``content.opf`` for it.

        The archive is extracted (falling back to a more forgiving ZIP parser
        on failure) and the OPF is located. Encryption information is then
        processed, manifest and guide hrefs are made relative to the current
        directory, the cover is rationalized, an EPUB 3 navigation document is
        converted when present and unusable spine entries are removed.

        :param stream: Seekable file-like object containing the EPUB.
        :param options: Conversion options, forwarded to
            ``convert_epub3_nav``.
        :param file_ext: Not used by this method.
        :param log: Logger.
        :param accelerators: Not used by this method.
        :return: Absolute path of the ``content.opf`` written to the current
            directory.
        :raises ValueError: If no OPF is found, if the book contains DTBook
            markup or if the spine has no valid entries left.
        :raises DRMError: If ``process_encryption`` returns a false value.
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF
        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwdu())
        except Exception:
            # Only real errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are allowed to propagate.
            log.exception('EPUB appears to be invalid ZIP file, trying a'
                          ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
        opf = self.find_opf()
        if opf is None:
            # Fall back to the first .opf on disk, ignoring __MACOSX entries
            # and files whose name starts with a dot.
            for f in walk(u'.'):
                if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                        not os.path.basename(f).startswith('.'):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError(
                '%s is not a valid EPUB file (could not find opf)' % path)

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

        if len(parts) > 1 and parts[0]:
            # The OPF lives in a sub directory: prefix every href with that
            # directory so it is relative to the current directory instead.
            delta = '/'.join(parts[:-1]) + '/'

            def normpath(href):
                # Works on the href it is given rather than on whatever the
                # enclosing loop variable happens to be.
                return posixpath.normpath(delta + href)

            for elem in opf.itermanifest():
                elem.set('href', normpath(elem.get('href')))
            for elem in opf.iterguide():
                elem.set('href', normpath(elem.get('href')))

        f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
        self.removed_cover = f(opf, log)
        if self.removed_cover:
            self.removed_items_to_ignore = (self.removed_cover, )
        epub3_nav = opf.epub3_nav
        if epub3_nav is not None:
            self.convert_epub3_nav(epub3_nav, opf, log, options)

        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(
                    'EPUB files with DTBook markup are not supported')

        # Collect ids of manifest items that must not appear in the spine.
        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_:
                mt = y.get('media-type', None)
                if mt in {
                        'application/vnd.adobe-page-template+xml',
                        'application/vnd.adobe.page-template+xml',
                        'application/adobe-page-template+xml',
                        'application/adobe.page-template+xml',
                        'application/text'
                }:
                    not_for_spine.add(id_)
                ext = y.get('href', '').rpartition('.')[-1].lower()
                if mt == 'text/plain' and ext in {'otf', 'ttf'}:
                    # some epub authoring software sets font mime types to
                    # text/plain
                    not_for_spine.add(id_)
                    y.set('media-type', 'application/font')

        # Drop spine entries without an idref, with an excluded idref or with
        # an idref that was already seen. list() takes a snapshot because
        # elements are removed while looping.
        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if not list(opf.iterspine()):
            raise ValueError('No valid entries in the spine of this EPUB')

        with lopen('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')
Ejemplo n.º 49
0
    def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
        '''
        Write every entry returned by ``self.Contents()`` below ``output_dir``,
        run ``self._reformat`` over the extracted HTML files and settle on a
        value for ``self.hhc_path``.

        :param output_dir: Destination directory. NOTE: the default is
            evaluated once, when the function is defined, not on every call.
        :param debug_dump: When true it is used as a directory path; a copy of
            ``output_dir`` is placed in its ``debug_dump`` sub directory.
        '''
        html_files = set([])
        # Use the encoding reported by the file if Python knows it
        # (codecs.lookup raises otherwise); fall back to cp1252 on any error.
        try:
            x = self.get_encoding()
            codecs.lookup(x)
            enc = x
        except:
            enc = 'cp1252'
        for path in self.Contents():
            fpath = path
            if not isinstance(path, unicode_type):
                fpath = path.decode(enc)
            lpath = os.path.join(output_dir, fpath)
            # presumably creates the directories needed for lpath; see _ensure_dir
            self._ensure_dir(lpath)
            try:
                data = self.GetFile(path)
            except:
                self.log.exception('Failed to extract %s from CHM, ignoring' %
                                   path)
                continue
            if lpath.find(';') != -1:
                # fix file names with ";<junk>" at the end, see _reformat()
                lpath = lpath.split(';')[0]
            try:
                with open(lpath, 'wb') as f:
                    f.write(data)
                # Remember HTML files for the post-processing pass below.
                # Any failure while guessing the mime type is ignored.
                try:
                    if 'html' in guess_mimetype(path)[0]:
                        html_files.add(lpath)
                except:
                    pass
            except:
                # Over-long paths are skipped on Windows; every other
                # failure is re-raised.
                if iswindows and len(lpath) > 250:
                    self.log.warn('%r filename too long, skipping' % path)
                    continue
                raise

        if debug_dump:
            import shutil
            shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
        # Rewrite each extracted HTML file in place with the output of
        # _reformat(), encoded as UTF-8 when it returns text.
        for lpath in html_files:
            with open(lpath, 'r+b') as f:
                data = f.read()
                data = self._reformat(data, lpath)
                if isinstance(data, unicode_type):
                    data = data.encode('utf-8')
                f.seek(0)
                f.truncate()
                f.write(data)

        self._extracted = True
        # Only regular files directly inside output_dir are considered here.
        files = [
            y for y in os.listdir(output_dir)
            if os.path.isfile(os.path.join(output_dir, y))
        ]
        # Fallback 1: case-insensitive match of the current hhc_path.
        if self.hhc_path not in files:
            for f in files:
                if f.lower() == self.hhc_path.lower():
                    self.hhc_path = f
                    break
        # Fallback 2: first file whose name has an HTML-like extension.
        # NOTE(review): partition('.') splits at the FIRST dot, so a name such
        # as 'a.b.html' does not match -- confirm whether that is intended.
        if self.hhc_path not in files and files:
            for f in files:
                if f.partition('.')[-1].lower() in {
                        'html', 'htm', 'xhtm', 'xhtml'
                }:
                    self.hhc_path = f
                    break

        # Fallback 3: hhc_path is still the bare '.hhc' value, so search the
        # whole tree for a conventional index/contents page.
        if self.hhc_path == '.hhc' and self.hhc_path not in files:
            from calibre import walk
            for x in walk(output_dir):
                if os.path.basename(x).lower() in ('index.htm', 'index.html',
                                                   'contents.htm',
                                                   'contents.html'):
                    self.hhc_path = os.path.relpath(x, output_dir)
                    break

        # Last resort: the first top level file, whatever it is.
        if self.hhc_path not in files and files:
            self.hhc_path = files[0]
Ejemplo n.º 50
0
def zip_exploder(path, tdir, question=lambda x:True):
    '''
    Call ``zipextract(path, tdir)``, then look through ``walk(tdir)`` for a
    path ending in ``.opf`` (compared case-insensitively) and return the first
    one found. Raises ``Error`` if there is none. ``question`` is unused.
    '''
    zipextract(path, tdir)
    located = next(
        (name for name in walk(tdir) if name.lower().endswith('.opf')),
        None)
    if located is not None:
        return located
    raise Error('Invalid book: Could not find .opf')
Ejemplo n.º 51
0
 def get_files(self):
     '''
     Yield each path produced by ``walk`` for ``<self.SRC>/calibre`` that
     ends in ``.py``.
     '''
     from calibre import walk
     source_root = os.path.join(self.SRC, 'calibre')
     for candidate in walk(source_root):
         if not candidate.endswith('.py'):
             continue
         yield candidate
Ejemplo n.º 52
0
    def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
        '''
        Extract all entries listed by ``self.Contents()`` into ``output_dir``,
        pass the extracted HTML files through ``self._reformat`` and choose a
        usable ``self.hhc_path``.

        :param output_dir: Directory to extract into. NOTE: this default is
            computed once at definition time, not per call.
        :param debug_dump: If true, treated as a directory; ``output_dir`` is
            copied to ``<debug_dump>/debug_dump``.
        '''
        html_files = set([])
        # Prefer the encoding reported by GetEncoding() when codecs.lookup()
        # accepts it; any error selects cp1252 instead.
        try:
            x = self.GetEncoding()
            codecs.lookup(x)
            enc = x
        except:
            enc = 'cp1252'
        for path in self.Contents():
            fpath = path
            # Byte string names are decoded before building the local path.
            if not isinstance(path, unicode):
                fpath = path.decode(enc)
            lpath = os.path.join(output_dir, fpath)
            # presumably prepares the directory that will hold lpath
            self._ensure_dir(lpath)
            try:
                data = self.GetFile(path)
            except:
                self.log.exception('Failed to extract %s from CHM, ignoring'%path)
                continue
            if lpath.find(';') != -1:
                # fix file names with ";<junk>" at the end, see _reformat()
                lpath = lpath.split(';')[0]
            try:
                with open(lpath, 'wb') as f:
                    f.write(data)
                # Track HTML files for the rewrite pass; errors raised while
                # guessing the mime type are deliberately ignored.
                try:
                    if 'html' in guess_mimetype(path)[0]:
                        html_files.add(lpath)
                except:
                    pass
            except:
                # On Windows a too-long path is logged and skipped; anything
                # else propagates.
                if iswindows and len(lpath) > 250:
                    self.log.warn('%r filename too long, skipping'%path)
                    continue
                raise

        if debug_dump:
            import shutil
            shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
        # Replace the content of every HTML file with the result of
        # _reformat() (UTF-8 encoded if it is a unicode string).
        for lpath in html_files:
            with open(lpath, 'r+b') as f:
                data = f.read()
                data = self._reformat(data, lpath)
                if isinstance(data, unicode):
                    data = data.encode('utf-8')
                f.seek(0)
                f.truncate()
                f.write(data)

        self._extracted = True
        # Regular files found directly in output_dir (no recursion).
        files = [x for x in os.listdir(output_dir) if
                os.path.isfile(os.path.join(output_dir, x))]
        # First try a case-insensitive match for the current hhc_path.
        if self.hhc_path not in files:
            for f in files:
                if f.lower() == self.hhc_path.lower():
                    self.hhc_path = f
                    break
        # Then the first file with an HTML-like extension.
        # NOTE(review): partition('.') cuts at the first dot, so names with
        # more than one dot are not recognised -- verify this is wanted.
        if self.hhc_path not in files and files:
            for f in files:
                if f.partition('.')[-1].lower() in {'html', 'htm', 'xhtm',
                        'xhtml'}:
                    self.hhc_path = f
                    break

        # Then, while hhc_path is still the bare '.hhc' value, any
        # index/contents page anywhere below output_dir.
        if self.hhc_path == '.hhc' and self.hhc_path not in files:
            from calibre import walk
            for x in walk(output_dir):
                if os.path.basename(x).lower() in ('index.htm', 'index.html',
                        'contents.htm', 'contents.html'):
                    self.hhc_path = os.path.relpath(x, output_dir)
                    break

        # Finally fall back to the first top level file.
        if self.hhc_path not in files and files:
            self.hhc_path = files[0]
Ejemplo n.º 53
0
    def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
        '''
        Obtain a recipe, run it if necessary and return the OPF it produced.

        For ``downloaded_recipe`` input the archive is extracted into the
        current directory and the recipe in ``download.recipe`` is only
        instantiated. Otherwise the recipe source is taken from the
        ``CALIBRE_RECIPE_URN`` environment variable, from a readable file, or
        from the builtin recipes looked up by title; it is then compiled,
        instantiated and downloaded.

        :param recipe_or_file: Path to a recipe file or downloaded recipe
            archive, or a name from which a builtin recipe title is derived.
        :param opts: Conversion options. ``output_profile.flow_size`` is set
            to 0 and the recipe's ``conversion_options`` are copied onto it.
        :param file_ext: Input extension; ``'downloaded_recipe'`` selects the
            archive code path.
        :param log: Logger (also called directly with a message).
        :param accelerators: Not used by this method.
        :return: Absolute path of the first ``.opf`` found in the current
            directory (searched non-recursively first, then with ``walk``),
            or ``None`` if there is none.
        :raises ValueError: For an invalid or unknown recipe urn, a missing
            builtin recipe or an input that is not a valid recipe.
        :raises RecipeDisabled: If the recipe has a ``recipe_disabled``
            attribute that is not ``None``.
        '''
        from calibre.web.feeds.recipes import compile_recipe
        opts.output_profile.flow_size = 0
        if file_ext == 'downloaded_recipe':
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(recipe_or_file, 'r')
            try:
                zf.extractall()
            finally:
                # Close the archive even when extraction fails.
                zf.close()
            with lopen('download.recipe', 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            recipe.needs_subscription = False
            self.recipe_object = recipe(opts, log, self.report_progress)
        else:
            if os.environ.get('CALIBRE_RECIPE_URN'):
                from calibre.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
                urn = os.environ['CALIBRE_RECIPE_URN']
                log('Downloading recipe urn: ' + urn)
                # '<type>:<id>' -> (type, id); id is empty without a colon.
                rtype, recipe_id = urn.partition(':')[::2]
                if not recipe_id:
                    raise ValueError('Invalid recipe urn: ' + urn)
                if rtype == 'custom':
                    self.recipe_source = get_custom_recipe(recipe_id)
                else:
                    self.recipe_source = get_builtin_recipe_by_id(
                        urn, log=log, download_recipe=True)
                if not self.recipe_source:
                    raise ValueError('Could not find recipe with urn: ' + urn)
                if not isinstance(self.recipe_source, bytes):
                    self.recipe_source = self.recipe_source.encode('utf-8')
                recipe = compile_recipe(self.recipe_source)
            elif os.access(recipe_or_file, os.R_OK):
                with lopen(recipe_or_file, 'rb') as f:
                    self.recipe_source = f.read()
                recipe = compile_recipe(self.recipe_source)
                log('Using custom recipe')
            else:
                from calibre.web.feeds.recipes.collection import (
                    get_builtin_recipe_by_title, get_builtin_recipe_titles)
                title = getattr(opts, 'original_recipe_input_arg',
                                recipe_or_file)
                title = os.path.basename(title).rpartition('.')[0]
                titles = frozenset(get_builtin_recipe_titles())
                if title not in titles:
                    # Retry with the full argument (minus extension) instead
                    # of only its base name.
                    title = getattr(opts, 'original_recipe_input_arg',
                                    recipe_or_file)
                    title = title.rpartition('.')[0]

                raw = get_builtin_recipe_by_title(
                    title,
                    log=log,
                    download_recipe=not opts.dont_download_recipe)
                builtin = False
                try:
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                    if recipe.requires_version > numeric_version:
                        # The version parts are converted with str() so that
                        # join() also works when they are integers; otherwise
                        # the TypeError would be reported below as a
                        # compilation failure.
                        log.warn(
                            'Downloaded recipe needs calibre version at least: %s'
                            % ('.'.join(map(str, recipe.requires_version))))
                        builtin = True
                except:
                    log.exception(
                        'Failed to compile downloaded recipe. Falling '
                        'back to builtin one')
                    builtin = True
                if builtin:
                    log('Using bundled builtin recipe')
                    raw = get_builtin_recipe_by_title(title,
                                                      log=log,
                                                      download_recipe=False)
                    if raw is None:
                        raise ValueError('Failed to find builtin recipe: ' +
                                         title)
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                else:
                    log('Using downloaded builtin recipe')

            if recipe is None:
                raise ValueError(
                    '%r is not a valid recipe file or builtin recipe' %
                    recipe_or_file)

            disabled = getattr(recipe, 'recipe_disabled', None)
            if disabled is not None:
                raise RecipeDisabled(disabled)
            ro = recipe(opts, log, self.report_progress)
            ro.download()
            self.recipe_object = ro

        for key, val in self.recipe_object.conversion_options.items():
            setattr(opts, key, val)

        # Prefer an OPF directly in the current directory, then search the
        # whole tree. Falls through (returning None) when none exists.
        for f in os.listdir('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)

        for f in walk('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)
Ejemplo n.º 54
0
    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators):
        '''
        Obtain a recipe, run it if necessary and return the OPF it produced.

        For ``downloaded_recipe`` input the archive is extracted into the
        current directory and the recipe in ``download.recipe`` is only
        instantiated. Otherwise the recipe is read from ``recipe_or_file``
        when that is a readable file, or looked up by title among the builtin
        recipes; it is then compiled, instantiated and downloaded.

        :param recipe_or_file: Path to a recipe file or downloaded recipe
            archive, or a name from which a builtin recipe title is derived.
        :param opts: Conversion options. ``output_profile.flow_size`` is set
            to 0 and the recipe's ``conversion_options`` are copied onto it.
        :param file_ext: Input extension; ``'downloaded_recipe'`` selects the
            archive code path.
        :param log: Logger (also called directly with a message).
        :param accelerators: Not used by this method.
        :return: Absolute path of the first ``.opf`` found in the current
            directory (searched non-recursively first, then with ``walk``),
            or ``None`` if there is none.
        :raises ValueError: If the builtin recipe cannot be found or the input
            is not a valid recipe.
        :raises RecipeDisabled: If the recipe has a ``recipe_disabled``
            attribute that is not ``None``.
        '''
        from calibre.web.feeds.recipes import compile_recipe
        opts.output_profile.flow_size = 0
        if file_ext == 'downloaded_recipe':
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(recipe_or_file, 'r')
            try:
                zf.extractall()
            finally:
                # Close the archive even when extraction fails.
                zf.close()
            # Read through a context manager so the file handle is closed.
            with open(u'download.recipe', 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            recipe.needs_subscription = False
            self.recipe_object = recipe(opts, log, self.report_progress)
        else:
            if os.access(recipe_or_file, os.R_OK):
                with open(recipe_or_file, 'rb') as f:
                    self.recipe_source = f.read()
                recipe = compile_recipe(self.recipe_source)
                log('Using custom recipe')
            else:
                from calibre.web.feeds.recipes.collection import \
                        get_builtin_recipe_by_title
                title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                title = os.path.basename(title).rpartition('.')[0]
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=not opts.dont_download_recipe)
                builtin = False
                try:
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                    if recipe.requires_version > numeric_version:
                        # The version parts are converted with str() so that
                        # join() also works when they are integers; otherwise
                        # the TypeError would be reported below as a
                        # compilation failure.
                        log.warn(
                        'Downloaded recipe needs calibre version at least: %s' %
                        ('.'.join(map(str, recipe.requires_version))))
                        builtin = True
                except:
                    log.exception('Failed to compile downloaded recipe. Falling '
                            'back to builtin one')
                    builtin = True
                if builtin:
                    log('Using bundled builtin recipe')
                    raw = get_builtin_recipe_by_title(title, log=log,
                            download_recipe=False)
                    if raw is None:
                        raise ValueError('Failed to find builtin recipe: '+title)
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                else:
                    log('Using downloaded builtin recipe')

            if recipe is None:
                raise ValueError('%r is not a valid recipe file or builtin recipe' %
                        recipe_or_file)

            disabled = getattr(recipe, 'recipe_disabled', None)
            if disabled is not None:
                raise RecipeDisabled(disabled)
            ro = recipe(opts, log, self.report_progress)
            ro.download()
            self.recipe_object = ro

        for key, val in self.recipe_object.conversion_options.items():
            setattr(opts, key, val)

        # Prefer an OPF directly in the current directory, then search the
        # whole tree. Falls through (returning None) when none exists.
        for f in os.listdir(u'.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)

        for f in walk(u'.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)
Ejemplo n.º 55
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Extract an EPUB into the current directory, tidy up its OPF and write
        the result to ``content.opf``.

        Returns the absolute path of that file. Raises ``ValueError`` when no
        OPF can be found, when DTBook markup is present or when the spine
        ends up empty, and ``DRMError`` when ``process_encryption`` returns a
        false value. ``options``, ``file_ext`` and ``accelerators`` are not
        used here.
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        # Extract with the regular parser first; on any failure rewind the
        # stream and retry with the forgiving one.
        try:
            archive = ZipFile(stream)
            archive.extractall(os.getcwdu())
        except:
            log.exception('EPUB appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)

        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))

        # Locate the OPF, searching the extracted tree if find_opf() fails.
        opf_location = self.find_opf()
        if opf_location is None:
            for candidate in walk(u'.'):
                if not candidate.lower().endswith('.opf'):
                    continue
                if '__MACOSX' in candidate:
                    continue
                if os.path.basename(candidate).startswith('.'):
                    continue
                opf_location = os.path.abspath(candidate)
                break

        source_name = getattr(stream, 'name', 'stream')
        if opf_location is None:
            raise ValueError('%s is not a valid EPUB file (could not find opf)'%source_name)

        relative_opf = os.path.relpath(opf_location, os.getcwdu())
        head_and_tail = os.path.split(relative_opf)
        opf = OPF(relative_opf, os.path.dirname(os.path.abspath(relative_opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile) and \
                not self.process_encryption(encfile, opf, log):
            raise DRMError(os.path.basename(source_name))
        self.encrypted_fonts = self._encrypted_font_uris

        nav = opf.epub3_nav
        if nav is not None:
            self.convert_epub3_nav(nav, opf, log)

        # When the OPF sits in a sub directory, prefix manifest and guide
        # hrefs with that directory.
        if len(head_and_tail) > 1 and head_and_tail[0]:
            prefix = '/'.join(head_and_tail[:-1])+'/'
            for iterate in (opf.itermanifest, opf.iterguide):
                for elem in iterate():
                    elem.set('href', prefix+elem.get('href'))

        if opf.package_version >= 3.0:
            rationalize = self.rationalize_cover3
        else:
            rationalize = self.rationalize_cover2
        self.removed_cover = rationalize(opf, log)

        if any(item.get('media-type', '') == 'application/x-dtbook+xml'
               for item in opf.itermanifest()):
            raise ValueError(
                'EPUB files with DTBook markup are not supported')

        # Ids of manifest items that may not be referenced from the spine.
        excluded_types = {
            'application/vnd.adobe-page-template+xml', 'application/vnd.adobe.page-template+xml',
            'application/adobe-page-template+xml', 'application/adobe.page-template+xml',
            'application/text'}
        not_for_spine = set()
        for item in opf.itermanifest():
            item_id = item.get('id', None)
            if not item_id:
                continue
            if item.get('media-type', None) in excluded_types:
                not_for_spine.add(item_id)

        # Keep only the first spine entry for each usable idref.
        kept = set()
        for itemref in list(opf.iterspine()):
            idref = itemref.get('idref', None)
            if idref and idref not in not_for_spine and idref not in kept:
                kept.add(idref)
            else:
                itemref.getparent().remove(itemref)

        if not list(opf.iterspine()):
            raise ValueError('No valid entries in the spine of this EPUB')

        with lopen('content.opf', 'wb') as out:
            out.write(opf.render())

        return os.path.abspath(u'content.opf')
Ejemplo n.º 56
0
    def run(self, opts):
        '''
        Freeze calibre for Linux with cx_Freeze and pack the result into a
        ``calibre-<version>-<arch>.tar.bz2`` archive in the ``dist`` directory.

        The steps, in order:

          1. Stage ``calibre-mount-helper`` in /tmp (uses ``sudo``).
          2. Generate a dummy loader script and the real init script in /tmp.
          3. Run the cx_Freeze ``build_exe`` command into ``build/cx_freeze``.
          4. Copy extra shared libraries, ImageMagick, calibre plugins,
             calibre resources and Qt plugins into the freeze directory.
          5. Write one ``/bin/sh`` launcher per entry point plus a
             ``manifest`` file listing them.
          6. Tar everything up.

        All library paths are hard-coded, so this only works on a machine
        laid out like the original build host.

        :param opts: Parsed command options; not used by this method.
        :raises IndexError: If no ``/usr/lib/ImageMagick-*`` directory exists.
        '''
        import glob, sys, tarfile, os, textwrap, shutil, platform
        from contextlib import closing
        from cx_Freeze import Executable, setup
        from calibre.linux import entry_points
        from calibre import walk

        def write_file(path, data):
            # Close the handle deterministically instead of relying on the
            # garbage collector to flush the data to disk.
            with open(path, 'wb') as fobj:
                fobj.write(data)

        is64bit = platform.architecture()[0] == '64bit'
        arch = 'x86_64' if is64bit else 'i686'
        ffi = '/usr/lib/gcc/x86_64-pc-linux-gnu/4.4.2/libffi.so.4' if is64bit else '/usr/lib/gcc/i686-pc-linux-gnu/4.4.1/libffi.so.4'
        stdcpp = '/usr/lib/gcc/%s-pc-linux-gnu/%s/libstdc++.so.6'%(arch, '4.4.2'
                if is64bit else '4.4.1')

        QTDIR          = '/usr/lib/qt4'
        QTDLLS         = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml',
                'QtWebKit', 'QtDBus', 'QtXmlPatterns')

        # Glob patterns (relative to the freeze directory) for libraries that
        # cx_Freeze may pull in but that must not be shipped.
        binary_excludes = ['libGLcore*', 'libGL*', 'libnvidia*']

        # Stage a copy of the mount helper owned by the build user so that it
        # can be copied like any other binary below.
        os.system('sudo cp /usr/bin/calibre-mount-helper /tmp/calibre-mount-helper')
        os.system('sudo chown kovid:users /tmp/calibre-mount-helper')

        binary_includes = [
            '/usr/bin/pdftohtml',
            '/usr/lib/libwmflite-0.2.so.7',
            '/usr/lib/liblcms.so.1',
            '/usr/lib/liblcms2.so.2',
            '/usr/lib/libstlport.so.5.1',
            '/tmp/calibre-mount-helper',
            '/usr/lib/libchm.so.0',
            '/usr/lib/libsqlite3.so.0',
            '/usr/lib/libmng.so.1',
            '/usr/lib/libpodofo.so.0.8.2',
            '/lib/libz.so.1',
            '/lib/libuuid.so.1',
            '/usr/lib/libtiff.so.5',
            '/lib/libbz2.so.1',
            '/usr/lib/libpoppler.so.6',
            '/usr/lib/libxml2.so.2',
            '/usr/lib/libopenjpeg.so.2',
            '/usr/lib/libxslt.so.1',
            '/usr/lib/libjpeg.so.7',
            '/usr/lib/libgthread-2.0.so.0',
            stdcpp,
            ffi,
            '/usr/lib/libpng14.so.14',
            '/usr/lib/libexslt.so.0',
            '/usr/lib/libMagickWand.so.3',
            '/usr/lib/libMagickCore.so.3',
            '/usr/lib/libgcrypt.so.11',
            '/usr/lib/libgpg-error.so.0',
            '/usr/lib/libphonon.so.4',
            '/usr/lib/libssl.so.0.9.8',
            '/usr/lib/libcrypto.so.0.9.8',
            '/lib/libreadline.so.6',
        ]

        binary_includes += [os.path.join(QTDIR, 'lib%s.so.4'%x) for x in QTDLLS]

        CALIBRESRC = self.d(self.SRC)
        CALIBREPLUGINS = os.path.join(CALIBRESRC, 'src', 'calibre', 'plugins')
        FREEZE_DIR = os.path.join(CALIBRESRC, 'build', 'cx_freeze')
        DIST_DIR   = os.path.join(CALIBRESRC, 'dist')

        # The relative paths used below ('src/calibre/...', 'resources') are
        # resolved against the source tree root.
        os.chdir(CALIBRESRC)

        self.info('Freezing calibre located at', CALIBRESRC)

        entry_points = entry_points['console_scripts'] + entry_points['gui_scripts']
        entry_points = ['calibre_postinstall=calibre.linux:main'] + entry_points
        # Map launcher name -> (module, function), parsed from
        # 'name = module:function' entry point specifications.
        executables = {}
        for ep in entry_points:
            executables[ep.split('=')[0].strip()] = (ep.split('=')[1].split(':')[0].strip(),
                                                    ep.split(':')[-1].strip())

        # Always start from an empty freeze directory
        if os.path.exists(FREEZE_DIR):
            shutil.rmtree(FREEZE_DIR)
        os.makedirs(FREEZE_DIR)

        if not os.path.exists(DIST_DIR):
            os.makedirs(DIST_DIR)

        includes = [x[0] for x in executables.values()]
        includes += ['email.iterators', 'email.generator', 'sqlite3.dump']

        excludes = ['matplotlib', "Tkconstants", "Tkinter", "tcl", "_imagingtk",
                    "ImageTk", "FixTk", 'wx', 'PyQt4.QtAssistant', 'PyQt4.QtOpenGL.so',
                    'PyQt4.QtScript.so', 'PyQt4.QtSql.so', 'PyQt4.QtTest.so', 'qt',
                    'glob', 'gobject'] if False else [
                    'matplotlib', "Tkconstants", "Tkinter", "tcl", "_imagingtk",
                    "ImageTk", "FixTk", 'wx', 'PyQt4.QtAssistant', 'PyQt4.QtOpenGL.so',
                    'PyQt4.QtScript.so', 'PyQt4.QtSql.so', 'PyQt4.QtTest.so', 'qt',
                    'glib', 'gobject']

        packages = ['calibre', 'encodings', 'cherrypy', 'cssutils', 'xdg',
                    'dateutil', 'dns', 'email', 'dbus']

        # Every module in these gui2 sub-packages is listed explicitly as an
        # include, one entry per *.py file found on disk.
        for subpkg in ('convert', 'catalog', 'actions', 'preferences'):
            includes += ['calibre.gui2.%s.'%subpkg + x.split('/')[-1].rpartition('.')[0]
                    for x in glob.glob('src/calibre/gui2/%s/*.py'%subpkg)]

        LOADER = '/tmp/loader.py'
        write_file(LOADER, '# This script is never actually used.\nimport sys')

        INIT_SCRIPT = '/tmp/init.py'
        write_file(INIT_SCRIPT, textwrap.dedent('''
        ## Load calibre module specified in the environment variable CALIBRE_CX_EXE
        ## Also restrict sys.path to the executables' directory and add the
        ## executables directory to LD_LIBRARY_PATH
        import encodings
        import os
        import sys
        import warnings
        import zipimport
        import locale
        import codecs

        enc = locale.getdefaultlocale()[1]
        if not enc:
            enc = locale.nl_langinfo(locale.CODESET)
        enc = codecs.lookup(enc if enc else 'UTF-8').name
        sys.setdefaultencoding(enc)

        paths = os.environ.get('LD_LIBRARY_PATH', '').split(os.pathsep)
        if DIR_NAME not in paths or not sys.getfilesystemencoding():
            paths.insert(0, DIR_NAME)
            os.environ['LD_LIBRARY_PATH'] = os.pathsep.join(paths)
            os.environ['PYTHONIOENCODING'] = enc
            os.execv(sys.executable, sys.argv)

        sys.path = sys.path[:3]
        sys.frozen = True
        sys.frozen_path = DIR_NAME
        sys.extensions_location = os.path.join(DIR_NAME, 'plugins')
        sys.resources_location = os.path.join(DIR_NAME, 'resources')
        dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
        if dfv and os.path.exists(dfv):
            sys.path.insert(0, os.path.abspath(dfv))

        executables = %(executables)s

        exe = os.environ.get('CALIBRE_CX_EXE', False)
        ret = 1
        if not exe:
            print >>sys.stderr, 'Invalid invocation of calibre loader. CALIBRE_CX_EXE not set'
        elif exe not in executables:
            print >>sys.stderr, 'Invalid invocation of calibre loader. CALIBRE_CX_EXE=%%s is unknown'%%exe
        else:
            sys.argv[0] = exe
            module, func = executables[exe]
            module = __import__(module, fromlist=[1])
            func = getattr(module, func)
            ret = func()

        module = sys.modules.get("threading")
        if module is not None:
            module._shutdown()
        sys.exit(ret)
        ''')%dict(executables=repr(executables)))

        # setup() takes the command to run from sys.argv
        sys.argv = ['freeze', 'build_exe']
        setup(
            name        = __appname__,
            version     = __version__,
            executables = [Executable(script=LOADER, targetName='loader', compress=False)],
            options     = { 'build_exe' :
                            {
                            'build_exe'       : FREEZE_DIR,
                            'optimize'        : 2,
                            'excludes'        : excludes,
                            'includes'        : includes,
                            'packages'        : packages,
                            'init_script'     : INIT_SCRIPT,
                            'copy_dependent_files' : True,
                            'create_shared_zip'    : False,
                            }
                            }
            )

        def copy_binary(src, dest_dir):
            # Copy the file that src ultimately points at (symlinks are
            # resolved) into dest_dir under src's own basename, keeping the
            # permission bits.
            dest = os.path.join(dest_dir, os.path.basename(src))
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            real_src = os.path.realpath(src)
            shutil.copyfile(real_src, dest)
            shutil.copymode(real_src, dest)

        for f in binary_includes:
            copy_binary(f, FREEZE_DIR)

        for pat in binary_excludes:
            for f in glob.glob(os.path.join(FREEZE_DIR, pat)):
                os.remove(f)

        self.info('Adding ImageMagick...')
        # Uses the first matching directory; IndexError if there is none
        im = glob.glob('/usr/lib/ImageMagick-*')[0]
        dest = os.path.join(FREEZE_DIR, 'ImageMagick')
        shutil.copytree(im, dest)
        # Static archives are removed from the copied tree
        for dirpath, dirnames, filenames in os.walk(dest):
            for f in filenames:
                if f.endswith('.a'):
                    os.remove(os.path.join(dirpath, f))

        self.info('Adding calibre plugins...')
        os.makedirs(os.path.join(FREEZE_DIR, 'plugins'))
        for f in glob.glob(os.path.join(CALIBREPLUGINS, '*.so')):
            copy_binary(f, os.path.join(FREEZE_DIR, 'plugins'))

        self.info('Adding calibre resources...')
        shutil.copytree('resources', os.path.join(FREEZE_DIR, 'resources'))

        self.info('Adding Qt plugins...')
        plugdir = os.path.join(QTDIR, 'plugins')
        for dirpath, dirnames, filenames in os.walk(plugdir):
            for f in filenames:
                if not f.endswith('.so') or 'designer' in dirpath or 'codecs' in dirpath or 'sqldrivers' in dirpath:
                    continue
                f = os.path.join(dirpath, f)
                # Mirror the directory layout below QTDIR/plugins
                dest_dir = dirpath.replace(plugdir, os.path.join(FREEZE_DIR, 'qtplugins'))
                copy_binary(f, dest_dir)

        self.info('Creating launchers')
        for exe in executables:
            path = os.path.join(FREEZE_DIR, exe)
            # Each launcher is a shell script that exports CALIBRE_CX_EXE and
            # then runs the shared frozen loader.
            write_file(path, textwrap.dedent('''\
            #!/bin/sh
            export CALIBRE_CX_EXE=%s
            path=`readlink -e $0`
            base=`dirname $path`
            loader=$base/loader
            export LD_LIBRARY_PATH=$base:$LD_LIBRARY_PATH
            export MAGICK_CONFIGURE_PATH=$base/ImageMagick/config
            export MAGICK_CODER_MODULE_PATH=$base/ImageMagick/modules-Q16/coders
            export MAGICK_CODER_FILTER_PATH=$base/ImageMagick/modules-Q16/filter
            export QT_PLUGIN_PATH=$base/qtplugins:$QT_PLUGIN_PATH
            $loader "$@"
            ''')%exe)
            os.chmod(path, 0o755)

        # The manifest lists every launcher except calibre_postinstall
        exes = list(executables.keys())
        exes.remove('calibre_postinstall')
        write_file(os.path.join(FREEZE_DIR, 'manifest'), '\n'.join(exes))

        self.info('Creating archive...')
        archive = os.path.join(DIST_DIR, 'calibre-%s-%s.tar.bz2'%(__version__,
            arch))
        with open(archive, 'wb') as dist:
            with closing(tarfile.open(fileobj=dist, mode='w:bz2',
                                    format=tarfile.PAX_FORMAT)) as tf:
                for f in walk(FREEZE_DIR):
                    # Archive member names are relative to the freeze directory
                    name = f.replace(FREEZE_DIR, '')[1:]
                    if name:
                        tf.add(f, name)
            # Seek to the end of the file to report the final archive size
            dist.flush()
            dist.seek(0, 2)
            self.info('Archive %s created: %.2f MB'%(dist.name,
                dist.tell()/(1024.**2)))
Ejemplo n.º 57
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Unpack the EPUB in ``stream`` into the current working directory,
        clean up its OPF and write the result to ``content.opf``.

        The cleanup consists of: handling ``META-INF/encryption.xml`` (if
        present), rewriting manifest and guide hrefs so that they are relative
        to the working directory, rationalizing the cover, and removing spine
        entries that have no idref, are duplicates, or refer to page-template
        or plain-text manifest items.

        :param stream: File-like object containing the EPUB. Its ``name``
                       attribute, when present, is used in error messages.
        :param options: Not used by this method.
        :param file_ext: Not used by this method.
        :param log: Logger; also passed to the encryption and cover helpers.
        :param accelerators: Not used by this method.
        :return: Absolute path to the generated ``content.opf``.
        :raises ValueError: If no OPF file can be found, if the book uses
                            DTBook markup, or if the spine ends up empty.
        :raises DRMError: If ``process_encryption`` reports failure.
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwdu())
        except Exception:
            # Only real errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are allowed to propagate.
            log.exception("EPUB appears to be invalid ZIP file, trying a more forgiving ZIP parser")
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)

        encfile = os.path.abspath(os.path.join("META-INF", "encryption.xml"))
        opf = self.find_opf()
        if opf is None:
            # Fall back to the first .opf file in the extracted tree, skipping
            # Mac OS X resource directories and hidden files.
            for f in walk(u"."):
                if f.lower().endswith(".opf") and "__MACOSX" not in f and not os.path.basename(f).startswith("."):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, "name", "stream")

        if opf is None:
            raise ValueError("%s is not a valid EPUB file (could not find opf)" % path)

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

        if len(parts) > 1 and parts[0]:
            # The OPF lives in a sub-directory while content.opf is written to
            # the working directory, so prefix every href with that directory.
            delta = "/".join(parts[:-1]) + "/"
            for elem in opf.itermanifest():
                href = elem.get("href")
                # Elements without an href are left alone instead of raising
                # TypeError on the string concatenation
                if href is not None:
                    elem.set("href", delta + href)
            for elem in opf.iterguide():
                href = elem.get("href")
                if href is not None:
                    elem.set("href", delta + href)

        self.removed_cover = self.rationalize_cover(opf, log)

        self.optimize_opf_parsing = opf

        # Manifest items with these media types must not appear in the spine.
        # All observed spellings of the Adobe page-template type are listed.
        non_spine_types = frozenset((
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        ))
        not_for_spine = set()
        for item in opf.itermanifest():
            media_type = item.get("media-type", None)
            if media_type == "application/x-dtbook+xml":
                raise ValueError("EPUB files with DTBook markup are not supported")
            id_ = item.get("id", None)
            if id_ and media_type in non_spine_types:
                not_for_spine.add(id_)

        seen = set()
        # Iterate over a copy because entries are removed while looping
        for x in list(opf.iterspine()):
            ref = x.get("idref", None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if not list(opf.iterspine()):
            raise ValueError("No valid entries in the spine of this EPUB")

        with open("content.opf", "wb") as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u"content.opf")