Ejemplo n.º 1
0
    def add_content_file_reference(self, name):
        '''Add a reference to the named file to the <head> of every content file.

        For each file returned by ``self.get_html_names()`` a ``<link rel="stylesheet">``
        element (when the MIME type of ``name`` equals ``guess_type('a.css')[0]``) or a
        ``<script type="text/javascript">`` element (when it equals
        ``guess_type('a.js')[0]``) is appended to the XHTML ``<head>`` and the file is
        marked dirty. Any other MIME type adds nothing. Content files that cannot be
        parsed or have no ``<head>`` are logged and skipped.

        :param name: Key of the file in ``self.name_path_map`` and ``self.mime_map``.
        :raises ValueError: If ``name`` is missing from either map.
        '''
        if name not in self.name_path_map or name not in self.mime_map:
            raise ValueError(_("A valid file name must be given (got: {filename})").format(filename=name))

        # The kind of element to add depends only on ``name``, so decide it once.
        mime = self.mime_map[name]
        is_css = mime == guess_type('a.css')[0]
        is_js = not is_css and mime == guess_type('a.js')[0]

        for infile in self.get_html_names():
            self.log.info("Adding reference to {0} to file {1}".format(name, infile))
            root = self.parsed(infile)
            if root is None:
                self.log.error("Could not retrieve content file {0}".format(infile))
                continue
            # xpath() returns a (possibly empty) list, never None, so test for
            # emptiness; indexing an empty result would raise IndexError.
            heads = root.xpath('./xhtml:head', namespaces={'xhtml': XHTML_NAMESPACE})
            if not heads:
                self.log.error("Could not find a <head> element in content file {0}".format(infile))
                continue
            head = heads[0]

            if not (is_css or is_js):
                continue

            # Reference is relative to the content file and always uses '/' separators.
            href = os.path.relpath(name, os.path.dirname(infile)).replace(os.sep, '/')
            if is_css:
                elem = head.makeelement("{%s}link" % XHTML_NAMESPACE, rel='stylesheet', href=href)
            else:
                elem = head.makeelement("{%s}script" % XHTML_NAMESPACE, type='text/javascript', src=href)

            head.append(elem)
            if is_css:
                self.fix_tail(elem)
            self.dirty(infile)
Ejemplo n.º 2
0
 def contenttypes(self):
     '''Build the content-types element tree and return it serialized by ``xml2str``.

     The tree holds one ``Override`` per fixed part name, then ``Default`` entries for
     a built-in set of extensions (MIME type from ``guess_type``), for the ``rels`` and
     ``odttf`` extensions, and finally for every not-yet-seen extension found in
     ``self.images`` for which ``guess_type`` yields a MIME type.
     '''
     ct_ns = namespaces['ct']
     E = ElementMaker(namespace=ct_ns, nsmap={None:ct_ns})
     root = E.Types()

     part_overrides = {
         "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
         "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
         "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
         "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
         "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
         "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
         "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
         "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
         "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
         "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
         "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
     }
     for part, mime in part_overrides.iteritems():
         root.append(E.Override(PartName=part, ContentType=mime))

     # Extensions that already have a Default entry.
     seen = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
     for extension in seen:
         root.append(E.Default(Extension=extension, ContentType=guess_type('a.'+extension)[0]))

     package_defaults = {
         "rels": "application/vnd.openxmlformats-package.relationships+xml",
         "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont",
     }
     for extension, mime in package_defaults.iteritems():
         seen.add(extension)
         root.append(E.Default(Extension=extension, ContentType=mime))

     for image_name in self.images:
         extension = image_name.rpartition(os.extsep)[-1]
         if extension in seen:
             continue
         seen.add(extension)
         mime = guess_type('a.' + extension)[0]
         if mime:
             root.append(E.Default(Extension=extension, ContentType=mime))
     return xml2str(root)
Ejemplo n.º 3
0
    def __init__(self, rootpath, opfpath, log):
        '''Index every file below ``rootpath`` and record a MIME type for each.

        ``self.name_path_map`` maps '/'-separated names relative to the root onto
        absolute filesystem paths; ``self.mime_map`` maps the same names onto MIME
        types, first guessed from the path and then overridden by the ``media-type``
        attributes of the OPF manifest items.

        :param rootpath: Directory to walk; stored as an absolute path in ``self.root``.
        :param opfpath: Path of the OPF file; when met during the walk it sets
            ``self.opf_name`` and ``self.opf_dir``.
        :param log: Stored as ``self.log``.
        '''
        self.root = os.path.abspath(rootpath)
        self.log = log
        self.html_preprocessor = HTMLPreProcessor()
        self.css_preprocessor = CSSPreProcessor()

        self.parsed_cache = {}
        self.mime_map = {}
        self.name_path_map = {}

        opfpath = os.path.abspath(opfpath)
        for dirpath, _dirnames, filenames in os.walk(self.root):
            for fname in filenames:
                abs_path = join(dirpath, fname)
                rel_name = relpath(abs_path, self.root).replace(os.sep, "/")
                self.name_path_map[rel_name] = abs_path
                mime = guess_type(abs_path)[0]
                if abs_path == opfpath:
                    # The OPF itself always gets the type guessed for an ".opf" name.
                    self.opf_name = rel_name
                    self.opf_dir = os.path.dirname(abs_path)
                    mime = guess_type("a.opf")[0]
                self.mime_map[rel_name] = mime

        # Media types declared in the OPF manifest take precedence over guesses.
        manifest_items = self.opf.xpath(
            "//opf:manifest/opf:item[@href and @media-type]", namespaces={"opf": OPF2_NS})
        for manifest_item in manifest_items:
            item_name = self.href_to_name(manifest_item.get("href"))
            self.mime_map[item_name] = manifest_item.get("media-type")
Ejemplo n.º 4
0
    def insert_metadata(self, mi):
        '''Render a metadata jacket page and insert it as the first spine item.

        Tags, comments and title read from ``self.oeb.metadata`` are passed to
        ``render_jacket`` as alternates; when reading one of them fails the fallbacks
        ``[]``, ``''`` and ``_('Unknown')`` are used. Each image reported by
        ``referenced_images`` for the rendered jacket is read from disk, added to the
        manifest, unloaded from memory, and its element's ``src`` is pointed at the
        new manifest item.

        :param mi: Metadata object handed through to ``render_jacket``.
        '''
        self.log('Inserting metadata into book...')

        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed by the fallbacks.
        try:
            tags = map(unicode, self.oeb.metadata.subject)
        except Exception:
            tags = []

        try:
            comments = unicode(self.oeb.metadata.description[0])
        except Exception:
            comments = ''

        try:
            title = unicode(self.oeb.metadata.title[0])
        except Exception:
            title = _('Unknown')

        root = render_jacket(mi, self.opts.output_profile,
                alt_title=title, alt_tags=tags,
                alt_comments=comments, rescale_fonts=True)
        jacket_id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml')

        jacket = self.oeb.manifest.add(jacket_id, href, guess_type(href)[0], data=root)
        self.oeb.spine.insert(0, jacket, True)
        self.oeb.inserted_metadata_jacket = jacket
        for img, path in referenced_images(root):
            self.oeb.log('Embedding referenced image %s into jacket' % path)
            ext = path.rpartition('.')[-1].lower()
            item_id, href = self.oeb.manifest.generate('jacket_image', 'jacket_img.'+ext)
            with open(path, 'rb') as f:
                item = self.oeb.manifest.add(item_id, href, guess_type(href)[0], data=f.read())
            item.unload_data_from_memory()
            img.set('src', jacket.relhref(item.href))
    def convert(self, stream, options, file_ext, log, accelerators):
        '''Run the external converter jar on ``stream`` and write a ``metadata.opf``.

        The jar is invoked through ``self.java_exec`` with markup-only output into the
        current working directory. A failing invocation is only logged at debug level;
        processing continues regardless. An OPF manifest is then created from the
        ``<input name>-markup`` directory with ``latex2mobi.html`` as the only spine
        entry.

        :param stream: Input file object; only ``stream.name`` is used.
        :param options: Unused here.
        :param file_ext: Unused here.
        :param log: Logger; called directly and through ``log.debug``.
        :param accelerators: Unused here.
        :returns: Path of the written ``metadata.opf``.
        '''
        log.debug('Enter convert() ...')
        dest_dir = os.getcwdu()  # note: temp dir from calibre process
        log.debug('dest_dir: ' + dest_dir)

        # call latex2mobi with markup output only
        from subprocess import check_output, STDOUT, CalledProcessError

        command = [self.java_exec, '-jar', os.path.join(self.plugin_dir, JAR_FILENAME)]
        command.extend(['-i', stream.name, '-n', '-o', dest_dir])

        from calibre_plugins.latexformulas_input.config import prefs

        pandoc = prefs['pandoc_exec']
        if pandoc is not None and pandoc != '':
            command.extend(['-p', pandoc])

        try:
            log.debug(check_output(command, stderr=STDOUT))
        except CalledProcessError as e:
            for detail in (e.returncode, e.cmd, e.output):
                log.debug(detail)

        # No metadata object is available at this point.
        opf = OPFCreator(dest_dir, None)
        markup_dir = os.path.sep.join((dest_dir, os.path.basename(stream.name) + '-markup'))
        log.debug('Markup-dir: ' + markup_dir)
        log.debug('CreateManifestFromFilesIn()')

        opf.create_manifest_from_files_in([markup_dir])
        for item in opf.manifest:
            # Re-derive the media type of HTML and CSS items from guess_type().
            for mime, sample_name in (('text/html', 'a.html'), ('text/css', 'a.css')):
                if item.media_type == mime:
                    log.debug('Item ' + str(item) + ' is of type ' + mime)
                    item.media_type = guess_type(sample_name)[0]
                    log.debug('Guess type result: ' + item.media_type)

        log.debug('Create_spine()')
        opf.create_spine([os.path.basename(markup_dir) + os.path.sep + 'latex2mobi.html'])

        output_path = os.path.join(dest_dir, 'metadata.opf')
        with open(output_path, 'wb') as opf_file:
            opf.render(opf_file)

        log('Exit convert() ...')
        return output_path
Ejemplo n.º 6
0
 def content_type(self, name):
     '''Return the content type for ``name``.

     Lookup order: an explicit entry in ``self.content_types``, then the lower-cased
     extension (text after the last '.') in ``self.default_content_types``, and
     finally whatever ``guess_type(name)`` reports.
     '''
     explicit = self.content_types
     if name not in explicit:
         ext = name.rpartition('.')[-1].lower()
         defaults = self.default_content_types
         return defaults[ext] if ext in defaults else guess_type(name)[0]
     return explicit[name]
Ejemplo n.º 7
0
 def contenttypes(self):
     '''Build the content-types element tree and return it pretty-printed by ``xml2str``.

     The tree holds one ``Override`` per fixed part name, then ``Default`` entries for
     a built-in set of extensions (MIME type from ``guess_type``) and for the ``rels``
     and ``odttf`` extensions.
     '''
     ct_ns = namespaces['ct']
     E = ElementMaker(namespace=ct_ns, nsmap={None:ct_ns})
     root = E.Types()

     part_overrides = {
         "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
         "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
         "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
         "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
         "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
         "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
         "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
         "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
         "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
         "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
         "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
     }
     for part, mime in part_overrides.iteritems():
         root.append(E.Override(PartName=part, ContentType=mime))

     # Extensions that already have a Default entry.
     seen = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
     for extension in seen:
         root.append(E.Default(Extension=extension, ContentType=guess_type('a.'+extension)[0]))

     package_defaults = {
         "rels": "application/vnd.openxmlformats-package.relationships+xml",
         "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont",
     }
     for extension, mime in package_defaults.iteritems():
         seen.add(extension)
         root.append(E.Default(Extension=extension, ContentType=mime))
     # TODO: Iterate over all resources and add mimetypes for any that are
     # not already added
     return xml2str(root, pretty_print=True)
Ejemplo n.º 8
0
def load_html(path, view, codec='utf-8', mime_type=None,
              pre_load_callback=lambda x:None, path_is_html=False,
              force_as_html=False):
    '''Load markup into ``view`` and report whether it loaded without a parser error.

    :param path: File to read, or the markup itself when ``path_is_html`` is true.
    :param view: Object providing ``setHtml``, ``setContent`` and ``page()``.
    :param codec: Encoding used to decode the file and to encode content for
        ``setContent``.
    :param mime_type: MIME type for ``setContent``; guessed from ``path`` when
        ``None``, falling back to ``'text/html'``.
    :param pre_load_callback: Called with the loading URL before anything is loaded.
    :param path_is_html: Treat ``path`` as markup instead of a file name.
    :param force_as_html: Always use ``setHtml``.
    :returns: ``False`` when the ``setContent`` path produced a ``parsererror``
        element, ``True`` otherwise.
    '''
    from PyQt5.Qt import QUrl, QByteArray
    if mime_type is None:
        mime_type = guess_type(path)[0] or 'text/html'

    if path_is_html:
        html = path
    else:
        with open(path, 'rb') as src:
            html = src.read().decode(codec, 'replace')

    html = EntityDeclarationProcessor(html).processed_html
    # Rewrite self-closing tags through self_closing_sub.
    html = re.sub(r'<\s*([:A-Za-z0-9-]+)([^>]*)/\s*>', self_closing_sub, html)

    loading_url = QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)

    # Markup with a namespaced svg tag or a CDATA section goes through setContent
    # unless the caller forces HTML loading.
    as_html = force_as_html or (
        re.search(r'<[a-zA-Z0-9-]+:svg', html) is None and '<![CDATA[' not in html)
    if as_html:
        view.setHtml(html, loading_url)
        return True

    view.setContent(QByteArray(html.encode(codec)), mime_type,
            loading_url)
    parse_error = view.page().mainFrame().findFirstElement('parsererror')
    return True if parse_error.isNull() else False
Ejemplo n.º 9
0
    def default_cover(self):
        '''
        Create a generic cover for books that don't have a cover.

        The cover image is produced by ``calibre_cover`` from the first title, the
        creators whose role is ``'aut'`` and, when both series and series index are
        set, a "Book N of SERIES" line. It is added to the manifest and registered as
        the ``cover`` metadata item.

        :returns: The href of the new manifest item, or ``None`` when
            ``self.no_default_cover`` is set or generation failed (the failure is
            logged with its traceback).
        '''
        from calibre.ebooks.metadata import authors_to_string, fmt_sidx
        if self.no_default_cover:
            return None
        self.log('Generating default cover')
        m = self.oeb.metadata
        title = unicode(m.title[0])
        authors = [unicode(x) for x in m.creator if x.role == 'aut']
        series_string = None
        if m.series and m.series_index:
            series_string = _('Book %(sidx)s of %(series)s')%dict(
                    sidx=fmt_sidx(m.series_index[0], use_roman=True),
                    series=unicode(m.series[0]))

        try:
            from calibre.ebooks import calibre_cover
            img_data = calibre_cover(title, authors_to_string(authors),
                    series_string=series_string)
            cover_id, href = self.oeb.manifest.generate('cover',
                    u'cover_image.jpg')
            item = self.oeb.manifest.add(cover_id, href, guess_type('t.jpg')[0],
                        data=img_data)
            m.clear('cover')
            m.add('cover', item.id)

            return item.href
        except Exception:
            # Cover generation is best-effort, but KeyboardInterrupt/SystemExit
            # must still propagate, hence not a bare ``except``.
            self.log.exception('Failed to generate default cover')
        return None
Ejemplo n.º 10
0
 def _manifest_from_opf(self, opf):
     '''Populate ``self.oeb.manifest`` from the ``<item>`` elements of ``opf``.

     The media type comes from the ``media-type`` attribute (or ``mediatype`` when
     that is absent). A missing type or ``text/xml`` is replaced by the type guessed
     from the href, falling back to the declared value and then ``BINARY_MIME``.
     Items whose href is already in the manifest or does not exist in the container
     are skipped with a warning; a duplicate id gets a freshly generated id/href.
     Afterwards invalid entries are pruned and missing ones added.
     '''
     manifest = self.oeb.manifest
     for item in xpath(opf, '/o2:package/o2:manifest/o2:item'):
         item_id = item.get('id')
         href = item.get('href')

         media_type = item.get('media-type')
         if media_type is None:
             media_type = item.get('mediatype')
         if not media_type or media_type == 'text/xml':
             media_type = guess_type(href)[0] or media_type or BINARY_MIME
         if hasattr(media_type, 'lower'):
             media_type = media_type.lower()

         fallback = item.get('fallback')
         if href in manifest.hrefs:
             self.logger.warn(u'Duplicate manifest entry for %r' % href)
         elif not self.oeb.container.exists(href):
             self.logger.warn(u'Manifest item %r not found' % href)
         else:
             if item_id in manifest.ids:
                 self.logger.warn(u'Duplicate manifest id %r' % item_id)
                 item_id, href = manifest.generate(item_id, href)
             manifest.add(item_id, href, media_type, fallback)

     self._manifest_add_missing(self._manifest_prune_invalid())
Ejemplo n.º 11
0
 def parsed(self, name):
     '''Return the parsed form of the file called ``name``, parsing it on first use.

     Results are kept in ``self.parsed_cache``. A cached value of ``None`` counts as
     "not cached" and triggers a new parse. The MIME type passed to ``self.parse``
     is taken from ``self.mime_map``, or guessed from ``name`` when it has no entry.
     '''
     cached = self.parsed_cache.get(name, None)
     if cached is not None:
         return cached
     guessed_mime = guess_type(name)[0]
     mime = self.mime_map.get(name, guessed_mime)
     result = self.parse(self.name_path_map[name], mime)
     self.parsed_cache[name] = result
     return result
Ejemplo n.º 12
0
	def write(self, path):
		'''Flush dirtied entries to disk and zip the container root into ``path``.

		Each name in ``self.dirtied`` is taken from ``self.cache``, serialized with
		``etree.tostring`` when it has an ``xpath`` attribute, stripped of U+FFFD and
		written to its file in ``self.name_map``. An existing file at ``path`` is
		removed, then a new zip is created whose first entry is an uncompressed
		``mimetype``, followed by every file below ``self.root`` whose name is not in
		``EXCLUDE_FROM_ZIP``.

		The working directory is changed to ``self.root`` while zipping; it is
		restored and the archive closed even when an error occurs.

		:param path: Destination of the zip file.
		'''
		for name in self.dirtied:
			data = self.cache[name]
			if hasattr(data, 'xpath'):
				data = etree.tostring(data, encoding = 'UTF-8', xml_declaration = True, pretty_print = True)
			# NOTE(review): data may be a byte string here while the pattern is
			# unicode - confirm the implicit conversion is intended.
			data = string.replace(data, u"\uFFFD", "")
			with open(self.name_map[name], "wb") as f:
				f.write(data)
		self.dirtied.clear()
		if os.path.exists(path):
			os.unlink(path)
		# Opened before the chdir below, so a relative ``path`` is resolved against
		# the caller's working directory.
		epub = zipfile.ZipFile(path, 'w', compression = zipfile.ZIP_DEFLATED)
		cwd = os.getcwdu()
		try:
			epub.writestr('mimetype', bytes(guess_type('a.epub')[0]), compress_type = zipfile.ZIP_STORED)

			os.chdir(self.root)
			for dirpath, _dirnames, filenames in os.walk(self.root, topdown = True):
				for f in filenames:
					if f not in EXCLUDE_FROM_ZIP:
						# relpath strips the root exactly once; str.replace would also
						# remove the prefix if it re-occurred deeper in the path.
						filepath = os.path.relpath(os.path.join(dirpath, f), self.root)
						st = os.stat(filepath)
						mtime = time.localtime(st.st_mtime)
						if mtime[0] < 1980:
							# Timestamps before 1980 are reset to "now" before the
							# file is added to the archive.
							os.utime(filepath, None)
						epub.write(filepath)
		finally:
			try:
				epub.close()
			finally:
				os.chdir(cwd)
Ejemplo n.º 13
0
 def set_cover(self, mi, prefer_metadata_cover):
     '''Install the cover described by ``mi`` and return the id of the cover item.

     Cover data is read from the file ``mi.cover`` when it is readable, otherwise
     from ``mi.cover_data`` (``(extension, data)``). Extensions other than png, jpg
     and jpeg are treated as jpg. When ``prefer_metadata_cover`` is true and the
     guide already has a cover, the new data is discarded and the existing cover is
     kept.

     :param mi: Metadata object providing ``cover`` and ``cover_data``.
     :param prefer_metadata_cover: Keep an existing guide cover over the one in ``mi``.
     :returns: The manifest id of the cover item, or ``None`` when there is neither
         new cover data nor an existing guide cover.
     '''
     cdata, ext = '', 'jpg'
     if mi.cover and os.access(mi.cover, os.R_OK):
         # Context manager so the file handle is closed deterministically.
         with open(mi.cover, 'rb') as f:
             cdata = f.read()
         ext = mi.cover.rpartition('.')[-1].lower().strip()
     elif mi.cover_data and mi.cover_data[-1]:
         cdata = mi.cover_data[1]
         ext = mi.cover_data[0]
     if ext not in ('png', 'jpg', 'jpeg'):
         ext = 'jpg'
     cover_id = old_cover = None
     if 'cover' in self.oeb.guide:
         old_cover = self.oeb.guide['cover']
     if prefer_metadata_cover and old_cover is not None:
         cdata = ''
     if cdata:
         self.oeb.guide.remove('cover')
         self.oeb.guide.remove('titlepage')
     if old_cover is not None:
         if old_cover.href in self.oeb.manifest.hrefs:
             item = self.oeb.manifest.hrefs[old_cover.href]
             if not cdata:
                 return item.id
             self.remove_old_cover(item)
         elif not cdata:
             # generate() is unpacked as an (id, href) pair everywhere else; take
             # only the id so the manifest is not handed the whole pair.
             cover_id = self.oeb.manifest.generate(id='cover')[0]
             self.oeb.manifest.add(cover_id, old_cover.href, 'image/jpeg')
             return cover_id
     if cdata:
         cover_id, href = self.oeb.manifest.generate('cover', 'cover.'+ext)
         self.oeb.manifest.add(cover_id, href, guess_type('cover.'+ext)[0], data=cdata)
         self.oeb.guide.add('cover', 'Cover', href)
     return cover_id
Ejemplo n.º 14
0
    def insert_metadata(self, mi):
        '''Render a metadata jacket page and insert it as the first spine item.

        Tags, comments and title read from ``self.oeb.metadata`` are passed to
        ``render_jacket`` as alternates; when reading one of them fails the fallbacks
        ``[]``, ``''`` and ``_('Unknown')`` are used. The rendered jacket is added to
        the manifest and remembered in ``self.oeb.inserted_metadata_jacket``.

        :param mi: Metadata object handed through to ``render_jacket``.
        '''
        self.log('Inserting metadata into book...')

        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed by the fallbacks.
        try:
            tags = map(unicode, self.oeb.metadata.subject)
        except Exception:
            tags = []

        try:
            comments = unicode(self.oeb.metadata.description[0])
        except Exception:
            comments = ''

        try:
            title = unicode(self.oeb.metadata.title[0])
        except Exception:
            title = _('Unknown')

        root = render_jacket(mi, self.opts.output_profile,
                alt_title=title, alt_tags=tags,
                alt_comments=comments)
        jacket_id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml')

        item = self.oeb.manifest.add(jacket_id, href, guess_type(href)[0], data=root)
        self.oeb.spine.insert(0, item, True)
        self.oeb.inserted_metadata_jacket = item
Ejemplo n.º 15
0
    def write(self, doc):
        '''Write the converted document into ``self.dest_dir`` and return the OPF path.

        Produces ``index.html``, ``docx.css`` (only when CSS was generated),
        ``metadata.opf`` and ``toc.ncx``; the NCX file is removed again when it ends
        up empty. Manifest items typed ``text/html`` are re-typed to whatever
        ``guess_type('a.xhtml')`` reports.

        :param doc: Passed to ``create_toc``.
        :returns: Path of ``metadata.opf``.
        '''
        dest = self.dest_dir
        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log)

        markup = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with open(os.path.join(dest, 'index.html'), 'wb') as out:
            out.write(markup)

        css = self.styles.generate_css(dest, self.docx)
        if css:
            with open(os.path.join(dest, 'docx.css'), 'wb') as out:
                out.write(css.encode('utf-8'))

        opf = OPFCreator(dest, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([dest])
        for entry in opf.manifest:
            if entry.media_type == 'text/html':
                entry.media_type = guess_type('a.xhtml')[0]
        opf.create_spine(['index.html'])
        if self.cover_image is not None:
            opf.guide.set_cover(self.cover_image)

        opf_path = os.path.join(dest, 'metadata.opf')
        toc_file = os.path.join(dest, 'toc.ncx')
        with open(opf_path, 'wb') as of, open(toc_file, 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx')
        # Drop the NCX when nothing was written to it.
        if os.path.getsize(toc_file) == 0:
            os.remove(toc_file)
        return opf_path
Ejemplo n.º 16
0
    def default_cover(self):
        '''
        Create a generic cover for books that don't have a cover.

        The cover image is produced by ``create_cover`` from the first title, the
        creators whose role is ``'aut'`` and, when a series is set, the series name
        and index. It is added to the manifest and registered as the ``cover``
        metadata item.

        :returns: The href of the new manifest item, or ``None`` when
            ``self.no_default_cover`` is set or generation failed (the failure is
            logged with its traceback).
        '''
        if self.no_default_cover:
            return None
        self.log('Generating default cover')
        m = self.oeb.metadata
        title = unicode(m.title[0])
        authors = [unicode(x) for x in m.creator if x.role == 'aut']
        try:
            from calibre.ebooks.covers import create_cover
            series = series_index = None
            if m.series:
                series, series_index = unicode(m.series[0]), m.series_index[0]
            img_data = create_cover(title, authors, series, series_index)
            cover_id, href = self.oeb.manifest.generate('cover',
                    u'cover_image.jpg')
            item = self.oeb.manifest.add(cover_id, href, guess_type('t.jpg')[0],
                        data=img_data)
            m.clear('cover')
            m.add('cover', item.id)

            return item.href
        except Exception:
            # Cover generation is best-effort, but KeyboardInterrupt/SystemExit
            # must still propagate, hence not a bare ``except``.
            self.log.exception('Failed to generate default cover')
        return None
Ejemplo n.º 17
0
def load_html(path, view, codec='utf-8', mime_type=None,
              pre_load_callback=lambda x:None, path_is_html=False,
              force_as_html=False, loading_url=None):
    '''Load markup into ``view`` and report whether it loaded without a parser error.

    :param path: File to read, or the markup itself when ``path_is_html`` is true.
    :param view: Object providing ``setHtml``, ``setContent`` and ``page()``.
    :param codec: Encoding used to decode the file and to encode content for
        ``setContent``.
    :param mime_type: MIME type for ``setContent``; guessed from ``path`` when
        ``None``, falling back to ``'text/html'``.
    :param pre_load_callback: Called with the loading URL before anything is loaded.
    :param path_is_html: Treat ``path`` as markup instead of a file name.
    :param force_as_html: Always use ``setHtml``.
    :param loading_url: Base URL for the load; derived from ``path`` when not given.
    :returns: ``False`` when the ``setContent`` path produced a ``parsererror``
        element, ``True`` otherwise.
    '''
    from PyQt5.Qt import QUrl, QByteArray
    if mime_type is None:
        mime_type = guess_type(path)[0] or 'text/html'

    if path_is_html:
        html = path
    else:
        with open(path, 'rb') as src:
            html = src.read().decode(codec, 'replace')

    html = cleanup_html(html)
    if not loading_url:
        loading_url = QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)

    if force_as_html or load_as_html(html):
        view.setHtml(html, loading_url)
        return True

    view.setContent(QByteArray(html.encode(codec)), mime_type,
            loading_url)
    parse_error = view.page().mainFrame().findFirstElement('parsererror')
    return True if parse_error.isNull() else False
Ejemplo n.º 18
0
def register():
    '''Write the capability and file-association registry entries for every program.

    For each entry of ``default_programs()`` the description, name, icon and open
    command are written below the program's capabilities key, every extension with
    a known MIME type is associated with a prog id (by extension and by MIME type),
    the prog ids are created, and the program is listed under
    ``Software\\RegisteredApplications``. Finally the shell is told that
    associations changed.
    '''
    base = os.path.dirname(sys.executable)

    for program, data in default_programs().iteritems():
        data = data.copy()
        exe = os.path.join(base, program)
        capabilities_path = cap_path(data)

        # Lower-cased extension -> guessed MIME type; extensions without a known
        # MIME type are dropped.
        guessed = {}
        for ext in extensions(program):
            lowered = ext.lower()
            guessed[lowered] = guess_type('file.' + lowered)[0]
        ext_map = dict((ext, mt) for ext, mt in guessed.iteritems() if mt)

        prog_id_map = {}
        for ext in ext_map:
            prog_id_map[ext] = progid_name(data['assoc_name'], ext)

        with Key(capabilities_path) as key:
            value_sources = {'ApplicationDescription':'description', 'ApplicationName':'name'}
            for value_name, data_key in value_sources.iteritems():
                key.set(value_name, data[data_key])
            key.set('ApplicationIcon', '%s,0' % exe)
            key.set_default_value(r'shell\open\command', '"%s" "%%1"' % exe)

            with Key('FileAssociations', root=key) as fak, Key('MimeAssociations', root=key) as mak:
                for ext, prog_id in prog_id_map.iteritems():
                    fak.set('.' + ext, prog_id)
                    mak.set(ext_map[ext], prog_id)

        for ext, prog_id in prog_id_map.iteritems():
            create_prog_id(ext, prog_id, ext_map, exe)

        with Key(r'Software\RegisteredApplications') as key:
            key.set(data['name'], capabilities_path)

    from win32com.shell import shell, shellcon
    shell.SHChangeNotify(shellcon.SHCNE_ASSOCCHANGED, shellcon.SHCNF_DWORD | shellcon.SHCNF_FLUSH, 0, 0)
Ejemplo n.º 19
0
 def __init__(self, href_or_path, basedir=os.getcwdu(), is_path=True):
     '''Describe a resource given either as a filesystem path or as an href.

     Sets ``self.path`` (absolute local path, or ``None`` for non-file URLs),
     ``self._href`` (the original href for non-file URLs), ``self.fragment`` and
     ``self.mime_type`` (guessed, defaulting to ``application/octet-stream``).

     :param href_or_path: Filesystem path when ``is_path`` is true, otherwise a URL
         or relative href.
     :param basedir: Directory that relative paths/hrefs are resolved against.
         NOTE: the default is evaluated once, when the function is defined, not on
         each call.
     :param is_path: Interpret ``href_or_path`` as a filesystem path.
     '''
     from urllib import unquote
     self._href = None
     self._basedir = basedir
     self.path = None
     self.fragment = ''
     try:
         self.mime_type = guess_type(href_or_path)[0]
     except Exception:
         # Guessing is best-effort; KeyboardInterrupt/SystemExit still propagate.
         self.mime_type = None
     if self.mime_type is None:
         self.mime_type = 'application/octet-stream'
     if is_path:
         path = href_or_path
         if not os.path.isabs(path):
             path = os.path.abspath(os.path.join(basedir, path))
         # Byte-string paths are decoded with the filesystem encoding.
         if isinstance(path, str):
             path = path.decode(sys.getfilesystemencoding())
         self.path = path
     else:
         url = urlparse(href_or_path)
         if url[0] not in ('', 'file'):
             # Any scheme other than file: is kept as an opaque href.
             self._href = href_or_path
         else:
             pc = url[2]
             if isinstance(pc, unicode):
                 pc = pc.encode('utf-8')
             pc = unquote(pc).decode('utf-8')
             self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
             self.fragment = unquote(url[-1])
Ejemplo n.º 20
0
    def write(self, doc):
        '''Write the converted document into ``self.dest_dir`` and return the OPF path.

        Produces ``index.html``, ``docx.css`` (only when CSS was generated),
        ``metadata.opf`` and ``toc.ncx``; the NCX file is removed again when it ends
        up empty. Manifest items typed ``text/html`` are re-typed to whatever
        ``guess_type('a.xhtml')`` reports, and a ``toc`` guide reference is added
        when ``self.toc_anchor`` is set.

        :param doc: Passed to ``create_toc``.
        :returns: Path of ``metadata.opf``.
        '''
        dest = self.dest_dir
        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log, self.namespace)

        markup = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
        with lopen(os.path.join(dest, 'index.html'), 'wb') as out:
            out.write(markup)

        css = self.styles.generate_css(dest, self.docx, self.notes_nopb, self.nosupsub)
        if css:
            with lopen(os.path.join(dest, 'docx.css'), 'wb') as out:
                out.write(css.encode('utf-8'))

        opf = OPFCreator(dest, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([dest])
        for entry in opf.manifest:
            if entry.media_type == 'text/html':
                entry.media_type = guess_type('a.xhtml')[0]
        opf.create_spine(['index.html'])
        if self.cover_image is not None:
            opf.guide.set_cover(self.cover_image)

        def add_toc_reference(E, guide):
            # Guide hook handed to opf.render(): points at the in-document TOC.
            if self.toc_anchor is not None:
                guide.append(E.reference(
                    href='index.html#' + self.toc_anchor, title=_('Table of Contents'), type='toc'))

        opf_path = os.path.join(dest, 'metadata.opf')
        toc_file = os.path.join(dest, 'toc.ncx')
        with lopen(opf_path, 'wb') as of, open(toc_file, 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx', process_guide=add_toc_reference)
        # Drop the NCX when nothing was written to it.
        if os.path.getsize(toc_file) == 0:
            os.remove(toc_file)
        return opf_path
Ejemplo n.º 21
0
def load_html(path, view, codec='utf-8', mime_type=None,
        pre_load_callback=lambda x:None, path_is_html=False):
    from PyQt4.Qt import QUrl, QByteArray
    if mime_type is None:
        mime_type = guess_type(path)[0]
    if path_is_html:
        html = path
    else:
        with open(path, 'rb') as f:
            html = f.read().decode(codec, 'replace')

    html = EntityDeclarationProcessor(html).processed_html
    has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None
    if 'xhtml' in mime_type:
        self_closing_pat = re.compile(r'<([a-z1-6]+)\s+([^>]+)/>',
                re.IGNORECASE)
        html = self_closing_pat.sub(self_closing_sub, html)

    html = re.sub(ur'<\s*title\s*/\s*>', u'', html, flags=re.IGNORECASE)
    loading_url = QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)

    if has_svg:
        view.setContent(QByteArray(html.encode(codec)), mime_type,
                loading_url)
    else:
        view.setHtml(html, loading_url)
Ejemplo n.º 22
0
	def __init__(self, path):
		'''Extract the epub at ``path`` into a temporary directory and index it.

		After extraction the ``mimetype`` file is deleted from the extracted tree,
		``META-INF/container.xml`` is parsed to locate the OPF, and every extracted
		file is recorded in ``self.name_map`` ('/'-separated relative name ->
		absolute path) and ``self.mime_map`` (name -> MIME type). MIME types are
		guessed from the file name and then overridden by the ``media-type`` of the
		OPF manifest items, whose ``href`` attributes are also unquoted in place.

		:param path: Path of the epub (zip) file.
		:raises InvalidEpub: If container.xml is missing, does not reference an OPF
			file, or references one that does not exist.
		'''
		tmpdir = PersistentTemporaryDirectory("_kobo-driver-extended")
		# Close the archive as soon as extraction is done (or has failed).
		zf = zipfile.ZipFile(path)
		try:
			zf.extractall(tmpdir)
		finally:
			zf.close()

		self.root = os.path.abspath(tmpdir)
		self.log = Log()
		self.dirtied = set()
		self.cache = {}
		self.mime_map = {}

		print("Container:__init__:Got container path {0}".format(self.root))

		if os.path.exists(os.path.join(self.root, 'mimetype')):
			os.remove(os.path.join(self.root, 'mimetype'))

		container_path = os.path.join(self.root, 'META-INF', 'container.xml')
		if not os.path.exists(container_path):
			raise InvalidEpub('No META-INF/container.xml in epub')
		with open(container_path, 'rb') as container_file:
			self.container = etree.fromstring(container_file.read())
		opf_files = self.container.xpath((r'child::ocf:rootfiles/ocf:rootfile[@media-type="{0}" and @full-path]'.format(guess_type('a.opf')[0])), namespaces = self.namespaces)
		if not opf_files:
			raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
		opf_path = os.path.join(self.root, *opf_files[0].get('full-path').split('/'))
		if not os.path.exists(opf_path):
			raise InvalidEpub('OPF file does not exist at location pointed to by META-INF/container.xml')

		# Map of relative paths with / separators to absolute
		# paths on filesystem with os separators
		self.name_map = {}
		for dirpath, _dirnames, filenames in os.walk(self.root):
			for f in filenames:
				# Separate local name so the ``path`` parameter is not shadowed.
				abs_path = os.path.join(dirpath, f)
				name = os.path.relpath(abs_path, self.root).replace(os.sep, '/')
				self.name_map[name] = abs_path
				self.mime_map[name] = guess_type(f)[0]
				if abs_path == opf_path:
					self.opf_name = name
					self.mime_map[name] = guess_type('a.opf')[0]

		opf = self.opf
		for item in opf.xpath('//opf:manifest/opf:item[@href and @media-type]', namespaces = self.namespaces):
			href = unquote(item.get('href'))
			item.set("href", href)
			self.mime_map[self.href_to_name(href, os.path.dirname(self.opf_name).replace(os.sep, '/'))] = item.get('media-type')
		self.set(self.opf_name, opf)
Ejemplo n.º 23
0
def ACQUISITION_ENTRY(book_id, updated, request_context):
    '''Build the feed ``entry`` element for one book.

    The entry carries title, author, id, update and publication stamps, an optional
    publication date, an XHTML ``content`` block (rating, tags, series, displayable
    custom fields and comments), one acquisition link per format with a known MIME
    type, and cover/thumbnail links.

    :param book_id: Id of the book in ``request_context.db``.
    :param updated: Not used by this implementation.
    :param request_context: Provides ``db``, ``ctx`` and ``library_id``.
    :returns: The entry element.
    '''
    db = request_context.db
    field_metadata = db.field_metadata
    mi = db.get_metadata(book_id)

    # HTML fragments that end up in the entry's content block.
    parts = []
    if mi.rating > 0:
        parts.append(_('RATING: %s<br />') % rating_to_stars(mi.rating))
    if mi.tags:
        parts.append(_('TAGS: %s<br />') % xml(format_tag_string(mi.tags, None)))
    if mi.series:
        series_info = dict(series=xml(mi.series), sidx=fmt_sidx(float(mi.series_index)))
        parts.append(_('SERIES: %(series)s [%(sidx)s]<br />') % series_info)

    displayable_keys = filter(request_context.ctx.is_field_displayable, field_metadata.ignorable_field_keys())
    for key in displayable_keys:
        name, val = mi.format_field(key)
        if not val:
            continue
        fm = field_metadata[key]
        datatype = fm['datatype']
        if datatype == 'text' and fm['is_multiple']:
            multi = fm['is_multiple']
            rendered = xml(format_tag_string(val, multi['ui_to_list'], joinval=multi['list_to_ui']))
        elif datatype == 'comments' or (
                datatype == 'composite' and fm['display'].get('contains_html', False)):
            rendered = comments_to_html(unicode(val))
        else:
            rendered = xml(unicode(val))
        parts.append('%s: %s<br />' % (xml(name), rendered))

    if mi.comments:
        parts.append(comments_to_html(mi.comments))

    content = html_to_lxml('\n'.join(parts)) if parts else None

    ans = E.entry(TITLE(mi.title), E.author(E.name(authors_to_string(mi.authors))), ID('urn:uuid:' + mi.uuid), UPDATED(mi.last_modified),
                  E.published(mi.timestamp.isoformat()))
    if mi.pubdate and not is_date_undefined(mi.pubdate):
        date_elem = ans.makeelement('{%s}date' % DC_NS)
        date_elem.text = mi.pubdate.isoformat()
        ans.append(date_elem)
    # Only emit a content block when the parsed fragment has children.
    if content is not None and len(content):
        ans.append(E.content(content, type='xhtml'))

    get = partial(request_context.ctx.url_for, '/get', book_id=book_id, library_id=request_context.library_id)
    if mi.formats:
        format_info = mi.format_metadata
        for fmt in mi.formats:
            fmt = fmt.lower()
            mt = guess_type('a.'+fmt)[0]
            if not mt:
                continue
            link = E.link(type=mt, href=get(what=fmt), rel="http://opds-spec.org/acquisition")
            ffm = format_info.get(fmt.upper())
            if ffm:
                link.set('length', str(ffm['size']))
                link.set('mtime', ffm['mtime'].isoformat())
            ans.append(link)

    for what, rel in (('cover', "http://opds-spec.org/cover"), ('thumb', "http://opds-spec.org/thumbnail")):
        ans.append(E.link(type='image/jpeg', href=get(what=what), rel=rel))

    return ans
Ejemplo n.º 24
0
def ACQUISITION_ENTRY(book_id, updated, request_context):
    """Build the OPDS ``entry`` element describing a single book.

    The entry carries the title, authors, a ``urn:uuid:`` id and the
    ``updated`` stamp, an optional XHTML ``content`` element (rating, tags,
    series, displayable custom fields and comments), one acquisition link
    per format whose MIME type can be guessed, and cover/thumbnail links.

    :param book_id: id of the book, passed to ``db.get_metadata`` and used
        when building ``/get`` URLs.
    :param updated: value handed unchanged to ``UPDATED``.
    :param request_context: supplies ``db``, ``ctx`` and ``library_id``.
    :return: the element created by ``E.entry``.
    """
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)

    # HTML fragments, joined and parsed once all of them are collected.
    fragments = []
    if mi.rating > 0:
        # One star per two rating points.
        stars = u"\u2605" * int(mi.rating / 2.0)
        fragments.append(_("RATING: %s<br />") % stars)
    if mi.tags:
        fragments.append(_("TAGS: %s<br />") % xml(format_tag_string(mi.tags, None)))
    if mi.series:
        series_info = {"series": xml(mi.series), "sidx": fmt_sidx(float(mi.series_index))}
        fragments.append(_("SERIES: %(series)s [%(sidx)s]<br />") % series_info)

    displayable = filter(request_context.ctx.is_field_displayable, field_metadata.ignorable_field_keys())
    for key in displayable:
        name, val = mi.format_field(key)
        if not val:
            continue
        fm = field_metadata[key]
        datatype = fm["datatype"]
        label = xml(name)
        if datatype == "text" and fm["is_multiple"]:
            multi = fm["is_multiple"]
            rendered = xml(format_tag_string(val, multi["ui_to_list"], joinval=multi["list_to_ui"]))
        elif datatype == "comments" or (
            fm["datatype"] == "composite" and fm["display"].get("contains_html", False)
        ):
            # Values that already contain HTML are converted, not escaped.
            rendered = comments_to_html(unicode(val))
        else:
            rendered = xml(unicode(val))
        fragments.append("%s: %s<br />" % (label, rendered))

    if mi.comments:
        fragments.append(comments_to_html(mi.comments))

    # Stays an empty list when nothing was collected; otherwise becomes the
    # parsed element, whose len() is its number of children.
    body = html_to_lxml("\n".join(fragments)) if fragments else fragments

    entry = E.entry(
        TITLE(mi.title), E.author(E.name(authors_to_string(mi.authors))), ID("urn:uuid:" + mi.uuid), UPDATED(updated)
    )
    if len(body):
        entry.append(E.content(body, type="xhtml"))

    get = partial(request_context.ctx.url_for, "/get", book_id=book_id, library_id=request_context.library_id)
    for fmt in mi.formats or ():
        fmt = fmt.lower()
        mt = guess_type("a." + fmt)[0]
        if not mt:
            # Formats with no known MIME type get no acquisition link.
            continue
        entry.append(E.link(type=mt, href=get(what=fmt), rel="http://opds-spec.org/acquisition"))
    for what, rel in (("cover", "http://opds-spec.org/cover"), ("thumb", "http://opds-spec.org/thumbnail")):
        entry.append(E.link(type="image/jpeg", href=get(what=what), rel=rel))

    return entry
Ejemplo n.º 25
0
    def __init__(self, path, log):
        '''
        Open an exploded EPUB directory and index its contents.

        Removes any top-level ``mimetype`` file, parses
        ``META-INF/container.xml`` to locate the OPF, builds ``name_map``
        (archive-relative name -> absolute filesystem path) for every file
        under the root, and fills ``mime_map`` from the OPF manifest.

        :param path: Directory containing the unpacked EPUB.
        :param log: Logger object stored as ``self.log``.
        :raises InvalidEpub: if ``container.xml`` is missing, has no OPF
            rootfile entry, or points at an OPF that does not exist.
        '''
        self.root = os.path.abspath(path)
        self.log = log
        self.dirtied = set()
        self.cache = {}
        self.mime_map = {}

        if exists(join(self.root, 'mimetype')):
            os.remove(join(self.root, 'mimetype'))

        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        # Close the handle deterministically instead of relying on the
        # garbage collector.
        with open(container_path, 'rb') as f:
            self.container = etree.fromstring(f.read())
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
        # full-path uses / separators; convert to a native path.
        opf_path = os.path.join(self.root,
                *opf_files[0].get('full-path').split('/'))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')

        # Map of relative paths with / separators to absolute
        # paths on filesystem with os separators
        self.name_map = {}
        for dirpath, dirnames, filenames in os.walk(self.root):
            for f in filenames:
                abspath = join(dirpath, f)
                name = os.path.relpath(abspath, self.root).replace(os.sep, '/')
                self.name_map[name] = abspath
                if abspath == opf_path:
                    self.opf_name = name
                    self.mime_map[name] = guess_type('a.opf')[0]

        # Record the declared media type of every manifest item.
        for item in self.opf.xpath(
                '//opf:manifest/opf:item[@href and @media-type]',
                namespaces={'opf':OPF_NS}):
            href = item.get('href')
            self.mime_map[self.href_to_name(href,
                posixpath.dirname(self.opf_name))] = item.get('media-type')
Ejemplo n.º 26
0
 def __call__(self, oeb):
     """Return the href of the page-styles stylesheet, creating it on first use.

     Returns ``None`` when no body font family is set. Otherwise the rules
     in ``self.rules`` are serialized into a new ``page_styles.css``
     manifest item the first time this is called, and the stored href is
     returned on every call.
     """
     if not self.body_font_family:
         return None
     if self.href:
         # Stylesheet was already added to the manifest.
         return self.href
     item_id, sheet_href = oeb.manifest.generate(u'page_styles', u'page_styles.css')
     css_text = u'\n\n'.join([rule.cssText for rule in self.rules])
     stylesheet = cssutils.parseString(css_text, validate=False)
     manifest_item = oeb.manifest.add(
         item_id, sheet_href, guess_type(sheet_href)[0], data=stylesheet)
     self.href = manifest_item.href
     return self.href
Ejemplo n.º 27
0
    def get_format(self, id, format):
        '''
        Serve the file of book ``id`` in the given ``format`` as a download.

        Sets the Last-Modified, Content-Type, Content-Length and
        Content-Disposition response headers, optionally rewrites the
        file's embedded metadata, and returns the open file object, which
        is also installed as the cherrypy response body.

        :param id: Book id in the database.
        :param format: Format name; compared case-insensitively.
        :raises cherrypy.HTTPError: 404 if the book has no such format.
        '''
        format = format.upper()
        fm = self.db.format_metadata(id, format, allow_cache=False)
        if not fm:
            raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
        # Only these formats have their embedded metadata rewritten below.
        update_metadata = format in {'MOBI', 'EPUB', 'AZW3'}
        mi = newmi = self.db.get_metadata(
            id, index_is_id=True, cover_as_data=True, get_cover=update_metadata)

        cherrypy.response.headers['Last-Modified'] = \
            self.last_modified(max(fm['mtime'], mi.last_modified))

        fmt = self.db.format(id, format, index_is_id=True, as_file=True,
                mode='rb')
        if fmt is None:
            raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
        mt = guess_type('dummy.'+format.lower())[0]
        if mt is None:
            mt = 'application/octet-stream'
        cherrypy.response.headers['Content-Type'] = mt

        if format.lower() in plugboard_content_server_formats:
            # Get any plugboards for the content server
            plugboards = self.db.prefs.get('plugboards', {})
            cpb = find_plugboard(plugboard_content_server_value,
                                 format.lower(), plugboards)
            if cpb:
                # Transform the metadata via the plugboard
                newmi = mi.deepcopy_metadata()
                newmi.template_to_attribute(mi, cpb)

        if update_metadata:
            # Write the updated file
            from calibre.ebooks.metadata.meta import set_metadata
            set_metadata(fmt, newmi, format.lower())
            fmt.seek(0)

        # Seek to the end to measure the (possibly rewritten) file, then
        # rewind so the body is streamed from the start.
        fmt.seek(0, 2)
        cherrypy.response.headers['Content-Length'] = fmt.tell()
        fmt.seek(0)

        ua = cherrypy.request.headers.get('User-Agent', '').strip()
        have_kobo_browser = self.is_kobo_browser(ua)
        # Kobo browsers get KEPUB files offered with a ".kepub.epub" suffix.
        file_extension = "kepub.epub" if have_kobo_browser and format.lower() == "kepub" else format

        au = authors_to_string(newmi.authors if newmi.authors else
                [_('Unknown')])
        title = newmi.title if newmi.title else _('Unknown')
        # Title and author are truncated to 30 characters each for the
        # suggested download file name.
        fname = u'%s - %s_%s.%s'%(title[:30], au[:30], id, file_extension.lower())
        fname = ascii_filename(fname).replace('"', '_')
        cherrypy.response.headers['Content-Disposition'] = \
                b'attachment; filename="%s"'%fname
        cherrypy.response.body = fmt
        cherrypy.response.timeout = 3600
        return fmt
Ejemplo n.º 28
0
 def OutputImageFiles(self, path):
     """Write every image in ``self.files`` into the directory ``path``.

     A file counts as an image when its extension is exactly one of
     ``.jpeg``, ``.jpg``, ``.gif``, ``.svg`` or ``.png`` (the comparison
     is case-sensitive). Each image is written under its base name.

     :param path: Existing directory to write the images into.
     :return: List of ``(file name, MIME type)`` tuples, one per image
         written, in the order of ``self.files``.
     """
     fileNames = []
     for f in self.files:
         fname = os.path.basename(f.fileName)
         ext = os.path.splitext(fname)[1]
         if ext in ('.jpeg', '.jpg', '.gif', '.svg', '.png'):
             # The context manager closes the handle even if write() fails.
             with open(os.path.join(path, fname), 'wb') as out:
                 out.write(f.fileBody)
             fileNames.append((fname, guess_type('a'+ext)[0]))
     return fileNames
Ejemplo n.º 29
0
	def add_name_to_manifest(self, name, mt = None):
		'''
		Add a manifest ``item`` for ``name`` to the OPF, unless one exists.

		:param name: Archive-relative name of the file to register.
		:param mt: Media type to record. When empty it is guessed from the
			file name, falling back to ``application/octet-stream``.
		'''
		item = self.manifest_item_for_name(name)
		if item is not None:
			# Already listed in the manifest; nothing to do.
			return
		manifest = self.opf.xpath('//opf:manifest', namespaces = {'opf': self.OPF_NS})[0]
		item = manifest.makeelement('{%s}item' % self.OPF_NS, nsmap = {'opf': self.OPF_NS}, href = self.name_to_href(name, posixpath.dirname(self.opf_name)), id = self.generate_manifest_id())
		if not mt:
			mt = guess_type(posixpath.basename(name))[0]
		if not mt:
			# Generic binary type (previously misspelled "octest-stream").
			mt = 'application/octet-stream'
		item.set('media-type', mt)
		manifest.append(item)
		self.fix_tail(item)
Ejemplo n.º 30
0
    def get_embed_font_info(self, family, failure_critical=True):
        '''
        Add every face of ``family`` to the manifest and build its
        ``@font-face`` rules.

        :param family: Font family name. A falsy value short-circuits and
            returns ``(None, [])``.
        :param failure_critical: When true, a family with no faces raises
            ``ValueError``; otherwise a warning is logged and
            ``(None, [])`` is returned.
        :return: ``(body_font_family, efi)`` where ``body_font_family`` is
            a CSS font-family value built from the first face and ``efi``
            is the list of parsed ``@font-face`` style sheets.
        :raises ValueError: if no faces are found and ``failure_critical``
            is true.
        '''
        efi = []
        body_font_family = None
        if not family:
            return body_font_family, efi
        from calibre.utils.fonts.scanner import font_scanner, NoFonts
        from calibre.utils.fonts.utils import panose_to_css_generic_family
        try:
            faces = font_scanner.fonts_for_family(family)
        except NoFonts:
            msg = (u'No embeddable fonts found for family: %r'%family)
            if failure_critical:
                raise ValueError(msg)
            self.oeb.log.warn(msg)
            return body_font_family, efi
        # An empty result is handled the same way as NoFonts.
        if not faces:
            msg = (u'No embeddable fonts found for family: %r'%family)
            if failure_critical:
                raise ValueError(msg)
            self.oeb.log.warn(msg)
            return body_font_family, efi

        for i, font in enumerate(faces):
            ext = 'otf' if font['is_otf'] else 'ttf'
            fid, href = self.oeb.manifest.generate(id=u'font',
                href=u'fonts/%s.%s'%(ascii_filename(font['full_name']).replace(u' ', u'-'), ext))
            item = self.oeb.manifest.add(fid, href,
                    guess_type('dummy.'+ext)[0],
                    data=font_scanner.get_font_data(font))
            item.unload_data_from_memory()

            cfont = {
                    u'font-family':u'"%s"'%font['font-family'],
                    u'panose-1': u' '.join(map(unicode_type, font['panose'])),
                    u'src': u'url(%s)'%item.href,
            }

            if i == 0:
                # The first face determines the body font-family value.
                generic_family = panose_to_css_generic_family(font['panose'])
                body_font_family = u"'%s',%s"%(font['font-family'], generic_family)
                self.oeb.log(u'Embedding font: %s'%font['font-family'])
            # Only non-default descriptors are written into the rule.
            for k in (u'font-weight', u'font-style', u'font-stretch'):
                if font[k] != u'normal':
                    cfont[k] = font[k]
            rule = '@font-face { %s }'%('; '.join(u'%s:%s'%(k, v) for k, v in
                iteritems(cfont)))
            rule = css_parser.parseString(rule)
            efi.append(rule)

        return body_font_family, efi
Ejemplo n.º 31
0
 def do_embed(f):
     """Embed font face ``f`` and return its ``@font-face`` rule.

     The font data is added to the manifest under ``fonts/``, a matching
     ``@font-face`` rule is appended to the page stylesheet, and the first
     rule that ``find_font_face_rules`` locates in the freshly parsed
     sheet is returned.
     """
     font_data = font_scanner.get_font_data(f)
     extension = 'otf' if f['is_otf'] else 'ttf'
     # Build a file name without spaces or parentheses.
     base = ascii_filename(f['full_name'])
     for old, new in ((' ', '-'), ('(', ''), (')', '')):
         base = base.replace(old, new)
     font_id, font_href = self.oeb.manifest.generate(
         id=u'font', href=u'fonts/%s.%s'%(base, extension))
     font_item = self.oeb.manifest.add(
         font_id, font_href, guess_type('dummy.'+extension)[0], data=font_data)
     font_item.unload_data_from_memory()

     page_sheet = self.get_page_sheet()
     # The src URL must be relative to the stylesheet holding the rule.
     rel_href = page_sheet.relhref(font_item.href)
     descriptors = (f['font-family'], f['font-weight'], f['font-style'],
                    f['font-stretch'], rel_href)
     css = '''@font-face { font-family: "%s"; font-weight: %s; font-style: %s; font-stretch: %s; src: url(%s) }''' % descriptors
     parsed = self.parser.parseString(css, validate=False)
     existing = page_sheet.data.cssRules
     page_sheet.data.insertRule(parsed.cssRules[0], len(existing))
     return find_font_face_rules(parsed, self.oeb)[0]
Ejemplo n.º 32
0
    def get_embed_font_info(self, family, failure_critical=True):
        '''
        Add every face of ``family`` to the manifest and build its
        ``@font-face`` rules.

        :param family: Font family name. A falsy value short-circuits and
            returns ``(None, [])``.
        :param failure_critical: When true, a family with no faces raises
            ``ValueError``; otherwise a warning is logged and
            ``(None, [])`` is returned.
        :return: ``(body_font_family, efi)`` where ``body_font_family`` is
            a CSS font-family value built from the first face and ``efi``
            is the list of parsed ``@font-face`` style sheets.
        :raises ValueError: if no faces are found and ``failure_critical``
            is true.
        '''
        efi = []
        body_font_family = None
        if not family:
            return body_font_family, efi
        from calibre.utils.fonts.scanner import font_scanner
        from calibre.utils.fonts.utils import panose_to_css_generic_family
        faces = font_scanner.fonts_for_family(family)
        if not faces:
            # Report the family that was actually looked up, which is not
            # necessarily the one configured in self.opts.
            msg = u'No embeddable fonts found for family: %r' % family
            if failure_critical:
                raise ValueError(msg)
            self.oeb.log.warn(msg)
            return body_font_family, efi

        for i, font in enumerate(faces):
            ext = 'otf' if font['is_otf'] else 'ttf'
            fid, href = self.oeb.manifest.generate(
                id=u'font',
                href=u'fonts/%s.%s' %
                (ascii_filename(font['full_name']).replace(u' ', u'-'), ext))
            item = self.oeb.manifest.add(fid,
                                         href,
                                         guess_type('dummy.' + ext)[0],
                                         data=font_scanner.get_font_data(font))
            item.unload_data_from_memory()

            cfont = {
                u'font-family': u'"%s"' % font['font-family'],
                u'panose-1': u' '.join(map(unicode, font['panose'])),
                u'src': u'url(%s)' % item.href,
            }

            if i == 0:
                # The first face determines the body font-family value.
                generic_family = panose_to_css_generic_family(font['panose'])
                body_font_family = u"'%s',%s" % (font['font-family'],
                                                 generic_family)
                self.oeb.log(u'Embedding font: %s' % font['font-family'])
            # Only non-default descriptors are written into the rule.
            for k in (u'font-weight', u'font-style', u'font-stretch'):
                if font[k] != u'normal':
                    cfont[k] = font[k]
            rule = '@font-face { %s }' % ('; '.join(
                u'%s:%s' % (k, v) for k, v in cfont.iteritems()))
            rule = cssutils.parseString(rule)
            efi.append(rule)

        return body_font_family, efi
Ejemplo n.º 33
0
def unregister():
    """Remove the registry entries of every default program.

    For each program this deletes its value under
    ``Software\\RegisteredApplications``, the key tree holding its
    capabilities, and, for every extension with a known MIME type, the
    ``OpenWithProgIDs`` value and the prog-id class tree.
    """
    for program, data in default_programs().iteritems():
        capabilities_path = cap_path(data).rpartition('\\')[0]

        # Keep only the extensions whose MIME type can be guessed.
        known_exts = {}
        for ext in extensions(program):
            ext = ext.lower()
            mime = guess_type('file.' + ext)[0]
            if mime:
                known_exts[ext] = mime
        prog_id_map = {}
        for ext in known_exts:
            prog_id_map[ext] = progid_name(data['assoc_name'], ext)

        with Key(r'Software\RegisteredApplications') as key:
            key.delete_value(data['name'])

        # Split into the parent key and the subkey tree to delete.
        parent, _sep, sk = capabilities_path.rpartition('\\')
        with Key(parent) as key:
            key.delete_tree(sk)

        for ext, prog_id in prog_id_map.iteritems():
            with Key(r'Software\Classes\.%s\OpenWithProgIDs' % ext) as key:
                key.delete_value(prog_id)
            with Key(r'Software\Classes') as key:
                key.delete_tree(prog_id)
Ejemplo n.º 34
0
 def set_cover(self, mi, prefer_metadata_cover):
     """Install the cover from metadata ``mi`` into the book.

     :param mi: Metadata object; ``mi.cover`` (a readable file path) is
         tried first, then ``mi.cover_data`` (an ``(ext, data)`` pair).
     :param prefer_metadata_cover: When true and the guide already has a
         cover, the cover data from ``mi`` is discarded.
     :return: Manifest id of the cover item, or ``None`` when there is
         neither new cover data nor an existing guide cover.
     """
     cdata, ext = b'', 'jpg'
     if mi.cover and os.access(mi.cover, os.R_OK):
         with open(mi.cover, 'rb') as f:
             cdata = f.read()
         ext = mi.cover.rpartition('.')[-1].lower().strip()
     elif mi.cover_data and mi.cover_data[-1]:
         cdata = mi.cover_data[1]
         ext = mi.cover_data[0]
     # Any extension outside this list is treated as jpg.
     if ext not in ('png', 'jpg', 'jpeg'):
         ext = 'jpg'
     id = old_cover = None
     if 'cover' in self.oeb.guide:
         old_cover = self.oeb.guide['cover']
     if prefer_metadata_cover and old_cover is not None:
         # Keep the existing cover by dropping the new data.
         cdata = b''
     if cdata:
         self.oeb.guide.remove('cover')
         self.oeb.guide.remove('titlepage')
     elif self.oeb.plumber_output_format in {'mobi', 'azw3'
                                             } and old_cover is not None:
         # The amazon formats dont support html cover pages, so remove them
         # even if no cover was specified.
         self.oeb.guide.remove('titlepage')
     do_remove_old_cover = False
     if old_cover is not None:
         if old_cover.href in self.oeb.manifest.hrefs:
             item = self.oeb.manifest.hrefs[old_cover.href]
             if not cdata:
                 # No replacement: the existing manifest item is the cover.
                 return item.id
             do_remove_old_cover = True
         elif not cdata:
             # The guide references a cover that is missing from the
             # manifest; register it (as image/jpeg) and return its id.
             id = self.oeb.manifest.generate(id='cover')[0]
             self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
             return id
     new_cover_item = None
     if cdata:
         id, href = self.oeb.manifest.generate('cover', 'cover.' + ext)
         new_cover_item = self.oeb.manifest.add(id,
                                                href,
                                                guess_type('cover.' +
                                                           ext)[0],
                                                data=cdata)
         self.oeb.guide.add('cover', 'Cover', href)
     # do_remove_old_cover is only set when cdata is non-empty, so
     # new_cover_item is always set here.
     if do_remove_old_cover:
         self.remove_old_cover(item, new_cover_item.href)
     return id
Ejemplo n.º 35
0
    def insert_cover(self):
        '''
        Make sure the book starts with a title page showing the cover.

        If the guide has no ``titlepage`` entry, a ``titlepage.xhtml`` item
        is generated from the SVG or non-SVG template around the cover
        image; otherwise the existing title page item is reused. The item
        is inserted at the start of the spine and the guide (and the TOC
        entry referring to the cover, if any) is pointed at it. Returns
        ``None``; does nothing when no cover image is available.
        '''
        from calibre.ebooks.oeb.base import urldefrag
        g, m = self.oeb.guide, self.oeb.manifest
        item = None
        if 'titlepage' not in g:
            if 'cover' in g:
                href = g['cover'].href
            else:
                href = self.default_cover()
            if href is None:
                return
            width, height = self.inspect_cover(href)
            if width is None or height is None:
                self.log.warning('Failed to read cover dimensions')
                width, height = 600, 800
            #if self.preserve_aspect_ratio:
            #    width, height = 600, 800
            # NOTE: the placeholders are substituted on the attribute
            # itself, so self.svg_template is modified by this call.
            self.svg_template = self.svg_template.replace(
                '__viewbox__', '0 0 %d %d' % (width, height))
            self.svg_template = self.svg_template.replace(
                '__width__', str(width))
            self.svg_template = self.svg_template.replace(
                '__height__', str(height))

            # href cannot be None here because of the early return above.
            if href is not None:
                templ = self.non_svg_template if self.no_svg_cover \
                        else self.svg_template
                tp = templ % unquote(href)
                id, href = m.generate('titlepage', u'titlepage.xhtml')
                item = m.add(id,
                             href,
                             guess_type('t.xhtml')[0],
                             data=etree.fromstring(tp))
        else:
            # Reuse the existing title page, ignoring any URL fragment.
            item = self.oeb.manifest.hrefs[urldefrag(
                self.oeb.guide['titlepage'].href)[0]]
        if item is not None:
            self.oeb.spine.insert(0, item, False)
            if 'cover' not in self.oeb.guide.refs:
                # The placeholder href 'a' is overwritten on the next line.
                self.oeb.guide.add('cover', 'Title Page', 'a')
            self.oeb.guide.refs['cover'].href = item.href
            if 'titlepage' in self.oeb.guide.refs:
                self.oeb.guide.refs['titlepage'].href = item.href
            titem = getattr(self.oeb.toc, 'item_that_refers_to_cover', None)
            if titem is not None:
                titem.href = item.href
Ejemplo n.º 36
0
 def write(self, path):
     """Flush modified files to disk and pack the container into ``path``.

     Every name in ``self.dirtied`` is serialized from ``self.cache`` back
     to its file (parsed trees as UTF-8 XML with a declaration, anything
     else written as-is). The archive is then created with an uncompressed
     ``mimetype`` entry written first, followed by the contents of
     ``self.root``.

     :param path: Destination path of the archive to create.
     """
     for name in self.dirtied:
         data = self.cache[name]
         raw = data
         if hasattr(data, 'xpath'):
             # Parsed trees must be serialized before writing.
             raw = etree.tostring(data,
                                  encoding='utf-8',
                                  xml_declaration=True)
         with open(self.name_map[name], 'wb') as f:
             f.write(raw)
     self.dirtied.clear()
     zf = ZipFile(path, 'w')
     try:
         # NOTE(review): bytes(str) only works on Python 2; confirm the
         # target interpreter before porting.
         zf.writestr('mimetype',
                     bytes(guess_type('a.epub')[0]),
                     compression=ZIP_STORED)
         zf.add_dir(self.root)
     finally:
         # Always release the archive handle, even if writing fails.
         zf.close()
Ejemplo n.º 37
0
 def contenttypes(self):
     """Return the serialized content-types XML for the document.

     The ``Types`` root receives one ``Override`` per fixed part name, one
     ``Default`` per image/XML extension (MIME type guessed from the
     extension), and one ``Default`` for each of the ``rels`` and
     ``odttf`` extensions.
     """
     ct_ns = namespaces['ct']
     E = ElementMaker(namespace=ct_ns, nsmap={None: ct_ns})
     root = E.Types()

     part_overrides = {
         "/word/footnotes.xml":
         "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
         "/word/document.xml":
         "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
         "/word/numbering.xml":
         "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
         "/word/styles.xml":
         "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
         "/word/endnotes.xml":
         "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
         "/word/settings.xml":
         "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
         "/word/theme/theme1.xml":
         "application/vnd.openxmlformats-officedocument.theme+xml",
         "/word/fontTable.xml":
         "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
         "/word/webSettings.xml":
         "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
         "/docProps/core.xml":
         "application/vnd.openxmlformats-package.core-properties+xml",
         "/docProps/app.xml":
         "application/vnd.openxmlformats-officedocument.extended-properties+xml",
     }
     for partname, content_type in part_overrides.iteritems():
         root.append(E.Override(PartName=partname, ContentType=content_type))

     # Extensions whose MIME type is looked up rather than hard-coded.
     added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
     for ext in added:
         guessed = guess_type('a.' + ext)[0]
         root.append(E.Default(Extension=ext, ContentType=guessed))

     fixed_defaults = {
         "rels":
         "application/vnd.openxmlformats-package.relationships+xml",
         "odttf":
         "application/vnd.openxmlformats-officedocument.obfuscatedFont",
     }
     for ext, content_type in fixed_defaults.iteritems():
         added.add(ext)
         root.append(E.Default(Extension=ext, ContentType=content_type))
     # TODO: Iterate over all resources and add mimetypes for any that are
     # not already added
     return xml2str(root)
Ejemplo n.º 38
0
def find_programs(extensions):
    """Find installed applications that declare support for the given
    file extensions.

    ``.desktop`` files are collected from the ``applications``
    sub-directory of ``XDG_DATA_HOME`` and each ``XDG_DATA_DIRS`` entry.
    When the same file name occurs in several directories, the one from
    the earliest directory wins.

    :param extensions: Iterable of file extensions (without the dot).
    :return: List of parsed desktop-entry dicts whose ``MimeType``
        intersects the MIME types of ``extensions``, sorted by name.
    """
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')
    ]
    data_dirs += (os.environ.get('XDG_DATA_DIRS')
                  or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [
        force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs
    ]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # The dict is keyed by base name, so the base name must be
                # tested. Testing the full path was always true and let
                # later directories override earlier ones.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in iteritems(desktop_files):
        try:
            data = parse_desktop_file(path)
        except Exception:
            # A broken desktop file must not abort the whole scan.
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            icon = data.get('Icon', {}).get(None)
            if icon and not os.path.isabs(icon):
                # Resolve icon names to file paths; drop unresolvable ones.
                icon = find_icons().get(icon)
                if icon:
                    data['Icon'] = icon
                else:
                    data.pop('Icon')
            if not isinstance(data.get('Icon'), string_or_bytes):
                data.pop('Icon', None)
            for k in ('Name', 'GenericName', 'Comment'):
                val = data.get(k)
                if val:
                    data[k] = localize_string(val)
            ans.append(data)
    ans.sort(key=lambda d: sort_key(d.get('Name')))
    return ans
Ejemplo n.º 39
0
    def do_send_mail(self, book, mail_to, fmt, fpath):
        '''
        E-mail the book file at ``fpath`` to ``mail_to`` as an attachment.

        The outcome is appended to ``messages`` as a dict with ``status``
        (``"success"`` or ``"danger"``) and ``msg`` (a confirmation text,
        or the traceback on failure). Failures are logged, not raised.

        :param book: Mapping providing ``authors`` and ``title``.
        :param mail_to: Recipient address.
        :param fmt: Format (file extension) of the attachment.
        :param fpath: Path of the book file to attach.
        '''
        # Book files are binary: read them as bytes and close the handle.
        with open(fpath, 'rb') as f:
            body = f.read()

        # read meta info
        author = authors_to_string(
            book['authors'] if book['authors'] else [_('Unknown')])
        title = book['title'] if book['title'] else _("No Title")
        fname = u'%s - %s.%s' % (title, author, fmt)
        fname = ascii_filename(fname).replace('"', '_')

        # content type
        mt = guess_type('dummy.' + fmt)[0]
        if mt is None:
            mt = 'application/octet-stream'

        # send mail
        mail_from = '*****@*****.**'
        mail_subject = _('Book from Calibre: %(title)s') % {'title': title}
        mail_body = _('We Send this book to your kindle.')
        status = msg = ""
        try:
            mail = create_mail(mail_from,
                               mail_to,
                               mail_subject,
                               text=mail_body,
                               attachment_data=body,
                               attachment_type=mt,
                               attachment_name=fname)
            sendmail(mail,
                     from_=mail_from,
                     to=[mail_to],
                     timeout=30,
                     username=tweaks['smtp_username'],
                     password=tweaks['smtp_password'])
            status = "success"
            msg = _('Send to kindle success!! email: %(mail_to)s') % {
                'mail_to': mail_to}
        except Exception:
            # Best effort: report the failure to the user instead of
            # propagating it. Interpreter-exit exceptions are not swallowed.
            import traceback
            cherrypy.log.error('Failed to send mail:')
            cherrypy.log.error(traceback.format_exc())
            status = "danger"
            msg = traceback.format_exc()
        messages.append({'status': status, 'msg': msg})
Ejemplo n.º 40
0
 def __new__(cls,
             path,
             mime_type=None,
             read_anchor_map=True,
             run_char_count=True,
             from_epub=False,
             read_links=True):
     """Create a spine item for the file at ``path`` and analyse its text.

     :param path: Path of the file. If it does not exist but the part
         before ``#`` does, that part is used instead.
     :param mime_type: MIME type to record; guessed from the item when
         ``None``.
     :param read_anchor_map: Compute ``anchor_map``; ``{}`` otherwise.
     :param run_char_count: Compute ``character_count``; otherwise a fixed
         10000 is stored.
     :param from_epub: Try decoding as UTF-8 before falling back to
         ``xml_to_unicode``.
     :param read_links: Compute ``all_links``; an empty set otherwise.
     """
     ppath = path.partition('#')[0]
     if not os.path.exists(path) and os.path.exists(ppath):
         path = ppath
     obj = super(SpineItem, cls).__new__(cls, path)
     with open(path, 'rb') as f:
         raw = f.read()
     if from_epub:
         # According to the spec, HTML in EPUB must be encoded in utf-8 or
         # utf-16. Furthermore, there exist epub files produced by the usual
         # incompetents that have utf-8 encoded HTML files that contain
         # incorrect encoding declarations. See
         # http://www.idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#Section1.4.1.2
         # http://www.idpf.org/epub/30/spec/epub30-publications.html#confreq-xml-enc
         # https://bugs.launchpad.net/bugs/1188843
         # So we first decode with utf-8 and only if that fails we try xml_to_unicode. This
         # is the same algorithm as that used by the conversion pipeline (modulo
         # some BOM based detection). Sigh.
         try:
             raw, obj.encoding = raw.decode('utf-8'), 'utf-8'
         except UnicodeDecodeError:
             raw, obj.encoding = xml_to_unicode(raw)
     else:
         raw, obj.encoding = xml_to_unicode(raw)
     obj.character_count = character_count(raw) if run_char_count else 10000
     obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
     obj.all_links = all_links(raw) if read_links else set()
     obj.verified_links = set()
     # -1 marks pagination values that have not been computed yet
     # (presumably filled in later by the caller; confirm).
     obj.start_page = -1
     obj.pages = -1
     obj.max_page = -1
     obj.index_entries = []
     if mime_type is None:
         mime_type = guess_type(obj)[0]
     obj.mime_type = mime_type
     obj.is_single_page = None
     return obj
Ejemplo n.º 41
0
    def get_format(self, id, format):
        '''
        Serve the file of book ``id`` in the given ``format`` as a download.

        Sets the Last-Modified, Content-Type, Content-Length and
        Content-Disposition headers, rewrites embedded metadata for MOBI
        and EPUB files, and returns the open file object positioned at its
        start.

        :param id: Book id in the database.
        :param format: Format name; compared case-insensitively.
        :raises web.HTTPError: 404 if the book has no such format.
        '''
        format = format.upper()
        fm = self.db.format_metadata(id, format, allow_cache=False)
        if not fm:
            raise web.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
        mi = newmi = self.db.get_metadata(id, index_is_id=True)
        self.set_header( 'Last-Modified', self.last_modified(max(fm['mtime'], mi.last_modified)) )
        fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
        if fmt is None:
            raise web.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
        mt = guess_type('dummy.'+format.lower())[0]
        if mt is None:
            mt = 'application/octet-stream'
        self.set_header( 'Content-Type', mt )

        if format == 'EPUB':
            # Get the original metadata
            # Get any EPUB plugboards for the content server
            plugboards = self.db.prefs.get('plugboards', {})
            cpb = find_plugboard(plugboard_content_server_value,
                                 'epub', plugboards)
            if cpb:
                # Transform the metadata via the plugboard
                newmi = mi.deepcopy_metadata()
                newmi.template_to_attribute(mi, cpb)

        if format in ('MOBI', 'EPUB'):
            # Write the updated file
            set_metadata(fmt, newmi, format.lower())
            fmt.seek(0)

        # Seek to the end to measure the (possibly rewritten) file, then
        # rewind so the caller streams it from the start.
        fmt.seek(0, 2)
        # The header name was previously misspelled "Content-Lenght", so
        # clients never received the real length.
        self.set_header( 'Content-Length', fmt.tell() )
        fmt.seek(0)

        au = authors_to_string(newmi.authors if newmi.authors else
                [_('Unknown')])
        title = newmi.title if newmi.title else _('Unknown')
        # Title and author are truncated to 30 characters each for the
        # suggested download file name.
        fname = u'%s - %s_%s.%s'%(title[:30], au[:30], id, format.lower())
        fname = ascii_filename(fname).replace('"', '_')
        self.set_header( 'Content-Disposition',
                b'attachment; filename="%s"'%fname )
        return fmt
Ejemplo n.º 42
0
def _parse_cover_data(root, imgid, mi, ctx):
    """Store the cover image found in ``fb:binary[@id=imgid]`` on ``mi``.

    The image extension comes from the element's ``content-type``
    (defaulting to ``image/jpeg``). If that type has no known extension
    but is an image type, the type guessed from ``imgid`` is tried
    instead. On success ``mi.cover_data`` becomes ``(extension, data)``;
    an unsupported type only prints a warning. Nothing happens when no
    matching binary element exists.
    """
    from calibre.ebooks.fb2 import base64_decode
    matches = ctx.XPath('//fb:binary[@id="%s"]'%imgid)(root)
    if not matches:
        return
    binary = matches[0]
    mimetype = binary.get('content-type', 'image/jpeg')
    extensions = guess_all_extensions(mimetype)

    if not extensions and mimetype.startswith('image/'):
        # Unknown image subtype: fall back to what the id looks like.
        guessed = guess_type(imgid)[0]
        if guessed and guessed.startswith('image/'):
            extensions = guess_all_extensions(guessed)

    if not extensions:
        prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" % (mimetype, imgid))
        return

    encoded = binary.text
    if encoded:
        # Drop the leading dot from the extension.
        mi.cover_data = (extensions[0][1:],
                base64_decode(encoded.strip()))
Ejemplo n.º 43
0
 def add_name_to_manifest(self, name, mt=None):
     """Add a manifest ``item`` for ``name`` to the OPF, unless one exists.

     :param name: Archive-relative name of the file to register.
     :param mt: Media type to record. When empty it is guessed from the
         file name, falling back to ``application/octet-stream``.
     """
     item = self.manifest_item_for_name(name)
     if item is not None:
         # Already listed in the manifest; nothing to do.
         return
     manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':
                                                             OPF_NS})[0]
     item = manifest.makeelement('{%s}item' % OPF_NS,
                                 nsmap={'opf': OPF_NS},
                                 href=self.name_to_href(
                                     name,
                                     posixpath.dirname(self.opf_name)),
                                 id=self.generate_manifest_id())
     if not mt:
         mt = guess_type(posixpath.basename(name))[0]
     if not mt:
         # Generic binary type (previously misspelled "octest-stream").
         mt = 'application/octet-stream'
     item.set('media-type', mt)
     manifest.append(item)
     self.fix_tail(item)
Ejemplo n.º 44
0
def compose_mail(from_, to, text, subject=None, attachment=None,
        attachment_name=None):
    """Build a mail message, optionally with a single attachment.

    :param from_: Sender address.
    :param to: Recipient address.
    :param text: Body text of the message.
    :param subject: Subject line; ``'no subject'`` when empty.
    :param attachment: Either a file-like object with ``read`` or a path
        to the file to attach; ``None`` for no attachment.
    :param attachment_name: File name shown for the attachment; defaults
        to the base name of the attachment's ``name`` or path.
    :return: Whatever ``create_mail`` returns.
    """
    attachment_type = attachment_data = None
    if attachment is not None:
        try:
            from calibre import guess_type
        except ImportError:
            from mimetypes import guess_type
        if hasattr(attachment, 'read'):
            attachment_data = attachment.read()
        else:
            # Opened here, so close it here as well.
            with open(attachment, 'rb') as f:
                attachment_data = f.read()
        # File objects expose the path as .name; plain paths are used as-is.
        source_name = getattr(attachment, 'name', attachment)
        attachment_type = guess_type(source_name)[0]
        if attachment_name is None:
            attachment_name = os.path.basename(source_name)
    subject = subject if subject else 'no subject'
    return create_mail(from_, to, subject, text=text,
            attachment_data=attachment_data, attachment_type=attachment_type,
            attachment_name=attachment_name)
Ejemplo n.º 45
0
def register():
    '''Register every program from ``default_programs()`` as a file handler.

    For each program this writes its capabilities key (description, name,
    icon, open command, per-extension file and MIME associations), creates
    a prog id for every supported extension and lists the program under the
    RegisteredApplications key. Once all programs are processed the shell
    is notified that associations changed.
    '''
    install_dir = os.path.dirname(sys.executable)

    for program, data in default_programs().iteritems():
        data = data.copy()
        exe = os.path.join(install_dir, program)
        capabilities_path = cap_path(data)

        # Lower-cased extension -> guessed MIME type (None when unknown).
        guessed = {}
        for raw_ext in extensions(program):
            lowered = raw_ext.lower()
            guessed[lowered] = guess_type('file.' + lowered)[0]
        # Only extensions with a known MIME type are registered.
        ext_map = {e: m for e, m in guessed.iteritems() if m}
        prog_id_map = {}
        for e in ext_map:
            prog_id_map[e] = progid_name(data['assoc_name'], e)

        # Registry value name -> key in ``data`` holding its value.
        app_fields = {
            'ApplicationDescription': 'description',
            'ApplicationName': 'name'
        }
        with Key(capabilities_path) as key:
            for value_name, data_key in app_fields.iteritems():
                key.set(value_name, data[data_key])
            key.set('ApplicationIcon', '%s,0' % exe)
            key.set_default_value(r'shell\open\command', '"%s" "%%1"' % exe)

            with Key('FileAssociations', root=key) as fak:
                with Key('MimeAssociations', root=key) as mak:
                    for ext, prog_id in prog_id_map.iteritems():
                        fak.set('.' + ext, prog_id)
                        mak.set(ext_map[ext], prog_id)

        for ext, prog_id in prog_id_map.iteritems():
            create_prog_id(ext, prog_id, ext_map, exe)

        with Key(r'Software\RegisteredApplications') as key:
            key.set(data['name'], capabilities_path)

    from win32com.shell import shell, shellcon
    notify_flags = shellcon.SHCNF_DWORD | shellcon.SHCNF_FLUSH
    shell.SHChangeNotify(shellcon.SHCNE_ASSOCCHANGED, notify_flags, 0, 0)
Ejemplo n.º 46
0
def register():
    '''Register every program from ``default_programs()`` as a file handler.

    For each program this writes its capabilities key (description, name,
    icon, open command, per-extension file and MIME associations), creates
    a prog id for every supported extension and lists the program under the
    RegisteredApplications key. Afterwards the ``winutil`` plugin is asked
    to announce that associations changed.
    '''
    install_dir = os.path.dirname(sys.executable)

    for program, data in iteritems(default_programs()):
        data = data.copy()
        exe = os.path.join(install_dir, program)
        capabilities_path = cap_path(data)

        # Lower-cased extension -> guessed MIME type (None when unknown).
        guessed = {}
        for raw_ext in extensions(program):
            lowered = raw_ext.lower()
            guessed[lowered] = guess_type('file.' + lowered)[0]
        # Only extensions with a known MIME type are registered.
        ext_map = {e: m for e, m in iteritems(guessed) if m}
        prog_id_map = {}
        for e in ext_map:
            prog_id_map[e] = progid_name(data['assoc_name'], e)

        # Registry value name -> key in ``data`` holding its value.
        app_fields = {
            'ApplicationDescription': 'description',
            'ApplicationName': 'name'
        }
        with Key(capabilities_path) as key:
            for value_name, data_key in iteritems(app_fields):
                key.set(value_name, data[data_key])
            key.set('ApplicationIcon', '%s,0' % exe)
            key.set_default_value(r'shell\open\command', '"%s" "%%1"' % exe)

            with Key('FileAssociations', root=key) as fak:
                with Key('MimeAssociations', root=key) as mak:
                    for ext, prog_id in iteritems(prog_id_map):
                        fak.set('.' + ext, prog_id)
                        mak.set(ext_map[ext], prog_id)

        for ext, prog_id in iteritems(prog_id_map):
            create_prog_id(ext, prog_id, ext_map, exe)

        with Key(r'Software\RegisteredApplications') as key:
            key.set(data['name'], capabilities_path)

    plugins['winutil'][0].notify_associations_changed()
Ejemplo n.º 47
0
 def add_name_to_manifest(self, name, mt=None):
     '''Add an ``<item>`` entry for ``name`` to the OPF manifest.

     Nothing happens when ``self.manifest_item_for_name(name)`` already
     finds an entry. Otherwise the item is appended to the manifest, the
     modified OPF is stored back via ``self.set`` and the name is recorded
     in ``self.name_map`` and ``self.mime_map``.

     :param name: name of the file to register; its href is computed
         relative to the directory of ``self.opf_name``.
     :param mt: media type to record. When falsy it is guessed from the
         base name of ``name``; if guessing fails as well, the generic
         ``application/octet-stream`` type is used.
     '''
     if self.manifest_item_for_name(name) is not None:
         return
     self.log.debug("Adding '{0}' to the manifest".format(name))
     manifest = self.opf.xpath('//opf:manifest',
                               namespaces=self.namespaces)[0]
     item = manifest.makeelement('{%s}item' % self.namespaces['opf'],
                                 href=self.name_to_href(
                                     name, os.path.dirname(self.opf_name)),
                                 id=self.generate_manifest_id())
     if not mt:
         mt = guess_type(os.path.basename(name))[0]
     if not mt:
         # Generic binary fallback. The previous spelling ('octest-stream')
         # is not a registered media type and would also have been stored
         # in self.mime_map below.
         mt = 'application/octet-stream'
     item.set('media-type', mt)
     manifest.append(item)
     self.fix_tail(item)
     self.set(self.opf_name, self.opf)
     self.name_map[name] = os.path.join(self.root, name)
     self.mime_map[name] = mt
Ejemplo n.º 48
0
 def add_formats_from_clipboard(self):
     '''Add the files referenced by the clipboard to the selected books.

     Readable local files on the clipboard are split by guessed MIME type:
     images are treated as a cover, everything else as a format to add.
     Only the first image is used, and the user is asked to confirm before
     one cover is applied to more than one book.
     '''
     ids = self._check_add_formats_ok()
     if not ids:
         return
     md = QApplication.instance().clipboard().mimeData()
     images, files_to_add = [], []
     urls = md.urls() if md.hasUrls() else ()
     for url in urls:
         if not url.isLocalFile():
             continue
         path = url.toLocalFile()
         if not os.access(path, os.R_OK):
             continue
         mt = guess_type(path)[0]
         if mt and mt.startswith('image/'):
             images.append(path)
         else:
             files_to_add.append(path)

     if not (files_to_add or images):
         return error_dialog(
             self.gui,
             _('No files in clipboard'),
             _('No files have been copied to the clipboard'),
             show=True)

     if files_to_add:
         self._add_formats(files_to_add, ids)
     if not images:
         return

     if len(ids) > 1:
         confirmed = question_dialog(
             self.gui, _('Are you sure?'),
             _('Are you sure you want to set the same'
               ' cover for all %d books?') % len(ids))
         if not confirmed:
             return
     with lopen(images[0], 'rb') as f:
         cdata = f.read()
     covers = {book_id: cdata for book_id in ids}
     self.gui.current_db.new_api.set_cover(covers)
     self.gui.refresh_cover_browser()
     # Re-emit "current changed" for the current row of the library view.
     view = self.gui.library_view
     current = view.currentIndex()
     view.model().current_changed(current, current)
Ejemplo n.º 49
0
 def __new__(cls, path, mime_type=None, read_anchor_map=True,
             run_char_count=True):
     '''Create a SpineItem for the file at ``path``.

     If ``path`` does not exist but the part before its first ``#`` does,
     that shorter path is used instead. The file is read and decoded with
     ``xml_to_unicode``; the detected encoding, character count, anchor map
     and MIME type are stored on the new object.

     :param mime_type: MIME type to record; guessed from the new object
         when None.
     :param read_anchor_map: when false, ``anchor_map`` is an empty dict.
     :param run_char_count: when false, ``character_count`` is set to
         10000 instead of being computed.
     '''
     without_fragment = path.partition('#')[0]
     if not os.path.exists(path) and os.path.exists(without_fragment):
         path = without_fragment
     obj = super(SpineItem, cls).__new__(cls, path)
     with open(path, 'rb') as stream:
         raw = stream.read()
     raw, obj.encoding = xml_to_unicode(raw)
     if run_char_count:
         obj.character_count = character_count(raw)
     else:
         obj.character_count = 10000
     if read_anchor_map:
         obj.anchor_map = anchor_map(raw)
     else:
         obj.anchor_map = {}
     # Paging information is filled in elsewhere; start with "unknown".
     obj.start_page = obj.pages = obj.max_page = -1
     obj.index_entries = []
     obj.mime_type = guess_type(obj)[0] if mime_type is None else mime_type
     return obj
Ejemplo n.º 50
0
 def embed_font(self, style):
     '''Embed a font matching ``style`` and return its @font-face rule.

     The first non-generic family in ``style['font-family']`` is looked up
     with ``font_scanner``. The first font whose weight, style and stretch
     match ``style`` is added to the manifest and an ``@font-face`` rule for
     it is appended to the page stylesheet. Returns None when there is
     nothing to embed: only generic families, a family already warned
     about or ``inherit``, no fonts found, an invalid weight, or no font
     matching the style.
     '''
     generic_families = {
         'serif', 'sansserif', 'sans-serif', 'fantasy', 'cursive', 'monospace'}
     families = [unicode(f) for f in style.get('font-family', [])
                 if unicode(f).lower() not in generic_families]
     if not families:
         return
     family = families[0]
     if family in self.warned or family == 'inherit':
         return
     try:
         fonts = font_scanner.fonts_for_family(family)
     except NoFonts:
         self.log.warn('Failed to find fonts for family:', family, 'not embedding')
         self.warned.add(family)
         return
     try:
         weight = int(style.get('font-weight', '400'))
     except (ValueError, TypeError, AttributeError):
         bad_weight = style['font-weight']
         # Warn only once per distinct invalid weight value.
         if bad_weight not in self.warned2:
             self.log.warn('Invalid weight in font style: %r' % bad_weight)
             self.warned2.add(bad_weight)
         return
     for f in fonts:
         if (f['weight'] != weight
                 or f['font-style'] != style.get('font-style', 'normal')
                 or f['font-stretch'] != style.get('font-stretch', 'normal')):
             continue
         self.log('Embedding font %s from %s' % (f['full_name'], f['path']))
         data = font_scanner.get_font_data(f)
         ext = 'otf' if f['is_otf'] else 'ttf'
         safe_name = ascii_filename(f['full_name'])
         for old, new in ((' ', '-'), ('(', ''), (')', '')):
             safe_name = safe_name.replace(old, new)
         fid, href = self.oeb.manifest.generate(
             id=u'font', href=u'fonts/%s.%s'%(safe_name, ext))
         item = self.oeb.manifest.add(
             fid, href, guess_type('dummy.'+ext)[0], data=data)
         item.unload_data_from_memory()
         page_sheet = self.get_page_sheet()
         href = page_sheet.relhref(item.href)
         css = '''@font-face { font-family: "%s"; font-weight: %s; font-style: %s; font-stretch: %s; src: url(%s) }''' % (
             f['font-family'], f['font-weight'], f['font-style'], f['font-stretch'], href)
         sheet = self.parser.parseString(css, validate=False)
         rules = page_sheet.data.cssRules
         page_sheet.data.insertRule(sheet.cssRules[0], len(rules))
         return find_font_face_rules(sheet, self.oeb)[0]
Ejemplo n.º 51
0
    def write(self, doc):
        '''Write the converted book into ``self.dest_dir``; return the OPF path.

        Produces ``index.html``, ``docx.css`` (only when CSS was generated),
        ``metadata.opf`` and ``toc.ncx``. The NCX file is deleted again if
        rendering left it empty.

        :param doc: passed to ``create_toc`` to build the table of contents.
        :return: path of the written ``metadata.opf``.
        '''
        def in_dest(filename):
            return os.path.join(self.dest_dir, filename)

        def process_guide(E, guide):
            # Add a guide reference to the ToC anchor when there is one.
            if self.toc_anchor is None:
                return
            guide.append(
                E.reference(href='index.html#' + self.toc_anchor,
                            title=_('Table of Contents'),
                            type='toc'))

        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles,
                         self.object_map, self.log, self.namespace)
        markup = html.tostring(self.html,
                               encoding='utf-8',
                               doctype='<!DOCTYPE html>')
        with lopen(in_dest('index.html'), 'wb') as out:
            out.write(markup)
        css = self.styles.generate_css(self.dest_dir, self.docx,
                                       self.notes_nopb, self.nosupsub)
        if css:
            with lopen(in_dest('docx.css'), 'wb') as out:
                out.write(css.encode('utf-8'))

        opf = OPFCreator(self.dest_dir, self.mi)
        opf.toc = toc
        opf.create_manifest_from_files_in([self.dest_dir])
        # Manifest entries detected as text/html get the type guessed for
        # an .xhtml file instead.
        for entry in opf.manifest:
            if entry.media_type == 'text/html':
                entry.media_type = guess_type('a.xhtml')[0]
        opf.create_spine(['index.html'])
        if self.cover_image is not None:
            opf.guide.set_cover(self.cover_image)

        opf_path = in_dest('metadata.opf')
        toc_file = in_dest('toc.ncx')
        with lopen(opf_path, 'wb') as of, open(toc_file, 'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx', process_guide=process_guide)
        if os.path.getsize(toc_file) == 0:
            os.remove(toc_file)
        return opf_path
Ejemplo n.º 52
0
 def set_cover(self, mi, prefer_metadata_cover):
     '''Install the cover described by ``mi`` and return its manifest id.

     Cover data is taken from the readable file ``mi.cover`` or, failing
     that, from ``mi.cover_data``. Extensions other than png/jpg/jpeg are
     treated as ``jpg``.

     :param mi: metadata object providing ``cover`` and ``cover_data``.
     :param prefer_metadata_cover: when true and the guide already has a
         ``cover`` entry, the cover data from ``mi`` is discarded and the
         existing cover is kept.
     :return: id of the manifest item used as cover, or None when there is
         neither new cover data nor an existing guide cover.
     '''
     cdata, ext = '', 'jpg'
     if mi.cover and os.access(mi.cover, os.R_OK):
         # Context manager so the file handle is closed deterministically.
         with open(mi.cover, 'rb') as f:
             cdata = f.read()
         ext = mi.cover.rpartition('.')[-1].lower().strip()
     elif mi.cover_data and mi.cover_data[-1]:
         cdata = mi.cover_data[1]
         ext = mi.cover_data[0]
     if ext not in ('png', 'jpg', 'jpeg'):
         ext = 'jpg'
     # Named cover_id rather than id to avoid shadowing the builtin.
     cover_id = old_cover = None
     if 'cover' in self.oeb.guide:
         old_cover = self.oeb.guide['cover']
     if prefer_metadata_cover and old_cover is not None:
         cdata = ''
     if cdata:
         # A new cover replaces any existing cover/titlepage guide entries.
         self.oeb.guide.remove('cover')
         self.oeb.guide.remove('titlepage')
     if old_cover is not None:
         if old_cover.href in self.oeb.manifest.hrefs:
             item = self.oeb.manifest.hrefs[old_cover.href]
             if not cdata:
                 return item.id
             self.remove_old_cover(item)
         elif not cdata:
             # The guide references a cover missing from the manifest:
             # register it (as JPEG) and use it.
             cover_id = self.oeb.manifest.generate(id='cover')[0]
             self.oeb.manifest.add(cover_id, old_cover.href, 'image/jpeg')
             return cover_id
     if cdata:
         cover_id, href = self.oeb.manifest.generate('cover', 'cover.' + ext)
         self.oeb.manifest.add(cover_id,
                               href,
                               guess_type('cover.' + ext)[0],
                               data=cdata)
         self.oeb.guide.add('cover', 'Cover', href)
     return cover_id
Ejemplo n.º 53
0
    def finalize_output(self, output, request, is_http1):
        '''Wrap a handler's return value in an output object and set the
        response headers that depend on it.

        Handles conditional requests (If-None-Match), Range / If-Range
        requests, gzip compression and the Content-Length /
        Transfer-Encoding / ETag / Accept-Ranges headers.

        :param output: value produced by the request handler; the branches
            below show the accepted kinds (filesystem file, bytes/text,
            readable object, StaticOutput, ETaggedDynamicOutput, or anything
            else, which is wrapped in GeneratedOutput).
        :param request: provides ``inheaders``, ``outheaders`` and
            ``status_code``.
        :param is_http1: when true, neither compression nor Accept-Ranges is
            enabled for the response.
        :return: the wrapped output object; None when a "not modified" or
            "precondition failed" response was sent instead; or whatever
            ``send_range_not_satisfiable`` returns when the requested range
            list is empty.
        '''
        none_match = parse_if_none_match(
            request.inheaders.get('If-None-Match', ''))
        # For etagged dynamic output the ETag is known up front, so the
        # conditional request can be answered before output() is called
        # further down.
        if isinstance(output, ETaggedDynamicOutput):
            matched = '*' in none_match or (output.etag
                                            and output.etag in none_match)
            if matched:
                if self.method in ('GET', 'HEAD'):
                    self.send_not_modified(output.etag)
                else:
                    self.simple_response(httplib.PRECONDITION_FAILED)
                return

        opts = self.opts
        outheaders = request.outheaders
        # Normalize the handler's return value into an output object.
        stat_result = file_metadata(output)
        if stat_result is not None:
            output = filesystem_file_output(output, outheaders, stat_result)
            if 'Content-Type' not in outheaders:
                mt = guess_type(output.name)[0]
                if mt:
                    # These textual types are declared as UTF-8.
                    if mt in {
                            'text/plain', 'text/html',
                            'application/javascript', 'text/css'
                    }:
                        mt += '; charset=UTF-8'
                    outheaders['Content-Type'] = mt
        elif isinstance(output, (bytes, type(''))):
            output = dynamic_output(output, outheaders)
        elif hasattr(output, 'read'):
            output = ReadableOutput(output)
        elif isinstance(output, StaticOutput):
            output = ReadableOutput(ReadOnlyFileBuffer(output.data),
                                    etag=output.etag,
                                    content_length=output.content_length)
        elif isinstance(output, ETaggedDynamicOutput):
            output = dynamic_output(output(), outheaders, etag=output.etag)
        else:
            output = GeneratedOutput(output)
        # Content type without parameters (anything after ';' is dropped).
        ct = outheaders.get('Content-Type', '').partition(';')[0]
        compressible = (not ct or ct.startswith('text/')
                        or ct.startswith('image/svg')
                        or ct.partition(';')[0] in COMPRESSIBLE_TYPES)
        # Compression additionally requires a 200 status, a body of at least
        # compress_min_size bytes (a value of -1 or less disables it), a
        # client that accepts a suitable encoding, and is_http1 being false.
        compressible = (compressible and request.status_code == httplib.OK and
                        (opts.compress_min_size > -1
                         and output.content_length >= opts.compress_min_size)
                        and acceptable_encoding(
                            request.inheaders.get('Accept-Encoding', ''))
                        and not is_http1)
        # Ranges are only advertised for uncompressed 200 responses.
        accept_ranges = (not compressible and output.accept_ranges is not None
                         and request.status_code == httplib.OK
                         and not is_http1)
        ranges = get_ranges(
            request.inheaders.get('Range'), output.content_length
        ) if output.accept_ranges and self.method in ('GET', 'HEAD') else None
        # An If-Range value that differs from the ETag cancels the range
        # request, so the full body is sent.
        if_range = (request.inheaders.get('If-Range') or '').strip()
        if if_range and if_range != output.etag:
            ranges = None
        # ranges is None when no range applies; an empty (non-None) result
        # is answered with "range not satisfiable".
        if ranges is not None and not ranges:
            return self.send_range_not_satisfiable(output.content_length)

        # Drop any values the handler set for headers computed below.
        for header in ('Accept-Ranges', 'Content-Encoding',
                       'Transfer-Encoding', 'ETag', 'Content-Length'):
            outheaders.pop(header, all=True)

        # Conditional request check against the ETag of the wrapped output.
        matched = '*' in none_match or (output.etag
                                        and output.etag in none_match)
        if matched:
            if self.method in ('GET', 'HEAD'):
                self.send_not_modified(output.etag)
            else:
                self.simple_response(httplib.PRECONDITION_FAILED)
            return

        output.ranges = None

        if output.etag and self.method in ('GET', 'HEAD'):
            outheaders.set('ETag', output.etag, replace_all=True)
        if accept_ranges:
            outheaders.set('Accept-Ranges', 'bytes', replace_all=True)
        if compressible and not ranges:
            outheaders.set('Content-Encoding', 'gzip', replace_all=True)
            # Record the original size before output is replaced by the
            # compressed stream.
            if getattr(output, 'content_length', None):
                outheaders.set('Calibre-Uncompressed-Length',
                               '%d' % output.content_length)
            output = GeneratedOutput(compress_readable_output(output.src_file),
                                     etag=output.etag)
        if output.content_length is not None and not compressible and not ranges:
            outheaders.set('Content-Length',
                           '%d' % output.content_length,
                           replace_all=True)

        # Chunked transfer is used when compressing or when the length of
        # the output is not known.
        if compressible or output.content_length is None:
            outheaders.set('Transfer-Encoding', 'chunked', replace_all=True)

        if ranges:
            if len(ranges) == 1:
                # A single range is sent directly with a Content-Range header.
                r = ranges[0]
                outheaders.set('Content-Length',
                               '%d' % r.size,
                               replace_all=True)
                outheaders.set('Content-Range',
                               'bytes %d-%d/%d' %
                               (r.start, r.stop, output.content_length),
                               replace_all=True)
                output.ranges = r
            else:
                # Several ranges are sent as multipart/byteranges. The
                # length is the part headers plus, for each range, its size
                # and 4 extra bytes.
                range_parts = get_range_parts(ranges,
                                              outheaders.get('Content-Type'),
                                              output.content_length)
                size = sum(map(len, range_parts)) + sum(r.size + 4
                                                        for r in ranges)
                outheaders.set('Content-Length', '%d' % size, replace_all=True)
                outheaders.set('Content-Type',
                               'multipart/byteranges; boundary=' +
                               MULTIPART_SEPARATOR,
                               replace_all=True)
                output.ranges = zip_longest(ranges, range_parts)
            request.status_code = httplib.PARTIAL_CONTENT
        return output
Ejemplo n.º 54
0
    def _manifest_add_missing(self, invalid):
        '''Add files that are referenced by manifest items but are not
        themselves in the manifest.

        Documents, XML and stylesheets are scanned for links; every linked
        file that exists in the container and is not yet known is added to
        the manifest and scanned in turn, until no new files turn up. Items
        whose data cannot be read are removed from the manifest at the end.

        NOTE(review): the ``invalid`` argument is overwritten with an empty
        set before it is read, so the caller's value is ignored -- confirm
        whether that is intended.
        '''
        import cssutils
        manifest = self.oeb.manifest
        known = set(manifest.hrefs)
        unchecked = set(manifest.values())
        cdoc = OEB_DOCS|OEB_STYLES
        invalid = set()
        # Each pass scans the unchecked items and collects unknown hrefs in
        # ``new``; files added from ``new`` become the next pass's input.
        while unchecked:
            new = set()
            for item in unchecked:
                data = None
                # Only documents, stylesheets and XML types are loaded.
                if (item.media_type in cdoc or
                        item.media_type[-4:] in ('/xml', '+xml')):
                    try:
                        data = item.data
                    except:
                        self.oeb.log.exception(u'Failed to read from manifest '
                                u'entry with id: %s, ignoring'%item.id)
                        invalid.add(item)
                        continue
                if data is None:
                    continue

                if (item.media_type in OEB_DOCS or
                        item.media_type[-4:] in ('/xml', '+xml')):
                    hrefs = [r[2] for r in iterlinks(data)]
                    for href in hrefs:
                        if isinstance(href, bytes):
                            href = href.decode('utf-8')
                        # Drop the #fragment; a link that is only a
                        # fragment leaves nothing to look up.
                        href, _ = urldefrag(href)
                        if not href:
                            continue
                        try:
                            href = item.abshref(urlnormalize(href))
                            scheme = urlparse(href).scheme
                        except:
                            self.oeb.log.exception(
                                'Skipping invalid href: %r'%href)
                            continue
                        # Links carrying a URL scheme are skipped.
                        if not scheme and href not in known:
                            new.add(href)
                elif item.media_type in OEB_STYLES:
                    try:
                        urls = list(cssutils.getUrls(data))
                    except:
                        urls = []
                    for url in urls:
                        href, _ = urldefrag(url)
                        href = item.abshref(urlnormalize(href))
                        scheme = urlparse(href).scheme
                        if not scheme and href not in known:
                            new.add(href)
            unchecked.clear()
            warned = set([])
            for href in new:
                known.add(href)
                # Skip hrefs that an unreadable item resolves to itself.
                is_invalid = False
                for item in invalid:
                    if href == item.abshref(urlnormalize(href)):
                        is_invalid = True
                        break
                if is_invalid:
                    continue
                if not self.oeb.container.exists(href):
                    if href not in warned:
                        self.logger.warn('Referenced file %r not found' % href)
                        warned.add(href)
                    continue
                if href not in warned:
                    self.logger.warn('Referenced file %r not in manifest' % href)
                    warned.add(href)
                id, _ = manifest.generate(id='added')
                guessed = guess_type(href)[0]
                media_type = guessed or BINARY_MIME
                added = manifest.add(id, href, media_type)
                # The newly added item is scanned on the next pass.
                unchecked.add(added)

            # NOTE(review): this removal runs inside the while loop, once
            # per pass, while ``invalid`` keeps accumulating across passes.
            for item in invalid:
                self.oeb.manifest.remove(item)
Ejemplo n.º 55
0
    def update_text_record(self, record, book, path, bl_index,
                           gtz_count, ltz_count, use_tz_var):
        '''
        Update the Sony database from the book. This is done if the timestamp in
        the db differs from the timestamp on the file.

        :param record: XML element for the book in the Sony DB; its
            attributes (date, size, title, author, mime, ...) are updated.
        :param book: book object providing title, title_sort, authors,
            author_sort and lpath.
        :param path: filesystem path of the book file, used for the
            timestamp, size and extension.
        :param bl_index: not used by this method.
        :param gtz_count: running count of existing books whose DB date
            matched the file mtime formatted as GMT.
        :param ltz_count: same, for the mtime formatted as local time.
        :param use_tz_var: whether a 'tz' variable was seen in the DB.
        :return: the updated ``(gtz_count, ltz_count, use_tz_var)`` tuple.
        '''

        # It seems that a Sony device can sometimes know what timezone it is in,
        # and apparently converts the dates to GMT when it writes them to its
        # DB. We can detect that a device is timezone-aware because there is a
        # 'tz' variable in the Sony DB, which we can set to "0" to tell the
        # device to ignore its own timezone when comparing mtime to the date in
        # the DB.

        # Unfortunately, if there is no tz variable in the DB, then we can't
        # tell when the device applies a timezone conversion. We use a horrible
        # heuristic to work around this problem. First, set dates only for new
        # books, trying to avoid upsetting the sony. Second, voting: if a book
        # is not new, compare its Sony DB date against localtime and gmtime.
        # Count the matches. When we must set a date, use the one with the most
        # matches. Use localtime if the case of a tie, and hope it is right.
        try:
            timestamp = os.path.getmtime(path)
        except:
            debug_print('Failed to get timestamp for:', path)
            timestamp = time.time()
        rec_date = record.get('date', None)

        def clean(x):
            # Decode byte strings and strip NUL characters, which are not
            # allowed in XML attribute values.
            if isbytestring(x):
                x = x.decode(preferred_encoding, 'replace')
            # str.replace returns a new string; the result must be kept
            # (the original code discarded it, so NULs were never removed).
            x = x.replace(u'\0', '')
            return x

        def record_set(k, v):
            try:
                record.set(k, clean(v))
            except:
                # v is not suitable for XML, ignore
                pass

        if not getattr(book, '_new_book', False):  # book is not new
            if record.get('tz', None) is not None:
                use_tz_var = True
            if strftime(timestamp, zone=time.gmtime) == rec_date:
                gtz_count += 1
            elif strftime(timestamp, zone=time.localtime) == rec_date:
                ltz_count += 1
        else:  # book is new. Set the time using the current votes
            if use_tz_var:
                tz = time.localtime
                record.set('tz', '0')
                debug_print("Use localtime TZ and tz='0' for new book", book.lpath)
            elif ltz_count >= gtz_count:
                tz = time.localtime
                debug_print("Use localtime TZ for new book", book.lpath)
            else:
                tz = time.gmtime
                debug_print("Use GMT TZ for new book", book.lpath)
            date = strftime(timestamp, zone=tz)
            record.set('date', clean(date))
        try:
            record.set('size', clean(str(os.stat(path).st_size)))
        except:
            record.set('size', '0')
        title = book.title if book.title else _('Unknown')
        record_set('title', title)
        ts = book.title_sort
        if not ts:
            ts = title_sort(title)
        record_set('titleSorter', ts)
        if self.use_author_sort:
            if book.author_sort:
                aus = book.author_sort
            else:
                debug_print('Author_sort is None for book', book.lpath)
                aus = authors_to_sort_string(book.authors)
            record_set('author', aus)
        else:
            record_set('author', authors_to_string(book.authors))
        ext = os.path.splitext(path)[1]
        if ext:
            ext = ext[1:].lower()
            # Prefer the device-specific MIME table, then a generic guess.
            mime = MIME_MAP.get(ext, None)
            if mime is None:
                mime = guess_type('a.'+ext)[0]
            if mime is not None:
                record.set('mime', clean(mime))
        if 'sourceid' not in record.attrib:
            record.set('sourceid', '1')
        if 'id' not in record.attrib:
            # Allocate the next id after the largest one in the whole tree.
            num = self.max_id(record.getroottree().getroot())
            record.set('id', str(num+1))
        return (gtz_count, ltz_count, use_tz_var)
Ejemplo n.º 56
0
    def test_http_response(self):  # {{{
        'Test HTTP protocol responses'
        from calibre.srv.http_response import parse_multipart_byterange

        # Initial handler: static output whose body is the joined request
        # path (the assertions below expect b'an_etagged_path').
        def handler(conn):
            return conn.generate_static_output('test',
                                               lambda: ''.join(conn.path))
        # f is a small temporary file served in the filesystem tests; lf is
        # a larger file used for the transfer-time check at the end.
        with NamedTemporaryFile(suffix='test.epub') as f, open(P('localization/locales.zip'), 'rb') as lf, \
                TestServer(handler, timeout=1, compress_min_size=0) as server:
            fdata = (string.ascii_letters * 100).encode('ascii')
            f.write(fdata), f.seek(0)

            # Test ETag
            conn = server.connect()
            conn.request('GET', '/an_etagged_path')
            r = conn.getresponse()
            self.ae(r.status, http_client.OK), self.ae(r.read(),
                                                       b'an_etagged_path')
            etag = r.getheader('ETag')
            # The ETag is expected to be the quoted SHA-1 hex digest of the
            # body.
            self.ae(etag,
                    '"%s"' % hashlib.sha1(b'an_etagged_path').hexdigest())
            # Re-requesting with the ETag must yield an empty 304 response.
            conn.request('GET',
                         '/an_etagged_path',
                         headers={'If-None-Match': etag})
            r = conn.getresponse()
            self.ae(r.status, http_client.NOT_MODIFIED)
            self.ae(r.read(), b'')

            # Test gzip
            raw = b'a' * 20000
            server.change_handler(lambda conn: raw)
            conn = server.connect()
            conn.request('GET',
                         '/an_etagged_path',
                         headers={'Accept-Encoding': 'gzip'})
            r = conn.getresponse()
            self.ae(unicode_type(len(raw)),
                    r.getheader('Calibre-Uncompressed-Length'))
            # wbits of 16 + MAX_WBITS makes zlib expect a gzip header.
            self.ae(r.status, http_client.OK), self.ae(
                zlib.decompress(r.read(), 16 + zlib.MAX_WBITS), raw)

            # Test dynamic etagged content
            # One-element list so edfunc can update the counter.
            num_calls = [0]

            def edfunc():
                num_calls[0] += 1
                return b'data'

            server.change_handler(
                lambda conn: conn.etagged_dynamic_response("xxx", edfunc))
            conn = server.connect()
            conn.request('GET', '/an_etagged_path')
            r = conn.getresponse()
            self.ae(r.status, http_client.OK), self.ae(r.read(), b'data')
            etag = r.getheader('ETag')
            self.ae(etag, '"xxx"')
            self.ae(r.getheader('Content-Length'), '4')
            conn.request('GET',
                         '/an_etagged_path',
                         headers={'If-None-Match': etag})
            r = conn.getresponse()
            self.ae(r.status, http_client.NOT_MODIFIED)
            self.ae(r.read(), b'')
            # The conditional request must not have called edfunc again.
            self.ae(num_calls[0], 1)

            # Test getting a filesystem file
            # Everything below, including the larger-file check, runs once
            # with use_sendfile enabled and once with it disabled.
            for use_sendfile in (True, False):
                server.change_handler(lambda conn: f)
                server.loop.opts.use_sendfile = use_sendfile
                conn = server.connect()
                conn.request('GET', '/test')
                r = conn.getresponse()
                etag = unicode_type(r.getheader('ETag'))
                self.assertTrue(etag)
                self.ae(r.getheader('Content-Type'), guess_type(f.name)[0])
                self.ae(unicode_type(r.getheader('Accept-Ranges')), 'bytes')
                self.ae(int(r.getheader('Content-Length')), len(fdata))
                self.ae(r.status, http_client.OK), self.ae(r.read(), fdata)

                # Single range: both ends are inclusive, so 2-25 is 24 bytes.
                conn.request('GET', '/test', headers={'Range': 'bytes=2-25'})
                r = conn.getresponse()
                self.ae(r.status, http_client.PARTIAL_CONTENT)
                self.ae(unicode_type(r.getheader('Accept-Ranges')), 'bytes')
                self.ae(unicode_type(r.getheader('Content-Range')),
                        'bytes 2-25/%d' % len(fdata))
                self.ae(int(r.getheader('Content-Length')), 24)
                self.ae(r.read(), fdata[2:26])

                # A range starting beyond the end of the file is rejected.
                conn.request('GET',
                             '/test',
                             headers={'Range': 'bytes=100000-'})
                r = conn.getresponse()
                self.ae(r.status, http_client.REQUESTED_RANGE_NOT_SATISFIABLE)
                self.ae(unicode_type(r.getheader('Content-Range')),
                        'bytes */%d' % len(fdata))

                # If-Range matching the ETag: the range is honoured.
                conn.request('GET',
                             '/test',
                             headers={
                                 'Range': 'bytes=25-50',
                                 'If-Range': etag
                             })
                r = conn.getresponse()
                self.ae(r.status, http_client.PARTIAL_CONTENT), self.ae(
                    r.read(), fdata[25:51])
                self.ae(int(r.getheader('Content-Length')), 26)

                # A range end past the end of the file still returns all
                # the data as partial content.
                conn.request('GET',
                             '/test',
                             headers={'Range': 'bytes=0-1000000'})
                r = conn.getresponse()
                self.ae(r.status,
                        http_client.PARTIAL_CONTENT), self.ae(r.read(), fdata)

                # If-Range not matching the ETag: the full file is sent.
                conn.request('GET',
                             '/test',
                             headers={
                                 'Range': 'bytes=25-50',
                                 'If-Range': '"nomatch"'
                             })
                r = conn.getresponse()
                self.ae(r.status, http_client.OK), self.ae(r.read(), fdata)
                self.assertFalse(r.getheader('Content-Range'))
                self.ae(int(r.getheader('Content-Length')), len(fdata))

                # Multiple ranges come back as a multipart body whose
                # Content-Length must equal the bytes actually read.
                conn.request('GET',
                             '/test',
                             headers={'Range': 'bytes=0-25,26-50'})
                r = conn.getresponse()
                self.ae(r.status, http_client.PARTIAL_CONTENT)
                clen = int(r.getheader('Content-Length'))
                data = r.read()
                self.ae(clen, len(data))
                buf = BytesIO(data)
                self.ae(
                    parse_multipart_byterange(buf,
                                              r.getheader('Content-Type')),
                    [(0, fdata[:26]), (26, fdata[26:51])])

                # Test sending of larger file
                start_time = monotonic()
                lf.seek(0)
                data = lf.read()
                server.change_handler(lambda conn: lf)
                conn = server.connect(timeout=1)
                conn.request('GET', '/test')
                r = conn.getresponse()
                self.ae(r.status, http_client.OK)
                rdata = r.read()
                self.ae(len(data), len(rdata))
                self.ae(
                    hashlib.sha1(data).hexdigest(),
                    hashlib.sha1(rdata).hexdigest())
                self.ae(data, rdata)
                # The timed span includes reading lf from disk above.
                time_taken = monotonic() - start_time
                self.assertLess(time_taken, 1,
                                'Large file transfer took too long')
0
    def __enter__(self,
                  processed=False,
                  only_input_plugin=False,
                  run_char_count=True,
                  read_anchor_map=True,
                  extract_embedded_fonts_for_qt=False):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        The book at ``self.pathtoebook`` is run through its conversion input
        plugin into a new temporary directory (stored as ``self.base``). The
        resulting OPF is then read to populate ``self.opf``,
        ``self.language``, ``self.spine``, ``self.pages``, ``self.toc`` and
        ``self.book_format``.

        :param processed: If True, the input plugin output is additionally
            passed through ``create_oebbook``. The same happens for pdb, pdf
            and rb input whose plugin output has no ``manifest`` attribute.
        :param only_input_plugin: If True, the ``create_oebbook`` step is
            skipped entirely.
        :param run_char_count: Forwarded to every ``SpineItem``.
        :param read_anchor_map: Forwarded to every ``SpineItem``; when True,
            ``create_indexing_data`` is also run on the spine and the ToC.
        :param extract_embedded_fonts_for_qt: If True, ``extract_fonts`` is
            called; a failure there is logged and not re-raised.
        :return: ``self``
        '''

        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook

        self.delete_on_exit = []
        # NOTE(review): if anything below raises, this temporary directory is
        # not exited here; presumably the caller's __exit__ handles it -- confirm.
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True

        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(inf, plumber.opts,
                                                  plumber.input_fmt, self.log,
                                                  {}, self.base)

            if not only_input_plugin:
                # Run the HTML preprocess/parsing from the conversion pipeline as
                # well. ``and`` binds tighter than ``or``; the parentheses only
                # make the pre-existing grouping explicit.
                needs_processing = processed or (
                    plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                    and not hasattr(self.pathtoopf, 'manifest'))
                if needs_processing:
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf,
                                                       self.base)
                    self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                                                    plumber.opts)

            # An object with a manifest is an in-memory book: write it out so
            # that self.pathtoopf is a path from here on.
            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'

        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()
        # Linear spine items first, then the non-linear ones.
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem,
                        read_anchor_map=read_anchor_map,
                        run_char_count=run_char_count)
        is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                # Best effort: an unreadable item is skipped, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {
                'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3'
        }:
            # Generate a title page pointing at the cover and put it first in
            # the spine; the file is scheduled for deletion on exit.
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE %
                     prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        # Page count per spine item, derived from its character count.
        sizes = [i.character_count for i in self.spine]
        self.pages = [
            math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes
        ]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        # Assign each item a contiguous, 1-based page range.
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.read_bookmarks()

        if extract_embedded_fonts_for_qt:
            from calibre.ebooks.oeb.iterator.extract_fonts import extract_fonts
            try:
                extract_fonts(self.opf, self.log)
            except Exception:
                # Temporarily lower the filter level to DEBUG while logging
                # the failure, and always restore it afterwards.
                ol = self.log.filter_level
                self.log.filter_level = self.log.DEBUG
                try:
                    self.log.exception('Failed to extract fonts')
                finally:
                    self.log.filter_level = ol

        return self
Ejemplo n.º 58
0
def ACQUISITION_ENTRY(book_id, updated, request_context):
    '''Build the OPDS ``entry`` element describing a single book.

    The entry carries the title, author, id, update and publication stamps,
    an optional XHTML summary (rating, tags, series, displayable custom
    fields and comments), one acquisition link per format with a known MIME
    type, and four cover/thumbnail image links.

    :param book_id: Id of the book, used for the metadata lookup and in the
        generated download URLs.
    :param updated: Not used in the body; the entry's update stamp comes from
        the book's ``last_modified`` value.
    :param request_context: Supplies ``db``, ``ctx`` and ``library_id``.
    :return: The populated entry element.
    '''
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)

    # HTML fragments that together make up the entry summary.
    fragments = []
    if (mi.rating or 0) > 0:
        stars = rating_to_stars(mi.rating)
        fragments.append(_('RATING: %s<br />') % stars)
    if mi.tags:
        fragments.append(
            _('TAGS: %s<br />') % xml(format_tag_string(mi.tags, None)))
    if mi.series:
        fragments.append(
            _('SERIES: %(series)s [%(sidx)s]<br />') % {
                'series': xml(mi.series),
                'sidx': fmt_sidx(float(mi.series_index)),
            })

    is_displayable = request_context.ctx.is_field_displayable
    for key in field_metadata.ignorable_field_keys():
        if not is_displayable(key):
            continue
        name, val = mi.format_field(key)
        if not val:
            continue
        meta = field_metadata[key]
        datatype = meta['datatype']
        if datatype == 'text' and meta['is_multiple']:
            separators = meta['is_multiple']
            markup = xml(
                format_tag_string(val,
                                  separators['ui_to_list'],
                                  joinval=separators['list_to_ui']))
        elif datatype == 'comments' or (
                datatype == 'composite'
                and meta['display'].get('contains_html', False)):
            # HTML-bearing values are converted rather than escaped.
            markup = comments_to_html(unicode_type(val))
        else:
            markup = xml(unicode_type(val))
        fragments.append('%s: %s<br />' % (xml(name), markup))

    if mi.comments:
        fragments.append(comments_to_html(mi.comments))

    # Either the (empty) list or the parsed summary tree; len() is checked
    # below before it is attached.
    summary = html_to_lxml('\n'.join(fragments)) if fragments else fragments

    entry = E.entry(
        TITLE(mi.title),
        E.author(E.name(authors_to_string(mi.authors))),
        ID('urn:uuid:' + mi.uuid),
        UPDATED(mi.last_modified),
        E.published(mi.timestamp.isoformat()),
    )
    if mi.pubdate and not is_date_undefined(mi.pubdate):
        date_elem = entry.makeelement('{%s}date' % DC_NS)
        entry.append(date_elem)
        date_elem.text = mi.pubdate.isoformat()
    if len(summary):
        entry.append(E.content(summary, type='xhtml'))

    book_url = partial(request_context.ctx.url_for,
                       '/get',
                       book_id=book_id,
                       library_id=request_context.library_id)

    if mi.formats:
        format_details = mi.format_metadata
        for fmt in mi.formats:
            fmt = fmt.lower()
            mime = guess_type('a.' + fmt)[0]
            if not mime:
                continue
            link = E.link(type=mime,
                          href=book_url(what=fmt),
                          rel="http://opds-spec.org/acquisition")
            details = format_details.get(fmt.upper())
            if details:
                link.set('length', unicode_type(details['size']))
                link.set('mtime', details['mtime'].isoformat())
            entry.append(link)

    image_links = (
        ('cover', "http://opds-spec.org/cover"),
        ('thumb', "http://opds-spec.org/thumbnail"),
        ('cover', "http://opds-spec.org/image"),
        ('thumb', "http://opds-spec.org/image/thumbnail"),
    )
    for what, rel in image_links:
        entry.append(
            E.link(type='image/jpeg', href=book_url(what=what), rel=rel))

    return entry
Ejemplo n.º 59
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''Convert an FB2 document to XHTML and write an OPF describing it.

        The FB2 XML is parsed (falling back to a recovering parser), its
        embedded stylesheets are collected, and the document is transformed
        with the ``templates/fb2.xsl`` stylesheet. ``index.xhtml``,
        ``inline-styles.css``, an optional cover image and ``metadata.opf``
        are written to the current working directory.

        :param stream: Seekable file-like object holding the FB2 data. It is
            read in full and later rewound to read the metadata.
        :param options: Conversion options; ``no_inline_fb2_toc`` is read.
        :param file_ext: Not used in the body.
        :param log: Logger, also stored as ``self.log``.
        :param accelerators: Not used in the body.
        :return: Absolute path of the generated ``metadata.opf``.
        :raises ValueError: If the data cannot be parsed as XML at all.
        '''
        from lxml import etree
        from calibre.ebooks.metadata.fb2 import ensure_namespace
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
        from calibre.ebooks.chardet import xml_to_unicode

        def as_bytes(data):
            # Files below are opened in binary mode, so text must be encoded.
            return data if isinstance(data, bytes) else data.encode('utf-8')

        self.log = log
        log.debug('Parsing XML...')
        raw = stream.read()
        # Strip NUL characters with a pattern of the same type as the data:
        # calling bytes.replace() with str arguments raises TypeError.
        if isinstance(raw, bytes):
            raw = raw.replace(b'\0', b'')
        else:
            raw = raw.replace('\0', '')
        raw = xml_to_unicode(raw,
                             strip_encoding_pats=True,
                             assume_utf8=True,
                             resolve_entities=True)[0]
        try:
            doc = etree.fromstring(raw)
        except etree.XMLSyntaxError:
            try:
                doc = etree.fromstring(raw, parser=RECOVER_PARSER)
                if doc is None:
                    raise Exception('parse failed')
            except Exception:
                # Last resort: escape bare ampersands and retry.
                doc = etree.fromstring(raw.replace('& ', '&amp;'),
                                       parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
        doc = ensure_namespace(doc)
        try:
            fb_ns = doc.nsmap[doc.prefix]
        except Exception:
            fb_ns = FB2NS

        NAMESPACES = {'f': fb_ns, 'l': XLINK_NS}
        stylesheets = doc.xpath(
            '//*[local-name() = "stylesheet" and @type="text/css"]')
        css = ''.join(
            etree.tostring(s, encoding=str, method='text', with_tail=False) +
            '\n\n' for s in stylesheets)
        if css:
            import cssutils
            import logging
            parser = cssutils.CSSParser(fetcher=None,
                                        log=logging.getLogger('calibre.css'))

            XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
            text = XHTML_CSS_NAMESPACE + css
            log.debug('Parsing stylesheet...')
            stylesheet = parser.parseString(text)
            stylesheet.namespaces['h'] = XHTML_NS
            css = stylesheet.cssText
            # cssText may be serialized bytes; str() on bytes would yield the
            # "b'...'" repr instead of the stylesheet text.
            if isinstance(css, bytes):
                css = css.decode('utf-8', 'replace')
            css = css.replace('h|style', 'h|span')
            css = re.sub(r'name\s*=\s*', 'class=', css)
        self.extract_embedded_content(doc)
        log.debug('Converting XML to HTML...')
        # Work on the template as text (assumes it is UTF-8 encoded) so the
        # str-based substitutions below are valid, and close the file.
        with open(P('templates/fb2.xsl'), 'rb') as f:
            ss = f.read().decode('utf-8')
        ss = ss.replace("__FB_NS__", fb_ns)
        if options.no_inline_fb2_toc:
            log('Disabling generation of inline FB2 TOC')
            ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                            re.DOTALL).sub('', ss)

        # Hand lxml bytes: it rejects text input that carries an XML
        # encoding declaration.
        styledoc = etree.fromstring(ss.encode('utf-8'))

        transform = etree.XSLT(styledoc)
        result = transform(doc)

        # Handle links of type note and cite
        notes = {
            a.get('href')[1:]: a
            for a in result.xpath('//a[@link_note and @href]')
            if a.get('href').startswith('#')
        }
        cites = {
            a.get('link_cite'): a
            for a in result.xpath('//a[@link_cite]') if not a.get('href', '')
        }
        all_ids = set(result.xpath('//*/@id'))
        for cite, a in cites.items():
            note = notes.get(cite, None)
            if note:
                # Find the first unused "citeN" id for the back-reference.
                c = 1
                while 'cite%d' % c in all_ids:
                    c += 1
                if not note.get('id', None):
                    note.set('id', 'cite%d' % c)
                    all_ids.add(note.get('id'))
                a.set('href', '#%s' % note.get('id'))
        for x in result.xpath('//*[@link_note or @link_cite]'):
            x.attrib.pop('link_note', None)
            x.attrib.pop('link_cite', None)

        for img in result.xpath('//img[@src]'):
            src = img.get('src')
            img.set('src', self.binary_map.get(src, src))
        index = transform.tostring(result)
        with open('index.xhtml', 'wb') as f:
            f.write(as_bytes(index))
        with open('inline-styles.css', 'wb') as f:
            f.write(as_bytes(css))
        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        cpath = None
        if mi.cover_data and mi.cover_data[1]:
            with open('fb2_cover_calibre_mi.jpg', 'wb') as f:
                f.write(mi.cover_data[1])
            cpath = os.path.abspath('fb2_cover_calibre_mi.jpg')
        else:
            # Fall back to the first coverpage image that has an href.
            for img in doc.xpath('//f:coverpage/f:image',
                                 namespaces=NAMESPACES):
                href = img.get('{%s}href' % XLINK_NS, img.get('href', None))
                if href is not None:
                    if href.startswith('#'):
                        href = href[1:]
                    cpath = os.path.abspath(href)
                    break

        opf = OPFCreator(os.getcwd(), mi)
        entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir('.')]
        opf.create_manifest(entries)
        opf.create_spine(['index.xhtml'])
        if cpath:
            opf.guide.set_cover(cpath)
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.join(os.getcwd(), 'metadata.opf')
Ejemplo n.º 60
0
    def setup_desktop_integration(self):  # {{{
        '''Install icons, desktop entries, appdata and MIME definitions.

        Working inside a temporary directory, this renders and installs the
        icons with ``xdg-icon-resource``, writes and installs four
        ``.desktop`` files with ``xdg-desktop-menu``, writes appdata files
        when the metainfo directory is writable, and installs the MIME type
        definitions with ``xdg-mime``. Everything installed is recorded in
        ``self.icon_resources``, ``self.menu_resources``,
        ``self.appdata_resources`` and ``self.mime_resources``.

        Any exception is re-raised when ``self.opts.fatal_errors`` is set;
        otherwise the failure is reported through ``self.task_failed``.
        '''
        try:
            self.info('Setting up desktop integration...')

            env = os.environ.copy()
            cc = check_call
            if getattr(sys, 'frozen_path', False) and 'LD_LIBRARY_PATH' in env:
                # Drop the frozen build's lib directory from LD_LIBRARY_PATH
                # for the external commands run below.
                paths = env.get('LD_LIBRARY_PATH', '').split(os.pathsep)
                paths = [x for x in paths if x]
                npaths = [x for x in paths if x != sys.frozen_path + '/lib']
                env['LD_LIBRARY_PATH'] = os.pathsep.join(npaths)
                cc = partial(check_call, env=env)

            with TemporaryDirectory() as tdir, CurrentDir(
                    tdir), PreserveMIMEDefaults():

                def install_single_icon(iconsrc,
                                        basename,
                                        size,
                                        context,
                                        is_last_icon=False):
                    filename = '%s-%s.png' % (basename, size)
                    render_img(iconsrc,
                               filename,
                               width=int(size),
                               height=int(size))
                    cmd = [
                        'xdg-icon-resource', 'install', '--noupdate',
                        '--context', context, '--size',
                        str(size), filename, basename
                    ]
                    if is_last_icon:
                        # Remove '--noupdate' for the very last icon only.
                        del cmd[2]
                    cc(cmd)
                    self.icon_resources.append((context, basename, str(size)))

                def install_icons(iconsrc,
                                  basename,
                                  context,
                                  is_last_icon=False):
                    sizes = (16, 32, 48, 64, 128, 256)
                    last = len(sizes) - 1
                    # Compare positions, not object identity, to find the
                    # final size.
                    for idx, size in enumerate(sizes):
                        install_single_icon(iconsrc, basename, size, context,
                                            is_last_icon and idx == last)

                icons = list(
                    filter(None, [
                        x.strip() for x in '''\
                    mimetypes/lrf.png application-lrf mimetypes
                    mimetypes/lrf.png text-lrs mimetypes
                    mimetypes/mobi.png application-x-mobipocket-ebook mimetypes
                    mimetypes/tpz.png application-x-topaz-ebook mimetypes
                    mimetypes/azw2.png application-x-kindle-application mimetypes
                    mimetypes/azw3.png application-x-mobi8-ebook mimetypes
                    lt.png calibre-gui apps
                    viewer.png calibre-viewer apps
                    tweak.png calibre-ebook-edit apps
                    '''.splitlines()
                    ]))
                last_icon = len(icons) - 1
                for idx, line in enumerate(icons):
                    iconsrc, basename, context = line.split()
                    install_icons(iconsrc,
                                  basename,
                                  context,
                                  is_last_icon=idx == last_icon)

                mimetypes = set()
                for x in all_input_formats():
                    mt = guess_type('dummy.' + x)[0]
                    if mt and 'chemical' not in mt and 'ctc-posml' not in mt:
                        mimetypes.add(mt)
                mimetypes.discard('application/octet-stream')

                def write_mimetypes(f):
                    # Sorted so the generated line does not depend on set
                    # iteration order.
                    polyglot_write(f)('MimeType=%s;\n' %
                                      ';'.join(sorted(mimetypes)))

                from calibre.ebooks.oeb.polish.main import SUPPORTED
                from calibre.ebooks.oeb.polish.import_book import IMPORTABLE
                with open('calibre-lrfviewer.desktop', 'wb') as f:
                    polyglot_write(f)(VIEWER)
                with open('calibre-ebook-viewer.desktop', 'wb') as f:
                    polyglot_write(f)(EVIEWER)
                    write_mimetypes(f)
                with open('calibre-ebook-edit.desktop', 'wb') as f:
                    polyglot_write(f)(ETWEAK)
                    mt = {
                        guess_type('a.' + x.lower())[0]
                        for x in (SUPPORTED | IMPORTABLE)
                    } - {None, 'application/octet-stream'}
                    polyglot_write(f)('MimeType=%s;\n' % ';'.join(sorted(mt)))
                with open('calibre-gui.desktop', 'wb') as f:
                    polyglot_write(f)(GUI)
                    write_mimetypes(f)
                des = ('calibre-gui.desktop', 'calibre-lrfviewer.desktop',
                       'calibre-ebook-viewer.desktop',
                       'calibre-ebook-edit.desktop')
                appdata = os.path.join(
                    os.path.dirname(self.opts.staging_sharedir), 'metainfo')
                if not os.path.exists(appdata):
                    try:
                        os.mkdir(appdata)
                    except Exception:
                        self.warning(
                            'Failed to create %s not installing appdata files'
                            % appdata)
                if os.path.exists(appdata) and not os.access(appdata, os.W_OK):
                    self.warning(
                        'Do not have write permissions for %s not installing appdata files'
                        % appdata)
                else:
                    from calibre.utils.localization import get_all_translators
                    translators = dict(get_all_translators())

                APPDATA = get_appdata()
                for x in des:
                    cmd = [
                        'xdg-desktop-menu', 'install', '--noupdate', './' + x
                    ]
                    # Argument list, like every other command here; no shell
                    # is needed to run it.
                    cc(cmd)
                    self.menu_resources.append(x)
                    ak = x.partition('.')[0]
                    # translators is only bound when appdata is writable or
                    # missing; the os.access() check keeps this call safe.
                    if ak in APPDATA and os.access(appdata, os.W_OK):
                        self.appdata_resources.append(
                            write_appdata(ak, APPDATA[ak], appdata,
                                          translators))
                cc(['xdg-desktop-menu', 'forceupdate'])
                MIME = P('calibre-mimetypes.xml')
                self.mime_resources.append(MIME)
                cc(['xdg-mime', 'install', MIME])
        except Exception:
            if self.opts.fatal_errors:
                raise
            self.task_failed('Setting up desktop integration failed')