Example #1
 def __init__(self, path_to_ebook, tdir, log=None):
     log = log or default_log
     book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
     ContainerBase.__init__(self, tdir, opfpath, log)
     excluded_names = {
         name for name, mt in self.mime_map.iteritems() if
         name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/')
     }
     self.book_render_data = data = {
         'version': self.RENDER_VERSION,
         'toc':get_toc(self).as_dict,
         'spine':[name for name, is_linear in self.spine_names],
         'link_uid': uuid4(),
         'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
         'manifest': list(set(self.name_path_map) - excluded_names),
     }
     # Mark the spine as dirty since we have to ensure it is normalized
     for name in data['spine']:
         self.parsed(name), self.dirty(name)
     self.virtualize_resources()
     self.commit()
     for name in excluded_names:
         os.remove(self.name_path_map[name])
     with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
         f.write(json.dumps(self.book_render_data, ensure_ascii=False).encode('utf-8'))
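A minimal sketch of the pattern this example ends with: a render manifest carrying a random link_uid, written out as UTF-8 JSON alongside the extracted book. The helper and file names below are illustrative, not calibre's.

import json
import os
from uuid import uuid4


def write_render_manifest(root, spine, files):
    # Hypothetical helper mirroring the final write above: a manifest with a
    # random link_uid, serialized as UTF-8 JSON next to the extracted book.
    data = {'link_uid': str(uuid4()), 'spine': spine, 'manifest': files}
    with open(os.path.join(root, 'book-manifest.json'), 'wb') as f:
        f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))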
Example #2
 def __init__(self, parent=None):
     QNetworkAccessManager.__init__(self, parent)
     self.mathjax_prefix = str(uuid4())
     self.mathjax_base = '%s://%s/%s/' % (FAKE_PROTOCOL, FAKE_HOST, self.mathjax_prefix)
     self.root = self.orig_root = os.path.dirname(P('viewer/blank.html', allow_user_override=False))
     self.mime_map, self.single_pages, self.codec_map = {}, set(), {}
     self.mathjax_dir = P('mathjax', allow_user_override=False)
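The random prefix appears to give MathJax resources their own unique URL namespace under the viewer's internal scheme, so their requests cannot collide with book content. A small sketch of the idea, with stand-in values for FAKE_PROTOCOL and FAKE_HOST:

from uuid import uuid4

FAKE_PROTOCOL, FAKE_HOST = 'viewer', 'internal.invalid'  # stand-ins, not calibre's values


def mathjax_base():
    # One random prefix per manager instance; every MathJax URL hangs off it.
    prefix = str(uuid4())
    return prefix, '%s://%s/%s/' % (FAKE_PROTOCOL, FAKE_HOST, prefix)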
Example #3
 def __init__(self, prefs):
     self.log = default_log
     self.current_frag = None
     self.com_id = str(uuid4())
     QWebEnginePage.__init__(self)
     secure_webengine(self.settings(), for_viewer=True)
     self.titleChanged.connect(self.title_changed)
     self.loadFinished.connect(self.show_frag)
     s = QWebEngineScript()
     s.setName('toc.js')
     s.setInjectionPoint(QWebEngineScript.InjectionPoint.DocumentCreation)
     s.setRunsOnSubFrames(True)
     s.setWorldId(QWebEngineScript.ScriptWorldId.ApplicationWorld)
     js = P('toc.js', allow_user_override=False,
            data=True).decode('utf-8').replace('COM_ID', self.com_id, 1)
     if 'preview_background' in prefs.defaults and 'preview_foreground' in prefs.defaults:
         from calibre.gui2.tweak_book.preview import get_editor_settings
         settings = get_editor_settings(prefs)
     else:
         if is_dark_theme():
             settings = {
                 'is_dark_theme': True,
                 'bg': dark_color.name(),
                 'fg': dark_text_color.name(),
                 'link': dark_link_color.name(),
             }
         else:
             settings = {}
     js = js.replace('SETTINGS', json.dumps(settings), 1)
     dark_mode_css = P('dark_mode.css',
                       data=True,
                       allow_user_override=False).decode('utf-8')
     js = js.replace('CSS', json.dumps(dark_mode_css), 1)
     s.setSourceCode(js)
     self.scripts().insert(s)
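A sketch of the placeholder substitution used for toc.js: a random token is baked into the injected script once, so messages coming back from it can be matched to this page instance. The template below is illustrative; calibre's actual toc.js is not reproduced here.

from uuid import uuid4

JS_TEMPLATE = "window.com_id = 'COM_ID';  /* ...rest of the script... */"


def prepare_toc_script():
    com_id = str(uuid4())
    return com_id, JS_TEMPLATE.replace('COM_ID', com_id, 1)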
Example #4
 def write_to_unicode(self, objects):
     try:
         name = self.metrics.postscript_name
     except KeyError:
         name = uuid4()
     cmap = CMap(name, self.metrics.glyph_map, compress=self.compress)
     self.font_dict['ToUnicode'] = objects.add(cmap)
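Here uuid4() is only a fallback: when the font metrics expose no PostScript name, a random UUID still gives the ToUnicode CMap a unique name. A minimal sketch of that fallback:

from uuid import uuid4


def cmap_name(metrics):
    try:
        return metrics.postscript_name
    except KeyError:
        # No usable PostScript name; any unique placeholder will do.
        return str(uuid4())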
Example #5
 def __init__(self, name, glyph_map, compress=False):
     Stream.__init__(self, compress)
     current_map = OrderedDict()
     maps = []
     for glyph_id in sorted(glyph_map):
         if len(current_map) > 99:
             maps.append(current_map)
             current_map = OrderedDict()
         val = []
         for c in glyph_map[glyph_id]:
             c = ord(c)
             val.append(to_hex_string(c))
         glyph_id = '<%s>'%to_hex_string(glyph_id)
         current_map[glyph_id] = '<%s>'%''.join(val)
     if current_map:
         maps.append(current_map)
     mapping = []
     for m in maps:
         meat = '\n'.join('%s %s'%(k, v) for k, v in iteritems(m))
         mapping.append('%d beginbfchar\n%s\nendbfchar'%(len(m), meat))
     try:
         name = name.encode('ascii').decode('ascii')
     except Exception:
         name = uuid4()
     self.write(self.skeleton.format(name=name, mapping='\n'.join(mapping)))
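The loop above flushes current_map whenever it reaches 100 entries because ToUnicode CMaps conventionally keep at most 100 mappings per beginbfchar/endbfchar block. A compact sketch of that chunking on its own, independent of the Stream machinery:

def chunk_bfchar(glyph_map, limit=100):
    # Yield the glyph map in groups small enough for a single beginbfchar block.
    items = sorted(glyph_map.items())
    for i in range(0, len(items), limit):
        yield items[i:i + limit]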
Example #6
    def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
        log = log or default_log
        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
        ContainerBase.__init__(self, tdir, opfpath, log)
        excluded_names = {
            name for name, mt in self.mime_map.iteritems() if
            name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') or
            name == 'mimetype'
        }

        self.book_render_data = data = {
            'version': RENDER_VERSION,
            'toc':get_toc(self).as_dict,
            'spine':[name for name, is_linear in self.spine_names],
            'link_uid': uuid4(),
            'book_hash': book_hash,
            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
        }
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name), self.dirty(name)
        self.virtualized_names = set()
        self.virtualize_resources()
        def manifest_data(name):
            return {'size':os.path.getsize(self.name_path_map[name]), 'is_virtualized': name in self.virtualized_names, 'mimetype':self.mime_map.get(name)}
        data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
        self.commit()
        for name in excluded_names:
            os.remove(self.name_path_map[name])
        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
            f.write(json.dumps(self.book_render_data, ensure_ascii=False).encode('utf-8'))
Example #7
    def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
        log = log or default_log
        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
        ContainerBase.__init__(self, tdir, opfpath, log)
        excluded_names = {
            name for name, mt in self.mime_map.iteritems() if
            name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') or
            name == 'mimetype'
        }
        raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower())

        self.book_render_data = data = {
            'version': RENDER_VERSION,
            'toc':get_toc(self).as_dict,
            'spine':[name for name, is_linear in self.spine_names],
            'link_uid': uuid4(),
            'book_hash': book_hash,
            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
            'raster_cover_name': raster_cover_name,
            'title_page_name': titlepage_name,
            'has_maths': False,
            'total_length': 0,
            'spine_length': 0,
        }
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name), self.dirty(name)
        self.transform_css()
        self.virtualized_names = set()
        self.virtualize_resources()

        def manifest_data(name):
            mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
            ans = {
                'size':os.path.getsize(self.name_path_map[name]),
                'is_virtualized': name in self.virtualized_names,
                'mimetype':mt,
                'is_html': mt in OEB_DOCS,
            }
            if ans['is_html']:
                root = self.parsed(name)
                ans['length'] = l = get_length(root)
                self.book_render_data['total_length'] += l
                if name in data['spine']:
                    self.book_render_data['spine_length'] += l
                ans['has_maths'] = hm = check_for_maths(root)
                if hm:
                    self.book_render_data['has_maths'] = True
            return ans
        data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
        self.commit()
        for name in excluded_names:
            os.remove(self.name_path_map[name])
        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
            f.write(json.dumps(self.book_render_data, ensure_ascii=False).encode('utf-8'))
Example #8
def add_all_links(container, margin_files):
    uuid = uuid4()
    name_anchor_map = {}
    for name, is_linear in container.spine_names:
        root = container.parsed(name)
        name_anchor_map[name] = frozenset(root.xpath('//*/@id'))
    for margin_file in margin_files:
        name = margin_file.name
        anchors = name_anchor_map.get(name, set())
        add_anchors_markup(container.parsed(name), uuid, anchors)
        container.dirty(name)
    return uuid
Example #9
def prepare_convert(temp_path, key, st):
    tdir = tempfile.mkdtemp(dir=temp_path)
    now = time.time()
    return {
        'path': os.path.basename(tdir),
        'id': uuid4(),
        'status': 'working',
        'mtime': now,
        'atime': now,
        'key': key,
        'file_mtime': st.st_mtime,
        'file_size': st.st_size,
        'cache_size': 0,
    }
Example #10
def make_book(options, comic_file, log):
    global _log, _options
    _options = options
    print(options)
    _log = log
    log.prints(INFO, "Extracting images.")
    path = get_work_folder(comic_file)
    image_path = os.path.join(path, "OEBPS", "Images")
    log.prints(INFO, "Processing images.")
    img_directory_processing(image_path)
    chapter_names = sanitize_tree(image_path)

    options['uuid'] = str(uuid4())
    _log.prints(INFO, "Creating EPUB file...")
    return build_epub(path, options, chapter_names)
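The str(uuid4()) stored in options['uuid'] presumably ends up as the EPUB's unique package identifier; in OPF metadata such identifiers are commonly written as urn:uuid: values. A sketch of that convention (build_epub's actual handling of the option is not shown here):

from uuid import uuid4


def package_identifier():
    # e.g. 'urn:uuid:0f8fad5b-d9cb-469f-a165-70867728950e'
    return 'urn:uuid:%s' % uuid4()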
Example #11
 def __init__(self):
     self.log = default_log
     self.current_frag = None
     self.com_id = unicode_type(uuid4())
     QWebEnginePage.__init__(self)
     secure_webengine(self.settings(), for_viewer=True)
     self.titleChanged.connect(self.title_changed)
     self.loadFinished.connect(self.show_frag)
     s = QWebEngineScript()
     s.setName('toc.js')
     s.setInjectionPoint(QWebEngineScript.InjectionPoint.DocumentReady)
     s.setRunsOnSubFrames(True)
     s.setWorldId(QWebEngineScript.ScriptWorldId.ApplicationWorld)
     s.setSourceCode(P('toc.js', allow_user_override=False, data=True).decode('utf-8').replace('COM_ID', self.com_id))
     self.scripts().insert(s)
Example #12
def prepare_convert(temp_path, key, st, book_path):
    tdir = tempfile.mkdtemp(dir=temp_path, prefix=f'c{next(td_counter)}-')
    now = time.time()
    return {
        'path': os.path.basename(tdir),
        'id': uuid4(),
        'status': 'working',
        'mtime': now,
        'atime': now,
        'key': key,
        'file_mtime': st.st_mtime,
        'file_size': st.st_size,
        'cache_size': 0,
        'book_path': book_path,
    }
Example #13
    def __init__(self, metrics, num, objects, compress):
        self.metrics, self.compress = metrics, compress
        self.is_otf = self.metrics.is_otf
        self.subset_tag = unicode_type(
            re.sub('.', lambda m: chr(int(m.group())+ord('A')), oct(num).replace('o', '')
        )).rjust(6, 'A')
        self.font_stream = FontStream(metrics.is_otf, compress=compress)
        try:
            psname = metrics.postscript_name
        except Exception:
            psname = uuid4()
        self.font_descriptor = Dictionary({
            'Type': Name('FontDescriptor'),
            'FontName': Name('%s+%s'%(self.subset_tag, psname)),
            'Flags': 0b100,  # Symbolic font
            'FontBBox': Array(metrics.pdf_bbox),
            'ItalicAngle': metrics.post.italic_angle,
            'Ascent': metrics.pdf_ascent,
            'Descent': metrics.pdf_descent,
            'CapHeight': metrics.pdf_capheight,
            'AvgWidth': metrics.pdf_avg_width,
            'StemV': metrics.pdf_stemv,
        })
        self.descendant_font = Dictionary({
            'Type':Name('Font'),
            'Subtype':Name('CIDFontType' + ('0' if metrics.is_otf else '2')),
            'BaseFont': self.font_descriptor['FontName'],
            'FontDescriptor':objects.add(self.font_descriptor),
            'CIDSystemInfo':Dictionary({
                'Registry':String('Adobe'),
                'Ordering':String('Identity'),
                'Supplement':0,
            }),
        })
        if not self.is_otf:
            self.descendant_font['CIDToGIDMap'] = Name('Identity')

        self.font_dict = Dictionary({
            'Type':Name('Font'),
            'Subtype':Name('Type0'),
            'Encoding':Name('Identity-H'),
            'BaseFont':self.descendant_font['BaseFont'],
            'DescendantFonts':Array([objects.add(self.descendant_font)]),
        })

        self.used_glyphs = set()
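The subset_tag computed above follows the PDF convention that an embedded font subset is named with a six-uppercase-letter tag and a plus sign before the base name (for example 'AAAAAF+FontName'); uuid4() is again only the fallback when no PostScript name is available. A minimal sketch of the same tag derivation, assuming num is a small non-negative counter:

def subset_tag(num):
    # Map each octal digit of num to a letter in A-H, then left-pad to six letters.
    digits = oct(num).replace('o', '')
    return ''.join(chr(int(d) + ord('A')) for d in digits).rjust(6, 'A')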
Example #14
    def __init__(self, metrics, num, objects, compress):
        self.metrics, self.compress = metrics, compress
        self.is_otf = self.metrics.is_otf
        self.subset_tag = str(
            re.sub('.', lambda m: codepoint_to_chr(int(m.group())+ord('A')), oct(num).replace('o', '')
        )).rjust(6, 'A')
        self.font_stream = FontStream(metrics.is_otf, compress=compress)
        try:
            psname = metrics.postscript_name
        except Exception:
            psname = uuid4()
        self.font_descriptor = Dictionary({
            'Type': Name('FontDescriptor'),
            'FontName': Name('%s+%s'%(self.subset_tag, psname)),
            'Flags': 0b100,  # Symbolic font
            'FontBBox': Array(metrics.pdf_bbox),
            'ItalicAngle': metrics.post.italic_angle,
            'Ascent': metrics.pdf_ascent,
            'Descent': metrics.pdf_descent,
            'CapHeight': metrics.pdf_capheight,
            'AvgWidth': metrics.pdf_avg_width,
            'StemV': metrics.pdf_stemv,
        })
        self.descendant_font = Dictionary({
            'Type':Name('Font'),
            'Subtype':Name('CIDFontType' + ('0' if metrics.is_otf else '2')),
            'BaseFont': self.font_descriptor['FontName'],
            'FontDescriptor':objects.add(self.font_descriptor),
            'CIDSystemInfo':Dictionary({
                'Registry':String('Adobe'),
                'Ordering':String('Identity'),
                'Supplement':0,
            }),
        })
        if not self.is_otf:
            self.descendant_font['CIDToGIDMap'] = Name('Identity')

        self.font_dict = Dictionary({
            'Type':Name('Font'),
            'Subtype':Name('Type0'),
            'Encoding':Name('Identity-H'),
            'BaseFont':self.descendant_font['BaseFont'],
            'DescendantFonts':Array([objects.add(self.descendant_font)]),
        })

        self.used_glyphs = set()
Example #15
    def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
        log = log or default_log
        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
        ContainerBase.__init__(self, tdir, opfpath, log)
        # We do not add zero byte sized files as the IndexedDB API in the
        # browser has no good way to distinguish between zero byte files and
        # load failures.
        excluded_names = {
            name for name, mt in iteritems(self.mime_map) if
            name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') or
            name == 'mimetype' or not self.has_name_and_is_not_empty(name)}
        raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower())
        toc = get_toc(self).to_dict(count())
        spine = [name for name, is_linear in self.spine_names]
        spineq = frozenset(spine)
        landmarks = [l for l in get_landmarks(self) if l['dest'] in spineq]

        self.book_render_data = data = {
            'version': RENDER_VERSION,
            'toc':toc,
            'spine':spine,
            'link_uid': uuid4(),
            'book_hash': book_hash,
            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
            'raster_cover_name': raster_cover_name,
            'title_page_name': titlepage_name,
            'has_maths': False,
            'total_length': 0,
            'spine_length': 0,
            'toc_anchor_map': toc_anchor_map(toc),
            'landmarks': landmarks,
            'link_to_map': {},
        }
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name), self.dirty(name)
        self.transform_css()
        self.virtualized_names = set()
        self.virtualize_resources()

        def manifest_data(name):
            mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
            ans = {
                'size':os.path.getsize(self.name_path_map[name]),
                'is_virtualized': name in self.virtualized_names,
                'mimetype':mt,
                'is_html': mt in OEB_DOCS,
            }
            if ans['is_html']:
                root = self.parsed(name)
                ans['length'] = l = get_length(root)
                self.book_render_data['total_length'] += l
                if name in data['spine']:
                    self.book_render_data['spine_length'] += l
                ans['has_maths'] = hm = check_for_maths(root)
                if hm:
                    self.book_render_data['has_maths'] = True
                ans['anchor_map'] = anchor_map(root)
            return ans
        data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
        self.commit()
        for name in excluded_names:
            os.remove(self.name_path_map[name])
        data = json.dumps(self.book_render_data, ensure_ascii=False)
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
            f.write(data)
Example #16
    def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
        log = log or default_log
        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook,
                                                    tdir,
                                                    log=log)
        ContainerBase.__init__(self, tdir, opfpath, log)
        excluded_names = {
            name
            for name, mt in self.mime_map.iteritems()
            if name == self.opf_name or mt == guess_type('a.ncx')
            or name.startswith('META-INF/') or name == 'mimetype'
        }
        raster_cover_name, titlepage_name = self.create_cover_page(
            input_fmt.lower())
        toc = get_toc(self).to_dict(count())
        spine = [name for name, is_linear in self.spine_names]
        spineq = frozenset(spine)
        landmarks = [l for l in get_landmarks(self) if l['dest'] in spineq]

        self.book_render_data = data = {
            'version': RENDER_VERSION,
            'toc': toc,
            'spine': spine,
            'link_uid': uuid4(),
            'book_hash': book_hash,
            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
            'raster_cover_name': raster_cover_name,
            'title_page_name': titlepage_name,
            'has_maths': False,
            'total_length': 0,
            'spine_length': 0,
            'toc_anchor_map': toc_anchor_map(toc),
            'landmarks': landmarks,
            'link_to_map': {},
        }
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name), self.dirty(name)
        self.transform_css()
        self.virtualized_names = set()
        self.virtualize_resources()

        def manifest_data(name):
            mt = (self.mime_map.get(name)
                  or 'application/octet-stream').lower()
            ans = {
                'size': os.path.getsize(self.name_path_map[name]),
                'is_virtualized': name in self.virtualized_names,
                'mimetype': mt,
                'is_html': mt in OEB_DOCS,
            }
            if ans['is_html']:
                root = self.parsed(name)
                ans['length'] = l = get_length(root)
                self.book_render_data['total_length'] += l
                if name in data['spine']:
                    self.book_render_data['spine_length'] += l
                ans['has_maths'] = hm = check_for_maths(root)
                if hm:
                    self.book_render_data['has_maths'] = True
                ans['anchor_map'] = anchor_map(root)
            return ans

        data['files'] = {
            name: manifest_data(name)
            for name in set(self.name_path_map) - excluded_names
        }
        self.commit()
        for name in excluded_names:
            os.remove(self.name_path_map[name])
        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'),
                   'wb') as f:
            f.write(
                json.dumps(self.book_render_data,
                           ensure_ascii=False).encode('utf-8'))
Example #17
    def __init__(self,
                 path_to_ebook,
                 tdir,
                 log=None,
                 book_hash=None,
                 save_bookmark_data=False,
                 book_metadata=None):
        log = log or default_log
        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook,
                                                    tdir,
                                                    log=log)
        ContainerBase.__init__(self, tdir, opfpath, log)
        self.book_metadata = book_metadata
        if save_bookmark_data:
            bm_file = 'META-INF/calibre_bookmarks.txt'
            self.bookmark_data = None
            if self.exists(bm_file):
                with self.open(bm_file, 'rb') as f:
                    self.bookmark_data = f.read()
        # We do not add zero byte sized files as the IndexedDB API in the
        # browser has no good way to distinguish between zero byte files and
        # load failures.
        excluded_names = {
            name
            for name, mt in iteritems(self.mime_map) if name == self.opf_name
            or mt == guess_type('a.ncx') or name.startswith('META-INF/')
            or name == 'mimetype' or not self.has_name_and_is_not_empty(name)
        }
        raster_cover_name, titlepage_name = self.create_cover_page(
            input_fmt.lower())

        toc = get_toc(self).to_dict(count())
        if not toc or not toc.get('children'):
            toc = from_xpaths(self,
                              ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
        spine = [name for name, is_linear in self.spine_names]
        spineq = frozenset(spine)
        landmarks = [l for l in get_landmarks(self) if l['dest'] in spineq]

        self.book_render_data = data = {
            'version': RENDER_VERSION,
            'toc': toc,
            'book_format': book_fmt,
            'spine': spine,
            'link_uid': uuid4(),
            'book_hash': book_hash,
            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
            'raster_cover_name': raster_cover_name,
            'title_page_name': titlepage_name,
            'has_maths': False,
            'total_length': 0,
            'spine_length': 0,
            'toc_anchor_map': toc_anchor_map(toc),
            'landmarks': landmarks,
            'link_to_map': {},
        }
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name), self.dirty(name)
        self.transform_css()
        self.virtualized_names = set()
        self.virtualize_resources()

        def manifest_data(name):
            mt = (self.mime_map.get(name)
                  or 'application/octet-stream').lower()
            ans = {
                'size': os.path.getsize(self.name_path_map[name]),
                'is_virtualized': name in self.virtualized_names,
                'mimetype': mt,
                'is_html': mt in OEB_DOCS,
            }
            if ans['is_html']:
                root = self.parsed(name)
                ans['length'] = l = get_length(root)
                self.book_render_data['total_length'] += l
                if name in data['spine']:
                    self.book_render_data['spine_length'] += l
                ans['has_maths'] = hm = check_for_maths(root)
                if hm:
                    self.book_render_data['has_maths'] = True
                ans['anchor_map'] = anchor_map(root)
            return ans

        data['files'] = {
            name: manifest_data(name)
            for name in set(self.name_path_map) - excluded_names
        }
        self.commit()
        for name in excluded_names:
            os.remove(self.name_path_map[name])
        data = json.dumps(self.book_render_data, ensure_ascii=False)
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'),
                   'wb') as f:
            f.write(data)
Example #18
def process_exploded_book(book_fmt,
                          opfpath,
                          input_fmt,
                          tdir,
                          render_manager,
                          log=None,
                          book_hash=None,
                          save_bookmark_data=False,
                          book_metadata=None,
                          virtualize_resources=True):
    log = log or default_log
    container = SimpleContainer(tdir, opfpath, log)
    input_plugin = plugin_for_input_format(input_fmt)
    is_comic = bool(getattr(input_plugin, 'is_image_collection', False))

    def needs_work(mt):
        return mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'

    def work_priority(name):
        # ensure that workers handed large files or stylesheets
        # get fewer names
        size = os.path.getsize(container.name_path_map[name]),
        is_html = container.mime_map.get(name) in OEB_DOCS
        return (0 if is_html else 1), size

    if not is_comic:
        render_manager.launch_workers(
            tuple(n for n, mt in iteritems(container.mime_map)
                  if needs_work(mt)), container)

    bookmark_data = None
    if save_bookmark_data:
        bm_file = 'META-INF/calibre_bookmarks.txt'
        if container.exists(bm_file):
            with container.open(bm_file, 'rb') as f:
                bookmark_data = f.read()

    # We do not add zero byte sized files as the IndexedDB API in the
    # browser has no good way to distinguish between zero byte files and
    # load failures.
    excluded_names = {
        name
        for name, mt in iteritems(container.mime_map)
        if name == container.opf_name or mt == guess_type('a.ncx')
        or name.startswith('META-INF/') or name == 'mimetype'
        or not container.has_name_and_is_not_empty(name)
    }
    raster_cover_name, titlepage_name = create_cover_page(
        container, input_fmt.lower(), is_comic, book_metadata)

    toc = get_toc(container, verify_destinations=False).to_dict(count())
    if not toc or not toc.get('children'):
        toc = from_xpaths(container,
                          ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
    spine = [name for name, is_linear in container.spine_names]
    spineq = frozenset(spine)
    landmarks = [l for l in get_landmarks(container) if l['dest'] in spineq]

    book_render_data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'book_format': book_fmt,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': is_comic,
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }

    names = sorted(
        (n for n, mt in iteritems(container.mime_map) if needs_work(mt)),
        key=work_priority)

    results = render_manager(
        names,
        (tdir, opfpath, virtualize_resources, book_render_data['link_uid'],
         container.data_for_clone()), container)
    ltm = book_render_data['link_to_map']
    html_data = {}
    virtualized_names = set()

    def merge_ltm(dest, src):
        for k, v in iteritems(src):
            if k in dest:
                dest[k] |= v
            else:
                dest[k] = v

    for link_to_map, hdata, vnames in results:
        html_data.update(hdata)
        virtualized_names |= vnames
        for k, v in iteritems(link_to_map):
            if k in ltm:
                merge_ltm(ltm[k], v)
            else:
                ltm[k] = v

    def manifest_data(name):
        mt = (container.mime_map.get(name)
              or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(container.name_path_map[name]),
            'is_virtualized': name in virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            data = html_data[name]
            ans['length'] = l = data['length']
            book_render_data['total_length'] += l
            if name in book_render_data['spine']:
                book_render_data['spine_length'] += l
            ans['has_maths'] = hm = data['has_maths']
            if hm:
                book_render_data['has_maths'] = True
            ans['anchor_map'] = data['anchor_map']
        return ans

    book_render_data['files'] = {
        name: manifest_data(name)
        for name in set(container.name_path_map) - excluded_names
    }
    container.commit()

    for name in excluded_names:
        os.remove(container.name_path_map[name])

    ltm = book_render_data['link_to_map']
    for name, amap in iteritems(ltm):
        for k, v in tuple(iteritems(amap)):
            amap[k] = tuple(v)  # needed for JSON serialization

    data = as_bytes(json.dumps(book_render_data, ensure_ascii=False))
    with lopen(os.path.join(container.root, 'calibre-book-manifest.json'),
               'wb') as f:
        f.write(data)

    return container, bookmark_data
Example #19
def do_add(dbctx, paths, one_book_per_directory, recurse, add_duplicates,
           otitle, oauthors, oisbn, otags, oseries, oseries_index, ocover,
           oidentifiers, olanguages, compiled_rules, oautomerge):
    request_id = uuid4()
    with add_ctx():
        files, dirs = [], []
        for path in paths:
            path = os.path.abspath(path)
            if os.path.isdir(path):
                dirs.append(path)
            else:
                if os.path.exists(path):
                    files.append(path)
                else:
                    prints(path, 'not found')

        file_duplicates, added_ids, merged_ids = [], set(), set()
        for book in files:
            fmt = os.path.splitext(book)[1]
            fmt = fmt[1:] if fmt else None
            if not fmt:
                continue
            aids, mids, dups, book_title = dbctx.run(
                'add', 'book', dbctx.path(book), os.path.basename(book), fmt,
                add_duplicates, otitle, oauthors, oisbn, otags, oseries,
                oseries_index,
                serialize_cover(ocover) if ocover else None, oidentifiers,
                olanguages, oautomerge, request_id)
            added_ids |= set(aids)
            merged_ids |= set(mids)

            if dups:
                file_duplicates.append((book_title, book))

        dir_dups = []
        scanner = cdb_recursive_find if recurse else cdb_find_in_dir
        for dpath in dirs:
            for formats in scanner(dpath, one_book_per_directory,
                                   compiled_rules):
                cover_data = None
                for fmt in formats:
                    if fmt.lower().endswith('.opf'):
                        with lopen(fmt, 'rb') as f:
                            mi = get_metadata(f, stream_type='opf')
                            if mi.cover_data and mi.cover_data[1]:
                                cover_data = mi.cover_data[1]
                            elif mi.cover:
                                try:
                                    with lopen(mi.cover, 'rb') as f:
                                        cover_data = f.read()
                                except OSError:
                                    pass

                book_title, ids, mids, dups = dbctx.run(
                    'add', 'format_group', tuple(map(dbctx.path, formats)),
                    add_duplicates, oautomerge, request_id, cover_data)
                if book_title is not None:
                    added_ids |= set(ids)
                    merged_ids |= set(mids)
                    if dups:
                        dir_dups.append((book_title, formats))

        sys.stdout = sys.__stdout__

        if dir_dups or file_duplicates:
            prints(_('The following books were not added as '
                     'they already exist in the database '
                     '(see --duplicates option or --automerge option):'),
                   file=sys.stderr)
            for title, formats in dir_dups:
                prints(' ', title, file=sys.stderr)
                for path in formats:
                    prints('   ', path)
            if file_duplicates:
                for title, path in file_duplicates:
                    prints(' ', title, file=sys.stderr)
                    prints('   ', path)

        if added_ids:
            prints(_('Added book ids: %s') % (', '.join(map(str, added_ids))))
        if merged_ids:
            prints(
                _('Merged book ids: %s') % (', '.join(map(str, merged_ids))))
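In this last example a single uuid4() acts as a request_id for the whole command-line invocation: the same value is passed to every per-book and per-directory call, presumably so the backend can associate them with one add operation. A hedged sketch of that pattern, with an illustrative submit_one callable standing in for the dbctx.run calls:

from uuid import uuid4


def add_books(paths, submit_one):
    # One id per invocation, threaded through every individual submission.
    request_id = uuid4()
    return [submit_one(path, request_id) for path in paths]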