Python from_xpaths Examples, calibre.ebooks.oeb.polish.toc.from_xpaths Python Examples

Example #1

0

Show file

File: main.py Project: j-howell/calibre

 def create_from_xpath(self, xpaths, remove_duplicates=True):
     toc = from_xpaths(self.ebook, xpaths)
     if len(toc) == 0:
         return error_dialog(self, _('No items found'),
             _('No items were found that could be added to the Table of Contents.'), show=True)
     if remove_duplicates:
         toc.remove_duplicates()
     self.insert_toc_fragment(toc)

Example #2

0

Show file

 def create_from_xpath(self, xpaths, remove_duplicates=True, prefer_title=False):
     toc = from_xpaths(self.ebook, xpaths, prefer_title=prefer_title)
     if len(toc) == 0:
         return error_dialog(self, _('No items found'),
             _('No items were found that could be added to the Table of Contents.'), show=True)
     if remove_duplicates:
         toc.remove_duplicates()
     self.insert_toc_fragment(toc)

Example #3

0

Show file

File: main.py Project: pombreda/calibre-1

 def create_from_xpath(self, xpaths):
     toc = from_xpaths(self.ebook, xpaths)
     if len(toc) == 0:
         return error_dialog(
             self,
             _('No items found'),
             _('No items were found that could be added to the Table of Contents.'
               ),
             show=True)
     self.insert_toc_fragment(toc)

Example #4

0

Show file

def process_exploded_book(book_fmt,
                          opfpath,
                          input_fmt,
                          tdir,
                          render_manager,
                          log=None,
                          book_hash=None,
                          save_bookmark_data=False,
                          book_metadata=None,
                          virtualize_resources=True):
    log = log or default_log
    container = SimpleContainer(tdir, opfpath, log)
    input_plugin = plugin_for_input_format(input_fmt)
    is_comic = bool(getattr(input_plugin, 'is_image_collection', False))

    def needs_work(mt):
        return mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'

    def work_priority(name):
        # ensure workers with large files or stylesheets
        # have the less names
        size = os.path.getsize(container.name_path_map[name]),
        is_html = container.mime_map.get(name) in OEB_DOCS
        return (0 if is_html else 1), size

    if not is_comic:
        render_manager.launch_workers(
            tuple(n for n, mt in iteritems(container.mime_map)
                  if needs_work(mt)), container)

    bookmark_data = None
    if save_bookmark_data:
        bm_file = 'META-INF/calibre_bookmarks.txt'
        if container.exists(bm_file):
            with container.open(bm_file, 'rb') as f:
                bookmark_data = f.read()

    # We do not add zero byte sized files as the IndexedDB API in the
    # browser has no good way to distinguish between zero byte files and
    # load failures.
    excluded_names = {
        name
        for name, mt in iteritems(container.mime_map)
        if name == container.opf_name or mt == guess_type('a.ncx')
        or name.startswith('META-INF/') or name == 'mimetype'
        or not container.has_name_and_is_not_empty(name)
    }
    raster_cover_name, titlepage_name = create_cover_page(
        container, input_fmt.lower(), is_comic, book_metadata)

    toc = get_toc(container, verify_destinations=False).to_dict(count())
    if not toc or not toc.get('children'):
        toc = from_xpaths(container,
                          ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
    spine = [name for name, is_linear in container.spine_names]
    spineq = frozenset(spine)
    landmarks = [l for l in get_landmarks(container) if l['dest'] in spineq]

    book_render_data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'book_format': book_fmt,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': is_comic,
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }

    names = sorted(
        (n for n, mt in iteritems(container.mime_map) if needs_work(mt)),
        key=work_priority)

    results = render_manager(
        names,
        (tdir, opfpath, virtualize_resources, book_render_data['link_uid'],
         container.data_for_clone()), container)
    ltm = book_render_data['link_to_map']
    html_data = {}
    virtualized_names = set()

    def merge_ltm(dest, src):
        for k, v in iteritems(src):
            if k in dest:
                dest[k] |= v
            else:
                dest[k] = v

    for link_to_map, hdata, vnames in results:
        html_data.update(hdata)
        virtualized_names |= vnames
        for k, v in iteritems(link_to_map):
            if k in ltm:
                merge_ltm(ltm[k], v)
            else:
                ltm[k] = v

    def manifest_data(name):
        mt = (container.mime_map.get(name)
              or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(container.name_path_map[name]),
            'is_virtualized': name in virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            data = html_data[name]
            ans['length'] = l = data['length']
            book_render_data['total_length'] += l
            if name in book_render_data['spine']:
                book_render_data['spine_length'] += l
            ans['has_maths'] = hm = data['has_maths']
            if hm:
                book_render_data['has_maths'] = True
            ans['anchor_map'] = data['anchor_map']
        return ans

    book_render_data['files'] = {
        name: manifest_data(name)
        for name in set(container.name_path_map) - excluded_names
    }
    container.commit()

    for name in excluded_names:
        os.remove(container.name_path_map[name])

    ltm = book_render_data['link_to_map']
    for name, amap in iteritems(ltm):
        for k, v in tuple(iteritems(amap)):
            amap[k] = tuple(v)  # needed for JSON serialization

    data = as_bytes(json.dumps(book_render_data, ensure_ascii=False))
    with lopen(os.path.join(container.root, 'calibre-book-manifest.json'),
               'wb') as f:
        f.write(data)

    return container, bookmark_data

Example #5

0

Show file

    def __init__(self,
                 path_to_ebook,
                 tdir,
                 log=None,
                 book_hash=None,
                 save_bookmark_data=False,
                 book_metadata=None):
        log = log or default_log
        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook,
                                                    tdir,
                                                    log=log)
        ContainerBase.__init__(self, tdir, opfpath, log)
        self.book_metadata = book_metadata
        if save_bookmark_data:
            bm_file = 'META-INF/calibre_bookmarks.txt'
            self.bookmark_data = None
            if self.exists(bm_file):
                with self.open(bm_file, 'rb') as f:
                    self.bookmark_data = f.read()
        # We do not add zero byte sized files as the IndexedDB API in the
        # browser has no good way to distinguish between zero byte files and
        # load failures.
        excluded_names = {
            name
            for name, mt in iteritems(self.mime_map) if name == self.opf_name
            or mt == guess_type('a.ncx') or name.startswith('META-INF/')
            or name == 'mimetype' or not self.has_name_and_is_not_empty(name)
        }
        raster_cover_name, titlepage_name = self.create_cover_page(
            input_fmt.lower())

        toc = get_toc(self).to_dict(count())
        if not toc or not toc.get('children'):
            toc = from_xpaths(self,
                              ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
        spine = [name for name, is_linear in self.spine_names]
        spineq = frozenset(spine)
        landmarks = [l for l in get_landmarks(self) if l['dest'] in spineq]

        self.book_render_data = data = {
            'version': RENDER_VERSION,
            'toc': toc,
            'book_format': book_fmt,
            'spine': spine,
            'link_uid': uuid4(),
            'book_hash': book_hash,
            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
            'raster_cover_name': raster_cover_name,
            'title_page_name': titlepage_name,
            'has_maths': False,
            'total_length': 0,
            'spine_length': 0,
            'toc_anchor_map': toc_anchor_map(toc),
            'landmarks': landmarks,
            'link_to_map': {},
        }
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name), self.dirty(name)
        self.transform_css()
        self.virtualized_names = set()
        self.virtualize_resources()

        def manifest_data(name):
            mt = (self.mime_map.get(name)
                  or 'application/octet-stream').lower()
            ans = {
                'size': os.path.getsize(self.name_path_map[name]),
                'is_virtualized': name in self.virtualized_names,
                'mimetype': mt,
                'is_html': mt in OEB_DOCS,
            }
            if ans['is_html']:
                root = self.parsed(name)
                ans['length'] = l = get_length(root)
                self.book_render_data['total_length'] += l
                if name in data['spine']:
                    self.book_render_data['spine_length'] += l
                ans['has_maths'] = hm = check_for_maths(root)
                if hm:
                    self.book_render_data['has_maths'] = True
                ans['anchor_map'] = anchor_map(root)
            return ans

        data['files'] = {
            name: manifest_data(name)
            for name in set(self.name_path_map) - excluded_names
        }
        self.commit()
        for name in excluded_names:
            os.remove(self.name_path_map[name])
        data = json.dumps(self.book_render_data, ensure_ascii=False)
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'),
                   'wb') as f:
            f.write(data)