def run(self):
    """Run the restore steps inside a temporary working directory.

    A temporary directory is requested next to ``self.src_library_path``;
    if that fails with ``OSError`` one is requested without an explicit
    parent instead. The library is then scanned, preferences are loaded
    (falling back to a fresh database when that fails), books are restored
    and finally ``self.replace_db()`` is called.

    Nothing propagates to the caller: any exception raised along the way is
    stored as a formatted traceback in ``self.tb``.
    """
    try:
        parent_dir = os.path.dirname(self.src_library_path)
        try:
            tdir = TemporaryDirectory('_rlib', dir=parent_dir)
            # Enter eagerly so that a failure to create the directory is
            # raised here, where the fallback below can handle it.
            tdir.__enter__()
        except OSError:
            # We may lack permission to create directories beside the
            # source library; ask for a temporary directory without
            # specifying a parent.
            tdir = TemporaryDirectory('_rlib')

        with tdir as scratch:
            self.library_path = scratch
            self.scan_library()

            prefs_restored = self.load_preferences()
            if not prefs_restored:
                # Restoring preferences failed. Discard any database that
                # was created and rebuild the structure from the metadata
                # in the OPF files.
                stale_db = os.path.join(self.library_path, 'metadata.db')
                if os.path.exists(stale_db):
                    os.remove(stale_db)
                self.create_cc_metadata()

            self.restore_books()

            nothing_restored = self.successes == 0 and len(self.dirs) > 0
            if nothing_restored:
                raise Exception('Something bad happened')

            self.replace_db()
    except:
        # Deliberate catch-all: the failure is reported through self.tb.
        self.tb = traceback.format_exc()
def run(self):
    """Restore the library using a temporary working directory.

    The working directory is first requested inside the parent folder of
    ``self.src_library_path``; when that raises ``EnvironmentError`` a
    temporary directory without an explicit parent is used. After scanning
    the library and loading preferences (or starting over with a new
    database when that fails), the books are restored and
    ``self.replace_db()`` is called.

    This method never raises: every failure ends up as a formatted
    traceback in ``self.tb``.
    """
    try:
        library_parent = os.path.dirname(self.src_library_path)
        try:
            tdir = TemporaryDirectory('_rlib', dir=library_parent)
            # Entered here so that creation errors hit the handler below.
            tdir.__enter__()
        except EnvironmentError:
            # In case we do not have permission to create directories in
            # the parent folder of the source library.
            tdir = TemporaryDirectory('_rlib')

        with tdir as work_path:
            self.library_path = work_path
            self.scan_library()

            if not self.load_preferences():
                # Something went wrong while restoring preferences. Start
                # over with a new database and rebuild the structure from
                # the metadata in the OPF.
                db_file = os.path.join(self.library_path, 'metadata.db')
                if os.path.exists(db_file):
                    os.remove(db_file)
                self.create_cc_metadata()

            self.restore_books()

            had_candidates = len
            if self.successes == 0 and had_candidates(self.dirs) > 0:
                raise Exception('Something bad happened')

            self.replace_db()
    except:
        # Deliberate catch-all: callers inspect self.tb for failures.
        self.tb = traceback.format_exc()
class EbookIterator(BookmarksMixin):
    '''
    Context manager that runs an e-book through its input plugin into a
    temporary directory and exposes the resulting spine (``self.spine``),
    table of contents (``self.toc``) and per-item page numbers.
    '''

    #: Number of characters counted as one page when computing page numbers
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None):
        '''
        :param pathtoebook: Path to the e-book file. Surrounding whitespace
            is stripped and the path is made absolute.
        :param log: Log object to use; ``default_log`` when falsy.
        '''
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Normalise htm/xhtm/xhtml to html
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        '''
        Find the next spine item whose raw content contains ``text``
        (compared in lower case).

        :param text: The text to look for.
        :param index: Spine index to search from; the item at ``index``
            itself is never examined.
        :param backwards: If True search items before ``index`` (nearest
            first), otherwise items after it.
        :return: The index of the first matching spine item, or None when
            there is no match.
        '''
        text = prepare_string_for_xml(text.lower())
        pmap = list(enumerate(self.spine))
        if backwards:
            pmap.reverse()
        for i, path in pmap:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                try:
                    raw = xml_replace_entities(raw)
                except Exception:
                    # Best effort: fall back to searching the raw markup
                    pass
                if text in raw.lower():
                    return i

    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True,
                  extract_embedded_fonts_for_qt=False):
        '''
        Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        :param processed: If True, also run ``create_oebbook`` on the output
            of the input plugin.
        :param only_input_plugin: If True, never run ``create_oebbook``.
        :param run_char_count: Passed through to every ``SpineItem``.
        :param read_anchor_map: Passed through to every ``SpineItem``; when
            True ``create_indexing_data`` is also called.
        :param extract_embedded_fonts_for_qt: If True, call
            ``extract_fonts`` on the OPF; failures are only logged.
        :return: ``self``

        NOTE(review): if this method raises after the temporary directory
        has been entered, nothing here calls ``self._tdir.__exit__``.
        '''
        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(
                inf, plumber.opts, plumber.input_fmt, self.log, {}, self.base)

            if not only_input_plugin:
                # Run the HTML preprocess/parsing from the conversion
                # pipeline as well. The parentheses spell out the operator
                # precedence: ``and`` binds tighter than ``or``.
                if processed or (
                        plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                        not hasattr(self.pathtoopf, 'manifest')):
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                    self.pathtoopf = create_oebbook(
                        self.log, self.pathtoopf, plumber.opts)

            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'

        # Use the OPF object supplied by the input plugin, if there is one
        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()

        # Linear spine items first, non-linear ones after them
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
                        run_char_count=run_char_count)
        is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {
                'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3'}:
            # Generate a title page that references the cover and put it at
            # the front of the spine
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE % prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
                self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        sizes = [i.character_count for i in self.spine]
        self.pages = [
            math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p

        # Assign each item a 1-based, inclusive page range
        start = 1
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.read_bookmarks()

        if extract_embedded_fonts_for_qt:
            from calibre.ebooks.oeb.iterator.extract_fonts import extract_fonts
            try:
                extract_fonts(self.opf, self.log)
            except Exception:
                # Log at DEBUG level, always restoring the previous level
                ol = self.log.filter_level
                self.log.filter_level = self.log.DEBUG
                try:
                    self.log.exception('Failed to extract fonts')
                finally:
                    self.log.filter_level = ol

        return self

    def __exit__(self, *args):
        '''
        Exit the temporary directory context, then try to remove every file
        recorded in ``self.delete_on_exit``, ignoring removal failures.
        '''
        self._tdir.__exit__(*args)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except OSError:
                pass
class Plugin:  # {{{
    '''
    A calibre plugin. Useful members include:

       * ``self.installation_type``: Stores how the plugin was installed.
       * ``self.plugin_path``: Stores path to the ZIP file that contains
                               this plugin or None if it is a builtin
                               plugin
       * ``self.site_customization``: Stores a customization string entered
                                      by the user.

    Methods that should be overridden in sub classes:

       * :meth:`initialize`
       * :meth:`customization_help`

    Useful methods:

        * :meth:`temporary_file`
        * :meth:`__enter__`
        * :meth:`load_resources`

    '''
    #: List of platforms this plugin works on.
    #: For example: ``['windows', 'osx', 'linux']``
    supported_platforms = []

    #: The name of this plugin. You must set it something other
    #: than Trivial Plugin for it to work.
    name = 'Trivial Plugin'

    #: The version of this plugin as a 3-tuple (major, minor, revision)
    version = (1, 0, 0)

    #: A short string describing what this plugin does
    description = _('Does absolutely nothing')

    #: The author of this plugin
    author = _('Unknown')

    #: When more than one plugin exists for a filetype,
    #: the plugins are run in order of decreasing priority.
    #: Plugins with higher priority will be run first.
    #: The highest possible priority is ``sys.maxsize``.
    #: Default priority is 1.
    priority = 1

    #: The earliest version of calibre this plugin requires
    minimum_calibre_version = (0, 4, 118)

    #: The way this plugin is installed
    installation_type = None

    #: If False, the user will not be able to disable this plugin. Use with
    #: care.
    can_be_disabled = True

    #: The type of this plugin. Used for categorizing plugins in the
    #: GUI
    type = _('Base')

    def __init__(self, plugin_path):
        self.plugin_path = plugin_path
        self.site_customization = None

    def initialize(self):
        '''
        Called once when calibre plugins are initialized. Plugins are
        re-initialized every time a new plugin is added. Also note that if the
        plugin is run in a worker process, such as for adding books, then the
        plugin will be initialized for every new worker process.

        Perform any plugin specific initialization here, such as extracting
        resources from the plugin ZIP file. The path to the ZIP file is
        available as ``self.plugin_path``.

        Note that ``self.site_customization`` is **not** available at this point.
        '''
        pass

    def config_widget(self):
        '''
        Implement this method and :meth:`save_settings` in your plugin to
        use a custom configuration dialog, rather than relying on the simple
        string based default customization.

        This method, if implemented, must return a QWidget. The widget can have
        an optional method validate() that takes no arguments and is called
        immediately after the user clicks OK. Changes are applied if and only
        if the method returns True.

        If for some reason you cannot perform the configuration at this time,
        return a tuple of two strings (message, details), these will be
        displayed as a warning dialog to the user and the process will be
        aborted.
        '''
        raise NotImplementedError()

    def save_settings(self, config_widget):
        '''
        Save the settings specified by the user with config_widget.

        :param config_widget: The widget returned by :meth:`config_widget`.

        '''
        raise NotImplementedError()

    def do_user_config(self, parent=None):
        '''
        This method shows a configuration dialog for this plugin. The
        changes are automatically applied when the dialog is accepted.

        :param parent: Parent widget for the dialog.
        :return: The result code of the dialog (``config_dialog.result()``),
            or False when :meth:`config_widget` reported, by returning a
            tuple, that configuration is not possible.
        '''
        from qt.core import QDialog, QDialogButtonBox, QVBoxLayout, \
            QLabel, Qt, QLineEdit
        from calibre.gui2 import gprefs

        prefname = 'plugin config dialog:' + self.type + ':' + self.name
        geom = gprefs.get(prefname, None)

        config_dialog = QDialog(parent)
        button_box = QDialogButtonBox(
            QDialogButtonBox.StandardButton.Ok |
            QDialogButtonBox.StandardButton.Cancel)
        v = QVBoxLayout(config_dialog)

        def size_dialog():
            # Restore the geometry saved last time, if there is one
            if geom is None:
                config_dialog.resize(config_dialog.sizeHint())
            else:
                from qt.core import QApplication
                QApplication.instance().safe_restore_geometry(
                    config_dialog, geom)

        button_box.accepted.connect(config_dialog.accept)
        button_box.rejected.connect(config_dialog.reject)
        config_dialog.setWindowTitle(_('Customize') + ' ' + self.name)
        try:
            config_widget = self.config_widget()
        except NotImplementedError:
            config_widget = None

        if isinstance(config_widget, tuple):
            # (message, details): configuration is not possible right now
            from calibre.gui2 import warning_dialog
            warning_dialog(parent, _('Cannot configure'), config_widget[0],
                           det_msg=config_widget[1], show=True)
            return False

        if config_widget is not None:
            v.addWidget(config_widget)
            v.addWidget(button_box)
            size_dialog()
            config_dialog.exec()

            if config_dialog.result() == QDialog.DialogCode.Accepted:
                if hasattr(config_widget, 'validate'):
                    if config_widget.validate():
                        self.save_settings(config_widget)
                else:
                    self.save_settings(config_widget)
        else:
            # No custom widget: fall back to a single-line string
            # customization described by customization_help()
            from calibre.customize.ui import plugin_customization, \
                customize_plugin
            help_text = self.customization_help(gui=True)
            help_text = QLabel(help_text, config_dialog)
            help_text.setWordWrap(True)
            help_text.setTextInteractionFlags(
                Qt.TextInteractionFlag.LinksAccessibleByMouse |
                Qt.TextInteractionFlag.LinksAccessibleByKeyboard)
            help_text.setOpenExternalLinks(True)
            v.addWidget(help_text)
            sc = plugin_customization(self)
            if not sc:
                sc = ''
            sc = sc.strip()
            sc = QLineEdit(sc, config_dialog)
            v.addWidget(sc)
            v.addWidget(button_box)
            size_dialog()
            config_dialog.exec()

            if config_dialog.result() == QDialog.DialogCode.Accepted:
                sc = str(sc.text()).strip()
                customize_plugin(self, sc)

        geom = bytearray(config_dialog.saveGeometry())
        gprefs[prefname] = geom

        return config_dialog.result()

    def load_resources(self, names):
        '''
        If this plugin comes in a ZIP file (user added plugin), this method
        will allow you to load resources from the ZIP file.

        For example to load an image::

            pixmap = QPixmap()
            pixmap.loadFromData(self.load_resources(['images/icon.png'])['images/icon.png'])
            icon = QIcon(pixmap)

        :param names: List of paths to resources in the ZIP file using / as separator

        :return: A dictionary of the form ``{name: file_contents}``. Any names
                 that were not found in the ZIP file will not be present in the
                 dictionary.

        :raises ValueError: If ``self.plugin_path`` is None.
        '''
        if self.plugin_path is None:
            raise ValueError('This plugin was not loaded from a ZIP file')
        ans = {}
        with zipfile.ZipFile(self.plugin_path, 'r') as zf:
            for candidate in zf.namelist():
                if candidate in names:
                    ans[candidate] = zf.read(candidate)
        return ans

    def customization_help(self, gui=False):
        '''
        Return a string giving help on how to customize this plugin.
        By default raise a :class:`NotImplementedError`, which indicates that
        the plugin does not require customization.

        If you re-implement this method in your subclass, the user will
        be asked to enter a string as customization for this plugin.
        The customization string will be available as
        ``self.site_customization``.

        Site customization could be anything, for example, the path to
        a needed binary on the user's computer.

        :param gui: If True return HTML help, otherwise return plain text help.

        '''
        raise NotImplementedError()

    def temporary_file(self, suffix):
        '''
        Return a file-like object that is a temporary file on the file system.
        This file will remain available even after being closed and will only
        be removed on interpreter shutdown. Use the ``name`` member of the
        returned object to access the full path to the created temporary file.

        :param suffix: The suffix that the temporary file will have.
        '''
        return PersistentTemporaryFile(suffix)

    def is_customizable(self):
        '''
        Return True if :meth:`customization_help` is implemented, i.e. it
        does not raise :class:`NotImplementedError`.
        '''
        try:
            self.customization_help()
            return True
        except NotImplementedError:
            return False

    def __enter__(self, *args):
        '''
        Add this plugin to the python path so that its contents become directly
        importable. Useful when bundling large python libraries into the plugin.
        Use it like this::

            with plugin:
                import something

        If the ZIP file contains a file whose name ends with one of the
        interpreter's extension module suffixes, the ZIP file is extracted
        to a temporary directory and that directory is added to the path
        instead of the ZIP file itself.
        '''
        if self.plugin_path is not None:
            from calibre.utils.zipfile import ZipFile
            from importlib.machinery import EXTENSION_SUFFIXES
            with ZipFile(self.plugin_path) as zf:
                # Compare case-insensitively: both the suffixes and the
                # archive member names are lower-cased.
                suffixes = tuple(x.lower() for x in EXTENSION_SUFFIXES)
                zip_safe = not any(
                    name.lower().endswith(suffixes) for name in zf.namelist())
                if zip_safe:
                    sys.path.append(self.plugin_path)
                    self.sys_insertion_path = self.plugin_path
                else:
                    from calibre.ptempfile import TemporaryDirectory
                    self._sys_insertion_tdir = TemporaryDirectory(
                        'plugin_unzip')
                    self.sys_insertion_path = self._sys_insertion_tdir.__enter__(
                        *args)
                    zf.extractall(self.sys_insertion_path)
                    sys.path.append(self.sys_insertion_path)

    def __exit__(self, *args):
        '''
        Undo :meth:`__enter__`: remove the inserted entry from ``sys.path``
        and exit the temporary directory context, if one was created.
        '''
        ip, it = getattr(self, 'sys_insertion_path', None), getattr(
            self, '_sys_insertion_tdir', None)
        if ip in sys.path:
            sys.path.remove(ip)
        if hasattr(it, '__exit__'):
            it.__exit__(*args)

    def cli_main(self, args):
        '''
        This method is the main entry point for your plugins command line
        interface. It is called when the user does: calibre-debug -r "Plugin
        Name". Any arguments passed are present in the args variable.
        '''
        raise NotImplementedError(
            'The %s plugin has no command line interface' % self.name)
class Plugin(object):  # {{{
    '''
    A calibre plugin. Useful members include:

       * ``self.plugin_path``: Stores path to the zip file that contains
                               this plugin or None if it is a builtin
                               plugin
       * ``self.site_customization``: Stores a customization string entered
                                      by the user.

    Methods that should be overridden in sub classes:

       * :meth:`initialize`
       * :meth:`customization_help`

    Useful methods:

        * :meth:`temporary_file`
        * :meth:`__enter__`
        * :meth:`load_resources`

    '''
    #: List of platforms this plugin works on.
    #: For example: ``['windows', 'osx', 'linux']``
    supported_platforms = []

    #: The name of this plugin. You must set it something other
    #: than Trivial Plugin for it to work.
    name = 'Trivial Plugin'

    #: The version of this plugin as a 3-tuple (major, minor, revision)
    version = (1, 0, 0)

    #: A short string describing what this plugin does
    description = _('Does absolutely nothing')

    #: The author of this plugin
    author = _('Unknown')

    #: When more than one plugin exists for a filetype,
    #: the plugins are run in order of decreasing priority
    #: i.e. plugins with higher priority will be run first.
    #: The highest possible priority is ``sys.maxint``.
    #: Default priority is 1.
    priority = 1

    #: The earliest version of calibre this plugin requires
    minimum_calibre_version = (0, 4, 118)

    #: If False, the user will not be able to disable this plugin. Use with
    #: care.
    can_be_disabled = True

    #: The type of this plugin. Used for categorizing plugins in the
    #: GUI
    type = _('Base')

    def __init__(self, plugin_path):
        self.plugin_path = plugin_path
        self.site_customization = None

    def initialize(self):
        '''
        Called once when calibre plugins are initialized. Plugins are
        re-initialized every time a new plugin is added. Also note that if the
        plugin is run in a worker process, such as for adding books, then the
        plugin will be initialized for every new worker process.

        Perform any plugin specific initialization here, such as extracting
        resources from the plugin zip file. The path to the zip file is
        available as ``self.plugin_path``.

        Note that ``self.site_customization`` is **not** available at this point.
        '''
        pass

    def config_widget(self):
        '''
        Implement this method and :meth:`save_settings` in your plugin to
        use a custom configuration dialog, rather than relying on the simple
        string based default customization.

        This method, if implemented, must return a QWidget. The widget can have
        an optional method validate() that takes no arguments and is called
        immediately after the user clicks OK. Changes are applied if and only
        if the method returns True.

        If for some reason you cannot perform the configuration at this time,
        return a tuple of two strings (message, details), these will be
        displayed as a warning dialog to the user and the process will be
        aborted.
        '''
        raise NotImplementedError()

    def save_settings(self, config_widget):
        '''
        Save the settings specified by the user with config_widget.

        :param config_widget: The widget returned by :meth:`config_widget`.

        '''
        raise NotImplementedError()

    def do_user_config(self, parent=None):
        '''
        This method shows a configuration dialog for this plugin. The
        changes are automatically applied when the dialog is accepted.

        :param parent: Parent widget for the dialog.
        :return: The result code of the dialog (``config_dialog.result()``),
            or False when :meth:`config_widget` reported, by returning a
            tuple, that configuration is not possible.
        '''
        from PyQt5.Qt import QDialog, QDialogButtonBox, QVBoxLayout, \
            QLabel, Qt, QLineEdit
        from calibre.gui2 import gprefs

        prefname = 'plugin config dialog:'+self.type + ':' + self.name
        geom = gprefs.get(prefname, None)

        config_dialog = QDialog(parent)
        button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
        v = QVBoxLayout(config_dialog)

        def size_dialog():
            # Restore the geometry saved last time, if there is one
            if geom is None:
                config_dialog.resize(config_dialog.sizeHint())
            else:
                config_dialog.restoreGeometry(geom)

        button_box.accepted.connect(config_dialog.accept)
        button_box.rejected.connect(config_dialog.reject)
        config_dialog.setWindowTitle(_('Customize') + ' ' + self.name)
        try:
            config_widget = self.config_widget()
        except NotImplementedError:
            config_widget = None

        if isinstance(config_widget, tuple):
            # (message, details): configuration is not possible right now
            from calibre.gui2 import warning_dialog
            warning_dialog(parent, _('Cannot configure'), config_widget[0],
                           det_msg=config_widget[1], show=True)
            return False

        if config_widget is not None:
            v.addWidget(config_widget)
            v.addWidget(button_box)
            size_dialog()
            config_dialog.exec_()

            if config_dialog.result() == QDialog.Accepted:
                if hasattr(config_widget, 'validate'):
                    if config_widget.validate():
                        self.save_settings(config_widget)
                else:
                    self.save_settings(config_widget)
        else:
            # No custom widget: fall back to a single-line string
            # customization described by customization_help()
            from calibre.customize.ui import plugin_customization, \
                customize_plugin
            help_text = self.customization_help(gui=True)
            help_text = QLabel(help_text, config_dialog)
            help_text.setWordWrap(True)
            help_text.setTextInteractionFlags(Qt.LinksAccessibleByMouse | Qt.LinksAccessibleByKeyboard)
            help_text.setOpenExternalLinks(True)
            v.addWidget(help_text)
            sc = plugin_customization(self)
            if not sc:
                sc = ''
            sc = sc.strip()
            sc = QLineEdit(sc, config_dialog)
            v.addWidget(sc)
            v.addWidget(button_box)
            size_dialog()
            config_dialog.exec_()

            if config_dialog.result() == QDialog.Accepted:
                sc = unicode(sc.text()).strip()
                customize_plugin(self, sc)

        geom = bytearray(config_dialog.saveGeometry())
        gprefs[prefname] = geom

        return config_dialog.result()

    def load_resources(self, names):
        '''
        If this plugin comes in a ZIP file (user added plugin), this method
        will allow you to load resources from the ZIP file.

        For example to load an image::

            pixmap = QPixmap()
            pixmap.loadFromData(self.load_resources(['images/icon.png']).itervalues().next())
            icon = QIcon(pixmap)

        :param names: List of paths to resources in the zip file using / as separator

        :return: A dictionary of the form ``{name: file_contents}``. Any names
                 that were not found in the zip file will not be present in the
                 dictionary.

        :raises ValueError: If ``self.plugin_path`` is None.
        '''
        if self.plugin_path is None:
            raise ValueError('This plugin was not loaded from a ZIP file')
        ans = {}
        with zipfile.ZipFile(self.plugin_path, 'r') as zf:
            for candidate in zf.namelist():
                if candidate in names:
                    ans[candidate] = zf.read(candidate)
        return ans

    def customization_help(self, gui=False):
        '''
        Return a string giving help on how to customize this plugin.
        By default raise a :class:`NotImplementedError`, which indicates that
        the plugin does not require customization.

        If you re-implement this method in your subclass, the user will
        be asked to enter a string as customization for this plugin.
        The customization string will be available as
        ``self.site_customization``.

        Site customization could be anything, for example, the path to
        a needed binary on the user's computer.

        :param gui: If True return HTML help, otherwise return plain text help.

        '''
        raise NotImplementedError

    def temporary_file(self, suffix):
        '''
        Return a file-like object that is a temporary file on the file system.
        This file will remain available even after being closed and will only
        be removed on interpreter shutdown. Use the ``name`` member of the
        returned object to access the full path to the created temporary file.

        :param suffix: The suffix that the temporary file will have.
        '''
        return PersistentTemporaryFile(suffix)

    def is_customizable(self):
        '''
        Return True if :meth:`customization_help` is implemented, i.e. it
        does not raise :class:`NotImplementedError`.
        '''
        try:
            self.customization_help()
            return True
        except NotImplementedError:
            return False

    def __enter__(self, *args):
        '''
        Add this plugin to the python path so that its contents become directly
        importable. Useful when bundling large python libraries into the plugin.
        Use it like this::

            with plugin:
                import something

        If the zip file contains any file with a ``pyd``, ``so``, ``dll`` or
        ``dylib`` extension, the zip file is extracted to a temporary
        directory and that directory is added to the path instead of the
        zip file itself.
        '''
        if self.plugin_path is not None:
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(self.plugin_path)
            try:
                extensions = {x.rpartition('.')[-1].lower() for x in
                              zf.namelist()}
                zip_safe = True
                for ext in ('pyd', 'so', 'dll', 'dylib'):
                    if ext in extensions:
                        zip_safe = False
                        break
                if zip_safe:
                    sys.path.insert(0, self.plugin_path)
                    self.sys_insertion_path = self.plugin_path
                else:
                    from calibre.ptempfile import TemporaryDirectory
                    self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
                    self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
                    zf.extractall(self.sys_insertion_path)
                    sys.path.insert(0, self.sys_insertion_path)
            finally:
                # Close the archive even if extraction fails
                zf.close()

    def __exit__(self, *args):
        '''
        Undo :meth:`__enter__`: remove the inserted entry from ``sys.path``
        and exit the temporary directory context, if one was created.
        '''
        ip, it = getattr(self, 'sys_insertion_path', None), getattr(
            self, '_sys_insertion_tdir', None)
        if ip in sys.path:
            sys.path.remove(ip)
        if hasattr(it, '__exit__'):
            it.__exit__(*args)

    def cli_main(self, args):
        '''
        This method is the main entry point for your plugins command line
        interface. It is called when the user does: calibre-debug -r "Plugin
        Name". Any arguments passed are present in the args variable.
        '''
        raise NotImplementedError('The %s plugin has no command line interface'
                                  %self.name)
class EbookIterator(BookmarksMixin):
    '''
    Context manager that runs an e-book through its input plugin into a
    temporary directory and exposes the resulting spine (``self.spine``),
    table of contents (``self.toc``) and per-item page numbers.
    '''

    #: Number of characters counted as one page when computing page numbers
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None):
        '''
        :param pathtoebook: Path to the e-book file. Surrounding whitespace
            is stripped and the path is made absolute.
        :param log: Log object to use; ``default_log`` when falsy.
        '''
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Normalise htm/xhtm/xhtml to html
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        '''
        Find the next spine item whose raw content contains ``text``
        (compared in lower case).

        :param text: The text to look for.
        :param index: Spine index to search from; the item at ``index``
            itself is never examined.
        :param backwards: If True search items before ``index`` (nearest
            first), otherwise items after it.
        :return: The index of the first matching spine item, or None when
            there is no match.
        '''
        text = prepare_string_for_xml(text.lower())
        pmap = list(enumerate(self.spine))
        if backwards:
            pmap.reverse()
        for i, path in pmap:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                try:
                    raw = xml_replace_entities(raw)
                except Exception:
                    # Best effort: fall back to searching the raw markup
                    pass
                if text in raw.lower():
                    return i

    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True, view_kepub=False,
                  read_links=True):
        '''
        Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        :param processed: If True, also run ``create_oebbook`` on the output
            of the input plugin.
        :param only_input_plugin: If True, never run ``create_oebbook``.
        :param run_char_count: Passed through to every ``SpineItem``.
        :param read_anchor_map: Passed through to every ``SpineItem``; when
            True ``create_indexing_data`` is also called.
        :param view_kepub: Passed through to ``Plumber``.
        :param read_links: Passed through to every ``SpineItem``.
        :return: ``self``

        NOTE(review): if this method raises after the temporary directory
        has been entered, nothing here calls ``self._tdir.__exit__``.
        '''
        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log,
                          view_kepub=view_kepub)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(
                inf, plumber.opts, plumber.input_fmt, self.log, {}, self.base)

            if not only_input_plugin:
                # Run the HTML preprocess/parsing from the conversion
                # pipeline as well. The parentheses spell out the operator
                # precedence: ``and`` binds tighter than ``or``.
                if processed or (
                        plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                        not hasattr(self.pathtoopf, 'manifest')):
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                    self.pathtoopf = create_oebbook(
                        self.log, self.pathtoopf, plumber.opts)

            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            fs = ':joint' if getattr(plumber.input_plugin, 'mobi_is_joint', False) else ''
            self.book_format = 'KF8' + fs

        # Use the OPF object supplied by the input plugin, if there is one
        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()

        # Linear spine items first, non-linear ones after them
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
                        read_links=read_links, run_char_count=run_char_count,
                        from_epub=self.book_format == 'EPUB')
        is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                        'azw', 'azw3', 'docx', 'htmlz'}:
            # Generate a title page that references the cover and put it at
            # the front of the spine
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
                self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p

        # Assign each item a 1-based, inclusive page range
        start = 1
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self

    def verify_links(self):
        '''
        For every spine item, record in ``item.verified_links`` the
        ``(target, fragment)`` pairs of its links that have no scheme or
        network location, resolve to an item of the spine and whose
        fragment, if any, is present in the target's ``anchor_map``.
        '''
        spine_paths = {s:s for s in self.spine}
        for item in self.spine:
            base = os.path.dirname(item)
            for link in item.all_links:
                try:
                    p = urlparse(urlunquote(link))
                except Exception:
                    continue
                if not p.scheme and not p.netloc:
                    # A link without a path points into the item itself
                    path = os.path.abspath(os.path.join(base, p.path)) if p.path else item
                    path = spine_paths.get(path)
                    if path is None:
                        # Target is not part of the spine
                        continue
                    if not p.fragment or p.fragment in path.anchor_map:
                        item.verified_links.add((path, p.fragment))

    def __exit__(self, *args):
        '''
        Exit the temporary directory context, then try to remove every file
        recorded in ``self.delete_on_exit``, ignoring removal failures.
        '''
        self._tdir.__exit__(*args)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except OSError:
                pass
class EbookIterator(BookmarksMixin):
    '''
    Context manager that extracts an e-book into a temporary directory and
    exposes the resulting spine (``self.spine``), table of contents
    (``self.toc``), metadata (``self.mi``) and per-item page numbers.
    '''

    #: Number of characters counted as one page when computing page numbers
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None, copy_bookmarks_to_file=True):
        '''
        :param pathtoebook: Path to the e-book file. Surrounding whitespace
            is stripped and the path is made absolute.
        :param log: Log object to use; ``default_log`` when falsy.
        :param copy_bookmarks_to_file: Passed through to
            ``BookmarksMixin.__init__``.
        '''
        BookmarksMixin.__init__(self, copy_bookmarks_to_file=copy_bookmarks_to_file)
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Normalise htm/xhtm/xhtml to html
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        '''
        Find the next spine item whose body text contains ``text``
        (compared in lower case). The contents of ``script``, ``style``
        and ``del`` elements are not searched.

        :param text: The text to look for.
        :param index: Spine index to search from; the item at ``index``
            itself is never examined.
        :param backwards: If True search items before ``index`` (nearest
            first), otherwise items after it.
        :return: The index of the first matching spine item, or None when
            there is no match.
        '''
        from calibre.ebooks.oeb.polish.parsing import parse
        pmap = list(enumerate(self.spine))
        if backwards:
            pmap.reverse()
        q = text.lower()
        skipped_tags = {'script', 'style', 'del'}

        def serialize(elem, fragments):
            # Collect text in document order: the element's own text, then
            # its children, and only then the tail, which is the text that
            # follows the element's closing tag.
            if elem.text:
                fragments.append(elem.text.lower())
            for child in elem.iterchildren():
                tag = getattr(child, 'tag', None)
                if hasattr(tag, 'rpartition') and tag.rpartition('}')[-1] not in skipped_tags:
                    serialize(child, fragments)
                elif getattr(child, 'tail', None):
                    # Skipped child: keep only the text that follows it
                    fragments.append(child.tail.lower())
            if elem.tail:
                fragments.append(elem.tail.lower())

        for i, path in pmap:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                root = parse(raw)
                fragments = []
                for body in root.xpath('//*[local-name() = "body"]'):
                    # Text after </body> is not part of the body content
                    body.tail = None
                    serialize(body, fragments)
                if q in ''.join(fragments):
                    return i

    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True, view_kepub=False,
                  read_links=True):
        '''
        Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        :param processed: Passed through to ``run_extract_book``.
        :param only_input_plugin: Passed through to ``run_extract_book``.
        :param run_char_count: Passed through to every ``SpineItem``.
        :param read_anchor_map: Passed through to every ``SpineItem``; when
            True ``create_indexing_data`` is also called.
        :param view_kepub: Passed through to ``run_extract_book``.
        :param read_links: Passed through to every ``SpineItem``.
        :return: ``self``

        NOTE(review): if this method raises after the temporary directory
        has been entered, nothing here calls ``self._tdir.__exit__``.
        '''
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        self.book_format, self.pathtoopf, input_fmt = run_extract_book(
            self.pathtoebook, self.base, only_input_plugin=only_input_plugin,
            view_kepub=view_kepub, processed=processed)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.mi = self.opf.to_book_metadata()
        self.language = None
        if self.mi.languages:
            self.language = self.mi.languages[0].lower()

        # Linear spine items first, non-linear ones after them
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
                        read_links=read_links, run_char_count=run_char_count,
                        from_epub=self.book_format == 'EPUB')
        is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                        'azw', 'azw3', 'docx', 'htmlz'}:
            # Generate a title page that references the cover and put it at
            # the front of the spine
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
                self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p

        # Assign each item a 1-based, inclusive page range
        start = 1
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self

    def verify_links(self):
        '''
        For every spine item, record in ``item.verified_links`` the
        ``(target, fragment)`` pairs of its links that have no scheme or
        network location, resolve to an item of the spine and whose
        fragment, if any, is present in the target's ``anchor_map``.
        '''
        spine_paths = {s:s for s in self.spine}
        for item in self.spine:
            base = os.path.dirname(item)
            for link in item.all_links:
                try:
                    p = urlparse(urlunquote(link))
                except Exception:
                    continue
                if not p.scheme and not p.netloc:
                    # A link without a path points into the item itself
                    path = os.path.abspath(os.path.join(base, p.path)) if p.path else item
                    path = spine_paths.get(path)
                    if path is None:
                        # Target is not part of the spine
                        continue
                    if not p.fragment or p.fragment in path.anchor_map:
                        item.verified_links.add((path, p.fragment))

    def __exit__(self, *args):
        '''
        Exit the temporary directory context, then try to remove every file
        recorded in ``self.delete_on_exit``, ignoring removal failures.
        '''
        self._tdir.__exit__(*args)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except OSError:
                pass
class EbookIterator(BookmarksMixin):
    '''
    Context manager that extracts an ebook into a temporary directory and
    exposes its spine, table of contents and estimated page counts.

    Entering the context populates ``self.spine``, ``self.toc``,
    ``self.pages`` and related attributes; leaving it removes the
    temporary directory.
    '''

    # Number of characters treated as one page when estimating page counts
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None, copy_bookmarks_to_file=True):
        '''
        :param pathtoebook: Path to the ebook file. Surrounding whitespace
            is stripped and the path is made absolute.
        :param log: Log object to use; ``default_log`` is used when this is
            falsy.
        :param copy_bookmarks_to_file: Forwarded to
            ``BookmarksMixin.__init__``.
        '''
        BookmarksMixin.__init__(
            self, copy_bookmarks_to_file=copy_bookmarks_to_file)
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        # Lower-cased extension without the dot; htm/html/xhtm/xhtml are all
        # normalised to 'html' and any 'original_' marker is dropped.
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    @staticmethod
    def _body_text(root):
        '''
        Return the lower-cased text of every ``body`` element under
        ``root``, concatenated in document order.

        Children whose tag is not a string (e.g. comments) and
        ``script``/``style``/``del`` elements contribute only the text that
        follows them (their tail), not their own content.
        '''
        ignored_tags = {'script', 'style', 'del'}
        fragments = []

        def serialize(elem):
            if elem.text:
                fragments.append(elem.text.lower())
            for child in elem.iterchildren():
                tag = getattr(child, 'tag', None)
                if hasattr(tag, 'rpartition') and \
                        tag.rpartition('}')[-1] not in ignored_tags:
                    serialize(child)
                elif getattr(child, 'tail', None):
                    fragments.append(child.tail.lower())
            # The tail follows the element's closing tag, so it has to be
            # emitted after the children to keep the text in reading order.
            if elem.tail:
                fragments.append(elem.tail.lower())

        for body in root.xpath('//*[local-name() = "body"]'):
            # Text after </body> is not part of the body's content
            body.tail = None
            serialize(body)
        return ''.join(fragments)

    def search(self, text, index, backwards=False):
        '''
        Find the nearest spine item whose body text contains ``text``.

        The comparison is done on lower-cased text. When searching
        forwards, only items with a position greater than ``index`` are
        examined, in ascending order; when searching backwards, only items
        with a position less than ``index`` are examined, in descending
        order.

        :return: The position in ``self.spine`` of the first matching item,
            or ``None`` when nothing matches.
        '''
        from calibre.ebooks.oeb.polish.parsing import parse
        pmap = list(enumerate(self.spine))
        if backwards:
            pmap.reverse()
        q = text.lower()
        for i, path in pmap:
            in_range = (i < index) if backwards else (i > index)
            if not in_range:
                continue
            with open(path, 'rb') as f:
                raw = f.read().decode(path.encoding)
            if q in self._body_text(parse(raw)):
                return i

    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True,
                  view_kepub=False, read_links=True):
        '''
        Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        :param processed: Forwarded to ``run_extract_book``.
        :param only_input_plugin: Forwarded to ``run_extract_book``.
        :param run_char_count: Forwarded to every ``SpineItem``.
        :param read_anchor_map: Forwarded to every ``SpineItem``; when
            true, indexing data is also created from the spine and the
            table of contents.
        :param view_kepub: Forwarded to ``run_extract_book``.
        :param read_links: Forwarded to every ``SpineItem``.
        :return: ``self``
        '''
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        self.book_format, self.pathtoopf, input_fmt = run_extract_book(
            self.pathtoebook, self.base, only_input_plugin=only_input_plugin,
            view_kepub=view_kepub, processed=processed)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()

        # Linear spine items come first, non-linear ones are moved to the end
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
                        read_links=read_links,
                        run_char_count=run_char_count,
                        from_epub=self.book_format == 'EPUB')
        is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                # Best effort: an item that cannot be loaded is skipped
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                        'azw', 'azw3', 'docx', 'htmlz'}:
            # Generate a title page for the cover and put it at the very
            # start of the spine
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine.insert(0, Spiny(cfile, mime_type='application/xhtml+xml'))
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
                self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        # Estimate page counts from character counts and give every spine
        # item a contiguous, 1-based page range
        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        start = 1
        for p, s in zip(self.pages, self.spine):
            s.pages = p
            s.start_page = start
            start += p
            s.max_page = s.start_page + p - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self

    def verify_links(self):
        '''
        Record, on each spine item, the links that point at a spine item.

        A link is added to ``item.verified_links`` as ``(target, fragment)``
        when it has no scheme and no network location, its path resolves to
        an entry of ``self.spine`` (an empty path means the item itself)
        and its fragment, if any, is present in the target's
        ``anchor_map``. Links that cannot be parsed are ignored.
        '''
        # Maps each spine entry to itself so that a lookup with an equal
        # path yields the spine entry (and its anchor_map)
        spine_paths = {s:s for s in self.spine}
        for item in self.spine:
            base = os.path.dirname(item)
            for link in item.all_links:
                try:
                    p = urlparse(urlunquote(link))
                except Exception:
                    continue
                if p.scheme or p.netloc:
                    continue
                if p.path:
                    path = os.path.abspath(os.path.join(base, p.path))
                else:
                    path = item
                try:
                    target = spine_paths[path]
                except KeyError:
                    continue
                if not p.fragment or p.fragment in target.anchor_map:
                    item.verified_links.add((target, p.fragment))

    def __exit__(self, *args):
        '''
        Leave the temporary directory context, then make a best-effort
        attempt to remove every file listed in ``self.delete_on_exit``.
        '''
        self._tdir.__exit__(*args)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except OSError:
                # Best effort only; a file that is already gone is fine
                pass