def convert_text(self, oeb_book): from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.pdf.render.from_html import PDFWriter self.log.debug('Serializing oeb input to disk for processing...') self.get_cover_data() self.process_fonts() if self.opts.pdf_use_document_margins and self.stored_page_margins: import json for href, margins in self.stored_page_margins.iteritems(): item = oeb_book.manifest.hrefs.get(href) root = item.data if hasattr(root, 'xpath') and margins: root.set('data-calibre-pdf-output-page-margins', json.dumps(margins)) with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) self.write(PDFWriter, [s.path for s in opf.spine], getattr(opf, 'toc', None))
def convert_text(self, oeb_book): import json from calibre.ebooks.pdf.html_writer import convert self.get_cover_data() self.process_fonts() if self.opts.pdf_use_document_margins and self.stored_page_margins: for href, margins in iteritems(self.stored_page_margins): item = oeb_book.manifest.hrefs.get(href) if item is not None: root = item.data if hasattr(root, 'xpath') and margins: root.set('data-calibre-pdf-output-page-margins', json.dumps(margins)) with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_dir = os.path.realpath(oeb_dir) oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] convert(opfpath, self.opts, metadata=self.metadata, output_path=self.output_path, log=self.log, cover_data=self.cover_data, report_progress=self.report_progress)
def convert_text(self, oeb_book): from calibre.ebooks.metadata.opf2 import OPF if self.opts.old_pdf_engine: from calibre.ebooks.pdf.writer import PDFWriter PDFWriter else: from calibre.ebooks.pdf.render.from_html import PDFWriter self.log.debug("Serializing oeb input to disk for processing...") self.get_cover_data() self.handle_embedded_fonts() with TemporaryDirectory("_pdf_out") as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format("oeb") oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) opfpath = glob.glob(os.path.join(oeb_dir, "*.opf"))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) self.write(PDFWriter, [s.path for s in opf.spine], getattr(opf, "toc", None))
def options_for_output_fmt(fmt): from calibre.customize.ui import plugin_for_output_format fmt = fmt.lower() plugin = plugin_for_output_format(fmt) if plugin is None: return None, () full_name = plugin.name.lower().replace(' ', '_') name = full_name.rpartition('_')[0] return full_name, OPTIONS['output'].get(name, ())
def opf_to_azw3(opf, outpath, log): from calibre.ebooks.conversion.plumber import Plumber, create_oebbook plumber = Plumber(opf, outpath, log) plumber.setup_options() inp = plugin_for_input_format('azw3') outp = plugin_for_output_format('azw3') plumber.opts.mobi_passthrough = True oeb = create_oebbook(log, opf, plumber.opts) set_cover(oeb) outp.convert(oeb, outpath, inp, plumber.opts, log)
def do_rebuild(opf, dest_path): plumber = Plumber(opf, dest_path, default_log) plumber.setup_options() inp = plugin_for_input_format('azw3') outp = plugin_for_output_format('azw3') plumber.opts.mobi_passthrough = True oeb = create_oebbook(default_log, opf, plumber.opts) set_cover(oeb) outp.convert(oeb, dest_path, inp, plumber.opts, default_log)
def fetch_scheduled_recipe(arg): # {{{ fmt = prefs['output_format'].lower() # Never use AZW3 for periodicals... if fmt == 'azw3': fmt = 'mobi' pt = PersistentTemporaryFile(suffix='_recipe_out.%s' % fmt.lower()) pt.close() recs = [] ps = load_defaults('page_setup') if 'output_profile' in ps: recs.append(('output_profile', ps['output_profile'], OptionRecommendation.HIGH)) for edge in ('left', 'top', 'bottom', 'right'): edge = 'margin_' + edge if edge in ps: recs.append((edge, ps[edge], OptionRecommendation.HIGH)) lf = load_defaults('look_and_feel') if lf.get('base_font_size', 0.0) != 0.0: recs.append(('base_font_size', lf['base_font_size'], OptionRecommendation.HIGH)) recs.append( ('keep_ligatures', lf.get('keep_ligatures', False), OptionRecommendation.HIGH)) lr = load_defaults('lrf_output') if lr.get('header', False): recs.append(('header', True, OptionRecommendation.HIGH)) recs.append(('header_format', '%t', OptionRecommendation.HIGH)) epub = load_defaults('epub_output') if epub.get('epub_flatten', False): recs.append(('epub_flatten', True, OptionRecommendation.HIGH)) if fmt == 'pdf': pdf = load_defaults('pdf_output') from calibre.customize.ui import plugin_for_output_format p = plugin_for_output_format('pdf') for opt in p.options: recs.append( (opt.option.name, pdf.get(opt.option.name, opt.recommended_value), OptionRecommendation.HIGH)) args = [arg['recipe'], pt.name, recs] if arg['username'] is not None: recs.append(('username', arg['username'], OptionRecommendation.HIGH)) if arg['password'] is not None: recs.append(('password', arg['password'], OptionRecommendation.HIGH)) return 'gui_convert', args, _( 'Fetch news from %s') % arg['title'], fmt.upper(), [pt]
def commit(self, outpath=None, keep_parsed=False): super(AZW3Container, self).commit(keep_parsed=keep_parsed) if outpath is None: outpath = self.pathtoazw3 from calibre.ebooks.conversion.plumber import Plumber, create_oebbook opf = self.name_path_map[self.opf_name] plumber = Plumber(opf, outpath, self.log) plumber.setup_options() inp = plugin_for_input_format('azw3') outp = plugin_for_output_format('azw3') plumber.opts.mobi_passthrough = True oeb = create_oebbook(default_log, opf, plumber.opts) set_cover(oeb) outp.convert(oeb, outpath, inp, plumber.opts, default_log)
def convert_text(self, oeb_book): from calibre.ebooks.pdf.writer import PDFWriter from calibre.ebooks.metadata.opf2 import OPF self.log.debug('Serializing oeb input to disk for processing...') self.get_cover_data() with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) self.write(PDFWriter, [s.path for s in opf.spine])
def fetch_scheduled_recipe(arg): # {{{ fmt = prefs['output_format'].lower() # Never use AZW3 for periodicals... if fmt == 'azw3': fmt = 'mobi' pt = PersistentTemporaryFile(suffix='_recipe_out.%s'%fmt.lower()) pt.close() recs = [] ps = load_defaults('page_setup') if 'output_profile' in ps: recs.append(('output_profile', ps['output_profile'], OptionRecommendation.HIGH)) for edge in ('left', 'top', 'bottom', 'right'): edge = 'margin_' + edge if edge in ps: recs.append((edge, ps[edge], OptionRecommendation.HIGH)) lf = load_defaults('look_and_feel') if lf.get('base_font_size', 0.0) != 0.0: recs.append(('base_font_size', lf['base_font_size'], OptionRecommendation.HIGH)) recs.append(('keep_ligatures', lf.get('keep_ligatures', False), OptionRecommendation.HIGH)) lr = load_defaults('lrf_output') if lr.get('header', False): recs.append(('header', True, OptionRecommendation.HIGH)) recs.append(('header_format', '%t', OptionRecommendation.HIGH)) epub = load_defaults('epub_output') if epub.get('epub_flatten', False): recs.append(('epub_flatten', True, OptionRecommendation.HIGH)) if fmt == 'pdf': pdf = load_defaults('pdf_output') from calibre.customize.ui import plugin_for_output_format p = plugin_for_output_format('pdf') for opt in p.options: recs.append((opt.option.name, pdf.get(opt.option.name, opt.recommended_value), OptionRecommendation.HIGH)) args = [arg['recipe'], pt.name, recs] if arg['username'] is not None: recs.append(('username', arg['username'], OptionRecommendation.HIGH)) if arg['password'] is not None: recs.append(('password', arg['password'], OptionRecommendation.HIGH)) return 'gui_convert', args, _('Fetch news from ')+arg['title'], fmt.upper(), [pt]
def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb lrf_opts = LRFOptions(output_path, opts, oeb) if input_plugin.is_image_collection: self.convert_images(input_plugin.get_images(), lrf_opts, getattr(opts, 'wide', False)) return self.flatten_toc() from calibre.ptempfile import TemporaryDirectory with TemporaryDirectory('_lrf_output') as tdir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb, tdir, input_plugin, opts, log) opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0] from calibre.ebooks.lrf.html.convert_from import process_file process_file(os.path.join(tdir, opf), lrf_opts, self.log)
def convert_text(self, oeb_book): from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.pdf.render.from_html import PDFWriter self.log.debug('Serializing oeb input to disk for processing...') self.get_cover_data() self.process_fonts() if self.opts.pdf_use_document_margins and self.stored_page_margins: import json for href, margins in iteritems(self.stored_page_margins): item = oeb_book.manifest.hrefs.get(href) if item is not None: root = item.data if hasattr(root, 'xpath') and margins: root.set('data-calibre-pdf-output-page-margins', json.dumps(margins)) # Remove javascript for item in self.oeb.spine: root = item.data if hasattr(root, 'xpath'): for script in root.xpath('//*[local-name()="script"]'): script.text = None script.attrib.clear() for elem in root.iter('*'): for attr in tuple(elem.attrib): if attr.startswith('on'): elem.set(attr, '') with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) self.write(PDFWriter, [s.path for s in opf.spine], getattr(opf, 'toc', None))
def convert_text(self, oeb_book): from calibre.ebooks.pdf.writer import PDFWriter from calibre.ebooks.metadata.opf2 import OPF self.log.debug('Serializing oeb input to disk for processing...') self.get_cover_data() if iswindows: self.remove_font_specification() else: self.handle_embedded_fonts() with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) self.write(PDFWriter, [s.path for s in opf.spine], getattr(opf, 'toc', None))
def convert_text(self, oeb_book): from calibre.ebooks.metadata.opf2 import OPF if self.opts.old_pdf_engine: from calibre.ebooks.pdf.writer import PDFWriter PDFWriter else: from calibre.ebooks.pdf.render.from_html import PDFWriter self.log.debug('Serializing oeb input to disk for processing...') self.get_cover_data() self.handle_embedded_fonts() with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) self.write(PDFWriter, [s.path for s in opf.spine], getattr(opf, 'toc', None))
def do_book(self): if self.i >= len(self.book_ids): return self.do_queue() book_id = self.book_ids[self.i] self.i += 1 temp_files = [] try: input_format = get_input_format_for_book(self.db, book_id, None)[0] input_fmt = self.db.original_fmt(book_id, input_format).lower() same_fmt = input_fmt == self.output_format.lower() mi, opf_file = create_opf_file(self.db, book_id) in_file = PersistentTemporaryFile('.'+input_format) with in_file: self.db.copy_format_to(book_id, input_fmt, in_file, index_is_id=True) out_file = PersistentTemporaryFile('.' + self.output_format) out_file.write(self.output_format) out_file.close() temp_files = [in_file] combined_recs = GuiRecommendations() default_recs = bulk_defaults_for_input_format(input_format) for key in default_recs: combined_recs[key] = default_recs[key] if self.use_saved_single_settings: specific_recs = load_specifics(self.db, book_id) for key in specific_recs: combined_recs[key] = specific_recs[key] for item in self.user_recs: combined_recs[item[0]] = item[1] save_specifics(self.db, book_id, combined_recs) lrecs = list(combined_recs.to_recommendations()) from calibre.customize.ui import plugin_for_output_format op = plugin_for_output_format(self.output_format) if op and op.recommendations: prec = {x[0] for x in op.recommendations} for i, r in enumerate(list(lrecs)): if r[0] in prec: lrecs[i] = (r[0], r[1], OptionRecommendation.HIGH) cover_file = create_cover_file(self.db, book_id) if opf_file is not None: lrecs.append(('read_metadata_from_opf', opf_file.name, OptionRecommendation.HIGH)) temp_files.append(opf_file) if cover_file is not None: lrecs.append(('cover', cover_file.name, OptionRecommendation.HIGH)) temp_files.append(cover_file) for x in list(lrecs): if x[0] == 'debug_pipeline': lrecs.remove(x) try: dtitle = unicode(mi.title) except: dtitle = repr(mi.title) if len(dtitle) > 50: dtitle = dtitle[:50].rpartition(' ')[0]+'...' self.setLabelText(_('Queueing ')+dtitle) desc = _('Convert book %(num)d of %(tot)d (%(title)s)') % dict( num=self.i, tot=len(self.book_ids), title=dtitle) args = [in_file.name, out_file.name, lrecs] temp_files.append(out_file) func = 'gui_convert_override' if same_fmt: func += ':same_fmt' self.jobs.append((func, args, desc, self.output_format.upper(), book_id, temp_files)) self.changed = True self.setValue(self.i) except NoSupportedInputFormats: self.bad.append(book_id) QTimer.singleShot(0, self.do_book)
def __init__(self, input, output, log, report_progress=DummyReporter(), dummy=False, merge_plugin_recs=True, abort_after_input_dump=False, override_input_metadata=False): ''' :param input: Path to input file. :param output: Path to output file/directory ''' if isbytestring(input): input = input.decode(filesystem_encoding) if isbytestring(output): output = output.decode(filesystem_encoding) self.original_input_arg = input self.input = os.path.abspath(input) self.output = os.path.abspath(output) self.log = log self.ui_reporter = report_progress self.abort_after_input_dump = abort_after_input_dump self.override_input_metadata = override_input_metadata # Pipeline options {{{ # Initialize the conversion options that are independent of input and # output formats. The input and output plugins can still disable these # options via recommendations. self.pipeline_options = [ OptionRecommendation(name='verbose', recommended_value=0, level=OptionRecommendation.LOW, short_switch='v', help=_('Level of verbosity. Specify multiple times for greater ' 'verbosity.') ), OptionRecommendation(name='debug_pipeline', recommended_value=None, level=OptionRecommendation.LOW, short_switch='d', help=_('Save the output from different stages of the conversion ' 'pipeline to the specified ' 'directory. Useful if you are unsure at which stage ' 'of the conversion process a bug is occurring.') ), OptionRecommendation(name='input_profile', recommended_value='default', level=OptionRecommendation.LOW, choices=[x.short_name for x in input_profiles()], help=_('Specify the input profile. The input profile gives the ' 'conversion system information on how to interpret ' 'various information in the input document. For ' 'example resolution dependent lengths (i.e. lengths in ' 'pixels). Choices are:')+\ ', '.join([x.short_name for x in input_profiles()]) ), OptionRecommendation(name='output_profile', recommended_value='default', level=OptionRecommendation.LOW, choices=[x.short_name for x in output_profiles()], help=_('Specify the output profile. The output profile ' 'tells the conversion system how to optimize the ' 'created document for the specified device. In some cases, ' 'an output profile is required to produce documents that ' 'will work on a device. For example EPUB on the SONY reader. ' 'Choices are:') + \ ', '.join([x.short_name for x in output_profiles()]) ), OptionRecommendation(name='base_font_size', recommended_value=0, level=OptionRecommendation.LOW, help=_('The base font size in pts. All font sizes in the produced book ' 'will be rescaled based on this size. By choosing a larger ' 'size you can make the fonts in the output bigger and vice ' 'versa. By default, the base font size is chosen based on ' 'the output profile you chose.' ) ), OptionRecommendation(name='font_size_mapping', recommended_value=None, level=OptionRecommendation.LOW, help=_('Mapping from CSS font names to font sizes in pts. ' 'An example setting is 12,12,14,16,18,20,22,24. ' 'These are the mappings for the sizes xx-small to xx-large, ' 'with the final size being for huge fonts. The font ' 'rescaling algorithm uses these sizes to intelligently ' 'rescale fonts. The default is to use a mapping based on ' 'the output profile you chose.' ) ), OptionRecommendation(name='disable_font_rescaling', recommended_value=False, level=OptionRecommendation.LOW, help=_('Disable all rescaling of font sizes.' ) ), OptionRecommendation(name='minimum_line_height', recommended_value=120.0, level=OptionRecommendation.LOW, help=_( 'The minimum line height, as a percentage of the element\'s ' 'calculated font size. calibre will ensure that every element ' 'has a line height of at least this setting, irrespective of ' 'what the input document specifies. Set to zero to disable. ' 'Default is 120%. Use this setting in preference to ' 'the direct line height specification, unless you know what ' 'you are doing. For example, you can achieve "double spaced" ' 'text by setting this to 240.' ) ), OptionRecommendation(name='line_height', recommended_value=0, level=OptionRecommendation.LOW, help=_( 'The line height in pts. Controls spacing between consecutive ' 'lines of text. Only applies to elements that do not define ' 'their own line height. In most cases, the minimum line height ' 'option is more useful. ' 'By default no line height manipulation is performed.' ) ), OptionRecommendation(name='linearize_tables', recommended_value=False, level=OptionRecommendation.LOW, help=_('Some badly designed documents use tables to control the ' 'layout of text on the page. When converted these documents ' 'often have text that runs off the page and other artifacts. ' 'This option will extract the content from the tables and ' 'present it in a linear fashion.' ) ), OptionRecommendation(name='level1_toc', recommended_value=None, level=OptionRecommendation.LOW, help=_('XPath expression that specifies all tags that ' 'should be added to the Table of Contents at level one. If ' 'this is specified, it takes precedence over other forms ' 'of auto-detection.' ' See the XPath Tutorial in the calibre User Manual for examples.' ) ), OptionRecommendation(name='level2_toc', recommended_value=None, level=OptionRecommendation.LOW, help=_('XPath expression that specifies all tags that should be ' 'added to the Table of Contents at level two. Each entry is added ' 'under the previous level one entry.' ' See the XPath Tutorial in the calibre User Manual for examples.' ) ), OptionRecommendation(name='level3_toc', recommended_value=None, level=OptionRecommendation.LOW, help=_('XPath expression that specifies all tags that should be ' 'added to the Table of Contents at level three. Each entry ' 'is added under the previous level two entry.' ' See the XPath Tutorial in the calibre User Manual for examples.' ) ), OptionRecommendation(name='use_auto_toc', recommended_value=False, level=OptionRecommendation.LOW, help=_('Normally, if the source file already has a Table of ' 'Contents, it is used in preference to the auto-generated one. ' 'With this option, the auto-generated one is always used.' ) ), OptionRecommendation(name='no_chapters_in_toc', recommended_value=False, level=OptionRecommendation.LOW, help=_("Don't add auto-detected chapters to the Table of " 'Contents.' ) ), OptionRecommendation(name='toc_threshold', recommended_value=6, level=OptionRecommendation.LOW, help=_( 'If fewer than this number of chapters is detected, then links ' 'are added to the Table of Contents. Default: %default') ), OptionRecommendation(name='max_toc_links', recommended_value=50, level=OptionRecommendation.LOW, help=_('Maximum number of links to insert into the TOC. Set to 0 ' 'to disable. Default is: %default. Links are only added to the ' 'TOC if less than the threshold number of chapters were detected.' ) ), OptionRecommendation(name='toc_filter', recommended_value=None, level=OptionRecommendation.LOW, help=_('Remove entries from the Table of Contents whose titles ' 'match the specified regular expression. Matching entries and all ' 'their children are removed.' ) ), OptionRecommendation(name='duplicate_links_in_toc', recommended_value=False, level=OptionRecommendation.LOW, help=_('When creating a TOC from links in the input document, ' 'allow duplicate entries, i.e. allow more than one entry ' 'with the same text, provided that they point to a ' 'different location.') ), OptionRecommendation(name='chapter', recommended_value="//*[((name()='h1' or name()='h2') and " r"re:test(., '\s*((chapter|book|section|part)\s+)|((prolog|prologue|epilogue)(\s+|$))', 'i')) or @class " "= 'chapter']", level=OptionRecommendation.LOW, help=_('An XPath expression to detect chapter titles. The default ' 'is to consider <h1> or <h2> tags that contain the words ' '"chapter","book","section", "prologue", "epilogue", or "part" as chapter titles as ' 'well as any tags that have class="chapter". The expression ' 'used must evaluate to a list of elements. To disable chapter ' 'detection, use the expression "/". See the XPath Tutorial ' 'in the calibre User Manual for further help on using this ' 'feature.' ) ), OptionRecommendation(name='chapter_mark', recommended_value='pagebreak', level=OptionRecommendation.LOW, choices=['pagebreak', 'rule', 'both', 'none'], help=_('Specify how to mark detected chapters. A value of ' '"pagebreak" will insert page breaks before chapters. ' 'A value of "rule" will insert a line before chapters. ' 'A value of "none" will disable chapter marking and a ' 'value of "both" will use both page breaks and lines ' 'to mark chapters.') ), OptionRecommendation(name='extra_css', recommended_value=None, level=OptionRecommendation.LOW, help=_('Either the path to a CSS stylesheet or raw CSS. ' 'This CSS will be appended to the style rules from ' 'the source file, so it can be used to override those ' 'rules.') ), OptionRecommendation(name='filter_css', recommended_value=None, level=OptionRecommendation.LOW, help=_('A comma separated list of CSS properties that ' 'will be removed from all CSS style rules. This is useful ' 'if the presence of some style information prevents it ' 'from being overridden on your device. ' 'For example: ' 'font-family,color,margin-left,margin-right') ), OptionRecommendation(name='page_breaks_before', recommended_value="//*[name()='h1' or name()='h2']", level=OptionRecommendation.LOW, help=_('An XPath expression. Page breaks are inserted ' 'before the specified elements.') ), OptionRecommendation(name='remove_fake_margins', recommended_value=True, level=OptionRecommendation.LOW, help=_('Some documents specify page margins by ' 'specifying a left and right margin on each individual ' 'paragraph. calibre will try to detect and remove these ' 'margins. Sometimes, this can cause the removal of ' 'margins that should not have been removed. In this ' 'case you can disable the removal.') ), OptionRecommendation(name='margin_top', recommended_value=5.0, level=OptionRecommendation.LOW, help=_('Set the top margin in pts. Default is %default. ' 'Setting this to less than zero will cause no margin to be set. ' 'Note: 72 pts equals 1 inch')), OptionRecommendation(name='margin_bottom', recommended_value=5.0, level=OptionRecommendation.LOW, help=_('Set the bottom margin in pts. Default is %default. ' 'Setting this to less than zero will cause no margin to be set. ' 'Note: 72 pts equals 1 inch')), OptionRecommendation(name='margin_left', recommended_value=5.0, level=OptionRecommendation.LOW, help=_('Set the left margin in pts. Default is %default. ' 'Setting this to less than zero will cause no margin to be set. ' 'Note: 72 pts equals 1 inch')), OptionRecommendation(name='margin_right', recommended_value=5.0, level=OptionRecommendation.LOW, help=_('Set the right margin in pts. Default is %default. ' 'Setting this to less than zero will cause no margin to be set. ' 'Note: 72 pts equals 1 inch')), OptionRecommendation(name='change_justification', recommended_value='original', level=OptionRecommendation.LOW, choices=['left','justify','original'], help=_('Change text justification. A value of "left" converts all' ' justified text in the source to left aligned (i.e. ' 'unjustified) text. A value of "justify" converts all ' 'unjustified text to justified. A value of "original" ' '(the default) does not change justification in the ' 'source file. Note that only some output formats support ' 'justification.')), OptionRecommendation(name='remove_paragraph_spacing', recommended_value=False, level=OptionRecommendation.LOW, help=_('Remove spacing between paragraphs. Also sets an indent on ' 'paragraphs of 1.5em. Spacing removal will not work ' 'if the source file does not use paragraphs (<p> or <div> tags).') ), OptionRecommendation(name='remove_paragraph_spacing_indent_size', recommended_value=1.5, level=OptionRecommendation.LOW, help=_('When calibre removes blank lines between paragraphs, it automatically ' 'sets a paragraph indent, to ensure that paragraphs can be easily ' 'distinguished. This option controls the width of that indent (in em). ' 'If you set this value negative, then the indent specified in the input ' 'document is used, that is, calibre does not change the indentation.') ), OptionRecommendation(name='prefer_metadata_cover', recommended_value=False, level=OptionRecommendation.LOW, help=_('Use the cover detected from the source file in preference ' 'to the specified cover.') ), OptionRecommendation(name='insert_blank_line', recommended_value=False, level=OptionRecommendation.LOW, help=_('Insert a blank line between paragraphs. Will not work ' 'if the source file does not use paragraphs (<p> or <div> tags).' ) ), OptionRecommendation(name='insert_blank_line_size', recommended_value=0.5, level=OptionRecommendation.LOW, help=_('Set the height of the inserted blank lines (in em).' ' The height of the lines between paragraphs will be twice the value' ' set here.') ), OptionRecommendation(name='remove_first_image', recommended_value=False, level=OptionRecommendation.LOW, help=_('Remove the first image from the input ebook. Useful if the ' 'input document has a cover image that is not identified as a cover. ' 'In this case, if you set a cover in calibre, the output document will ' 'end up with two cover images if you do not specify this option.' ) ), OptionRecommendation(name='insert_metadata', recommended_value=False, level=OptionRecommendation.LOW, help=_('Insert the book metadata at the start of ' 'the book. This is useful if your ebook reader does not support ' 'displaying/searching metadata directly.' ) ), OptionRecommendation(name='smarten_punctuation', recommended_value=False, level=OptionRecommendation.LOW, help=_('Convert plain quotes, dashes and ellipsis to their ' 'typographically correct equivalents. For details, see ' 'http://daringfireball.net/projects/smartypants' ) ), OptionRecommendation(name='unsmarten_punctuation', recommended_value=False, level=OptionRecommendation.LOW, help=_('Convert fancy quotes, dashes and ellipsis to their ' 'plain equivalents.' ) ), OptionRecommendation(name='read_metadata_from_opf', recommended_value=None, level=OptionRecommendation.LOW, short_switch='m', help=_('Read metadata from the specified OPF file. Metadata read ' 'from this file will override any metadata in the source ' 'file.') ), OptionRecommendation(name='asciiize', recommended_value=False, level=OptionRecommendation.LOW, help=(_('Transliterate unicode characters to an ASCII ' 'representation. Use with care because this will replace ' 'unicode characters with ASCII. For instance it will replace "%s" ' 'with "Mikhail Gorbachiov". Also, note that in ' 'cases where there are multiple representations of a character ' '(characters shared by Chinese and Japanese for instance) the ' 'representation based on the current calibre interface language will be ' 'used.')%\ u'\u041c\u0438\u0445\u0430\u0438\u043b ' u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432' ) ), OptionRecommendation(name='keep_ligatures', recommended_value=False, level=OptionRecommendation.LOW, help=_('Preserve ligatures present in the input document. ' 'A ligature is a special rendering of a pair of ' 'characters like ff, fi, fl et cetera. ' 'Most readers do not have support for ' 'ligatures in their default fonts, so they are ' 'unlikely to render correctly. By default, calibre ' 'will turn a ligature into the corresponding pair of normal ' 'characters. This option will preserve them instead.') ), OptionRecommendation(name='title', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the title.')), OptionRecommendation(name='authors', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the authors. Multiple authors should be separated by ' 'ampersands.')), OptionRecommendation(name='title_sort', recommended_value=None, level=OptionRecommendation.LOW, help=_('The version of the title to be used for sorting. ')), OptionRecommendation(name='author_sort', recommended_value=None, level=OptionRecommendation.LOW, help=_('String to be used when sorting by author. ')), OptionRecommendation(name='cover', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the cover to the specified file or URL')), OptionRecommendation(name='comments', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the ebook description.')), OptionRecommendation(name='publisher', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the ebook publisher.')), OptionRecommendation(name='series', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the series this ebook belongs to.')), OptionRecommendation(name='series_index', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the index of the book in this series.')), OptionRecommendation(name='rating', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the rating. Should be a number between 1 and 5.')), OptionRecommendation(name='isbn', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the ISBN of the book.')), OptionRecommendation(name='tags', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the tags for the book. Should be a comma separated list.')), OptionRecommendation(name='book_producer', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the book producer.')), OptionRecommendation(name='language', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the language.')), OptionRecommendation(name='pubdate', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the publication date.')), OptionRecommendation(name='timestamp', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the book timestamp (no longer used anywhere)')), OptionRecommendation(name='enable_heuristics', recommended_value=False, level=OptionRecommendation.LOW, help=_('Enable heuristic processing. This option must be set for any ' 'heuristic processing to take place.')), OptionRecommendation(name='markup_chapter_headings', recommended_value=True, level=OptionRecommendation.LOW, help=_('Detect unformatted chapter headings and sub headings. Change ' 'them to h2 and h3 tags. This setting will not create a TOC, ' 'but can be used in conjunction with structure detection to create ' 'one.')), OptionRecommendation(name='italicize_common_cases', recommended_value=True, level=OptionRecommendation.LOW, help=_('Look for common words and patterns that denote ' 'italics and italicize them.')), OptionRecommendation(name='fix_indents', recommended_value=True, level=OptionRecommendation.LOW, help=_('Turn indentation created from multiple non-breaking space entities ' 'into CSS indents.')), OptionRecommendation(name='html_unwrap_factor', recommended_value=0.40, level=OptionRecommendation.LOW, help=_('Scale used to determine the length at which a line should ' 'be unwrapped. Valid values are a decimal between 0 and 1. The ' 'default is 0.4, just below the median line length. If only a ' 'few lines in the document require unwrapping this value should ' 'be reduced')), OptionRecommendation(name='unwrap_lines', recommended_value=True, level=OptionRecommendation.LOW, help=_('Unwrap lines using punctuation and other formatting clues.')), OptionRecommendation(name='delete_blank_paragraphs', recommended_value=True, level=OptionRecommendation.LOW, help=_('Remove empty paragraphs from the document when they exist between ' 'every other paragraph')), OptionRecommendation(name='format_scene_breaks', recommended_value=True, level=OptionRecommendation.LOW, help=_('Left aligned scene break markers are center aligned. ' 'Replace soft scene breaks that use multiple blank lines with ' 'horizontal rules.')), OptionRecommendation(name='replace_scene_breaks', recommended_value='', level=OptionRecommendation.LOW, help=_('Replace scene breaks with the specified text. By default, the ' 'text from the input document is used.')), OptionRecommendation(name='dehyphenate', recommended_value=True, level=OptionRecommendation.LOW, help=_('Analyze hyphenated words throughout the document. The ' 'document itself is used as a dictionary to determine whether hyphens ' 'should be retained or removed.')), OptionRecommendation(name='renumber_headings', recommended_value=True, level=OptionRecommendation.LOW, help=_('Looks for occurrences of sequential <h1> or <h2> tags. ' 'The tags are renumbered to prevent splitting in the middle ' 'of chapter headings.')), OptionRecommendation(name='sr1_search', recommended_value='', level=OptionRecommendation.LOW, help=_('Search pattern (regular expression) to be replaced with ' 'sr1-replace.')), OptionRecommendation(name='sr1_replace', recommended_value='', level=OptionRecommendation.LOW, help=_('Replacement to replace the text found with sr1-search.')), OptionRecommendation(name='sr2_search', recommended_value='', level=OptionRecommendation.LOW, help=_('Search pattern (regular expression) to be replaced with ' 'sr2-replace.')), OptionRecommendation(name='sr2_replace', recommended_value='', level=OptionRecommendation.LOW, help=_('Replacement to replace the text found with sr2-search.')), OptionRecommendation(name='sr3_search', recommended_value='', level=OptionRecommendation.LOW, help=_('Search pattern (regular expression) to be replaced with ' 'sr3-replace.')), OptionRecommendation(name='sr3_replace', recommended_value='', level=OptionRecommendation.LOW, help=_('Replacement to replace the text found with sr3-search.')), OptionRecommendation(name='search_replace', recommended_value=None, level=OptionRecommendation.LOW, help=_( 'Path to a file containing search and replace regular expressions. ' 'The file must contain alternating lines of regular expression ' 'followed by replacement pattern (which can be an empty line). ' 'The regular expression must be in the python regex syntax and ' 'the file must be UTF-8 encoded.')), ] # }}} input_fmt = os.path.splitext(self.input)[1] if not input_fmt: raise ValueError('Input file must have an extension') input_fmt = input_fmt[1:].lower().replace('original_', '') self.archive_input_tdir = None if input_fmt in ARCHIVE_FMTS: self.log('Processing archive...') tdir = PersistentTemporaryDirectory('_plumber_archive') self.input, input_fmt = self.unarchive(self.input, tdir) self.archive_input_tdir = tdir if os.access(self.input, os.R_OK): nfp = run_plugins_on_preprocess(self.input, input_fmt) if nfp != self.input: self.input = nfp input_fmt = os.path.splitext(self.input)[1] if not input_fmt: raise ValueError('Input file must have an extension') input_fmt = input_fmt[1:].lower() if os.path.exists(self.output) and os.path.isdir(self.output): output_fmt = 'oeb' else: output_fmt = os.path.splitext(self.output)[1] if not output_fmt: output_fmt = '.oeb' output_fmt = output_fmt[1:].lower() self.input_plugin = plugin_for_input_format(input_fmt) self.output_plugin = plugin_for_output_format(output_fmt) if self.input_plugin is None: raise ValueError('No plugin to handle input format: '+input_fmt) if self.output_plugin is None: raise ValueError('No plugin to handle output format: '+output_fmt) self.input_fmt = input_fmt self.output_fmt = output_fmt self.all_format_options = set() self.input_options = set() self.output_options = set() # Build set of all possible options. Two options are equal if their # names are the same. if not dummy: self.input_options = self.input_plugin.options.union( self.input_plugin.common_options) self.output_options = self.output_plugin.options.union( self.output_plugin.common_options) else: for fmt in available_input_formats(): input_plugin = plugin_for_input_format(fmt) if input_plugin: self.all_format_options = self.all_format_options.union( input_plugin.options.union(input_plugin.common_options)) for fmt in available_output_formats(): output_plugin = plugin_for_output_format(fmt) if output_plugin: self.all_format_options = self.all_format_options.union( output_plugin.options.union(output_plugin.common_options)) # Remove the options that have been disabled by recommendations from the # plugins. for w in ('input_options', 'output_options', 'all_format_options'): temp = set([]) for x in getattr(self, w): temp.add(x.clone()) setattr(self, w, temp) if merge_plugin_recs: self.merge_plugin_recommendations()
def do_book(self): if self.i >= len(self.book_ids): return self.do_queue() book_id = self.book_ids[self.i] self.i += 1 temp_files = [] try: input_format = get_input_format_for_book(self.db, book_id, None)[0] input_fmt = self.db.original_fmt(book_id, input_format).lower() same_fmt = input_fmt == self.output_format.lower() mi, opf_file = create_opf_file(self.db, book_id) in_file = PersistentTemporaryFile('.' + input_format) with in_file: self.db.copy_format_to(book_id, input_fmt, in_file, index_is_id=True) out_file = PersistentTemporaryFile('.' + self.output_format) out_file.write(self.output_format) out_file.close() temp_files = [in_file] combined_recs = GuiRecommendations() default_recs = bulk_defaults_for_input_format(input_format) for key in default_recs: combined_recs[key] = default_recs[key] if self.use_saved_single_settings: specific_recs = load_specifics(self.db, book_id) for key in specific_recs: combined_recs[key] = specific_recs[key] for item in self.user_recs: combined_recs[item[0]] = item[1] save_specifics(self.db, book_id, combined_recs) lrecs = list(combined_recs.to_recommendations()) from calibre.customize.ui import plugin_for_output_format op = plugin_for_output_format(self.output_format) if op and op.recommendations: prec = {x[0] for x in op.recommendations} for i, r in enumerate(list(lrecs)): if r[0] in prec: lrecs[i] = (r[0], r[1], OptionRecommendation.HIGH) cover_file = create_cover_file(self.db, book_id) if opf_file is not None: lrecs.append(('read_metadata_from_opf', opf_file.name, OptionRecommendation.HIGH)) temp_files.append(opf_file) if cover_file is not None: lrecs.append( ('cover', cover_file.name, OptionRecommendation.HIGH)) temp_files.append(cover_file) for x in list(lrecs): if x[0] == 'debug_pipeline': lrecs.remove(x) try: dtitle = unicode(mi.title) except: dtitle = repr(mi.title) if len(dtitle) > 50: dtitle = dtitle[:50].rpartition(' ')[0] + '...' self.setLabelText(_('Queueing ') + dtitle) desc = _('Convert book %(num)d of %(tot)d (%(title)s)') % dict( num=self.i, tot=len(self.book_ids), title=dtitle) args = [in_file.name, out_file.name, lrecs] temp_files.append(out_file) func = 'gui_convert_override' if same_fmt: func += ':same_fmt' self.jobs.append((func, args, desc, self.output_format.upper(), book_id, temp_files)) self.changed = True self.setValue(self.i) except NoSupportedInputFormats: self.bad.append(book_id) QTimer.singleShot(0, self.do_book)
def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb if self.opts.epub_inline_toc: from calibre.ebooks.mobi.writer8.toc import TOCAdder opts.mobi_toc_at_start = not opts.epub_toc_at_end opts.mobi_passthrough = False opts.no_inline_toc = False TOCAdder(oeb, opts, replace_previous_inline_toc=True, ignore_existing_toc=True) if self.opts.epub_flatten: from calibre.ebooks.oeb.transforms.filenames import FlatFilenames FlatFilenames()(oeb, opts) else: from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames UniqueFilenames()(oeb, opts) self.workaround_ade_quirks() self.workaround_webkit_quirks() self.upshift_markup() from calibre.ebooks.oeb.transforms.rescale import RescaleImages RescaleImages(check_colorspaces=True)(oeb, opts) from calibre.ebooks.oeb.transforms.split import Split split = Split(not self.opts.dont_split_on_page_breaks, max_flow_size=self.opts.flow_size * 1024) split(self.oeb, self.opts) from calibre.ebooks.oeb.transforms.cover import CoverManager cm = CoverManager( no_default_cover=self.opts.no_default_epub_cover, no_svg_cover=self.opts.no_svg_cover, preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio) cm(self.oeb, self.opts, self.log) self.workaround_sony_quirks() if self.oeb.toc.count() == 0: self.log.warn('This EPUB file has no Table of Contents. ' 'Creating a default TOC') first = next(iter(self.oeb.spine)) self.oeb.toc.add(_('Start'), first.href) from calibre.ebooks.oeb.base import OPF identifiers = oeb.metadata['identifier'] uuid = None for x in identifiers: if x.get(OPF('scheme'), None).lower() == 'uuid' or str(x).startswith('urn:uuid:'): uuid = str(x).split(':')[-1] break encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', []) if uuid is None: self.log.warn('No UUID identifier found') from uuid import uuid4 uuid = str(uuid4()) oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid) if encrypted_fonts and not uuid.startswith('urn:uuid:'): # Apparently ADE requires this value to start with urn:uuid: # for some absurd reason, or it will throw a hissy fit and refuse # to use the obfuscated fonts. for x in identifiers: if str(x) == uuid: x.content = 'urn:uuid:' + uuid with TemporaryDirectory('_epub_output') as tdir: from calibre.customize.ui import plugin_for_output_format metadata_xml = None extra_entries = [] if self.is_periodical: if self.opts.output_profile.epub_periodical_format == 'sony': from calibre.ebooks.epub.periodical import sony_metadata metadata_xml, atom_xml = sony_metadata(oeb) extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)] oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb, tdir, input_plugin, opts, log) opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0] self.condense_ncx([ os.path.join(tdir, x) for x in os.listdir(tdir) if x.endswith('.ncx') ][0]) encryption = None if encrypted_fonts: encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid) from calibre.ebooks.epub import initialize_container with initialize_container(output_path, os.path.basename(opf), extra_entries=extra_entries) as epub: epub.add_dir(tdir) if encryption is not None: epub.writestr('META-INF/encryption.xml', encryption) if metadata_xml is not None: epub.writestr('META-INF/metadata.xml', metadata_xml.encode('utf-8')) if opts.extract_to is not None: from calibre.utils.zipfile import ZipFile if os.path.exists(opts.extract_to): if os.path.isdir(opts.extract_to): shutil.rmtree(opts.extract_to) else: os.remove(opts.extract_to) os.mkdir(opts.extract_to) with ZipFile(output_path) as zf: zf.extractall(path=opts.extract_to) self.log.info('EPUB extracted to', opts.extract_to)
def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb if self.opts.epub_inline_toc: from calibre.ebooks.mobi.writer8.toc import TOCAdder opts.mobi_toc_at_start = not opts.epub_toc_at_end opts.mobi_passthrough = False opts.no_inline_toc = False TOCAdder(oeb, opts, replace_previous_inline_toc=True, ignore_existing_toc=True) if self.opts.epub_flatten: from calibre.ebooks.oeb.transforms.filenames import FlatFilenames FlatFilenames()(oeb, opts) else: from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames UniqueFilenames()(oeb, opts) self.workaround_ade_quirks() self.workaround_webkit_quirks() self.upshift_markup() from calibre.ebooks.oeb.transforms.rescale import RescaleImages RescaleImages(check_colorspaces=True)(oeb, opts) from calibre.ebooks.oeb.transforms.split import Split split = Split(not self.opts.dont_split_on_page_breaks, max_flow_size=self.opts.flow_size*1024 ) split(self.oeb, self.opts) from calibre.ebooks.oeb.transforms.cover import CoverManager cm = CoverManager( no_default_cover=self.opts.no_default_epub_cover, no_svg_cover=self.opts.no_svg_cover, preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio) cm(self.oeb, self.opts, self.log) self.workaround_sony_quirks() if self.oeb.toc.count() == 0: self.log.warn('This EPUB file has no Table of Contents. ' 'Creating a default TOC') first = iter(self.oeb.spine).next() self.oeb.toc.add(_('Start'), first.href) from calibre.ebooks.oeb.base import OPF identifiers = oeb.metadata['identifier'] uuid = None for x in identifiers: if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'): uuid = unicode(x).split(':')[-1] break encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', []) if uuid is None: self.log.warn('No UUID identifier found') from uuid import uuid4 uuid = str(uuid4()) oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid) if encrypted_fonts and not uuid.startswith('urn:uuid:'): # Apparently ADE requires this value to start with urn:uuid: # for some absurd reason, or it will throw a hissy fit and refuse # to use the obfuscated fonts. for x in identifiers: if unicode(x) == uuid: x.content = 'urn:uuid:'+uuid with TemporaryDirectory(u'_epub_output') as tdir: from calibre.customize.ui import plugin_for_output_format metadata_xml = None extra_entries = [] if self.is_periodical: if self.opts.output_profile.epub_periodical_format == 'sony': from calibre.ebooks.epub.periodical import sony_metadata metadata_xml, atom_xml = sony_metadata(oeb) extra_entries = [(u'atom.xml', 'application/atom+xml', atom_xml)] oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb, tdir, input_plugin, opts, log) opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0] self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir) if x.endswith('.ncx')][0]) if self.opts.epub_version == '3': self.upgrade_to_epub3(tdir, opf) encryption = None if encrypted_fonts: encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid) from calibre.ebooks.epub import initialize_container with initialize_container(output_path, os.path.basename(opf), extra_entries=extra_entries) as epub: epub.add_dir(tdir) if encryption is not None: epub.writestr('META-INF/encryption.xml', encryption) if metadata_xml is not None: epub.writestr('META-INF/metadata.xml', metadata_xml.encode('utf-8')) if opts.extract_to is not None: from calibre.utils.zipfile import ZipFile if os.path.exists(opts.extract_to): if os.path.isdir(opts.extract_to): shutil.rmtree(opts.extract_to) else: os.remove(opts.extract_to) os.mkdir(opts.extract_to) with ZipFile(output_path) as zf: zf.extractall(path=opts.extract_to) self.log.info('EPUB extracted to', opts.extract_to)