def create_epub(self, *args, **kw):
    """Write a freshly built EPUB into ``self.tdir`` and open it as a container.

    All arguments are forwarded unchanged to the module-level
    ``create_epub`` builder. A new number is drawn from ``counter`` on
    every call, so the book file and the container directory get names
    that are unique within ``self.tdir``.

    Returns the container opened in tweak mode.
    """
    idx = next(counter)
    book_path = os.path.join(self.tdir, '{0}book.epub'.format(idx))
    with open(book_path, 'wb') as out:
        out.write(create_epub(*args, **kw).getvalue())
    container_dir = os.path.join(self.tdir, 'container%d' % idx)
    return get_container(book_path, tdir=container_dir, tweak_mode=True)
def develop():
    """Development helper: print what ``find_first_matching_rule`` returns.

    Opens the hard-coded demo book in tweak mode and queries the rule
    matcher for the element described by ``data`` inside
    ``index_split_002.html``.
    """
    container = get_container('/t/demo.epub', tweak_mode=True)
    fname = 'index_split_002.html'
    data = {'class': 'xxx', 'sourceline_address': (13, ['body'])}
    # Read through a context manager so the file handle is closed as soon
    # as the contents are in memory instead of being left to the GC.
    with container.open(fname) as f:
        raw = f.read()
    print(find_first_matching_rule(container, fname, raw, data))
def test(cls):
    """Manually exercise the dialog class *cls*.

    The book to open is taken from the last command-line argument. The
    dialog is constructed with the tweak-mode container and run modally;
    nothing is done with the result.
    """
    import sys
    from calibre.ebooks.oeb.polish.container import get_container

    container = get_container(sys.argv[-1], tweak_mode=True)
    dialog = cls(container)
    if not (dialog.exec_() == dialog.Accepted):
        return
def test(cls):
    """Manually exercise the dialog class *cls* on the first spine item.

    The book to open is taken from the last command-line argument. The
    dialog receives the container and the name of its first spine entry;
    when the dialog is accepted, its ``href`` and ``text`` are printed.
    """
    import sys
    from calibre.ebooks.oeb.polish.container import get_container

    container = get_container(sys.argv[-1], tweak_mode=True)
    first_spine_name = next(container.spine_names)[0]
    dialog = cls(container, first_spine_name)
    if not (dialog.exec_() == dialog.Accepted):
        return
    print (dialog.href, dialog.text)
def plugin_button(self):
    """Handle the plugin button: let the user split the selected EPUB.

    Exactly one book must be selected and it must have an EPUB format
    whose OPF major version is below 3; every other case is reported with
    an error dialog and the method returns without doing anything else.
    Otherwise the book's split lines are read and shown in a
    ``SelectLinesDialog``, which performs the actual work through the
    callbacks bound here.

    Returns None in all cases.
    """
    self.t = time.time()

    selected_ids = self.gui.library_view.get_selected_ids()
    if len(selected_ids) != 1:
        d = error_dialog(self.gui,
                         _('Select One Book'),
                         _('Please select exactly one book to split.'),
                         show_copy_button=False)
        d.exec_()
        return

    with busy_cursor():
        self.previous = self.gui.library_view.currentIndex()
        db = self.gui.current_db

        self.book_count = 1  # for series Source columns

        self.t = time.time()

        source_id = selected_ids[0]
        misource = db.get_metadata(source_id, index_is_id=True)

        if not db.has_format(source_id, 'EPUB', index_is_id=True):
            d = error_dialog(self.gui,
                             _('No EPUB'),
                             _('This plugin only works on EPUB format ebooks.'))
            d.exec_()
            return

        splitepub = SplitEpub(BytesIO(db.format(source_id, 'EPUB', index_is_id=True)))

        # The container is opened only to read the OPF version.
        from calibre.ebooks.oeb.polish.container import get_container
        container = get_container(db.format_abspath(source_id, 'EPUB', index_is_id=True))
        if container.opf_version_parsed.major >= 3:
            d = error_dialog(self.gui,
                             _('EPUB3 Detected'),
                             _('This plugin only works on EPUB2 format ebooks.'))
            d.exec_()
            return

        lines = splitepub.get_split_lines()

    d = SelectLinesDialog(self.gui,
                          _('Select Sections to Split Off'),
                          prefs,
                          self.qaction.icon(),
                          lines,
                          partial(self._do_split, db, source_id, misource, splitepub, lines),
                          partial(self._do_splits, db, source_id, misource, splitepub, lines),
                          partial(self._get_split_size, splitepub),
                          partial(self.interface_action_base_plugin.do_user_config, parent=self.gui)
                          )
    # The dialog's callbacks do the splitting; its result is not needed
    # here, so the former (unreachable) result check has been dropped.
    d.exec_()
def test(cls):
    """Manually exercise the dialog class *cls* against a book.

    The book named by the last command-line argument becomes the current
    tweak-book container and the book locale is set from its metadata
    language. The dialog is built without arguments, a zero-delay timer
    schedules its ``refresh``, and it is then run modally.
    """
    from calibre.ebooks.oeb.polish.container import get_container
    from calibre.gui2.tweak_book import set_current_container

    book = get_container(sys.argv[-1], tweak_mode=True)
    set_current_container(book)
    language = current_container().mi.language
    set_book_locale(language)

    dialog = cls()
    QTimer.singleShot(0, dialog.refresh)
    dialog.exec_()
def cli_main(self, argv):
    """Command-line entry point: reshape the book named by ``argv[1]``.

    The book is opened in tweak mode, passed to ``reshape_book`` with
    ``True`` as the second argument, and ``done.`` is printed afterwards.
    """
    book_path = argv[1]
    book = get_container(book_path, tweak_mode=True)
    reshape_book(book, True)
    print('done.')
def test(cls):
    """Manually exercise the dialog class *cls*.

    The book to open is taken from the last command-line argument. When
    the dialog is accepted, its ``changed_type_map`` is pretty-printed and
    the changes are applied to the dialog's own container.
    """
    import sys
    from calibre.ebooks.oeb.polish.container import get_container

    container = get_container(sys.argv[-1], tweak_mode=True)
    dialog = cls(container)
    if not (dialog.exec_() == dialog.Accepted):
        return

    import pprint
    pprint.pprint(dialog.changed_type_map)
    dialog.apply_changes(dialog.container)
def polish(file_map, opts, log, report):
    """Polish every book in *file_map* and report the total time taken.

    *file_map* maps input book paths to output paths. Each input is
    opened as a container, handed to ``polish_one`` and committed to its
    output path. *report* is called with each progress line.
    """
    started = time.time()
    for source, destination in file_map.iteritems():
        fmt = source.rpartition('.')[-1].upper()
        report(_('## Polishing: %s')%fmt)
        book = get_container(source, log)
        polish_one(book, opts, report)
        book.commit(destination)
        report('-'*70)
    summary = _('Polishing took: %.1f seconds')
    report(summary%(time.time()-started))
def polish(file_map, opts, log, report):
    """Apply the polishing actions selected in *opts* to every book.

    *file_map* maps input book paths to output paths. For each book the
    enabled actions run in a fixed order (metadata, cover, jacket
    insertion, jacket removal, punctuation, font subsetting) and the
    result is committed to the output path. *report* receives every
    progress line; the total elapsed time is reported at the end.
    """
    def section(title):
        # Section headings are reported as third-level headers
        report('\n### ' + title)

    started = time.time()
    for inbook, outbook in file_map.iteritems():
        report(_('## Polishing: %s') % (inbook.rpartition('.')[-1].upper()))
        ebook = get_container(inbook, log)
        jacket = None

        if opts.subset:
            # Collected before any of the edits below touch the book
            stats = StatsCollector(ebook)

        if opts.opf:
            section(_('Updating metadata'))
            update_metadata(ebook, opts.opf)
            jacket = find_existing_jacket(ebook)
            if jacket is not None:
                replace_jacket(ebook, jacket)
                report(_('Updated metadata jacket'))
            report(_('Metadata updated\n'))

        if opts.cover:
            section(_('Setting cover'))
            set_cover(ebook, opts.cover, report)
            report('')

        if opts.jacket:
            section(_('Inserting metadata jacket'))
            # A jacket found during the metadata update was already
            # replaced there, so only look for one when none was found.
            if jacket is not None or add_or_replace_jacket(ebook):
                report(_('Existing metadata jacket replaced'))
            else:
                report(_('Metadata jacket inserted'))
            report('')

        if opts.remove_jacket:
            section(_('Removing metadata jacket'))
            if remove_jacket(ebook):
                report(_('Metadata jacket removed'))
            else:
                report(_('No metadata jacket found'))
            report('')

        if opts.smarten_punctuation:
            section(_('Smartening punctuation'))
            smarten_punctuation(ebook, report)
            report('')

        if opts.subset:
            section(_('Subsetting embedded fonts'))
            subset_all_fonts(ebook, stats.font_stats, report)
            report('')

        ebook.commit(outbook)
        report('-' * 70)

    summary = _('Polishing took: %.1f seconds')
    report(summary % (time.time() - started))
def polish(file_map, opts, log, report):
    """Run the polishing steps enabled in *opts* over each book in *file_map*.

    Keys of *file_map* are input paths and values are output paths. Every
    step that is switched on announces itself through *report* before it
    runs, and each book is committed to its output path once all steps
    are done. The overall duration is reported last.
    """
    def heading(text):
        report('\n### ' + text)

    begin = time.time()
    for src, dest in file_map.iteritems():
        report(_('## Polishing: %s')%(src.rpartition('.')[-1].upper()))
        book = get_container(src, log)
        existing_jacket = None
        wants_subset = opts.subset
        if wants_subset:
            # Font statistics are gathered before the book is modified
            font_info = StatsCollector(book)

        if opts.opf:
            heading(_('Updating metadata'))
            update_metadata(book, opts.opf)
            existing_jacket = find_existing_jacket(book)
            if existing_jacket is not None:
                replace_jacket(book, existing_jacket)
                report(_('Updated metadata jacket'))
            report(_('Metadata updated\n'))

        if opts.cover:
            heading(_('Setting cover'))
            set_cover(book, opts.cover, report)
            report('')

        if opts.jacket:
            heading(_('Inserting metadata jacket'))
            if existing_jacket is None:
                replaced = add_or_replace_jacket(book)
            else:
                # Already replaced as part of the metadata update
                replaced = True
            if replaced:
                report(_('Existing metadata jacket replaced'))
            else:
                report(_('Metadata jacket inserted'))
            report('')

        if opts.remove_jacket:
            heading(_('Removing metadata jacket'))
            removed = remove_jacket(book)
            if removed:
                report(_('Metadata jacket removed'))
            else:
                report(_('No metadata jacket found'))
            report('')

        if opts.smarten_punctuation:
            heading(_('Smartening punctuation'))
            smarten_punctuation(book, report)
            report('')

        if opts.subset:
            heading(_('Subsetting embedded fonts'))
            subset_all_fonts(book, font_info.font_stats, report)
            report('')

        book.commit(dest)
        report('-'*70)

    closing = _('Polishing took: %.1f seconds')
    report(closing%(time.time()-begin))
def build_epub(self, outdir=None, outname=None):
    """Build the EPUB with ``EpubBuilder`` and then post-process it.

    When *outdir* is falsy the base builder is called without arguments
    and the location is taken from ``self.outdir`` and
    ``self.config.epub_basename``; otherwise *outdir* and *outname* are
    passed through. The resulting file is opened as a container, fixed
    with ``self.fix_epub`` and committed.
    """
    if not outdir:
        EpubBuilder.build_epub(self)
        outdir = self.outdir
        outname = self.config.epub_basename + '.epub'
    else:
        EpubBuilder.build_epub(self, outdir, outname)

    epub_path = os.path.join(outdir, outname)
    container = get_container(epub_path)
    self.fix_epub(container)
    container.commit()
def explode(self):
    """Open ``self.pathtobook`` as a container and signal the outcome.

    On success the container is stored in ``self.ebook``. Any failure is
    captured as a formatted traceback instead of being raised. If
    ``self.working`` is still set, it is cleared and ``explode_done`` is
    emitted with the traceback text, or None when opening succeeded.
    """
    error = None
    try:
        self.ebook = get_container(self.pathtobook, log=self.log)
    except:
        # Everything is caught here: the failure travels to the listener
        # as text through the signal below rather than as an exception.
        import traceback
        error = traceback.format_exc()

    if not self.working:
        return
    self.working = False
    self.explode_done.emit(error)
def test_file_removal(self):
    ' Test removal of files from the container '
    doomed = ('toc.ncx', 'cover.png', 'titlepage.xhtml')
    cover_meta = '//opf:meta[@name="cover"]'

    source = get_simple_book()
    container = get_container(source, tdir=self.tdir)

    # Before removal: the title page is in the spine and a cover is declared
    self.assertIn('titlepage.xhtml', {entry[0] for entry in container.spine_names})
    self.assertTrue(container.opf_xpath(cover_meta))

    for name in doomed:
        container.remove_item(name)

    # After removal: the OPF is dirty and no longer refers to the files
    self.assertIn(container.opf_name, container.dirtied)
    self.assertNotIn('titlepage.xhtml', {entry[0] for entry in container.spine_names})
    self.assertFalse(container.opf_xpath(cover_meta))

    opf_text = container.serialize_item(container.opf_name).decode('utf-8')
    for name in doomed:
        self.assertNotIn(name, opf_text)
def test_toc_detection(self):
    """Check ToC detection for an EPUB 2 book and after switching it to EPUB 3.

    The NCX ToC must be found both before and after the OPF version is
    changed to 3.0; once a nav document is added, the ToC read back must
    come from that document.
    """
    ep = os.path.join(self.tdir, 'book.epub')
    create_book(Metadata('Test ToC'), ep)
    c = get_container(ep, tdir=os.path.join(self.tdir, 'container'), tweak_mode=True)
    self.assertEqual(2, c.opf_version_parsed.major)
    self.assertTrue(len(get_toc(c)))
    c.opf.set('version', '3.0')
    self.assertEqual(3, c.opf_version_parsed.major)
    self.assertTrue(len(get_toc(c)))  # detect NCX toc even in epub 3 files
    # Both halves of the document must be bytes literals: mixing a bytes
    # and a text literal in one implicit concatenation is a syntax error
    # on Python 3.
    c.add_file(
        'nav.html',
        b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
        b'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>',
        process_manifest_item=lambda item: item.set('properties', 'nav'))
    toc = get_toc(c)
    self.assertTrue(len(toc))
    self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')
def test_toc_detection(self):
    """Check ToC detection (NCX and nav) and ToC generation from XPaths.

    The first part verifies that a ToC is found for the EPUB 2 book, after
    the OPF version is set to 3.0, and after a nav document is added. The
    second part rewrites ``nav.html`` with synthetic ``<tN>`` elements and
    checks the nesting that ``toc_from_xpaths`` produces for them.
    """
    book_path = os.path.join(self.tdir, 'book.epub')
    create_book(Metadata('Test ToC'), book_path)
    container_dir = os.path.join(self.tdir, 'container')
    c = get_container(book_path, tdir=container_dir, tweak_mode=True)

    self.assertEqual(2, c.opf_version_parsed.major)
    self.assertTrue(len(get_toc(c)))
    c.opf.set('version', '3.0')
    self.assertEqual(3, c.opf_version_parsed.major)
    self.assertTrue(len(get_toc(c)))  # detect NCX toc even in epub 3 files

    nav_markup = (
        b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
        b'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>')
    c.add_file('nav.html', nav_markup,
               process_manifest_item=lambda item: item.set('properties', 'nav'))
    toc = get_toc(c)
    self.assertTrue(len(toc))
    self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')

    def tfx(linear, expected):
        # One <tN>N</tN> element per character of ``linear``
        items = ['<t{0}>{0}</t{0}>'.format(x) for x in linear]
        html = '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
        html += '<body>%s</body></html>' % '\n'.join(items)
        with c.open('nav.html', 'wb') as f:
            f.write(html.encode('utf-8'))
        levels = sorted(set(linear))
        toc = toc_from_xpaths(c, ['//h:t' + x for x in levels])

        def p(node):
            # Render the subtree below ``node`` as nested bracket groups
            if not node.children:
                return ''
            inner = ''.join(child.title + p(child) for child in node.children)
            return '[' + inner + ']'

        self.assertEqual('[%s]' % expected, p(toc))

    tfx('121333', '1[2]1[333]')
    tfx('1223424', '1[22[3[4]]2[4]]')
    tfx('32123', '321[2[3]]')
    tfx('123123', '1[2[3]]1[2[3]]')
def test_toc_detection(self):
    """Check that a ToC is detected from the NCX and from an EPUB 3 nav file.

    The book starts as EPUB 2. Its OPF version is then raised to 3.0 (the
    NCX must still be honoured) and finally a nav document is added, whose
    single entry must be what ``get_toc`` returns.
    """
    ep = os.path.join(self.tdir, 'book.epub')
    create_book(Metadata('Test ToC'), ep)
    c = get_container(ep, tdir=os.path.join(self.tdir, 'container'), tweak_mode=True)
    self.assertEqual(2, c.opf_version_parsed.major)
    self.assertTrue(len(get_toc(c)))
    c.opf.set('version', '3.0')
    self.assertEqual(3, c.opf_version_parsed.major)
    self.assertTrue(len(get_toc(c)))  # detect NCX toc even in epub 3 files
    # The second literal needs its own b prefix: implicit concatenation
    # of a bytes literal with a text literal does not compile on Python 3.
    nav_data = (
        b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
        b'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>')
    c.add_file(
        'nav.html', nav_data,
        process_manifest_item=lambda item: item.set('properties', 'nav'))
    toc = get_toc(c)
    self.assertTrue(len(toc))
    self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')
def test_clone(self):
    ' Test cloning of containers '
    for fmt in ('epub', 'azw3'):
        base = os.path.join(self.tdir, fmt + '-')
        book = get_simple_book(fmt)

        first_dir = base + 'first'
        os.mkdir(first_dir)
        c1 = get_container(book, tdir=first_dir)

        second_dir = base + 'second'
        os.mkdir(second_dir)
        c2 = clone_container(c1, second_dir)
        both = (c1, c2)

        # Right after cloning every file is shared between the two containers
        for c in both:
            for name, path in c.name_path_map.iteritems():
                self.assertEqual(2, nlinks_file(path), 'The file %s is not linked' % name)

        for name in c1.name_path_map:
            self.assertIn(name, c2.name_path_map)
            self.assertEqual(c1.open(name).read(), c2.open(name).read(), 'The file %s differs' % name)

        spine_names = tuple(entry[0] for entry in c1.spine_names)

        # Changing a parsed item in the clone must leave the original untouched
        text = spine_names[0]
        root = c2.parsed(text)
        body = root.xpath('//*[local-name()="body"]')[0]
        body.set('id', 'changed id for test')
        c2.dirty(text)
        c2.commit_item(text)
        for c in both:
            self.assertEqual(1, nlinks_file(c.name_path_map[text]))
        self.assertNotEqual(c1.open(text).read(), c2.open(text).read())

        # Writing through open() on the original must not affect the clone
        name = spine_names[1]
        with c1.open(name, mode='r+b') as f:
            f.seek(0, 2)
            f.write(b' ')
        for c in both:
            self.assertEqual(1, nlinks_file(c.name_path_map[name]))
        self.assertNotEqual(c1.open(name).read(), c2.open(name).read())

        outpath = base + 'out.' + fmt
        for c in both:
            c.commit(outpath=outpath)
def test_toc_detection(self):
    """Check ToC detection (NCX and nav) and ToC generation from XPaths.

    The first part verifies that a ToC is found for the EPUB 2 book, after
    the OPF version is set to 3.0, and after a nav document is added. The
    second part rewrites ``nav.html`` with synthetic ``<tN>`` elements and
    checks the nesting that ``toc_from_xpaths`` produces for them.
    """
    ep = os.path.join(self.tdir, 'book.epub')
    create_book(Metadata('Test ToC'), ep)
    c = get_container(ep, tdir=os.path.join(self.tdir, 'container'), tweak_mode=True)
    self.assertEqual(2, c.opf_version_parsed.major)
    self.assertTrue(len(get_toc(c)))
    c.opf.set('version', '3.0')
    self.assertEqual(3, c.opf_version_parsed.major)
    self.assertTrue(len(get_toc(c)))  # detect NCX toc even in epub 3 files
    # Both literals carry the b prefix: implicit concatenation of a bytes
    # literal with a text literal is rejected by the Python 3 compiler.
    c.add_file('nav.html', b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
               b'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>',
               process_manifest_item=lambda item:item.set('properties', 'nav'))
    toc = get_toc(c)
    self.assertTrue(len(toc))
    self.assertEqual(toc.as_dict['children'][0]['title'], 'EPUB 3 nav')

    def tfx(linear, expected):
        # One <tN>N</tN> element per character of ``linear``
        items = ['<t{0}>{0}</t{0}>'.format(x) for x in linear]
        html = '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
        html += '<body>%s</body></html>' % '\n'.join(items)
        with c.open('nav.html', 'wb') as f:
            f.write(html.encode('utf-8'))
        toc = toc_from_xpaths(c, ['//h:t'+x for x in sorted(set(linear))])

        def p(node):
            # Render the subtree below ``node`` as nested bracket groups
            ans = ''
            if node.children:
                ans += '['
                for child in node.children:
                    ans += child.title + p(child)
                ans += ']'
            return ans

        self.assertEqual('[%s]'%expected, p(toc))

    tfx('121333', '1[2]1[333]')
    tfx('1223424', '1[22[3[4]]2[4]]')
    tfx('32123', '321[2[3]]')
    tfx('123123', '1[2[3]]1[2[3]]')
def _populate_book_from_calibre_id(self, book, db=None):
    """Fill *book* in place from the library record ``book['calibre_id']``.

    Copies the metadata fields used for merging into *book*. When the
    record has an EPUB format whose OPF major version is below 3, the
    EPUB bytes are stored under ``'epub'`` (with the calibre metadata
    written into them when ``prefs['keepmeta']`` is set) and their length
    under ``'epub_size'``. Otherwise ``book['good']`` is set to False and
    ``book['error']`` explains why.

    *db* must be a database object despite its None default; it is used
    unconditionally.
    """
    mi = db.get_metadata(book['calibre_id'], index_is_id=True)

    book['good'] = True
    book['calibre_id'] = mi.id
    for field in ('title', 'authors', 'author_sort', 'tags', 'series',
                  'comments', 'publisher', 'pubdate'):
        book[field] = getattr(mi, field)
    # A series index only makes sense when the book is in a series
    book['series_index'] = mi.series_index if book['series'] else None
    book['languages'] = mi.languages
    book['error'] = ''

    if not db.has_format(mi.id,'EPUB',index_is_id=True):
        book['good'] = False
        book['error'] = _("%s by %s doesn't have an EPUB.")%(mi.title,', '.join(mi.authors))
        return

    book['epub'] = BytesIO(db.format(mi.id,'EPUB',index_is_id=True))

    # The container is opened only to read the OPF version
    from calibre.ebooks.oeb.polish.container import get_container
    container = get_container(db.format_abspath(mi.id,'EPUB',index_is_id=True))
    if container.opf_version_parsed.major >= 3:
        book['good'] = False
        book['error'] = _("%s by %s is EPUB3, EpubMerge only supports EPUB2.")%(mi.title,', '.join(mi.authors))
        return

    if prefs['keepmeta']:
        # save calibre metadata inside epub if keeping unmerge data.
        set_metadata(book['epub'], mi, stream_type='epub')
    book['epub_size'] = len(book['epub'].getvalue())
def build_epub(self, outdir, outname):
    """Build the EPUB with ``EpubBuilder`` and then post-process it.

    After the base builder has written ``outname`` into ``outdir``, the
    file is opened as a container, fixed with ``self.fix_epub`` and
    committed.
    """
    EpubBuilder.build_epub(self, outdir, outname)
    epub_path = os.path.join(outdir, outname)
    book = get_container(epub_path)
    self.fix_epub(book)
    book.commit()
def remove_fonts(self):
    """Apply ``change_font`` to every selected font in the current book.

    Does nothing when the selection is empty. If any call reports a
    change, the model is rebuilt and ``container_changed`` is emitted.
    """
    selected = self.get_selected_data()
    if not selected:
        return
    any_changed = False
    for entry in selected:
        any_changed |= change_font(current_container(), entry)
    if not any_changed:
        return
    self.model.build()
    self.container_changed.emit()


def embed_fonts(self):
    """Emit the ``embed_all_fonts`` signal."""
    self.embed_all_fonts.emit()


def subset_fonts(self):
    """Emit the ``subset_all_fonts`` signal."""
    self.subset_all_fonts.emit()


def refresh(self):
    """Rebuild the model."""
    self.model.build()


if __name__ == '__main__':
    # Manual test: open the book named on the command line and show the dialog
    from calibre.gui2 import Application
    app = Application([])
    book = get_container(sys.argv[-1], tweak_mode=True)
    set_current_container(book)
    dialog = ManageFonts()
    dialog.exec_()
    del app
sheet = container.parse_css(style.text, name) if remove_font_face_rules(container, sheet, remove, name): style.text = sheet.cssText container.dirty(name) if total_old > 0: report(_('Reduced total font size to %.1f%% of original')%( total_new/total_old*100)) else: report(_('No embedded fonts found')) return changed if __name__ == '__main__': from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.stats import StatsCollector from calibre.utils.logging import default_log default_log.filter_level = default_log.DEBUG inbook = sys.argv[-1] ebook = get_container(inbook, default_log) report = [] stats = StatsCollector(ebook).font_stats subset_all_fonts(ebook, stats, report.append) outbook, ext = inbook.rpartition('.')[0::2] outbook += '_subset.'+ext ebook.commit(outbook) prints('\nReport:') for msg in report: prints(msg) print() prints('Output written to:', outbook)
def main(argv, plugin_version, usage=None):
    """Command-line entry point: convert Chinese text in EPUB/AZW3 ebooks.

    Parameters:
        argv: argument list, passed as-is to ``ArgumentParser.parse_args``.
        plugin_version: sequence whose first three items are shown as the
            dotted plugin version in the help text and conversion info.
        usage: accepted but not used by this function.

    Returns:
        1 when the output directory or the requested conversion is not
        usable; 0 otherwise, including when settings are only shown, the
        user declines to overwrite, or no ebook files matched.
    """
    import argparse
    import glob

    converter = OpenCC(get_resource_file)
    criteria = None

    list_of_locales = ['cn', 'hk', 'tw']
    list_of_directions = ['t2s', 's2t', 't2t', 'none']
    quotation_types = ['w', 'e', 'no_change']
    text_directions = ['h', 'v', 'no_change']
    optimization = ['r', 'k', 'none']

    parser = argparse.ArgumentParser(description=_('Convert Chinese characters between traditional/simplified types and/or change text style.\nPlugin Version: ') +
                                     str(plugin_version[0]) + '.' + str(plugin_version[1]) + '.' + str(plugin_version[2]))
    parser.add_argument('-il', '--input-locale', dest='orig_opt', default='cn',
                        help=_('Set to the ebook origin locale if known (Default: cn)'), choices=list_of_locales)
    parser.add_argument('-ol', '--output-locale', dest='dest_opt', default='cn',
                        help=_('Set to the ebook target locale (Default: cn)'), choices=list_of_locales)
    parser.add_argument('-d', '--direction', dest='direction_opt', default='none',
                        help=_('Set to the ebook conversion direction (Default: none)'), choices=list_of_directions)
    parser.add_argument('-p', '--phrase_convert', dest='phrase_opt',
                        help=_('Convert phrases to target locale versions (Default: False)'), action='store_true')
    parser.add_argument('-qt', '--quotation-type', dest='quote_type_opt', default='no_change',
                        help=_('Set to Western or East Asian (Default: no_change)'), choices=quotation_types)
    parser.add_argument('-sq', '--smart_quotes', dest='smart_quotes_opt',
                        help=_('Use smart quotes if applicable (Default: False)'), action='store_true')
    parser.add_argument('-td', '--text-direction', dest='text_dir_opt', default='no_change',
                        help=_('Set to the ebook origin locale if known (Default: no_change)'), choices=text_directions)
    parser.add_argument('-tdo', '--text-device-optimize', dest='optimization_opt',
                        help=_('Optimize text for device (Default: none)'), choices=optimization)
    parser.add_argument('-v', '--verbose', dest='verbose_opt',
                        help=_('Print out details as the conversion progresses (Default: False)'), action='store_true')
    parser.add_argument('-t', '--test', dest='test_opt',
                        help=_('Run conversion operations without saving results (Default: False)'), action='store_true')
    parser.add_argument('-q', '--quiet', dest='quiet_opt',
                        help=_('Do not print anything, ignore warnings - this option overides the -s option (Default: False)'), action='store_true')
    parser.add_argument('-od', '--output-dir', dest='outdir_opt',
                        help=_('Set to the ebook output file directory (Default: overwrite existing ebook file)'))
    # NOTE: the two adjacent quotes below end and restart the literal, so
    # the help text reads "(Default: )". Left untouched on purpose because
    # the text is also the translation lookup key.
    parser.add_argument('-a', '--append_suffix', dest='append_suffix_opt', default='',
                        help=_('Append a suffix to the output file basename (Default: '')'))
    parser.add_argument('-f', '--force', dest='force_opt',
                        help=_('Force processing by ignoring warnings (e.g. allow overwriting files with no prompt)'), action='store_true')
    parser.add_argument('-s', '--show', dest='show_opt',
                        help=_('Show the settings based on user cmdline options and exit (Default: False)'), action='store_true')
    parser.add_argument('ebookFiles', metavar='ebook-filepath', nargs='+',
                        help=_('One or more epub and/or azw3 ebook filepaths - UNIX style wildcards accepted'))

    args = parser.parse_args(argv)

    # Resolve the output directory; the option may be a wildcard pattern
    # but has to match exactly one existing directory.
    if args.outdir_opt is None:
        output_dir = None
    else:
        dir_list = glob.glob(args.outdir_opt)
        if not dir_list:
            if not args.quiet_opt:
                print(_('Output directory not found'))
            return 1
        if len(dir_list) > 1:
            if not args.quiet_opt:
                print(_('Multiple output directory not found - only one allowed:'))
                for candidate in dir_list:
                    print(candidate)
            return 1
        output_dir = os.path.abspath(dir_list[0])
        if not os.path.isdir(output_dir):
            if not args.quiet_opt:
                print(_('Output directory not a directory'))
            return 1

    # Pull out the list of ebooks
    file_set = set()
    for filespec in args.ebookFiles:
        for filename in glob.glob(filespec):
            # Discard any non-files
            if not os.path.isfile(filename):
                if not args.quiet_opt:
                    print(_('Discarding - Not a file: ') + filename)
                continue
            # Discard any files not ending in an ebook extension
            if not filename.lower().endswith(('.epub', '.azw3')):
                if not args.quiet_opt:
                    print(_('Discarding - Does not end in \'.epub\' or \'.azw3\': ') + filename)
                continue
            file_set.add(filename)

    # Determine the conversion criteria tuple values
    criteria = cli_get_criteria(args)

    # Set converter properties
    conversion = get_configuration(criteria)
    if conversion == 'None':
        if not args.quiet_opt:
            print_conversion_info(args, file_set, plugin_version, '??')
            print(_('The input/output/direction combination selected is not supported.\n Please use a different input/output/direction combination'))
        return 1
    elif conversion == 'no_convert':
        pass
    else:
        if args.verbose_opt and not args.quiet_opt:
            print(_('Using opencc-python conversion configuration file: ') + conversion + '.json')
        converter.set_conversion(conversion)

    # Print out the conversion info
    if not args.quiet_opt:
        print_conversion_info(args, file_set, plugin_version, conversion + '.json')

    # If show option given, exit after displaying settings
    if args.show_opt:
        return 0

    # Without an output directory or suffix the source files get replaced
    overwrite_in_place = args.outdir_opt is None and args.append_suffix_opt == ''

    if overwrite_in_place and not args.force_opt:
        response = str(raw_input(_('No output directory specified, original ebook file will be overwritten. Is this OK? [N] or Y: '))).lower().strip()
        if not response.startswith('y'):
            print(_('Exiting without changes'))
            return 0

    if not file_set:
        if not args.quiet_opt:
            print(_('No ebook files specified!'))
        return 0

    # Loop through the filenames
    for filename in file_set:
        # Print out the current operation
        if not args.quiet_opt:
            print(_('Converting ebook: ') + os.path.basename(filename + ' .... '), end="")

        # Create a Container object from the file and update it
        container = get_container(filename)
        changed_files = cli_process_files(criteria, container, converter)

        if changed_files and not args.quiet_opt:
            print(_('Changed'))
            if args.verbose_opt:
                for changed_file_name in changed_files:
                    print(' ' + changed_file_name)
        elif not args.quiet_opt:
            print(_('Unchanged - No file written'))

        if not changed_files:
            continue

        # Changes were made: save the container as an ebook file with a
        # name based on the output options.
        if overwrite_in_place:
            if not args.quiet_opt:
                print(_(' Overwriting file with changes: ') + filename, end="")
                if args.test_opt:
                    print(_(' --- TEST MODE - No Changes Written'))
                else:
                    print('')
            if not args.test_opt:
                container.commit()
        else:
            # The earlier filter guarantees a 5-character extension
            # ('.epub' or '.azw3'), so the suffix goes before the last 5
            # characters of the file name.
            file_path_portion, file_name_portion = os.path.split(filename)
            adjusted_file_name = file_name_portion[:-5] + args.append_suffix_opt + file_name_portion[-5:]
            if args.outdir_opt is not None:
                output_path = os.path.join(output_dir, adjusted_file_name)
            else:
                output_path = os.path.join(file_path_portion, adjusted_file_name)
            if not args.quiet_opt:
                print(_(' Saving file to: ') + output_path, end="")
                if args.test_opt:
                    print(_(' --- TEST MODE - No Changes Written'))
                else:
                    print('')
            if not args.test_opt:
                container.commit(outpath=output_path)

    return 0
text = getattr(node, attr) replacement = loc.elided_prefix + new_word rtext, replaced = replace(text, loc.original_word, replacement, locale.langcode) if replaced: if undo_cache is not None: undo_cache[(loc.file_name, node, is_attr, attr)] = text if is_attr: node.set(attr, rtext) else: setattr(node, attr, rtext) container.replace(loc.file_name, node.getroottree().getroot()) changed.add(loc.file_name) return changed def undo_replace_word(container, undo_cache): changed = set() for (file_name, node, is_attr, attr), text in iteritems(undo_cache): node.set(attr, text) if is_attr else setattr(node, attr, text) container.replace(file_name, node.getroottree().getroot()) changed.add(file_name) return changed if __name__ == '__main__': import pprint from calibre.gui2.tweak_book import set_book_locale, dictionaries container = get_container(sys.argv[-1], tweak_mode=True) set_book_locale(container.mi.language) pprint.pprint(get_all_words(container, dictionaries.default_locale))
def new_container():
    """Return a new container for ``book`` in its own numbered directory.

    Each call increments ``count[0]`` and creates a directory of that
    name under ``self.tdir`` for the container to use.
    """
    count[0] += 1
    # os.mkdir() returns None, so the path has to be kept separately;
    # assigning its result would pass tdir=None to get_container and
    # leave the directory created here unused.
    tdir = os.path.join(self.tdir, str(count[0]))
    os.mkdir(tdir)
    return get_container(book, tdir=tdir)
ff = [icu_lower(x) for x in font.get('font-family', [])] if ff and ff[0] not in bad_fonts: keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys)) val = fu[key] if not val: val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys}) val['text'] = set() val['text'] |= text self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu) if self.do_embed: self.page.evaljs('window.font_stats.get_font_families()') font_families = self.page.bridge_value if not isinstance(font_families, dict): raise Exception('Unknown error occurred while reading font families') self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() for font_dict, text, pseudo in pseudo_element_font_usage: font_families[font_dict['font-family']] = True for raw in font_families.iterkeys(): for x in parse_font_families(self.parser, raw): if x.lower() not in bad_fonts: fs.add(x) if __name__ == '__main__': from calibre.ebooks.oeb.polish.container import get_container from calibre.utils.logging import default_log default_log.filter_level = default_log.DEBUG ebook = get_container(sys.argv[-1], default_log) print (StatsCollector(ebook, do_embed=True).font_stats)
def ebook_diff(path1, path2):
    """Return the ``container_diff`` of the two ebooks at *path1* and *path2*.

    Both books are opened in tweak mode; *path1* is the left side of the
    comparison and *path2* the right side.
    """
    from calibre.ebooks.oeb.polish.container import get_container

    left, right = [get_container(path, tweak_mode=True) for path in (path1, path2)]
    return container_diff(left, right)
def epub_to_book(epub, outpath=None):
    """Convert the EPUB at *epub* by passing its OPF to ``opf_to_book``.

    When *outpath* is falsy, the output is written next to the input with
    the part after the last dot replaced by ``azw3``.
    """
    container = get_container(epub, tweak_mode=True)
    if not outpath:
        stem = epub.rpartition('.')[0]
        outpath = stem + '.azw3'
    opf_path = container.name_to_abspath(container.opf_name)
    opf_to_book(opf_path, outpath, container)
def initialise_new_file(self, pathtoebook):
    """Reset per-book state and load *pathtoebook* as the new source ebook.

    Clears the metadata, error and rename maps, re-enables the run button
    and disables the Save button. If the path is a file that can be opened
    as a container, a clone of the original is kept in ``self.eborig``,
    the output file name is derived, and the source metadata and check
    errors are recorded. If the path is not a file, or opening fails (in
    which case an error dialog is shown), only a log line is added.
    """
    # NOTE(review): this block was reconstructed from whitespace-collapsed
    # text; confirm the nesting of the from_calibre branch and of the
    # final viewlog() call against the upstream file.
    self.meta, self.errors = {}, {}
    self.rename_file_map = {}
    self.is_scrambled = False
    self.dummyimg = None
    self.dummysvg = ''
    self.runButton.setEnabled(True)
    self.buttonBox.button(QDialogButtonBox.Save).setEnabled(False)

    fileok = True
    if not os.path.isfile(pathtoebook):
        fileok = False
    else:
        try:
            self.ebook = get_container(pathtoebook)
        except:
            # Any failure to open the book is reported to the user
            fileok = False
            msg = "Source ebook must be de-DRM'd and in one of these formats:" \
                "\n- azw3\n- epub\n- kepub\n- kepub.epub.\n\nPlease select another."
            error_dialog(self, CAPTION, msg, show=True, show_copy_button=True)

    if not fileok:
        self.log.append('No ebook selected yet')
    else:
        # Remember everything that has to be removed again later
        self.cleanup_dirs.append(self.ebook.root)
        tdir = PersistentTemporaryDirectory('_scramble_clone_orig')
        self.cleanup_dirs.append(tdir)
        # Keep a clone of the freshly opened book in its own directory
        self.eborig = clone_container(self.ebook, tdir)

        dirn, fname, ext, is_kepub_epub = get_fileparts(
            self.ebook.path_to_ebook)
        ext = ext.lower()
        format = 'kepub' if is_kepub_epub else ext

        if self.book_id is not None:
            # calibre library book
            self.cleanup_files.append(self.ebook.path_to_ebook)
        sourcepath = self.ebook.path_to_ebook

        # Placeholder resources are chosen by format name
        self.dummyimg = get_resources('images/' + format + '.png')
        self.dummysvg = get_resources('images/' + format + '.svg')

        if self.from_calibre:
            # calibre plugin
            self.dirout = ''
        else:
            # standalone version
            self.dirout = dirn
            self.log.append('\n--- New ebook: %s' % sourcepath)

        # Output name: <name>_scrambled.<ext>, or .kepub.<ext> for kepub.epub
        fn = fname + '_scrambled.'
        fn += 'kepub.' + ext if is_kepub_epub else ext
        self.fname_scrambled_ebook = ascii_text(fn)
        self.sourcefile.setText(sourcepath)
        self.savefile.setText(self.fname_scrambled_ebook)
        self.meta['orig'] = get_metadata(self.ebook)
        self.errors['orig'] = get_run_check_error(self.ebook)

    self.viewlog()
def load_book(path_to_ebook, base_tdir):
    """Open *path_to_ebook* as a container in a new directory under *base_tdir*.

    A fresh temporary directory is created inside *base_tdir* for every
    call and passed to ``get_container`` as its working directory.
    """
    workdir = tempfile.mkdtemp(dir=base_tdir)
    book = get_container(path_to_ebook, tdir=workdir)
    return book
if ff and ff[0] not in bad_fonts: keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys)) val = fu[key] if not val: val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys}) val['text'] = set() val['text'] |= text self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu) if self.do_embed: self.page.evaljs('window.font_stats.get_font_families()') font_families = self.page.bridge_value if not isinstance(font_families, dict): raise Exception('Unknown error occurred while reading font families') self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() for font_dict, text, pseudo in pseudo_element_font_usage: font_families[font_dict['font-family']] = True for raw in font_families.iterkeys(): for x in parse_font_families(self.parser, raw): if x.lower() not in bad_fonts: fs.add(x) if __name__ == '__main__': from calibre.ebooks.oeb.polish.container import get_container from calibre.utils.logging import default_log default_log.filter_level = default_log.DEBUG ebook = get_container(sys.argv[-1], default_log) from pprint import pprint pprint(StatsCollector(ebook, do_embed=True).font_stats)
def do_merge_bg(args, cpus, notification=lambda x, y: x):
    """Background job: convert EPUB3 inputs to EPUB2, then merge everything.

    *args* is the job dictionary; the keys read here are the ones passed
    on to ``doMerge`` plus ``epubtitles`` and ``tdir``. Every input whose
    OPF major version is 3 or higher is converted to a temporary EPUB2
    file, and its entry in ``args['inputepubfns']`` is replaced by that
    file. *cpus* is not used. *notification* is called with a fraction
    and a message to report progress.
    """
    # For purposes of %done, autoconvert and merging output are each
    # considered 1/2 of total.
    def convert_progress(percent):
        notification(max(percent / 2, 0.01), _('Autoconverting...'))

    def merge_progress(percent):
        notification(percent / 2 + 0.5, _('Merging...'))

    # Set the % complete to a small number to avoid the 'unavailable' indicator
    convert_progress(0.01)

    for index, fn in enumerate(args['inputepubfns']):
        title = args['epubtitles'][fn]
        try:
            container = get_container(fn)
            if container.opf_version_parsed.major >= 3:
                print("=" * 50)
                print(
                    "Found EPUB3 for %s, automatically creating a temporary EPUB2 for merging...\n"
                    % title)
                # this temp file is deleted when the BG process quits,
                # so don't expect it to still be there.
                epub2 = PersistentTemporaryFile(prefix="epub2_",
                                                suffix=".epub",
                                                dir=args['tdir'])
                converted = epub2.name
                # ebook-convert epub3.epub epub2.epub --epub-version=2
                ebook_convert_cli_main([
                    'epubmerge calling convert', fn, converted,
                    '--epub-version=2', '--no-default-epub-cover'
                ])
                # Replacing the current slot does not change the list
                # length, so it is safe while iterating.
                args['inputepubfns'][index] = converted
                print("Converted to temporary EPUB2: %s" % converted)
            convert_progress(float(index) / len(args['inputepubfns']))
        except:
            # Log which book failed, then let the error propagate
            print("=" * 20)
            print("Exception auto converting %s to EPUB2 from EPUB3" % title)
            print("Quiting...")
            print("=" * 50)
            raise

    print("=" * 50)
    print("\nBeginning Merge...\n")
    print("=" * 50)

    doMerge(args['outputepubfn'],
            args['inputepubfns'],
            args['authoropts'],
            args['titleopt'],
            args['descopt'],
            args['tags'],
            args['languages'],
            args['titlenavpoints'],
            args['originalnavpoints'],
            args['flattentoc'],
            args['printtimes'],
            args['coverjpgpath'],
            args['keepmetadatafiles'],
            notify_progress=merge_progress)

    print("=" * 50)
    print("\nFinished Merge...\n")
    print("=" * 50)