def viewer_socket_address():
    '''
    Return the address of the viewer's IPC endpoint, computing it only once.

    The value is cached on the ``viewer_socket_address.ans`` function
    attribute and is computed only while that attribute is ``None``.

    On Windows the address is a named pipe path, suffixed with a sanitized
    form of the user name when one can be obtained. Elsewhere the user name
    comes from ``$USER`` (falling back to the basename of the home
    directory); Linux gets a name starting with a NUL character, other
    platforms a path inside the temporary directory.
    '''
    if viewer_socket_address.ans is not None:
        return viewer_socket_address.ans

    if iswindows:
        address = r'\\.\pipe\CalibreViewer'
        try:
            user = get_windows_username()
        except Exception:
            # Best effort: fall back to the un-suffixed pipe name, but do not
            # swallow KeyboardInterrupt/SystemExit like a bare except would.
            user = None
        if user:
            user = ascii_filename(user).replace(' ', '_')
            if user:
                address += '-' + user[:100] + 'x'
    else:
        user = os.environ.get('USER', '')
        if not user:
            user = os.path.basename(os.path.expanduser('~'))
        if islinux:
            address = (u'\0%s-calibre-viewer.socket' % ascii_filename(force_unicode(user)))
        else:
            from tempfile import gettempdir
            tmp = gettempdir()
            address = os.path.join(tmp, user+'-calibre-viewer.socket')
        if not ispy3 and not isinstance(address, bytes):
            address = address.encode(filesystem_encoding)

    # Publish the cached value only once it is complete, so a failure above
    # cannot leave a half-built address behind for later calls.
    viewer_socket_address.ans = address
    return address
def construct_path_name(self, book_id):
    '''
    Build a path element of the form ``<prefix>_<source>`` for a book.

    The source is read from the book's metadata field named by
    ``self.source_field_name``; when that field is ``None``,
    ``self.default_source`` is used instead. Both parts are passed through
    ``ascii_filename`` after being cut to ``self.PATH_LIMIT`` characters; the
    source part is lower-cased and its spaces replaced with underscores.

    Returns ``None`` when there is no source or when anything goes wrong
    (the traceback is printed). Diagnostic output goes to stdout.
    '''
    print(" PbSourceS is %s" % (__name__))
    print(" PbSourceS: self : %s" % (self))
    print(" PbSourceS: database: %s" % (self.database))
    print(" PbSourceS: book id : %s" % (book_id))
    path_element = None
    try:
        metadata = self.database.get_metadata(book_id, index_is_id=True)
        print(" PbSourceS: metadata: '%s'" % (metadata))
        source = metadata.get(self.source_field_name)
        print(" PbSourceS: metadata.source: '%s'" % (source))
        # Special case for books which have no source set
        if source is None:
            source = self.default_source
        if source:
            source_prefix = self.source_prefix
            source_prefix = ascii_filename(source_prefix[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
            source_name = ascii_filename(source[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
            source_name = source_name.lower()
            # TODO: use a regex for whitespace, r = re.compile(r"^\s+", re.MULTILINE)
            source_name = source_name.replace(' ', '_')
            path_element = source_prefix + "_" + source_name
    except Exception:
        # Deliberate best effort: report the problem and return None, but let
        # KeyboardInterrupt/SystemExit propagate.
        traceback.print_exc()
    print(" PbSourceS: path_element: '%s'" % (path_element))
    return path_element
def construct_file_name(self, book_id, title, author):
    '''
    Build the file name ``<title> - <author>`` for a book.

    Each part is passed through ``ascii_filename``, cut to
    ``self.PATH_LIMIT`` characters and decoded as ASCII. Trailing dots are
    removed from the combined name. ``book_id`` is not used.
    '''
    author_part = ascii_filename(author)[:self.PATH_LIMIT].decode('ascii', 'replace')
    title_part = ascii_filename(title)[:self.PATH_LIMIT].decode('ascii', 'replace')
    return (title_part + ' - ' + author_part).rstrip('.')
def construct_path_name(self, book_id, title, author):
    '''
    Build the directory name ``<author>/<title> (<book_id>)`` for a book.

    Author and title are passed through ``ascii_filename``, cut to
    ``self.PATH_LIMIT`` characters and decoded as ASCII. Trailing spaces and
    dots are removed from the author; if nothing is left, the translated
    string ``Unknown`` is used instead.
    '''
    author = ascii_filename(author)[:self.PATH_LIMIT].decode('ascii', 'replace')
    title = ascii_filename(title)[:self.PATH_LIMIT].decode('ascii', 'replace')
    # rstrip() instead of indexing author[-1] in a loop: the loop raised
    # IndexError as soon as the author became (or started out) empty, which
    # made the 'Unknown' fallback below unreachable.
    author = author.rstrip(' .')
    if not author:
        author = ascii_filename(_('Unknown')).decode('ascii', 'replace')
    return '%s/%s (%d)'%(author, title, book_id)
def gui_socket_address():
    '''
    Return the address of the GUI's IPC endpoint, computing it only once.

    The value is cached in the module-level ``ADDRESS`` and is computed only
    while that global is ``None``. On Windows it is a named pipe path,
    suffixed with a sanitized user name when one can be obtained. Elsewhere
    the user name comes from ``$USER`` (falling back to the basename of the
    home directory); Linux gets a name starting with a NUL character, other
    platforms a path inside the temporary directory.
    '''
    global ADDRESS
    if ADDRESS is None:
        if iswindows:
            ADDRESS = r'\\.\pipe\CalibreGUI'
            try:
                user = get_windows_username()
            except Exception:
                # Best effort: use the un-suffixed pipe name, but do not
                # swallow KeyboardInterrupt/SystemExit.
                user = None
            if user:
                from calibre.utils.filenames import ascii_filename
                user = ascii_filename(user).replace(' ', '_')
                if user:
                    ADDRESS += '-' + user[:100] + 'x'
        else:
            user = os.environ.get('USER', '')
            if not user:
                user = os.path.basename(os.path.expanduser('~'))
            if islinux:
                # NOTE(review): the user name is not sanitized here, so
                # .encode('ascii') raises for non-ASCII user names - confirm
                # whether it should go through ascii_filename as well.
                ADDRESS = (u'\0%s-calibre-gui.socket' % user).encode('ascii')
            else:
                from tempfile import gettempdir
                tmp = gettempdir()
                ADDRESS = os.path.join(tmp, user+'-calibre-gui.socket')
    return ADDRESS
def generate_filename(self, rid, base=None, rid_map=None):
    '''
    Write the image referenced by ``rid`` into ``self.dest_dir`` and return
    the file name used.

    :param rid: Relationship id, looked up in ``rid_map``.
    :param base: Preferred file name; when falsy, one is derived from the
        last path component of the mapped name.
    :param rid_map: Mapping of rid to name inside the container; defaults to
        ``self.rid_map``.

    Results are remembered in ``self.used`` so the same source is written
    only once. EMF images are unwrapped to their embedded raster image when
    possible. The name is made unique among the already used names by
    appending ``-<n>`` before the extension.
    '''
    rid_map = self.rid_map if rid_map is None else rid_map
    fname = rid_map[rid]
    if fname in self.used:
        return self.used[fname]
    raw = self.docx.read(fname)
    base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image'
    ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
    if ext == 'emf':
        # For an example, see: https://bugs.launchpad.net/bugs/1224849
        self.log('Found an EMF image: %s, trying to extract embedded raster image' % base)
        from calibre.utils.wmf.emf import emf_unwrap
        try:
            raw = emf_unwrap(raw)
        except Exception:
            # Keep the original EMF data; log.exception records the details.
            self.log.exception('Failed to extract embedded raster image from EMF')
        else:
            ext = 'png'
    base = base.rpartition('.')[0]
    if not base:
        base = 'image'
    base += '.' + ext
    exists = frozenset(self.used.itervalues())
    # base never changes below, so split it once instead of on every pass.
    stem, suffix = base.rpartition('.')[0::2]
    c = 1
    name = base
    while name in exists:
        name = '%s-%d.%s' % (stem, c, suffix)
        c += 1
    self.used[fname] = name
    with open(os.path.join(self.dest_dir, name), 'wb') as f:
        f.write(raw)
    self.all_images.add('images/' + name)
    return name
def __init__(self, gui, name):
    '''
    Remember the GUI and the store name and open the store's JSON config,
    kept under ``store/stores/<ascii file name of the store>``.
    '''
    from calibre.gui2 import JSONConfig

    self.gui = gui
    self.name = name
    self.base_plugin = None
    config_name = 'store/stores/' + ascii_filename(self.name)
    self.config = JSONConfig(config_name)
def start_download(self, request):
    '''
    Handle a download request coming from the store browser.

    Does nothing without a GUI. The download file name is limited to 60
    characters (plus extension) and passed through ``ascii_filename``. Files
    whose extension is in ``BOOK_EXTENSIONS`` are handed to
    ``self.gui.download_ebook`` with ``self.tags``; for anything else the
    user is asked where to save the file (after an extra confirmation for
    ``.acsm`` files).
    '''
    if not self.gui:
        return

    url = unicode(request.url().toString())
    cf = self.get_cookies()

    filename = get_download_filename(url, cf)
    # Split the extension off before truncating. Appending the extension to
    # the (still complete) name produced names such as 'book.epub.epub', and
    # a trailing '.' when there was no extension at all.
    stem, ext = os.path.splitext(filename)
    ext = ext[1:].lower()
    filename = stem[:60]
    if ext:
        filename += '.' + ext
    filename = ascii_filename(filename)
    if ext not in BOOK_EXTENSIONS:
        if ext == 'acsm':
            from calibre.gui2.dialogs.confirm_delete import confirm
            if not confirm('<p>' + _('This ebook is a DRMed EPUB file.  '
                      'You will be prompted to save this file to your '
                      'computer. Once it is saved, open it with '
                      '<a href="http://www.adobe.com/products/digitaleditions/">'
                      'Adobe Digital Editions</a> (ADE).<p>ADE, in turn '
                      'will download the actual ebook, which will be a '
                      '.epub file. You can add this book to calibre '
                      'using "Add Books" and selecting the file from '
                      'the ADE library folder.'),
                      'acsm_download', self):
                return
        home = os.path.expanduser('~')
        name = QFileDialog.getSaveFileName(self,
            _('File is not a supported ebook type. Save to disk?'),
            os.path.join(home, filename),
            '*.*')
        if name:
            name = unicode(name)
            self.gui.download_ebook(url, cf, name, name, False)
    else:
        self.gui.download_ebook(url, cf, filename, tags=self.tags)
def read_image_data(self, fname, base=None):
    """
    Load the bytes of an image and choose a file name for it.

    ``fname`` is either a ``file://`` URL, read from disk, or a name read
    through ``self.docx.read``. When ``base`` is falsy a name is derived
    from the last path component of ``fname``. EMF images are unwrapped to
    their embedded raster image when possible.

    Returns ``(raw, base)`` where ``base`` carries the detected extension.
    Raises ``LinkedImageNotFound`` when a ``file://`` target does not exist.
    """
    url_prefix = "file://"
    if fname.startswith(url_prefix):
        src = fname[len(url_prefix):]
        if iswindows and src and src[0] == "/":
            src = src[1:]
        if not (src and os.path.exists(src)):
            raise LinkedImageNotFound(src)
        with open(src, "rb") as rawsrc:
            raw = rawsrc.read()
    else:
        raw = self.docx.read(fname)

    if not base:
        base = ascii_filename(fname.rpartition("/")[-1]).replace(" ", "_") or "image"
    ext = what(None, raw) or base.rpartition(".")[-1] or "jpeg"
    if ext == "emf":
        # For an example, see: https://bugs.launchpad.net/bugs/1224849
        self.log("Found an EMF image: %s, trying to extract embedded raster image" % fname)
        from calibre.utils.wmf.emf import emf_unwrap

        try:
            raw = emf_unwrap(raw)
        except Exception:
            self.log.exception("Failed to extract embedded raster image from EMF")
        else:
            ext = "png"

    stem = base.rpartition(".")[0] or "image"
    return raw, stem + "." + ext
def write(self, outpath):
    '''
    Write the PalmDB header, the record index and all records.

    :param outpath: Despite its name this is used as an already open,
        writable binary stream (``write`` and ``tell`` are called on it).
        Nothing is written when it is ``None``.
    '''
    records = [self.record0] + self.records[1:]
    if outpath is None:
        return
    f = outpath

    # Write PalmDB Header

    # The database name field is exactly 32 bytes and NUL terminated, so the
    # title must be cut to 31 bytes. Without the cut a long title made the
    # padding below empty and pushed every following header field off its
    # fixed position.
    title = ascii_filename(self.full_title.decode('utf-8'))[:31]
    title += (b'\0' * (32 - len(title)))
    now = int(time.time())
    nrecords = len(records)
    f.write(title)
    f.write(pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
    f.write(b'BOOKMOBI')
    f.write(pack(b'>IIH', (2*nrecords)-1, 0, nrecords))
    # Record data starts after the index: 8 bytes per record plus 2 bytes of
    # padding written after the loop.
    offset = f.tell() + (8 * nrecords) + 2
    for i, record in enumerate(records):
        f.write(pack(b'>I', offset))
        f.write(b'\0' + pack(b'>I', 2*i)[1:])
        offset += len(record)
    f.write(b'\0\0')

    for rec in records:
        f.write(rec)
def pic_to_img(self, pic, alt, parent):
    '''
    Convert a picture element into an ``IMG`` element.

    The last hyperlink found under ``parent`` (if any) is recorded in
    ``self.links`` together with the image. The name and alt text come from
    the picture's ``cNvPr`` element. The first embedded or linked blip whose
    relationship id is in ``self.rid_map`` is written out via
    ``self.generate_filename``; linked images that cannot be found are
    skipped with a warning.

    Returns the ``IMG`` element, or ``None`` when no image could be created.
    '''
    name = link = None
    for hl in XPath('descendant::a:hlinkClick[@r:id]')(parent):
        link = {'id':get(hl, 'r:id')}
        for attr, key in (('tgtFrame', 'target'), ('tooltip', 'title')):
            value = hl.get(attr, None)
            if value:
                link[key] = value

    for pr in XPath('descendant::pic:cNvPr')(pic):
        raw_name = pr.get('name', None)
        name = ascii_filename(raw_name).replace(' ', '_') if raw_name else raw_name
        alt = pr.get('descr', None)
        for a in XPath('descendant::a:blip[@r:embed or @r:link]')(pic):
            rid = get(a, 'r:embed') or get(a, 'r:link')
            if not (rid and rid in self.rid_map):
                continue
            try:
                src = self.generate_filename(rid, name)
            except LinkedImageNotFound as err:
                self.log.warn('Linked image: %s not found, ignoring' % err.fname)
                continue
            img = IMG(src='images/%s' % src)
            img.set('alt', alt or 'Image')
            if link is not None:
                self.links.append((img, link))
            return img
def start_download(self, request):
    '''
    Handle a download request coming from the store browser.

    Does nothing without a GUI. The download file name is limited to 60
    characters (plus extension) and passed through ``ascii_filename``. Files
    whose extension is in ``BOOK_EXTENSIONS`` are handed to
    ``self.gui.download_ebook`` with ``self.tags``; for anything else the
    user is asked where to save the file (after an extra confirmation for
    ``.acsm`` files).
    '''
    if not self.gui:
        return

    url = unicode(request.url().toString(NO_URL_FORMATTING))
    cf = self.get_cookies()

    filename = get_download_filename(url, cf)
    # Split the extension off before truncating. Appending the extension to
    # the (still complete) name produced names such as 'book.epub.epub', and
    # a trailing '.' when there was no extension at all.
    stem, ext = os.path.splitext(filename)
    ext = ext[1:].lower()
    filename = stem[:60]
    if ext:
        filename += '.' + ext
    filename = ascii_filename(filename)
    if ext not in BOOK_EXTENSIONS:
        if ext == 'acsm':
            from calibre.gui2.dialogs.confirm_delete import confirm
            if not confirm('<p>' + _('This ebook is a DRMed EPUB file.  '
                      'You will be prompted to save this file to your '
                      'computer. Once it is saved, open it with '
                      '<a href="https://www.adobe.com/products/digitaleditions/">'
                      'Adobe Digital Editions</a> (ADE).<p>ADE, in turn '
                      'will download the actual ebook, which will be a '
                      '.epub file. You can add this book to calibre '
                      'using "Add Books" and selecting the file from '
                      'the ADE library folder.'),
                      'acsm_download', self):
                return
        name = choose_save_file(self, 'web-store-download-unknown', _(
            'File is not a supported ebook type. Save to disk?'), initial_filename=filename)
        if name:
            self.gui.download_ebook(url, cf, name, name, False, create_browser=self.create_browser)
    else:
        show_download_info(filename, self)
        self.gui.download_ebook(url, cf, filename, tags=self.tags, create_browser=self.create_browser)
def _download(self, cookie_file, url, filename, save_loc, add_to_lib):
    """
    Download ``url`` into a persistent temporary directory.

    :param cookie_file: Optional Mozilla-format cookie file loaded into the
        browser before the request.
    :param url: Location to fetch; an ``Exception`` is raised when empty.
    :param filename: Name for the downloaded file. When falsy it is derived
        from the URL, limited to 60 characters plus extension and passed
        through ``ascii_filename``.
    :param save_loc, add_to_lib: When both are falsy there is nothing to do
        and an empty string is returned without downloading.
    :return: Path of the downloaded file.
    """
    dfilename = ""

    if not url:
        raise Exception(_("No file specified to download."))
    if not save_loc and not add_to_lib:
        # Nothing to do.
        return dfilename

    if not filename:
        filename = get_download_filename(url, cookie_file)
        filename, ext = os.path.splitext(filename)
        filename = filename[:60] + ext
        filename = ascii_filename(filename)

    br = browser()
    if cookie_file:
        cj = MozillaCookieJar()
        cj.load(cookie_file)
        br.set_cookiejar(cj)
    with closing(br.open(url)) as r:
        temp_path = os.path.join(PersistentTemporaryDirectory(), filename)
        # Close the file before returning its path: it used to be left open,
        # leaking the handle and risking unflushed data for whoever reads the
        # file next.
        with open(temp_path, "w+b") as tf:
            tf.write(r.read())
            dfilename = tf.name

    return dfilename
def download_book(self, result):
    '''
    Ask which of the result's formats to download and start the download.

    Nothing happens when the dialog is not accepted. The file name is the
    first 60 characters of the title plus the lower-cased format, passed
    through ``ascii_filename``.
    '''
    d = ChooseFormatDialog(self, _('Choose format to download to your library.'), result.downloads.keys())
    if d.exec_() != d.Accepted:
        return
    ext = d.format()
    fname = ascii_filename(result.title[:60] + '.' + ext.lower())
    self.gui.download_ebook(result.downloads[ext], filename=fname)
def email_news(mi, remove, get_fmts, done, job_manager):
    '''
    Email a downloaded periodical to every account whose second config
    entry is truthy.

    For each such account the first available file among its configured
    formats (as returned by ``get_fmts``) is attached. ``remove`` is passed
    on to ``done`` only for the last account in the list.

    Returns the list of addresses a mail job was queued for.
    '''
    opts = email_config().parse()
    accounts = [
        (account, [fmt.strip().lower() for fmt in conf[0].split(',')])
        for account, conf in opts.accounts.items() if conf[1]
    ]
    last_index = len(accounts) - 1
    sent_mails = []
    for i, (account, fmts) in enumerate(accounts):
        files = [f for f in get_fmts(fmts) if f is not None]
        if not files:
            continue
        attachment = files[0]
        to_s = [account]
        subjects = [_('News:')+' '+mi.title]
        texts = [_('Attached is the %s periodical downloaded by calibre.') % (mi.title,)]
        attachment_names = [ascii_filename(mi.title)+os.path.splitext(attachment)[1]]
        attachments = [attachment]
        jobnames = [mi.title]
        do_remove = remove if i == last_index else []
        send_mails(jobnames,
                Dispatcher(partial(done, remove=do_remove)),
                attachments, to_s, subjects, texts, attachment_names,
                job_manager)
        sent_mails.append(to_s[0])
    return sent_mails
def read_image_data(self, fname, base=None):
    '''
    Load the bytes of an image and choose a file name for it.

    ``fname`` is either a ``file://`` URL, read from disk, or a name read
    through ``self.docx.read``. When ``base`` is falsy a name is derived
    from the last path component of ``fname``. EMF images are unwrapped to
    their embedded raster image when possible.

    Returns ``(raw, base)`` where ``base`` carries the detected extension.
    Raises ``LinkedImageNotFound`` when a ``file://`` target does not exist.
    '''
    if not fname.startswith('file://'):
        raw = self.docx.read(fname)
    else:
        src = fname[len('file://'):]
        if iswindows and src and src[0] == '/':
            src = src[1:]
        if not src or not os.path.exists(src):
            raise LinkedImageNotFound(src)
        with open(src, 'rb') as rawsrc:
            raw = rawsrc.read()

    if not base:
        last_component = fname.rpartition('/')[-1]
        base = ascii_filename(last_component).replace(' ', '_') or 'image'
    ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
    if ext == 'emf':
        # For an example, see: https://bugs.launchpad.net/bugs/1224849
        self.log('Found an EMF image: %s, trying to extract embedded raster image' % fname)
        from calibre.utils.wmf.emf import emf_unwrap
        try:
            raw = emf_unwrap(raw)
        except Exception:
            self.log.exception('Failed to extract embedded raster image from EMF')
        else:
            ext = 'png'

    stem = base.rpartition('.')[0]
    if not stem:
        stem = 'image'
    return raw, stem + '.' + ext
def pic_to_img(self, pic, alt, parent):
    '''
    Convert a picture element into an ``IMG`` element.

    The last hyperlink found under ``parent`` (if any) is recorded in
    ``self.links`` together with the image. The name and alt text come from
    the picture's ``cNvPr`` element. The first embedded blip whose
    relationship id is in ``self.rid_map`` is written out via
    ``self.generate_filename``.

    Returns the ``IMG`` element, or ``None`` when no image could be created.
    '''
    name = link = None
    for hl in XPath('descendant::a:hlinkClick[@r:id]')(parent):
        link = {'id':get(hl, 'r:id')}
        for attr, key in (('tgtFrame', 'target'), ('tooltip', 'title')):
            value = hl.get(attr, None)
            if value:
                link[key] = value

    for pr in XPath('descendant::pic:cNvPr')(pic):
        raw_name = pr.get('name', None)
        name = ascii_filename(raw_name).replace(' ', '_') if raw_name else raw_name
        alt = pr.get('descr', None)
        for a in XPath('descendant::a:blip[@r:embed]')(pic):
            rid = get(a, 'r:embed')
            if rid not in self.rid_map:
                continue
            src = self.generate_filename(rid, name)
            img = IMG(src='images/%s' % src)
            img.set('alt', alt or 'Image')
            if link is not None:
                self.links.append((img, link))
            return img
def pic_to_img(self, pic, alt, parent):
    """
    Convert a picture element into an ``IMG`` element.

    The last hyperlink found under ``parent`` (if any) is recorded in
    ``self.links`` together with the image and ``self.rid_map``. The name
    and alt text come from the picture's ``cNvPr`` element. The first
    embedded or linked blip whose relationship id is in ``self.rid_map`` is
    written out via ``self.generate_filename``; linked images that cannot be
    found are skipped with a warning.

    Returns the ``IMG`` element, or ``None`` when no image could be created.
    """
    XPath, get = self.namespace.XPath, self.namespace.get
    name = link = None
    for hl in XPath("descendant::a:hlinkClick[@r:id]")(parent):
        link = {"id": get(hl, "r:id")}
        for attr, key in (("tgtFrame", "target"), ("tooltip", "title")):
            value = hl.get(attr, None)
            if value:
                link[key] = value

    for pr in XPath("descendant::pic:cNvPr")(pic):
        raw_name = pr.get("name", None)
        name = ascii_filename(raw_name).replace(" ", "_") if raw_name else raw_name
        alt = pr.get("descr", None)
        for a in XPath("descendant::a:blip[@r:embed or @r:link]")(pic):
            rid = get(a, "r:embed") or get(a, "r:link")
            if not (rid and rid in self.rid_map):
                continue
            try:
                src = self.generate_filename(rid, name)
            except LinkedImageNotFound as err:
                self.log.warn("Linked image: %s not found, ignoring" % err.fname)
                continue
            img = IMG(src="images/%s" % src)
            img.set("alt", alt or "Image")
            if link is not None:
                self.links.append((img, link, self.rid_map))
            return img
def _add_item(self, oeb, title, path):
    '''
    Register the HTML file at ``path`` in the book's manifest, spine and
    table of contents.

    The manifest href is generated from the ASCII form of the file's base
    name; the original base name is kept on the item as
    ``html_input_href``.
    '''
    bname = os.path.basename(path)
    item_id, item_href = oeb.manifest.generate(id='html', href=ascii_filename(bname))
    item = oeb.manifest.add(item_id, item_href, 'text/html')
    item.html_input_href = bname
    oeb.spine.add(item, True)
    oeb.toc.add(title, item.href)
def __init__(self, gui, name, config=None, base_plugin=None):
    '''
    Remember the GUI, store name and base plugin.

    When ``config`` is ``None`` the store's JSON config is opened from
    ``store/stores/<ascii file name of the store>``.
    '''
    self.gui, self.name, self.base_plugin = gui, name, base_plugin
    if config is not None:
        self.config = config
        return
    from calibre.gui2 import JSONConfig
    self.config = JSONConfig('store/stores/' + ascii_filename(self.name))
def book_filename(rd, book_id, mi, fmt):
    '''
    Build the download file name ``<title> - <authors>_<book_id>.<ext>``.

    Title and authors are each limited to 30 characters and fall back to the
    translated ``Unknown``. A ``kepub`` format requested by a client whose
    User-Agent contains ``Kobo Touch`` gets the extension ``kepub.epub``.
    The result is passed through ``ascii_filename`` and double quotes are
    replaced with underscores.
    '''
    authors = authors_to_string(mi.authors or [_('Unknown')])
    title = mi.title or _('Unknown')
    ext = (fmt or '').lower()
    wants_kobo_name = ext == 'kepub' and 'Kobo Touch' in rd.inheaders.get('User-Agent', '')
    if wants_kobo_name:
        ext = 'kepub.epub'
    raw_name = '%s - %s_%s.%s' % (title[:30], authors[:30], book_id, ext)
    return ascii_filename(raw_name).replace('"', '_')
def get_format(self, id, format):
    '''
    Serve one format of a book as an HTTP attachment.

    Raises a 404 ``cherrypy.HTTPError`` when the book does not have the
    format. For MOBI, EPUB and AZW3 the book's metadata (after applying a
    content-server plugboard, if one matches) is written into the file
    before it is sent. Sets the Last-Modified, Content-Type, Content-Length
    and Content-Disposition headers and returns the open file object.
    '''
    format = format.upper()
    fmt_lower = format.lower()
    fm = self.db.format_metadata(id, format, allow_cache=False)
    if not fm:
        raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
    update_metadata = format in {'MOBI', 'EPUB', 'AZW3'}
    mi = newmi = self.db.get_metadata(
        id, index_is_id=True, cover_as_data=True,
        get_cover=update_metadata)

    cherrypy.response.headers['Last-Modified'] = \
        self.last_modified(max(fm['mtime'], mi.last_modified))

    fmt = self.db.format(id, format, index_is_id=True, as_file=True,
            mode='rb')
    if fmt is None:
        raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))

    guessed = guess_type('dummy.'+fmt_lower)[0]
    mt = 'application/octet-stream' if guessed is None else guessed
    cherrypy.response.headers['Content-Type'] = mt

    if fmt_lower in plugboard_content_server_formats:
        # Get any plugboards for the content server
        plugboards = self.db.prefs.get('plugboards', {})
        cpb = find_plugboard(plugboard_content_server_value,
                             fmt_lower, plugboards)
        if cpb:
            # Transform the metadata via the plugboard
            newmi = mi.deepcopy_metadata()
            newmi.template_to_attribute(mi, cpb)

    if update_metadata:
        # Write the updated file
        from calibre.ebooks.metadata.meta import set_metadata
        set_metadata(fmt, newmi, fmt_lower)
        fmt.seek(0)

    # Measure the file by seeking to its end, then rewind for sending.
    fmt.seek(0, 2)
    cherrypy.response.headers['Content-Length'] = fmt.tell()
    fmt.seek(0)

    ua = cherrypy.request.headers.get('User-Agent', '').strip()
    have_kobo_browser = self.is_kobo_browser(ua)
    if have_kobo_browser and fmt_lower == "kepub":
        file_extension = "kepub.epub"
    else:
        file_extension = format

    au = authors_to_string(newmi.authors if newmi.authors else [_('Unknown')])
    title = newmi.title if newmi.title else _('Unknown')
    fname = u'%s - %s_%s.%s'%(title[:30], au[:30], id, file_extension.lower())
    fname = ascii_filename(fname).replace('"', '_')
    cherrypy.response.headers['Content-Disposition'] = \
            b'attachment; filename="%s"'%fname
    cherrypy.response.body = fmt
    cherrypy.response.timeout = 3600
    return fmt
def to_xml(self, write_files=True):
    '''
    Render the document as a ``BBeBXylog`` XML string.

    The output consists of the book information, the page trees (the first
    one as ``<Main>``), the style objects and the remaining objects that
    were not already emitted as part of a page tree.

    :param write_files: When true, the thumbnail (if any) is written to the
        current directory and ``self.write_files()`` is called.
    '''
    bookinfo = u'<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
    bookinfo += u'<Title reading="%s">%s</Title>\n'%(self.metadata.title_reading, self.metadata.title)
    bookinfo += u'<Author reading="%s">%s</Author>\n'%(self.metadata.author_reading, self.metadata.author)
    bookinfo += u'<BookID>%s</BookID>\n'%(self.metadata.book_id,)
    bookinfo += u'<Publisher reading="">%s</Publisher>\n'%(self.metadata.publisher,)
    bookinfo += u'<Label reading="">%s</Label>\n'%(self.metadata.label,)
    bookinfo += u'<Category reading="">%s</Category>\n'%(self.metadata.category,)
    bookinfo += u'<Classification reading="">%s</Classification>\n'%(self.metadata.classification,)
    bookinfo += u'<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n'%(self.metadata.free_text,)
    th = self.doc_info.thumbnail
    if th:
        prefix = ascii_filename(self.metadata.title)
        thumbnail_name = prefix+'_thumbnail.'+self.doc_info.thumbnail_extension
        bookinfo += u'<CThumbnail file="%s" />\n'%(thumbnail_name,)
        if write_files:
            # Use a context manager so the handle is closed (and the data
            # flushed) right away instead of whenever the object is collected.
            with open(thumbnail_name, 'wb') as thumbnail_file:
                thumbnail_file.write(th)
    bookinfo += u'<Language reading="">%s</Language>\n'%(self.doc_info.language,)
    bookinfo += u'<Creator reading="">%s</Creator>\n'%(self.doc_info.creator,)
    bookinfo += u'<Producer reading="">%s</Producer>\n'%(self.doc_info.producer,)
    bookinfo += u'<SumPage>%s</SumPage>\n</DocInfo>\n</Info>\n%s</BookInformation>\n'%(self.doc_info.page,self.toc)
    pages = u''
    done_main = False
    pt_id = -1
    for page_tree in self:
        if not done_main:
            # The first page tree is the main one.
            done_main = True
            pages += u'<Main>\n'
            close = u'</Main>\n'
            pt_id = page_tree.id
        else:
            pages += u'<PageTree objid="%d">\n'%(page_tree.id,)
            close = u'</PageTree>\n'
        for page in page_tree:
            pages += unicode(page)
        pages += close
    # Ids already present in the page markup must not be emitted again.
    traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]

    objects = u'\n<Objects>\n'
    styles = u'\n<Style>\n'
    for obj in self.objects:
        obj = self.objects[obj]
        if obj.id in traversed_objects:
            continue
        if isinstance(obj, (Font, Text, TOCObject)):
            continue
        if isinstance(obj, StyleObject):
            styles += unicode(obj)
        else:
            objects += unicode(obj)
    styles += '</Style>\n'
    objects += '</Objects>\n'
    if write_files:
        self.write_files()
    return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>'
def process_images(self, soup, baseurl):
    '''
    Save every image referenced by ``<img src>`` in ``soup`` to the
    ``images`` sub-directory of ``self.current_dir`` and point the tag at
    the saved file.

    ``data:image/`` URLs are decoded in place; other URLs are optionally
    rewritten by ``self.image_url_processor``, resolved against ``baseurl``
    and fetched with ``self.fetch_url``. URLs already present in
    ``self.imagemap`` are reused. SVG data is stored unchanged, everything
    else is re-encoded as JPEG. Images that cannot be decoded, fetched or
    converted are skipped.
    '''
    diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
    if not os.path.exists(diskpath):
        os.mkdir(diskpath)
    c = 0
    for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
        iurl = tag['src']
        if iurl.startswith('data:image/'):
            try:
                data = b64decode(iurl.partition(',')[-1])
            except Exception:
                # Skip undecodable images without swallowing
                # KeyboardInterrupt/SystemExit.
                self.log.exception('Failed to decode embedded image')
                continue
        else:
            if callable(self.image_url_processor):
                iurl = self.image_url_processor(baseurl, iurl)
            if not urlparse.urlsplit(iurl).scheme:
                iurl = urlparse.urljoin(baseurl, iurl, False)
            with self.imagemap_lock:
                if self.imagemap.has_key(iurl):
                    tag['src'] = self.imagemap[iurl]
                    continue
            try:
                data = self.fetch_url(iurl)
                if data == 'GIF89a\x01':
                    # Skip empty GIF files as PIL errors on them anyway
                    continue
            except Exception:
                self.log.exception('Could not fetch image ', iurl)
                continue
        c += 1
        fname = ascii_filename('img'+str(c))
        if isinstance(fname, unicode):
            fname = fname.encode('ascii', 'replace')
        imgpath = os.path.join(diskpath, fname+'.jpg')
        if (imghdr.what(None, data) is None and b'<svg' in data[:1024]):
            # SVG image
            imgpath = os.path.join(diskpath, fname+'.svg')
            with self.imagemap_lock:
                self.imagemap[iurl] = imgpath
            with open(imgpath, 'wb') as x:
                x.write(data)
            tag['src'] = imgpath
        else:
            try:
                im = Image.open(StringIO(data)).convert('RGBA')
                with self.imagemap_lock:
                    self.imagemap[iurl] = imgpath
                with open(imgpath, 'wb') as x:
                    im.save(x, 'JPEG')
                tag['src'] = imgpath
            except Exception:
                # Conversion is best effort; report and move on.
                traceback.print_exc()
                continue
def create_filename(self, href, fmt):
    '''
    Derive a file name for ``href`` that has not been handed out before.

    The unquoted base name of ``href`` is passed through ``ascii_filename``,
    stripped of its extension, limited to 75 characters and stripped of
    trailing dots (``image`` when nothing is left). A counter is appended
    until the lower-cased name is not in ``self.seen_filenames``; the chosen
    name is recorded there. The lower-cased ``fmt`` becomes the extension.
    '''
    unquoted = urlunquote(posixpath.basename(href))
    stem = posixpath.splitext(ascii_filename(unquoted))[0]
    stem = stem[:75].rstrip('.') or 'image'
    candidate, counter = stem, 0
    while candidate.lower() in self.seen_filenames:
        counter += 1
        candidate = stem + str(counter)
    self.seen_filenames.add(candidate.lower())
    return candidate + os.extsep + fmt.lower()
def book_filename(rd, book_id, mi, fmt, as_encoded_unicode=False):
    '''
    Build the download file name ``<title> - <authors>_<book_id>.<ext>``.

    Title and authors are each limited to 30 characters and fall back to the
    translated ``Unknown``. A ``kepub`` format requested by a client whose
    User-Agent contains ``Kobo Touch`` gets the extension ``kepub.epub``.

    With ``as_encoded_unicode`` the name is sanitized, UTF-8 encoded and
    percent-quoted; otherwise it is passed through ``ascii_filename`` and
    double quotes are replaced with underscores.
    '''
    authors = authors_to_string(mi.authors or [_('Unknown')])
    title = mi.title or _('Unknown')
    ext = (fmt or '').lower()
    if ext == 'kepub' and 'Kobo Touch' in rd.inheaders.get('User-Agent', ''):
        ext = 'kepub.epub'
    raw_name = '%s - %s_%s.%s' % (title[:30], authors[:30], book_id, ext)
    if not as_encoded_unicode:
        return ascii_filename(raw_name).replace('"', '_')
    # See https://tools.ietf.org/html/rfc6266
    encoded = sanitize_file_name(raw_name).encode('utf-8')
    return unicode_type(quote(encoded))
def construct_path_name(self, book_id):
    '''
    Build a path element of the form ``<prefix>/<series>`` for a book.

    Both parts are cut to ``self.PATH_LIMIT`` characters and passed through
    ``ascii_filename``. Returns ``None`` when the book has no series or when
    anything goes wrong (the traceback is printed). Diagnostic output goes
    to stdout.
    '''
    print(" PbSeriesS is %s" % (__name__))
    print(" PbSeriesS: self : %s" % (self))
    print(" PbSeriesS: database: %s" % (self.database))
    print(" PbSeriesS: book id : %s" % (book_id))
    path_element = None
    try:
        series = self.database.series(book_id, index_is_id=True)
        print(" PbSourceS: series: '%s'" % (series))
        if series:
            series_prefix = self.series_prefix
            series_prefix = ascii_filename(series_prefix[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
            series_name = ascii_filename(series[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
            path_element = series_prefix + "/" + series_name
    except Exception:
        # Deliberate best effort: report the problem and return None, but let
        # KeyboardInterrupt/SystemExit propagate.
        traceback.print_exc()
    return path_element
def get_embed_font_info(self, family, failure_critical=True):
    '''
    Add every face of ``family`` to the book's manifest and build the
    matching ``@font-face`` rules.

    :param family: Font family name; a falsy value is a no-op.
    :param failure_critical: When no faces are found, raise ``ValueError``
        if true, otherwise log a warning.
    :return: ``(body_font_family, rules)``. ``body_font_family`` is a CSS
        font-family value built from the first face (or ``None``), and
        ``rules`` is the list of parsed ``@font-face`` rules.
    '''
    efi = []
    body_font_family = None
    if not family:
        return body_font_family, efi
    from calibre.utils.fonts.scanner import font_scanner, NoFonts
    from calibre.utils.fonts.utils import panose_to_css_generic_family

    def no_fonts_found():
        # Shared handling for both "scanner raised" and "no faces returned".
        msg = (u'No embeddable fonts found for family: %r'%family)
        if failure_critical:
            raise ValueError(msg)
        self.oeb.log.warn(msg)
        return body_font_family, efi

    try:
        faces = font_scanner.fonts_for_family(family)
    except NoFonts:
        return no_fonts_found()
    if not faces:
        return no_fonts_found()

    for index, face in enumerate(faces):
        ext = 'otf' if face['is_otf'] else 'ttf'
        font_file = ascii_filename(face['full_name']).replace(u' ', u'-')
        fid, href = self.oeb.manifest.generate(id=u'font',
            href=u'fonts/%s.%s'%(font_file, ext))
        item = self.oeb.manifest.add(fid, href,
                guess_type('dummy.'+ext)[0],
                data=font_scanner.get_font_data(face))
        item.unload_data_from_memory()

        cfont = {
                u'font-family':u'"%s"'%face['font-family'],
                u'panose-1': u' '.join(map(unicode_type, face['panose'])),
                u'src': u'url(%s)'%item.href,
        }

        if index == 0:
            generic_family = panose_to_css_generic_family(face['panose'])
            body_font_family = u"'%s',%s"%(face['font-family'], generic_family)
            self.oeb.log(u'Embedding font: %s'%face['font-family'])
        for prop in (u'font-weight', u'font-style', u'font-stretch'):
            # Only non-default values need to be spelled out in the rule.
            if face[prop] != u'normal':
                cfont[prop] = face[prop]
        declarations = '; '.join(u'%s:%s'%(k, v) for k, v in iteritems(cfont))
        rule = '@font-face { %s }'%(declarations)
        efi.append(css_parser.parseString(rule))

    return body_font_family, efi
def do_embed(f):
    '''
    Embed the font face ``f``: add its data to the manifest, insert a
    matching ``@font-face`` rule at the end of the page style sheet and
    return the first font-face rule found in the newly parsed sheet.
    '''
    data = font_scanner.get_font_data(f)
    ext = 'otf' if f['is_otf'] else 'ttf'
    name = ascii_filename(f['full_name']).replace(' ', '-')
    for paren in ('(', ')'):
        name = name.replace(paren, '')
    fid, href = self.oeb.manifest.generate(id=u'font',
                                           href=u'fonts/%s.%s'%(name, ext))
    item = self.oeb.manifest.add(fid, href, guess_type('dummy.'+ext)[0], data=data)
    item.unload_data_from_memory()
    page_sheet = self.get_page_sheet()
    # The rule lives in the page style sheet, so the URL must be relative
    # to that sheet.
    href = page_sheet.relhref(item.href)
    css = '''@font-face { font-family: "%s"; font-weight: %s; font-style: %s; font-stretch: %s; src: url(%s) }''' % (
        f['font-family'], f['font-weight'], f['font-style'], f['font-stretch'], href)
    sheet = self.parser.parseString(css, validate=False)
    rules = page_sheet.data.cssRules
    page_sheet.data.insertRule(sheet.cssRules[0], len(rules))
    return find_font_face_rules(sheet, self.oeb)[0]
def __init__(self, name):
    '''
    Describe a file name that contains unsafe characters.

    ``self.sname`` is the suggested replacement: every ``/``-separated
    component is passed through ``ascii_filename`` and any character not in
    ``URL_SAFE`` is replaced with an underscore.
    '''
    from calibre.utils.filenames import ascii_filename
    BaseError.__init__(self, _('Filename contains unsafe characters'), name)
    qname = urlquote(name)

    def esc(n):
        return ''.join(x if x in URL_SAFE else '_' for x in n)

    safe_parts = [esc(ascii_filename(part)) for part in name.split('/')]
    self.sname = '/'.join(safe_parts)
    help_template = _(
        'The filename {0} contains unsafe characters, that must be escaped, like'
        ' this {1}. This can cause problems with some ebook readers. To be'
        ' absolutely safe, use only the English alphabet [a-z], the numbers [0-9],'
        ' underscores and hyphens in your file names. While many other characters'
        ' are allowed, they may cause problems with some software.')
    self.HELP = help_template.format(name, qname)
    self.INDIVIDUAL_FIX = _(
        'Rename the file {0} to {1}').format(name, self.sname)
def sanitize_file_name(x):
    '''
    Return a cleaned-up version of the file name ``x``.

    The name is passed through ``ascii_filename``, the characters
    ``?&=;#`` are replaced with underscores, runs of whitespace are
    collapsed to a single space, and surrounding whitespace and trailing
    dots are removed. Whitespace around the dot separating the name from
    its extension is removed as well.
    '''
    ans = re.sub(r'\s+', ' ', re.sub(r'[?&=;#]', '_', ascii_filename(x))).strip().rstrip('.')
    stem, dot, ext = ans.rpartition('.')
    if not dot:
        # No extension: rpartition() puts the whole name in the last slot, so
        # re-joining with '.' used to turn 'name' into '.name'.
        return ans
    return (stem.strip() + '.' + ext.strip()).rstrip('.')
def extract_content(self, output_dir, parse_cache):
    '''
    Extract the book's markup and images and write the HTML, OPF, NCX and
    CSS artefacts.

    :param output_dir: Directory that receives the images, the OPF and the
        NCX; made absolute first.
    :param parse_cache: Dict that receives the parsed HTML tree under the
        path of the (not separately written) HTML file, plus the raw markup
        under ``calibre_raw_mobi_markup`` when ``self.debug`` is set.

    The markup is cleaned up, parsed leniently (falling back to the
    BeautifulSoup based parser for malformed markup), normalised to a single
    ``<html>`` with ``<head>`` and ``<body>``, and stripped of scripts,
    guide and metadata elements.
    '''
    output_dir = os.path.abspath(output_dir)
    self.check_for_drm()
    processed_records = self.extract_text()
    if self.debug is not None:
        parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
    self.add_anchors()
    self.processed_html = self.processed_html.decode(
        self.book_header.codec, 'ignore')
    self.processed_html = self.processed_html.replace('</</', '</')
    self.processed_html = re.sub(r'</([a-zA-Z]+)<', r'</\1><',
                                 self.processed_html)
    self.processed_html = self.processed_html.replace(u'\ufeff', '')
    # Remove tags of the form <xyz: ...> as they can cause issues further
    # along the pipeline
    self.processed_html = re.sub(r'</{0,1}[a-zA-Z]+:\s+[^>]*>', '',
                                 self.processed_html)

    for pat in ENCODING_PATS:
        self.processed_html = pat.sub('', self.processed_html)
    self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
                                 self.processed_html)
    self.extract_images(processed_records, output_dir)
    self.replace_page_breaks()
    self.cleanup_html()

    self.log.debug('Parsing HTML...')
    self.processed_html = clean_ascii_chars(self.processed_html)
    try:
        root = html.fromstring(self.processed_html)
        if len(root.xpath('//html')) > 5:
            root = html.fromstring(
                self.processed_html.replace('\x0c', '').replace('\x14', ''))
    except Exception:
        # Retry after stripping random bytes; unlike a bare except this lets
        # KeyboardInterrupt/SystemExit through.
        self.log.warning(
            'MOBI markup appears to contain random bytes. Stripping.')
        self.processed_html = self.remove_random_bytes(self.processed_html)
        root = html.fromstring(self.processed_html)
    if root.xpath('descendant::p/descendant::p'):
        from calibre.utils.soupparser import fromstring
        self.log.warning('Malformed markup, parsing using BeautifulSoup')
        try:
            root = fromstring(self.processed_html)
        except Exception:
            self.log.warning(
                'MOBI markup appears to contain random bytes. Stripping.')
            self.processed_html = self.remove_random_bytes(
                self.processed_html)
            root = fromstring(self.processed_html)
        if len(root.xpath('body/descendant::*')) < 1:
            # There are probably stray </html>s in the markup
            self.processed_html = self.processed_html.replace(
                '</html>', '')
            root = fromstring(self.processed_html)

    if root.tag != 'html':
        self.log.warn('File does not have opening <html> tag')
        nroot = html.fromstring('<html><head></head><body></body></html>')
        bod = nroot.find('body')
        for child in list(root):
            child.getparent().remove(child)
            bod.append(child)
        root = nroot

    htmls = list(root.xpath('//html'))

    if len(htmls) > 1:
        self.log.warn('Markup contains multiple <html> tags, merging.')
        # Merge all <head> and <body> sections
        for h in htmls:
            p = h.getparent()
            if hasattr(p, 'remove'):
                p.remove(h)
        bodies, heads = root.xpath('//body'), root.xpath('//head')
        for x in root:
            root.remove(x)
        head, body = map(root.makeelement, ('head', 'body'))
        for h in heads:
            for x in h:
                h.remove(x)
                head.append(x)
        for b in bodies:
            for x in b:
                b.remove(x)
                body.append(x)
        root.append(head)
        root.append(body)
    for x in root.xpath('//script'):
        x.getparent().remove(x)

    head = root.xpath('//head')
    if head:
        head = head[0]
    else:
        head = root.makeelement('head', {})
        root.insert(0, head)
    head.text = '\n\t'
    link = head.makeelement('link', {
        'type': 'text/css',
        'href': 'styles.css',
        'rel': 'stylesheet'
    })
    head.insert(0, link)
    link.tail = '\n\t'
    title = head.xpath('descendant::title')
    m = head.makeelement('meta', {
        'http-equiv': 'Content-Type',
        'content': 'text/html; charset=utf-8'
    })
    head.insert(0, m)
    if not title:
        title = head.makeelement('title', {})
        title.text = self.book_header.title
        title.tail = '\n\t'
        head.insert(0, title)
        head.text = '\n\t'

    self.upshift_markup(root)
    guides = root.xpath('//guide')
    guide = guides[0] if guides else None
    metadata_elems = root.xpath('//metadata')
    if metadata_elems and self.book_header.exth is None:
        self.read_embedded_metadata(root, metadata_elems[0], guide)
    for elem in guides + metadata_elems:
        elem.getparent().remove(elem)
    fname = self.name.encode('ascii', 'replace')
    fname = re.sub(r'[\x08\x15\0]+', '', fname)
    if not fname:
        fname = 'dummy'
    htmlfile = os.path.join(output_dir, ascii_filename(fname) + '.html')
    try:
        for ref in guide.xpath('descendant::reference'):
            if ref.attrib.has_key('href'):
                ref.attrib['href'] = os.path.basename(
                    htmlfile) + ref.attrib['href']
    except AttributeError:
        # guide is None when the markup has no <guide> element.
        pass
    parse_cache[htmlfile] = root
    self.htmlfile = htmlfile
    ncx = cStringIO.StringIO()
    opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
    self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
    # Context managers close (and flush) the output files deterministically;
    # the handles used to be left for the garbage collector.
    with open(self.created_opf_path, 'wb') as opf_file:
        opf.render(opf_file, ncx,
                   ncx_manifest_entry=ncx_manifest_entry)
    ncx = ncx.getvalue()
    if ncx:
        ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
        with open(ncx_path, 'wb') as ncx_file:
            ncx_file.write(ncx)

    # NOTE(review): styles.css is written relative to the current working
    # directory, not output_dir - confirm that callers rely on this.
    with open('styles.css', 'wb') as s:
        s.write(self.base_css_rules + '\n\n')
        for cls, rule in self.tag_css_rules.items():
            if isinstance(rule, unicode):
                rule = rule.encode('utf-8')
            s.write('.%s { %s }\n\n' % (cls, rule))

    if self.book_header.exth is not None or self.embedded_mi is not None:
        self.log.debug('Creating OPF...')
        ncx = cStringIO.StringIO()
        opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
        with open(os.path.splitext(htmlfile)[0] + '.opf', 'wb') as opf_file:
            opf.render(opf_file, ncx, ncx_manifest_entry)
        ncx = ncx.getvalue()
        if ncx:
            with open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb') as ncx_file:
                ncx_file.write(ncx)
def send_by_mail(self, to, fmts, delete_from_library, subject='', send_ids=None,
                 do_auto_convert=True, specific_format=None):
    """Email books to ``to``, offering to auto-convert those lacking a suitable format.

    :param to: Recipient address; used for every queued mail.
    :param fmts: Acceptable formats, passed to the library model and matched
        against the available output formats.
    :param delete_from_library: When true, the ids of the books that were
        sent are handed to ``self.email_sent`` as ``remove``.
    :param subject: Optional subject template expanded with
        ``get_components``; when empty a default "E-book: <title>" is used.
    :param send_ids: Explicit book ids; when ``None`` the current selection
        of ``self.library_view`` is used.
    :param do_auto_convert: When true, books without a sendable format are
        collected and offered for automatic conversion.
    :param specific_format: Restrict sending/conversion to this format.
    :return: ``None``.
    """
    if send_ids is None:
        ids = [self.library_view.model().id(r)
               for r in self.library_view.selectionModel().selectedRows()]
    else:
        ids = send_ids
    if not ids:
        return

    model = self.library_view.model()
    files, _auto_ids = model.get_preferred_formats_from_ids(
        ids, fmts, set_metadata=True, specific_format=specific_format,
        exclude_auto=do_auto_convert, use_plugboard=plugboard_email_value,
        plugboard_formats=plugboard_email_formats)
    if do_auto_convert:
        # Keep the original ordering of ids, dropping those that need
        # conversion. A set makes the membership test O(1).
        sendable = set(ids).difference(_auto_ids)
        ids = [i for i in ids if i in sendable]
    else:
        _auto_ids = []

    full_metadata = model.metadata_for(ids, get_cover=False)

    bad, remove_ids, jobnames = [], [], []
    texts, subjects, attachments, attachment_names = [], [], [], []
    for f, mi, book_id in zip(files, full_metadata, ids):
        t = mi.title
        if not t:
            t = _('Unknown')
        if f is None:
            bad.append(t)
            continue
        remove_ids.append(book_id)
        jobnames.append(t)
        attachments.append(f)
        if not subject:
            subjects.append(_('E-book:') + ' ' + t)
        else:
            components = get_components(subject, mi, book_id)
            if not components:
                components = [mi.title]
            subjects.append(os.path.join(*components))
        a = authors_to_string(mi.authors if mi.authors else [_('Unknown')])
        ext = os.path.splitext(f)[1]
        texts.append(_('Attached, you will find the e-book') +
                     '\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' +
                     _('in the %s format.') % ext[1:].upper())
        prefix = ascii_filename(t + ' - ' + a)
        if not isinstance(prefix, unicode):
            prefix = prefix.decode(preferred_encoding, 'replace')
        attachment_names.append(prefix + ext)
    remove = remove_ids if delete_from_library else []

    to_s = list(repeat(to, len(attachments)))
    if attachments:
        send_mails(jobnames,
                   Dispatcher(partial(self.email_sent, remove=remove)),
                   attachments, to_s, subjects, texts, attachment_names,
                   self.job_manager)
        self.status_bar.show_message(_('Sending email to') + ' ' + to, 3000)

    if _auto_ids:
        db = model.db
        # Requested formats that can actually be produced; computed once
        # instead of once per book and per candidate format.
        usable_outputs = set(fmts).intersection(available_output_formats())
        input_formats = (available_input_formats()
                         if specific_format is None else None)
        auto = []
        for book_id in _auto_ids:
            if specific_format is None:
                dbfmts = db.formats(book_id, index_is_id=True)
                formats = [x.lower()
                           for x in (dbfmts.split(',') if dbfmts else [])]
                convertible = bool(
                    set(formats).intersection(input_formats) and
                    usable_outputs)
            else:
                convertible = specific_format in usable_outputs
            if convertible:
                auto.append(book_id)
            else:
                bad.append(db.title(book_id, index_is_id=True))

        if auto:
            target_fmt = (specific_format
                          if specific_format in usable_outputs else None)
            if not target_fmt:
                # Fall back to the first requested format we can produce.
                for fmt in fmts:
                    if fmt in usable_outputs:
                        target_fmt = fmt
                        break
            if target_fmt is None:
                bad += auto
            else:
                autos = [db.title(book_id, index_is_id=True)
                         for book_id in auto]
                if self.auto_convert_question(
                        _('Auto convert the following books before sending via '
                          'email?'), autos):
                    self.iactions['Convert Books'].auto_convert_mail(
                        to, fmts, delete_from_library, auto, target_fmt,
                        subject)

    if bad:
        bad = '\n'.join('%s' % (i, ) for i in bad)
        d = warning_dialog(
            self, _('No suitable formats'),
            _('Could not email the following books '
              'as no suitable formats were found:'), bad)
        d.exec_()
def image_filename(x):
    """Return the ASCII file name for ``x`` with spaces and ``#`` turned into ``_``."""
    ascii_name = ascii_filename(x)
    without_spaces = ascii_name.replace(' ', '_')
    return without_spaces.replace('#', '_')
def process_links(self, soup, baseurl, recursion_level, into_dir='links'):
    """Fetch the pages linked from ``soup`` and point the links at local copies.

    Every ``<a href>`` in ``soup`` is resolved with ``self.absurl``; pages
    not already present in ``self.filemap`` are fetched, post-processed
    (images, optionally stylesheets, nested links up to
    ``self.max_recursions``) and saved as ``.xhtml`` under a ``linkN``
    directory inside ``into_dir``.

    :param soup: Parsed document whose links are processed.
    :param baseurl: URL used to resolve relative links.
    :param recursion_level: Current recursion depth.
    :param into_dir: Sub-directory of ``self.current_dir`` to save into.
    :return: Path of the last page saved by this call, or ``''`` when
        nothing was saved.
    :raises AbortArticle: re-raised as is; other fetch errors are recorded
        in ``self.failed_links`` and logged.
    """
    res = ''
    diskpath = os.path.join(self.current_dir, into_dir)
    if not os.path.exists(diskpath):
        os.mkdir(diskpath)
    prev_dir = self.current_dir
    try:
        self.current_dir = diskpath
        tags = list(soup.findAll('a', href=True))

        for c, tag in enumerate(tags):
            if self.show_progress:
                print('.', end=' ')
            sys.stdout.flush()
            iurl = self.absurl(baseurl, tag, 'href',
                               filter=recursion_level != 0)
            if not iurl:
                continue
            nurl = self.normurl(iurl)
            if nurl in self.filemap:  # noqa
                self.localize_link(tag, 'href', self.filemap[nurl])
                continue
            if self.files > self.max_files:
                return res
            linkdir = 'link' + str(c) if into_dir else ''
            linkdiskpath = os.path.join(diskpath, linkdir)
            if not os.path.exists(linkdiskpath):
                os.mkdir(linkdiskpath)
            try:
                self.current_dir = linkdiskpath
                dsrc = self.fetch_url(iurl)
                newbaseurl = dsrc.newurl
                if len(dsrc) == 0 or \
                   len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
                    raise ValueError('No content at URL %r' % iurl)
                if callable(self.encoding):
                    dsrc = self.encoding(dsrc)
                elif self.encoding is not None:
                    dsrc = dsrc.decode(self.encoding, 'replace')
                else:
                    dsrc = xml_to_unicode(dsrc, self.verbose)[0]

                st = time.time()
                page_soup = self.get_soup(dsrc, url=iurl)
                self.log.debug('Parsed %s in %.1f seconds' %
                               (iurl, time.time() - st))

                base = page_soup.find('base', href=True)
                if base is not None:
                    newbaseurl = base['href']
                self.log.debug('Processing images...')
                self.process_images(page_soup, newbaseurl)
                if self.download_stylesheets:
                    self.process_stylesheets(page_soup, newbaseurl)

                _fname = basename(iurl)
                # The decoded value used to be thrown away; keep it so the
                # rest of the pipeline always works on text.
                if isinstance(_fname, bytes):
                    _fname = _fname.decode('latin1', 'replace')
                # Replace non-ASCII characters but stay in text: calling
                # .replace('%', '') on the encoded bytes raises TypeError on
                # Python 3 and made every link fail.
                _fname = _fname.encode('ascii', 'replace').decode('ascii')
                _fname = _fname.replace('%', '').replace(os.sep, '')
                _fname = ascii_filename(_fname)
                _fname = os.path.splitext(_fname)[0][:120] + '.xhtml'
                res = os.path.join(linkdiskpath, _fname)
                self.downloaded_paths.append(res)
                self.filemap[nurl] = res
                if recursion_level < self.max_recursions:
                    self.log.debug('Processing links...')
                    self.process_links(page_soup, newbaseurl,
                                       recursion_level + 1)
                else:
                    self.process_return_links(page_soup, newbaseurl)
                    self.log.debug(
                        'Recursion limit reached. Skipping links in', iurl)

                if newbaseurl and not newbaseurl.startswith('/'):
                    for atag in page_soup.findAll(
                            'a', href=lambda x: x and x.startswith('/')):
                        atag['href'] = urllib.parse.urljoin(
                            newbaseurl, atag['href'], True)
                if callable(self.postprocess_html_ext):
                    page_soup = self.postprocess_html_ext(
                        page_soup,
                        c == 0 and recursion_level == 0 and
                        not getattr(self, 'called_first', False),
                        self.job_info)

                    if c == 0 and recursion_level == 0:
                        self.called_first = True

                save_soup(page_soup, res)
                self.localize_link(tag, 'href', res)
            except Exception as err:
                if isinstance(err, AbortArticle):
                    raise
                self.failed_links.append((iurl, traceback.format_exc()))
                self.log.exception('Could not fetch link', iurl)
            finally:
                # Count the attempt and return to the links directory even
                # when the fetch failed.
                self.current_dir = diskpath
                self.files += 1
    finally:
        self.current_dir = prev_dir
    if self.show_progress:
        print()
    return res
def image_filename(x):
    """Build an image file name from ``x``.

    Characters of the ASCII file name other than letters, digits, ``.`` and
    ``-`` become ``_``; leading underscores and then leading dots are
    stripped before the result is passed to ``sanitize_file_name``.
    """
    ascii_name = ascii_filename(x)
    underscored = re.sub(r'[^0-9a-zA-Z.-]', '_', ascii_name)
    trimmed = underscored.lstrip('_').lstrip('.')
    return sanitize_file_name(trimmed)
def sanitize_file_name(x):
    """Return the ASCII file name for ``x`` with ``?``, ``&``, ``=``, ``;`` and ``#`` replaced by ``_``."""
    ascii_name = ascii_filename(x)
    return re.sub(r'[?&=;#]', '_', ascii_name)
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an SNB stream into an OEB book.

    Metadata is read from ``snbf/book.snbf``, chapters listed in
    ``snbf/toc.snbf`` are written as ``ch_N.htm`` files into a temporary
    directory (kept after the call) and registered in the manifest, spine
    and TOC; image files extracted by the SNB reader are added to the
    manifest.

    :param stream: Input handed to ``SNBFile.Parse``.
    :param options: Conversion options; ``options.input_encoding`` is used.
    :param file_ext: Unused by this implementation.
    :param log: Logger used for debug output.
    :param accelerators: Unused by this implementation.
    :return: The populated OEB book.
    :raises ValueError: if the stream cannot be parsed or is not valid SNB.
    """
    import uuid
    from lxml import etree

    from calibre.ebooks.oeb.base import DirContainer
    from calibre.ebooks.snb.snbfile import SNBFile

    log.debug("Parsing SNB file...")
    snbFile = SNBFile()
    try:
        snbFile.Parse(stream)
    except Exception:
        # Only real parse failures are reported as an invalid file; a bare
        # except would also swallow KeyboardInterrupt/SystemExit.
        raise ValueError("Invalid SNB file")
    if not snbFile.IsValid():
        log.debug("Invalid SNB file")
        raise ValueError("Invalid SNB file")
    log.debug("Handle meta data ...")
    from calibre.ebooks.conversion.plumber import create_oebbook
    oeb = create_oebbook(log, None, options,
                         encoding=options.input_encoding, populate=False)
    meta = snbFile.GetFileStream('snbf/book.snbf')
    if meta is not None:
        meta = etree.fromstring(meta)
        paths = {
            'title': './/head/name',
            'creator': './/head/author',
            'language': './/head/language',
            'generator': './/head/generator',
            'publisher': './/head/publisher',
            'cover': './/head/cover',
        }
        d = {}
        for field, path in paths.items():
            node = meta.find(path)
            # Missing nodes and empty nodes both map to the empty string.
            if node is not None and node.text is not None:
                d[field] = node.text
            else:
                d[field] = ''

        oeb.metadata.add('title', d['title'])
        oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
        oeb.metadata.add('language',
                         d['language'].lower().replace('_', '-'))
        oeb.metadata.add('generator', d['generator'])
        oeb.metadata.add('publisher', d['publisher'])
        if d['cover'] != '':
            oeb.guide.add('cover', 'Cover', d['cover'])

    bookid = unicode_type(uuid.uuid4())
    oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
    for ident in oeb.metadata.identifier:
        if 'id' in ident.attrib:
            oeb.uid = oeb.metadata.identifier[0]
            break

    with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
        log.debug('Process TOC ...')
        toc = snbFile.GetFileStream('snbf/toc.snbf')
        oeb.container = DirContainer(tdir, log)
        if toc is not None:
            toc = etree.fromstring(toc)
            i = 1
            for ch in toc.find('.//body'):
                chapterName = ch.text
                chapterSrc = ch.get('src')
                fname = 'ch_%d.htm' % i
                data = snbFile.GetFileStream('snbc/' + chapterSrc)
                if data is None:
                    # Skipped chapters do not consume a chapter number.
                    continue
                snbc = etree.fromstring(data)
                lines = []
                for line in snbc.find('.//body'):
                    if line.tag == 'text':
                        lines.append('<p>%s</p>' % html_encode(line.text))
                    elif line.tag == 'img':
                        lines.append('<p><img src="%s" /></p>' %
                                     html_encode(line.text))
                with open(os.path.join(tdir, fname), 'wb') as f:
                    f.write((HTML_TEMPLATE %
                             (chapterName, '\n'.join(lines))).encode(
                                 'utf-8', 'replace'))
                oeb.toc.add(ch.text, fname)
                item_id, href = oeb.manifest.generate(
                    id='html', href=ascii_filename(fname))
                item = oeb.manifest.add(item_id, href, 'text/html')
                item.html_input_href = fname
                oeb.spine.add(item, True)
                i = i + 1
            imageFiles = snbFile.OutputImageFiles(tdir)
            for f, m in imageFiles:
                item_id, href = oeb.manifest.generate(
                    id='image', href=ascii_filename(f))
                item = oeb.manifest.add(item_id, href, m)
                item.html_input_href = f

    return oeb
def get_format(self, id, format):
    """Serve the ``format`` file of book ``id`` as a download.

    Sets the Last-Modified, Content-Type, Content-Length and
    Content-Disposition response headers. For MOBI, EPUB and AZW3 the
    (possibly plugboard-transformed) metadata is written into the file
    before it is returned.

    :return: The open file object, which is also set as the response body.
    :raises cherrypy.HTTPError: 404 when the book lacks the format.
    """
    format = format.upper()
    ext = format.lower()

    def not_found():
        # Built lazily so the message is only formatted on failure.
        return cherrypy.HTTPError(
            404, 'book: %d does not have format: %s' % (id, format))

    fm = self.db.format_metadata(id, format, allow_cache=False)
    if not fm:
        raise not_found()

    update_metadata = format in {'MOBI', 'EPUB', 'AZW3'}
    mi = newmi = self.db.get_metadata(id, index_is_id=True,
                                      cover_as_data=True,
                                      get_cover=update_metadata)
    cherrypy.response.headers['Last-Modified'] = \
        self.last_modified(max(fm['mtime'], mi.last_modified))

    fmt = self.db.format(id, format, index_is_id=True, as_file=True,
                         mode='rb')
    if fmt is None:
        raise not_found()

    mt = guess_type('dummy.' + ext)[0]
    cherrypy.response.headers['Content-Type'] = (
        'application/octet-stream' if mt is None else mt)

    if ext in plugboard_content_server_formats:
        # Get any plugboards for the content server
        cpb = find_plugboard(plugboard_content_server_value, ext,
                             self.db.prefs.get('plugboards', {}))
        if cpb:
            # Transform the metadata via the plugboard
            newmi = mi.deepcopy_metadata()
            newmi.template_to_attribute(mi, cpb)

    if update_metadata:
        # Write the updated file
        from calibre.ebooks.metadata.meta import set_metadata
        set_metadata(fmt, newmi, ext)
        fmt.seek(0)

    # Measure the size by seeking to the end, then rewind for sending.
    fmt.seek(0, 2)
    cherrypy.response.headers['Content-Length'] = fmt.tell()
    fmt.seek(0)

    ua = cherrypy.request.headers.get('User-Agent', '').strip()
    if self.is_kobo_browser(ua) and ext == "kepub":
        file_extension = "kepub.epub"
    else:
        file_extension = format

    au = authors_to_string(newmi.authors or [_('Unknown')])
    title = newmi.title or _('Unknown')
    fname = u'%s - %s_%s.%s' % (title[:30], au[:30], id,
                                file_extension.lower())
    fname = ascii_filename(fname).replace('"', '_')
    cherrypy.response.headers['Content-Disposition'] = \
        b'attachment; filename="%s"' % fname
    cherrypy.response.body = fmt
    cherrypy.response.timeout = 3600
    return fmt
def make_filename_safe(name):
    """Make each ``/``-separated component of ``name`` ASCII and URL safe.

    Every component is converted with ``ascii_filename`` and any character
    not in ``URL_SAFE`` is replaced by ``_``.
    """
    from calibre.utils.filenames import ascii_filename

    safe_parts = []
    for component in name.split('/'):
        ascii_part = ascii_filename(component)
        safe_parts.append(
            ''.join(ch if ch in URL_SAFE else '_' for ch in ascii_part))
    return '/'.join(safe_parts)
def build_index(books, num, search, sort, order, start, total, url_base, CKEYS,
                prefix, have_kobo_browser=False):
    """Build the HTML tree of the mobile book listing page.

    The page has the logo, a search box, navigation above and below the
    listing table, one table row per book (thumbnail, a download button per
    format, two description lines) and a link to the full interface.

    :param books: Iterable of book dicts (``id``, ``formats``, ``title``,
        ``authors``, ``series``, ``series_index``, ``tags``, ``size``,
        ``timestamp`` plus the custom keys in ``CKEYS`` are read).
    :param CKEYS: Custom column keys whose ``name:#:value`` strings are
        appended to the second description line.
    :param prefix: URL prefix prepended to the generated links.
    :param have_kobo_browser: When true, ``kepub`` downloads get the
        ``kepub.epub`` extension.
    :return: The ``HTML`` element.
    """
    logo = DIV(IMG(src=prefix + '/static/calibre.png', alt=__appname__),
               id='logo')
    search_box = build_search_box(num, search, sort, order, prefix)
    nav_url = prefix + url_base
    navigation = build_navigation(start, num, total, nav_url)
    navigation2 = build_navigation(start, num, total, nav_url)
    bookt = TABLE(id='listing')

    body = BODY(
        logo,
        search_box,
        navigation,
        HR(CLASS('spacer')),
        bookt,
        HR(CLASS('spacer')),
        navigation2,
    )

    # Book list {{{
    for book in books:
        thumb_src = prefix + '/get/thumb/%s' % book['id']
        thumbnail = TD(IMG(type='image/jpeg', border='0', src=thumb_src),
                       CLASS('thumbnail'))

        data = TD()
        for fmt in book['formats'].split(','):
            fmt_lower = fmt.lower()
            if not fmt or fmt_lower.startswith('original_'):
                continue
            if have_kobo_browser and fmt_lower == "kepub":
                file_extension = "kepub.epub"
            else:
                file_extension = fmt
            a = quote(ascii_filename(book['authors']))
            t = quote(ascii_filename(book['title']))
            href = prefix + '/get/%s/%s-%s_%d.%s' % (
                fmt, a, t, book['id'], file_extension.lower())
            button = SPAN(A(fmt_lower, href=href), CLASS('button'))
            button.tail = u''
            data.append(button)

        div = DIV(CLASS('data-container'))
        data.append(div)

        if book['series']:
            series = u'[%s - %s]' % (book['series'], book['series_index'])
        else:
            series = ''
        tags = u'Tags=[%s]' % book['tags'] if book['tags'] else ''

        custom_values = [book.get(key, None) for key in CKEYS]
        ctext = ''.join('%s=[%s] ' % tuple(val.split(':#:'))
                        for val in custom_values if val)

        first_line = u'\u202f%s %s by %s' % (book['title'], series,
                                             book['authors'])
        div.append(SPAN(first_line, CLASS('first-line')))
        second_line = u'%s - %s %s %s' % (book['size'], book['timestamp'],
                                          tags, ctext)
        div.append(SPAN(second_line, CLASS('second-line')))

        bookt.append(TR(thumbnail, data))
    # }}}

    full_interface_link = A(
        _('Switch to the full interface (non-mobile interface)'),
        href=prefix + "/browse",
        style="text-decoration: none; color: blue",
        title=_('The full interface gives you many more features, '
                'but it may not work well on a small screen'))
    body.append(DIV(full_interface_link, style="text-align:center"))

    head = HEAD(
        TITLE(__appname__ + ' Library'),
        LINK(rel='icon', href='http://calibre-ebook.com/favicon.ico',
             type='image/x-icon'),
        LINK(rel='stylesheet', type='text/css',
             href=prefix + '/mobile/style.css'),
        LINK(rel='apple-touch-icon', href="/static/calibre.png"),
    )
    return HTML(head, body)
def get_download_filename(response):
    """Return an ASCII download file name for ``response``.

    The name reported by ``get_download_filename_from_response`` has its
    stem truncated to 60 characters (the extension is kept) before being
    converted with ``ascii_filename``.
    """
    raw_name = get_download_filename_from_response(response)
    stem, ext = os.path.splitext(raw_name)
    return ascii_filename(stem[:60] + ext)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    """Write ``oeb_book`` as an HTMLZ archive at ``output_path``.

    The book is rendered to a single HTML file (plus an optional external
    stylesheet, the referenced images, a cover and ``metadata.opf``) in a
    temporary directory, which is then added to a zip file.

    :param oeb_book: The OEB book to serialise.
    :param output_path: Destination path of the archive.
    :param input_plugin: Unused by this implementation.
    :param opts: Conversion options; the ``htmlz_css_type``,
        ``htmlz_class_style`` and ``htmlz_title_filename`` options are read.
    :param log: Logger handed to the HTML serialiser.
    """
    from lxml import etree
    from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
    from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
    from calibre.utils.zipfile import ZipFile
    from calibre.utils.filenames import ascii_filename

    # HTML
    if opts.htmlz_css_type == 'inline':
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
        OEB2HTMLizer = OEB2HTMLInlineCSSizer
    elif opts.htmlz_css_type == 'tag':
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
        OEB2HTMLizer = OEB2HTMLNoCSSizer
    else:
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

    with TemporaryDirectory(u'_htmlz_output') as tdir:
        htmlizer = OEB2HTMLizer(log)
        html = htmlizer.oeb2html(oeb_book, opts)

        fname = u'index'
        if opts.htmlz_title_filename:
            from calibre.utils.filenames import shorten_components_to
            fname = shorten_components_to(
                100,
                (ascii_filename(unicode(oeb_book.metadata.title[0])), ))[0]
        with open(os.path.join(tdir, fname + u'.html'), 'wb') as tf:
            if isinstance(html, unicode):
                html = html.encode('utf-8')
            tf.write(html)

        # CSS
        if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
            with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                tf.write(htmlizer.get_css(oeb_book))

        # Images
        images = htmlizer.images
        if images:
            images_dir = os.path.join(tdir, u'images')
            if not os.path.exists(images_dir):
                os.makedirs(images_dir)
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES and item.href in images:
                    if item.media_type == SVG_MIME:
                        data = unicode(
                            etree.tostring(item.data, encoding=unicode))
                    else:
                        data = item.data
                    image_path = os.path.join(tdir, u'images',
                                              images[item.href])
                    with open(image_path, 'wb') as img:
                        img.write(data)

        # Cover
        cover_path = None
        try:
            cover_data = None
            if oeb_book.metadata.cover:
                term = oeb_book.metadata.cover[0].term
                cover_data = oeb_book.guide[term].item.data
            if cover_data:
                from calibre.utils.img import save_cover_data_to
                cover_path = os.path.join(tdir, u'cover.jpg')
                with lopen(cover_path, 'w') as cf:
                    cf.write('')
                save_cover_data_to(cover_data, cover_path)
        except Exception:
            # A broken cover must not abort the conversion, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            import traceback
            traceback.print_exc()

        # Metadata
        with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
            opf = OPF(StringIO(etree.tostring(
                oeb_book.metadata.to_opf1())))
            mi = opf.to_book_metadata()
            if cover_path:
                mi.cover = u'cover.jpg'
            mdataf.write(metadata_to_opf(mi))

        htmlz = ZipFile(output_path, 'w')
        try:
            htmlz.add_dir(tdir)
        finally:
            # Close explicitly so the archive is finalised before the
            # temporary directory is removed.
            htmlz.close()
def to_xml(self, write_files=True):
    """Serialise this document to a ``<BBeBXylog>`` XML string.

    :param write_files: When true, the thumbnail (if any) is written to
        ``<title>_thumbnail.<ext>`` in the current directory and
        ``self.write_files()`` is called.
    :return: The XML text: book information, page trees, styles and the
        remaining objects.
    """
    bookinfo = u'<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
    bookinfo += u'<Title reading="%s">%s</Title>\n' % (
        self.metadata.title_reading, self.metadata.title)
    bookinfo += u'<Author reading="%s">%s</Author>\n' % (
        self.metadata.author_reading, self.metadata.author)
    bookinfo += u'<BookID>%s</BookID>\n' % (self.metadata.book_id, )
    bookinfo += u'<Publisher reading="">%s</Publisher>\n' % (
        self.metadata.publisher, )
    bookinfo += u'<Label reading="">%s</Label>\n' % (self.metadata.label, )
    bookinfo += u'<Category reading="">%s</Category>\n' % (
        self.metadata.category, )
    bookinfo += u'<Classification reading="">%s</Classification>\n' % (
        self.metadata.classification, )
    bookinfo += u'<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n' % (
        self.metadata.free_text, )

    th = self.doc_info.thumbnail
    if th:
        prefix = ascii_filename(self.metadata.title)
        thumb_name = prefix + '_thumbnail.' + self.doc_info.thumbnail_extension
        bookinfo += u'<CThumbnail file="%s" />\n' % (thumb_name, )
        if write_files:
            # Use a context manager so the handle is closed (and the data
            # flushed) deterministically.
            with open(thumb_name, 'wb') as thumb_file:
                thumb_file.write(th)

    bookinfo += u'<Language reading="">%s</Language>\n' % (
        self.doc_info.language, )
    bookinfo += u'<Creator reading="">%s</Creator>\n' % (
        self.doc_info.creator, )
    bookinfo += u'<Producer reading="">%s</Producer>\n' % (
        self.doc_info.producer, )
    bookinfo += u'<SumPage>%s</SumPage>\n</DocInfo>\n</Info>\n%s</BookInformation>\n' % (
        self.doc_info.page, self.toc)

    # The first page tree is emitted as <Main>, later ones as <PageTree>.
    page_parts = []
    pt_id = -1
    for index, page_tree in enumerate(self):
        if index == 0:
            page_parts.append(u'<Main>\n')
            close = u'</Main>\n'
            pt_id = page_tree.id
        else:
            page_parts.append(u'<PageTree objid="%d">\n' % (page_tree.id, ))
            close = u'</PageTree>\n'
        page_parts.extend(unicode_type(page) for page in page_tree)
        page_parts.append(close)
    pages = u''.join(page_parts)

    # Objects already serialised inside the pages are not repeated below.
    traversed_objects = {int(i) for i in re.findall(r'objid="(\w+)"', pages)}
    traversed_objects.add(pt_id)

    object_parts = [u'\n<Objects>\n']
    style_parts = [u'\n<Style>\n']
    for key in self.objects:
        obj = self.objects[key]
        if obj.id in traversed_objects:
            continue
        if isinstance(obj, (Font, Text, TOCObject)):
            continue
        if isinstance(obj, StyleObject):
            style_parts.append(unicode_type(obj))
        else:
            object_parts.append(unicode_type(obj))
    style_parts.append('</Style>\n')
    object_parts.append('</Objects>\n')
    styles = u''.join(style_parts)
    objects = u''.join(object_parts)

    if write_files:
        self.write_files()
    return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>'
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
    """Build an OEB book from the HTML file tree rooted at ``htmlpath``.

    An empty book is created, its metadata filled from ``mi`` (with
    defaults for a missing language, creator or title), every non-binary
    file returned by ``get_filelist`` is added to the manifest and spine,
    links in the HTML and CSS items are rewritten through
    ``self.resource_adder`` and a table of contents is generated from the
    page titles or first headings.

    :param htmlpath: Passed to ``get_filelist`` together with ``basedir``.
    :param basedir: Passed to ``get_filelist``.
    :param opts: Conversion options; ``opts.input_encoding`` is used.
    :param log: Logger; also stored as ``self.log``.
    :param mi: Metadata copied into the book's OEB metadata.
    :return: The populated OEB book (also stored as ``self.oeb``).
    """
    import uuid
    from calibre.ebooks.conversion.plumber import create_oebbook
    from calibre.ebooks.oeb.base import (DirContainer, rewrite_links,
                                         urlnormalize, urldefrag, BINARY_MIME,
                                         OEB_STYLES, xpath)
    from calibre import guess_type
    from calibre.ebooks.oeb.transforms.metadata import \
        meta_info_to_oeb_metadata
    from calibre.ebooks.html.input import get_filelist
    import cssutils, logging
    cssutils.log.setLevel(logging.WARN)
    self.OEB_STYLES = OEB_STYLES
    oeb = create_oebbook(log, None, opts, self,
                         encoding=opts.input_encoding, populate=False)
    self.oeb = oeb

    # Metadata: copy from mi, then fill in the mandatory fields.
    metadata = oeb.metadata
    meta_info_to_oeb_metadata(mi, metadata, log)
    if not metadata.language:
        oeb.logger.warn(u'Language not specified')
        metadata.add('language', get_lang().replace('_', '-'))
    if not metadata.creator:
        oeb.logger.warn('Creator not specified')
        metadata.add('creator', self.oeb.translate(__('Unknown')))
    if not metadata.title:
        oeb.logger.warn('Title not specified')
        metadata.add('title', self.oeb.translate(__('Unknown')))
    bookid = str(uuid.uuid4())
    metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
    # Use the first identifier as the book uid as soon as any identifier
    # carries an 'id' attribute.
    for ident in metadata.identifier:
        if 'id' in ident.attrib:
            self.oeb.uid = metadata.identifier[0]
            break

    # Register every HTML file in the manifest and spine, remembering the
    # href generated for each source path.
    filelist = get_filelist(htmlpath, basedir, opts, log)
    filelist = [f for f in filelist if not f.is_binary]
    htmlfile_map = {}
    for f in filelist:
        path = f.path
        oeb.container = DirContainer(os.path.dirname(path), log,
                                     ignore_opf=True)
        bname = os.path.basename(path)
        id, href = oeb.manifest.generate(id='html',
                                         href=ascii_filename(bname))
        htmlfile_map[path] = href
        item = oeb.manifest.add(id, href, 'text/html')
        item.html_input_href = bname
        oeb.spine.add(item, True)

    self.added_resources = {}
    self.log = log
    self.log('Normalizing filename cases')
    # Paths that are not case sensitive are stored lower-cased.
    for path, href in htmlfile_map.items():
        if not self.is_case_sensitive(path):
            path = path.lower()
        self.added_resources[path] = href
    # Expose the locally imported helpers as attributes of self.
    self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
    self.urldefrag = urldefrag
    self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

    self.log('Rewriting HTML links')
    for f in filelist:
        path = f.path
        dpath = os.path.dirname(path)
        oeb.container = DirContainer(dpath, log, ignore_opf=True)
        item = oeb.manifest.hrefs[htmlfile_map[path]]
        rewrite_links(item.data, partial(self.resource_adder, base=dpath))

    # Rewrite URLs inside stylesheets relative to the directory of the
    # resource path recorded for that stylesheet (None if not recorded).
    for item in oeb.manifest.values():
        if item.media_type in self.OEB_STYLES:
            dpath = None
            for path, href in self.added_resources.items():
                if href == item.href:
                    dpath = os.path.dirname(path)
                    break
            cssutils.replaceUrls(item.data,
                                 partial(self.resource_adder, base=dpath))

    # Auto-generate the TOC: collect the <title> and the first heading of
    # every linear spine item.
    toc = self.oeb.toc
    self.oeb.auto_generated_toc = True
    titles = []
    headers = []
    for item in self.oeb.spine:
        if not item.linear:
            continue
        html = item.data
        title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
        title = re.sub(r'\s+', ' ', title.strip())
        if title:
            titles.append(title)
        headers.append('(unlabled)')
        for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
            expr = '/h:html/h:body//h:%s[position()=1]/text()'
            header = ''.join(xpath(html, expr % tag))
            header = re.sub(r'\s+', ' ', header.strip())
            if header:
                headers[-1] = header
                break
    # Prefer page titles unless some of them are duplicates.
    use = titles
    if len(titles) > len(set(titles)):
        use = headers
    # NOTE(review): the labels were collected for linear items only (and
    # empty titles were skipped), but here they are zipped against the
    # whole spine - looks like labels can shift when non-linear or
    # untitled items exist; confirm.
    for title, item in izip(use, self.oeb.spine):
        if not item.linear:
            continue
        toc.add(title, item.href)

    oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
    return oeb
def browse_get_book_args(self, mi, id_, add_category_links=False):
    """Collect the template arguments used to render one book.

    :param mi: Metadata object of the book.
    :param id_: Book id.
    :param add_category_links: When true, values of category fields are
        rendered as links to the matching browse pages, provided every
        value is found in the categories cache.
    :return: ``(args, fmt, fmts, fname)``: the dict of XML-escaped field
        values (plus ``id`` and ``mi``), the preferred format (``None``
        when the book has no formats), the list of lower-cased formats and
        a URL-quoted "title - authors" file name.
    """
    fmts = self.db.formats(id_, index_is_id=True)
    if not fmts:
        fmts = ''
    fmts = [x.lower() for x in fmts.split(',') if x]
    pf = prefs['output_format'].lower()
    try:
        fmt = pf if pf in fmts else fmts[0]
    except IndexError:
        # The book has no formats at all.
        fmt = None
    args = {
        'id': id_,
        'mi': mi,
    }
    ccache = self.categories_cache() if add_category_links else {}
    ftitle = fauthors = ''
    link_template = ('<a title="Browse books by {3}: {0}"'
                     ' href="{1}" class="details_category_link">{2}</a>')
    for key in mi.all_field_keys():
        val = mi.format_field(key)[1]
        if not val:
            val = ''
        if key == 'title':
            ftitle = xml(val, True)
        elif key == 'authors':
            fauthors = xml(val, True)
        if add_category_links:
            added_key = False
            fm = mi.metadata_for_field(key)
            if val and fm and fm['is_category'] and not fm['is_csp'] and\
                    key != 'formats' and fm['datatype'] not in ['rating']:
                categories = mi.get(key)
                if isinstance(categories, basestring):
                    categories = [categories]
                # Look up the cached tag object for every value; None
                # marks a value that is missing from the cache.
                dbtags = [
                    next((tag for tag in ccache[key]
                          if tag.name == category), None)
                    for category in categories
                ]
                if None not in dbtags:
                    vals = []
                    for tag in dbtags:
                        href = '%s/browse/matches/%s/%s' % \
                            (self.opts.url_prefix, quote(tag.category),
                             quote(str(tag.id)))
                        vals.append(
                            link_template.format(
                                xml(tag.name, True), xml(href, True),
                                xml(val if len(dbtags) == 1 else tag.name),
                                xml(key, True)))
                    join = ' & ' if key == 'authors' or \
                        (fm['is_custom'] and
                         fm['display'].get('is_names', False)) \
                        else ', '
                    args[key] = join.join(vals)
                    added_key = True
            if not added_key:
                args[key] = xml(val, True)
        else:
            args[key] = xml(val, True)
    fname = quote(
        ascii_filename(ftitle) + ' - ' + ascii_filename(fauthors))
    return args, fmt, fmts, fname
def name(self):
    """Return the theme title as a lower-case ASCII name with spaces and dots replaced by ``_``."""
    ascii_title = ascii_filename(self.theme.title)
    for ch in (' ', '.'):
        ascii_title = ascii_title.replace(ch, '_')
    return ascii_title.lower()
def xml(self, start='0', num='50', sort=None, search=None, _=None,
        order='ascending'):
    '''
    Serves metadata from the calibre database as XML.

    :param sort: Sort results by ``sort``. Can be one of `title,author,rating`.
    :param search: Filter results by ``search`` query. See :class:`SearchQueryParser` for query syntax
    :param start,num: Return the slice `[start:start+num]` of the sorted and filtered results
    :param _: Firefox seems to sometimes send this when using XMLHttpRequest with no caching
    :param order: Sort ascending only when this is ``'ascending'``
        (compared case-insensitively, surrounding whitespace ignored)
    :return: The UTF-8 encoded, pretty-printed XML document with one
        ``book`` element per returned record
    :raises cherrypy.HTTPError: 400 when ``start`` or ``num`` is not an integer
    '''
    try:
        start = int(start)
    except ValueError:
        raise cherrypy.HTTPError(400, 'start: %s is not an integer' % start)
    try:
        num = int(num)
    except ValueError:
        raise cherrypy.HTTPError(400, 'num: %s is not an integer' % num)

    # From here on ``order`` is a boolean: True means ascending.
    order = order.lower().strip() == 'ascending'

    if not search:
        search = ''
    if isbytestring(search):
        search = search.decode('UTF-8')

    ids = self.search_for_books(search)

    FM = self.db.FIELD_MAP

    items = [r for r in iter(self.db) if r[FM['id']] in ids]
    if sort is not None:
        self.sort(items, sort, order)

    books = []

    def serialize(x):
        # Convert any value to unicode text for use as an XML attribute.
        if isinstance(x, unicode):
            return x
        if isbytestring(x):
            return x.decode(preferred_encoding, 'replace')
        return unicode(x)

    # This method uses its own book dict, not the Metadata dict. The loop
    # below could be changed to use db.get_metadata instead of reading
    # info directly from the record made by the view, but it doesn't seem
    # worth it at the moment.
    for record in items[start:start + num]:
        kwargs = {}
        aus = record[FM['authors']] if record[
            FM['authors']] else __builtin__._('Unknown')
        # Authors are emitted '|'-separated; a literal '|' inside a name
        # is replaced by ','.
        authors = '|'.join([i.replace('|', ',') for i in aus.split(',')])
        kwargs['authors'] = authors

        kwargs['series_index'] = \
            fmt_sidx(float(record[FM['series_index']]))

        for x in ('timestamp', 'pubdate'):
            kwargs[x] = strftime('%Y/%m/%d %H:%M:%S', record[FM[x]])

        for x in ('id', 'title', 'sort', 'author_sort', 'rating', 'size'):
            kwargs[x] = serialize(record[FM[x]])

        for x in ('formats', 'series', 'tags', 'publisher', 'comments',
                  'identifiers'):
            y = record[FM[x]]
            if x == 'tags':
                y = format_tag_string(y, ',', ignore_max=True)
            kwargs[x] = serialize(y) if y else ''

        isbn = self.db.isbn(record[FM['id']], index_is_id=True)
        kwargs['isbn'] = serialize(isbn if isbn else '')

        kwargs['safe_title'] = ascii_filename(kwargs['title'])

        # Comments become the text of the <book> element, not an attribute.
        c = kwargs.pop('comments')

        CFM = self.db.field_metadata
        CKEYS = [
            key for key in sorted(custom_fields_to_display(self.db),
                                  key=lambda x: sort_key(CFM[x]['name']))
        ]
        custcols = []
        for key in CKEYS:

            def concat(name, val):
                # Custom column values are serialised as 'name:#:value'.
                return '%s:#:%s' % (name, unicode(val))

            # NOTE(review): the metadata lookup does not depend on ``key``
            # and is repeated for every custom column of the record; it
            # looks hoistable out of this loop - confirm before changing.
            mi = self.db.get_metadata(record[CFM['id']['rec_index']],
                                      index_is_id=True)
            name, val = mi.format_field(key)
            if not val:
                continue
            datatype = CFM[key]['datatype']
            if datatype in ['comments']:
                continue
            # Attribute name: 'CF_' plus the key without its first character.
            k = str('CF_' + key[1:])
            name = CFM[key]['name']
            custcols.append(k)
            if datatype == 'text' and CFM[key]['is_multiple']:
                # Multi-valued text columns are marked with a '#T#' prefix.
                kwargs[k] = \
                    concat('#T#'+name,
                           format_tag_string(val,
                               CFM[key]['is_multiple']['ui_to_list'],
                               ignore_max=True,
                               joinval=CFM[key]['is_multiple']['list_to_ui']))
            else:
                kwargs[k] = concat(name, val)
        kwargs['custcols'] = ','.join(custcols)
        books.append(E.book(c, **kwargs))

    updated = self.db.last_modified()
    kwargs = dict(start=str(start),
                  updated=updated.strftime('%Y-%m-%dT%H:%M:%S+00:00'),
                  total=str(len(ids)),
                  num=str(len(books)))
    ans = E.library(*books, **kwargs)

    cherrypy.response.headers['Content-Type'] = 'text/xml'
    cherrypy.response.headers['Last-Modified'] = self.last_modified(
        updated)

    return etree.tostring(ans,
                          encoding='utf-8',
                          pretty_print=True,
                          xml_declaration=True)
def custom_recipe_filename(id_, title):
    """Return the recipe file name: the ASCII form of the first 50 characters of ``title`` followed by ``_<id_>.recipe``."""
    from calibre.utils.filenames import ascii_filename

    safe_title = ascii_filename(title[:50])
    suffix = '_%s.recipe' % id_
    return safe_title + suffix
def process_images(self, soup, baseurl):
    """Download the images referenced by ``soup`` and point the tags at local files.

    Each ``<img src>`` is decoded (``data:`` URIs) or fetched, optionally
    preprocessed, validated/re-encoded and written to the ``images``
    directory under ``self.current_dir``; the tag's ``src`` is replaced
    with the local path, and ``self.imagemap`` remembers which URL maps to
    which file.

    :param soup: Parsed document whose ``<img>`` tags are processed.
    :param baseurl: URL used to resolve relative image URLs.
    """
    diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
    if not os.path.exists(diskpath):
        os.mkdir(diskpath)
    c = 0
    for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and 'src' in tag):  # noqa
        iurl = tag['src']
        if iurl.startswith('data:image/'):
            try:
                data = b64decode(iurl.partition(',')[-1])
            except Exception:
                self.log.exception('Failed to decode embedded image')
                continue
        else:
            if callable(self.image_url_processor):
                iurl = self.image_url_processor(baseurl, iurl)
            if not urllib.parse.urlsplit(iurl).scheme:
                iurl = urllib.parse.urljoin(baseurl, iurl, False)
            with self.imagemap_lock:
                if iurl in self.imagemap:  # noqa
                    tag['src'] = self.imagemap[iurl]
                    continue
            try:
                data = self.fetch_url(iurl)
                # Compare against bytes: the fetched data is binary, so a
                # text literal never matches on Python 3.
                if data == b'GIF89a\x01':
                    # Skip empty GIF files as PIL errors on them anyway
                    continue
            except Exception:
                self.log.exception('Could not fetch image ', iurl)
                continue
        c += 1
        fname = ascii_filename('img' + str(c))
        # Keep the name as text: encoding it to bytes made the
        # ``fname + '.ext'`` concatenations below fail on Python 3.
        if isinstance(fname, bytes):
            fname = fname.decode('ascii', 'replace')
        if self.preprocess_image_ext is not None:
            data = self.preprocess_image_ext(data, iurl)
        if data is None:
            continue
        itype = what(None, data)
        if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
            # SVG image
            imgpath = os.path.join(diskpath, fname + '.svg')
            with self.imagemap_lock:
                self.imagemap[iurl] = imgpath
            with open(imgpath, 'wb') as x:
                x.write(data)
            tag['src'] = imgpath
        else:
            try:
                # Ensure image is valid
                img = image_from_data(data)
                if itype not in {'png', 'jpg', 'jpeg'}:
                    itype = 'png' if itype == 'gif' else 'jpeg'
                    data = image_to_data(img, fmt=itype)
                if self.compress_news_images and itype in {'jpg', 'jpeg'}:
                    try:
                        data = self.rescale_image(data)
                    except Exception:
                        self.log.exception('failed to compress image ' + iurl)
                # Moon+ apparently cannot handle .jpeg files
                if itype == 'jpeg':
                    itype = 'jpg'
                imgpath = os.path.join(diskpath, fname + '.' + itype)
                with self.imagemap_lock:
                    self.imagemap[iurl] = imgpath
                with open(imgpath, 'wb') as x:
                    x.write(data)
                tag['src'] = imgpath
            except Exception:
                traceback.print_exc()
                continue