Ejemplo n.º 1
0
def viewer_socket_address():
    if viewer_socket_address.ans is None:
        if iswindows:
            viewer_socket_address.ans = r'\\.\pipe\CalibreViewer'
            try:
                user = get_windows_username()
            except:
                user = None
            if user:
                user = ascii_filename(user).replace(' ', '_')
                if user:
                    viewer_socket_address.ans += '-' + user[:100] + 'x'
        else:
            user = os.environ.get('USER', '')
            if not user:
                user = os.path.basename(os.path.expanduser('~'))
            if islinux:
                viewer_socket_address.ans = (u'\0%s-calibre-viewer.socket' % ascii_filename(force_unicode(user)))
            else:
                from tempfile import gettempdir
                tmp = gettempdir()
                viewer_socket_address.ans = os.path.join(tmp, user+'-calibre-viewer.socket')
        if not ispy3 and not isinstance(viewer_socket_address.ans, bytes):
            viewer_socket_address.ans = viewer_socket_address.ans.encode(filesystem_encoding)
    return viewer_socket_address.ans
    def construct_path_name(self, book_id):
        print("  PbSourceS is %s" % (__name__))
        print("  PbSourceS: self : %s" % (self))
        print("  PbSourceS: database: %s" % (self.database))
        print("  PbSourceS: book id : %s" % (book_id))

        path_element = None

        try:
            metadata = self.database.get_metadata(book_id, index_is_id=True)
            print("    PbSourceS: metadata: '%s'" % (metadata))
            source = metadata.get(self.source_field_name)
            print("    PbSourceS: metadata.source: '%s'" % (source))

            # Special case for books which have no source set
            if source == None:
                source = self.default_source

            if source:
                source_prefix = self.source_prefix
                source_prefix = ascii_filename(source_prefix[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
                source_name = ascii_filename(source[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
                source_name = source_name.lower()
                # TODO: use a regex for whitespace, r = re.compile(r"^\s+", re.MULTILINE)
                source_name = source_name.replace(' ', '_')
                path_element = source_prefix + "_" + source_name
        except:
            traceback.print_exc()

        print("      PbSourceS: path_element: '%s'" % (path_element))

        return path_element
Ejemplo n.º 3
0
 def construct_file_name(self, book_id, title, author):
     '''
     Construct the file name for this book based on its metadata.
     '''
     author = ascii_filename(author
                 )[:self.PATH_LIMIT].decode('ascii', 'replace')
     title  = ascii_filename(title
                 )[:self.PATH_LIMIT].decode('ascii', 'replace')
     name   = title + ' - ' + author
     while name.endswith('.'):
         name = name[:-1]
     return name
Ejemplo n.º 4
0
 def construct_path_name(self, book_id, title, author):
     '''
     Construct the directory name for this book based on its metadata.
     '''
     author = ascii_filename(author
                 )[:self.PATH_LIMIT].decode('ascii', 'replace')
     title  = ascii_filename(title
                 )[:self.PATH_LIMIT].decode('ascii', 'replace')
     while author[-1] in (' ', '.'):
         author = author[:-1]
     if not author:
         author = ascii_filename(_('Unknown')).decode(
                 'ascii', 'replace')
     return '%s/%s (%d)'%(author, title, book_id)
Ejemplo n.º 5
0
def gui_socket_address():
    global ADDRESS
    if ADDRESS is None:
        if iswindows:
            ADDRESS = r'\\.\pipe\CalibreGUI'
            try:
                user = get_windows_username()
            except:
                user = None
            if user:
                from calibre.utils.filenames import ascii_filename
                user = ascii_filename(user).replace(' ', '_')
                if user:
                    ADDRESS += '-' + user[:100] + 'x'
        else:
            user = os.environ.get('USER', '')
            if not user:
                user = os.path.basename(os.path.expanduser('~'))
            if islinux:
                ADDRESS = (u'\0%s-calibre-gui.socket' % user).encode('ascii')
            else:
                from tempfile import gettempdir
                tmp = gettempdir()
                ADDRESS = os.path.join(tmp, user+'-calibre-gui.socket')
    return ADDRESS
Ejemplo n.º 6
0
    def generate_filename(self, rid, base=None, rid_map=None):
        rid_map = self.rid_map if rid_map is None else rid_map
        fname = rid_map[rid]
        if fname in self.used:
            return self.used[fname]
        raw = self.docx.read(fname)
        base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image'
        ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
        if ext == 'emf':
            # For an example, see: https://bugs.launchpad.net/bugs/1224849
            self.log('Found an EMF image: %s, trying to extract embedded raster image' % base)
            from calibre.utils.wmf.emf import emf_unwrap
            try:
                raw = emf_unwrap(raw)
            except Exception as e:
                self.log.exception('Failed to extract embedded raster image from EMF')
            else:
                ext = 'png'

        base = base.rpartition('.')[0]
        if not base:
            base = 'image'
        base += '.' + ext
        exists = frozenset(self.used.itervalues())
        c = 1
        name = base
        while name in exists:
            n, e = base.rpartition('.')[0::2]
            name = '%s-%d.%s' % (n, c, e)
            c += 1
        self.used[fname] = name
        with open(os.path.join(self.dest_dir, name), 'wb') as f:
            f.write(raw)
        self.all_images.add('images/' + name)
        return name
Ejemplo n.º 7
0
    def __init__(self, gui, name):
        from calibre.gui2 import JSONConfig

        self.gui = gui
        self.name = name
        self.base_plugin = None
        self.config = JSONConfig('store/stores/' + ascii_filename(self.name))
Ejemplo n.º 8
0
    def start_download(self, request):
        if not self.gui:
            return

        url = unicode(request.url().toString())
        cf = self.get_cookies()

        filename = get_download_filename(url, cf)
        ext = os.path.splitext(filename)[1][1:].lower()
        filename = ascii_filename(filename[:60] + '.' + ext)
        if ext not in BOOK_EXTENSIONS:
            if ext == 'acsm':
                from calibre.gui2.dialogs.confirm_delete import confirm
                if not confirm('<p>' + _('This ebook is a DRMed EPUB file.  '
                          'You will be prompted to save this file to your '
                          'computer. Once it is saved, open it with '
                          '<a href="http://www.adobe.com/products/digitaleditions/">'
                          'Adobe Digital Editions</a> (ADE).<p>ADE, in turn '
                          'will download the actual ebook, which will be a '
                          '.epub file. You can add this book to calibre '
                          'using "Add Books" and selecting the file from '
                          'the ADE library folder.'),
                          'acsm_download', self):
                    return
            home = os.path.expanduser('~')
            name = QFileDialog.getSaveFileName(self,
                _('File is not a supported ebook type. Save to disk?'),
                os.path.join(home, filename),
                '*.*')
            if name:
                name = unicode(name)
                self.gui.download_ebook(url, cf, name, name, False)
        else:
            self.gui.download_ebook(url, cf, filename, tags=self.tags)
Ejemplo n.º 9
0
    def read_image_data(self, fname, base=None):
        if fname.startswith("file://"):
            src = fname[len("file://") :]
            if iswindows and src and src[0] == "/":
                src = src[1:]
            if not src or not os.path.exists(src):
                raise LinkedImageNotFound(src)
            with open(src, "rb") as rawsrc:
                raw = rawsrc.read()
        else:
            raw = self.docx.read(fname)
        base = base or ascii_filename(fname.rpartition("/")[-1]).replace(" ", "_") or "image"
        ext = what(None, raw) or base.rpartition(".")[-1] or "jpeg"
        if ext == "emf":
            # For an example, see: https://bugs.launchpad.net/bugs/1224849
            self.log("Found an EMF image: %s, trying to extract embedded raster image" % fname)
            from calibre.utils.wmf.emf import emf_unwrap

            try:
                raw = emf_unwrap(raw)
            except Exception:
                self.log.exception("Failed to extract embedded raster image from EMF")
            else:
                ext = "png"
        base = base.rpartition(".")[0]
        if not base:
            base = "image"
        base += "." + ext
        return raw, base
Ejemplo n.º 10
0
    def write(self, outpath):
        records = [self.record0] + self.records[1:]

#        with open(outpath, 'wb') as f:
        if outpath != None:
            f = outpath

            # Write PalmDB Header

            title = ascii_filename(self.full_title.decode('utf-8'))
#            title = ascii_filename(self.full_title.decode('utf-8')).replace(
#                    ' ', '_')[:31]
            title += (b'\0' * (32 - len(title)))
            now = int(time.time())
            nrecords = len(records)
            f.write(title)
            f.write(pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
            f.write(b'BOOKMOBI')
            f.write(pack(b'>IIH', (2*nrecords)-1, 0, nrecords))
            offset = f.tell() + (8 * nrecords) + 2
            for i, record in enumerate(records):
                f.write(pack(b'>I', offset))
                f.write(b'\0' + pack(b'>I', 2*i)[1:])
                offset += len(record)
            f.write(b'\0\0')

            for rec in records:
                f.write(rec)
Ejemplo n.º 11
0
    def pic_to_img(self, pic, alt, parent):
        name = None
        link = None
        for hl in XPath('descendant::a:hlinkClick[@r:id]')(parent):
            link = {'id':get(hl, 'r:id')}
            tgt = hl.get('tgtFrame', None)
            if tgt:
                link['target'] = tgt
            title = hl.get('tooltip', None)
            if title:
                link['title'] = title

        for pr in XPath('descendant::pic:cNvPr')(pic):
            name = pr.get('name', None)
            if name:
                name = ascii_filename(name).replace(' ', '_')
            alt = pr.get('descr', None)
            for a in XPath('descendant::a:blip[@r:embed or @r:link]')(pic):
                rid = get(a, 'r:embed')
                if not rid:
                    rid = get(a, 'r:link')
                if rid and rid in self.rid_map:
                    try:
                        src = self.generate_filename(rid, name)
                    except LinkedImageNotFound as err:
                        self.log.warn('Linked image: %s not found, ignoring' % err.fname)
                        continue
                    img = IMG(src='images/%s' % src)
                    img.set('alt', alt or 'Image')
                    if link is not None:
                        self.links.append((img, link))
                    return img
Ejemplo n.º 12
0
    def start_download(self, request):
        if not self.gui:
            return

        url = unicode(request.url().toString(NO_URL_FORMATTING))
        cf = self.get_cookies()

        filename = get_download_filename(url, cf)
        ext = os.path.splitext(filename)[1][1:].lower()
        filename = ascii_filename(filename[:60] + '.' + ext)
        if ext not in BOOK_EXTENSIONS:
            if ext == 'acsm':
                from calibre.gui2.dialogs.confirm_delete import confirm
                if not confirm('<p>' + _('This ebook is a DRMed EPUB file.  '
                          'You will be prompted to save this file to your '
                          'computer. Once it is saved, open it with '
                          '<a href="https://www.adobe.com/products/digitaleditions/">'
                          'Adobe Digital Editions</a> (ADE).<p>ADE, in turn '
                          'will download the actual ebook, which will be a '
                          '.epub file. You can add this book to calibre '
                          'using "Add Books" and selecting the file from '
                          'the ADE library folder.'),
                          'acsm_download', self):
                    return
            name = choose_save_file(self, 'web-store-download-unknown', _('File is not a supported ebook type. Save to disk?'), initial_filename=filename)
            if name:
                self.gui.download_ebook(url, cf, name, name, False, create_browser=self.create_browser)
        else:
            show_download_info(filename, self)
            self.gui.download_ebook(url, cf, filename, tags=self.tags, create_browser=self.create_browser)
Ejemplo n.º 13
0
    def _download(self, cookie_file, url, filename, save_loc, add_to_lib):
        dfilename = ""

        if not url:
            raise Exception(_("No file specified to download."))
        if not save_loc and not add_to_lib:
            # Nothing to do.
            return dfilename

        if not filename:
            filename = get_download_filename(url, cookie_file)
            filename, ext = os.path.splitext(filename)
            filename = filename[:60] + ext
            filename = ascii_filename(filename)

        br = browser()
        if cookie_file:
            cj = MozillaCookieJar()
            cj.load(cookie_file)
            br.set_cookiejar(cj)
        with closing(br.open(url)) as r:
            temp_path = os.path.join(PersistentTemporaryDirectory(), filename)
            tf = open(temp_path, "w+b")
            tf.write(r.read())
            dfilename = tf.name

        return dfilename
Ejemplo n.º 14
0
 def download_book(self, result):
     d = ChooseFormatDialog(self, _('Choose format to download to your library.'), result.downloads.keys())
     if d.exec_() == d.Accepted:
         ext = d.format()
         fname = result.title[:60] + '.' + ext.lower()
         fname = ascii_filename(fname)
         self.gui.download_ebook(result.downloads[ext], filename=fname)
Ejemplo n.º 15
0
def email_news(mi, remove, get_fmts, done, job_manager):
    opts = email_config().parse()
    accounts = [(account, [x.strip().lower() for x in x[0].split(',')])
            for account, x in opts.accounts.items() if x[1]]
    sent_mails = []
    for i, x in enumerate(accounts):
        account, fmts = x
        files = get_fmts(fmts)
        files = [f for f in files if f is not None]
        if not files:
            continue
        attachment = files[0]
        to_s = [account]
        subjects = [_('News:')+' '+mi.title]
        texts    = [
                _('Attached is the %s periodical downloaded by calibre.')
                    % (mi.title,)
                ]
        attachment_names = [ascii_filename(mi.title)+os.path.splitext(attachment)[1]]
        attachments = [attachment]
        jobnames = [mi.title]
        do_remove = []
        if i == len(accounts) - 1:
            do_remove = remove
        send_mails(jobnames,
                Dispatcher(partial(done, remove=do_remove)),
                attachments, to_s, subjects, texts, attachment_names,
                job_manager)
        sent_mails.append(to_s[0])
    return sent_mails
Ejemplo n.º 16
0
 def read_image_data(self, fname, base=None):
     if fname.startswith('file://'):
         src = fname[len('file://'):]
         if iswindows and src and src[0] == '/':
             src = src[1:]
         if not src or not os.path.exists(src):
             raise LinkedImageNotFound(src)
         with open(src, 'rb') as rawsrc:
             raw = rawsrc.read()
     else:
         raw = self.docx.read(fname)
     base = base or ascii_filename(fname.rpartition('/')[-1]).replace(' ', '_') or 'image'
     ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
     if ext == 'emf':
         # For an example, see: https://bugs.launchpad.net/bugs/1224849
         self.log('Found an EMF image: %s, trying to extract embedded raster image' % fname)
         from calibre.utils.wmf.emf import emf_unwrap
         try:
             raw = emf_unwrap(raw)
         except Exception:
             self.log.exception('Failed to extract embedded raster image from EMF')
         else:
             ext = 'png'
     base = base.rpartition('.')[0]
     if not base:
         base = 'image'
     base += '.' + ext
     return raw, base
Ejemplo n.º 17
0
    def pic_to_img(self, pic, alt, parent):
        name = None
        link = None
        for hl in XPath('descendant::a:hlinkClick[@r:id]')(parent):
            link = {'id':get(hl, 'r:id')}
            tgt = hl.get('tgtFrame', None)
            if tgt:
                link['target'] = tgt
            title = hl.get('tooltip', None)
            if title:
                link['title'] = title

        for pr in XPath('descendant::pic:cNvPr')(pic):
            name = pr.get('name', None)
            if name:
                name = ascii_filename(name).replace(' ', '_')
            alt = pr.get('descr', None)
            for a in XPath('descendant::a:blip[@r:embed]')(pic):
                rid = get(a, 'r:embed')
                if rid in self.rid_map:
                    src = self.generate_filename(rid, name)
                    img = IMG(src='images/%s' % src)
                    img.set('alt', alt or 'Image')
                    if link is not None:
                        self.links.append((img, link))
                    return img
Ejemplo n.º 18
0
    def pic_to_img(self, pic, alt, parent):
        XPath, get = self.namespace.XPath, self.namespace.get
        name = None
        link = None
        for hl in XPath("descendant::a:hlinkClick[@r:id]")(parent):
            link = {"id": get(hl, "r:id")}
            tgt = hl.get("tgtFrame", None)
            if tgt:
                link["target"] = tgt
            title = hl.get("tooltip", None)
            if title:
                link["title"] = title

        for pr in XPath("descendant::pic:cNvPr")(pic):
            name = pr.get("name", None)
            if name:
                name = ascii_filename(name).replace(" ", "_")
            alt = pr.get("descr", None)
            for a in XPath("descendant::a:blip[@r:embed or @r:link]")(pic):
                rid = get(a, "r:embed")
                if not rid:
                    rid = get(a, "r:link")
                if rid and rid in self.rid_map:
                    try:
                        src = self.generate_filename(rid, name)
                    except LinkedImageNotFound as err:
                        self.log.warn("Linked image: %s not found, ignoring" % err.fname)
                        continue
                    img = IMG(src="images/%s" % src)
                    img.set("alt", alt or "Image")
                    if link is not None:
                        self.links.append((img, link, self.rid_map))
                    return img
Ejemplo n.º 19
0
 def _add_item(self, oeb, title, path):
     bname = os.path.basename(path)
     id, href = oeb.manifest.generate(id='html',
             href=ascii_filename(bname))
     item = oeb.manifest.add(id, href, 'text/html')
     item.html_input_href = bname
     oeb.spine.add(item, True)
     oeb.toc.add(title, item.href)
Ejemplo n.º 20
0
 def __init__(self, gui, name, config=None, base_plugin=None):
     self.gui = gui
     self.name = name
     self.base_plugin = base_plugin
     if config is None:
         from calibre.gui2 import JSONConfig
         config = JSONConfig('store/stores/' + ascii_filename(self.name))
     self.config = config
Ejemplo n.º 21
0
def book_filename(rd, book_id, mi, fmt):
    au = authors_to_string(mi.authors or [_('Unknown')])
    title = mi.title or _('Unknown')
    ext = (fmt or '').lower()
    if ext == 'kepub' and 'Kobo Touch' in rd.inheaders.get('User-Agent', ''):
        ext = 'kepub.epub'
    fname = '%s - %s_%s.%s' % (title[:30], au[:30], book_id, ext)
    fname = ascii_filename(fname).replace('"', '_')
    return fname
Ejemplo n.º 22
0
    def get_format(self, id, format):
        format = format.upper()
        fm = self.db.format_metadata(id, format, allow_cache=False)
        if not fm:
            raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
        update_metadata = format in {'MOBI', 'EPUB', 'AZW3'}
        mi = newmi = self.db.get_metadata(
            id, index_is_id=True, cover_as_data=True, get_cover=update_metadata)

        cherrypy.response.headers['Last-Modified'] = \
            self.last_modified(max(fm['mtime'], mi.last_modified))

        fmt = self.db.format(id, format, index_is_id=True, as_file=True,
                mode='rb')
        if fmt is None:
            raise cherrypy.HTTPError(404, 'book: %d does not have format: %s'%(id, format))
        mt = guess_type('dummy.'+format.lower())[0]
        if mt is None:
            mt = 'application/octet-stream'
        cherrypy.response.headers['Content-Type'] = mt

        if format.lower() in plugboard_content_server_formats:
            # Get any plugboards for the content server
            plugboards = self.db.prefs.get('plugboards', {})
            cpb = find_plugboard(plugboard_content_server_value,
                                 format.lower(), plugboards)
            if cpb:
                # Transform the metadata via the plugboard
                newmi = mi.deepcopy_metadata()
                newmi.template_to_attribute(mi, cpb)

        if update_metadata:
            # Write the updated file
            from calibre.ebooks.metadata.meta import set_metadata
            set_metadata(fmt, newmi, format.lower())
            fmt.seek(0)

        fmt.seek(0, 2)
        cherrypy.response.headers['Content-Length'] = fmt.tell()
        fmt.seek(0)

        ua = cherrypy.request.headers.get('User-Agent', '').strip()
        have_kobo_browser = self.is_kobo_browser(ua)
        file_extension = "kepub.epub" if have_kobo_browser and format.lower() == "kepub" else format

        au = authors_to_string(newmi.authors if newmi.authors else
                [_('Unknown')])
        title = newmi.title if newmi.title else _('Unknown')
        fname = u'%s - %s_%s.%s'%(title[:30], au[:30], id, file_extension.lower())
        fname = ascii_filename(fname).replace('"', '_')
        cherrypy.response.headers['Content-Disposition'] = \
                b'attachment; filename="%s"'%fname
        cherrypy.response.body = fmt
        cherrypy.response.timeout = 3600
        return fmt
Ejemplo n.º 23
0
    def to_xml(self, write_files=True):
        bookinfo = u'<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
        bookinfo += u'<Title reading="%s">%s</Title>\n'%(self.metadata.title_reading, self.metadata.title)
        bookinfo += u'<Author reading="%s">%s</Author>\n'%(self.metadata.author_reading, self.metadata.author)
        bookinfo += u'<BookID>%s</BookID>\n'%(self.metadata.book_id,)
        bookinfo += u'<Publisher reading="">%s</Publisher>\n'%(self.metadata.publisher,)
        bookinfo += u'<Label reading="">%s</Label>\n'%(self.metadata.label,)
        bookinfo += u'<Category reading="">%s</Category>\n'%(self.metadata.category,)
        bookinfo += u'<Classification reading="">%s</Classification>\n'%(self.metadata.classification,)
        bookinfo += u'<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n'%(self.metadata.free_text,)
        th = self.doc_info.thumbnail
        if th:
            prefix = ascii_filename(self.metadata.title)
            bookinfo += u'<CThumbnail file="%s" />\n'%(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension,)
            if write_files:
                open(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension, 'wb').write(th)
        bookinfo += u'<Language reading="">%s</Language>\n'%(self.doc_info.language,)
        bookinfo += u'<Creator reading="">%s</Creator>\n'%(self.doc_info.creator,)
        bookinfo += u'<Producer reading="">%s</Producer>\n'%(self.doc_info.producer,)
        bookinfo += u'<SumPage>%s</SumPage>\n</DocInfo>\n</Info>\n%s</BookInformation>\n'%(self.doc_info.page,self.toc)
        pages = u''
        done_main = False
        pt_id = -1
        for page_tree in self:
            if not done_main:
                done_main = True
                pages += u'<Main>\n'
                close = u'</Main>\n'
                pt_id = page_tree.id
            else:
                pages += u'<PageTree objid="%d">\n'%(page_tree.id,)
                close = u'</PageTree>\n'
            for page in page_tree:
                pages += unicode(page)
            pages += close
        traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]

        objects = u'\n<Objects>\n'
        styles  = u'\n<Style>\n'
        for obj in self.objects:
            obj = self.objects[obj]
            if obj.id in traversed_objects:
                continue
            if isinstance(obj, (Font, Text, TOCObject)):
                continue
            if isinstance(obj, StyleObject):
                styles += unicode(obj)
            else:
                objects += unicode(obj)
        styles += '</Style>\n'
        objects += '</Objects>\n'
        if write_files:
            self.write_files()
        return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>'
Ejemplo n.º 24
0
 def process_images(self, soup, baseurl):
     diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
     if not os.path.exists(diskpath):
         os.mkdir(diskpath)
     c = 0
     for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
         iurl = tag['src']
         if iurl.startswith('data:image/'):
             try:
                 data = b64decode(iurl.partition(',')[-1])
             except:
                 self.log.exception('Failed to decode embedded image')
                 continue
         else:
             if callable(self.image_url_processor):
                 iurl = self.image_url_processor(baseurl, iurl)
             if not urlparse.urlsplit(iurl).scheme:
                 iurl = urlparse.urljoin(baseurl, iurl, False)
             with self.imagemap_lock:
                 if self.imagemap.has_key(iurl):
                     tag['src'] = self.imagemap[iurl]
                     continue
             try:
                 data = self.fetch_url(iurl)
                 if data == 'GIF89a\x01':
                     # Skip empty GIF files as PIL errors on them anyway
                     continue
             except Exception:
                 self.log.exception('Could not fetch image ', iurl)
                 continue
         c += 1
         fname = ascii_filename('img'+str(c))
         if isinstance(fname, unicode):
             fname = fname.encode('ascii', 'replace')
         imgpath = os.path.join(diskpath, fname+'.jpg')
         if (imghdr.what(None, data) is None and b'<svg' in data[:1024]):
             # SVG image
             imgpath = os.path.join(diskpath, fname+'.svg')
             with self.imagemap_lock:
                 self.imagemap[iurl] = imgpath
             with open(imgpath, 'wb') as x:
                 x.write(data)
             tag['src'] = imgpath
         else:
             try:
                 im = Image.open(StringIO(data)).convert('RGBA')
                 with self.imagemap_lock:
                     self.imagemap[iurl] = imgpath
                 with open(imgpath, 'wb') as x:
                     im.save(x, 'JPEG')
                 tag['src'] = imgpath
             except:
                 traceback.print_exc()
                 continue
Ejemplo n.º 25
0
 def create_filename(self, href, fmt):
     fname = ascii_filename(urlunquote(posixpath.basename(href)))
     fname = posixpath.splitext(fname)[0]
     fname = fname[:75].rstrip('.') or 'image'
     num = 0
     base = fname
     while fname.lower() in self.seen_filenames:
         num += 1
         fname = base + str(num)
     self.seen_filenames.add(fname.lower())
     fname += os.extsep + fmt.lower()
     return fname
Ejemplo n.º 26
0
def book_filename(rd, book_id, mi, fmt, as_encoded_unicode=False):
    au = authors_to_string(mi.authors or [_('Unknown')])
    title = mi.title or _('Unknown')
    ext = (fmt or '').lower()
    if ext == 'kepub' and 'Kobo Touch' in rd.inheaders.get('User-Agent', ''):
        ext = 'kepub.epub'
    fname = '%s - %s_%s.%s' % (title[:30], au[:30], book_id, ext)
    if as_encoded_unicode:
        # See https://tools.ietf.org/html/rfc6266
        fname = sanitize_file_name(fname).encode('utf-8')
        fname = unicode_type(quote(fname))
    else:
        fname = ascii_filename(fname).replace('"', '_')
    return fname
    def construct_path_name(self, book_id):
        print("  PbSeriesS is %s" % (__name__))
        print("  PbSeriesS: self : %s" % (self))
        print("  PbSeriesS: database: %s" % (self.database))
        print("  PbSeriesS: book id : %s" % (book_id))

        path_element = None

        try:
            series = self.database.series(book_id, index_is_id=True)
            print("    PbSourceS: series: '%s'" % (series))

            if series:
                series_prefix = self.series_prefix
                series_prefix = ascii_filename(series_prefix[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
                series_name = ascii_filename(series[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
                path_element = series_prefix + "/" + series_name
        except:
            traceback.print_exc()

        # print("PathnameBySeriesStrategy: path_name_element: '%s'" % (path_name_element))

        return path_element
Ejemplo n.º 28
0
    def get_embed_font_info(self, family, failure_critical=True):
        efi = []
        body_font_family = None
        if not family:
            return body_font_family, efi
        from calibre.utils.fonts.scanner import font_scanner, NoFonts
        from calibre.utils.fonts.utils import panose_to_css_generic_family
        try:
            faces = font_scanner.fonts_for_family(family)
        except NoFonts:
            msg = (u'No embeddable fonts found for family: %r'%family)
            if failure_critical:
                raise ValueError(msg)
            self.oeb.log.warn(msg)
            return body_font_family, efi
        if not faces:
            msg = (u'No embeddable fonts found for family: %r'%family)
            if failure_critical:
                raise ValueError(msg)
            self.oeb.log.warn(msg)
            return body_font_family, efi

        for i, font in enumerate(faces):
            ext = 'otf' if font['is_otf'] else 'ttf'
            fid, href = self.oeb.manifest.generate(id=u'font',
                href=u'fonts/%s.%s'%(ascii_filename(font['full_name']).replace(u' ', u'-'), ext))
            item = self.oeb.manifest.add(fid, href,
                    guess_type('dummy.'+ext)[0],
                    data=font_scanner.get_font_data(font))
            item.unload_data_from_memory()

            cfont = {
                    u'font-family':u'"%s"'%font['font-family'],
                    u'panose-1': u' '.join(map(unicode_type, font['panose'])),
                    u'src': u'url(%s)'%item.href,
            }

            if i == 0:
                generic_family = panose_to_css_generic_family(font['panose'])
                body_font_family = u"'%s',%s"%(font['font-family'], generic_family)
                self.oeb.log(u'Embedding font: %s'%font['font-family'])
            for k in (u'font-weight', u'font-style', u'font-stretch'):
                if font[k] != u'normal':
                    cfont[k] = font[k]
            rule = '@font-face { %s }'%('; '.join(u'%s:%s'%(k, v) for k, v in
                iteritems(cfont)))
            rule = css_parser.parseString(rule)
            efi.append(rule)

        return body_font_family, efi
Ejemplo n.º 29
0
 def do_embed(f):
     data = font_scanner.get_font_data(f)
     name = f['full_name']
     ext = 'otf' if f['is_otf'] else 'ttf'
     name = ascii_filename(name).replace(' ', '-').replace('(', '').replace(')', '')
     fid, href = self.oeb.manifest.generate(id=u'font', href=u'fonts/%s.%s'%(name, ext))
     item = self.oeb.manifest.add(fid, href, guess_type('dummy.'+ext)[0], data=data)
     item.unload_data_from_memory()
     page_sheet = self.get_page_sheet()
     href = page_sheet.relhref(item.href)
     css = '''@font-face { font-family: "%s"; font-weight: %s; font-style: %s; font-stretch: %s; src: url(%s) }''' % (
         f['font-family'], f['font-weight'], f['font-style'], f['font-stretch'], href)
     sheet = self.parser.parseString(css, validate=False)
     page_sheet.data.insertRule(sheet.cssRules[0], len(page_sheet.data.cssRules))
     return find_font_face_rules(sheet, self.oeb)[0]
Ejemplo n.º 30
0
 def __init__(self, name):
     from calibre.utils.filenames import ascii_filename
     BaseError.__init__(self, _('Filename contains unsafe characters'), name)
     qname = urlquote(name)
     def esc(n):
         return ''.join(x if x in URL_SAFE else '_' for x in n)
     self.sname = '/'.join(esc(ascii_filename(x)) for x in name.split('/'))
     self.HELP = _(
         'The filename {0} contains unsafe characters, that must be escaped, like'
         ' this {1}. This can cause problems with some ebook readers. To be'
         ' absolutely safe, use only the English alphabet [a-z], the numbers [0-9],'
         ' underscores and hyphens in your file names. While many other characters'
         ' are allowed, they may cause problems with some software.').format(name, qname)
     self.INDIVIDUAL_FIX = _(
         'Rename the file {0} to {1}').format(name, self.sname)
Ejemplo n.º 31
0
def sanitize_file_name(x):
    ans = re.sub(r'\s+', ' ', re.sub(r'[?&=;#]', '_', ascii_filename(x))).strip().rstrip('.')
    ans, ext = ans.rpartition('.')[::2]
    return (ans.strip() + '.' + ext.strip()).rstrip('.')
Ejemplo n.º 32
0
    def extract_content(self, output_dir, parse_cache):
        output_dir = os.path.abspath(output_dir)
        self.check_for_drm()
        processed_records = self.extract_text()
        if self.debug is not None:
            parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
        self.add_anchors()
        self.processed_html = self.processed_html.decode(
            self.book_header.codec, 'ignore')
        self.processed_html = self.processed_html.replace('</</', '</')
        self.processed_html = re.sub(r'</([a-zA-Z]+)<', r'</\1><',
                                     self.processed_html)
        self.processed_html = self.processed_html.replace(u'\ufeff', '')
        # Remove tags of the form <xyz: ...> as they can cause issues further
        # along the pipeline
        self.processed_html = re.sub(r'</{0,1}[a-zA-Z]+:\s+[^>]*>', '',
                                     self.processed_html)

        for pat in ENCODING_PATS:
            self.processed_html = pat.sub('', self.processed_html)
        self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
                                     self.processed_html)
        self.extract_images(processed_records, output_dir)
        self.replace_page_breaks()
        self.cleanup_html()

        self.log.debug('Parsing HTML...')
        self.processed_html = clean_ascii_chars(self.processed_html)
        try:
            root = html.fromstring(self.processed_html)
            if len(root.xpath('//html')) > 5:
                root = html.fromstring(
                    self.processed_html.replace('\x0c',
                                                '').replace('\x14', ''))
        except:
            self.log.warning(
                'MOBI markup appears to contain random bytes. Stripping.')
            self.processed_html = self.remove_random_bytes(self.processed_html)
            root = html.fromstring(self.processed_html)
        if root.xpath('descendant::p/descendant::p'):
            from calibre.utils.soupparser import fromstring
            self.log.warning('Malformed markup, parsing using BeautifulSoup')
            try:
                root = fromstring(self.processed_html)
            except Exception:
                self.log.warning(
                    'MOBI markup appears to contain random bytes. Stripping.')
                self.processed_html = self.remove_random_bytes(
                    self.processed_html)
                root = fromstring(self.processed_html)
            if len(root.xpath('body/descendant::*')) < 1:
                # There are probably stray </html>s in the markup
                self.processed_html = self.processed_html.replace(
                    '</html>', '')
                root = fromstring(self.processed_html)

        if root.tag != 'html':
            self.log.warn('File does not have opening <html> tag')
            nroot = html.fromstring('<html><head></head><body></body></html>')
            bod = nroot.find('body')
            for child in list(root):
                child.getparent().remove(child)
                bod.append(child)
            root = nroot

        htmls = list(root.xpath('//html'))

        if len(htmls) > 1:
            self.log.warn('Markup contains multiple <html> tags, merging.')
            # Merge all <head> and <body> sections
            for h in htmls:
                p = h.getparent()
                if hasattr(p, 'remove'):
                    p.remove(h)
            bodies, heads = root.xpath('//body'), root.xpath('//head')
            for x in root:
                root.remove(x)
            head, body = map(root.makeelement, ('head', 'body'))
            for h in heads:
                for x in h:
                    h.remove(x)
                    head.append(x)
            for b in bodies:
                for x in b:
                    b.remove(x)
                    body.append(x)
            root.append(head), root.append(body)
        for x in root.xpath('//script'):
            x.getparent().remove(x)

        head = root.xpath('//head')
        if head:
            head = head[0]
        else:
            head = root.makeelement('head', {})
            root.insert(0, head)
        head.text = '\n\t'
        link = head.makeelement('link', {
            'type': 'text/css',
            'href': 'styles.css',
            'rel': 'stylesheet'
        })
        head.insert(0, link)
        link.tail = '\n\t'
        title = head.xpath('descendant::title')
        m = head.makeelement('meta', {
            'http-equiv': 'Content-Type',
            'content': 'text/html; charset=utf-8'
        })
        head.insert(0, m)
        if not title:
            title = head.makeelement('title', {})
            title.text = self.book_header.title
            title.tail = '\n\t'
            head.insert(0, title)
            head.text = '\n\t'

        self.upshift_markup(root)
        guides = root.xpath('//guide')
        guide = guides[0] if guides else None
        metadata_elems = root.xpath('//metadata')
        if metadata_elems and self.book_header.exth is None:
            self.read_embedded_metadata(root, metadata_elems[0], guide)
        for elem in guides + metadata_elems:
            elem.getparent().remove(elem)
        fname = self.name.encode('ascii', 'replace')
        fname = re.sub(r'[\x08\x15\0]+', '', fname)
        if not fname:
            fname = 'dummy'
        htmlfile = os.path.join(output_dir, ascii_filename(fname) + '.html')
        try:
            for ref in guide.xpath('descendant::reference'):
                if ref.attrib.has_key('href'):
                    ref.attrib['href'] = os.path.basename(
                        htmlfile) + ref.attrib['href']
        except AttributeError:
            pass
        parse_cache[htmlfile] = root
        self.htmlfile = htmlfile
        ncx = cStringIO.StringIO()
        opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
        self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
        opf.render(open(self.created_opf_path, 'wb'),
                   ncx,
                   ncx_manifest_entry=ncx_manifest_entry)
        ncx = ncx.getvalue()
        if ncx:
            ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
            open(ncx_path, 'wb').write(ncx)

        with open('styles.css', 'wb') as s:
            s.write(self.base_css_rules + '\n\n')
            for cls, rule in self.tag_css_rules.items():
                if isinstance(rule, unicode):
                    rule = rule.encode('utf-8')
                s.write('.%s { %s }\n\n' % (cls, rule))

        if self.book_header.exth is not None or self.embedded_mi is not None:
            self.log.debug('Creating OPF...')
            ncx = cStringIO.StringIO()
            opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
            opf.render(open(os.path.splitext(htmlfile)[0] + '.opf', 'wb'), ncx,
                       ncx_manifest_entry)
            ncx = ncx.getvalue()
            if ncx:
                open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb').write(ncx)
Ejemplo n.º 33
0
    def send_by_mail(self,
                     to,
                     fmts,
                     delete_from_library,
                     subject='',
                     send_ids=None,
                     do_auto_convert=True,
                     specific_format=None):
        ids = [
            self.library_view.model().id(r)
            for r in self.library_view.selectionModel().selectedRows()
        ] if send_ids is None else send_ids
        if not ids or len(ids) == 0:
            return

        files, _auto_ids = self.library_view.model(
        ).get_preferred_formats_from_ids(
            ids,
            fmts,
            set_metadata=True,
            specific_format=specific_format,
            exclude_auto=do_auto_convert,
            use_plugboard=plugboard_email_value,
            plugboard_formats=plugboard_email_formats)
        if do_auto_convert:
            nids = list(set(ids).difference(_auto_ids))
            ids = [i for i in ids if i in nids]
        else:
            _auto_ids = []

        full_metadata = self.library_view.model().metadata_for(ids,
                                                               get_cover=False)

        bad, remove_ids, jobnames = [], [], []
        texts, subjects, attachments, attachment_names = [], [], [], []
        for f, mi, id in zip(files, full_metadata, ids):
            t = mi.title
            if not t:
                t = _('Unknown')
            if f is None:
                bad.append(t)
            else:
                remove_ids.append(id)
                jobnames.append(t)
                attachments.append(f)
                if not subject:
                    subjects.append(_('E-book:') + ' ' + t)
                else:
                    components = get_components(subject, mi, id)
                    if not components:
                        components = [mi.title]
                    subjects.append(os.path.join(*components))
                a = authors_to_string(mi.authors if mi.authors else \
                        [_('Unknown')])
                texts.append(_('Attached, you will find the e-book') + \
                        '\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
                        _('in the %s format.') %
                        os.path.splitext(f)[1][1:].upper())
                prefix = ascii_filename(t + ' - ' + a)
                if not isinstance(prefix, unicode):
                    prefix = prefix.decode(preferred_encoding, 'replace')
                attachment_names.append(prefix + os.path.splitext(f)[1])
        remove = remove_ids if delete_from_library else []

        to_s = list(repeat(to, len(attachments)))
        if attachments:
            send_mails(jobnames,
                       Dispatcher(partial(self.email_sent,
                                          remove=remove)), attachments, to_s,
                       subjects, texts, attachment_names, self.job_manager)
            self.status_bar.show_message(
                _('Sending email to') + ' ' + to, 3000)

        auto = []
        if _auto_ids != []:
            for id in _auto_ids:
                if specific_format == None:
                    dbfmts = self.library_view.model().db.formats(
                        id, index_is_id=True)
                    formats = [
                        f.lower()
                        for f in (dbfmts.split(',') if dbfmts else [])
                    ]
                    if list(
                            set(formats).intersection(
                                available_input_formats())) != [] and list(
                                    set(fmts).intersection(
                                        available_output_formats())) != []:
                        auto.append(id)
                    else:
                        bad.append(self.library_view.model().db.title(
                            id, index_is_id=True))
                else:
                    if specific_format in list(
                            set(fmts).intersection(
                                set(available_output_formats()))):
                        auto.append(id)
                    else:
                        bad.append(self.library_view.model().db.title(
                            id, index_is_id=True))

        if auto != []:
            format = specific_format if specific_format in list(
                set(fmts).intersection(set(
                    available_output_formats()))) else None
            if not format:
                for fmt in fmts:
                    if fmt in list(
                            set(fmts).intersection(
                                set(available_output_formats()))):
                        format = fmt
                        break
            if format is None:
                bad += auto
            else:
                autos = [
                    self.library_view.model().db.title(id, index_is_id=True)
                    for id in auto
                ]
                if self.auto_convert_question(
                        _('Auto convert the following books before sending via '
                          'email?'), autos):
                    self.iactions['Convert Books'].auto_convert_mail(
                        to, fmts, delete_from_library, auto, format, subject)

        if bad:
            bad = '\n'.join('%s' % (i, ) for i in bad)
            d = warning_dialog(
                self, _('No suitable formats'),
                _('Could not email the following books '
                  'as no suitable formats were found:'), bad)
            d.exec_()
Ejemplo n.º 34
0
def image_filename(x):
    return ascii_filename(x).replace(' ', '_').replace('#', '_')
Ejemplo n.º 35
0
    def process_links(self, soup, baseurl, recursion_level, into_dir='links'):
        res = ''
        diskpath = os.path.join(self.current_dir, into_dir)
        if not os.path.exists(diskpath):
            os.mkdir(diskpath)
        prev_dir = self.current_dir
        try:
            self.current_dir = diskpath
            tags = list(soup.findAll('a', href=True))

            for c, tag in enumerate(tags):
                if self.show_progress:
                    print('.', end=' ')
                    sys.stdout.flush()
                sys.stdout.flush()
                iurl = self.absurl(baseurl,
                                   tag,
                                   'href',
                                   filter=recursion_level != 0)
                if not iurl:
                    continue
                nurl = self.normurl(iurl)
                if nurl in self.filemap:  # noqa
                    self.localize_link(tag, 'href', self.filemap[nurl])
                    continue
                if self.files > self.max_files:
                    return res
                linkdir = 'link' + str(c) if into_dir else ''
                linkdiskpath = os.path.join(diskpath, linkdir)
                if not os.path.exists(linkdiskpath):
                    os.mkdir(linkdiskpath)
                try:
                    self.current_dir = linkdiskpath
                    dsrc = self.fetch_url(iurl)
                    newbaseurl = dsrc.newurl
                    if len(dsrc) == 0 or \
                       len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
                        raise ValueError('No content at URL %r' % iurl)
                    if callable(self.encoding):
                        dsrc = self.encoding(dsrc)
                    elif self.encoding is not None:
                        dsrc = dsrc.decode(self.encoding, 'replace')
                    else:
                        dsrc = xml_to_unicode(dsrc, self.verbose)[0]

                    st = time.time()
                    soup = self.get_soup(dsrc, url=iurl)
                    self.log.debug('Parsed %s in %.1f seconds' %
                                   (iurl, time.time() - st))

                    base = soup.find('base', href=True)
                    if base is not None:
                        newbaseurl = base['href']
                    self.log.debug('Processing images...')
                    self.process_images(soup, newbaseurl)
                    if self.download_stylesheets:
                        self.process_stylesheets(soup, newbaseurl)

                    _fname = basename(iurl)
                    if not isinstance(_fname, str):
                        _fname.decode('latin1', 'replace')
                    _fname = _fname.encode('ascii', 'replace').replace(
                        '%', '').replace(os.sep, '')
                    _fname = ascii_filename(_fname)
                    _fname = os.path.splitext(_fname)[0][:120] + '.xhtml'
                    res = os.path.join(linkdiskpath, _fname)
                    self.downloaded_paths.append(res)
                    self.filemap[nurl] = res
                    if recursion_level < self.max_recursions:
                        self.log.debug('Processing links...')
                        self.process_links(soup, newbaseurl,
                                           recursion_level + 1)
                    else:
                        self.process_return_links(soup, newbaseurl)
                        self.log.debug(
                            'Recursion limit reached. Skipping links in', iurl)

                    if newbaseurl and not newbaseurl.startswith('/'):
                        for atag in soup.findAll(
                                'a', href=lambda x: x and x.startswith('/')):
                            atag['href'] = urllib.parse.urljoin(
                                newbaseurl, atag['href'], True)
                    if callable(self.postprocess_html_ext):
                        soup = self.postprocess_html_ext(
                            soup, c == 0 and recursion_level == 0
                            and not getattr(self, 'called_first', False),
                            self.job_info)

                        if c == 0 and recursion_level == 0:
                            self.called_first = True

                    save_soup(soup, res)
                    self.localize_link(tag, 'href', res)
                except Exception as err:
                    if isinstance(err, AbortArticle):
                        raise
                    self.failed_links.append((iurl, traceback.format_exc()))
                    self.log.exception('Could not fetch link', iurl)
                finally:
                    self.current_dir = diskpath
                    self.files += 1
        finally:
            self.current_dir = prev_dir
        if self.show_progress:
            print()
        return res
Ejemplo n.º 36
0
def image_filename(x):
    return sanitize_file_name(
        re.sub(r'[^0-9a-zA-Z.-]', '_',
               ascii_filename(x)).lstrip('_').lstrip('.'))
Ejemplo n.º 37
0
def sanitize_file_name(x):
    return re.sub(r'[?&=;#]', '_', ascii_filename(x))
Ejemplo n.º 38
0
    def convert(self, stream, options, file_ext, log, accelerators):
        import uuid
        from lxml import etree

        from calibre.ebooks.oeb.base import DirContainer
        from calibre.ebooks.snb.snbfile import SNBFile

        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
        except:
            raise ValueError("Invalid SNB file")
        if not snbFile.IsValid():
            log.debug("Invalid SNB file")
            raise ValueError("Invalid SNB file")
        log.debug("Handle meta data ...")
        from calibre.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log,
                             None,
                             options,
                             encoding=options.input_encoding,
                             populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)
            l = {
                'title': './/head/name',
                'creator': './/head/author',
                'language': './/head/language',
                'generator': './/head/generator',
                'publisher': './/head/publisher',
                'cover': './/head/cover',
            }
            d = {}
            for item in l:
                node = meta.find(l[item])
                if node is not None:
                    d[item] = node.text if node.text is not None else ''
                else:
                    d[item] = ''

            oeb.metadata.add('title', d['title'])
            oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
            oeb.metadata.add('language',
                             d['language'].lower().replace('_', '-'))
            oeb.metadata.add('generator', d['generator'])
            oeb.metadata.add('publisher', d['publisher'])
            if d['cover'] != '':
                oeb.guide.add('cover', 'Cover', d['cover'])

        bookid = unicode_type(uuid.uuid4())
        oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in oeb.metadata.identifier:
            if 'id' in ident.attrib:
                oeb.uid = oeb.metadata.identifier[0]
                break

        with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
            log.debug('Process TOC ...')
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
                toc = etree.fromstring(toc)
                i = 1
                for ch in toc.find('.//body'):
                    chapterName = ch.text
                    chapterSrc = ch.get('src')
                    fname = 'ch_%d.htm' % i
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
                    snbc = etree.fromstring(data)
                    lines = []
                    for line in snbc.find('.//body'):
                        if line.tag == 'text':
                            lines.append('<p>%s</p>' % html_encode(line.text))
                        elif line.tag == 'img':
                            lines.append('<p><img src="%s" /></p>' %
                                         html_encode(line.text))
                    with open(os.path.join(tdir, fname), 'wb') as f:
                        f.write((HTML_TEMPLATE %
                                 (chapterName, '\n'.join(lines))).encode(
                                     'utf-8', 'replace'))
                    oeb.toc.add(ch.text, fname)
                    id, href = oeb.manifest.generate(
                        id='html', href=ascii_filename(fname))
                    item = oeb.manifest.add(id, href, 'text/html')
                    item.html_input_href = fname
                    oeb.spine.add(item, True)
                    i = i + 1
                imageFiles = snbFile.OutputImageFiles(tdir)
                for f, m in imageFiles:
                    id, href = oeb.manifest.generate(id='image',
                                                     href=ascii_filename(f))
                    item = oeb.manifest.add(id, href, m)
                    item.html_input_href = f

        return oeb
Ejemplo n.º 39
0
    def get_format(self, id, format):
        format = format.upper()
        fm = self.db.format_metadata(id, format, allow_cache=False)
        if not fm:
            raise cherrypy.HTTPError(
                404, 'book: %d does not have format: %s' % (id, format))
        update_metadata = format in {'MOBI', 'EPUB', 'AZW3'}
        mi = newmi = self.db.get_metadata(id,
                                          index_is_id=True,
                                          cover_as_data=True,
                                          get_cover=update_metadata)

        cherrypy.response.headers['Last-Modified'] = \
            self.last_modified(max(fm['mtime'], mi.last_modified))

        fmt = self.db.format(id,
                             format,
                             index_is_id=True,
                             as_file=True,
                             mode='rb')
        if fmt is None:
            raise cherrypy.HTTPError(
                404, 'book: %d does not have format: %s' % (id, format))
        mt = guess_type('dummy.' + format.lower())[0]
        if mt is None:
            mt = 'application/octet-stream'
        cherrypy.response.headers['Content-Type'] = mt

        if format.lower() in plugboard_content_server_formats:
            # Get any plugboards for the content server
            plugboards = self.db.prefs.get('plugboards', {})
            cpb = find_plugboard(plugboard_content_server_value,
                                 format.lower(), plugboards)
            if cpb:
                # Transform the metadata via the plugboard
                newmi = mi.deepcopy_metadata()
                newmi.template_to_attribute(mi, cpb)

        if update_metadata:
            # Write the updated file
            from calibre.ebooks.metadata.meta import set_metadata
            set_metadata(fmt, newmi, format.lower())
            fmt.seek(0)

        fmt.seek(0, 2)
        cherrypy.response.headers['Content-Length'] = fmt.tell()
        fmt.seek(0)

        ua = cherrypy.request.headers.get('User-Agent', '').strip()
        have_kobo_browser = self.is_kobo_browser(ua)
        file_extension = "kepub.epub" if have_kobo_browser and format.lower(
        ) == "kepub" else format

        au = authors_to_string(
            newmi.authors if newmi.authors else [_('Unknown')])
        title = newmi.title if newmi.title else _('Unknown')
        fname = u'%s - %s_%s.%s' % (title[:30], au[:30], id,
                                    file_extension.lower())
        fname = ascii_filename(fname).replace('"', '_')
        cherrypy.response.headers['Content-Disposition'] = \
                b'attachment; filename="%s"'%fname
        cherrypy.response.body = fmt
        cherrypy.response.timeout = 3600
        return fmt
Ejemplo n.º 40
0
def make_filename_safe(name):
    from calibre.utils.filenames import ascii_filename

    def esc(n):
        return ''.join(x if x in URL_SAFE else '_' for x in n)
    return '/'.join(esc(ascii_filename(x)) for x in name.split('/'))
Ejemplo n.º 41
0
def build_index(books,
                num,
                search,
                sort,
                order,
                start,
                total,
                url_base,
                CKEYS,
                prefix,
                have_kobo_browser=False):
    logo = DIV(IMG(src=prefix + '/static/calibre.png', alt=__appname__),
               id='logo')

    search_box = build_search_box(num, search, sort, order, prefix)
    navigation = build_navigation(start, num, total, prefix + url_base)
    navigation2 = build_navigation(start, num, total, prefix + url_base)
    bookt = TABLE(id='listing')

    body = BODY(logo, search_box, navigation, HR(CLASS('spacer')), bookt,
                HR(CLASS('spacer')), navigation2)

    # Book list {{{
    for book in books:
        thumbnail = TD(
            IMG(type='image/jpeg',
                border='0',
                src=prefix + '/get/thumb/%s' % book['id']), CLASS('thumbnail'))

        data = TD()
        for fmt in book['formats'].split(','):
            if not fmt or fmt.lower().startswith('original_'):
                continue
            file_extension = "kepub.epub" if have_kobo_browser and fmt.lower(
            ) == "kepub" else fmt
            a = quote(ascii_filename(book['authors']))
            t = quote(ascii_filename(book['title']))
            s = SPAN(
                A(fmt.lower(),
                  href=prefix + '/get/%s/%s-%s_%d.%s' %
                  (fmt, a, t, book['id'], file_extension.lower())),
                CLASS('button'))
            s.tail = u''
            data.append(s)

        div = DIV(CLASS('data-container'))
        data.append(div)

        series = u'[%s - %s]'%(book['series'], book['series_index']) \
                if book['series'] else ''
        tags = u'Tags=[%s]' % book['tags'] if book['tags'] else ''

        ctext = ''
        for key in CKEYS:
            val = book.get(key, None)
            if val:
                ctext += '%s=[%s] ' % tuple(val.split(':#:'))

        first = SPAN(
            u'\u202f%s %s by %s' % (book['title'], series, book['authors']),
            CLASS('first-line'))
        div.append(first)
        second = SPAN(
            u'%s - %s %s %s' % (book['size'], book['timestamp'], tags, ctext),
            CLASS('second-line'))
        div.append(second)

        bookt.append(TR(thumbnail, data))
    # }}}

    body.append(
        DIV(A(_('Switch to the full interface (non-mobile interface)'),
              href=prefix + "/browse",
              style="text-decoration: none; color: blue",
              title=_('The full interface gives you many more features, '
                      'but it may not work well on a small screen')),
            style="text-align:center"))
    return HTML(
        HEAD(
            TITLE(__appname__ + ' Library'),
            LINK(rel='icon',
                 href='http://calibre-ebook.com/favicon.ico',
                 type='image/x-icon'),
            LINK(rel='stylesheet',
                 type='text/css',
                 href=prefix + '/mobile/style.css'),
            LINK(rel='apple-touch-icon',
                 href="/static/calibre.png")),  # End head
        body)  # End html
Ejemplo n.º 42
0
def get_download_filename(response):
    filename = get_download_filename_from_response(response)
    filename, ext = os.path.splitext(filename)
    filename = filename[:60] + ext
    filename = ascii_filename(filename)
    return filename
Ejemplo n.º 43
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(
                    100,
                    (ascii_filename(unicode(oeb_book.metadata.title[0])), ))[0]
            with open(os.path.join(tdir, fname + u'.html'), 'wb') as tf:
                if isinstance(html, unicode):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    tf.write(htmlizer.get_css(oeb_book))

            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, u'images')):
                    os.makedirs(os.path.join(tdir, u'images'))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            data = unicode(
                                etree.tostring(item.data, encoding=unicode))
                        else:
                            data = item.data
                        fname = os.path.join(tdir, u'images',
                                             images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except:
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(StringIO(etree.tostring(
                    oeb_book.metadata.to_opf1())))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            htmlz = ZipFile(output_path, 'w')
            htmlz.add_dir(tdir)
Ejemplo n.º 44
0
    def to_xml(self, write_files=True):
        bookinfo = u'<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
        bookinfo += u'<Title reading="%s">%s</Title>\n' % (
            self.metadata.title_reading, self.metadata.title)
        bookinfo += u'<Author reading="%s">%s</Author>\n' % (
            self.metadata.author_reading, self.metadata.author)
        bookinfo += u'<BookID>%s</BookID>\n' % (self.metadata.book_id, )
        bookinfo += u'<Publisher reading="">%s</Publisher>\n' % (
            self.metadata.publisher, )
        bookinfo += u'<Label reading="">%s</Label>\n' % (self.metadata.label, )
        bookinfo += u'<Category reading="">%s</Category>\n' % (
            self.metadata.category, )
        bookinfo += u'<Classification reading="">%s</Classification>\n' % (
            self.metadata.classification, )
        bookinfo += u'<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n' % (
            self.metadata.free_text, )
        th = self.doc_info.thumbnail
        if th:
            prefix = ascii_filename(self.metadata.title)
            bookinfo += u'<CThumbnail file="%s" />\n' % (
                prefix + '_thumbnail.' + self.doc_info.thumbnail_extension, )
            if write_files:
                open(
                    prefix + '_thumbnail.' + self.doc_info.thumbnail_extension,
                    'wb').write(th)
        bookinfo += u'<Language reading="">%s</Language>\n' % (
            self.doc_info.language, )
        bookinfo += u'<Creator reading="">%s</Creator>\n' % (
            self.doc_info.creator, )
        bookinfo += u'<Producer reading="">%s</Producer>\n' % (
            self.doc_info.producer, )
        bookinfo += u'<SumPage>%s</SumPage>\n</DocInfo>\n</Info>\n%s</BookInformation>\n' % (
            self.doc_info.page, self.toc)
        pages = u''
        done_main = False
        pt_id = -1
        for page_tree in self:
            if not done_main:
                done_main = True
                pages += u'<Main>\n'
                close = u'</Main>\n'
                pt_id = page_tree.id
            else:
                pages += u'<PageTree objid="%d">\n' % (page_tree.id, )
                close = u'</PageTree>\n'
            for page in page_tree:
                pages += unicode_type(page)
            pages += close
        traversed_objects = [
            int(i) for i in re.findall(r'objid="(\w+)"', pages)
        ] + [pt_id]

        objects = u'\n<Objects>\n'
        styles = u'\n<Style>\n'
        for obj in self.objects:
            obj = self.objects[obj]
            if obj.id in traversed_objects:
                continue
            if isinstance(obj, (Font, Text, TOCObject)):
                continue
            if isinstance(obj, StyleObject):
                styles += unicode_type(obj)
            else:
                objects += unicode_type(obj)
        styles += '</Style>\n'
        objects += '</Objects>\n'
        if write_files:
            self.write_files()
        return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>'
Ejemplo n.º 45
0
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        import uuid
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer, rewrite_links,
                                             urlnormalize, urldefrag,
                                             BINARY_MIME, OEB_STYLES, xpath)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from calibre.ebooks.html.input import get_filelist
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log,
                             None,
                             opts,
                             self,
                             encoding=opts.input_encoding,
                             populate=False)
        self.oeb = oeb

        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang().replace('_', '-'))
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', self.oeb.translate(__('Unknown')))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path),
                                         log,
                                         ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html',
                                             href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data,
                                     partial(self.resource_adder, base=dpath))

        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear: continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        for title, item in izip(use, self.oeb.spine):
            if not item.linear: continue
            toc.add(title, item.href)

        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb
Ejemplo n.º 46
0
 def browse_get_book_args(self, mi, id_, add_category_links=False):
     fmts = self.db.formats(id_, index_is_id=True)
     if not fmts:
         fmts = ''
     fmts = [x.lower() for x in fmts.split(',') if x]
     pf = prefs['output_format'].lower()
     try:
         fmt = pf if pf in fmts else fmts[0]
     except:
         fmt = None
     args = {
         'id': id_,
         'mi': mi,
     }
     ccache = self.categories_cache() if add_category_links else {}
     ftitle = fauthors = ''
     for key in mi.all_field_keys():
         val = mi.format_field(key)[1]
         if not val:
             val = ''
         if key == 'title':
             ftitle = xml(val, True)
         elif key == 'authors':
             fauthors = xml(val, True)
         if add_category_links:
             added_key = False
             fm = mi.metadata_for_field(key)
             if val and fm and fm['is_category'] and not fm['is_csp'] and\
                     key != 'formats' and fm['datatype'] not in ['rating']:
                 categories = mi.get(key)
                 if isinstance(categories, basestring):
                     categories = [categories]
                 dbtags = []
                 for category in categories:
                     dbtag = None
                     for tag in ccache[key]:
                         if tag.name == category:
                             dbtag = tag
                             break
                     dbtags.append(dbtag)
                 if None not in dbtags:
                     vals = []
                     for tag in dbtags:
                         tval = (
                             '<a title="Browse books by {3}: {0}"'
                             ' href="{1}" class="details_category_link">{2}</a>'
                         )
                         href='%s/browse/matches/%s/%s' % \
                         (self.opts.url_prefix, quote(tag.category), quote(str(tag.id)))
                         vals.append(
                             tval.format(
                                 xml(tag.name, True), xml(href, True),
                                 xml(val if len(dbtags) == 1 else tag.name),
                                 xml(key, True)))
                     join = ' &amp; ' if key == 'authors' or \
                                         (fm['is_custom'] and
                                          fm['display'].get('is_names', False)) \
                                      else ', '
                     args[key] = join.join(vals)
                     added_key = True
             if not added_key:
                 args[key] = xml(val, True)
         else:
             args[key] = xml(val, True)
     fname = quote(
         ascii_filename(ftitle) + ' - ' + ascii_filename(fauthors))
     return args, fmt, fmts, fname
Ejemplo n.º 47
0
 def name(self):
     return ascii_filename(self.theme.title).replace(' ', '_').replace('.', '_').lower()
Ejemplo n.º 48
0
    def xml(self,
            start='0',
            num='50',
            sort=None,
            search=None,
            _=None,
            order='ascending'):
        '''
        Serves metadata from the calibre database as XML.

        :param sort: Sort results by ``sort``. Can be one of `title,author,rating`.
        :param search: Filter results by ``search`` query. See :class:`SearchQueryParser` for query syntax
        :param start,num: Return the slice `[start:start+num]` of the sorted and filtered results
        :param _: Firefox seems to sometimes send this when using XMLHttpRequest with no caching
        '''
        try:
            start = int(start)
        except ValueError:
            raise cherrypy.HTTPError(400,
                                     'start: %s is not an integer' % start)
        try:
            num = int(num)
        except ValueError:
            raise cherrypy.HTTPError(400, 'num: %s is not an integer' % num)

        order = order.lower().strip() == 'ascending'

        if not search:
            search = ''
        if isbytestring(search):
            search = search.decode('UTF-8')

        ids = self.search_for_books(search)

        FM = self.db.FIELD_MAP

        items = [r for r in iter(self.db) if r[FM['id']] in ids]
        if sort is not None:
            self.sort(items, sort, order)

        books = []

        def serialize(x):
            if isinstance(x, unicode):
                return x
            if isbytestring(x):
                return x.decode(preferred_encoding, 'replace')
            return unicode(x)

        # This method uses its own book dict, not the Metadata dict. The loop
        # below could be changed to use db.get_metadata instead of reading
        # info directly from the record made by the view, but it doesn't seem
        # worth it at the moment.
        for record in items[start:start + num]:
            kwargs = {}
            aus = record[FM['authors']] if record[
                FM['authors']] else __builtin__._('Unknown')
            authors = '|'.join([i.replace('|', ',') for i in aus.split(',')])
            kwargs['authors'] = authors

            kwargs['series_index'] = \
                fmt_sidx(float(record[FM['series_index']]))

            for x in ('timestamp', 'pubdate'):
                kwargs[x] = strftime('%Y/%m/%d %H:%M:%S', record[FM[x]])

            for x in ('id', 'title', 'sort', 'author_sort', 'rating', 'size'):
                kwargs[x] = serialize(record[FM[x]])

            for x in ('formats', 'series', 'tags', 'publisher', 'comments',
                      'identifiers'):
                y = record[FM[x]]
                if x == 'tags':
                    y = format_tag_string(y, ',', ignore_max=True)
                kwargs[x] = serialize(y) if y else ''

            isbn = self.db.isbn(record[FM['id']], index_is_id=True)
            kwargs['isbn'] = serialize(isbn if isbn else '')

            kwargs['safe_title'] = ascii_filename(kwargs['title'])

            c = kwargs.pop('comments')

            CFM = self.db.field_metadata
            CKEYS = [
                key for key in sorted(custom_fields_to_display(self.db),
                                      key=lambda x: sort_key(CFM[x]['name']))
            ]
            custcols = []
            for key in CKEYS:

                def concat(name, val):
                    return '%s:#:%s' % (name, unicode(val))

                mi = self.db.get_metadata(record[CFM['id']['rec_index']],
                                          index_is_id=True)
                name, val = mi.format_field(key)
                if not val:
                    continue
                datatype = CFM[key]['datatype']
                if datatype in ['comments']:
                    continue
                k = str('CF_' + key[1:])
                name = CFM[key]['name']
                custcols.append(k)
                if datatype == 'text' and CFM[key]['is_multiple']:
                    kwargs[k] = \
                        concat('#T#'+name,
                               format_tag_string(val,
                                   CFM[key]['is_multiple']['ui_to_list'],
                                   ignore_max=True,
                                   joinval=CFM[key]['is_multiple']['list_to_ui']))
                else:
                    kwargs[k] = concat(name, val)
            kwargs['custcols'] = ','.join(custcols)
            books.append(E.book(c, **kwargs))

        updated = self.db.last_modified()
        kwargs = dict(start=str(start),
                      updated=updated.strftime('%Y-%m-%dT%H:%M:%S+00:00'),
                      total=str(len(ids)),
                      num=str(len(books)))
        ans = E.library(*books, **kwargs)

        cherrypy.response.headers['Content-Type'] = 'text/xml'
        cherrypy.response.headers['Last-Modified'] = self.last_modified(
            updated)

        return etree.tostring(ans,
                              encoding='utf-8',
                              pretty_print=True,
                              xml_declaration=True)
Ejemplo n.º 49
0
def custom_recipe_filename(id_, title):
    from calibre.utils.filenames import ascii_filename
    return ascii_filename(title[:50]) + \
                        ('_%s.recipe'%id_)
Ejemplo n.º 50
0
 def process_images(self, soup, baseurl):
     diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
     if not os.path.exists(diskpath):
         os.mkdir(diskpath)
     c = 0
     for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and 'src'
                             in tag):  # noqa
         iurl = tag['src']
         if iurl.startswith('data:image/'):
             try:
                 data = b64decode(iurl.partition(',')[-1])
             except:
                 self.log.exception('Failed to decode embedded image')
                 continue
         else:
             if callable(self.image_url_processor):
                 iurl = self.image_url_processor(baseurl, iurl)
             if not urllib.parse.urlsplit(iurl).scheme:
                 iurl = urllib.parse.urljoin(baseurl, iurl, False)
             with self.imagemap_lock:
                 if iurl in self.imagemap:  # noqa
                     tag['src'] = self.imagemap[iurl]
                     continue
             try:
                 data = self.fetch_url(iurl)
                 if data == 'GIF89a\x01':
                     # Skip empty GIF files as PIL errors on them anyway
                     continue
             except Exception:
                 self.log.exception('Could not fetch image ', iurl)
                 continue
         c += 1
         fname = ascii_filename('img' + str(c))
         if isinstance(fname, str):
             fname = fname.encode('ascii', 'replace')
         data = self.preprocess_image_ext(
             data, iurl) if self.preprocess_image_ext is not None else data
         if data is None:
             continue
         itype = what(None, data)
         if itype == 'svg' or (itype is None and b'<svg' in data[:1024]):
             # SVG image
             imgpath = os.path.join(diskpath, fname + '.svg')
             with self.imagemap_lock:
                 self.imagemap[iurl] = imgpath
             with open(imgpath, 'wb') as x:
                 x.write(data)
             tag['src'] = imgpath
         else:
             try:
                 # Ensure image is valid
                 img = image_from_data(data)
                 if itype not in {'png', 'jpg', 'jpeg'}:
                     itype = 'png' if itype == 'gif' else 'jpeg'
                     data = image_to_data(img, fmt=itype)
                 if self.compress_news_images and itype in {'jpg', 'jpeg'}:
                     try:
                         data = self.rescale_image(data)
                     except Exception:
                         self.log.exception('failed to compress image ' +
                                            iurl)
                 # Moon+ apparently cannot handle .jpeg files
                 if itype == 'jpeg':
                     itype = 'jpg'
                 imgpath = os.path.join(diskpath, fname + '.' + itype)
                 with self.imagemap_lock:
                     self.imagemap[iurl] = imgpath
                 with open(imgpath, 'wb') as x:
                     x.write(data)
                 tag['src'] = imgpath
             except Exception:
                 traceback.print_exc()
                 continue