예제 #1
0
 def read_image(self, href):
     if href not in self.images:
         item = self.oeb.manifest.hrefs.get(
             href) or self.oeb.manifest.hrefs.get(urlquote(href))
         try:
             if item is None or not isinstance(item.data, bytes):
                 self.log.warning('Failed to find image:', href)
                 return
         except FileNotFoundError:
             self.log.warning('Failed to find image:', href)
             return
         try:
             fmt, width, height = identify(item.data)
         except Exception:
             self.log.warning('Replacing corrupted image with blank: %s' %
                              href)
             item.data = I('blank.png',
                           data=True,
                           allow_user_override=False)
             fmt, width, height = identify(item.data)
         image_fname = 'media/' + self.create_filename(href, fmt)
         image_rid = self.document_relationships.add_image(image_fname)
         self.images[href] = Image(image_rid, image_fname, width, height,
                                   fmt, item)
         item.unload_data_from_memory()
     return self.images[href]
예제 #2
0
파일: odt.py 프로젝트: MarioJC/calibre
def read_cover(stream, zin, mi, opfmeta, extract_cover):
    # search for an draw:image in a draw:frame with the name 'opf.cover'
    # if opf.metadata prop is false, just use the first image that
    # has a proper size (borrowed from docx)
    otext = odLoad(stream)
    cover_href = None
    cover_data = None
    cover_frame = None
    imgnum = 0
    for frm in otext.topnode.getElementsByType(odFrame):
        img = frm.getElementsByType(odImage)
        if len(img) == 0:
            continue
        i_href = img[0].getAttribute('href')
        try:
            raw = zin.read(i_href)
        except KeyError:
            continue
        try:
            fmt, width, height = identify(bytes(raw))
        except Exception:
            continue
        imgnum += 1
        if opfmeta and frm.getAttribute('name').lower() == u'opf.cover':
            cover_href = i_href
            cover_data = (fmt, raw)
            cover_frame = frm.getAttribute('name')  # could have upper case
            break
        if cover_href is None and imgnum == 1 and 0.8 <= height/width <= 1.8 and height*width >= 12000:
            # Pick the first image as the cover if it is of a suitable size
            cover_href = i_href
            cover_data = (fmt, raw)
            if not opfmeta:
                break

    if cover_href is not None:
        mi.cover = cover_href
        mi.odf_cover_frame = cover_frame
        if extract_cover:
            if not cover_data:
                raw = zin.read(cover_href)
                try:
                    fmt = identify(bytes(raw))[0]
                except Exception:
                    pass
                else:
                    cover_data = (fmt, raw)
            mi.cover_data = cover_data
예제 #3
0
def read_cover(stream, zin, mi, opfmeta, extract_cover):
    # search for an draw:image in a draw:frame with the name 'opf.cover'
    # if opf.metadata prop is false, just use the first image that
    # has a proper size (borrowed from docx)
    otext = odLoad(stream)
    cover_href = None
    cover_data = None
    cover_frame = None
    imgnum = 0
    for frm in otext.topnode.getElementsByType(odFrame):
        img = frm.getElementsByType(odImage)
        if len(img) == 0:
            continue
        i_href = img[0].getAttribute('href')
        try:
            raw = zin.read(i_href)
        except KeyError:
            continue
        try:
            fmt, width, height = identify(bytes(raw))
        except Exception:
            continue
        imgnum += 1
        if opfmeta and frm.getAttribute('name').lower() == u'opf.cover':
            cover_href = i_href
            cover_data = (fmt, raw)
            cover_frame = frm.getAttribute('name')  # could have upper case
            break
        if cover_href is None and imgnum == 1 and 0.8 <= height / width <= 1.8 and height * width >= 12000:
            # Pick the first image as the cover if it is of a suitable size
            cover_href = i_href
            cover_data = (fmt, raw)
            if not opfmeta:
                break

    if cover_href is not None:
        mi.cover = cover_href
        mi.odf_cover_frame = cover_frame
        if extract_cover:
            if not cover_data:
                raw = zin.read(cover_href)
                try:
                    fmt = identify(bytes(raw))[0]
                except Exception:
                    pass
                else:
                    cover_data = (fmt, raw)
            mi.cover_data = cover_data
예제 #4
0
파일: images.py 프로젝트: j-howell/calibre
 def read_image(self, href):
     if href not in self.images:
         item = self.oeb.manifest.hrefs.get(href)
         if item is None or not isinstance(item.data, bytes):
             return
         try:
             fmt, width, height = identify(item.data)
         except Exception:
             self.log.warning('Replacing corrupted image with blank: %s' % href)
             item.data = I('blank.png', data=True, allow_user_override=False)
             fmt, width, height = identify(item.data)
         image_fname = 'media/' + self.create_filename(href, fmt)
         image_rid = self.document_relationships.add_image(image_fname)
         self.images[href] = Image(image_rid, image_fname, width, height, fmt, item)
         item.unload_data_from_memory()
     return self.images[href]
예제 #5
0
파일: kfx.py 프로젝트: shadyproject/calibre
def read_metadata_kfx(stream, read_cover=True):
    ' Read the metadata.kfx file that is found in the sdr book folder for KFX files '
    c = Container(stream.read())
    m = extract_metadata(c.decode())

    # dump_metadata(m)

    def has(x):
        return m[x] and m[x][0]

    def get(x, single=True):
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ''
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(x):
        if tweaks['author_sort_copy_method'] != 'copy':
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + ' ' + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has('author'):
        mi.author_sort = get('author')
    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))
    if has('languages'):
        langs = list(
            filter(None,
                   (canonicalize_lang(x) for x in get('languages', False))))
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
예제 #6
0
def safe_img_data(container, name, mt):
    if 'svg' in mt:
        return 0, 0
    try:
        fmt, width, height = identify(container.name_to_abspath(name))
    except Exception:
        width = height = 0
    return width, height
예제 #7
0
파일: report.py 프로젝트: Riva3000/calibre
def safe_img_data(container, name, mt):
    if 'svg' in mt:
        return 0, 0
    try:
        fmt, width, height = identify(container.name_to_abspath(name))
    except Exception:
        width = height = 0
    return width, height
예제 #8
0
 def __init__(self, path_or_bytes):
     if not isinstance(path_or_bytes, bytes):
         with open(path_or_bytes, 'rb') as f:
             path_or_bytes = f.read()
     self.img_data = path_or_bytes
     fmt, width, height = identify(path_or_bytes)
     self.img, self.fmt = image_and_format_from_data(path_or_bytes)
     self.width, self.height = self.img.width(), self.img.height()
     self.cache_key = self.img.cacheKey()
예제 #9
0
파일: kfx.py 프로젝트: Riva3000/calibre
def read_metadata_kfx(stream, read_cover=True):
    ' Read the metadata.kfx file that is found in the sdr book folder for KFX files '
    c = Container(stream.read())
    m = extract_metadata(c.decode())
    # dump_metadata(m)

    def has(x):
        return m[x] and m[x][0]

    def get(x, single=True):
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ''
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(x):
        if tweaks['author_sort_copy_method'] != 'copy':
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + ' ' + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has('author'):
        mi.author_sort = get('author')
    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))
    if has('languages'):
        langs = list(filter(None, (canonicalize_lang(x) for x in get('languages', False))))
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
예제 #10
0
 def image_to_hexstring(self, data):
     # Images must be hex-encoded in 128 character lines
     data = save_cover_data_to(data)
     width, height = identify(data)[1:]
     lines = []
     v = memoryview(data)
     for i in range(0, len(data), 64):
         lines.append(hexlify(v[i:i + 64]))
     hex_string = b'\n'.join(lines).decode('ascii')
     return hex_string, width, height
예제 #11
0
파일: epub.py 프로젝트: Coi-l/calibre
 def workaround_ade_quirks(self, container, name):
     root = container.parsed(name)
     # ADE blows up floating images if their sizes are not specified
     for img in root.xpath('//*[local-name() = "img" and (@class = "float-right-img" or @class = "float-left-img")]'):
         if 'style' not in img.attrib:
             imgname = container.href_to_name(img.get('src'), name)
             fmt, width, height = identify(container.raw_data(imgname))
             if width == -1:
                 raise ValueError('Failed to read size of: %s' % imgname)
             img.set('style', 'width: %dpx; height: %dpx' % (width, height))
예제 #12
0
 def workaround_ade_quirks(self, container, name):
     root = container.parsed(name)
     # ADE blows up floating images if their sizes are not specified
     for img in root.xpath('//*[local-name() = "img" and (@class = "float-right-img" or @class = "float-left-img")]'):
         if 'style' not in img.attrib:
             imgname = container.href_to_name(img.get('src'), name)
             fmt, width, height = identify(container.raw_data(imgname))
             if width == -1:
                 raise ValueError('Failed to read size of: %s' % imgname)
             img.set('style', 'width: %dpx; height: %dpx' % (width, height))
예제 #13
0
파일: cover.py 프로젝트: wynick27/calibre
 def inspect_cover(self, href):
     from calibre.ebooks.oeb.base import urlnormalize
     for x in self.oeb.manifest:
         if x.href == urlnormalize(href):
             try:
                 raw = x.data
                 return identify(raw)[1:]
             except Exception:
                 self.log.exception('Failed to read cover image dimensions')
     return -1, -1
예제 #14
0
파일: canvas.py 프로젝트: Coi-l/calibre
 def load_image(self, data):
     self.is_valid = False
     try:
         fmt = identify(data)[0].encode('ascii')
     except Exception:
         fmt = b''
     self.original_image_format = fmt.decode('ascii').lower()
     self.selection_state.reset()
     self.original_image_data = data
     self.current_image = i = self.original_image = (
         QImage.fromData(data, format=fmt) if fmt else QImage.fromData(data))
     self.is_valid = not i.isNull()
     self.update()
     self.image_changed.emit(self.current_image)
예제 #15
0
 def load_image(self, data):
     self.is_valid = False
     try:
         fmt = identify(data)[0].encode('ascii')
     except Exception:
         fmt = b''
     self.original_image_format = fmt.decode('ascii').lower()
     self.selection_state.reset()
     self.original_image_data = data
     self.current_image = i = self.original_image = (QImage.fromData(
         data, format=fmt) if fmt else QImage.fromData(data))
     self.is_valid = not i.isNull()
     self.update()
     self.image_changed.emit(self.current_image)
예제 #16
0
파일: content.py 프로젝트: Mymei2/calibre
 def test(src, url, sz=None):
     raw = P(src, data=True)
     conn.request('GET', url)
     r = conn.getresponse()
     self.ae(r.status, httplib.OK)
     data = r.read()
     if sz is None:
         self.ae(data, raw)
     else:
         self.ae(sz, identify(data)[1])
     test_response(r)
     conn.request('GET', url, headers={'If-None-Match':r.getheader('ETag')})
     r = conn.getresponse()
     self.ae(r.status, httplib.NOT_MODIFIED)
     self.ae(b'', r.read())
예제 #17
0
파일: content.py 프로젝트: qving11/calibre
 def test(src, url, sz=None):
     raw = P(src, data=True)
     conn.request('GET', url)
     r = conn.getresponse()
     self.ae(r.status, http_client.OK)
     data = r.read()
     if sz is None:
         self.ae(data, raw)
     else:
         self.ae(sz, identify(data)[1])
     test_response(r)
     conn.request('GET', url, headers={'If-None-Match':r.getheader('ETag')})
     r = conn.getresponse()
     self.ae(r.status, http_client.NOT_MODIFIED)
     self.ae(b'', r.read())
예제 #18
0
def encode_thumbnail(thumbnail):
    '''
    Encode the image part of a thumbnail, then return the 3 part tuple
    '''
    from calibre.utils.imghdr import identify
    if thumbnail is None:
        return None
    if not isinstance(thumbnail, (tuple, list)):
        try:
            width, height = identify(as_bytes(thumbnail))[1:]
            if width < 0 or height < 0:
                return None
            thumbnail = (width, height, thumbnail)
        except Exception:
            return None
    return (thumbnail[0], thumbnail[1], as_base64_unicode(thumbnail[2]))
예제 #19
0
def encode_thumbnail(thumbnail):
    '''
    Encode the image part of a thumbnail, then return the 3 part tuple
    '''
    from calibre.utils.imghdr import identify
    if thumbnail is None:
        return None
    if not isinstance(thumbnail, (tuple, list)):
        try:
            width, height = identify(as_bytes(thumbnail))[1:]
            if width < 0 or height < 0:
                return None
            thumbnail = (width, height, thumbnail)
        except Exception:
            return None
    return (thumbnail[0], thumbnail[1], as_base64_unicode(thumbnail[2]))
예제 #20
0
def get_cover(docx):
    doc = docx.document
    get = docx.namespace.get
    images = docx.namespace.XPath(
        '//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]')
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, 'r:embed') or get(image, 'r:id')
        if rid in rid_map:
            try:
                raw = docx.read(rid_map[rid])
                fmt, width, height = identify(bytes(raw))
            except Exception:
                continue
            if width < 0 or height < 0:
                continue
            if 0.8 <= height/width <= 1.8 and height*width >= 160000:
                return (fmt, raw)
예제 #21
0
파일: covers.py 프로젝트: zyhong/calibre
def process_result(log, result):
    plugin, data = result
    try:
        if getattr(plugin, 'auto_trim_covers', False):
            img = image_from_data(data)
            nimg = remove_borders_from_image(img)
            if nimg is not img:
                data = image_to_data(nimg)
        fmt, width, height = identify(data)
        if width < 0 or height < 0:
            raise ValueError('Could not read cover image dimensions')
        if width < 50 or height < 50:
            raise ValueError('Image too small')
        data = save_cover_data_to(data)
    except Exception:
        log.exception('Invalid cover from', plugin.name)
        return None
    return (plugin, width, height, fmt, data)
예제 #22
0
파일: covers.py 프로젝트: Coi-l/calibre
def process_result(log, result):
    plugin, data = result
    try:
        if getattr(plugin, 'auto_trim_covers', False):
            img = image_from_data(data)
            nimg = remove_borders_from_image(img)
            if nimg is not img:
                data = image_to_data(nimg)
        fmt, width, height = identify(data)
        if width < 0 or height < 0:
            raise ValueError('Could not read cover image dimensions')
        if width < 50 or height < 50:
            raise ValueError('Image too small')
        data = save_cover_data_to(data)
    except Exception:
        log.exception('Invalid cover from', plugin.name)
        return None
    return (plugin, width, height, fmt, data)
예제 #23
0
파일: docx.py 프로젝트: Coi-l/calibre
def get_cover(docx):
    doc = docx.document
    get = docx.namespace.get
    images = docx.namespace.XPath(
        '//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]')
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, 'r:embed') or get(image, 'r:id')
        if rid in rid_map:
            try:
                raw = docx.read(rid_map[rid])
                fmt, width, height = identify(bytes(raw))
            except Exception:
                continue
            if width < 0 or height < 0:
                continue
            if 0.8 <= height/width <= 1.8 and height*width >= 160000:
                return (fmt, raw)
예제 #24
0
파일: fb2.py 프로젝트: Riva3000/calibre
def _parse_cover_data(root, imgid, mi, ctx):
    from calibre.ebooks.fb2 import base64_decode
    elm_binary = ctx.XPath('//fb:binary[@id="%s"]'%imgid)(root)
    if elm_binary:
        mimetype = elm_binary[0].get('content-type', 'image/jpeg')
        mime_extensions = guess_all_extensions(mimetype)

        if not mime_extensions and mimetype.startswith('image/'):
            mimetype_fromid = guess_type(imgid)[0]
            if mimetype_fromid and mimetype_fromid.startswith('image/'):
                mime_extensions = guess_all_extensions(mimetype_fromid)

        if mime_extensions:
            pic_data = elm_binary[0].text
            if pic_data:
                cdata = base64_decode(pic_data.strip())
                fmt = identify(bytes(cdata))[0]
                mi.cover_data = (fmt, cdata)
        else:
            prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" % (mimetype, imgid))
예제 #25
0
파일: fb2.py 프로젝트: zhanglix/calibre
def _parse_cover_data(root, imgid, mi, ctx):
    from calibre.ebooks.fb2 import base64_decode
    elm_binary = ctx.XPath('//fb:binary[@id="%s"]'%imgid)(root)
    if elm_binary:
        mimetype = elm_binary[0].get('content-type', 'image/jpeg')
        mime_extensions = guess_all_extensions(mimetype)

        if not mime_extensions and mimetype.startswith('image/'):
            mimetype_fromid = guess_type(imgid)[0]
            if mimetype_fromid and mimetype_fromid.startswith('image/'):
                mime_extensions = guess_all_extensions(mimetype_fromid)

        if mime_extensions:
            pic_data = elm_binary[0].text
            if pic_data:
                cdata = base64_decode(pic_data.strip())
                fmt = identify(cdata)[0]
                mi.cover_data = (fmt, cdata)
        else:
            prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" % (mimetype, imgid))
예제 #26
0
파일: rtfml.py 프로젝트: Farb/calibre
    def image_to_hexstring(self, data):
        data = save_cover_data_to(data)
        width, height = identify(data)[1:]

        raw_hex = ''
        for char in data:
            raw_hex += hex(ord(char)).replace('0x', '').rjust(2, '0')

        # Images must be broken up so that they are no longer than 129 chars
        # per line
        hex_string = ''
        col = 1
        for char in raw_hex:
            if col == 129:
                hex_string += '\n'
                col = 1
            col += 1
            hex_string += char

        return (hex_string, width, height)
예제 #27
0
    def image_to_hexstring(self, data):
        data = save_cover_data_to(data)
        width, height = identify(data)[1:]

        raw_hex = ''
        for char in data:
            raw_hex += hex(ord(char)).replace('0x', '').rjust(2, '0')

        # Images must be broken up so that they are no longer than 129 chars
        # per line
        hex_string = ''
        col = 1
        for char in raw_hex:
            if col == 129:
                hex_string += '\n'
                col = 1
            col += 1
            hex_string += char

        return (hex_string, width, height)
예제 #28
0
def get_cover_data(stream, ext):  # {{{
    from calibre.ebooks.metadata.meta import get_metadata
    old = prefs['read_file_metadata']
    if not old:
        prefs['read_file_metadata'] = True
    cdata = area = None

    try:
        with stream:
            mi = get_metadata(stream, ext)
        if mi.cover and os.access(mi.cover, os.R_OK):
            cdata = open(mi.cover).read()
        elif mi.cover_data[1] is not None:
            cdata = mi.cover_data[1]
        if cdata:
            fmt, width, height = identify(cdata)
            area = width*height
    except:
        cdata = area = None

    if old != prefs['read_file_metadata']:
        prefs['read_file_metadata'] = old

    return cdata, area
예제 #29
0
    def mobimlize_elem(self,
                       elem,
                       stylizer,
                       bstate,
                       istates,
                       ignore_valign=False):
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return
        style = stylizer.style(elem)
        # <mbp:frame-set/> does not exist lalalala
        if ((style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot')
             or style['visibility'] == 'hidden') and
                elem.get('data-calibre-jacket-searchable-tags', None) != '1'):
            id_ = elem.get('id', None)
            if id_:
                # Keep anchors so people can use display:none
                # to generate hidden TOCs
                tail = elem.tail
                elem.clear()
                elem.text = None
                elem.set('id', id_)
                elem.tail = tail
                elem.tag = XHTML('a')
            else:
                return
        tag = barename(elem.tag)
        istate = copy.copy(istates[-1])
        istate.rendered = False
        istate.list_num = 0
        if tag == 'ol' and 'start' in elem.attrib:
            try:
                istate.list_num = int(elem.attrib['start']) - 1
            except:
                pass
        istates.append(istate)
        left = 0
        display = style['display']
        if display == 'table-cell':
            display = 'inline'
        elif display.startswith('table'):
            display = 'block'
        isblock = (not display.startswith('inline')
                   and style['display'] != 'none')
        isblock = isblock and style['float'] == 'none'
        isblock = isblock and tag != 'br'
        if isblock:
            bstate.para = None
            istate.halign = style['text-align']
            rawti = style._get('text-indent')
            istate.indent = style['text-indent']
            if hasattr(rawti, 'strip') and '%' in rawti:
                # We have a percentage text indent, these can come out looking
                # too large if the user chooses a wide output profile like
                # tablet
                istate.indent = min(style._unit_convert(rawti, base=500),
                                    istate.indent)
            if style['margin-left'] == 'auto' \
               and style['margin-right'] == 'auto':
                istate.halign = 'center'
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            if tag != 'body':
                left = margin + padding
            istate.left += left
            vmargin = asfloat(style['margin-top'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-top'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        elif not istate.href:
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            lspace = margin + padding
            if lspace > 0:
                spaces = int(round((lspace * 3) / style['font-size']))
                elem.text = (u'\xa0' * spaces) + (elem.text or '')
            margin = asfloat(style['margin-right'])
            padding = asfloat(style['padding-right'])
            rspace = margin + padding
            if rspace > 0:
                spaces = int(round((rspace * 3) / style['font-size']))
                if len(elem) == 0:
                    elem.text = (elem.text or '') + (u'\xa0' * spaces)
                else:
                    last = elem[-1]
                    last.text = (last.text or '') + (u'\xa0' * spaces)
        if bstate.content and style['page-break-before'] in PAGE_BREAKS:
            bstate.pbreak = True
        istate.fsize = self.mobimlize_font(style['font-size'])
        istate.italic = True if style['font-style'] == 'italic' else False
        weight = style['font-weight']
        istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
        istate.preserve = style['white-space'] == 'pre'
        istate.pre_wrap = style['white-space'] == 'pre-wrap'
        istate.bgcolor = style['background-color']
        istate.fgcolor = style['color']
        istate.strikethrough = style.effective_text_decoration == 'line-through'
        istate.underline = style.effective_text_decoration == 'underline'
        ff = style['font-family'].lower() if hasattr(style['font-family'],
                                                     'lower') else ''
        if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
            istate.family = 'monospace'
        elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff
              or 'arial' in ff or 'helvetica' in ff):
            istate.family = 'sans-serif'
        else:
            istate.family = 'serif'
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
            istate.ids.add(elem.attrib['name'])
        if tag == 'a' and 'href' in elem.attrib:
            istate.href = elem.attrib['href']
        istate.attrib.clear()
        if tag == 'img' and 'src' in elem.attrib:
            istate.attrib['src'] = elem.attrib['src']
            istate.attrib['align'] = 'baseline'
            cssdict = style.cssdict()
            valign = cssdict.get('vertical-align', None)
            if valign in ('top', 'bottom', 'middle'):
                istate.attrib['align'] = valign
            for prop in ('width', 'height'):
                if cssdict[prop] != 'auto':
                    value = style[prop]
                    if value == getattr(self.profile, prop):
                        result = '100%'
                    else:
                        # Amazon's renderer does not support
                        # img sizes in units other than px
                        # See #7520 for test case
                        try:
                            pixs = int(
                                round(float(value) / (72. / self.profile.dpi)))
                        except:
                            continue
                        result = str(pixs)
                    istate.attrib[prop] = result
            if 'width' not in istate.attrib or 'height' not in istate.attrib:
                href = self.current_spine_item.abshref(elem.attrib['src'])
                try:
                    item = self.oeb.manifest.hrefs[urlnormalize(href)]
                except:
                    self.oeb.logger.warn('Failed to find image:', href)
                else:
                    try:
                        width, height = identify(item.data)[1:]
                    except Exception:
                        self.oeb.logger.warn('Invalid image:', href)
                    else:
                        if 'width' not in istate.attrib and 'height' not in \
                                    istate.attrib:
                            istate.attrib['width'] = str(width)
                            istate.attrib['height'] = str(height)
                        else:
                            ar = float(width) / float(height)
                            if 'width' not in istate.attrib:
                                try:
                                    width = int(istate.attrib['height']) * ar
                                except:
                                    pass
                                istate.attrib['width'] = str(int(width))
                            else:
                                try:
                                    height = int(istate.attrib['width']) / ar
                                except:
                                    pass
                                istate.attrib['height'] = str(int(height))
                        item.unload_data_from_memory()
        elif tag == 'hr' and asfloat(style['width']) > 0 and style._get(
                'width') not in {'100%', 'auto'}:
            raww = style._get('width')
            if hasattr(raww, 'strip') and '%' in raww:
                istate.attrib['width'] = raww
            else:
                prop = style['width'] / self.profile.width
                istate.attrib['width'] = "%d%%" % int(round(prop * 100))
        elif display == 'table':
            tag = 'table'
        elif display == 'table-row':
            tag = 'tr'
        elif display == 'table-cell':
            tag = 'td'
        if tag in TABLE_TAGS and self.ignore_tables:
            tag = 'span' if tag == 'td' else 'div'

        if tag in ('table', 'td', 'tr'):
            col = style.backgroundColor
            if col:
                elem.set('bgcolor', col)
            css = style.cssdict()
            if 'border' in css or 'border-width' in css:
                elem.set('border', '1')
        if tag in TABLE_TAGS:
            for attr in ('rowspan', 'colspan', 'width', 'border', 'scope',
                         'bgcolor'):
                if attr in elem.attrib:
                    istate.attrib[attr] = elem.attrib[attr]
        if tag == 'q':
            t = elem.text
            if not t:
                t = ''
            elem.text = u'\u201c' + t
            t = elem.tail
            if not t:
                t = ''
            elem.tail = u'\u201d' + t
        text = None
        if elem.text:
            if istate.preserve or istate.pre_wrap:
                text = elem.text
            elif (len(elem) > 0 and isspace(elem.text)
                  and hasattr(elem[0].tag, 'rpartition')
                  and elem[0].tag.rpartition('}')[-1] not in INLINE_TAGS):
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
        not_baseline = valign in ('super', 'sub', 'text-top', 'text-bottom',
                                  'top', 'bottom') or (isinstance(
                                      valign,
                                      (float, int)) and abs(valign) != 0)
        issup = valign in ('super', 'text-top',
                           'top') or (isinstance(valign,
                                                 (float, int)) and valign > 0)
        vtag = 'sup' if issup else 'sub'
        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
            vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
            vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
            self.mobimlize_elem(elem,
                                stylizer,
                                vbstate,
                                istates,
                                ignore_valign=True)
            if len(istates) > 0:
                istates.pop()
            if len(istates) == 0:
                istates.append(FormatState())
            at_start = bstate.para is None
            if at_start:
                self.mobimlize_content('span', '', bstate, istates)
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
                vtag = etree.SubElement(vtag, XHTML('small'))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
                        vtag.append(child)
                    else:
                        break
                if vbstate.para is not None:
                    if vbstate.para.text:
                        vtag.text = vbstate.para.text
                    for child in vbstate.para:
                        vtag.append(child)
                return

        if tag == 'blockquote':
            old_mim = self.opts.mobi_ignore_margins
            self.opts.mobi_ignore_margins = False

        if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
                # We have an id but no text and no children, the id should still
                # be added.
                istate.ids and tag in ('a', 'span', 'i', 'b', 'u')
                and len(elem) == 0)):
            if tag == 'li' and len(istates) > 1 and 'value' in elem.attrib:
                try:
                    value = int(elem.attrib['value'])
                    istates[-2].list_num = value - 1
                except:
                    pass
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            tail = None
            if child.tail:
                if istate.preserve or istate.pre_wrap:
                    tail = child.tail
                elif bstate.para is None and isspace(child.tail):
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)

        if tag == 'blockquote':
            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
            para = bstate.para
            if para is not None and para.text == u'\xa0' and len(para) < 1:
                if style.height > 2:
                    para.getparent().replace(para, etree.Element(XHTML('br')))
                else:
                    # This is too small to be rendered effectively, drop it
                    para.getparent().remove(para)
            bstate.para = None
            bstate.istate = None
            vmargin = asfloat(style['margin-bottom'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-bottom'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        if bstate.nested and bstate.nested[-1].tag == elem.tag:
            bstate.nested.pop()
        istates.pop()
예제 #30
0
파일: cleanup.py 프로젝트: Coi-l/calibre
def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath):
    # Move <hr>s outside paragraphs, if possible.
    pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6')))
    for hr in root.xpath('//span/hr'):
        p = pancestor(hr)
        if p:
            p = p[0]
            descendants = tuple(p.iterdescendants())
            if descendants[-1] is hr:
                parent = p.getparent()
                idx = parent.index(p)
                parent.insert(idx+1, hr)
                hr.tail = '\n\t'

    # Merge consecutive spans that have the same styling
    current_run = []
    for span in root.xpath('//span[not(@style or @lang)]'):
        if not current_run:
            current_run.append(span)
        else:
            last = current_run[-1]
            if mergeable(last, span):
                current_run.append(span)
            else:
                if len(current_run) > 1:
                    merge_run(current_run)
                current_run = [span]

    # Remove unnecessary span tags that are the only child of a parent block
    # element
    class_map = dict(styles.classes.itervalues())
    parents = ('p', 'div') + tuple('h%d' % i for i in xrange(1, 7))
    for parent in root.xpath('//*[(%s) and count(span)=1]' % ' or '.join('name()="%s"' % t for t in parents)):
        if len(parent) == 1 and not parent.text and not parent[0].tail and not parent[0].get('id', None):
            # We have a block whose contents are entirely enclosed in a <span>
            span = parent[0]
            span_class = span.get('class', None)
            span_css = class_map.get(span_class, {})
            if liftable(span_css):
                pclass = parent.get('class', None)
                if span_class:
                    pclass = (pclass + ' ' + span_class) if pclass else span_class
                    parent.set('class', pclass)
                parent.text = span.text
                parent.remove(span)
                if span.get('lang'):
                    parent.set('lang', span.get('lang'))
                for child in span:
                    parent.append(child)

    # Make spans whose only styling is bold or italic into <b> and <i> tags
    for span in root.xpath('//span[@class and not(@style)]'):
        css = class_map.get(span.get('class', None), {})
        if len(css) == 1:
            if css == {'font-style':'italic'}:
                span.tag = 'i'
                del span.attrib['class']
            elif css == {'font-weight':'bold'}:
                span.tag = 'b'
                del span.attrib['class']

    # Get rid of <span>s that have no styling
    for span in root.xpath('//span[not(@class or @id or @style or @lang)]'):
        lift(span)

    # Convert <p><br style="page-break-after:always"> </p> style page breaks
    # into something the viewer will render as a page break
    for p in root.xpath('//p[br[@style="page-break-after:always"]]'):
        if len(p) == 1 and (not p[0].tail or not p[0].tail.strip()):
            p.remove(p[0])
            prefix = p.get('style', '')
            if prefix:
                prefix += '; '
            p.set('style', prefix + 'page-break-after:always')
            p.text = NBSP if not p.text else p.text

    if detect_cover:
        # Check if the first image in the document is possibly a cover
        img = root.xpath('//img[@src][1]')
        if img:
            img = img[0]
            path = os.path.join(dest_dir, img.get('src'))
            if os.path.exists(path) and before_count(root, img, limit=10) < 5:
                from calibre.utils.imghdr import identify
                try:
                    with lopen(path, 'rb') as imf:
                        fmt, width, height = identify(imf)
                except:
                    width, height, fmt = 0, 0, None  # noqa
                del fmt
                try:
                    is_cover = 0.8 <= height/width <= 1.8 and height*width >= 160000
                except ZeroDivisionError:
                    is_cover = False
                if is_cover:
                    log.debug('Detected an image that looks like a cover')
                    img.getparent().remove(img)
                    return path
예제 #31
0
파일: cleanup.py 프로젝트: vrosnet/calibre
def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath):
    # Move <hr>s outside paragraphs, if possible.
    pancestor = XPath('|'.join('ancestor::%s[1]' % x
                               for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5',
                                         'h6')))
    for hr in root.xpath('//span/hr'):
        p = pancestor(hr)
        if p:
            p = p[0]
            descendants = tuple(p.iterdescendants())
            if descendants[-1] is hr:
                parent = p.getparent()
                idx = parent.index(p)
                parent.insert(idx + 1, hr)
                hr.tail = '\n\t'

    # Merge consecutive spans that have the same styling
    current_run = []
    for span in root.xpath('//span[not(@style or @lang)]'):
        if not current_run:
            current_run.append(span)
        else:
            last = current_run[-1]
            if mergeable(last, span):
                current_run.append(span)
            else:
                if len(current_run) > 1:
                    merge_run(current_run)
                current_run = [span]

    # Remove unnecessary span tags that are the only child of a parent block
    # element
    class_map = dict(styles.classes.itervalues())
    parents = ('p', 'div') + tuple('h%d' % i for i in xrange(1, 7))
    for parent in root.xpath('//*[(%s) and count(span)=1]' %
                             ' or '.join('name()="%s"' % t for t in parents)):
        if len(parent) == 1 and not parent.text and not parent[
                0].tail and not parent[0].get('id', None):
            # We have a block whose contents are entirely enclosed in a <span>
            span = parent[0]
            span_class = span.get('class', None)
            span_css = class_map.get(span_class, {})
            if liftable(span_css):
                pclass = parent.get('class', None)
                if span_class:
                    pclass = (pclass + ' ' +
                              span_class) if pclass else span_class
                    parent.set('class', pclass)
                parent.text = span.text
                parent.remove(span)
                if span.get('lang'):
                    parent.set('lang', span.get('lang'))
                for child in span:
                    parent.append(child)

    # Make spans whose only styling is bold or italic into <b> and <i> tags
    for span in root.xpath('//span[@class and not(@style)]'):
        css = class_map.get(span.get('class', None), {})
        if len(css) == 1:
            if css == {'font-style': 'italic'}:
                span.tag = 'i'
                del span.attrib['class']
            elif css == {'font-weight': 'bold'}:
                span.tag = 'b'
                del span.attrib['class']

    # Get rid of <span>s that have no styling
    for span in root.xpath('//span[not(@class or @id or @style or @lang)]'):
        lift(span)

    # Convert <p><br style="page-break-after:always"> </p> style page breaks
    # into something the viewer will render as a page break
    for p in root.xpath('//p[br[@style="page-break-after:always"]]'):
        if len(p) == 1 and (not p[0].tail or not p[0].tail.strip()):
            p.remove(p[0])
            prefix = p.get('style', '')
            if prefix:
                prefix += '; '
            p.set('style', prefix + 'page-break-after:always')
            p.text = NBSP if not p.text else p.text

    if detect_cover:
        # Check if the first image in the document is possibly a cover
        img = root.xpath('//img[@src][1]')
        if img:
            img = img[0]
            path = os.path.join(dest_dir, img.get('src'))
            if os.path.exists(path) and before_count(root, img, limit=10) < 5:
                from calibre.utils.imghdr import identify
                try:
                    with lopen(path, 'rb') as imf:
                        fmt, width, height = identify(imf)
                except:
                    width, height, fmt = 0, 0, None  # noqa
                del fmt
                try:
                    is_cover = 0.8 <= height / width <= 1.8 and height * width >= 160000
                except ZeroDivisionError:
                    is_cover = False
                if is_cover:
                    log.debug('Detected an image that looks like a cover')
                    img.getparent().remove(img)
                    return path
예제 #32
0
파일: cover.py 프로젝트: x007007007/calibre
def create_epub_cover(container, cover_path, existing_image, options=None):
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    try:
        ext = cover_path.rpartition(".")[-1].lower()
    except Exception:
        ext = "jpeg"
    cname, tname = "cover." + ext, "titlepage.xhtml"
    recommended_folders = get_recommended_folders(container, (cname, tname))

    if existing_image:
        raster_cover = existing_image
        manifest_id = {v: k for k, v in container.manifest_id_map.iteritems()}[existing_image]
        raster_cover_item = container.opf_xpath('//opf:manifest/*[@id="%s"]' % manifest_id)[0]
    else:
        folder = recommended_folders[cname]
        if folder:
            cname = folder + "/" + cname
        raster_cover_item = container.generate_item(cname, id_prefix="cover")
        raster_cover = container.href_to_name(raster_cover_item.get("href"), container.opf_name)

        with container.open(raster_cover, "wb") as dest:
            if callable(cover_path):
                cover_path("write_image", dest)
            else:
                with lopen(cover_path, "rb") as src:
                    shutil.copyfileobj(src, dest)
    if options is None:
        opts = load_defaults("epub_output")
        keep_aspect = opts.get("preserve_cover_aspect_ratio", False)
        no_svg = opts.get("no_svg_cover", False)
    else:
        keep_aspect = options.get("keep_aspect", False)
        no_svg = options.get("no_svg", False)
    if no_svg:
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace("__style__", style)
    else:
        if callable(cover_path):
            templ = (options or {}).get("template", CoverManager.SVG_TEMPLATE)
        else:
            width, height = 600, 800
            try:
                if existing_image:
                    width, height = identify(container.raw_data(existing_image, decode=False))[1:]
                else:
                    with lopen(cover_path, "rb") as csrc:
                        width, height = identify(csrc)[1:]
            except:
                container.log.exception("Failed to get width and height of cover")
            ar = "xMidYMid meet" if keep_aspect else "none"
            templ = CoverManager.SVG_TEMPLATE.replace("__ar__", ar)
            templ = templ.replace("__viewbox__", "0 0 %d %d" % (width, height))
            templ = templ.replace("__width__", str(width))
            templ = templ.replace("__height__", str(height))
    folder = recommended_folders[tname]
    if folder:
        tname = folder + "/" + tname
    titlepage_item = container.generate_item(tname, id_prefix="titlepage")
    titlepage = container.href_to_name(titlepage_item.get("href"), container.opf_name)
    raw = templ % container.name_to_href(raster_cover, titlepage).encode("utf-8")
    with container.open(titlepage, "wb") as f:
        f.write(raw)

    # We have to make sure the raster cover item has id="cover" for the moron
    # that wrote the Nook firmware
    if raster_cover_item.get("id") != "cover":
        from calibre.ebooks.oeb.base import uuid_id

        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set("id", newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set("idref", newid)
        raster_cover_item.set("id", "cover")

    spine = container.opf_xpath("//opf:spine")[0]
    ref = spine.makeelement(OPF("itemref"), idref=titlepage_item.get("id"))
    container.insert_into_xml(spine, ref, index=0)
    guide = container.opf_get_or_create("guide")
    container.insert_into_xml(
        guide,
        guide.makeelement(
            OPF("reference"),
            type="cover",
            title=_("Cover"),
            href=container.name_to_href(titlepage, base=container.opf_name),
        ),
    )
    metadata = container.opf_get_or_create("metadata")
    meta = metadata.makeelement(OPF("meta"), name="cover")
    meta.set("content", raster_cover_item.get("id"))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage
예제 #33
0
파일: reader.py 프로젝트: Coi-l/calibre
    def extract_content(self, output_dir):
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text recored into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Cache the image sizes in case they are used by a composite image.
        images = set()
        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        with CurrentDir(os.path.join(output_dir, 'images/')):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        save_cover_data_to(idata, '%s.jpg' % uid, compression_quality=70)
                        images.add(uid)
                        self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
                    except Exception as e:
                        self.log.error('Failed to write image with uid %s: %s' % (uid, e))
                else:
                    self.log.error('Failed to write image with uid %s: No data.' % uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in images:
                                raise Exception('Image with uid: %s missing.' % col)
                            w, h = identify(lopen('%s.jpg' % col, 'rb'))[1:]
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    with Canvas(width, height) as canvas:
                        y_off = 0
                        for row in section_data.layout:
                            x_off = 0
                            largest_height = 0
                            for col in row:
                                im = image_from_data(lopen('%s.jpg' % col, 'rb').read())
                                canvas.compose(im, x_off, y_off)
                                w, h = im.width(), im.height()
                                x_off += w
                                if largest_height < h:
                                    largest_height = h
                            y_off += largest_height
                    with lopen('%s.jpg' % uid) as out:
                        out.write(canvas.export(compression_quality=70))
                    self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
                except Exception as e:
                    self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata recored. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                home_html = self.uid_text_secion_number.items()[0][0]
        except:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
        self.options.debug_pipeline = odi

        return oeb
예제 #34
0
파일: content.py 프로젝트: Mymei2/calibre
    def test_get(self):  # {{{
        'Test /get'
        with self.create_server() as server:
            db = server.handler.router.ctx.library_broker.get(None)
            conn = server.connect()

            def get(what, book_id, library_id=None, q=''):
                q = ('?' + q) if q else q
                conn.request('GET', '/get/%s/%s' % (what, book_id) + (('/' + library_id) if library_id else '') + q)
                r = conn.getresponse()
                return r, r.read()

            # Test various invalid parameters
            def bad(*args):
                r, data = get(*args)
                self.ae(r.status, httplib.NOT_FOUND)
            bad('xxx', 1)
            bad('fmt1', 10)
            bad('fmt1', 1, 'zzzz')
            bad('fmt1', 'xx')

            # Test simple fetching of format without metadata update
            r, data = get('fmt1', 1, db.server_library_id)
            self.ae(data, db.format(1, 'fmt1'))
            self.assertIsNotNone(r.getheader('Content-Disposition'))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('fmt1', 1)
            self.ae(data, db.format(1, 'fmt1'))
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test fetching of format with metadata update
            raw = P('quick_start/eng.epub', data=True)
            r, data = get('epub', 1)
            self.ae(r.status, httplib.OK)
            etag = r.getheader('ETag')
            self.assertIsNotNone(etag)
            self.ae(r.getheader('Used-Cache'), 'no')
            self.assertTrue(data.startswith(b'PK'))
            self.assertGreaterEqual(len(data), len(raw))
            db.set_field('title', {1:'changed'})
            r, data = get('epub', 1)
            self.assertNotEqual(r.getheader('ETag'), etag)
            etag = r.getheader('ETag')
            self.ae(r.getheader('Used-Cache'), 'no')
            mi = get_metadata(BytesIO(data), extract_cover=False)
            self.ae(mi.title, 'changed')
            r, data = get('epub', 1)
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test plugboards
            import calibre.library.save_to_disk as c
            orig, c.DEBUG = c.DEBUG, False
            try:
                db.set_pref('plugboards', {u'epub': {u'content_server': [[u'changed, {title}', u'title']]}})
                # this is needed as the cache is not invalidated for plugboard changes
                db.set_field('title', {1:'again'})
                r, data = get('epub', 1)
                self.assertNotEqual(r.getheader('ETag'), etag)
                etag = r.getheader('ETag')
                self.ae(r.getheader('Used-Cache'), 'no')
                mi = get_metadata(BytesIO(data), extract_cover=False)
                self.ae(mi.title, 'changed, again')
            finally:
                c.DEBUG = orig

            # Test the serving of covers
            def change_cover(count, book_id=2):
                cpath = db.format_abspath(book_id, '__COVER_INTERNAL__')
                db.set_cover({2:I('lt.png', data=True)})
                t = time.time() + 1 + count
                # Ensure mtime changes, needed on OS X where HFS+ has a 1s
                # mtime resolution
                os.utime(cpath, (t, t))

            r, data = get('cover', 1)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'no')
            self.ae(r.getheader('Content-Type'), 'image/jpeg')
            r, data = get('cover', 1)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('cover', 3)
            self.ae(r.status, httplib.OK)  # Auto generated cover
            r, data = get('thumb', 1)
            self.ae(r.status, httplib.OK)
            self.ae(identify(data), ('jpeg', 60, 60))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('thumb', 1)
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, httplib.OK)
            self.ae(identify(data), ('jpeg', 100, 100))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('thumb', 1, q='sz=100x100')
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            change_cover(1, 1)
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, httplib.OK)
            self.ae(identify(data), ('jpeg', 100, 100))
            self.ae(r.getheader('Used-Cache'), 'no')

            # Test file sharing in cache
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
            f, fdata = share_open(path, 'rb'), data
            # Now force an update
            change_cover(1)
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
            f2, f2data = share_open(path, 'rb'), data
            # Do it again
            change_cover(2)
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            self.ae(f.read(), fdata)
            self.ae(f2.read(), f2data)

            # Test serving of metadata as opf
            r, data = get('opf', 1)
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Content-Type'), 'application/oebps-package+xml; charset=UTF-8')
            self.assertIsNotNone(r.getheader('Last-Modified'))
            opf = OPF(BytesIO(data), populate_spine=False, try_to_guess_cover=False)
            self.ae(db.field_for('title', 1), opf.title)
            self.ae(db.field_for('authors', 1), tuple(opf.authors))
            conn.request('GET', '/get/opf/1', headers={'Accept-Encoding':'gzip'})
            r = conn.getresponse()
            self.ae(r.status, httplib.OK), self.ae(r.getheader('Content-Encoding'), 'gzip')
            raw = r.read()
            self.ae(zlib.decompress(raw, 16+zlib.MAX_WBITS), data)

            # Test serving metadata as json
            r, data = get('json', 1)
            self.ae(r.status, httplib.OK)
            self.ae(db.field_for('title', 1), json.loads(data)['title'])
            conn.request('GET', '/get/json/1', headers={'Accept-Encoding':'gzip'})
            r = conn.getresponse()
            self.ae(r.status, httplib.OK), self.ae(r.getheader('Content-Encoding'), 'gzip')
            raw = r.read()
            self.ae(zlib.decompress(raw, 16+zlib.MAX_WBITS), data)
예제 #35
0
def create_epub_cover(container, cover_path, existing_image, options=None):
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    try:
        ext = cover_path.rpartition('.')[-1].lower()
    except Exception:
        ext = 'jpeg'
    cname, tname = 'cover.' + ext, 'titlepage.xhtml'
    recommended_folders = get_recommended_folders(container, (cname, tname))

    if existing_image:
        raster_cover = existing_image
        manifest_id = {v: k
                       for k, v in iteritems(container.manifest_id_map)
                       }[existing_image]
        raster_cover_item = container.opf_xpath('//opf:manifest/*[@id="%s"]' %
                                                manifest_id)[0]
    else:
        folder = recommended_folders[cname]
        if folder:
            cname = folder + '/' + cname
        raster_cover_item = container.generate_item(cname, id_prefix='cover')
        raster_cover = container.href_to_name(raster_cover_item.get('href'),
                                              container.opf_name)

        with container.open(raster_cover, 'wb') as dest:
            if callable(cover_path):
                cover_path('write_image', dest)
            else:
                with lopen(cover_path, 'rb') as src:
                    shutil.copyfileobj(src, dest)
    if options is None:
        opts = load_defaults('epub_output')
        keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
        no_svg = opts.get('no_svg_cover', False)
    else:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)
    if no_svg:
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
        has_svg = False
    else:
        if callable(cover_path):
            templ = (options or {}).get('template', CoverManager.SVG_TEMPLATE)
            has_svg = 'xlink:href' in templ
        else:
            width, height = 600, 800
            has_svg = True
            try:
                if existing_image:
                    width, height = identify(
                        container.raw_data(existing_image, decode=False))[1:]
                else:
                    with lopen(cover_path, 'rb') as csrc:
                        width, height = identify(csrc)[1:]
            except:
                container.log.exception(
                    "Failed to get width and height of cover")
            ar = 'xMidYMid meet' if keep_aspect else 'none'
            templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
            templ = templ.replace('__viewbox__', '0 0 %d %d' % (width, height))
            templ = templ.replace('__width__', str(width))
            templ = templ.replace('__height__', str(height))
    folder = recommended_folders[tname]
    if folder:
        tname = folder + '/' + tname
    titlepage_item = container.generate_item(tname, id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'),
                                       container.opf_name)
    raw = templ % container.name_to_href(raster_cover,
                                         titlepage).encode('utf-8')
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    # We have to make sure the raster cover item has id="cover" for the moron
    # that wrote the Nook firmware
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set('id', newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set('idref', newid)
        raster_cover_item.set('id', 'cover')

    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, ref, index=0)
    ver = container.opf_version_parsed
    if ver.major < 3:
        guide = container.opf_get_or_create('guide')
        container.insert_into_xml(
            guide,
            guide.makeelement(OPF('reference'),
                              type='cover',
                              title=_('Cover'),
                              href=container.name_to_href(
                                  titlepage, base=container.opf_name)))
        metadata = container.opf_get_or_create('metadata')
        meta = metadata.makeelement(OPF('meta'), name='cover')
        meta.set('content', raster_cover_item.get('id'))
        container.insert_into_xml(metadata, meta)
    else:
        container.apply_unique_properties(raster_cover, 'cover-image')
        container.apply_unique_properties(titlepage, 'calibre:title-page')
        if has_svg:
            container.add_properties(titlepage, 'svg')

    return raster_cover, titlepage
예제 #36
0
파일: cover.py 프로젝트: MarioJC/calibre
def create_epub_cover(container, cover_path, existing_image, options=None):
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    try:
        ext = cover_path.rpartition('.')[-1].lower()
    except Exception:
        ext = 'jpeg'
    cname, tname = 'cover.' + ext, 'titlepage.xhtml'
    recommended_folders = get_recommended_folders(container, (cname, tname))

    if existing_image:
        raster_cover = existing_image
        manifest_id = {v:k for k, v in container.manifest_id_map.iteritems()}[existing_image]
        raster_cover_item = container.opf_xpath('//opf:manifest/*[@id="%s"]' % manifest_id)[0]
    else:
        folder = recommended_folders[cname]
        if folder:
            cname = folder + '/' + cname
        raster_cover_item = container.generate_item(cname, id_prefix='cover')
        raster_cover = container.href_to_name(raster_cover_item.get('href'), container.opf_name)

        with container.open(raster_cover, 'wb') as dest:
            if callable(cover_path):
                cover_path('write_image', dest)
            else:
                with lopen(cover_path, 'rb') as src:
                    shutil.copyfileobj(src, dest)
    if options is None:
        opts = load_defaults('epub_output')
        keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
        no_svg = opts.get('no_svg_cover', False)
    else:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)
    if no_svg:
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
        has_svg = False
    else:
        if callable(cover_path):
            templ = (options or {}).get('template', CoverManager.SVG_TEMPLATE)
            has_svg = 'xlink:href' in templ
        else:
            width, height = 600, 800
            has_svg = True
            try:
                if existing_image:
                    width, height = identify(container.raw_data(existing_image, decode=False))[1:]
                else:
                    with lopen(cover_path, 'rb') as csrc:
                        width, height = identify(csrc)[1:]
            except:
                container.log.exception("Failed to get width and height of cover")
            ar = 'xMidYMid meet' if keep_aspect else 'none'
            templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
            templ = templ.replace('__viewbox__', '0 0 %d %d'%(width, height))
            templ = templ.replace('__width__', str(width))
            templ = templ.replace('__height__', str(height))
    folder = recommended_folders[tname]
    if folder:
        tname = folder + '/' + tname
    titlepage_item = container.generate_item(tname, id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'),
                                          container.opf_name)
    raw = templ%container.name_to_href(raster_cover, titlepage).encode('utf-8')
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    # We have to make sure the raster cover item has id="cover" for the moron
    # that wrote the Nook firmware
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set('id', newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set('idref', newid)
        raster_cover_item.set('id', 'cover')

    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, ref, index=0)
    ver = container.opf_version_parsed
    if ver.major < 3:
        guide = container.opf_get_or_create('guide')
        container.insert_into_xml(guide, guide.makeelement(
            OPF('reference'), type='cover', title=_('Cover'),
            href=container.name_to_href(titlepage, base=container.opf_name)))
        metadata = container.opf_get_or_create('metadata')
        meta = metadata.makeelement(OPF('meta'), name='cover')
        meta.set('content', raster_cover_item.get('id'))
        container.insert_into_xml(metadata, meta)
    else:
        container.apply_unique_properties(raster_cover, 'cover-image')
        container.apply_unique_properties(titlepage, 'calibre:title-page')
        if has_svg:
            container.add_properties(titlepage, 'svg')

    return raster_cover, titlepage
예제 #37
0
파일: mobiml.py 프로젝트: MarioJC/calibre
    def mobimlize_elem(self, elem, stylizer, bstate, istates,
            ignore_valign=False):
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return
        style = stylizer.style(elem)
        # <mbp:frame-set/> does not exist lalalala
        if ((style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or style['visibility'] == 'hidden') and
                elem.get('data-calibre-jacket-searchable-tags', None) != '1'):
            id_ = elem.get('id', None)
            if id_:
                # Keep anchors so people can use display:none
                # to generate hidden TOCs
                tail = elem.tail
                elem.clear()
                elem.text = None
                elem.set('id', id_)
                elem.tail = tail
                elem.tag = XHTML('a')
            else:
                return
        tag = barename(elem.tag)
        istate = copy.copy(istates[-1])
        istate.rendered = False
        istate.list_num = 0
        if tag == 'ol' and 'start' in elem.attrib:
            try:
                istate.list_num = int(elem.attrib['start'])-1
            except:
                pass
        istates.append(istate)
        left = 0
        display = style['display']
        if display == 'table-cell':
            display = 'inline'
        elif display.startswith('table'):
            display = 'block'
        isblock = (not display.startswith('inline') and style['display'] !=
                'none')
        isblock = isblock and style['float'] == 'none'
        isblock = isblock and tag != 'br'
        if isblock:
            bstate.para = None
            istate.halign = style['text-align']
            rawti = style._get('text-indent')
            istate.indent = style['text-indent']
            if hasattr(rawti, 'strip') and '%' in rawti:
                # We have a percentage text indent, these can come out looking
                # too large if the user chooses a wide output profile like
                # tablet
                istate.indent = min(style._unit_convert(rawti, base=500), istate.indent)
            if style['margin-left'] == 'auto' \
               and style['margin-right'] == 'auto':
                istate.halign = 'center'
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            if tag != 'body':
                left = margin + padding
            istate.left += left
            vmargin = asfloat(style['margin-top'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-top'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        elif not istate.href:
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            lspace = margin + padding
            if lspace > 0:
                spaces = int(round((lspace * 3) / style['font-size']))
                elem.text = (u'\xa0' * spaces) + (elem.text or '')
            margin = asfloat(style['margin-right'])
            padding = asfloat(style['padding-right'])
            rspace = margin + padding
            if rspace > 0:
                spaces = int(round((rspace * 3) / style['font-size']))
                if len(elem) == 0:
                    elem.text = (elem.text or '') + (u'\xa0' * spaces)
                else:
                    last = elem[-1]
                    last.text = (last.text or '') + (u'\xa0' * spaces)
        if bstate.content and style['page-break-before'] in PAGE_BREAKS:
            bstate.pbreak = True
        istate.fsize = self.mobimlize_font(style['font-size'])
        istate.italic = True if style['font-style'] == 'italic' else False
        weight = style['font-weight']
        istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
        istate.preserve = style['white-space'] == 'pre'
        istate.pre_wrap = style['white-space'] == 'pre-wrap'
        istate.bgcolor  = style['background-color']
        istate.fgcolor  = style['color']
        istate.strikethrough = style.effective_text_decoration == 'line-through'
        istate.underline = style.effective_text_decoration == 'underline'
        ff = style['font-family'].lower() if hasattr(style['font-family'], 'lower') else ''
        if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
            istate.family = 'monospace'
        elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff or
                'arial' in ff or 'helvetica' in ff):
            istate.family = 'sans-serif'
        else:
            istate.family = 'serif'
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
            istate.ids.add(elem.attrib['name'])
        if tag == 'a' and 'href' in elem.attrib:
            istate.href = elem.attrib['href']
        istate.attrib.clear()
        if tag == 'img' and 'src' in elem.attrib:
            istate.attrib['src'] = elem.attrib['src']
            istate.attrib['align'] = 'baseline'
            cssdict = style.cssdict()
            valign = cssdict.get('vertical-align', None)
            if valign in ('top', 'bottom', 'middle'):
                istate.attrib['align'] = valign
            for prop in ('width', 'height'):
                if cssdict[prop] != 'auto':
                    value = style[prop]
                    if value == getattr(self.profile, prop):
                        result = '100%'
                    else:
                        # Amazon's renderer does not support
                        # img sizes in units other than px
                        # See #7520 for test case
                        try:
                            pixs = int(round(float(value) /
                                (72./self.profile.dpi)))
                        except:
                            continue
                        result = str(pixs)
                    istate.attrib[prop] = result
            if 'width' not in istate.attrib or 'height' not in istate.attrib:
                href = self.current_spine_item.abshref(elem.attrib['src'])
                try:
                    item = self.oeb.manifest.hrefs[urlnormalize(href)]
                except:
                    self.oeb.logger.warn('Failed to find image:',
                            href)
                else:
                    try:
                        width, height = identify(item.data)[1:]
                    except Exception:
                        self.oeb.logger.warn('Invalid image:', href)
                    else:
                        if 'width' not in istate.attrib and 'height' not in \
                                    istate.attrib:
                            istate.attrib['width'] = str(width)
                            istate.attrib['height'] = str(height)
                        else:
                            ar = float(width)/float(height)
                            if 'width' not in istate.attrib:
                                try:
                                    width = int(istate.attrib['height'])*ar
                                except:
                                    pass
                                istate.attrib['width'] = str(int(width))
                            else:
                                try:
                                    height = int(istate.attrib['width'])/ar
                                except:
                                    pass
                                istate.attrib['height'] = str(int(height))
                        item.unload_data_from_memory()
        elif tag == 'hr' and asfloat(style['width']) > 0 and style._get('width') not in {'100%', 'auto'}:
            raww = style._get('width')
            if hasattr(raww, 'strip') and '%' in raww:
                istate.attrib['width'] = raww
            else:
                prop = style['width'] / self.profile.width
                istate.attrib['width'] = "%d%%" % int(round(prop * 100))
        elif display == 'table':
            tag = 'table'
        elif display == 'table-row':
            tag = 'tr'
        elif display == 'table-cell':
            tag = 'td'
        if tag in TABLE_TAGS and self.ignore_tables:
            tag = 'span' if tag == 'td' else 'div'

        if tag in ('table', 'td', 'tr'):
            col = style.backgroundColor
            if col:
                elem.set('bgcolor', col)
            css = style.cssdict()
            if 'border' in css or 'border-width' in css:
                elem.set('border', '1')
        if tag in TABLE_TAGS:
            for attr in ('rowspan', 'colspan', 'width', 'border', 'scope',
                    'bgcolor'):
                if attr in elem.attrib:
                    istate.attrib[attr] = elem.attrib[attr]
        if tag == 'q':
            t = elem.text
            if not t:
                t = ''
            elem.text = u'\u201c' + t
            t = elem.tail
            if not t:
                t = ''
            elem.tail = u'\u201d' + t
        text = None
        if elem.text:
            if istate.preserve or istate.pre_wrap:
                text = elem.text
            elif (len(elem) > 0 and isspace(elem.text) and hasattr(elem[0].tag, 'rpartition') and
                  elem[0].tag.rpartition('}')[-1] not in INLINE_TAGS):
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
        not_baseline = valign in ('super', 'sub', 'text-top',
                'text-bottom', 'top', 'bottom') or (
                isinstance(valign, (float, int)) and abs(valign) != 0)
        issup = valign in ('super', 'text-top', 'top') or (
            isinstance(valign, (float, int)) and valign > 0)
        vtag = 'sup' if issup else 'sub'
        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
            vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
            vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
            self.mobimlize_elem(elem, stylizer, vbstate, istates,
                    ignore_valign=True)
            if len(istates) > 0:
                istates.pop()
            if len(istates) == 0:
                istates.append(FormatState())
            at_start = bstate.para is None
            if at_start:
                self.mobimlize_content('span', '', bstate, istates)
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
                vtag = etree.SubElement(vtag, XHTML('small'))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
                        vtag.append(child)
                    else:
                        break
                if vbstate.para is not None:
                    if vbstate.para.text:
                        vtag.text = vbstate.para.text
                    for child in vbstate.para:
                        vtag.append(child)
                return

        if tag == 'blockquote':
            old_mim = self.opts.mobi_ignore_margins
            self.opts.mobi_ignore_margins = False

        if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
            # We have an id but no text and no children, the id should still
            # be added.
            istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and
            len(elem)==0)):
                if tag == 'li' and len(istates) > 1 and 'value' in elem.attrib:
                    try:
                        value = int(elem.attrib['value'])
                        istates[-2].list_num = value - 1
                    except:
                        pass
                self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            tail = None
            if child.tail:
                if istate.preserve or istate.pre_wrap:
                    tail = child.tail
                elif bstate.para is None and isspace(child.tail):
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)

        if tag == 'blockquote':
            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
            para = bstate.para
            if para is not None and para.text == u'\xa0' and len(para) < 1:
                if style.height > 2:
                    para.getparent().replace(para, etree.Element(XHTML('br')))
                else:
                    # This is too small to be rendered effectively, drop it
                    para.getparent().remove(para)
            bstate.para = None
            bstate.istate = None
            vmargin = asfloat(style['margin-bottom'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-bottom'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        if bstate.nested and bstate.nested[-1].tag == elem.tag:
            bstate.nested.pop()
        istates.pop()
예제 #38
0
    def test_get(self):  # {{{
        'Test /get'
        with self.create_server() as server:
            db = server.handler.router.ctx.library_broker.get(None)
            conn = server.connect()

            def get(what, book_id, library_id=None, q=''):
                q = ('?' + q) if q else q
                conn.request(
                    'GET', '/get/%s/%s' % (what, book_id) +
                    (('/' + library_id) if library_id else '') + q)
                r = conn.getresponse()
                return r, r.read()

            # Test various invalid parameters
            def bad(*args):
                r, data = get(*args)
                self.ae(r.status, http_client.NOT_FOUND)

            bad('xxx', 1)
            bad('fmt1', 10)
            bad('fmt1', 1, 'zzzz')
            bad('fmt1', 'xx')

            # Test simple fetching of format without metadata update
            r, data = get('fmt1', 1, db.server_library_id)
            self.ae(data, db.format(1, 'fmt1'))
            self.assertIsNotNone(r.getheader('Content-Disposition'))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('fmt1', 1)
            self.ae(data, db.format(1, 'fmt1'))
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test fetching of format with metadata update
            raw = P('quick_start/eng.epub', data=True)
            r, data = get('epub', 1)
            self.ae(r.status, http_client.OK)
            etag = r.getheader('ETag')
            self.assertIsNotNone(etag)
            self.ae(r.getheader('Used-Cache'), 'no')
            self.assertTrue(data.startswith(b'PK'))
            self.assertGreaterEqual(len(data), len(raw))
            db.set_field('title', {1: 'changed'})
            r, data = get('epub', 1)
            self.assertNotEqual(r.getheader('ETag'), etag)
            etag = r.getheader('ETag')
            self.ae(r.getheader('Used-Cache'), 'no')
            mi = get_metadata(BytesIO(data), extract_cover=False)
            self.ae(mi.title, 'changed')
            r, data = get('epub', 1)
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test plugboards
            import calibre.library.save_to_disk as c
            orig, c.DEBUG = c.DEBUG, False
            try:
                db.set_pref(
                    'plugboards', {
                        u'epub': {
                            u'content_server':
                            [[u'changed, {title}', u'title']]
                        }
                    })
                # this is needed as the cache is not invalidated for plugboard changes
                db.set_field('title', {1: 'again'})
                r, data = get('epub', 1)
                self.assertNotEqual(r.getheader('ETag'), etag)
                etag = r.getheader('ETag')
                self.ae(r.getheader('Used-Cache'), 'no')
                mi = get_metadata(BytesIO(data), extract_cover=False)
                self.ae(mi.title, 'changed, again')
            finally:
                c.DEBUG = orig

            # Test the serving of covers
            def change_cover(count, book_id=2):
                cpath = db.format_abspath(book_id, '__COVER_INTERNAL__')
                db.set_cover({2: I('lt.png', data=True)})
                t = time.time() + 1 + count
                # Ensure mtime changes, needed on OS X where HFS+ has a 1s
                # mtime resolution
                os.utime(cpath, (t, t))

            r, data = get('cover', 1)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'no')
            self.ae(r.getheader('Content-Type'), 'image/jpeg')
            r, data = get('cover', 1)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('cover', 3)
            self.ae(r.status, http_client.OK)  # Auto generated cover
            r, data = get('thumb', 1)
            self.ae(r.status, http_client.OK)
            self.ae(identify(data), ('jpeg', 60, 60))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('thumb', 1)
            self.ae(r.status, http_client.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, http_client.OK)
            self.ae(identify(data), ('jpeg', 100, 100))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('thumb', 1, q='sz=100x100')
            self.ae(r.status, http_client.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            change_cover(1, 1)
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, http_client.OK)
            self.ae(identify(data), ('jpeg', 100, 100))
            self.ae(r.getheader('Used-Cache'), 'no')

            # Test file sharing in cache
            r, data = get('cover', 2)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            path = from_hex_unicode(r.getheader('Tempfile'))
            f, fdata = share_open(path, 'rb'), data
            # Now force an update
            change_cover(1)
            r, data = get('cover', 2)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            path = from_hex_unicode(r.getheader('Tempfile'))
            f2, f2data = share_open(path, 'rb'), data
            # Do it again
            change_cover(2)
            r, data = get('cover', 2)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            self.ae(f.read(), fdata)
            self.ae(f2.read(), f2data)

            # Test serving of metadata as opf
            r, data = get('opf', 1)
            self.ae(r.status, http_client.OK)
            self.ae(r.getheader('Content-Type'),
                    'application/oebps-package+xml; charset=UTF-8')
            self.assertIsNotNone(r.getheader('Last-Modified'))
            opf = OPF(BytesIO(data),
                      populate_spine=False,
                      try_to_guess_cover=False)
            self.ae(db.field_for('title', 1), opf.title)
            self.ae(db.field_for('authors', 1), tuple(opf.authors))
            conn.request('GET',
                         '/get/opf/1',
                         headers={'Accept-Encoding': 'gzip'})
            r = conn.getresponse()
            self.ae(r.status,
                    http_client.OK), self.ae(r.getheader('Content-Encoding'),
                                             'gzip')
            raw = r.read()
            self.ae(zlib.decompress(raw, 16 + zlib.MAX_WBITS), data)

            # Test serving metadata as json
            r, data = get('json', 1)
            self.ae(r.status, http_client.OK)
            self.ae(db.field_for('title', 1), json.loads(data)['title'])
            conn.request('GET',
                         '/get/json/1',
                         headers={'Accept-Encoding': 'gzip'})
            r = conn.getresponse()
            self.ae(r.status,
                    http_client.OK), self.ae(r.getheader('Content-Encoding'),
                                             'gzip')
            raw = r.read()
            self.ae(zlib.decompress(raw, 16 + zlib.MAX_WBITS), data)
예제 #39
0
파일: legacy.py 프로젝트: j-howell/calibre
 def identify(self, data):
     fmt, width, height = identify(data)
     return width, height, fmt
예제 #40
0
 def identify(self, data):
     fmt, width, height = identify(data)
     return width, height, fmt
예제 #41
0
def get_image_size(image_path):
    fmt, width, height = identify(open(image_path, 'rb'))
    return width, height
예제 #42
0
    def extract_content(self, output_dir):
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text recored into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html' %
                               (uid, uid))
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(
                            section_data.data,
                            section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        html += self.process_phtml(
                            d, section_data.header.paragraph_offsets).decode(
                                self.get_text_uid_encoding(section_header.uid),
                                'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Cache the image sizes in case they are used by a composite image.
        images = set()
        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        with CurrentDir(os.path.join(output_dir, 'images/')):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        save_cover_data_to(idata,
                                           '%s.jpg' % uid,
                                           compression_quality=70)
                        images.add(uid)
                        self.log.debug(
                            'Wrote image with uid %s to images/%s.jpg' %
                            (uid, uid))
                    except Exception as e:
                        self.log.error(
                            'Failed to write image with uid %s: %s' % (uid, e))
                else:
                    self.log.error(
                        'Failed to write image with uid %s: No data.' % uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in images:
                                raise Exception('Image with uid: %s missing.' %
                                                col)
                            w, h = identify(lopen('%s.jpg' % col, 'rb'))[1:]
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    with Canvas(width, height) as canvas:
                        y_off = 0
                        for row in section_data.layout:
                            x_off = 0
                            largest_height = 0
                            for col in row:
                                im = image_from_data(
                                    lopen('%s.jpg' % col, 'rb').read())
                                canvas.compose(im, x_off, y_off)
                                w, h = im.width(), im.height()
                                x_off += w
                                if largest_height < h:
                                    largest_height = h
                            y_off += largest_height
                    with lopen('%s.jpg' % uid) as out:
                        out.write(canvas.export(compression_quality=70))
                    self.log.debug(
                        'Wrote composite image with uid %s to images/%s.jpg' %
                        (uid, uid))
                except Exception as e:
                    self.log.error(
                        'Failed to write composite image with uid %s: %s' %
                        (uid, e))

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata recored. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                home_html = self.uid_text_secion_number.items()[0][0]
        except:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'),
                                 self.options, 'html', self.log, {})
        self.options.debug_pipeline = odi

        return oeb