Python identify_data Examples, calibre.utils.magick.draw.identify_data Python Examples

Example #1

0

Show file

File: odt.py Project: 089git/calibre

def read_cover(stream, zin, mi, opfmeta, extract_cover):
    """Locate the cover image of an ODF document and record it on ``mi``.

    Frames that contain an image are scanned in document order.  When
    ``opfmeta`` is true, a frame named ``opf.cover`` (case-insensitive) is
    chosen outright.  Otherwise the first readable image is used, provided
    it has a suitable aspect ratio and area (heuristic borrowed from docx).

    :param stream: handed to ``odLoad`` to parse the document.
    :param zin: object whose ``read(href)`` returns the image bytes and
        raises ``KeyError`` when the member is missing.
    :param mi: metadata object; ``cover`` and ``odf_cover_frame`` (and
        ``cover_data`` when requested) are set on it if a cover is found.
    :param opfmeta: whether a frame named ``opf.cover`` should be honoured.
    :param extract_cover: whether to store ``(fmt, raw)`` in ``mi.cover_data``.
    """
    otext = odLoad(stream)
    cover_href = None
    cover_data = None
    cover_frame = None
    imgnum = 0
    for frm in otext.topnode.getElementsByType(odFrame):
        img = frm.getElementsByType(odImage)
        if not img:
            continue
        i_href = img[0].getAttribute('href')
        try:
            raw = zin.read(i_href)
        except KeyError:
            continue
        try:
            width, height, fmt = identify_data(raw)
        except Exception:
            # Unreadable image: skip it, but let KeyboardInterrupt/SystemExit through
            continue
        imgnum += 1
        # A frame without a name attribute must not crash the scan
        frame_name = frm.getAttribute('name') if opfmeta else None
        if frame_name and frame_name.lower() == u'opf.cover':
            cover_href = i_href
            cover_data = (fmt, raw)
            cover_frame = frame_name  # could have upper case
            break
        # float() keeps the aspect test exact under Python 2 integer division;
        # the width check avoids ZeroDivisionError on degenerate images.
        if (cover_href is None and imgnum == 1 and width and
                0.8 <= float(height) / width <= 1.8 and
                height * width >= 12000):
            # Pick the first image as the cover if it is of a suitable size
            cover_href = i_href
            cover_data = (fmt, raw)
            if not opfmeta:
                break

    if cover_href is not None:
        mi.cover = cover_href
        mi.odf_cover_frame = cover_frame
        if extract_cover:
            if not cover_data:
                raw = zin.read(cover_href)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    pass
                else:
                    cover_data = (fmt, raw)
            mi.cover_data = cover_data

Example #2

0

Show file

File: odt.py Project: pombreda/calibre-1

def read_cover(stream, zin, mi, opfmeta, extract_cover):
    """Find the cover image inside an ODF document and store it on ``mi``.

    Image-bearing frames are visited in document order.  With ``opfmeta``
    set, a frame whose name is ``opf.cover`` (any case) is taken
    immediately.  Failing that, the first readable image becomes the cover
    if its aspect ratio and area look cover-like (borrowed from docx).

    :param stream: passed to ``odLoad``.
    :param zin: object whose ``read(href)`` yields image bytes and raises
        ``KeyError`` for a missing member.
    :param mi: metadata object receiving ``cover``, ``odf_cover_frame`` and,
        when ``extract_cover`` is true, ``cover_data``.
    :param opfmeta: honour the ``opf.cover`` frame name when true.
    :param extract_cover: store ``(fmt, raw)`` in ``mi.cover_data`` when true.
    """
    otext = odLoad(stream)
    cover_href = None
    cover_data = None
    cover_frame = None
    imgnum = 0
    for frm in otext.topnode.getElementsByType(odFrame):
        img = frm.getElementsByType(odImage)
        if not img:
            continue
        i_href = img[0].getAttribute('href')
        try:
            raw = zin.read(i_href)
        except KeyError:
            continue
        try:
            width, height, fmt = identify_data(raw)
        except Exception:
            # Narrowed from a bare except so interpreter-exit signals propagate
            continue
        imgnum += 1
        # Tolerate frames that carry no name attribute
        frame_name = frm.getAttribute('name') if opfmeta else None
        if frame_name and frame_name.lower() == u'opf.cover':
            cover_href = i_href
            cover_data = (fmt, raw)
            cover_frame = frame_name  # could have upper case
            break
        # Guard against zero width and force true division for the ratio test
        if (cover_href is None and imgnum == 1 and width and
                0.8 <= float(height) / width <= 1.8 and
                height * width >= 12000):
            # Pick the first image as the cover if it is of a suitable size
            cover_href = i_href
            cover_data = (fmt, raw)
            if not opfmeta:
                break

    if cover_href is not None:
        mi.cover = cover_href
        mi.odf_cover_frame = cover_frame
        if extract_cover:
            if not cover_data:
                raw = zin.read(cover_href)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    pass
                else:
                    cover_data = (fmt, raw)
            mi.cover_data = cover_data

Example #3

0

Show file

File: images.py Project: AEliu/calibre

 def read_image(self, href):
     """Return the cached ``Image`` record for ``href``, building it on first use.

     Returns ``None`` when the manifest has no item for ``href`` or the
     item's data is not ``bytes``.  If the image data cannot be identified,
     the item's data is replaced with the bundled blank PNG and a warning
     is logged.
     """
     if href in self.images:
         return self.images[href]
     manifest_item = self.oeb.manifest.hrefs.get(href)
     if manifest_item is None or not isinstance(manifest_item.data, bytes):
         return
     try:
         width, height, fmt = identify_data(manifest_item.data)
     except Exception:
         self.log.warning('Replacing corrupted image with blank: %s' % href)
         manifest_item.data = I('blank.png', data=True, allow_user_override=False)
         width, height, fmt = identify_data(manifest_item.data)
     fname = 'media/' + self.create_filename(href, fmt)
     rid = self.document_relationships.add_image(fname)
     self.images[href] = Image(rid, fname, width, height, fmt, manifest_item)
     manifest_item.unload_data_from_memory()
     return self.images[href]

Example #4

0

Show file

File: epub.py Project: AEliu/calibre

 def workaround_ade_quirks(self, container, name):
     """Give floating images without a ``style`` attribute explicit pixel sizes.

     ADE blows up floating images if their sizes are not specified, so the
     real dimensions are read from the image data and written inline.
     """
     floating_imgs = container.parsed(name).xpath(
         '//*[local-name() = "img" and (@class = "float-right-img" or @class = "float-left-img")]')
     for img in floating_imgs:
         if 'style' in img.attrib:
             continue
         imgname = container.href_to_name(img.get('src'), name)
         dims = identify_data(container.raw_data(imgname))
         width, height, fmt = dims
         img.set('style', 'width: %dpx; height: %dpx' % (width, height))

Example #5

0

Show file

File: containers.py Project: AtulKumar2/calibre

def find_imgtype(data):
    """Return the image type of ``data``.

    ``what`` is tried first; if it returns ``None`` the third element of
    ``identify_data(data)`` is used, and ``'unknown'`` if that call fails.
    """
    detected = what(None, data)
    if detected is not None:
        return detected
    try:
        return identify_data(data)[2]
    except Exception:
        return 'unknown'

Example #6

0

Show file

File: containers.py Project: tokot/calibre

def find_imgtype(data):
    """Work out the image type for a blob of image bytes.

    Uses ``what`` when it gives an answer, otherwise falls back to the
    format reported by ``identify_data``; returns ``'unknown'`` when the
    fallback raises.
    """
    kind = what(None, data)
    if kind is None:
        try:
            kind = identify_data(data)[2]
        except Exception:
            kind = 'unknown'
        return kind
    return kind

Example #7

0

Show file

def read_metadata_kfx(stream, read_cover=True):
    """Read the metadata.kfx file that is found in the sdr book folder for KFX files.

    :param stream: file-like object; its full contents are parsed.
    :param read_cover: when true, a cover stored under ``COVER_KEY`` is
        base64-decoded and attached if it can be identified as an image.
    :return: a ``Metadata`` object populated from the extracted fields.
    """
    container = Container(stream.read())
    m = extract_metadata(container.decode())
    # dump_metadata(m)

    def has(key):
        values = m[key]
        return values and values[0]

    def get(key, single=True):
        values = m[key]
        if not single:
            return [clean_xml_chars(v) for v in values]
        return clean_xml_chars(values[0]) if values else ''

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(name):
        # "Last, First" is flipped to "First Last" unless the tweak says to copy as-is
        if tweaks['author_sort_copy_method'] == 'copy':
            return name
        match = auth_pat.match(name.strip())
        if match is None:
            return name
        return match.group(2) + ' ' + match.group(1)

    mi = Metadata(title, [fix_author(a) for a in authors])
    if has('author'):
        mi.author_sort = get('author')

    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))

    if has('languages'):
        canonical = (canonicalize_lang(code) for code in get('languages', False))
        langs = [lang for lang in canonical if lang]
        if langs:
            mi.languages = langs

    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass

    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')

    if read_cover and m[COVER_KEY]:
        try:
            cover_bytes = base64.standard_b64decode(m[COVER_KEY])
            width, height, fmt = identify_data(cover_bytes)
        except Exception:
            width, height, fmt = 0, 0, None
        if fmt and width and height:
            mi.cover_data = (fmt, cover_bytes)

    return mi

Example #8

0

Show file

File: cover.py Project: 089git/calibre

 def inspect_cover(self, href):
     """Return the first two values of ``identify_data`` for the manifest item at ``href``.

     Returns ``(None, None)`` when no manifest item matches the normalized
     ``href`` or when reading the image fails (the failure is logged).
     """
     from calibre.ebooks.oeb.base import urlnormalize
     target = urlnormalize(href)  # loop-invariant, so normalize once
     for x in self.oeb.manifest:
         if x.href == target:
             try:
                 raw = x.data
                 return identify_data(raw)[:2]
             except Exception:
                 # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
                 self.log.exception('Failed to read image dimensions')
     return None, None

Example #9

0

Show file

File: cover.py Project: syn-gowthamsrungarapu/calibre

 def inspect_cover(self, href):
     """Look up the manifest item for ``href`` and return ``identify_data(...)[:2]``.

     Falls back to ``(None, None)`` if nothing in the manifest matches the
     normalized ``href`` or the image cannot be read; read failures are
     logged with a traceback.
     """
     from calibre.ebooks.oeb.base import urlnormalize
     wanted = urlnormalize(href)  # hoisted: does not change between iterations
     for x in self.oeb.manifest:
         if x.href != wanted:
             continue
         try:
             return identify_data(x.data)[:2]
         except Exception:
             # A bare except here would also swallow KeyboardInterrupt/SystemExit
             self.log.exception('Failed to read image dimensions')
     return None, None

Example #10

0

Show file

 def workaround_ade_quirks(self, container, name):
     """Add an inline pixel size to floating images that have no ``style``.

     ADE blows up floating images if their sizes are not specified.
     """
     selector = '//*[local-name() = "img" and (@class = "float-right-img" or @class = "float-left-img")]'
     root = container.parsed(name)
     for img in root.xpath(selector):
         if 'style' in img.attrib:
             continue
         imgname = container.href_to_name(img.get('src'), name)
         width, height, fmt = identify_data(container.raw_data(imgname))
         size_css = 'width: %dpx; height: %dpx' % (width, height)
         img.set('style', size_css)

Example #11

0

Show file

File: images.py Project: Aliminator666/calibre

 def read_image(self, href):
     """Return the cached ``Image`` record for ``href``, creating it if needed.

     Returns ``None`` when the manifest has no item for ``href`` or the
     item's data is not ``bytes``.  Errors from ``identify_data`` propagate.
     """
     if href in self.images:
         return self.images[href]
     entry = self.oeb.manifest.hrefs.get(href)
     if entry is None or not isinstance(entry.data, bytes):
         return
     width, height, fmt = identify_data(entry.data)
     fname = 'media/' + self.create_filename(href, fmt)
     rid = self.document_relationships.add_image(fname)
     self.images[href] = Image(rid, fname, width, height, fmt, entry)
     entry.unload_data_from_memory()
     return self.images[href]

Example #12

0

Show file

 def read_image(self, href):
     """Fetch (and cache) the ``Image`` record describing ``href``.

     Yields ``None`` if ``href`` is not in the manifest or its data is not
     ``bytes``.  An image that cannot be identified is swapped for the
     bundled blank PNG after logging a warning.
     """
     if href in self.images:
         return self.images[href]
     entry = self.oeb.manifest.hrefs.get(href)
     if entry is None or not isinstance(entry.data, bytes):
         return
     try:
         dims = identify_data(entry.data)
     except Exception:
         self.log.warning('Replacing corrupted image with blank: %s' % href)
         entry.data = I('blank.png', data=True, allow_user_override=False)
         dims = identify_data(entry.data)
     width, height, fmt = dims
     image_fname = 'media/' + self.create_filename(href, fmt)
     image_rid = self.document_relationships.add_image(image_fname)
     record = Image(image_rid, image_fname, width, height, fmt, entry)
     self.images[href] = record
     entry.unload_data_from_memory()
     return self.images[href]

Example #13

0

Show file

File: images.py Project: suman95/calibre

 def read_image(self, href):
     """Look up ``href`` in the image cache, registering the image on a miss.

     Gives ``None`` back when the manifest lacks ``href`` or the item's
     data is not ``bytes``.  ``identify_data`` failures are not handled here.
     """
     cached = href in self.images
     if not cached:
         manifest_item = self.oeb.manifest.hrefs.get(href)
         if manifest_item is None or not isinstance(manifest_item.data, bytes):
             return
         width, height, fmt = identify_data(manifest_item.data)
         target_name = 'media/' + self.create_filename(href, fmt)
         rel_id = self.document_relationships.add_image(target_name)
         self.images[href] = Image(
             rel_id, target_name, width, height, fmt, manifest_item)
         manifest_item.unload_data_from_memory()
     return self.images[href]

Example #14

0

Show file

def get_cover(docx):
    """Return ``(fmt, raw)`` for the first cover-like image in ``docx``.

    An image qualifies when its height/width ratio lies in [0.8, 1.8] and
    its area is at least 160000 pixels.  Images that cannot be read or
    identified are skipped.  Returns ``None`` when nothing qualifies.

    NOTE(review): ``images`` and ``get`` are not defined in this block and
    must be supplied by the enclosing module.
    """
    doc = docx.document
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, 'r:embed') or get(image, 'r:id')
        if rid not in rid_map:
            continue
        try:
            raw = docx.read(rid_map[rid])
            width, height, fmt = identify_data(raw)
        except Exception:
            continue
        if not width:
            # Degenerate image: avoid ZeroDivisionError in the ratio test
            continue
        # float() keeps the ratio exact even under Python 2 integer division
        if 0.8 <= float(height) / width <= 1.8 and height * width >= 160000:
            return (fmt, raw)

Example #15

0

Show file

File: canvas.py Project: pombreda/calibre-1

 def load_image(self, data):
     """Load ``data`` as the current and original image and notify listeners.

     The format reported by ``identify_data`` is passed to ``QImage`` as a
     hint; if identification fails, ``QImage`` is left to detect it.
     ``is_valid`` ends up true only when the resulting image is not null.
     """
     self.is_valid = False
     try:
         fmt = identify_data(data)[-1].encode('ascii')
     except Exception:
         fmt = b''
     self.original_image_format = fmt.decode('ascii').lower()
     self.selection_state.reset()
     self.original_image_data = data
     if fmt:
         image = QImage.fromData(data, format=fmt)
     else:
         image = QImage.fromData(data)
     self.current_image = image
     self.original_image = image
     self.is_valid = not image.isNull()
     self.update()
     self.image_changed.emit(self.current_image)

Example #16

0

Show file

File: mobi8.py Project: Chansie/KindleEar

    def extract_resources(self):
        """Write font and image records to disk and return their hrefs.

        Creates ``fonts`` and ``images`` directories in the current working
        directory, then walks ``self.resource_sections``.  The returned list
        has one entry per section: the relative href written for it, or
        ``None`` for record types that are ignored.
        """
        ignored_types = {
            b"FLIS",
            b"FCIS",
            b"SRCS",
            b"\xe9\x8e\r\n",
            b"RESC",
            b"BOUN",
            b"FDST",
            b"DATP",
            b"AUDI",
            b"VIDE",
        }
        resource_map = []
        for dirname in ("fonts", "images"):
            os.mkdir(dirname)

        for fname_idx, sec in enumerate(self.resource_sections, 1):
            data = sec[0]
            typ = data[:4]
            href = None
            if typ == b"FONT":
                font = read_font_record(data)
                href = "fonts/%05d.%s" % (fname_idx, font["ext"])
                if font["err"]:
                    self.log.warn("Reading font record %d failed: %s" % (fname_idx, font["err"]))
                    if font["headers"]:
                        self.log.debug("Font record headers: %s" % font["headers"])
                payload = font["font_data"] if font["font_data"] else font["raw_data"]
                with open(href.replace("/", os.sep), "wb") as f:
                    f.write(payload)
                if font["encrypted"]:
                    self.encrypted_fonts.append(href)
            elif typ not in ignored_types:
                # Anything else is treated as an image
                imgtype = what(None, data)
                if imgtype is None:
                    from calibre.utils.magick.draw import identify_data

                    try:
                        imgtype = identify_data(data)[2]
                    except Exception:
                        imgtype = "unknown"
                href = "images/%05d.%s" % (fname_idx, imgtype)
                with open(href.replace("/", os.sep), "wb") as f:
                    f.write(data)

            resource_map.append(href)

        return resource_map

Example #17

0

Show file

File: canvas.py Project: pwasiewi/calibre

 def load_image(self, data):
     """Replace the current and original image with the one decoded from ``data``.

     ``identify_data`` supplies a format hint for ``QImage``; when it
     raises, no hint is given.  Listeners are notified via ``image_changed``.
     """
     self.is_valid = False
     try:
         fmt = identify_data(data)[-1].encode('ascii')
     except Exception:
         fmt = b''
     self.original_image_format = fmt.decode('ascii').lower()
     self.selection_state.reset()
     self.original_image_data = data
     loaded = QImage.fromData(data, format=fmt) if fmt else QImage.fromData(data)
     self.current_image = loaded
     self.original_image = loaded
     self.is_valid = not loaded.isNull()
     self.update()
     self.image_changed.emit(self.current_image)

Example #18

0

Show file

File: mobi8.py Project: mrmac123/calibre

    def extract_resources(self):
        """Write font and image records to disk and return their hrefs.

        Creates ``fonts`` and ``images`` directories in the current working
        directory and walks ``self.resource_sections``.  The returned list
        holds exactly one entry per section: the relative href that was
        written, or ``None`` for ignored records, RESC records and
        placeholder GIFs.
        """
        from calibre.ebooks.mobi.writer2.resources import PLACEHOLDER_GIF
        resource_map = []
        for x in ('fonts', 'images'):
            os.mkdir(x)

        for i, sec in enumerate(self.resource_sections):
            fname_idx = i + 1  # file names are numbered from 1
            data = sec[0]
            typ = data[:4]  # first four bytes identify the record type
            href = None
            if typ in {
                    b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN',
                    b'FDST', b'DATP', b'AUDI', b'VIDE'
            }:
                pass  # Ignore these records
            elif typ == b'RESC':
                # Parsed and kept on self; no file is written for it
                self.resc_data = read_resc_record(data)
            elif typ == b'FONT':
                font = read_font_record(data)
                href = "fonts/%05d.%s" % (fname_idx, font['ext'])
                if font['err']:
                    self.log.warn('Reading font record %d failed: %s' %
                                  (fname_idx, font['err']))
                    if font['headers']:
                        self.log.debug('Font record headers: %s' %
                                       font['headers'])
                # Fall back to the raw record when no decoded font data exists
                with open(href.replace('/', os.sep), 'wb') as f:
                    f.write(font['font_data']
                            if font['font_data'] else font['raw_data'])
                if font['encrypted']:
                    self.encrypted_fonts.append(href)
            else:
                # Anything else is treated as an image; placeholder GIFs are
                # not written, but still get a None slot in the map below.
                if not (len(data) == len(PLACEHOLDER_GIF)
                        and data == PLACEHOLDER_GIF):
                    imgtype = what(None, data)
                    if imgtype is None:
                        from calibre.utils.magick.draw import identify_data
                        try:
                            imgtype = identify_data(data)[2]
                        except Exception:
                            imgtype = 'unknown'
                    href = 'images/%05d.%s' % (fname_idx, imgtype)
                    with open(href.replace('/', os.sep), 'wb') as f:
                        f.write(data)

            resource_map.append(href)

        return resource_map

Example #19

0

Show file

File: content.py Project: jilanfang/calibre

 def test(src, url, sz=None):
     """GET ``url`` and check the body, then check the conditional-GET path.

     With ``sz`` unset the body must equal the resource ``src``; otherwise
     the first value reported by ``identify_data`` for the body must equal
     ``sz``.  A repeat request carrying the returned ETag must yield
     NOT_MODIFIED with an empty body.
     """
     expected = P(src, data=True)
     conn.request('GET', url)
     resp = conn.getresponse()
     self.ae(resp.status, httplib.OK)
     body = resp.read()
     if sz is not None:
         self.ae(sz, identify_data(body)[0])
     else:
         self.ae(body, expected)
     test_response(resp)
     etag_headers = {'If-None-Match': resp.getheader('ETag')}
     conn.request('GET', url, headers=etag_headers)
     resp = conn.getresponse()
     self.ae(resp.status, httplib.NOT_MODIFIED)
     self.ae(b'', resp.read())

Example #20

0

Show file

File: content.py Project: winning1120xx/calibre

 def test(src, url, sz=None):
     """Fetch ``url`` twice: once in full, once conditionally on its ETag.

     The first response must be OK and either match the resource ``src``
     byte for byte (``sz`` unset) or have ``identify_data(...)[0]`` equal to
     ``sz``.  The second must be NOT_MODIFIED with no body.
     """
     reference = P(src, data=True)
     conn.request('GET', url)
     first = conn.getresponse()
     self.ae(first.status, httplib.OK)
     payload = first.read()
     if sz is None:
         self.ae(payload, reference)
     else:
         reported = identify_data(payload)[0]
         self.ae(sz, reported)
     test_response(first)
     conn.request('GET', url, headers={'If-None-Match': first.getheader('ETag')})
     second = conn.getresponse()
     self.ae(second.status, httplib.NOT_MODIFIED)
     self.ae(b'', second.read())

Example #21

0

Show file

File: docx.py Project: winning1120xx/calibre

def get_cover(docx):
    """Return ``(fmt, raw)`` for the first cover-like image in ``docx``.

    Candidate images come from ``w:drawing``/``w:pict`` elements.  An image
    qualifies when its height/width ratio lies in [0.8, 1.8] and its area
    is at least 160000 pixels.  Unreadable or unidentifiable images are
    skipped.  Returns ``None`` when nothing qualifies.
    """
    doc = docx.document
    get = docx.namespace.get
    images = docx.namespace.XPath(
        '//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]'
    )
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, 'r:embed') or get(image, 'r:id')
        if rid not in rid_map:
            continue
        try:
            raw = docx.read(rid_map[rid])
            width, height, fmt = identify_data(raw)
        except Exception:
            continue
        if not width:
            # Zero-width image: skip rather than raise ZeroDivisionError
            continue
        # float() guarantees true division regardless of __future__ imports
        if 0.8 <= float(height) / width <= 1.8 and height * width >= 160000:
            return (fmt, raw)

Example #22

0

Show file

File: images.py Project: ironmancris/calibre

 def add_image(self, img, block, stylizer):
     """Register the image referenced by ``img`` and add its markup to ``block``.

     Returns the relationship id of the image, or ``None`` when ``img`` has
     no ``src`` or the manifest item is missing or its data is not ``bytes``.
     """
     src = img.get('src')
     if not src:
         return
     href = self.abshref(src)
     already_known = href in self.images
     if not already_known:
         entry = self.oeb.manifest.hrefs.get(href)
         if entry is None or not isinstance(entry.data, bytes):
             return
         width, height, fmt = identify_data(entry.data)
         fname = 'media/' + self.create_filename(href, fmt)
         rid = self.document_relationships.add_image(fname)
         self.images[href] = Image(rid, fname, width, height, fmt, entry)
         entry.unload_data_from_memory()
     markup = self.create_image_markup(img, stylizer, href)
     block.add_image(markup)
     return self.images[href].rid

Example #23

0

Show file

File: docx.py Project: JapaChin/calibre

def get_cover(docx):
    """Pick the first image in ``docx`` that looks like a cover.

    Images are taken from ``w:drawing``/``w:pict`` elements in document
    order.  The first one whose height/width ratio is within [0.8, 1.8]
    and whose area is at least 160000 pixels is returned as ``(fmt, raw)``.
    Images that cannot be read or identified are skipped; ``None`` is
    returned when no image qualifies.
    """
    doc = docx.document
    get = docx.namespace.get
    images = docx.namespace.XPath(
        '//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]'
    )
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, "r:embed") or get(image, "r:id")
        if rid not in rid_map:
            continue
        try:
            raw = docx.read(rid_map[rid])
            width, height, fmt = identify_data(raw)
        except Exception:
            continue
        # The width check prevents ZeroDivisionError; float() forces true division
        if (width and 0.8 <= float(height) / width <= 1.8 and
                height * width >= 160000):
            return (fmt, raw)

Example #24

0

Show file

File: mobi8.py Project: alip/calibre

    def extract_resources(self):
        """Write font and image records to disk and return their hrefs.

        Creates ``fonts`` and ``images`` directories in the current working
        directory and walks ``self.resource_sections``.  The returned list
        holds exactly one entry per section: the relative href that was
        written, or ``None`` for ignored records, RESC records and
        placeholder GIFs.
        """
        from calibre.ebooks.mobi.writer2.resources import PLACEHOLDER_GIF
        resource_map = []
        for x in ('fonts', 'images'):
            os.mkdir(x)

        for i, sec in enumerate(self.resource_sections):
            fname_idx = i+1
            data = sec[0]
            typ = data[:4]
            href = None
            if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN',
                       b'FDST', b'DATP', b'AUDI', b'VIDE'}:
                pass  # Ignore these records
            elif typ == b'RESC':
                self.resc_data = read_resc_record(data)
            elif typ == b'FONT':
                font = read_font_record(data)
                href = "fonts/%05d.%s" % (fname_idx, font['ext'])
                if font['err']:
                    self.log.warn('Reading font record %d failed: %s'%(
                        fname_idx, font['err']))
                    if font['headers']:
                        self.log.debug('Font record headers: %s'%font['headers'])
                with open(href.replace('/', os.sep), 'wb') as f:
                    f.write(font['font_data'] if font['font_data'] else
                            font['raw_data'])
                if font['encrypted']:
                    self.encrypted_fonts.append(href)
            elif len(data) == len(PLACEHOLDER_GIF) and data == PLACEHOLDER_GIF:
                # Placeholder images are not written out, but must still fall
                # through to the append below.  A `continue` here dropped the
                # slot, leaving the map one entry short and shifting every
                # later entry relative to its section index.
                pass
            else:
                imgtype = what(None, data)
                if imgtype is None:
                    from calibre.utils.magick.draw import identify_data
                    try:
                        imgtype = identify_data(data)[2]
                    except Exception:
                        imgtype = 'unknown'
                href = 'images/%05d.%s'%(fname_idx, imgtype)
                with open(href.replace('/', os.sep), 'wb') as f:
                    f.write(data)

            resource_map.append(href)

        return resource_map

Example #25

0

Show file

 def add_image(self, img, block, stylizer):
     """Add the image referenced by ``img`` to ``block``, registering it if new.

     Returns the image's relationship id.  Returns ``None`` without touching
     ``block`` when ``img`` has no ``src``, or when the manifest item is
     absent or its data is not ``bytes``.
     """
     src = img.get('src')
     if not src:
         return
     href = self.abshref(src)
     if href not in self.images:
         manifest_item = self.oeb.manifest.hrefs.get(href)
         usable = manifest_item is not None and isinstance(manifest_item.data, bytes)
         if not usable:
             return
         width, height, fmt = identify_data(manifest_item.data)
         target_name = 'media/' + self.create_filename(href, fmt)
         rel_id = self.document_relationships.add_image(target_name)
         self.images[href] = Image(
             rel_id, target_name, width, height, fmt, manifest_item)
         manifest_item.unload_data_from_memory()
     block.add_image(self.create_image_markup(img, stylizer, href))
     return self.images[href].rid

Example #26

0

Show file

 def process_image(self, data):
     """Return ``data`` converted by ``mobify_image`` or ``rescale_image``.

     ``data`` is returned untouched when ``self.process_images`` is false.
     If the conversion fails and the data is a PNG, the PNG is run through
     ``optimize_png`` via a temporary file and the conversion is retried
     once.  For any other format the original exception is re-raised.
     """
     if not self.process_images:
         return data
     func = mobify_image if self.opts.mobi_keep_original_images else rescale_image
     try:
         return func(data)
     except Exception:
         from calibre.utils.magick.draw import identify_data
         if 'png' != identify_data(data)[-1].lower():
             raise
         with PersistentTemporaryFile(suffix='.png') as pt:
             pt.write(data)
         try:
             from calibre.utils.img import optimize_png
             optimize_png(pt.name)
             # Close the handle deterministically before the file is removed;
             # a lingering open handle can make os.remove fail on some platforms.
             with open(pt.name, 'rb') as f:
                 data = f.read()
         finally:
             os.remove(pt.name)
         return func(data)

Example #27

0

Show file

File: resources.py Project: AEliu/calibre

 def process_image(self, data):
     """Convert image ``data`` for output, retrying PNGs after optimization.

     Returns ``data`` unchanged when ``self.process_images`` is false.
     Otherwise applies ``mobify_image`` or ``rescale_image`` depending on
     ``self.opts.mobi_keep_original_images``.  On failure, PNG data is
     rewritten with ``optimize_png`` and converted again; failures for other
     formats are re-raised.
     """
     if not self.process_images:
         return data
     func = mobify_image if self.opts.mobi_keep_original_images else rescale_image
     try:
         return func(data)
     except Exception:
         from calibre.utils.magick.draw import identify_data
         if 'png' != identify_data(data)[-1].lower():
             raise
         with PersistentTemporaryFile(suffix='.png') as pt:
             pt.write(data)
         try:
             from calibre.utils.img import optimize_png
             optimize_png(pt.name)
             # Read through a context manager so the handle is closed before
             # the temporary file is deleted in the finally clause.
             with open(pt.name, 'rb') as f:
                 data = f.read()
         finally:
             os.remove(pt.name)
         return func(data)

Example #28

0

Show file

File: fb2.py Project: AEliu/calibre

def _parse_cover_data(root, imgid, mi, ctx):
    """Set ``mi.cover_data`` from the ``fb:binary`` element whose id is ``imgid``.

    Does nothing when no such element exists.  The element's content type
    (default ``image/jpeg``) must map to a known extension; if it does not
    but is an ``image/`` type, the type guessed from ``imgid`` is tried.
    An unsupported type prints a warning instead of setting the cover.
    """
    from calibre.ebooks.fb2 import base64_decode
    matches = ctx.XPath('//fb:binary[@id="%s"]'%imgid)(root)
    if not matches:
        return
    binary = matches[0]
    mimetype = binary.get('content-type', 'image/jpeg')
    extensions = guess_all_extensions(mimetype)

    if not extensions and mimetype.startswith('image/'):
        mimetype_fromid = guess_type(imgid)[0]
        if mimetype_fromid and mimetype_fromid.startswith('image/'):
            extensions = guess_all_extensions(mimetype_fromid)

    if not extensions:
        prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" % (mimetype, imgid))
        return

    encoded = binary.text
    if encoded:
        cdata = base64_decode(encoded.strip())
        mi.cover_data = (identify_data(cdata)[-1], cdata)

Example #29

0

Show file

File: rtfml.py Project: sss/calibre

    def image_to_hexstring(self, data):
        """Return ``(hex_string, width, height)`` for image ``data``.

        The data is first passed through ``save_cover_data_to`` with the
        name ``cover.jpg``.  The result is hex-encoded (two lowercase digits
        per byte) and broken into lines of at most 128 characters, separated
        by newlines with no trailing newline.
        """
        data = save_cover_data_to(data, 'cover.jpg', return_data=True)
        width, height = identify_data(data)[:2]

        # bytearray yields ints on both Python 2 and 3, and join() avoids
        # the quadratic cost of building the string with repeated +=.
        raw_hex = ''.join('%02x' % byte for byte in bytearray(data))

        # Images must be broken up into short lines; the original loop
        # emitted 128 characters per line, which is preserved here.
        line_len = 128
        hex_string = '\n'.join(
            raw_hex[i:i + line_len] for i in range(0, len(raw_hex), line_len))

        return (hex_string, width, height)

Example #30

0

Show file

File: rtfml.py Project: BatteringRam/calibre

    def image_to_hexstring(self, data):
        """Hex-encode image ``data`` and return ``(hex_string, width, height)``.

        ``data`` is first run through ``save_cover_data_to`` (as
        ``cover.jpg``).  Each byte becomes two lowercase hex digits, and the
        digit stream is wrapped at 128 characters per line using ``'\\n'``
        separators, with no trailing newline.
        """
        data = save_cover_data_to(data, 'cover.jpg', return_data=True)
        width, height = identify_data(data)[:2]

        # Iterating a bytearray gives integers on Python 2 and 3 alike,
        # whereas ord() on an element of Python 3 bytes raises TypeError.
        digits = ['%02x' % byte for byte in bytearray(data)]
        raw_hex = ''.join(digits)

        # Wrap at 128 characters per line (what the original loop produced),
        # building the result in linear time.
        chunks = [raw_hex[pos:pos + 128] for pos in range(0, len(raw_hex), 128)]
        hex_string = '\n'.join(chunks)

        return (hex_string, width, height)

Example #31

0

Show file

File: fb2.py Project: winning1120xx/calibre

def _parse_cover_data(root, imgid, mi, ctx):
    """Read the cover image stored in the ``fb:binary`` element with id ``imgid``.

    When the element exists and its content type (default ``image/jpeg``)
    or, failing that, the type guessed from ``imgid`` maps to a known
    extension, the base64 payload is decoded and stored on
    ``mi.cover_data`` as ``(fmt, bytes)``.  Unsupported types print a warning.
    """
    from calibre.ebooks.fb2 import base64_decode
    found = ctx.XPath('//fb:binary[@id="%s"]' % imgid)(root)
    if not found:
        return
    node = found[0]
    mimetype = node.get('content-type', 'image/jpeg')
    known_exts = guess_all_extensions(mimetype)

    if not known_exts and mimetype.startswith('image/'):
        guessed = guess_type(imgid)[0]
        if guessed and guessed.startswith('image/'):
            known_exts = guess_all_extensions(guessed)

    if known_exts:
        payload = node.text
        if not payload:
            return
        cdata = base64_decode(payload.strip())
        fmt = identify_data(cdata)[-1]
        mi.cover_data = (fmt, cdata)
    else:
        prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" %
               (mimetype, imgid))

Example #32

0

Show file

File: docx.py Project: BobPyron/calibre

def get_metadata(stream):
    """Return the metadata of the DOCX file in ``stream``, with a guessed cover.

    The first jpeg/jpg/png/gif archive member whose height/width ratio is
    within [0.8, 1.8] and whose area is at least 12000 pixels is stored in
    ``cover_data`` as ``(fmt, raw)``.  Members that cannot be identified
    are skipped.
    """
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
    finally:
        # Release the container even if reading the metadata fails
        c.close()
    stream.seek(0)
    cdata = None
    with ZipFile(stream, 'r') as zf:
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if ext not in {'jpeg', 'jpg', 'png', 'gif'}:
                continue
            raw = zf.read(zi)
            try:
                width, height, fmt = identify_data(raw)
            except Exception:
                continue
            # The width check avoids ZeroDivisionError; float() forces true division
            if (width and 0.8 <= float(height) / width <= 1.8 and
                    height * width >= 12000):
                cdata = (fmt, raw)
                break  # first suitable image wins; no need to read the rest
        if cdata is not None:
            mi.cover_data = cdata

    return mi

Example #33

0

Show file

File: docx.py Project: kobolabs/calibre

def get_metadata(stream):
    """Read DOCX metadata from ``stream`` and attach the first cover-like image.

    Archive members ending in jpeg/jpg/png/gif are examined in order.  The
    first one with a height/width ratio in [0.8, 1.8] and an area of at
    least 12000 pixels becomes ``cover_data`` as ``(fmt, raw)``.
    Unidentifiable images are ignored.
    """
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
    finally:
        # Always close the container, including when metadata parsing raises
        c.close()
    stream.seek(0)
    cdata = None
    image_exts = {'jpeg', 'jpg', 'png', 'gif'}
    with ZipFile(stream, 'r') as zf:
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if ext not in image_exts:
                continue
            raw = zf.read(zi)
            try:
                width, height, fmt = identify_data(raw)
            except Exception:
                # Narrowed from a bare except so interpreter-exit signals propagate
                continue
            if not width:
                continue  # zero width would raise ZeroDivisionError below
            if 0.8 <= float(height) / width <= 1.8 and height * width >= 12000:
                cdata = (fmt, raw)
                break  # stop decompressing further members once a cover is found
        if cdata is not None:
            mi.cover_data = cdata

    return mi

Example #34

0

Show file

File: simple.py Project: sss/calibre

    def rescale_image(self, data):
        """Scale and/or JPEG-compress image ``data`` according to the news-image settings.

        The image is first scaled if ``self.scale_news_images`` is set and
        ``fit_image`` says scaling is needed.  A byte budget is then taken
        from ``compress_news_images_max_size`` (times 1024) or derived from
        the pixel area and ``compress_news_images_auto_size``; with neither
        set, the (possibly scaled) data is returned.  Quality is lowered in
        steps of 5 from 95 until the budget is met or the quality floor is
        reached.  Whichever of original, scaled and compressed data the
        final size comparisons select is returned.
        """
        original = data  # save it in case compression fails
        width, height, ifmt = identify_data(data)
        if self.scale_news_images is not None:
            wmax, hmax = self.scale_news_images
            needs_scaling, fit_w, fit_h = fit_image(width, height, wmax, hmax)
            if needs_scaling:
                data = thumbnail(data, fit_w, fit_h,
                                 compression_quality=95)[-1]
                width, height = fit_w, fit_h

        if self.compress_news_images_max_size is not None:
            maxsizeb = self.compress_news_images_max_size * 1024
        elif self.compress_news_images_auto_size is not None:
            maxsizeb = (width * height) / self.compress_news_images_auto_size
        else:  # not compressing
            return data

        scaled = data  # save it in case compression fails
        if len(scaled) <= maxsizeb:  # no compression required
            return scaled

        img = Image()
        quality = 95
        img.load(scaled)
        compressed = scaled
        while len(compressed) >= maxsizeb and quality >= 5:
            quality -= 5
            img.set_compression_quality(quality)
            compressed = img.export('jpg')

        if len(compressed) >= len(scaled):  # compression failed
            if len(original) <= len(scaled):
                return original
            return scaled

        if len(compressed) >= len(original):  # no improvement
            return original

        return compressed

Example #35

0

Show file

File: metadata_bulk.py Project: siebert/calibre

def get_cover_data(stream, ext):  # {{{
    """Return ``(cover_bytes, pixel_area)`` for the book in *stream*.

    Metadata is read with ``get_metadata(stream, ext)``; *stream* is used as
    a context manager, so it is closed once the metadata has been read. The
    cover is taken from the file at ``mi.cover`` when that path is readable,
    otherwise from ``mi.cover_data[1]``. ``pixel_area`` is ``width * height``
    as reported by ``identify_data``.

    The ``read_file_metadata`` preference is forced on for the duration of
    the call and restored afterwards, even if an error escapes.

    Any ordinary failure yields ``(None, None)``; when no cover is found
    the (empty or ``None``) cover value is returned with a ``None`` area.
    """
    from calibre.ebooks.metadata.meta import get_metadata
    old = prefs['read_file_metadata']
    if not old:
        prefs['read_file_metadata'] = True
    cdata = area = None

    try:
        with stream:
            mi = get_metadata(stream, ext)
        if mi.cover and os.access(mi.cover, os.R_OK):
            # Image data is binary: read it in 'rb' mode and make sure the
            # file handle is closed again.
            with open(mi.cover, 'rb') as f:
                cdata = f.read()
        elif mi.cover_data[1] is not None:
            cdata = mi.cover_data[1]
        if cdata:
            width, height, fmt = identify_data(cdata)
            area = width*height
    except Exception:
        # Best effort: a book without a usable cover is not an error
        cdata = area = None
    finally:
        # Always put the preference back the way we found it
        if old != prefs['read_file_metadata']:
            prefs['read_file_metadata'] = old

    return cdata, area

Example #36

0

Show file

File: simple.py Project: anzizhao/calibre

    def rescale_image(self, data):
        """Return image bytes scaled and JPEG-compressed per the news settings.

        Steps: fit the image inside ``self.scale_news_images`` if configured,
        compute a maximum size in bytes (``compress_news_images_max_size``
        KB, else pixel area / ``compress_news_images_auto_size``), then
        re-export as JPEG with progressively lower quality until the result
        is under that size. Falls back to the original or the scaled-only
        data when recompression does not make things smaller.
        """
        cur_w, cur_h, ifmt = identify_data(data)
        source_bytes = data  # what the caller passed in
        if self.scale_news_images is not None:
            wmax, hmax = self.scale_news_images
            scale, fitted_w, fitted_h = fit_image(cur_w, cur_h, wmax, hmax)
            if scale:
                thumb = thumbnail(data, fitted_w, fitted_h, compression_quality=95)
                data = thumb[-1]
                cur_w = fitted_w
                cur_h = fitted_h

        if self.compress_news_images_max_size is not None:
            maxsizeb = self.compress_news_images_max_size * 1024
        elif self.compress_news_images_auto_size is not None:
            maxsizeb = (cur_w * cur_h)/self.compress_news_images_auto_size
        else:
            # Neither size limit is configured, so no compression is wanted
            return data

        resized_bytes = data  # result of scaling alone
        if len(resized_bytes) <= maxsizeb:
            return resized_bytes  # small enough, nothing more to do

        img = Image()
        img.load(data)
        # Try qualities 90, 85, ..., 0 and stop as soon as the size limit is met
        for quality in range(90, -5, -5):
            if len(data) < maxsizeb:
                break
            img.set_compression_quality(quality)
            data = img.export('jpg')

        compressed_len = len(data)
        if compressed_len >= len(resized_bytes):
            # Compression failed: hand back the smaller of the two fallbacks
            if len(source_bytes) <= len(resized_bytes):
                return source_bytes
            return resized_bytes

        if compressed_len >= len(source_bytes):
            # No improvement over the input
            return source_bytes

        return data

Example #37

0

Show file

File: docx.py Project: Eksmo/calibre

def get_metadata(stream):
    """Read metadata (and a likely cover image) from a zip container stream.

    ``docProps/core.xml`` and ``docProps/app.xml`` (matched case-insensitively)
    are passed to ``_read_doc_props`` / ``_read_app_props`` to fill in the
    returned ``Metadata`` object. The first JPEG/PNG/GIF entry whose
    height/width ratio lies in [0.8, 1.8] and whose area is at least 12000
    pixels is stored as ``mi.cover_data = (fmt, raw_bytes)``.
    """
    with ZipFile(stream, 'r') as zf:

        mi = Metadata(_('Unknown'))
        cdata = None

        for zi in zf.infolist():
            name = zi.filename.lower()
            ext = zi.filename.rpartition('.')[-1].lower()
            if name == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif name == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    # Not a decodable image, try the next entry
                    continue
                # float() forces true division so the ratio bounds mean what
                # they say even under Python 2 integer division; a zero
                # width is skipped instead of raising ZeroDivisionError.
                if (width > 0 and 0.8 <= float(height)/width <= 1.8 and
                        height*width >= 12000):
                    cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata

    return mi

Example #38

0

Show file

def get_metadata(stream):
    """Read metadata (and a likely cover image) from a zip container stream.

    ``docProps/core.xml`` and ``docProps/app.xml`` (matched case-insensitively)
    are passed to ``_read_doc_props`` / ``_read_app_props`` to fill in the
    returned ``Metadata`` object. The first JPEG/PNG/GIF entry whose
    height/width ratio lies in [0.8, 1.8] and whose area is at least 12000
    pixels is stored as ``mi.cover_data = (fmt, raw_bytes)``.
    """
    with ZipFile(stream, 'r') as zf:

        mi = Metadata(_('Unknown'))
        cdata = None

        for zi in zf.infolist():
            name = zi.filename.lower()
            ext = zi.filename.rpartition('.')[-1].lower()
            if name == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif name == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    # Not a decodable image, try the next entry
                    continue
                # float() forces true division so the ratio bounds mean what
                # they say even under Python 2 integer division; a zero
                # width is skipped instead of raising ZeroDivisionError.
                if (width > 0 and 0.8 <= float(height) / width <= 1.8 and
                        height * width >= 12000):
                    cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata

    return mi

Example #39

0

Show file

File: cover.py Project: pombreda/calibre-1

def create_epub_cover(container, cover_path, existing_image, options=None):
    """Add a title page that displays a raster cover to the book in *container*.

    When *existing_image* is given, that manifest entry is used as the cover;
    otherwise the file at *cover_path* is copied into the container as a new
    item. A ``titlepage.xhtml`` item is generated from the SVG or non-SVG
    ``CoverManager`` template, placed first in the spine, and referenced from
    the OPF guide and a ``<meta name="cover">`` element.

    *options* may supply ``keep_aspect`` and ``no_svg``; when it is ``None``
    the saved ``epub_output`` conversion defaults are used instead.

    Returns ``(raster_cover_name, titlepage_name)``.
    """
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    extension = cover_path.rpartition('.')[-1].lower()
    cname = 'cover.' + extension
    tname = 'titlepage.xhtml'
    recommended_folders = get_recommended_folders(container, (cname, tname))

    if not existing_image:
        # Import the image file as a brand new manifest item
        cover_folder = recommended_folders[cname]
        if cover_folder:
            cname = cover_folder + '/' + cname
        raster_cover_item = container.generate_item(cname, id_prefix='cover')
        raster_cover = container.href_to_name(
            raster_cover_item.get('href'), container.opf_name)

        with open(cover_path, 'rb') as src:
            with container.open(raster_cover, 'wb') as dest:
                shutil.copyfileobj(src, dest)
    else:
        # Re-use an image already in the book: look up its manifest element
        raster_cover = existing_image
        name_to_id = dict(
            (name, item_id)
            for item_id, name in container.manifest_id_map.iteritems())
        raster_cover_item = container.opf_xpath(
            '//opf:manifest/*[@id="%s"]' % name_to_id[existing_image])[0]

    if options is not None:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)
    else:
        defaults = load_defaults('epub_output')
        keep_aspect = defaults.get('preserve_cover_aspect_ratio', False)
        no_svg = defaults.get('no_svg_cover', False)

    if no_svg:
        # The doubled %% survives the later "templ % href" as a single %
        templ = CoverManager.NONSVG_TEMPLATE.replace(
            '__style__', 'style="height: 100%%"')
    else:
        # Fallback dimensions, used if the image cannot be identified
        width, height = 600, 800
        try:
            if existing_image:
                dims = identify_data(
                    container.raw_data(existing_image, decode=False))
            else:
                dims = identify(cover_path)
            width, height = dims[:2]
        except:
            container.log.exception("Failed to get width and height of cover")
        aspect = 'xMidYMid meet' if keep_aspect else 'none'
        templ = (CoverManager.SVG_TEMPLATE
                 .replace('__ar__', aspect)
                 .replace('__viewbox__', '0 0 %d %d' % (width, height))
                 .replace('__width__', str(width))
                 .replace('__height__', str(height)))

    title_folder = recommended_folders[tname]
    if title_folder:
        tname = title_folder + '/' + tname
    titlepage_item = container.generate_item(tname, id_prefix='titlepage')
    titlepage = container.href_to_name(
        titlepage_item.get('href'), container.opf_name)
    cover_href = container.name_to_href(raster_cover, titlepage).encode('utf-8')
    raw = templ % cover_href
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    # Some reader firmware (Nook) only recognises the cover when the raster
    # image item has id="cover", so move any other user of that id aside.
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        replacement_id = uuid_id()
        for node in container.opf_xpath('//*[@id="cover"]'):
            node.set('id', replacement_id)
        for node in container.opf_xpath('//*[@idref="cover"]'):
            node.set('idref', replacement_id)
        raster_cover_item.set('id', 'cover')

    # The title page becomes the first item in the spine
    spine = container.opf_xpath('//opf:spine')[0]
    itemref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, itemref, index=0)

    # <guide><reference type="cover" .../></guide>
    guide = container.opf_get_or_create('guide')
    reference = guide.makeelement(
        OPF('reference'), type='cover', title=_('Cover'),
        href=container.name_to_href(titlepage, base=container.opf_name))
    container.insert_into_xml(guide, reference)

    # <meta name="cover" content="<id of the raster cover item>"/>
    metadata = container.opf_get_or_create('metadata')
    cover_meta = metadata.makeelement(OPF('meta'), name='cover')
    cover_meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, cover_meta)

    return raster_cover, titlepage

Example #40

0

Show file

    def __init__(self, mf):
        """Build index, text, image, font and binary record lists from *mf*.

        Copies the parsed containers (raw data, PalmDB header, record
        headers, records, MOBI header, huffman record numbers) off *mf*,
        parses the primary and secondary index records when the MOBI header
        points at them, wraps the text records, and classifies every
        remaining record from the first non-book record onwards as an image,
        a font or an opaque binary record.
        """
        copied_attrs = ('raw', 'palmdb', 'record_headers', 'records',
                'mobi_header', 'huffman_record_nums',)
        for attr_name in copied_attrs:
            setattr(self, attr_name, getattr(mf, attr_name))

        # Primary index: header record, index records, then CNCX blocks
        self.index_header = None
        self.index_record = None
        self.indexing_record_nums = set()
        primary = self.mobi_header.primary_index_record
        if primary != NULL_INDEX:
            header = IndexHeader(self.records[primary])
            self.index_header = header
            first = primary + 1
            cncx_start = first + header.index_count
            cncx_end = cncx_start + header.num_of_cncx_blocks
            self.cncx = CNCX(self.records[cncx_start:cncx_end],
                    header.index_encoding)
            self.index_record = IndexRecord(self.records[first:cncx_start],
                    header, self.cncx)
            self.indexing_record_nums = set(xrange(primary, cncx_end))

        # Secondary index (shares the CNCX of the primary index)
        self.secondary_index_record = None
        self.secondary_index_header = None
        secondary = self.mobi_header.secondary_index_record
        if secondary != NULL_INDEX:
            sec_header = SecondaryIndexHeader(self.records[secondary])
            self.secondary_index_header = sec_header
            sec_count = sec_header.index_count
            self.indexing_record_nums.add(secondary)
            sec_first = secondary + 1
            sec_end = sec_first + sec_count
            self.secondary_index_record = IndexRecord(
                    self.records[sec_first:sec_end], sec_header, self.cncx)
            self.indexing_record_nums |= set(xrange(sec_first, sec_end))

        num_text = self.mobi_header.number_of_text_records
        first_non_book = self.mobi_header.first_non_book_record
        first_image = self.mobi_header.first_image_index
        if first_non_book == NULL_INDEX:
            first_non_book = len(self.records)
        last_text = min(len(self.records), num_text+1)
        self.text_records = [
            TextRecord(n, self.records[n], self.mobi_header.extra_data_flags,
                mf.decompress6)
            for n in xrange(1, last_text)]

        self.image_records, self.binary_records = [], []
        self.font_records = []
        # Leading four bytes of records that are known not to be images
        non_image_magic = {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE',
                b'FONT'}
        image_index = 0
        for recno in xrange(first_non_book, len(self.records)):
            if (recno in self.indexing_record_nums or
                    recno in self.huffman_record_nums):
                continue
            # Counts every non-index, non-huffman record, image or not
            image_index += 1
            rec = self.records[recno]
            magic = rec.raw[:4]
            fmt = None
            if recno >= first_image and magic not in non_image_magic:
                try:
                    _w, _h, fmt = identify_data(rec.raw)
                except:
                    # Not an image we can identify; handled below
                    pass
            if fmt is not None:
                self.image_records.append(ImageRecord(image_index, rec, fmt))
            elif magic == b'FONT':
                self.font_records.append(FontRecord(recno, rec))
            else:
                self.binary_records.append(BinaryRecord(recno, rec))

        if self.index_record is not None:
            self.tbs_indexing = TBSIndexing(self.text_records,
                    self.index_record.indices, self.mobi_header.type_raw)

Example #41

0

Show file

    def test_get(self):  # {{{
        'Test /get'
        # End-to-end test of the /get/<what>/<book_id>[/<library_id>] endpoint
        # against a live test server: book formats, covers, thumbnails, OPF and
        # JSON metadata, and the 'Used-Cache' response header on repeat
        # requests. The assertions depend on the exact order of the requests.
        # self.ae is presumably an assertEqual alias -- confirm on the base class.
        with self.create_server() as server:
            db = server.handler.router.ctx.library_broker.get(None)
            conn = server.connect()

            def get(what, book_id, library_id=None, q=''):
                # Issue GET /get/<what>/<book_id>[/<library_id>][?<q>] and
                # return (response, body bytes).
                q = ('?' + q) if q else q
                conn.request(
                    'GET', '/get/%s/%s' % (what, book_id) +
                    (('/' + library_id) if library_id else '') + q)
                r = conn.getresponse()
                return r, r.read()

            # Test various invalid parameters
            def bad(*args):
                # The request must be answered with 404 Not Found
                r, data = get(*args)
                self.ae(r.status, httplib.NOT_FOUND)

            bad('xxx', 1)
            bad('fmt1', 10)
            bad('fmt1', 1, 'zzzz')
            bad('fmt1', 'xx')

            # Test simple fetching of format without metadata update
            r, data = get('fmt1', 1, db.server_library_id)
            self.ae(data, db.format(1, 'fmt1'))
            self.assertIsNotNone(r.getheader('Content-Disposition'))
            self.ae(r.getheader('Used-Cache'), 'no')
            # Same book without an explicit library id: expected from cache
            r, data = get('fmt1', 1)
            self.ae(data, db.format(1, 'fmt1'))
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test fetching of format with metadata update
            raw = P('quick_start/eng.epub', data=True)
            r, data = get('epub', 1)
            self.ae(r.status, httplib.OK)
            etag = r.getheader('ETag')
            self.assertIsNotNone(etag)
            self.ae(r.getheader('Used-Cache'), 'no')
            self.assertTrue(data.startswith(b'PK'))
            self.assertGreaterEqual(len(data), len(raw))
            # Changing the title must change the ETag and bypass the cache
            db.set_field('title', {1: 'changed'})
            r, data = get('epub', 1)
            self.assertNotEqual(r.getheader('ETag'), etag)
            etag = r.getheader('ETag')
            self.ae(r.getheader('Used-Cache'), 'no')
            mi = get_metadata(BytesIO(data), extract_cover=False)
            self.ae(mi.title, 'changed')
            r, data = get('epub', 1)
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test plugboards
            import calibre.library.save_to_disk as c
            # Temporarily switch off c.DEBUG; restored in the finally below
            orig, c.DEBUG = c.DEBUG, False
            try:
                db.set_pref(
                    'plugboards', {
                        u'epub': {
                            u'content_server':
                            [[u'changed, {title}', u'title']]
                        }
                    })
                # this is needed as the cache is not invalidated for plugboard changes
                db.set_field('title', {1: 'again'})
                r, data = get('epub', 1)
                self.assertNotEqual(r.getheader('ETag'), etag)
                etag = r.getheader('ETag')
                self.ae(r.getheader('Used-Cache'), 'no')
                mi = get_metadata(BytesIO(data), extract_cover=False)
                self.ae(mi.title, 'changed, again')
            finally:
                c.DEBUG = orig

            # Test the serving of covers
            def change_cover(count, book_id=2):
                # NOTE(review): set_cover() always targets book 2 while the
                # mtime bump below uses book_id -- confirm this is intended
                # for the change_cover(1, 1) call further down.
                cpath = db.format_abspath(book_id, '__COVER_INTERNAL__')
                db.set_cover({2: I('lt.png', data=True)})
                t = time.time() + 1 + count
                # Ensure mtime changes, needed on OS X where HFS+ has a 1s
                # mtime resolution
                os.utime(cpath, (t, t))

            r, data = get('cover', 1)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'no')
            self.ae(r.getheader('Content-Type'), 'image/jpeg')
            r, data = get('cover', 1)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('cover', 3)
            self.ae(r.status, httplib.NOT_FOUND)
            # Thumbnails: expected to be 60x60 JPEG when no size is requested
            r, data = get('thumb', 1)
            self.ae(r.status, httplib.OK)
            self.ae(identify_data(data), (60, 60, 'jpeg'))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('thumb', 1)
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, httplib.OK)
            self.ae(identify_data(data), (100, 100, 'jpeg'))
            self.ae(r.getheader('Used-Cache'), 'no')
            # 'sz=100x100' is expected to hit the cache entry made by 'sz=100'
            r, data = get('thumb', 1, q='sz=100x100')
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            change_cover(1, 1)
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, httplib.OK)
            self.ae(identify_data(data), (100, 100, 'jpeg'))
            self.ae(r.getheader('Used-Cache'), 'no')

            # Test file sharing in cache
            test_share_open()
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            # The 'Tempfile' header carries the hex-encoded path of the served file
            path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
            f, fdata = share_open(path, 'rb'), data
            # Now force an update
            change_cover(1)
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
            f2, f2data = share_open(path, 'rb'), data
            # Do it again
            change_cover(2)
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            # Files opened before the updates must still yield their old data
            self.ae(f.read(), fdata)
            self.ae(f2.read(), f2data)

            # Test serving of metadata as opf
            r, data = get('opf', 1)
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Content-Type'),
                    'application/oebps-package+xml; charset=UTF-8')
            self.assertIsNotNone(r.getheader('Last-Modified'))
            opf = OPF(BytesIO(data),
                      populate_spine=False,
                      try_to_guess_cover=False)
            self.ae(db.field_for('title', 1), opf.title)
            self.ae(db.field_for('authors', 1), tuple(opf.authors))
            # Same resource again, this time asking for gzip encoding
            conn.request('GET',
                         '/get/opf/1',
                         headers={'Accept-Encoding': 'gzip'})
            r = conn.getresponse()
            # Two assertions written as one tuple expression; both are evaluated
            self.ae(r.status,
                    httplib.OK), self.ae(r.getheader('Content-Encoding'),
                                         'gzip')
            raw = r.read()
            # 16 + MAX_WBITS tells zlib to expect a gzip header
            self.ae(zlib.decompress(raw, 16 + zlib.MAX_WBITS), data)

            # Test serving metadata as json
            r, data = get('json', 1)
            self.ae(r.status, httplib.OK)
            self.ae(db.field_for('title', 1), json.loads(data)['title'])
            conn.request('GET',
                         '/get/json/1',
                         headers={'Accept-Encoding': 'gzip'})
            r = conn.getresponse()
            # Two assertions written as one tuple expression; both are evaluated
            self.ae(r.status,
                    httplib.OK), self.ae(r.getheader('Content-Encoding'),
                                         'gzip')
            raw = r.read()
            self.ae(zlib.decompress(raw, 16 + zlib.MAX_WBITS), data)

Example #42

0

Show file

File: mobiml.py Project: GaryMMugford/calibre

    def mobimlize_elem(self, elem, stylizer, bstate, istates, ignore_valign=False):
        """Translate one XHTML element, and recursively its children, to MOBI markup.

        The element's computed style (``stylizer.style(elem)``) is folded into
        a copy of the innermost inline state, which is pushed onto *istates*
        for the duration of the call. Text and tails are emitted through
        ``self.mobimlize_content``; block-level bookkeeping (current paragraph,
        vertical margin/padding, page breaks) is kept on *bstate*.

        :param elem: element to convert; skipped unless its tag is a string
            in the XHTML namespace
        :param stylizer: object whose ``style(elem)`` returns the element's style
        :param bstate: block state that is updated in place
        :param istates: stack (list) of inline format states
        :param ignore_valign: when True, do not wrap the element in
            ``<sup>``/``<sub>``; set by the recursive call that renders the
            wrapped content
        """
        if not isinstance(elem.tag, basestring) or namespace(elem.tag) != XHTML_NS:
            return
        style = stylizer.style(elem)
        # <mbp:frame-set/> does not exist lalalala
        if style["display"] in ("none", "oeb-page-head", "oeb-page-foot") or style["visibility"] == "hidden":
            id_ = elem.get("id", None)
            if id_:
                # Keep anchors so people can use display:none
                # to generate hidden TOCs
                tail = elem.tail
                elem.clear()
                elem.text = None
                elem.set("id", id_)
                elem.tail = tail
                elem.tag = XHTML("a")
            else:
                return
        tag = barename(elem.tag)
        # Work on a copy of the innermost inline state; pushed here, popped at the end
        istate = copy.copy(istates[-1])
        istate.rendered = False
        istate.list_num = 0
        if tag == "ol" and "start" in elem.attrib:
            try:
                istate.list_num = int(elem.attrib["start"]) - 1
            except:
                pass
        istates.append(istate)
        left = 0
        display = style["display"]
        # Normalise table display values: cells become inline, every other
        # table-* value becomes block
        if display == "table-cell":
            display = "inline"
        elif display.startswith("table"):
            display = "block"
        isblock = not display.startswith("inline") and style["display"] != "none"
        isblock = isblock and style["float"] == "none"
        isblock = isblock and tag != "br"
        if isblock:
            # Starting a block: close the current paragraph and record
            # alignment, indent and margins
            bstate.para = None
            istate.halign = style["text-align"]
            rawti = style._get("text-indent")
            istate.indent = style["text-indent"]
            if hasattr(rawti, "strip") and "%" in rawti:
                # We have a percentage text indent, these can come out looking
                # too large if the user chooses a wide output profile like
                # tablet
                istate.indent = min(style._unit_convert(rawti, base=500), istate.indent)
            if style["margin-left"] == "auto" and style["margin-right"] == "auto":
                istate.halign = "center"
            margin = asfloat(style["margin-left"])
            padding = asfloat(style["padding-left"])
            if tag != "body":
                left = margin + padding
            istate.left += left
            vmargin = asfloat(style["margin-top"])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style["padding-top"])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        elif not istate.href:
            # Inline element outside a link: emulate horizontal margin/padding
            # with non-breaking spaces
            margin = asfloat(style["margin-left"])
            padding = asfloat(style["padding-left"])
            lspace = margin + padding
            if lspace > 0:
                spaces = int(round((lspace * 3) / style["font-size"]))
                elem.text = (u"\xa0" * spaces) + (elem.text or "")
            margin = asfloat(style["margin-right"])
            padding = asfloat(style["padding-right"])
            rspace = margin + padding
            if rspace > 0:
                spaces = int(round((rspace * 3) / style["font-size"]))
                if len(elem) == 0:
                    elem.text = (elem.text or "") + (u"\xa0" * spaces)
                else:
                    last = elem[-1]
                    last.text = (last.text or "") + (u"\xa0" * spaces)
        if bstate.content and style["page-break-before"] in PAGE_BREAKS:
            bstate.pbreak = True
        # Character formatting carried in the inline state
        istate.fsize = self.mobimlize_font(style["font-size"])
        istate.italic = True if style["font-style"] == "italic" else False
        weight = style["font-weight"]
        istate.bold = weight in ("bold", "bolder") or asfloat(weight) > 400
        istate.preserve = style["white-space"] == "pre"
        istate.pre_wrap = style["white-space"] == "pre-wrap"
        istate.bgcolor = style["background-color"]
        istate.fgcolor = style["color"]
        istate.strikethrough = style.effective_text_decoration == "line-through"
        istate.underline = style.effective_text_decoration == "underline"
        # Reduce the font family to one of monospace / sans-serif / serif
        ff = style["font-family"].lower() if hasattr(style["font-family"], "lower") else ""
        if "monospace" in ff or "courier" in ff or ff.endswith(" mono"):
            istate.family = "monospace"
        elif "sans-serif" in ff or "sansserif" in ff or "verdana" in ff or "arial" in ff or "helvetica" in ff:
            istate.family = "sans-serif"
        else:
            istate.family = "serif"
        # Both id and name attributes are recorded as anchor ids
        if "id" in elem.attrib:
            istate.ids.add(elem.attrib["id"])
        if "name" in elem.attrib:
            istate.ids.add(elem.attrib["name"])
        if tag == "a" and "href" in elem.attrib:
            istate.href = elem.attrib["href"]
        istate.attrib.clear()
        if tag == "img" and "src" in elem.attrib:
            istate.attrib["src"] = elem.attrib["src"]
            istate.attrib["align"] = "baseline"
            cssdict = style.cssdict()
            valign = cssdict.get("vertical-align", None)
            if valign in ("top", "bottom", "middle"):
                istate.attrib["align"] = valign
            for prop in ("width", "height"):
                if cssdict[prop] != "auto":
                    value = style[prop]
                    if value == getattr(self.profile, prop):
                        result = "100%"
                    else:
                        # Amazon's renderer does not support
                        # img sizes in units other than px
                        # See #7520 for test case
                        try:
                            pixs = int(round(float(value) / (72.0 / self.profile.dpi)))
                        except:
                            continue
                        result = str(pixs)
                    istate.attrib[prop] = result
            if "width" not in istate.attrib or "height" not in istate.attrib:
                # At least one dimension is missing from the CSS: take it from
                # the image itself, preserving the aspect ratio when only one
                # of the two is missing
                href = self.current_spine_item.abshref(elem.attrib["src"])
                try:
                    item = self.oeb.manifest.hrefs[urlnormalize(href)]
                except:
                    self.oeb.logger.warn("Failed to find image:", href)
                else:
                    try:
                        width, height = identify_data(item.data)[:2]
                    except:
                        self.oeb.logger.warn("Invalid image:", href)
                    else:
                        if "width" not in istate.attrib and "height" not in istate.attrib:
                            istate.attrib["width"] = str(width)
                            istate.attrib["height"] = str(height)
                        else:
                            ar = float(width) / float(height)
                            if "width" not in istate.attrib:
                                try:
                                    width = int(istate.attrib["height"]) * ar
                                except:
                                    pass
                                istate.attrib["width"] = str(int(width))
                            else:
                                try:
                                    height = int(istate.attrib["width"]) / ar
                                except:
                                    pass
                                istate.attrib["height"] = str(int(height))
                        item.unload_data_from_memory()
        elif tag == "hr" and asfloat(style["width"]) > 0:
            # Rule width as a percentage of the profile width
            prop = style["width"] / self.profile.width
            istate.attrib["width"] = "%d%%" % int(round(prop * 100))
        # NOTE(review): `display` was rewritten above ("table-cell" -> "inline",
        # any other "table*" -> "block"), so the three comparisons below can
        # never be true as written -- confirm whether style["display"] was meant.
        elif display == "table":
            tag = "table"
        elif display == "table-row":
            tag = "tr"
        elif display == "table-cell":
            tag = "td"
        if tag in TABLE_TAGS and self.ignore_tables:
            tag = "span" if tag == "td" else "div"

        if tag in ("table", "td", "tr"):
            col = style.backgroundColor
            if col:
                elem.set("bgcolor", col)
            css = style.cssdict()
            if "border" in css or "border-width" in css:
                elem.set("border", "1")
        if tag in TABLE_TAGS:
            for attr in ("rowspan", "colspan", "width", "border", "scope", "bgcolor"):
                if attr in elem.attrib:
                    istate.attrib[attr] = elem.attrib[attr]
        if tag == "q":
            # Wrap quoted text in typographic double quotes
            t = elem.text
            if not t:
                t = ""
            elem.text = u"\u201c" + t
            t = elem.tail
            if not t:
                t = ""
            elem.tail = u"\u201d" + t
        text = None
        if elem.text:
            if istate.preserve or istate.pre_wrap:
                text = elem.text
            elif (
                len(elem) > 0
                and isspace(elem.text)
                and hasattr(elem[0].tag, "rpartition")
                and elem[0].tag.rpartition("}")[-1] not in INLINE_TAGS
            ):
                # Whitespace-only text directly before a non-inline child is dropped
                text = None
            else:
                text = COLLAPSE.sub(" ", elem.text)
        valign = style["vertical-align"]
        not_baseline = valign in ("super", "sub", "text-top", "text-bottom", "top", "bottom") or (
            isinstance(valign, (float, int)) and abs(valign) != 0
        )
        issup = valign in ("super", "text-top", "top") or (isinstance(valign, (float, int)) and valign > 0)
        vtag = "sup" if issup else "sub"
        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
            # Render the element into a scratch tree (with ignore_valign=True to
            # stop the recursion), then graft the result into <sup>/<sub><small>
            nroot = etree.Element(XHTML("html"), nsmap=MOBI_NSMAP)
            vbstate = BlockState(etree.SubElement(nroot, XHTML("body")))
            vbstate.para = etree.SubElement(vbstate.body, XHTML("p"))
            self.mobimlize_elem(elem, stylizer, vbstate, istates, ignore_valign=True)
            if len(istates) > 0:
                istates.pop()
            if len(istates) == 0:
                istates.append(FormatState())
            at_start = bstate.para is None
            if at_start:
                self.mobimlize_content("span", "", bstate, istates)
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
                vtag = etree.SubElement(vtag, XHTML("small"))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
                        vtag.append(child)
                    else:
                        break
                if vbstate.para is not None:
                    if vbstate.para.text:
                        vtag.text = vbstate.para.text
                    for child in vbstate.para:
                        vtag.append(child)
                return

        if tag == "blockquote":
            # Margins are honoured inside blockquotes; the previous setting is
            # put back after the children have been processed.
            # NOTE(review): not restored if an exception is raised in between.
            old_mim = self.opts.mobi_ignore_margins
            self.opts.mobi_ignore_margins = False

        if (
            text
            or tag in CONTENT_TAGS
            or tag in NESTABLE_TAGS
            or (
                # We have an id but no text and no children, the id should still
                # be added.
                istate.ids
                and tag in ("a", "span", "i", "b", "u")
                and len(elem) == 0
            )
        ):
            if tag == "li" and len(istates) > 1 and "value" in elem.attrib:
                # An explicit value="N" on <li> resets the enclosing list's counter
                try:
                    value = int(elem.attrib["value"])
                    istates[-2].list_num = value - 1
                except:
                    pass
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            # Emit the text that follows the child, with the same whitespace
            # rules as for the element's own text
            tail = None
            if child.tail:
                if istate.preserve or istate.pre_wrap:
                    tail = child.tail
                elif bstate.para is None and isspace(child.tail):
                    tail = None
                else:
                    tail = COLLAPSE.sub(" ", child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)

        if tag == "blockquote":
            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style["page-break-after"] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
            para = bstate.para
            # A paragraph holding only a non-breaking space is a spacer
            if para is not None and para.text == u"\xa0" and len(para) < 1:
                if style.height > 2:
                    para.getparent().replace(para, etree.Element(XHTML("br")))
                else:
                    # This is too small to be rendered effectively, drop it
                    para.getparent().remove(para)
            # Closing the block: reset paragraph state and account for the
            # bottom margin and padding
            bstate.para = None
            bstate.istate = None
            vmargin = asfloat(style["margin-bottom"])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style["padding-bottom"])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        if bstate.nested and bstate.nested[-1].tag == elem.tag:
            bstate.nested.pop()
        # Drop the inline state pushed at the top of this call
        istates.pop()

Example #43

0

Show file

File: simple.py Project: sss/calibre

 def process_images(self, soup, baseurl):
     """Save every ``<img src=...>`` in *soup* to disk and repoint its ``src``.

     Images go into an ``images`` folder below ``self.current_dir``.
     ``data:`` URLs are base64-decoded; all other URLs are resolved against
     *baseurl* (after the optional ``self.image_url_processor`` hook) and
     fetched with ``self.fetch_url``. URLs already in ``self.imagemap`` are
     re-used without fetching. SVG data is written as is; anything that is
     not PNG/JPEG is converted (GIF to PNG, everything else to JPEG) and
     JPEGs are optionally passed through ``self.rescale_image``. Images that
     cannot be decoded, fetched or processed are skipped.
     """
     images_dir = unicode_path(os.path.join(self.current_dir, 'images'))
     if not os.path.exists(images_dir):
         os.mkdir(images_dir)

     def is_img_with_src(node):
         return node.name.lower() == 'img' and node.has_key('src')

     counter = 0
     for tag in soup.findAll(is_img_with_src):
         iurl = tag['src']
         if iurl.startswith('data:image/'):
             # Inline image: the payload follows the first comma
             try:
                 data = b64decode(iurl.partition(',')[-1])
             except:
                 self.log.exception('Failed to decode embedded image')
                 continue
         else:
             if callable(self.image_url_processor):
                 iurl = self.image_url_processor(baseurl, iurl)
             if not urlparse.urlsplit(iurl).scheme:
                 iurl = urlparse.urljoin(baseurl, iurl, False)
             # Re-use an image that has already been downloaded
             with self.imagemap_lock:
                 if self.imagemap.has_key(iurl):
                     tag['src'] = self.imagemap[iurl]
                     continue
             try:
                 data = self.fetch_url(iurl)
             except Exception:
                 self.log.exception('Could not fetch image ', iurl)
                 continue
             if data == 'GIF89a\x01':
                 # Skip empty GIF files as PIL errors on them anyway
                 continue

         counter += 1
         fname = ascii_filename('img' + str(counter))
         if isinstance(fname, unicode):
             fname = fname.encode('ascii', 'replace')
         itype = what(None, data)

         if itype is None and b'<svg' in data[:1024]:
             # SVG image: stored untouched
             imgpath = os.path.join(images_dir, fname + '.svg')
             with self.imagemap_lock:
                 self.imagemap[iurl] = imgpath
             with open(imgpath, 'wb') as out:
                 out.write(data)
             tag['src'] = imgpath
             continue

         try:
             if itype not in {'png', 'jpg', 'jpeg'}:
                 # Convert to a supported format first
                 itype = 'png' if itype == 'gif' else 'jpg'
                 converter = Image()
                 converter.load(data)
                 data = converter.export(itype)
             if self.compress_news_images and itype in {'jpg', 'jpeg'}:
                 try:
                     data = self.rescale_image(data)
                 except:
                     self.log.exception('failed to compress image ' +
                                        iurl)
                     # Still make sure the uncompressed data is a valid image
                     identify_data(data)
             else:
                 # Raises, and so skips the image, if the data is invalid
                 identify_data(data)
             # Moon+ apparently cannot handle .jpeg files
             if itype == 'jpeg':
                 itype = 'jpg'
             imgpath = os.path.join(images_dir, fname + '.' + itype)
             with self.imagemap_lock:
                 self.imagemap[iurl] = imgpath
             with open(imgpath, 'wb') as out:
                 out.write(data)
             tag['src'] = imgpath
         except:
             traceback.print_exc()

Example #44

0

Show file

File: cover.py Project: IvoNet/calibre

def create_epub_cover(container, cover_path, existing_image, options=None):
    '''
    Create a titlepage that shows a raster cover image and register it in
    the OPF of the book held by ``container`` (first spine item, guide
    reference of type ``cover`` and a ``<meta name="cover">`` entry).

    :param container: The book container. It is used to create and write
        items and to query and modify the OPF.
    :param cover_path: Path to the cover image on disk. Its extension is
        always read; the file itself is only opened when ``existing_image``
        is false.
    :param existing_image: Name of an image already present in the container
        that should be used as the cover, or a false value to import the
        file at ``cover_path`` instead.
    :param options: Optional mapping with the keys ``keep_aspect`` and
        ``no_svg``. When None, the equivalent settings are taken from the
        stored ``epub_output`` conversion defaults.
    :return: The tuple ``(raster_cover, titlepage)`` containing the names of
        the cover image and of the generated titlepage.
    '''
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    extension = cover_path.rpartition('.')[-1].lower()
    if not existing_image:
        # Import the image at cover_path into the book as a new item
        raster_cover_item = container.generate_item('cover.'+extension, id_prefix='cover')
        raster_cover = container.href_to_name(raster_cover_item.get('href'), container.opf_name)

        with open(cover_path, 'rb') as src, container.open(raster_cover, 'wb') as dest:
            shutil.copyfileobj(src, dest)
    else:
        # Re-use an image that is already in the book: look up its manifest
        # entry through the inverted id -> name map
        raster_cover = existing_image
        name_to_id = {name:item_id for item_id, name in container.manifest_id_map.iteritems()}
        manifest_id = name_to_id[existing_image]
        raster_cover_item = container.opf_xpath('//opf:manifest/*[@id="%s"]' % manifest_id)[0]

    if options is not None:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)
    else:
        defaults = load_defaults('epub_output')
        keep_aspect = defaults.get('preserve_cover_aspect_ratio', False)
        no_svg = defaults.get('no_svg_cover', False)

    if not no_svg:
        # 600x800 is used when the real image size cannot be determined
        width, height = 600, 800
        try:
            if existing_image:
                image_data = container.raw_data(existing_image, decode=False)
                width, height = identify_data(image_data)[:2]
            else:
                width, height = identify(cover_path)[:2]
        except:
            container.log.exception("Failed to get width and height of cover")
        ar = 'xMidYMid meet' if keep_aspect else 'none'
        templ = CoverManager.SVG_TEMPLATE
        for placeholder, value in (
                ('__ar__', ar),
                ('__viewbox__', '0 0 %d %d'%(width, height)),
                ('__width__', str(width)),
                ('__height__', str(height))):
            templ = templ.replace(placeholder, value)
    else:
        # The doubled percent sign survives the % formatting applied below
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)

    titlepage_item = container.generate_item('titlepage.xhtml',
                                             id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'),
                                       container.opf_name)
    cover_href = container.name_to_href(raster_cover).encode('utf-8')
    titlepage_markup = templ % cover_href
    with container.open(titlepage, 'wb') as out:
        out.write(titlepage_markup)

    # The Nook firmware requires the raster cover item to have id="cover".
    # Anything else currently using that id is moved to a fresh id first.
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for expr, attr in (('//*[@id="cover"]', 'id'),
                           ('//*[@idref="cover"]', 'idref')):
            for item in container.opf_xpath(expr):
                item.set(attr, newid)
        raster_cover_item.set('id', 'cover')

    # The titlepage becomes the first item in the spine
    spine = container.opf_xpath('//opf:spine')[0]
    itemref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, itemref, index=0)

    guide = container.opf_get_or_create('guide')
    reference = guide.makeelement(
        OPF('reference'), type='cover', title=_('Cover'),
        href=container.name_to_href(titlepage, base=container.opf_name))
    container.insert_into_xml(guide, reference)

    metadata = container.opf_get_or_create('metadata')
    meta = metadata.makeelement(OPF('meta'), name='cover')
    meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage

Example #45

0

Show file

File: mobi6.py Project: Eksmo/calibre

    def __init__(self, mf):
        '''
        Take over the parsed structures of the MOBI file object ``mf`` and
        classify its records into index, text, image, font and other binary
        records.

        :param mf: Parsed MOBI file. The attributes ``raw``, ``palmdb``,
            ``record_headers``, ``records``, ``mobi_header`` and
            ``huffman_record_nums`` are copied from it and its
            ``decompress6`` is handed to every text record.
        '''
        for name in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
                'huffman_record_nums',):
            setattr(self, name, getattr(mf, name))

        header = self.mobi_header
        recs = self.records

        # Primary index: the header record is followed by index_count index
        # records, which are followed by the CNCX records
        self.index_header = self.index_record = None
        self.indexing_record_nums = set()
        pir = header.primary_index_record
        if pir != NULL_INDEX:
            self.index_header = IndexHeader(recs[pir])
            index_start = pir + 1
            cncx_start = index_start + self.index_header.index_count
            cncx_end = cncx_start + self.index_header.num_of_cncx_blocks
            self.cncx = CNCX(recs[cncx_start:cncx_end],
                    self.index_header.index_encoding)
            self.index_record = IndexRecord(recs[index_start:cncx_start],
                    self.index_header, self.cncx)
            self.indexing_record_nums = set(xrange(pir, cncx_end))

        # Secondary index: header record followed by its index records. It
        # is decoded with the CNCX of the primary index.
        self.secondary_index_record = self.secondary_index_header = None
        sir = header.secondary_index_record
        if sir != NULL_INDEX:
            self.secondary_index_header = SecondaryIndexHeader(recs[sir])
            sec_start = sir + 1
            sec_end = sec_start + self.secondary_index_header.index_count
            self.indexing_record_nums.add(sir)
            self.secondary_index_record = IndexRecord(
                    recs[sec_start:sec_end], self.secondary_index_header, self.cncx)
            self.indexing_record_nums.update(xrange(sec_start, sec_end))

        ntr = header.number_of_text_records
        fntbr = header.first_non_book_record
        fii = header.first_image_index
        if fntbr == NULL_INDEX:
            fntbr = len(recs)

        # Text records start at record 1
        text_records = []
        for num in xrange(1, min(len(recs), ntr+1)):
            text_records.append(TextRecord(num, recs[num],
                header.extra_data_flags, mf.decompress6))
        self.text_records = text_records

        self.image_records, self.binary_records = [], []
        self.font_records = []
        # Records starting with one of these signatures are never probed as
        # images
        not_image_magic = {b'FLIS', b'FCIS', b'SRCS',
                b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
                b'AUDI', b'VIDE', b'FONT'}
        image_index = 0
        for num in xrange(fntbr, len(recs)):
            if num in self.indexing_record_nums or num in self.huffman_record_nums:
                continue
            # Counts every non index, non huffman record, not only images
            image_index += 1
            rec = recs[num]
            magic = rec.raw[:4]
            fmt = None
            if num >= fii and magic not in not_image_magic:
                try:
                    _width, _height, fmt = identify_data(rec.raw)
                except:
                    # Not recognised as an image, fmt stays None
                    pass
            if fmt is None:
                if magic == b'FONT':
                    self.font_records.append(FontRecord(num, rec))
                else:
                    self.binary_records.append(BinaryRecord(num, rec))
            else:
                self.image_records.append(ImageRecord(image_index, rec, fmt))

        if self.index_record is not None:
            self.tbs_indexing = TBSIndexing(self.text_records,
                    self.index_record.indices, header.type_raw)

Example #46

0

Show file

File: mobiml.py Project: iwannafly/calibre

    def mobimlize_elem(self, elem, stylizer, bstate, istates,
            ignore_valign=False):
        '''
        Process a single XHTML element and, recursively, its children,
        emitting the result through ``self.mobimlize_content``.

        Elements whose tag is not a string or is not in the XHTML namespace
        are ignored.

        :param elem: The element to process. It may be modified in place
            (padding added to text/tail, attributes set and, for hidden
            elements that have an id, reduction to an empty anchor).
        :param stylizer: Object whose ``style(elem)`` returns the style of an
            element.
        :param bstate: Block level state. Updated in place (current
            paragraph, pending vertical margin/padding, page break flag,
            nesting stack).
        :param istates: List used as a stack of inline format states. A copy
            of the last entry is pushed for this element and popped again
            before the method returns.
        :param ignore_valign: If True, a non-baseline vertical-align does
            not cause the element to be wrapped in sup/sub. Passed by the
            recursive call that renders such an element into a scratch tree.
        '''
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return
        style = stylizer.style(elem)
        # <mbp:frame-set/> does not exist lalalala
        # Hidden elements are dropped, unless they have an id, in which case
        # they are reduced to an empty anchor (only id and tail are kept)
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            id_ = elem.get('id', None)
            if id_:
                # Keep anchors so people can use display:none
                # to generate hidden TOCs
                tail = elem.tail
                elem.clear()
                elem.text = None
                elem.set('id', id_)
                elem.tail = tail
                elem.tag = XHTML('a')
            else:
                return
        tag = barename(elem.tag)
        # Work on a copy of the current inline state so that changes made for
        # this element do not affect its parent
        istate = copy.copy(istates[-1])
        istate.rendered = False
        istate.list_num = 0
        if tag == 'ol' and 'start' in elem.attrib:
            try:
                istate.list_num = int(elem.attrib['start'])-1
            except:
                pass
        istates.append(istate)
        left = 0
        # Normalise display: table cells are treated as inline, every other
        # table-* value as block
        display = style['display']
        if display == 'table-cell':
            display = 'inline'
        elif display.startswith('table'):
            display = 'block'
        isblock = (not display.startswith('inline') and style['display'] !=
                'none')
        isblock = isblock and style['float'] == 'none'
        isblock = isblock and tag != 'br'
        if isblock:
            # A block element always starts a new paragraph
            bstate.para = None
            istate.halign = style['text-align']
            istate.indent = style['text-indent']
            if style['margin-left'] == 'auto' \
               and style['margin-right'] == 'auto':
                istate.halign = 'center'
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            if tag != 'body':
                left = margin + padding
            istate.left += left
            # Pending vertical margins are combined by taking the larger
            # value; a non-zero padding converts the pending margin into
            # padding and is added on top of it
            vmargin = asfloat(style['margin-top'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-top'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        elif not istate.href:
            # Non-block element outside of a link: horizontal margin and
            # padding are emulated with non-breaking spaces, three per
            # font-size of space
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            lspace = margin + padding
            if lspace > 0:
                spaces = int(round((lspace * 3) / style['font-size']))
                elem.text = (u'\xa0' * spaces) + (elem.text or '')
            margin = asfloat(style['margin-right'])
            padding = asfloat(style['padding-right'])
            rspace = margin + padding
            if rspace > 0:
                spaces = int(round((rspace * 3) / style['font-size']))
                if len(elem) == 0:
                    elem.text = (elem.text or '') + (u'\xa0' * spaces)
                else:
                    last = elem[-1]
                    last.text = (last.text or '') + (u'\xa0' * spaces)
        if bstate.content and style['page-break-before'] in PAGE_BREAKS:
            bstate.pbreak = True
        # Transfer the character level formatting to the inline state
        istate.fsize = self.mobimlize_font(style['font-size'])
        istate.italic = True if style['font-style'] == 'italic' else False
        weight = style['font-weight']
        istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
        istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
        istate.bgcolor  = style['background-color']
        istate.fgcolor  = style['color']
        istate.strikethrough = style.effective_text_decoration == 'line-through'
        istate.underline = style.effective_text_decoration == 'underline'
        # Reduce the font family to one of monospace, sans-serif or serif
        ff = style['font-family'].lower() if style['font-family'] else ''
        if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
            istate.family = 'monospace'
        elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff or
                'arial' in ff or 'helvetica' in ff):
            istate.family = 'sans-serif'
        else:
            istate.family = 'serif'
        # Both id and name attributes are collected as ids
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
            istate.ids.add(elem.attrib['name'])
        if tag == 'a' and 'href' in elem.attrib:
            istate.href = elem.attrib['href']
        istate.attrib.clear()
        if tag == 'img' and 'src' in elem.attrib:
            istate.attrib['src'] = elem.attrib['src']
            istate.attrib['align'] = 'baseline'
            cssdict = style.cssdict()
            valign = cssdict.get('vertical-align', None)
            if valign in ('top', 'bottom', 'middle'):
                istate.attrib['align'] = valign
            for prop in ('width', 'height'):
                if cssdict[prop] != 'auto':
                    value = style[prop]
                    if value == getattr(self.profile, prop):
                        result = '100%'
                    else:
                        # Amazon's renderer does not support
                        # img sizes in units other than px
                        # See #7520 for test case
                        # NOTE(review): the division by 72/dpi suggests that
                        # value is in points -- confirm
                        try:
                            pixs = int(round(float(value) / \
                                (72./self.profile.dpi)))
                        except:
                            continue
                        result = str(pixs)
                    istate.attrib[prop] = result
            # If either dimension is still missing, read the real size of
            # the image and fill in what is missing, preserving the aspect
            # ratio when only one dimension was specified
            if 'width' not in istate.attrib or 'height' not in istate.attrib:
                href = self.current_spine_item.abshref(elem.attrib['src'])
                try:
                    item = self.oeb.manifest.hrefs[urlnormalize(href)]
                except:
                    self.oeb.logger.warn('Failed to find image:',
                            href)
                else:
                    try:
                        width, height = identify_data(item.data)[:2]
                    except:
                        self.oeb.logger.warn('Invalid image:', href)
                    else:
                        if 'width' not in istate.attrib and 'height' not in \
                                    istate.attrib:
                            istate.attrib['width'] = str(width)
                            istate.attrib['height'] = str(height)
                        else:
                            ar = float(width)/float(height)
                            if 'width' not in istate.attrib:
                                # On failure (e.g. a percentage height) the
                                # real image width is used
                                try:
                                    width = int(istate.attrib['height'])*ar
                                except:
                                    pass
                                istate.attrib['width'] = str(int(width))
                            else:
                                # On failure the real image height is used
                                try:
                                    height = int(istate.attrib['width'])/ar
                                except:
                                    pass
                                istate.attrib['height'] = str(int(height))
                        item.unload_data_from_memory()
        elif tag == 'hr' and asfloat(style['width']) > 0:
            # The rule width is expressed as a percentage of the profile width
            prop = style['width'] / self.profile.width
            istate.attrib['width'] = "%d%%" % int(round(prop * 100))
        # NOTE(review): display was normalised above, so it can no longer be
        # 'table', 'table-row' or 'table-cell' here; the three branches below
        # look unreachable -- confirm whether style['display'] was intended
        elif display == 'table':
            tag = 'table'
        elif display == 'table-row':
            tag = 'tr'
        elif display == 'table-cell':
            tag = 'td'
        # When tables are ignored, cells become spans and all other table
        # tags become divs
        if tag in TABLE_TAGS and self.ignore_tables:
            tag = 'span' if tag == 'td' else 'div'

        if tag in ('table', 'td', 'tr'):
            col = style.backgroundColor
            if col:
                elem.set('bgcolor', col)
            css = style.cssdict()
            if 'border' in css or 'border-width' in css:
                elem.set('border', '1')
        if tag in TABLE_TAGS:
            for attr in ('rowspan', 'colspan', 'width', 'border', 'scope',
                    'bgcolor'):
                if attr in elem.attrib:
                    istate.attrib[attr] = elem.attrib[attr]
        if tag == 'q':
            # Surround the quotation with curly double quotes
            t = elem.text
            if not t:
                t = ''
            elem.text = u'\u201c' + t
            t = elem.tail
            if not t:
                t = ''
            elem.tail = u'\u201d' + t
        # Leading text: kept verbatim when whitespace is preserved, dropped
        # when it is only whitespace before child elements, otherwise
        # collapsed with COLLAPSE
        text = None
        if elem.text:
            if istate.preserve:
                text = elem.text
            elif len(elem) > 0 and isspace(elem.text):
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
        not_baseline = valign in ('super', 'sub', 'text-top',
                'text-bottom', 'top', 'bottom') or (
                isinstance(valign, (float, int)) and abs(valign) != 0)
        issup = valign in ('super', 'text-top', 'top') or (
            isinstance(valign, (float, int)) and valign > 0)
        vtag = 'sup' if issup else 'sub'
        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
            # Render the element into a scratch tree (with ignore_valign set
            # to avoid recursing into this branch again) and then move the
            # result into <sup>/<sub> + <small> in the real tree
            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
            vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
            vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
            self.mobimlize_elem(elem, stylizer, vbstate, istates,
                    ignore_valign=True)
            # Remove the state pushed above for this element, while making
            # sure the stack is never left empty
            if len(istates) > 0:
                istates.pop()
            if len(istates) == 0:
                istates.append(FormatState())
            at_start = bstate.para is None
            if at_start:
                self.mobimlize_content('span', '', bstate, istates)
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
                vtag = etree.SubElement(vtag, XHTML('small'))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
                        vtag.append(child)
                    else:
                        break
                if vbstate.para is not None:
                    for child in vbstate.para:
                        vtag.append(child)
                return

        # While a blockquote is processed mobi_ignore_margins is forced to
        # False; the previous value is restored after the children are done
        if tag == 'blockquote':
            old_mim = self.opts.mobi_ignore_margins
            self.opts.mobi_ignore_margins = False

        if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
            # We have an id but no text and no children, the id should still
            # be added.
            istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and
            len(elem)==0)):
            self.mobimlize_content(tag, text, bstate, istates)
        # Recurse into the children; the tail text of each child is emitted
        # as content of this element
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            tail = None
            if child.tail:
                if istate.preserve:
                    tail = child.tail
                elif bstate.para is None and isspace(child.tail):
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)

        if tag == 'blockquote':
            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
            para = bstate.para
            # A paragraph containing nothing but a single non-breaking space
            # is replaced by a <br> or removed, depending on the block height
            if para is not None and para.text == u'\xa0' and len(para) < 1:
                if style.height > 2:
                    para.getparent().replace(para, etree.Element(XHTML('br')))
                else:
                    # This is too small to be rendered effectively, drop it
                    para.getparent().remove(para)
            bstate.para = None
            bstate.istate = None
            # Bottom margin/padding are handled like their top counterparts
            vmargin = asfloat(style['margin-bottom'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-bottom'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        if bstate.nested and bstate.nested[-1].tag == elem.tag:
            bstate.nested.pop()
        istates.pop()

Example #47

0

Show file

File: content.py Project: jilanfang/calibre

    def test_get(self):  # {{{
        'Test /get'
        with self.create_server() as server:
            db = server.handler.router.ctx.library_broker.get(None)
            conn = server.connect()

            def get(what, book_id, library_id=None, q=''):
                # Request /get/<what>/<book_id>[/<library_id>][?<q>] and
                # return the response object together with the body
                q = ('?' + q) if q else q
                conn.request('GET', '/get/%s/%s' % (what, book_id) + (('/' + library_id) if library_id else '') + q)
                r = conn.getresponse()
                return r, r.read()

            # Test various invalid parameters
            def bad(*args):
                r, data = get(*args)
                self.ae(r.status, httplib.NOT_FOUND)
            bad('xxx', 1)
            bad('fmt1', 10)
            bad('fmt1', 1, 'zzzz')
            bad('fmt1', 'xx')

            # Test simple fetching of format without metadata update
            r, data = get('fmt1', 1, db.server_library_id)
            self.ae(data, db.format(1, 'fmt1'))
            self.assertIsNotNone(r.getheader('Content-Disposition'))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('fmt1', 1)
            self.ae(data, db.format(1, 'fmt1'))
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test fetching of format with metadata update
            raw = P('quick_start/eng.epub', data=True)
            r, data = get('epub', 1)
            self.ae(r.status, httplib.OK)
            etag = r.getheader('ETag')
            self.assertIsNotNone(etag)
            self.ae(r.getheader('Used-Cache'), 'no')
            self.assertTrue(data.startswith(b'PK'))
            self.assertGreaterEqual(len(data), len(raw))
            db.set_field('title', {1:'changed'})
            r, data = get('epub', 1)
            self.assertNotEqual(r.getheader('ETag'), etag)
            etag = r.getheader('ETag')
            self.ae(r.getheader('Used-Cache'), 'no')
            mi = get_metadata(BytesIO(data), extract_cover=False)
            self.ae(mi.title, 'changed')
            r, data = get('epub', 1)
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test plugboards
            import calibre.library.save_to_disk as c
            # DEBUG is switched off for the duration of this section and
            # restored afterwards
            orig, c.DEBUG = c.DEBUG, False
            try:
                db.set_pref('plugboards', {u'epub': {u'content_server': [[u'changed, {title}', u'title']]}})
                # this is needed as the cache is not invalidated for plugboard changes
                db.set_field('title', {1:'again'})
                r, data = get('epub', 1)
                self.assertNotEqual(r.getheader('ETag'), etag)
                etag = r.getheader('ETag')
                self.ae(r.getheader('Used-Cache'), 'no')
                mi = get_metadata(BytesIO(data), extract_cover=False)
                self.ae(mi.title, 'changed, again')
            finally:
                c.DEBUG = orig

            # Test the serving of covers
            def change_cover(count, book_id=2):
                # Replace the cover of book_id and push the mtime of the
                # cover file into the future (further for larger count)
                cpath = db.format_abspath(book_id, '__COVER_INTERNAL__')
                # The cover is set for book_id, the same book whose cover
                # file is touched below
                db.set_cover({book_id:I('lt.png', data=True)})
                t = time.time() + 1 + count
                # Ensure mtime changes, needed on OS X where HFS+ has a 1s
                # mtime resolution
                os.utime(cpath, (t, t))

            r, data = get('cover', 1)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'no')
            self.ae(r.getheader('Content-Type'), 'image/jpeg')
            r, data = get('cover', 1)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('cover', 3)
            self.ae(r.status, httplib.NOT_FOUND)
            r, data = get('thumb', 1)
            self.ae(r.status, httplib.OK)
            self.ae(identify_data(data), (60, 60, 'jpeg'))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('thumb', 1)
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, httplib.OK)
            self.ae(identify_data(data), (100, 100, 'jpeg'))
            self.ae(r.getheader('Used-Cache'), 'no')
            # sz=100x100 is expected to hit the cache entry created by sz=100
            r, data = get('thumb', 1, q='sz=100x100')
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            # Changing the cover must invalidate the cached thumbnail
            change_cover(1, 1)
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, httplib.OK)
            self.ae(identify_data(data), (100, 100, 'jpeg'))
            self.ae(r.getheader('Used-Cache'), 'no')

            # Test file sharing in cache
            test_share_open()
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
            # Keep the cached file open while the cache entry is replaced
            f, fdata = share_open(path, 'rb'), data
            # Now force an update
            change_cover(1)
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            path = binascii.unhexlify(r.getheader('Tempfile')).decode('utf-8')
            f2, f2data = share_open(path, 'rb'), data
            # Do it again
            change_cover(2)
            r, data = get('cover', 2)
            self.ae(r.status, httplib.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            # The previously opened files must still give the old data
            self.ae(f.read(), fdata)
            self.ae(f2.read(), f2data)

            # Test serving of metadata as opf
            r, data = get('opf', 1)
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Content-Type'), 'application/oebps-package+xml; charset=UTF-8')
            self.assertIsNotNone(r.getheader('Last-Modified'))
            opf = OPF(BytesIO(data), populate_spine=False, try_to_guess_cover=False)
            self.ae(db.field_for('title', 1), opf.title)
            self.ae(db.field_for('authors', 1), tuple(opf.authors))
            conn.request('GET', '/get/opf/1', headers={'Accept-Encoding':'gzip'})
            r = conn.getresponse()
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Content-Encoding'), 'gzip')
            raw = r.read()
            # 16+MAX_WBITS makes zlib expect a gzip header
            self.ae(zlib.decompress(raw, 16+zlib.MAX_WBITS), data)

            # Test serving metadata as json
            r, data = get('json', 1)
            self.ae(r.status, httplib.OK)
            self.ae(db.field_for('title', 1), json.loads(data)['title'])
            conn.request('GET', '/get/json/1', headers={'Accept-Encoding':'gzip'})
            r = conn.getresponse()
            self.ae(r.status, httplib.OK)
            self.ae(r.getheader('Content-Encoding'), 'gzip')
            raw = r.read()
            self.ae(zlib.decompress(raw, 16+zlib.MAX_WBITS), data)

Example #48

0

Show file

File: mobiml.py Project: oheil/calibre

    def mobimlize_elem(self,
                       elem,
                       stylizer,
                       bstate,
                       istates,
                       ignore_valign=False):
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return
        style = stylizer.style(elem)
        # <mbp:frame-set/> does not exist lalalala
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            id_ = elem.get('id', None)
            if id_:
                # Keep anchors so people can use display:none
                # to generate hidden TOCs
                tail = elem.tail
                elem.clear()
                elem.text = None
                elem.set('id', id_)
                elem.tail = tail
                elem.tag = XHTML('a')
            else:
                return
        tag = barename(elem.tag)
        istate = copy.copy(istates[-1])
        istate.rendered = False
        istate.list_num = 0
        if tag == 'ol' and 'start' in elem.attrib:
            try:
                istate.list_num = int(elem.attrib['start']) - 1
            except:
                pass
        istates.append(istate)
        left = 0
        display = style['display']
        if display == 'table-cell':
            display = 'inline'
        elif display.startswith('table'):
            display = 'block'
        isblock = (not display.startswith('inline')
                   and style['display'] != 'none')
        isblock = isblock and style['float'] == 'none'
        isblock = isblock and tag != 'br'
        if isblock:
            bstate.para = None
            istate.halign = style['text-align']
            istate.indent = style['text-indent']
            if style['margin-left'] == 'auto' \
               and style['margin-right'] == 'auto':
                istate.halign = 'center'
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            if tag != 'body':
                left = margin + padding
            istate.left += left
            vmargin = asfloat(style['margin-top'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-top'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        elif not istate.href:
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            lspace = margin + padding
            if lspace > 0:
                spaces = int(round((lspace * 3) / style['font-size']))
                elem.text = (u'\xa0' * spaces) + (elem.text or '')
            margin = asfloat(style['margin-right'])
            padding = asfloat(style['padding-right'])
            rspace = margin + padding
            if rspace > 0:
                spaces = int(round((rspace * 3) / style['font-size']))
                if len(elem) == 0:
                    elem.text = (elem.text or '') + (u'\xa0' * spaces)
                else:
                    last = elem[-1]
                    last.text = (last.text or '') + (u'\xa0' * spaces)
        if bstate.content and style['page-break-before'] in PAGE_BREAKS:
            bstate.pbreak = True
        istate.fsize = self.mobimlize_font(style['font-size'])
        istate.italic = True if style['font-style'] == 'italic' else False
        weight = style['font-weight']
        istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
        istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
        istate.bgcolor = style['background-color']
        istate.fgcolor = style['color']
        istate.strikethrough = style.effective_text_decoration == 'line-through'
        istate.underline = style.effective_text_decoration == 'underline'
        ff = style['font-family'].lower() if style['font-family'] else ''
        if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
            istate.family = 'monospace'
        elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff
              or 'arial' in ff or 'helvetica' in ff):
            istate.family = 'sans-serif'
        else:
            istate.family = 'serif'
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
            istate.ids.add(elem.attrib['name'])
        if tag == 'a' and 'href' in elem.attrib:
            istate.href = elem.attrib['href']
        istate.attrib.clear()
        if tag == 'img' and 'src' in elem.attrib:
            istate.attrib['src'] = elem.attrib['src']
            istate.attrib['align'] = 'baseline'
            cssdict = style.cssdict()
            valign = cssdict.get('vertical-align', None)
            if valign in ('top', 'bottom', 'middle'):
                istate.attrib['align'] = valign
            for prop in ('width', 'height'):
                if cssdict[prop] != 'auto':
                    value = style[prop]
                    if value == getattr(self.profile, prop):
                        result = '100%'
                    else:
                        # Amazon's renderer does not support
                        # img sizes in units other than px
                        # See #7520 for test case
                        try:
                            pixs = int(
                                round(float(value) / (72. / self.profile.dpi)))
                        except:
                            continue
                        result = str(pixs)
                    istate.attrib[prop] = result
            if 'width' not in istate.attrib or 'height' not in istate.attrib:
                href = self.current_spine_item.abshref(elem.attrib['src'])
                try:
                    item = self.oeb.manifest.hrefs[urlnormalize(href)]
                except:
                    self.oeb.logger.warn('Failed to find image:', href)
                else:
                    try:
                        width, height = identify_data(item.data)[:2]
                    except:
                        self.oeb.logger.warn('Invalid image:', href)
                    else:
                        if 'width' not in istate.attrib and 'height' not in \
                                    istate.attrib:
                            istate.attrib['width'] = str(width)
                            istate.attrib['height'] = str(height)
                        else:
                            ar = float(width) / float(height)
                            if 'width' not in istate.attrib:
                                try:
                                    width = int(istate.attrib['height']) * ar
                                except:
                                    pass
                                istate.attrib['width'] = str(int(width))
                            else:
                                try:
                                    height = int(istate.attrib['width']) / ar
                                except:
                                    pass
                                istate.attrib['height'] = str(int(height))
                        item.unload_data_from_memory()
        elif tag == 'hr' and asfloat(style['width']) > 0:
            prop = style['width'] / self.profile.width
            istate.attrib['width'] = "%d%%" % int(round(prop * 100))
        elif display == 'table':
            tag = 'table'
        elif display == 'table-row':
            tag = 'tr'
        elif display == 'table-cell':
            tag = 'td'
        if tag in TABLE_TAGS and self.ignore_tables:
            tag = 'span' if tag == 'td' else 'div'

        if tag in ('table', 'td', 'tr'):
            col = style.backgroundColor
            if col:
                elem.set('bgcolor', col)
            css = style.cssdict()
            if 'border' in css or 'border-width' in css:
                elem.set('border', '1')
        if tag in TABLE_TAGS:
            for attr in ('rowspan', 'colspan', 'width', 'border', 'scope',
                         'bgcolor'):
                if attr in elem.attrib:
                    istate.attrib[attr] = elem.attrib[attr]
        if tag == 'q':
            t = elem.text
            if not t:
                t = ''
            elem.text = u'\u201c' + t
            t = elem.tail
            if not t:
                t = ''
            elem.tail = u'\u201d' + t
        text = None
        if elem.text:
            if istate.preserve:
                text = elem.text
            elif (len(elem) > 0 and isspace(elem.text)
                  and hasattr(elem[0].tag, 'rpartition')
                  and elem[0].tag.rpartition('}')[-1] not in INLINE_TAGS):
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
        not_baseline = valign in ('super', 'sub', 'text-top', 'text-bottom',
                                  'top', 'bottom') or (isinstance(
                                      valign,
                                      (float, int)) and abs(valign) != 0)
        issup = valign in ('super', 'text-top',
                           'top') or (isinstance(valign,
                                                 (float, int)) and valign > 0)
        vtag = 'sup' if issup else 'sub'
        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
            vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
            vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
            self.mobimlize_elem(elem,
                                stylizer,
                                vbstate,
                                istates,
                                ignore_valign=True)
            if len(istates) > 0:
                istates.pop()
            if len(istates) == 0:
                istates.append(FormatState())
            at_start = bstate.para is None
            if at_start:
                self.mobimlize_content('span', '', bstate, istates)
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
                vtag = etree.SubElement(vtag, XHTML('small'))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
                        vtag.append(child)
                    else:
                        break
                if vbstate.para is not None:
                    if vbstate.para.text:
                        vtag.text = vbstate.para.text
                    for child in vbstate.para:
                        vtag.append(child)
                return

        if tag == 'blockquote':
            old_mim = self.opts.mobi_ignore_margins
            self.opts.mobi_ignore_margins = False

        if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
                # We have an id but no text and no children, the id should still
                # be added.
                istate.ids and tag in ('a', 'span', 'i', 'b', 'u')
                and len(elem) == 0)):
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            tail = None
            if child.tail:
                if istate.preserve:
                    tail = child.tail
                elif bstate.para is None and isspace(child.tail):
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)

        if tag == 'blockquote':
            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
            para = bstate.para
            if para is not None and para.text == u'\xa0' and len(para) < 1:
                if style.height > 2:
                    para.getparent().replace(para, etree.Element(XHTML('br')))
                else:
                    # This is too small to be rendered effectively, drop it
                    para.getparent().remove(para)
            bstate.para = None
            bstate.istate = None
            vmargin = asfloat(style['margin-bottom'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-bottom'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        if bstate.nested and bstate.nested[-1].tag == elem.tag:
            bstate.nested.pop()
        istates.pop()

Example #49

0

Show file

File: simple.py Project: john-peterson/calibre

 def process_images(self, soup, baseurl):
     """
     Store the images referenced by ``soup`` on disk and point each
     ``<img>`` tag at its local copy.

     Images are written to an ``images`` directory under
     ``self.current_dir`` (created if missing) as ``img<N>.<ext>``.
     Three sources are handled:

     * ``data:image/`` URIs are base64-decoded in place;
     * any other URL is optionally passed through
       ``self.image_url_processor``, resolved against ``baseurl`` when
       it has no scheme, and fetched with ``self.fetch_url``;
     * URLs already present in ``self.imagemap`` are reused without
       fetching again.

     SVG data is saved unchanged. PNG/JPEG data is validated with
     ``identify_data`` and saved unchanged. Anything else is re-encoded
     through ``Image``: GIF becomes PNG, every other type becomes JPEG.

     Images that cannot be decoded, fetched or converted are skipped
     (the failure is logged or its traceback printed) and their tag is
     left untouched.

     :param soup: parsed document exposing ``findAll``; presumably a
         BeautifulSoup tree -- confirm against the caller.
     :param baseurl: URL used to resolve relative image URLs.
     """
     diskpath = unicode_path(os.path.join(self.current_dir, 'images'))
     if not os.path.exists(diskpath):
         os.mkdir(diskpath)
     c = 0
     for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
         iurl = tag['src']
         if iurl.startswith('data:image/'):
             # Embedded image: the payload follows the first comma.
             try:
                 data = b64decode(iurl.partition(',')[-1])
             except Exception:
                 self.log.exception('Failed to decode embedded image')
                 continue
         else:
             if callable(self.image_url_processor):
                 iurl = self.image_url_processor(baseurl, iurl)
             if not urlparse.urlsplit(iurl).scheme:
                 iurl = urlparse.urljoin(baseurl, iurl, False)
             with self.imagemap_lock:
                 if self.imagemap.has_key(iurl):
                     # Already downloaded: reuse the stored path.
                     tag['src'] = self.imagemap[iurl]
                     continue
             try:
                 data = self.fetch_url(iurl)
                 if data == 'GIF89a\x01':
                     # Skip empty GIF files as PIL errors on them anyway
                     continue
             except Exception:
                 self.log.exception('Could not fetch image ', iurl)
                 continue
         c += 1
         fname = ascii_filename('img'+str(c))
         if isinstance(fname, unicode):
             fname = fname.encode('ascii', 'replace')
         itype = imghdr.what(None, data)
         if itype is None and b'<svg' in data[:1024]:
             # SVG image
             imgpath = os.path.join(diskpath, fname+'.svg')
             with self.imagemap_lock:
                 self.imagemap[iurl] = imgpath
             with open(imgpath, 'wb') as x:
                 x.write(data)
             tag['src'] = imgpath
         else:
             try:
                 if itype not in {'png', 'jpg', 'jpeg'}:
                     # Re-encode unsupported formats. This must be an
                     # assignment: the previous ``==`` left itype unchanged,
                     # so unknown formats (itype None) failed when building
                     # the file name and GIFs were never converted to PNG.
                     itype = 'png' if itype == 'gif' else 'jpg'
                     im = Image()
                     im.load(data)
                     data = im.export(itype)
                 else:
                     # Called only for validation; raises on corrupt data.
                     identify_data(data)
                 imgpath = os.path.join(diskpath, fname+'.'+itype)
                 with self.imagemap_lock:
                     self.imagemap[iurl] = imgpath
                 with open(imgpath, 'wb') as x:
                     x.write(data)
                 tag['src'] = imgpath
             except Exception:
                 # Best effort: report the failure and move on to the
                 # next image, leaving this tag's src as it was.
                 traceback.print_exc()
                 continue