예제 #1
0
 def read_image_data(self, fname, base=None):
     """Load the raw bytes of an image and choose a file name for it.

     ``fname`` is either a ``file://`` URL naming a linked image on disk or
     the name of a member readable through ``self.docx.read``. ``base`` is
     an optional preferred file name; when it is empty the name is derived
     from the last ``/``-separated component of ``fname``.

     Returns a ``(raw, name)`` tuple where ``name`` is ``<stem>.<ext>``.
     The extension is the type detected from the data, falling back to the
     suffix of the chosen name and finally to ``'jpeg'``.

     Raises ``LinkedImageNotFound`` when the linked file does not exist or
     when ``self.docx.read`` raises ``KeyError`` for ``fname``.
     """
     if fname.startswith('file://'):
         src = fname[len('file://'):]
         if not src or not os.path.exists(src):
             raise LinkedImageNotFound(src)
         with open(src, 'rb') as rawsrc:
             raw = rawsrc.read()
     else:
         try:
             raw = self.docx.read(fname)
         except KeyError as err:
             raise LinkedImageNotFound(fname) from err
     base = base or image_filename(fname.rpartition('/')[-1]) or 'image'
     # rpartition() puts the whole string in the last slot when there is no
     # dot at all; treat that case as "no suffix" so that a name such as
     # "cover" is not mistaken for its own extension and its stem is kept.
     stem, dot, suffix = base.rpartition('.')
     if not dot:
         stem, suffix = base, ''
     ext = what(None, raw) or suffix or 'jpeg'
     if ext == 'emf':
         # For an example, see: https://bugs.launchpad.net/bugs/1224849
         self.log.info('Found an EMF image: %s, trying to extract '
                       'embedded raster image', fname)
         from ebook_converter.utils.wmf.emf import emf_unwrap
         try:
             raw = emf_unwrap(raw)
         except Exception:
             # Best effort: keep the original EMF data and extension.
             self.log.exception('Failed to extract embedded raster image '
                                'from EMF')
         else:
             ext = 'png'
     return raw, (stem or 'image') + '.' + ext
예제 #2
0
 def __call__(self, oeb, opts):
     """Rewrite ``data:`` image URIs on ``<img>`` elements of spine items.

     For every spine item whose data has an ``xpath`` attribute, each
     ``h:img`` element with a ``src`` starting with ``data:image/`` has its
     payload decoded (base64 or percent-encoding), its image type detected,
     and its ``src`` replaced by ``item.relhref()`` of whatever
     ``self.convert_image_data_uri`` returns. URIs with invalid base64 or
     an unrecognised image type are logged and left untouched.

     ``opts`` is accepted but not used here.
     """
     from ebook_converter.utils.imghdr import what
     self.log = oeb.log
     attr_path = XPath('//h:img[@src]')
     for item in oeb.spine:
         root = item.data
         if not hasattr(root, 'xpath'):
             continue
         for img in attr_path(root):
             raw = img.get('src', '')
             if not raw.startswith('data:'):
                 continue
             # Everything before the first comma is the header (media type
             # and encoding flags); everything after it is the payload.
             header, data = raw.partition(',')[0::2]
             if not header.startswith('data:image/') or not data:
                 continue
             if ';base64' in header:
                 data = re.sub(r'\s+', '', data)
                 try:
                     data = from_base64_bytes(data)
                 except Exception:
                     self.log.error('Found invalid base64 encoded data '
                                    'URI, ignoring it')
                     continue
             else:
                 # Decode percent-escapes straight to bytes. Going through
                 # unquote() would decode them as text first and corrupt
                 # escaped binary bytes that are not valid UTF-8.
                 data = urllib.parse.unquote_to_bytes(data)
             data = as_bytes(data)
             fmt = what(None, data)
             if not fmt:
                 self.log.warn('Image encoded as data URL has unknown '
                               'format, ignoring')
                 continue
             img.set(
                 'src',
                 item.relhref(self.convert_image_data_uri(data, fmt, oeb)))
예제 #3
0
def return_raster_image(path):
    """Return the contents of ``path`` when it holds a raster image.

    ``None`` is returned when the file is not readable, when its type is
    not recognised, or when it is detected as SVG.
    """
    from ebook_converter.utils.imghdr import what
    if not os.access(path, os.R_OK):
        return None
    with open(path, 'rb') as stream:
        contents = stream.read()
    detected = what(None, contents)
    if detected is None or detected == 'svg':
        return None
    return contents
예제 #4
0
    def extract_images(self, picts):
        """Dump every ``\\pict`` group of the file ``picts`` to its own file.

        The hex payload of each non-empty group is decoded and written to
        ``NNNN.<fmt>`` (relative to the current directory), where ``NNNN``
        is a 1-based counter and ``<fmt>`` is the detected image type, or
        ``wmf`` when detection returns ``None``. The counter-to-name
        mapping is handed to ``self.convert_images`` and its result is
        returned.
        """
        from ebook_converter.utils.imghdr import what
        from binascii import unhexlify
        self.log('Extracting images...')

        with open(picts, 'rb') as src:
            rtf = src.read()
        groups = [grp for grp in re.findall(br'\{\\pict([^}]+)\}', rtf)
                  if len(grp)]
        non_hex = re.compile(br'[^a-fA-F0-9]')

        imap = {}
        for count, group in enumerate(groups, 1):
            digits = non_hex.sub(b'', group)
            if len(digits) % 2 == 1:
                # unhexlify needs an even number of digits; drop the stray
                # trailing one.
                digits = digits[:-1]
            data = unhexlify(digits)
            fmt = what(None, data)
            if fmt is None:
                fmt = 'wmf'
            name = u'%04d.%s' % (count, fmt)
            with open(name, 'wb') as out:
                out.write(data)
            imap[count] = name
        return self.convert_images(imap)
예제 #5
0
    def resource_adder(self, link_, base=None):
        """Add the local file referenced by ``link_`` to the manifest.

        ``link_`` is resolved with ``self.link_to_local_path`` (relative to
        ``base`` when given). The original ``link_`` is returned unchanged
        when it cannot be resolved, is not readable, or is a directory.
        ``None`` is returned when the target's guessed media type is
        ``text/plain``. Otherwise the file is added to ``self.oeb.manifest``
        once (tracked in ``self.added_resources``) and the manifest href is
        returned, with the original fragment re-attached when present.
        """
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except Exception:
            # A malformed path is not fatal: leave the link as it was.
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warning('%s is a link to a directory. Ignoring.', link_)
            return link_
        if link not in self.added_resources:
            bhref = os.path.basename(link)
            item_id, href = self.oeb.manifest.generate(
                id='added', href=sanitize_file_name(bhref))
            guessed = mimetypes.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warning('Ignoring link to text file %r', link_)
                return None
            if media_type == self.BINARY_MIME:
                # Check for the common case, images
                try:
                    img = what(link)
                except EnvironmentError:
                    pass
                else:
                    if img:
                        media_type = mimetypes.guess_type(
                            'dummy.' + img)[0] or self.BINARY_MIME

            self.oeb.log.debug('Added %s', link)
            self.oeb.container = self.DirContainer(os.path.dirname(link),
                                                   self.oeb.log,
                                                   ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(item_id, href, media_type)
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
            # XXX(gryf): why the heck it was changed to bytes?
            item.html_input_href = urllib.parse.quote(bhref)
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = functools.partial(
                    self.css_import_handler, os.path.dirname(link))
            # Attribute access is intentional: per the "Load into memory"
            # note above it is evaluated only for its side effect.
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        if frag:
            nlink = '#'.join((nlink, frag))
        return nlink
예제 #6
0
def image_from_data(data):
    """Create an image object from ``data``, which should be a bytestring.

    A ``QImage`` passed in is returned as is. When ``QImage`` cannot load
    the data, JPEG XR data is handed to ``load_jxr_data``; anything else
    raises ``NotImage`` naming the detected type.
    """
    if isinstance(data, QImage):
        return data
    image = QImage()
    if image.loadFromData(data):
        return image
    detected = what(None, data)
    if detected == 'jxr':
        return load_jxr_data(data)
    raise NotImage('Not a valid image (detected type: {})'.format(detected))
예제 #7
0
    def extract_images(self, processed_records, output_dir):
        """Write the image records of the book into ``output_dir/images``.

        Sections from the header's ``first_image_index`` onwards (or from 0
        when that index is missing or out of range) that are not already in
        ``processed_records`` are appended to it and examined. Records
        detected as jpg/jpeg/gif/png/bmp are saved as ``NNNNN.<fmt>``;
        GIFs are converted to PNG unless ``AnimatedGIF`` is raised.

        Returns a dict mapping the running image index to the file name,
        and sets ``self.image_names`` to the names actually written.
        """
        self.log.debug('Extracting images...')
        output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        non_image_magic = {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                           b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI',
                           b'VIDE'}
        accepted_formats = {'jpg', 'jpeg', 'gif', 'png', 'bmp'}

        self.image_names = []
        image_name_map = {}
        image_index = 0

        first = getattr(self.book_header, 'first_image_index', -1)
        if first > self.num_sections or first < 0:
            # BAEN PRC files have bad headers
            first = 0

        for section in range(first, self.num_sections):
            if section in processed_records:
                continue
            processed_records.append(section)
            data = self.sections[section][0]
            # The index advances for every examined record, image or not.
            image_index += 1
            if data[:4] in non_image_magic:
                # This record is a known non image type, no need to try to
                # load the image
                continue

            try:
                imgfmt = what(None, data)
            except Exception:
                continue
            if imgfmt not in accepted_formats:
                continue

            if imgfmt == 'jpeg':
                imgfmt = 'jpg'
            elif imgfmt == 'gif':
                try:
                    converted = gif_data_to_png_data(data)
                except AnimatedGIF:
                    pass
                else:
                    data = converted
                    imgfmt = 'png'

            filename = '%05d.%s' % (image_index, imgfmt)
            path = os.path.join(output_dir, filename)
            image_name_map[image_index] = filename
            if imgfmt == 'png':
                with open(path, 'wb') as out:
                    out.write(data)
            else:
                try:
                    save_cover_data_to(data, path, minify_to=(10000, 10000))
                except Exception:
                    logging.exception('Exception has been thrown during '
                                      'transforming image')
                    continue
            self.image_names.append(filename)
        return image_name_map
예제 #8
0
 def dataize_svg(self, item, svg=None):
     """Point ``xlink:href`` references in an SVG tree at temporary files.

     ``svg`` defaults to ``item.data``. For each SVG element carrying an
     ``xlink:href`` that resolves (via ``item.abshref``) to an entry in the
     manifest's ``hrefs``, the entry's bytes are written to a persistent
     temporary file whose suffix is the detected image type (``jpg`` when
     detection fails), the file name is recorded in ``self.temp_files``,
     and the attribute is rewritten to that file name. Returns ``svg``.
     """
     if svg is None:
         svg = item.data
     known = self.oeb.manifest.hrefs
     for node in xpath(svg, '//svg:*[@xl:href]'):
         href_attr = base.tag('xlink', 'href')
         normalized = urlnormalize(node.attrib[href_attr])
         target = urllib.parse.urldefrag(normalized)[0]
         if not target:
             continue
         resolved = item.abshref(target)
         if resolved not in known:
             continue
         payload = known[resolved].bytes_representation
         suffix = '.' + (what(None, payload) or 'jpg')
         with PersistentTemporaryFile(suffix=suffix) as tmp:
             tmp.write(payload)
             self.temp_files.append(tmp.name)
         node.attrib[href_attr] = tmp.name
     return svg
예제 #9
0
    def process_image(self, data):
        """Return ``data`` converted for MOBI output.

        When ``self.process_images`` is false the data is returned as is.
        Otherwise it is passed through ``mobify_image`` (if
        ``self.opts.mobi_keep_original_images`` is set) or
        ``rescale_image``. If that raises, a fallback is attempted for GIF
        and PNG data only; for any other detected type the original
        exception is re-raised.
        """
        if not self.process_images:
            return data
        func = (mobify_image if self.opts.mobi_keep_original_images
                else rescale_image)
        try:
            return func(data)
        except Exception:
            ext = what(None, data)
            if ext not in ('png', 'gif'):
                raise
            if ext == 'gif':
                # No temporary file is needed here: mobify_image works on
                # the in-memory bytes, so writing one only leaked a file.
                return mobify_image(data)

            # PNG: run the optimizer over a temporary copy and retry with
            # the optimized bytes.
            with PersistentTemporaryFile(suffix='.png') as pt:
                pt.write(data)
            try:
                from ebook_converter.utils.img import optimize_png
                optimize_png(pt.name)
                with open(pt.name, 'rb') as optimized:
                    data = optimized.read()
            finally:
                os.remove(pt.name)
            return func(data)
예제 #10
0
    def add_resources(self, add_fonts):
        """Append image (and optionally font) records to ``self.records``.

        Record 0 is reserved for the masthead when the guide names one, or
        filled with a generated masthead for periodicals. Every manifest
        item whose media type is in ``OEB_RASTER_IMAGES`` is run through
        ``self.process_image`` and appended; its 1-based index is stored in
        ``self.item_map`` and its MIME type in ``self.mime_map``. For the
        cover item ``self.cover_offset`` is set and a thumbnail record is
        appended when ``rescale_image`` succeeds. Images that fail to
        process are logged and skipped.

        When ``add_fonts`` is true, items whose href ends in ``ttf`` or
        ``otf`` and whose data is ``bytes`` are appended as font records.
        """
        oeb = self.oeb
        oeb.logger.info('Serializing resources...')
        index = 1

        mh_href = None
        if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
            mh_href = oeb.guide['masthead'].href
            # Placeholder; replaced below when the masthead item is seen.
            self.records.append(None)
            index += 1
            self.used_image_indices.add(0)
            self.image_indices.add(0)
        elif self.is_periodical:
            # Generate a default masthead
            data = generate_masthead(str(self.oeb.metadata['title'][0]))
            self.records.append(data)
            self.used_image_indices.add(0)
            self.image_indices.add(0)
            index += 1

        cover_href = self.cover_offset = self.thumbnail_offset = None
        if (oeb.metadata.cover and
                str(oeb.metadata.cover[0]) in oeb.manifest.ids):
            cover_id = str(oeb.metadata.cover[0])
            item = oeb.manifest.ids[cover_id]
            cover_href = item.href

        for item in self.oeb.manifest.values():
            if item.media_type not in OEB_RASTER_IMAGES:
                continue
            try:
                data = self.process_image(item.data)
            except Exception:
                # One unreadable image must not abort the whole book, but
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                self.log.warn('Bad image file %r' % item.href)
                continue
            else:
                if mh_href and item.href == mh_href:
                    self.records[0] = data
                    continue

                self.image_indices.add(len(self.records))
                self.records.append(data)
                self.item_map[item.href] = index
                self.mime_map[item.href] = 'image/%s' % what(None, data)
                index += 1

                if cover_href and item.href == cover_href:
                    self.cover_offset = self.item_map[item.href] - 1
                    self.used_image_indices.add(self.cover_offset)
                    try:
                        data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
                                             maxsizeb=MAX_THUMB_SIZE)
                    except Exception:
                        self.log.warn('Failed to generate thumbnail')
                    else:
                        self.image_indices.add(len(self.records))
                        self.records.append(data)
                        self.thumbnail_offset = index - 1
                        self.used_image_indices.add(self.thumbnail_offset)
                        index += 1
            finally:
                # Runs on every exit from the try, including `continue`.
                item.unload_data_from_memory()

        if add_fonts:
            for item in self.oeb.manifest.values():
                if item.href and item.href.rpartition('.')[-1].lower() in {
                        'ttf', 'otf'} and isinstance(item.data, bytes):
                    self.records.append(write_font_record(item.data))
                    self.item_map[item.href] = len(self.records)
                    self.has_fonts = True
예제 #11
0
def mobify_image(data):
    """Convert PNG image data to GIF, as the Kindle cannot display some PNG.

    Data detected as any other type is returned unchanged.
    """
    if what(None, data) != 'png':
        return data
    return png_data_to_gif_data(data)
예제 #12
0
def find_imgtype(data):
    """Return the image type detected for ``data``, or ``'unknown'``."""
    detected = what(None, data)
    if detected:
        return detected
    return 'unknown'