Example #1
0
    def extract_resources(self):
        """Populate ``self.resource_map`` with ``(name, data)`` tuples.

        One entry is produced for every record in ``self.resource_records``
        except HUFF, CDIC and INDX records. Names have the form
        ``<folder>/<index><tag>.<ext>`` where ``index`` is the position of the
        record in ``self.resource_records``.

        Raises:
            ValueError: if a FONT record could not be read.
        """
        eof_marker = b'\xe9\x8e\r\n'
        known_types = {b'FLIS', b'FCIS', b'SRCS', eof_marker, b'RESC',
                       b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}
        skipped_types = {b'HUFF', b'CDIC', b'INDX'}
        resources = self.resource_map = []

        for index, record in enumerate(self.resource_records):
            raw = record.raw
            signature = raw[:4]
            # TODO: Ignore CNCX records as well
            if signature in skipped_types:
                continue

            folder, extension, tag, data = 'binary', 'dat', '', raw
            if signature == b'FONT':
                font = read_font_record(raw)
                if font['err']:
                    raise ValueError('Failed to read font record: %s Headers: %s'%(
                        font['err'], font['headers']))
                # Fall back to the raw data when no font data is available
                data = font['font_data'] or font['raw_data']
                folder, extension = 'fonts', font['ext']
            elif signature in known_types:
                # Known record types stay in the generic folder and are
                # tagged with their signature instead
                if signature == eof_marker:
                    tag = '-EOF'
                else:
                    tag = '-' + signature.decode('ascii')
            else:
                # Anything else is probed to see if it is an image
                image_type = what(None, raw)
                if image_type:
                    folder, extension = 'images', image_type

            resources.append(
                ('%s/%06d%s.%s' % (folder, index, tag, extension), data))
Example #2
0
    def resource_adder(self, link_, base=None):
        """Add the local file that ``link_`` points at to the OEB manifest.

        Returns ``link_`` unchanged when it does not resolve to a readable,
        non-directory local file, ``None`` when the target is guessed to be
        ``text/plain``, and otherwise the manifest href of the resource with
        the fragment of ``link_`` (if any) re-attached. Files that were added
        before are looked up in ``self.added_resources`` rather than added
        again.
        """
        from polyglot.urllib import quote
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            needs_base = base and not os.path.isabs(link)
            link = os.path.abspath(
                os.path.join(base, link) if needs_base else link)
        except:
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()

        if link not in self.added_resources:
            source_dir, bhref = os.path.split(link)
            item_id, href = self.oeb.manifest.generate(
                id='added', href=sanitize_file_name(bhref))
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r' % link_)
                return None
            if media_type == self.BINARY_MIME:
                # Check for the common case, images
                try:
                    img = what(link)
                except EnvironmentError:
                    img = None
                if img:
                    media_type = (self.guess_type('dummy.' + img)[0] or
                                  self.BINARY_MIME)

            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(
                source_dir, self.oeb.log, ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(item_id, href, media_type)
            # bhref names a file that already exists. DirContainer.read()
            # unquotes the name before reading, so it is quoted here.
            if isinstance(bhref, unicode_type):
                bhref = bhref.encode('utf-8')
            item.html_input_href = as_unicode(quote(bhref))
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                    self.css_import_handler, source_dir)
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        return '#'.join((nlink, frag)) if frag else nlink
Example #3
0
 def read_image_data(self, fname, base=None):
     """Return ``(raw, name)`` for the image referenced by ``fname``.

     ``fname`` is either a ``file://`` link to a file on disk or a name that
     is read from ``self.docx``. ``name`` is built from ``base`` (or from the
     last path component of ``fname``) with its extension replaced by the
     detected image type.

     Raises:
         LinkedImageNotFound: if the linked file or the docx member is
             missing.
     """
     scheme = 'file://'
     if fname.startswith(scheme):
         src = fname[len(scheme):]
         if iswindows and src.startswith('/'):
             src = src[1:]
         if not (src and os.path.exists(src)):
             raise LinkedImageNotFound(src)
         with open(src, 'rb') as rawsrc:
             raw = rawsrc.read()
     else:
         try:
             raw = self.docx.read(fname)
         except KeyError:
             raise LinkedImageNotFound(fname)
     if not base:
         base = image_filename(fname.rpartition('/')[-1]) or 'image'
     ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
     if ext == 'emf':
         # For an example, see: https://bugs.launchpad.net/bugs/1224849
         self.log('Found an EMF image: %s, trying to extract embedded raster image' % fname)
         from calibre.utils.wmf.emf import emf_unwrap
         try:
             raw = emf_unwrap(raw)
         except Exception:
             self.log.exception('Failed to extract embedded raster image from EMF')
         else:
             ext = 'png'
     stem = base.rpartition('.')[0] or 'image'
     return raw, stem + '.' + ext
Example #4
0
    def generate_filename(self, rid, base=None, rid_map=None):
        """Write the image for relationship id ``rid`` and return its name.

        The image is read from ``self.docx``, written into ``self.dest_dir``
        under a name not used by any other image, recorded in ``self.used``
        and added to ``self.all_images``. A docx member that was already
        written is not written again; its existing name is returned.

        Args:
            rid: key into ``rid_map``.
            base: optional file name to derive the output name from. When
                omitted the last path component of the docx member is used.
            rid_map: mapping of relationship ids to docx member names,
                defaults to ``self.rid_map``.

        Returns:
            The file name (without directory) the image was written to.
        """
        rid_map = self.rid_map if rid_map is None else rid_map
        fname = rid_map[rid]
        if fname in self.used:
            return self.used[fname]
        raw = self.docx.read(fname)
        base = base or ascii_filename(fname.rpartition('/')[-1]).replace(' ', '_') or 'image'
        ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
        if ext == 'emf':
            # For an example, see: https://bugs.launchpad.net/bugs/1224849
            self.log('Found an EMF image: %s, trying to extract embedded raster image' % base)
            from calibre.utils.wmf.emf import emf_unwrap
            try:
                raw = emf_unwrap(raw)
            except Exception:
                self.log.exception('Failed to extract embedded raster image from EMF')
            else:
                ext = 'png'

        base = (base.rpartition('.')[0] or 'image') + '.' + ext
        # values() exists on both Python 2 and 3, itervalues() only on 2
        exists = frozenset(self.used.values())
        stem, suffix = base.rpartition('.')[0::2]
        c = 1
        name = base
        # Append -1, -2, ... to the stem until the name is unused
        while name in exists:
            name = '%s-%d.%s' % (stem, c, suffix)
            c += 1
        self.used[fname] = name
        with open(os.path.join(self.dest_dir, name), 'wb') as f:
            f.write(raw)
        self.all_images.add('images/' + name)
        return name
Example #5
0
    def extract_images(self, processed_records, output_dir):
        """Save the image sections of the book into ``output_dir/images``.

        Sections from the header's ``first_image_index`` onwards that are not
        yet listed in ``processed_records`` are appended to it and, when they
        hold a jpeg, gif, png or bmp image, saved as ``NNNNN.jpg``. The saved
        file names are collected in ``self.image_names``.
        """
        self.log.debug('Extracting images...')
        output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        non_image_signatures = {
            b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN',
            b'FDST', b'DATP', b'AUDI', b'VIDE'}
        image_formats = {'jpg', 'jpeg', 'gif', 'png', 'bmp'}

        self.image_names = []
        first = getattr(self.book_header, 'first_image_index', -1)
        if first > self.num_sections or first < 0:
            # BAEN PRC files have bad headers
            first = 0

        image_index = 0
        for section in range(first, self.num_sections):
            if section in processed_records:
                continue
            processed_records.append(section)
            data = self.sections[section][0]
            # The counter advances for every newly processed section, image
            # or not
            image_index += 1
            if data[:4] in non_image_signatures:
                # Known non image record, no need to try loading it
                continue

            file_name = '%05d.jpg' % image_index
            try:
                if what(None, data) not in image_formats:
                    continue
                save_cover_data_to(data, os.path.join(output_dir, file_name),
                                   minify_to=(10000, 10000))
            except Exception:
                continue
            self.image_names.append(file_name)
Example #6
0
def return_raster_image(path):
    """Return the contents of ``path`` if it is a readable non-SVG image.

    ``None`` is returned when the file is not readable, when its type is not
    recognised, or when it is an SVG.
    """
    from calibre.utils.imghdr import what
    if not os.access(path, os.R_OK):
        return None
    with open(path, 'rb') as f:
        raw = f.read()
    fmt = what(None, raw)
    if fmt is None or fmt == 'svg':
        return None
    return raw
Example #7
0
def cdb_set_fields(ctx, rd, book_id, library_id):
    """Apply the field changes in the request payload to ``book_id``.

    The payload must hold a ``changes`` dict and may hold ``loaded_book_ids``
    and ``all_dirtied``. A ``cover`` entry in ``changes`` is handled
    separately: ``None`` is passed to ``db.set_cover`` as is, anything else
    must be base64 encoded JPEG or PNG data (optionally preceded by a prefix
    ending in a comma).

    Returns a dict mapping book ids to their JSON representation for
    ``book_id`` and the changed books (all of them when ``all_dirtied`` is
    set, otherwise only those in ``loaded_book_ids``).

    Raises:
        HTTPForbidden: if the user has per library restrictions.
        HTTPBadRequest: if the payload or the cover data is malformed.
    """
    db = get_db(ctx, rd, library_id)
    if ctx.restriction_for(rd, db):
        raise HTTPForbidden('Cannot use the set fields interface with a user who has per library restrictions')
    data = load_payload_data(rd)
    try:
        changes = data['changes']
        loaded_book_ids = frozenset(map(int, data.get('loaded_book_ids', ())))
        all_dirtied = bool(data.get('all_dirtied'))
        if not isinstance(changes, dict):
            raise TypeError('changes must be a dict')
    except Exception:
        raise HTTPBadRequest(
        '''Data must be of the form {'changes': {'title': 'New Title', ...}, 'loaded_book_ids':[book_id1, book_id2, ...]'}''')

    dirtied = set()
    # False means "no cover entry", None is a legitimate cover value
    cover = changes.pop('cover', False)
    if cover is not False:
        if cover is not None:
            try:
                cover = from_base64_bytes(cover.split(',', 1)[-1])
            except Exception:
                raise HTTPBadRequest('Cover data is not valid base64 encoded data')
            try:
                fmt = what(None, cover)
            except Exception:
                fmt = None
            if fmt not in ('jpeg', 'png'):
                raise HTTPBadRequest('Cover data must be either JPEG or PNG')
        dirtied |= db.set_cover({book_id: cover})

    for field, value in iteritems(changes):
        dirtied |= db.set_field(field, {book_id: value})
    ctx.notify_changes(db.backend.library_path, metadata(dirtied))

    if all_dirtied:
        all_ids = dirtied
    else:
        all_ids = dirtied & loaded_book_ids
    all_ids.add(book_id)
    return {bid: book_as_json(db, bid) for bid in all_ids}
Example #8
0
    def extract_images(self, picts):
        """Write the hex encoded pictures found in the file ``picts`` to disk.

        Every ``{\\pict ...}`` group is stripped of non-hex characters,
        decoded and written to ``NNNN.<fmt>`` in the current directory, with
        ``wmf`` used when the image type is not recognised. The mapping of
        picture number to file name is passed to ``self.convert_images`` and
        its result is returned.
        """
        from calibre.utils.imghdr import what
        from binascii import unhexlify
        self.log('Extracting images...')

        with open(picts, 'rb') as f:
            raw = f.read()
        non_hex = re.compile(br'[^a-fA-F0-9]')
        groups = re.findall(br'\{\\pict([^}]+)\}', raw)
        encs = [non_hex.sub(b'', group) for group in groups if len(group)]

        imap = {}
        for count, enc in enumerate(encs, 1):
            if len(enc) % 2 == 1:
                # unhexlify needs an even number of digits
                enc = enc[:-1]
            data = unhexlify(enc)
            fmt = what(None, data)
            if fmt is None:
                fmt = 'wmf'
            name = '%04d.%s' % (count, fmt)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
        return self.convert_images(imap)
Example #9
0
def get_metadata(stream):
    """Read metadata, including the cover, from the MOBI file ``stream``.

    Streams that start with ``TPZ`` are handed to the topaz metadata reader.
    Otherwise the metadata from the EXTH header is used when there is one.
    Without an EXTH header, files smaller than 4MB are extracted to a
    temporary directory to look for embedded metadata.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        magic = stream.read(3)
    except Exception:
        magic = b''
    stream.seek(0)
    if magic == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is None:
        # Assume the file is huge unless its size can be measured
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    elif mh.exth.mi is not None:
        mi = mh.exth.mi

    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''

    cover_formats = {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}
    if data and what(None, data) in cover_formats:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Example #10
0
 def read_image_data(self, fname, base=None):
     """Load an image and work out a file name for it.

     A ``fname`` starting with ``file://`` is read from disk, anything else
     is read from ``self.docx``. The returned name is ``base`` (or the last
     path component of ``fname``) with the extension set to the detected
     image type. EMF images are unwrapped to their embedded raster image
     when possible.

     Returns:
         A ``(raw, name)`` tuple.

     Raises:
         LinkedImageNotFound: if the image cannot be found.
     """
     if not fname.startswith('file://'):
         try:
             raw = self.docx.read(fname)
         except KeyError:
             raise LinkedImageNotFound(fname)
     else:
         local_path = fname[len('file://'):]
         if iswindows and local_path and local_path[0] == '/':
             local_path = local_path[1:]
         if not local_path or not os.path.exists(local_path):
             raise LinkedImageNotFound(local_path)
         with open(local_path, 'rb') as stream:
             raw = stream.read()

     name = base or image_filename(fname.rpartition('/')[-1]) or 'image'
     extension = what(None, raw) or name.rpartition('.')[-1] or 'jpeg'
     if extension == 'emf':
         # For an example, see: https://bugs.launchpad.net/bugs/1224849
         self.log('Found an EMF image: %s, trying to extract embedded raster image' % fname)
         from calibre.utils.wmf.emf import emf_unwrap
         try:
             raw = emf_unwrap(raw)
         except Exception:
             self.log.exception('Failed to extract embedded raster image from EMF')
         else:
             extension = 'png'

     name = name.rpartition('.')[0]
     if not name:
         name = 'image'
     return raw, '%s.%s' % (name, extension)
Example #11
0
def cdb_set_fields(ctx, rd, book_id, library_id):
    """Set fields on ``book_id`` from the JSON payload of the request.

    ``changes`` in the payload maps field names to new values. Its optional
    ``cover`` entry is either ``None`` or base64 encoded JPEG/PNG data and is
    applied with ``db.set_cover``. Every other entry goes through
    ``db.set_field``.

    Returns the JSON form of ``book_id`` plus the changed books, limited to
    ``loaded_book_ids`` unless ``all_dirtied`` is set in the payload.

    Raises:
        HTTPForbidden: if the user has per library restrictions.
        HTTPBadRequest: if the payload or the cover data is invalid.
    """
    db = get_db(ctx, rd, library_id)
    if ctx.restriction_for(rd, db):
        raise HTTPForbidden('Cannot use the set fields interface with a user who has per library restrictions')
    data = load_payload_data(rd)
    try:
        changes = data['changes']
        loaded_book_ids = frozenset(int(x) for x in data.get('loaded_book_ids', ()))
        all_dirtied = bool(data.get('all_dirtied'))
        if not isinstance(changes, dict):
            raise TypeError('changes must be a dict')
    except Exception:
        raise HTTPBadRequest(
        '''Data must be of the form {'changes': {'title': 'New Title', ...}, 'loaded_book_ids':[book_id1, book_id2, ...]'}''')

    dirtied = set()
    cdata = changes.pop('cover', False)
    cover_supplied = cdata is not False
    if cover_supplied and cdata is not None:
        # Drop any prefix up to the first comma before decoding
        encoded = cdata.split(',', 1)[-1]
        try:
            cdata = standard_b64decode(encoded.encode('ascii'))
        except Exception:
            raise HTTPBadRequest('Cover data is not valid base64 encoded data')
        try:
            fmt = what(None, cdata)
        except Exception:
            fmt = None
        if fmt not in ('jpeg', 'png'):
            raise HTTPBadRequest('Cover data must be either JPEG or PNG')
    if cover_supplied:
        dirtied |= db.set_cover({book_id: cdata})

    for field, value in iteritems(changes):
        dirtied |= db.set_field(field, {book_id: value})
    ctx.notify_changes(db.backend.library_path, metadata(dirtied))

    all_ids = dirtied if all_dirtied else (dirtied & loaded_book_ids)
    all_ids |= {book_id}
    result = {}
    for bid in all_ids:
        result[bid] = book_as_json(db, bid)
    return result
Example #12
0
def return_raster_image(path):
    """Read ``path`` and return its bytes unless it is an SVG or not an image.

    Returns ``None`` for unreadable files, unrecognised data and SVG images.
    """
    from calibre.utils.imghdr import what
    raw = None
    if os.access(path, os.R_OK):
        with open(path, 'rb') as f:
            raw = f.read()
        if what(None, raw) in (None, 'svg'):
            raw = None
    return raw
Example #13
0
    def read_image_data(self, fname, base=None):
        """Return the bytes of image ``fname`` together with a file name.

        ``file://`` links are read from disk, everything else from
        ``self.docx``. The file name is derived from ``base`` or, if that is
        empty, from the last path component of ``fname``. Its extension is
        the detected image type.

        Raises:
            LinkedImageNotFound: if a ``file://`` link points at a file that
                does not exist.
        """
        prefix = "file://"
        if fname.startswith(prefix):
            src = fname[len(prefix):]
            if iswindows and src and src[0] == "/":
                src = src[1:]
            if not (src and os.path.exists(src)):
                raise LinkedImageNotFound(src)
            with open(src, "rb") as rawsrc:
                raw = rawsrc.read()
        else:
            raw = self.docx.read(fname)

        if not base:
            last_component = fname.rpartition("/")[-1]
            base = ascii_filename(last_component).replace(" ", "_") or "image"
        ext = what(None, raw) or base.rpartition(".")[-1] or "jpeg"
        if ext == "emf":
            # For an example, see: https://bugs.launchpad.net/bugs/1224849
            self.log("Found an EMF image: %s, trying to extract embedded raster image" % fname)
            from calibre.utils.wmf.emf import emf_unwrap

            try:
                raw = emf_unwrap(raw)
            except Exception:
                self.log.exception("Failed to extract embedded raster image from EMF")
            else:
                ext = "png"

        stem = base.rpartition(".")[0] or "image"
        return raw, stem + "." + ext
Example #14
0
    def extract_images(self, processed_records, output_dir):
        """Extract images from the unprocessed sections of the book.

        Images are saved as ``NNNNN.jpg`` inside ``output_dir/images`` and
        their names stored in ``self.image_names``. Every section that is
        looked at is appended to ``processed_records``.
        """
        self.log.debug('Extracting images...')
        output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        self.image_names = names = []
        start = getattr(self.book_header, 'first_image_index', -1)
        if start > self.num_sections or start < 0:
            # BAEN PRC files have bad headers
            start = 0

        # Membership is tested lazily, right before each section is handled
        pending = (i for i in range(start, self.num_sections)
                   if i not in processed_records)
        for image_index, i in enumerate(pending, 1):
            processed_records.append(i)
            data = self.sections[i][0]
            if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                    b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
                # This record is a known non image type, no need to try to
                # load the image
                continue

            path = os.path.join(output_dir, '%05d.jpg' % image_index)
            try:
                is_image = what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}
                if is_image:
                    save_cover_data_to(data, path, minify_to=(10000, 10000))
            except Exception:
                continue
            if is_image:
                names.append(os.path.basename(path))
Example #15
0
    def extract_images(self, picts):
        """Write the hex encoded pictures found in the file ``picts`` to disk.

        The file is read as bytes. Every ``{\\pict ...}`` group in it is
        stripped of non-hex characters, decoded and written to
        ``NNNN.<fmt>`` in the current directory, where ``<fmt>`` is the
        detected image type or ``wmf`` when detection fails.

        Args:
            picts: path of the file holding the picture groups.

        Returns:
            Whatever ``self.convert_images`` returns for the mapping of
            picture number to written file name.
        """
        from binascii import unhexlify
        from calibre.utils.imghdr import what
        self.log('Extracting images...')

        with open(picts, 'rb') as f:
            raw = f.read()
        # The data is bytes, so bytes patterns are needed on Python 3 (and
        # are identical to str patterns on Python 2)
        picts = filter(len, re.findall(br'\{\\pict([^}]+)\}', raw))
        non_hex = re.compile(br'[^a-fA-F0-9]')
        encs = [non_hex.sub(b'', pict) for pict in picts]

        count = 0
        imap = {}
        for enc in encs:
            if len(enc) % 2 == 1:
                # Hex decoding needs an even number of digits
                enc = enc[:-1]
            # unhexlify() replaces the Python 2 only str.decode('hex')
            data = unhexlify(enc)
            fmt = what(None, data)
            if fmt is None:
                fmt = 'wmf'
            count += 1
            name = u'%04d.%s' % (count, fmt)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
            # with open(name+'.hex', 'wb') as f:
                # f.write(enc)
        return self.convert_images(imap)
Example #16
0
    def __init__(self, mf):
        """Split the records of ``mf`` into index, text, image, font and
        binary records.

        ``raw``, ``palmdb``, ``record_headers``, ``records``, ``mobi_header``
        and ``huffman_record_nums`` are copied over from ``mf``.
        ``tbs_indexing`` is only set when there is a primary index.
        """
        for attr in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
                'huffman_record_nums',):
            setattr(self, attr, getattr(mf, attr))

        header = self.mobi_header
        records = self.records

        # Primary index: header record, index records, then CNCX records
        self.index_header = self.index_record = None
        self.indexing_record_nums = set()
        pir = getattr(header, 'primary_index_record', NULL_INDEX)
        if pir != NULL_INDEX:
            self.index_header = IndexHeader(records[pir])
            numi = self.index_header.index_count
            cncx_start = pir + 1 + numi
            self.cncx = CNCX(
                records[cncx_start:cncx_start + self.index_header.num_of_cncx_blocks],
                self.index_header.index_encoding)
            self.index_record = IndexRecord(records[pir+1:cncx_start],
                    self.index_header, self.cncx)
            self.indexing_record_nums = set(range(pir,
                cncx_start + self.index_header.num_of_cncx_blocks))

        # Secondary index
        # NOTE(review): self.cncx is only assigned in the primary index
        # branch above but is read here - confirm a secondary index never
        # occurs without a primary one
        self.secondary_index_record = self.secondary_index_header = None
        sir = header.secondary_index_record
        if sir != NULL_INDEX:
            self.secondary_index_header = SecondaryIndexHeader(records[sir])
            numi = self.secondary_index_header.index_count
            self.indexing_record_nums.add(sir)
            self.secondary_index_record = IndexRecord(
                    records[sir+1:sir+1+numi], self.secondary_index_header, self.cncx)
            self.indexing_record_nums |= set(range(sir+1, sir+1+numi))

        ntr = header.number_of_text_records
        fii = header.first_image_index
        last_text = min(len(records), ntr+1)
        self.text_records = [
            TextRecord(r, records[r], header.extra_data_flags, mf.decompress6)
            for r in range(1, last_text)]

        self.image_records, self.binary_records = [], []
        self.font_records = []
        non_image_signatures = {
            b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN',
            b'FDST', b'DATP', b'AUDI', b'VIDE', b'FONT', b'CRES', b'CONT',
            b'CMET'}
        image_index = 0
        for i in range(header.first_resource_record, min(header.last_resource_record, len(records))):
            if i in self.indexing_record_nums or i in self.huffman_record_nums:
                continue
            image_index += 1
            r = records[i]
            signature = r.raw[:4]
            fmt = None
            if i >= fii and signature not in non_image_signatures:
                try:
                    fmt = what(None, r.raw)
                except:
                    pass
            if fmt is not None:
                self.image_records.append(ImageRecord(image_index, r, fmt))
            elif signature == b'FONT':
                self.font_records.append(FontRecord(i, r))
            else:
                self.binary_records.append(BinaryRecord(i, r))

        if self.index_record is not None:
            self.tbs_indexing = TBSIndexing(self.text_records,
                    self.index_record.indices, header.type_raw)
Example #17
0
def get_metadata(stream):
    """Return the metadata, cover included, of the MOBI file ``stream``.

    Topaz files (starting with ``TPZ``) are delegated to the topaz metadata
    reader. For other files the EXTH metadata is preferred. When the file
    has no EXTH header and is smaller than 4MB, its content is extracted
    into a temporary directory to find embedded metadata.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.magick.draw import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        signature = stream.read(3)
    except:
        signature = ''
    stream.seek(0)
    if signature == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    exth = mh.exth
    if exth is None:
        can_measure = hasattr(stream, 'seek') and hasattr(stream, 'tell')
        if can_measure:
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        else:
            # Treat streams of unknown size as too large to extract
            size = 1024**3
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    elif exth.mi is not None:
        mi = exth.mi

    if hasattr(mh.exth, 'cover_offset'):
        data = mh.section_data(int(mh.first_image_index + mh.exth.cover_offset))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except:
            data = ''

    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            cover = save_cover_data_to(data, 'cover.jpg', return_data=True)
            mi.cover_data = ('jpg', cover)
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Example #18
0
    def __init__(self, mf):
        """Classify the records of the MOBI file object ``mf``.

        Copies the basic attributes from ``mf``, parses the primary and
        secondary indices when present, wraps the text records and sorts the
        resource records into ``image_records``, ``font_records`` and
        ``binary_records``.
        """
        copied = ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
                  'huffman_record_nums')
        for name in copied:
            setattr(self, name, getattr(mf, name))

        self.index_header = None
        self.index_record = None
        self.indexing_record_nums = set()
        pir = getattr(self.mobi_header, 'primary_index_record', NULL_INDEX)
        if pir != NULL_INDEX:
            ih = self.index_header = IndexHeader(self.records[pir])
            numi = ih.index_count
            first_index, first_cncx = pir + 1, pir + 1 + numi
            self.cncx = CNCX(
                self.records[first_cncx:first_cncx + ih.num_of_cncx_blocks],
                ih.index_encoding)
            self.index_record = IndexRecord(
                self.records[first_index:first_cncx], ih, self.cncx)
            self.indexing_record_nums = set(
                range(pir, first_cncx + ih.num_of_cncx_blocks))

        self.secondary_index_record = None
        self.secondary_index_header = None
        sir = self.mobi_header.secondary_index_record
        if sir != NULL_INDEX:
            sih = self.secondary_index_header = SecondaryIndexHeader(
                self.records[sir])
            numi = sih.index_count
            self.indexing_record_nums.add(sir)
            # NOTE(review): relies on self.cncx having been set by the
            # primary index branch - verify this always holds
            self.secondary_index_record = IndexRecord(
                self.records[sir+1:sir+1+numi], sih, self.cncx)
            self.indexing_record_nums |= set(range(sir+1, sir+1+numi))

        ntr = self.mobi_header.number_of_text_records
        fii = self.mobi_header.first_image_index
        self.text_records = []
        for r in range(1, min(len(self.records), ntr+1)):
            self.text_records.append(TextRecord(
                r, self.records[r], self.mobi_header.extra_data_flags,
                mf.decompress6))

        self.image_records = []
        self.binary_records = []
        self.font_records = []
        image_index = 0
        resource_range = range(
            self.mobi_header.first_resource_record,
            min(self.mobi_header.last_resource_record, len(self.records)))
        for i in resource_range:
            if i in self.indexing_record_nums or i in self.huffman_record_nums:
                continue
            image_index += 1
            r = self.records[i]
            head = r.raw[:4]
            # Only records at or after the first image index that do not
            # carry a known non image signature are probed
            probe = i >= fii and head not in {
                b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN',
                b'FDST', b'DATP', b'AUDI', b'VIDE', b'FONT', b'CRES',
                b'CONT', b'CMET'}
            fmt = None
            if probe:
                try:
                    fmt = what(None, r.raw)
                except:
                    pass

            if fmt is not None:
                self.image_records.append(ImageRecord(image_index, r, fmt))
            elif head == b'FONT':
                self.font_records.append(FontRecord(i, r))
            else:
                self.binary_records.append(BinaryRecord(i, r))

        if self.index_record is not None:
            self.tbs_indexing = TBSIndexing(
                self.text_records, self.index_record.indices,
                self.mobi_header.type_raw)
Example #19
0
def find_imgtype(data):
    """Return the image type of ``data``.

    ``what`` is tried first. If it does not recognise the data,
    ``identify_data`` is used, and ``'unknown'`` is returned when that fails.
    """
    detected = what(None, data)
    if detected is not None:
        return detected
    try:
        return identify_data(data)[2]
    except Exception:
        return 'unknown'
Example #20
0
def find_imgtype(data):
    """Detect the image type of ``data``, falling back to ``'unknown'``.

    The third item of the ``identify_data`` result is used when ``what``
    returns ``None``.
    """
    kind = what(None, data)
    if kind is None:
        try:
            identified = identify_data(data)
            kind = identified[2]
        except Exception:
            kind = 'unknown'
    return kind
Example #21
0
    def resource_adder(self, link_, base=None):
        """Add the file referenced by ``link_`` to the manifest, if possible.

        ``link_`` is returned as is when it cannot be mapped to a readable
        local file or when it points at a directory. Links to plain text
        files yield ``None``. In all other cases the href under which the
        file was added to the manifest is returned, followed by ``#`` and the
        fragment when ``link_`` had one.
        """
        from urllib import quote
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except:
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()

        already_added = link in self.added_resources
        if not already_added:
            folder = os.path.dirname(link)
            bhref = os.path.basename(link)
            item_id, href = self.oeb.manifest.generate(id='added',
                    href=bhref)
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r'%link_)
                return None
            if media_type == self.BINARY_MIME:
                # Check for the common case, images
                img = None
                try:
                    img = what(link)
                except EnvironmentError:
                    pass
                if img:
                    media_type = self.guess_type('dummy.'+img)[0] or self.BINARY_MIME

            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(folder,
                    self.oeb.log, ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(item_id, href, media_type)
            # The file named by bhref already exists. As the read() method
            # of DirContainer unquotes names before reading, the name is
            # quoted here.
            if isinstance(bhref, unicode):
                bhref = bhref.encode('utf-8')
            item.html_input_href = quote(bhref).decode('utf-8')
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, folder)
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        if not frag:
            return nlink
        return '#'.join((nlink, frag))
Example #22
0
def mobify_image(data):
    """Return ``data`` converted to GIF if it is a PNG image.

    The Kindle cannot display some PNG images. Data of any other type is
    returned unchanged.
    """
    if what(None, data) != 'png':
        return data
    im = Image()
    im.load(data)
    return im.export('gif')
Example #23
0
def mobify_image(data):
    """Convert PNG image data to GIF, leaving all other data untouched.

    Needed because some PNG images cannot be displayed by the Kindle.
    """
    if what(None, data) == "png":
        converter = Image()
        converter.load(data)
        return converter.export("gif")
    return data
Example #24
0
def mobify_image(data):
    """Re-encode PNG images as GIF since the Kindle cannot show some PNGs.

    Returns the GIF data for PNG input and the input itself otherwise.
    """
    is_png = what(None, data) == 'png'
    if not is_png:
        return data
    png = Image()
    png.load(data)
    gif_data = png.export('gif')
    return gif_data
Example #25
0
 def cf():
     """Let the user pick a file, put its path into the URL field and tick
     "treat as image" when the file is a jpeg, png or gif."""
     files = choose_files(d, 'select link file', _('Choose file'), select_only_single_file=True)
     if not files:
         return
     path = files[0]
     d.url.setText(path)
     if not (path and os.path.exists(path)):
         return
     with lopen(path, 'rb') as f:
         q = what(f)
     d.treat_as_image.setChecked(q in {'jpeg', 'png', 'gif'})
Example #26
0
def image_from_data(data):
    """Create an image object from *data*, which should be a bytestring.

    A ``QImage`` passed in is returned unchanged. Data that Qt cannot load
    is handed to ``load_jxr_data`` when it is detected as JPEG XR; anything
    else raises ``NotImage``.
    """
    if isinstance(data, QImage):
        return data
    image = QImage()
    if image.loadFromData(data):
        return image
    if what(None, data) == 'jxr':
        return load_jxr_data(data)
    raise NotImage('Not a valid image')
Example #27
0
 def cf():
     # Ask for one file; mirror its path into the URL box and pre-select the
     # "treat as image" checkbox if the content is a JPEG, PNG or GIF.
     picked = choose_files(d, 'select link file', _('Choose file'), select_only_single_file=True)
     if not picked:
         return
     location = picked[0]
     d.url.setText(location)
     if location and os.path.exists(location):
         with lopen(location, 'rb') as handle:
             kind = what(handle)
         d.treat_as_image.setChecked(kind in {'jpeg', 'png', 'gif'})
Example #28
0
def image_data_to_url(data, base='cover'):
    """Wrap raw image bytes in a named in-memory file object.

    :param data: Raw bytes of the image (or PDF) to wrap.
    :param base: File name stem used for the ``name`` attribute of the result.
    :return: A ``BytesIO`` over *data* whose ``name`` is ``<base>.<ext>``,
        where ``<ext>`` is the detected image type, ``pdf`` for data starting
        with ``%PDF-``, or ``jpg`` when nothing is detected.
    """
    from calibre.utils.imghdr import what
    ans = BytesIO(data)
    ext = what(None, data)
    if not ext:
        # Unrecognised image data: special-case PDF, otherwise assume JPEG
        ext = 'pdf' if data.startswith(b'%PDF-') else 'jpg'
    # The name used to be hard-coded to 'cover', silently ignoring *base*;
    # the default value keeps the old result for existing callers.
    ans.name = base + '.' + ext
    return ans
Example #29
0
def mobify_image(data):
    """Return *data* converted to GIF if it is a PNG, else return it as-is.

    The conversion is needed because the Kindle cannot display some PNG
    images. PIL performs the re-encoding entirely in memory.
    """
    if what(None, data) != 'png':
        return data
    from PIL import Image
    source = Image.open(BytesIO(data))
    sink = BytesIO()
    source.save(sink, 'gif')
    return sink.getvalue()
Example #30
0
def read_cover(mi):
    """Populate ``mi.cover_data`` from the file at ``mi.cover`` if needed.

    Nothing is read when ``mi.cover_data`` already carries data or when
    ``mi.cover`` is not set. Errors while reading the file are ignored.
    Returns *mi* in every case.
    """
    already_loaded = mi.cover_data and mi.cover_data[1]
    if already_loaded or not mi.cover:
        return mi
    try:
        with lopen(mi.cover, 'rb') as stream:
            raw = stream.read()
        mi.cover_data = what(None, raw), raw
    except EnvironmentError:
        # Best effort: an unreadable cover file leaves mi untouched
        pass
    return mi
Example #31
0
def mobify_image(data):
    """Convert PNG bytes to GIF bytes; pass every other format through.

    PNG is converted because the Kindle cannot display some PNG images.
    """
    detected = what(None, data)
    if detected != 'png':
        return data
    from PIL import Image
    out = BytesIO()
    Image.open(BytesIO(data)).save(out, 'gif')
    return out.getvalue()
Example #32
0
def read_cover(mi):
    """Load the cover file referenced by ``mi.cover`` into ``mi.cover_data``.

    The file is only read when ``mi.cover_data`` holds no data yet and
    ``mi.cover`` is set. ``mi.cover_data`` becomes a ``(format, bytes)``
    pair. I/O errors are swallowed and *mi* is always returned.
    """
    if not (mi.cover_data and mi.cover_data[1]) and mi.cover:
        try:
            with lopen(mi.cover, 'rb') as cover_file:
                payload = cover_file.read()
            mi.cover_data = what(None, payload), payload
        except EnvironmentError:
            # Deliberately ignored: a missing/unreadable cover is not fatal
            pass
    return mi
Example #33
0
def image_from_data(data):
    """Create an image object from *data*, which should be a bytestring.

    ``QImage`` input is returned as-is. When Qt fails to load the data, JPEG
    XR content is delegated to ``load_jxr_data``; otherwise ``NotImage`` is
    raised with the detected type included in the message.
    """
    if isinstance(data, QImage):
        return data
    image = QImage()
    if image.loadFromData(data):
        return image
    detected = what(None, data)
    if detected == 'jxr':
        return load_jxr_data(data)
    raise NotImage('Not a valid image (detected type: {})'.format(detected))
Example #34
0
def mobify_image(data):
    """Return GIF data for PNG input, and the input itself otherwise.

    PNG input may be given either as raw data or already wrapped in a
    ``StringIO``. The conversion exists because the Kindle cannot display
    some PNG images.
    """
    if what(None, data) != 'png':
        return data
    source = data if isinstance(data, StringIO) else StringIO(data)
    picture = Image.open(source)
    sink = StringIO()
    picture.save(sink, 'GIF')
    return sink.getvalue()
Example #35
0
def mobify_image(data):
    """Convert PNG data to GIF; any other data is returned unchanged.

    The Kindle cannot display some PNG images, hence the conversion. Input
    that is not already a ``StringIO`` is wrapped in one before decoding.
    """
    detected = what(None, data)
    if detected != 'png':
        return data
    if not isinstance(data, StringIO):
        data = StringIO(data)
    decoded = Image.open(data)
    buf = StringIO()
    decoded.save(buf, 'GIF')
    return buf.getvalue()
Example #36
0
def cdb_set_fields(ctx, rd, book_id, library_id):
    """Apply the metadata changes in the request body to one book.

    The body must be JSON or msgpack encoded and of the form
    ``{'changes': {...}, 'loaded_book_ids': [...], 'all_dirtied': bool}``.
    The special ``cover`` key in ``changes`` holds base64 encoded JPEG/PNG
    data (optionally prefixed by a ``data:`` style header ending in a comma)
    or ``None``; it is handed to ``db.set_cover`` instead of ``db.set_field``.

    :return: A dict mapping book ids to the JSON representation of that
        book, for ``book_id`` plus the dirtied books (all of them when
        ``all_dirtied`` is set, otherwise only those in ``loaded_book_ids``).
    :raises HTTPForbidden: if the user has per library restrictions.
    :raises HTTPBadRequest: for an unsupported content type, undecodable
        body, malformed payload or invalid cover data.
    """
    db = get_db(ctx, rd, library_id)
    if ctx.restriction_for(rd, db):
        raise HTTPForbidden(
            'Cannot use the set fields interface with a user who has per library restrictions'
        )
    raw = rd.read()
    ct = rd.inheaders.get('Content-Type', all=True)
    ct = {x.lower().partition(';')[0] for x in ct}
    # Pick the decoder outside the try block so that the "unsupported content
    # type" error is not replaced by the generic decoding error below.
    if MSGPACK_MIME in ct:
        loads = msgpack_loads
    elif 'application/json' in ct:
        loads = json_loads
    else:
        raise HTTPBadRequest('Only JSON or msgpack requests are supported')
    try:
        data = loads(raw)
    except Exception:
        raise HTTPBadRequest('Invalid encoded data')
    try:
        changes, loaded_book_ids = data['changes'], frozenset(
            map(int, data.get('loaded_book_ids', ())))
        all_dirtied = bool(data.get('all_dirtied'))
        if not isinstance(changes, dict):
            raise TypeError('changes must be a dict')
    except Exception:
        raise HTTPBadRequest(
            '''Data must be of the form {'changes': {'title': 'New Title', ...}, 'loaded_book_ids':[book_id1, book_id2, ...]'}'''
        )
    dirtied = set()
    # False means "cover not mentioned"; None is a legitimate value that is
    # passed through to db.set_cover().
    cdata = changes.pop('cover', False)
    if cdata is not False:
        if cdata is not None:
            try:
                cdata = standard_b64decode(
                    cdata.split(',', 1)[-1].encode('ascii'))
            except Exception:
                raise HTTPBadRequest(
                    'Cover data is not valid base64 encoded data')
            try:
                fmt = what(None, cdata)
            except Exception:
                fmt = None
            if fmt not in ('jpeg', 'png'):
                raise HTTPBadRequest('Cover data must be either JPEG or PNG')
        dirtied |= db.set_cover({book_id: cdata})

    for field, value in changes.iteritems():
        dirtied |= db.set_field(field, {book_id: value})
    ctx.notify_changes(db.backend.library_path, metadata(dirtied))
    all_ids = dirtied if all_dirtied else (dirtied & loaded_book_ids)
    all_ids = all_ids | {book_id}
    # Serialise each book under its own id; previously every key was mapped
    # to the JSON of ``book_id``.
    return {bid: book_as_json(db, bid) for bid in all_ids}
Example #37
0
    def extract_images(self, processed_records, output_dir):
        """Write the image records of the book to ``<output_dir>/images``.

        :param processed_records: List of section indices that were already
            handled. Every section visited here is appended to it.
        :param output_dir: Directory under which the ``images`` folder is
            created if it does not exist.
        :return: A dict mapping the running image index to the base name of
            the file chosen for that image.

        Side effects: resets ``self.image_names`` and appends the base name of
        every image that was actually written.
        """
        self.log.debug('Extracting images...')
        output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        image_index = 0
        self.image_names = []
        image_name_map = {}
        start = getattr(self.book_header, 'first_image_index', -1)
        if start > self.num_sections or start < 0:
            # BAEN PRC files have bad headers
            start = 0
        for i in range(start, self.num_sections):
            if i in processed_records:
                continue
            processed_records.append(i)
            data = self.sections[i][0]
            # The index counts every newly visited record, including the
            # non-image ones skipped below, so it must be bumped first.
            image_index += 1
            if data[:4] in {
                    b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC',
                    b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'
            }:
                # This record is a known non image type, no need to try to
                # load the image
                continue

            try:
                imgfmt = what(None, data)
            except Exception:
                # Type detection failed: treat the record as not an image
                continue
            if imgfmt not in {'jpg', 'jpeg', 'gif', 'png', 'bmp'}:
                continue
            if imgfmt == 'jpeg':
                imgfmt = 'jpg'
            if imgfmt == 'gif':
                try:
                    data = gif_data_to_png_data(data)
                    imgfmt = 'png'
                except AnimatedGIF:
                    # Animated GIFs are kept as GIF
                    pass
            path = os.path.join(output_dir, '%05d.%s' % (image_index, imgfmt))
            # NOTE: the map entry is recorded before the file is written, so
            # it remains even if saving fails below.
            image_name_map[image_index] = os.path.basename(path)
            if imgfmt == 'png':
                with open(path, 'wb') as f:
                    f.write(data)
            else:
                try:
                    save_cover_data_to(data, path, minify_to=(10000, 10000))
                except Exception:
                    continue
            self.image_names.append(os.path.basename(path))
        return image_name_map
Example #38
0
    def extract_resources(self):
        """Write font and image resource sections to disk.

        Creates the ``fonts`` and ``images`` directories in the current
        working directory and writes one file per font/image section.

        :return: A list with one entry per resource section: the relative
            href of the file written for it, or ``None`` for ignored records.
        """
        ignored = frozenset((
            b"FLIS", b"FCIS", b"SRCS", b"\xe9\x8e\r\n", b"RESC",
            b"BOUN", b"FDST", b"DATP", b"AUDI", b"VIDE",
        ))
        os.mkdir("fonts")
        os.mkdir("images")

        hrefs = []
        for number, section in enumerate(self.resource_sections, 1):
            payload = section[0]
            signature = payload[:4]
            target = None
            if signature == b"FONT":
                font = read_font_record(payload)
                target = "fonts/%05d.%s" % (number, font["ext"])
                if font["err"]:
                    self.log.warn("Reading font record %d failed: %s" % (number, font["err"]))
                    if font["headers"]:
                        self.log.debug("Font record headers: %s" % font["headers"])
                # Fall back to the raw record when no decoded font data exists
                with open(target.replace("/", os.sep), "wb") as out:
                    out.write(font["font_data"] or font["raw_data"])
                if font["encrypted"]:
                    self.encrypted_fonts.append(target)
            elif signature not in ignored:
                kind = what(None, payload)
                if kind is None:
                    from calibre.utils.magick.draw import identify_data

                    try:
                        kind = identify_data(payload)[2]
                    except Exception:
                        kind = "unknown"
                target = "images/%05d.%s" % (number, kind)
                with open(target.replace("/", os.sep), "wb") as out:
                    out.write(payload)

            hrefs.append(target)

        return hrefs
Example #39
0
def download_resources(browser, resource_cache, output_dir):
    """Save the images and stylesheets of the loaded page into *output_dir*.

    Every ``<img src>`` and stylesheet ``<link href>`` element is rewritten to
    point at the local copy; other ``<link>`` elements and elements whose
    resource could not be loaded are removed from the document.

    :param browser: Object exposing ``css_select``, ``wait_for_resources`` and
        ``log`` for the loaded page.
    :param resource_cache: Dict mapping the SHA1 digest of resource content to
        the path it was saved at; updated in place so identical content is
        written only once.
    :param output_dir: Directory the resource files are written to.
    """
    img_counter = style_counter = 0
    # URL -> list of elements referencing that URL
    resources = defaultdict(list)
    for img in browser.css_select('img[src]', all=True):
        # Using javascript ensures that absolute URLs are returned, direct
        # attribute access does not do that
        src = unicode(img.evaluateJavaScript('this.src').toString()).strip()
        if src:
            resources[src].append(img)
    for link in browser.css_select('link[href]', all=True):
        # Missing type/rel attributes are treated as a CSS stylesheet link
        lt = unicode(link.attribute('type')).strip() or 'text/css'
        rel = unicode(link.attribute('rel')).strip() or 'stylesheet'
        if lt == 'text/css' and rel == 'stylesheet':
            href = unicode(
                link.evaluateJavaScript('this.href').toString()).strip()
            if href:
                resources[href].append(link)
            else:
                link.removeFromDocument()
        else:
            link.removeFromDocument()
    loaded_resources = browser.wait_for_resources(resources)
    for url, raw in loaded_resources.iteritems():
        # Content is de-duplicated by hash, not by URL
        h = hashlib.sha1(raw).digest()
        if h in resource_cache:
            href = os.path.relpath(resource_cache[h],
                                   output_dir).replace(os.sep, '/')
        else:
            # The first referencing element decides whether this is a
            # stylesheet or an image
            elem = resources[url][0]
            if is_tag(elem, 'link'):
                style_counter += 1
                href = 'style_%d.css' % style_counter
            else:
                img_counter += 1
                ext = what(None, raw) or 'jpg'
                if ext == 'jpeg':
                    ext = 'jpg'  # Apparently Moon+ cannot handle .jpeg
                href = 'img_%d.%s' % (img_counter, ext)
            dest = os.path.join(output_dir, href)
            resource_cache[h] = dest
            with open(dest, 'wb') as f:
                f.write(raw)
        for elem in resources[url]:
            elem.setAttribute('href' if is_tag(elem, 'link') else 'src', href)

    # Anything requested but not loaded is dropped from the document
    failed = set(resources) - set(loaded_resources)
    for url in failed:
        browser.log.warn('Failed to download resource:', url)
        for elem in resources[url]:
            elem.removeFromDocument()
Example #40
0
    def extract_resources(self):
        """Write font and image resource sections to disk.

        Creates ``fonts`` and ``images`` in the current working directory,
        stores a parsed RESC record in ``self.resc_data`` and records
        encrypted fonts in ``self.encrypted_fonts``.

        :return: A list with one entry per resource section: the relative
            href of the written file, or ``None`` when nothing was written
            (ignored records, RESC records and placeholder GIFs).
        """
        from calibre.ebooks.mobi.writer2.resources import PLACEHOLDER_GIF
        ignored = frozenset((
            b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN',
            b'FDST', b'DATP', b'AUDI', b'VIDE',
        ))
        os.mkdir('fonts')
        os.mkdir('images')

        hrefs = []
        for number, section in enumerate(self.resource_sections, 1):
            payload = section[0]
            signature = payload[:4]
            target = None
            if signature == b'RESC':
                self.resc_data = read_resc_record(payload)
            elif signature == b'FONT':
                font = read_font_record(payload)
                target = "fonts/%05d.%s" % (number, font['ext'])
                if font['err']:
                    self.log.warn('Reading font record %d failed: %s' %
                                  (number, font['err']))
                    if font['headers']:
                        self.log.debug('Font record headers: %s' %
                                       font['headers'])
                # Fall back to the raw record when no decoded font data exists
                with open(target.replace('/', os.sep), 'wb') as out:
                    out.write(font['font_data'] or font['raw_data'])
                if font['encrypted']:
                    self.encrypted_fonts.append(target)
            elif signature not in ignored and not (
                    len(payload) == len(PLACEHOLDER_GIF)
                    and payload == PLACEHOLDER_GIF):
                kind = what(None, payload)
                if kind is None:
                    from calibre.utils.magick.draw import identify_data
                    try:
                        kind = identify_data(payload)[2]
                    except Exception:
                        kind = 'unknown'
                target = 'images/%05d.%s' % (number, kind)
                with open(target.replace('/', os.sep), 'wb') as out:
                    out.write(payload)

            hrefs.append(target)

        return hrefs
Example #41
0
def download_resources(browser, resource_cache, output_dir):
    """Store the page's images and stylesheets in *output_dir*.

    Image and stylesheet elements are re-pointed at the saved files. Links
    that are not CSS stylesheets, and elements whose resource failed to load,
    are removed from the document. *resource_cache* (content SHA1 digest ->
    saved path) is consulted and updated so identical content is saved once.
    """
    resources = defaultdict(list)
    for img in browser.css_select("img[src]", all=True):
        # Using javascript ensures that absolute URLs are returned, direct
        # attribute access does not do that
        src = unicode(img.evaluateJavaScript("this.src").toString()).strip()
        if src:
            resources[src].append(img)
    for link in browser.css_select("link[href]", all=True):
        link_type = unicode(link.attribute("type")).strip() or "text/css"
        relation = unicode(link.attribute("rel")).strip() or "stylesheet"
        href = ""
        if link_type == "text/css" and relation == "stylesheet":
            href = unicode(link.evaluateJavaScript("this.href").toString()).strip()
        if href:
            resources[href].append(link)
        else:
            # Not a stylesheet, or a stylesheet without a usable URL
            link.removeFromDocument()

    loaded_resources = browser.wait_for_resources(resources)
    img_counter = style_counter = 0
    for url, raw in loaded_resources.iteritems():
        digest = hashlib.sha1(raw).digest()
        if digest in resource_cache:
            href = os.path.relpath(resource_cache[digest], output_dir).replace(os.sep, "/")
        else:
            if is_tag(resources[url][0], "link"):
                style_counter += 1
                href = "style_%d.css" % style_counter
            else:
                img_counter += 1
                ext = what(None, raw) or "jpg"
                if ext == "jpeg":
                    ext = "jpg"  # Apparently Moon+ cannot handle .jpeg
                href = "img_%d.%s" % (img_counter, ext)
            dest = os.path.join(output_dir, href)
            resource_cache[digest] = dest
            with open(dest, "wb") as out:
                out.write(raw)
        for elem in resources[url]:
            attribute = "href" if is_tag(elem, "link") else "src"
            elem.setAttribute(attribute, href)

    for url in set(resources) - set(loaded_resources):
        browser.log.warn("Failed to download resource:", url)
        for elem in resources[url]:
            elem.removeFromDocument()
Example #42
0
    def generate_filename(self, rid, base=None, rid_map=None):
        """Save the image referenced by *rid* and return its file name.

        :param rid: Relationship id, looked up in *rid_map*.
        :param base: Optional preferred file name; defaults to a sanitised
            form of the last path component of the image's source name.
        :param rid_map: Mapping of relationship ids to image source names;
            defaults to ``self.rid_map``.
        :return: The file name (inside ``self.dest_dir``) the image was
            written to. Results are cached per source name in ``self.used``.
        :raises LinkedImageNotFound: if a ``file://`` image does not exist.
        """
        rid_map = self.rid_map if rid_map is None else rid_map
        fname = rid_map[rid]
        if fname in self.used:
            return self.used[fname]
        if fname.startswith('file://'):
            src = fname[len('file://'):]
            if iswindows and src and src[0] == '/':
                src = src[1:]
            if not src or not os.path.exists(src):
                raise LinkedImageNotFound(src)
            with open(src, 'rb') as rawsrc:
                raw = rawsrc.read()
        else:
            raw = self.docx.read(fname)
        base = base or ascii_filename(
            fname.rpartition('/')[-1]).replace(' ', '_') or 'image'
        # rpartition() puts the whole string in the last slot when there is
        # no dot, which used to turn an extension-less name into the
        # extension ('image.<name>'). Treat that case as "no extension".
        stem, dot, suffix = base.rpartition('.')
        if not dot:
            stem, suffix = base, ''
        ext = what(None, raw) or suffix or 'jpeg'
        if ext == 'emf':
            # For an example, see: https://bugs.launchpad.net/bugs/1224849
            self.log(
                'Found an EMF image: %s, trying to extract embedded raster image'
                % base)
            from calibre.utils.wmf.emf import emf_unwrap
            try:
                raw = emf_unwrap(raw)
            except Exception:
                self.log.exception(
                    'Failed to extract embedded raster image from EMF')
            else:
                ext = 'png'

        stem = stem or 'image'
        exists = frozenset(self.used.itervalues())
        c = 1
        name = '%s.%s' % (stem, ext)
        # Append -1, -2, ... to the stem until the name is unused
        while name in exists:
            name = '%s-%d.%s' % (stem, c, ext)
            c += 1
        self.used[fname] = name
        with open(os.path.join(self.dest_dir, name), 'wb') as f:
            f.write(raw)
        self.all_images.add('images/' + name)
        return name
Example #43
0
def download_resources(browser, resource_cache, output_dir):
    """Save images and stylesheets used by the page into *output_dir*.

    Referencing elements are updated to use the local file names. ``<link>``
    elements that are not CSS stylesheets (or have no URL) and elements whose
    resource could not be loaded are removed. *resource_cache* maps the SHA1
    digest of already saved content to its path and is updated in place.
    """
    resources = defaultdict(list)
    for img in browser.css_select('img[src]', all=True):
        # Using javascript ensures that absolute URLs are returned, direct
        # attribute access does not do that
        src = unicode(img.evaluateJavaScript('this.src') or '').strip()
        if src:
            resources[src].append(img)
    for link in browser.css_select('link[href]', all=True):
        mime = unicode(link.attribute('type')).strip() or 'text/css'
        relation = unicode(link.attribute('rel')).strip() or 'stylesheet'
        href = ''
        if mime == 'text/css' and relation == 'stylesheet':
            href = unicode(link.evaluateJavaScript('this.href') or '').strip()
        if not href:
            # Not a stylesheet, or a stylesheet without a usable URL
            link.removeFromDocument()
            continue
        resources[href].append(link)

    loaded_resources = browser.wait_for_resources(resources)
    img_counter = style_counter = 0
    for url, raw in loaded_resources.iteritems():
        digest = hashlib.sha1(raw).digest()
        if digest in resource_cache:
            href = os.path.relpath(resource_cache[digest], output_dir).replace(os.sep, '/')
        else:
            if is_tag(resources[url][0], 'link'):
                style_counter += 1
                href = 'style_%d.css' % style_counter
            else:
                img_counter += 1
                ext = what(None, raw) or 'jpg'
                if ext == 'jpeg':
                    ext = 'jpg'  # Apparently Moon+ cannot handle .jpeg
                href = 'img_%d.%s' % (img_counter, ext)
            dest = os.path.join(output_dir, href)
            resource_cache[digest] = dest
            with open(dest, 'wb') as out:
                out.write(raw)
        for elem in resources[url]:
            target_attr = 'href' if is_tag(elem, 'link') else 'src'
            elem.setAttribute(target_attr, href)

    for url in set(resources) - set(loaded_resources):
        browser.log.warn('Failed to download resource:', url)
        for elem in resources[url]:
            elem.removeFromDocument()
Example #44
0
 def process_image(self, data):
     """Return *data* processed by ``mobify_image``.

     When ``self.process_images`` is false the data is returned untouched.
     If processing fails and the data is a PNG, the image is first rewritten
     with ``optimize_png`` via a temporary file and processing is retried;
     failures for any other image type are re-raised.
     """
     if not self.process_images:
         return data
     try:
         return mobify_image(data)
     except Exception:
         if 'png' != what(None, data):
             raise
         with PersistentTemporaryFile(suffix='.png') as pt:
             pt.write(data)
         try:
             from calibre.utils.img import optimize_png
             optimize_png(pt.name)
             # Close the handle explicitly before the file is removed
             with lopen(pt.name, 'rb') as f:
                 data = f.read()
         finally:
             os.remove(pt.name)
         return mobify_image(data)
Example #45
0
    def extract_resources(self):
        """Write font and image resource sections to disk.

        Creates ``fonts`` and ``images`` in the current working directory,
        stores a parsed RESC record in ``self.resc_data`` and records
        encrypted fonts in ``self.encrypted_fonts``.

        :return: A list with exactly one entry per resource section: the
            relative href of the written file, or ``None`` when nothing was
            written (ignored records, RESC records and placeholder GIFs).
        """
        from calibre.ebooks.mobi.writer2.resources import PLACEHOLDER_GIF
        resource_map = []
        for x in ('fonts', 'images'):
            os.mkdir(x)

        for i, sec in enumerate(self.resource_sections):
            fname_idx = i+1
            data = sec[0]
            typ = data[:4]
            href = None
            if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'BOUN',
                       b'FDST', b'DATP', b'AUDI', b'VIDE'}:
                pass  # Ignore these records
            elif typ == b'RESC':
                self.resc_data = read_resc_record(data)
            elif typ == b'FONT':
                font = read_font_record(data)
                href = "fonts/%05d.%s" % (fname_idx, font['ext'])
                if font['err']:
                    self.log.warn('Reading font record %d failed: %s'%(
                        fname_idx, font['err']))
                    if font['headers']:
                        self.log.debug('Font record headers: %s'%font['headers'])
                with open(href.replace('/', os.sep), 'wb') as f:
                    f.write(font['font_data'] if font['font_data'] else
                            font['raw_data'])
                if font['encrypted']:
                    self.encrypted_fonts.append(href)
            elif len(data) == len(PLACEHOLDER_GIF) and data == PLACEHOLDER_GIF:
                # Placeholder images are not written out, but they must still
                # get a (None) slot below. Skipping the append with
                # ``continue`` shifted the position of every later entry.
                pass
            else:
                imgtype = what(None, data)
                if imgtype is None:
                    from calibre.utils.magick.draw import identify_data
                    try:
                        imgtype = identify_data(data)[2]
                    except Exception:
                        imgtype = 'unknown'
                href = 'images/%05d.%s'%(fname_idx, imgtype)
                with open(href.replace('/', os.sep), 'wb') as f:
                    f.write(data)

            resource_map.append(href)

        return resource_map
Example #46
0
    def ask_link(self):
        """Show a modal dialog asking the user for a link target.

        :return: A ``(link, name, is_image)`` tuple. ``link`` and ``name`` are
            ``None`` when the dialog is not accepted. ``is_image`` is true
            only when the link is an existing local file whose content is
            detected as JPEG, PNG or GIF.
        """
        d = QDialog(self)
        d.setWindowTitle(_('Create link'))
        layout = QFormLayout()
        d.setLayout(layout)
        d.url = QLineEdit(d)
        d.name = QLineEdit(d)
        d.setMinimumWidth(600)
        d.bb = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
        d.br = browse = QPushButton(_('&Browse'))
        browse.setIcon(QIcon(I('document_open.png')))

        def cf():
            # Copy the path of the single chosen file into the URL field
            chosen = choose_files(d,
                                  'select link file',
                                  _('Choose file'),
                                  select_only_single_file=True)
            if chosen:
                d.url.setText(chosen[0])

        browse.clicked.connect(cf)
        d.la = help_label = QLabel(
            _('Enter a URL. You can also choose to create a link to a file on '
              'your computer. If the selected file is an image, it will be '
              'inserted as an image. Note that if you create a link to a file on '
              'your computer, it will stop working if the file is moved.'))
        help_label.setWordWrap(True)
        help_label.setStyleSheet('QLabel { margin-bottom: 1.5ex }')
        layout.setWidget(0, layout.SpanningRole, help_label)
        layout.addRow(_('Enter &URL:'), d.url)
        layout.addRow(_('Enter &name (optional):'), d.name)
        layout.addRow(_('Choose a file on your computer:'), d.br)
        layout.addRow(d.bb)
        d.bb.accepted.connect(d.accept)
        d.bb.rejected.connect(d.reject)
        d.resize(d.sizeHint())

        if d.exec_() != d.Accepted:
            return None, None, False
        link = unicode(d.url.text()).strip()
        name = unicode(d.name.text()).strip()
        is_image = False
        if link and os.path.exists(link):
            with lopen(link, 'rb') as stream:
                is_image = what(stream) in {'jpeg', 'png', 'gif'}
        return link, name, is_image
Example #47
0
 def generate_filename(self, rid, base=None):
     """Save the image referenced by *rid* and return its file name.

     :param rid: Relationship id, looked up in ``self.rid_map``.
     :param base: Optional preferred file name; defaults to a sanitised form
         of the last path component of the image's name in the document.
     :return: The file name (inside ``self.dest_dir``) the image was written
         to. Results are cached per relationship id in ``self.used``.
     """
     if rid in self.used:
         return self.used[rid]
     raw = self.docx.read(self.rid_map[rid])
     base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_')
     # rpartition() puts the whole string in the last slot when there is no
     # dot, so an extension-less name used to become the extension and leave
     # an empty stem. Treat that case as "no extension".
     stem, dot, suffix = base.rpartition('.')
     if not dot:
         stem, suffix = base, ''
     ext = what(None, raw) or suffix or 'jpeg'
     stem = stem or 'image'
     exists = frozenset(self.used.itervalues())
     c = 1
     name = '%s.%s' % (stem, ext)
     # Derive every candidate from the original stem so the counters do not
     # pile up (a-1.png, a-2.png, ... instead of a-1-2.png)
     while name in exists:
         name = '%s-%d.%s' % (stem, c, ext)
         c += 1
     self.used[rid] = name
     with open(os.path.join(self.dest_dir, name), 'wb') as f:
         f.write(raw)
     self.all_images.add('images/' + name)
     return name
Example #48
0
 def process_image(self, data):
     """Return *data* processed for inclusion in the output.

     When ``self.process_images`` is false the data is returned untouched.
     Otherwise it is passed through ``mobify_image`` (if
     ``self.opts.mobi_keep_original_images`` is set) or ``rescale_image``,
     and then through ``ensure_jpeg_has_jfif``. If that fails and the data is
     a PNG, the image is rewritten with ``optimize_png`` via a temporary
     file and processing is retried; other failures are re-raised.
     """
     if not self.process_images:
         return data
     func = mobify_image if self.opts.mobi_keep_original_images else rescale_image
     try:
         return ensure_jpeg_has_jfif(func(data))
     except Exception:
         if 'png' != what(None, data):
             raise
         with PersistentTemporaryFile(suffix='.png') as pt:
             pt.write(data)
         try:
             from calibre.utils.img import optimize_png
             optimize_png(pt.name)
             # Close the handle explicitly before the file is removed
             with lopen(pt.name, 'rb') as f:
                 data = f.read()
         finally:
             os.remove(pt.name)
         # NOTE(review): the retry does not go through ensure_jpeg_has_jfif,
         # unlike the first attempt - confirm this is intended.
         return func(data)
Example #49
0
 def process_image(self, data):
     """Return *data* processed for inclusion in the output.

     When ``self.process_images`` is false the data is returned untouched.
     Otherwise it is passed through ``mobify_image`` (if
     ``self.opts.mobi_keep_original_images`` is set) or ``rescale_image``.
     If that fails and the data is a PNG, the image is rewritten with
     ``optimize_png`` via a temporary file and processing is retried; other
     failures are re-raised.
     """
     if not self.process_images:
         return data
     func = mobify_image if self.opts.mobi_keep_original_images else rescale_image
     try:
         return func(data)
     except Exception:
         if 'png' != what(None, data):
             raise
         with PersistentTemporaryFile(suffix='.png') as pt:
             pt.write(data)
         try:
             from calibre.utils.img import optimize_png
             optimize_png(pt.name)
             # Close the handle explicitly before the file is removed
             with lopen(pt.name, 'rb') as f:
                 data = f.read()
         finally:
             os.remove(pt.name)
         return func(data)
Example #50
0
 def generate_filename(self, rid, base=None):
     """Save the image referenced by *rid* and return its file name.

     :param rid: Relationship id, looked up in ``self.rid_map``.
     :param base: Optional preferred file name; defaults to a sanitised form
         of the last path component of the image's name in the document.
     :return: The file name (inside ``self.dest_dir``) the image was written
         to. Results are cached per relationship id in ``self.used``.
     """
     if rid in self.used:
         return self.used[rid]
     raw = self.docx.read(self.rid_map[rid])
     base = base or ascii_filename(
         self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_')
     # Without a dot rpartition() returns the whole name as the last item,
     # which previously became the extension and left an empty stem.
     stem, dot, suffix = base.rpartition('.')
     if not dot:
         stem, suffix = base, ''
     ext = what(None, raw) or suffix or 'jpeg'
     stem = stem or 'image'
     exists = frozenset(self.used.itervalues())
     c = 1
     name = '%s.%s' % (stem, ext)
     # Always number from the original stem: a-1.png, a-2.png, ... rather
     # than the accumulating a-1-2.png
     while name in exists:
         name = '%s-%d.%s' % (stem, c, ext)
         c += 1
     self.used[rid] = name
     with open(os.path.join(self.dest_dir, name), 'wb') as f:
         f.write(raw)
     self.all_images.add('images/' + name)
     return name
Example #51
0
 def dataize_svg(self, item, svg=None):
     """Point xlink hrefs inside an SVG at temporary copies of their targets.

     For every SVG element with an ``xlink:href`` that resolves to an item in
     the manifest, the item's data is written to a temporary file and the
     attribute is replaced by that file's path. The temporary files are
     recorded in ``self.temp_files``.

     :param item: Manifest item the SVG belongs to; used to resolve hrefs.
     :param svg: SVG tree to modify in place; defaults to ``item.data``.
     :return: The (modified) SVG tree.
     """
     if svg is None:
         svg = item.data
     manifest_hrefs = self.oeb.manifest.hrefs
     for node in xpath(svg, '//svg:*[@xl:href]'):
         target = urldefrag(urlnormalize(node.attrib[XLINK('href')]))[0]
         if not target:
             continue
         resolved = item.abshref(target)
         if resolved in manifest_hrefs:
             payload = str(manifest_hrefs[resolved])
             extension = what(None, payload) or 'jpg'
             with PersistentTemporaryFile(suffix='.' + extension) as tmp:
                 tmp.write(payload)
                 self.temp_files.append(tmp.name)
             node.attrib[XLINK('href')] = tmp.name
     return svg
Example #52
0
 def dataize_svg(self, item, svg=None):
     """Replace manifest references in an SVG with temporary file paths.

     Each SVG element whose ``xlink:href`` resolves (relative to *item*) to a
     manifest entry gets that entry's data dumped into a temporary file, and
     its ``xlink:href`` rewritten to the file's path. Paths of the created
     files are appended to ``self.temp_files``.

     :param item: Manifest item owning the SVG.
     :param svg: Tree to process in place; ``item.data`` when omitted.
     :return: The processed tree.
     """
     tree = item.data if svg is None else svg
     known = self.oeb.manifest.hrefs
     for elem in xpath(tree, '//svg:*[@xl:href]'):
         normalized = urlnormalize(elem.attrib[XLINK('href')])
         path = urldefrag(normalized)[0]
         if not path:
             continue
         absolute = item.abshref(path)
         if absolute not in known:
             continue
         blob = str(known[absolute])
         suffix = '.'+(what(None, blob) or 'jpg')
         with PersistentTemporaryFile(suffix=suffix) as pt:
             pt.write(blob)
             self.temp_files.append(pt.name)
         elem.attrib[XLINK('href')] = pt.name
     return tree
Example #53
0
 def ask_link(self):
     """Show a modal dialog asking the user for a link target.

     :return: A ``(link, name, is_image)`` tuple. ``link`` and ``name`` are
         ``None`` when the dialog is not accepted. ``is_image`` is true only
         when the link is an existing local file whose content is detected
         as JPEG, PNG or GIF.
     """
     d = QDialog(self)
     d.setWindowTitle(_('Create link'))
     form = QFormLayout()
     d.setLayout(form)
     d.url = QLineEdit(d)
     d.name = QLineEdit(d)
     d.setMinimumWidth(600)
     d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel)
     d.br = browse = QPushButton(_('&Browse'))
     browse.setIcon(QIcon(I('document_open.png')))

     def cf():
         # Copy the path of the single chosen file into the URL field
         picked = choose_files(d, 'select link file', _('Choose file'), select_only_single_file=True)
         if picked:
             d.url.setText(picked[0])

     browse.clicked.connect(cf)
     d.la = note = QLabel(_(
         'Enter a URL. You can also choose to create a link to a file on '
         'your computer. If the selected file is an image, it will be '
         'inserted as an image. Note that if you create a link to a file on '
         'your computer, it will stop working if the file is moved.'))
     note.setWordWrap(True)
     note.setStyleSheet('QLabel { margin-bottom: 1.5ex }')
     form.setWidget(0, form.SpanningRole, note)
     form.addRow(_('Enter &URL:'), d.url)
     form.addRow(_('Enter &name (optional):'), d.name)
     form.addRow(_('Choose a file on your computer:'), d.br)
     form.addRow(d.bb)
     d.bb.accepted.connect(d.accept)
     d.bb.rejected.connect(d.reject)
     d.resize(d.sizeHint())

     if d.exec_() != d.Accepted:
         return None, None, False
     link = unicode(d.url.text()).strip()
     name = unicode(d.name.text()).strip()
     is_image = False
     if link and os.path.exists(link):
         with lopen(link, 'rb') as stream:
             is_image = what(stream) in {'jpeg', 'png', 'gif'}
     return link, name, is_image
Example #54
0
File: mobi8.py Project: sss/calibre
    def extract_resources(self):
        """Build ``self.resource_map`` from ``self.resource_records``.

        ``self.resource_map`` becomes a list of ``(name, payload)`` tuples,
        where ``name`` is ``<folder>/<record index><tag>.<ext>``. HUFF, CDIC
        and INDX records are skipped. Fonts go to ``fonts/``, records detected
        as images to ``images/`` and everything else to ``binary/`` (tagged
        with the record signature when it is a known type).

        :raises ValueError: if a font record cannot be read.
        """
        known_types = frozenset((
            b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN',
            b'FDST', b'DATP', b'AUDI', b'VIDE',
        ))
        skipped_types = frozenset((b'HUFF', b'CDIC', b'INDX'))
        eof_marker = b'\xe9\x8e\r\n'

        self.resource_map = []
        for number, record in enumerate(self.resource_records):
            raw = record.raw
            signature = raw[:4]
            if signature in skipped_types:
                continue
            # TODO: Ignore CNCX records as well
            folder, extension, tag, payload = 'binary', 'dat', '', raw
            if signature == b'FONT':
                font = read_font_record(raw)
                if font['err']:
                    raise ValueError(
                        'Failed to read font record: %s Headers: %s' %
                        (font['err'], font['headers']))
                payload = font['font_data'] or font['raw_data']
                folder, extension = 'fonts', font['ext']
            elif signature == eof_marker:
                tag = '-EOF'
            elif signature in known_types:
                tag = '-' + signature.decode('ascii')
            else:
                detected = what(None, raw)
                if detected:
                    folder, extension = 'images', detected

            name = '%s/%06d%s.%s' % (folder, number, tag, extension)
            self.resource_map.append((name, payload))
Example #55
0
 def __call__(self, oeb, opts):
     """Replace ``data:`` image URIs in the spine with real image items.

     Every ``<img>`` whose ``src`` is a ``data:image/...`` URI is decoded
     (base64 or URL-quoted), its format detected, and its ``src`` rewritten
     to the href returned by ``self.convert_image_data_uri``. Undecodable
     data and data of unknown format are logged and left untouched.
     """
     from calibre.utils.imghdr import what
     self.log = oeb.log
     find_images = XPath('//h:img[@src]')
     for item in oeb.spine:
         root = item.data
         if not hasattr(root, 'xpath'):
             continue
         for img in find_images(root):
             src = img.get('src', '')
             if not src.startswith('data:'):
                 continue
             header, _sep, payload = src.partition(',')
             if not header.startswith('data:image/') or not payload:
                 continue
             if ';base64' in header:
                 payload = re.sub(r'\s+', '', payload)
                 from polyglot.binary import from_base64_bytes
                 try:
                     payload = from_base64_bytes(payload)
                 except Exception:
                     self.log.error(
                         'Found invalid base64 encoded data URI, ignoring it'
                     )
                     continue
             else:
                 payload = urlunquote(payload)
             payload = as_bytes(payload)
             detected = what(None, payload)
             if not detected:
                 self.log.warn(
                     'Image encoded as data URL has unknown format, ignoring'
                 )
                 continue
             new_href = self.convert_image_data_uri(payload, detected, oeb)
             img.set('src', item.relhref(new_href))
Example #56
0
def cdb_set_fields(ctx, rd, book_id, library_id):
    """Apply a set of field changes (and optionally a cover) to one book.

    The request body must be msgpack or JSON (selected by the
    ``Content-Type`` header) and decode to a mapping with two keys:
    ``changes`` (a dict of field name -> new value, with the optional
    special key ``cover``) and ``loaded_book_ids`` (an iterable of values
    convertible to ``int``).

    A ``cover`` value of ``None`` is passed straight to ``db.set_cover``;
    any other value is treated as base64 text (anything up to the first
    comma is discarded, which tolerates a data-URL prefix) that must
    decode to JPEG or PNG data.

    :param ctx: Server context; used for restriction checks and change
        notification.
    :param rd: Request data; supplies the body and the request headers.
    :param book_id: Id of the book being edited.
    :param library_id: Library identifier handed to ``get_db``.
    :returns: A dict mapping book id -> ``book_as_json(db, id)`` for the
        edited book plus every dirtied book listed in ``loaded_book_ids``.
    :raises HTTPForbidden: If ``ctx.restriction_for`` reports a
        restriction for this user and library.
    :raises HTTPBadRequest: For an unsupported content type, an
        undecodable or malformed payload, or invalid cover data.
    """
    db = get_db(ctx, rd, library_id)
    if ctx.restriction_for(rd, db):
        raise HTTPForbidden('Cannot use the set fields interface with a user who has per library restrictions')
    raw = rd.read()
    ct = rd.inheaders.get('Content-Type', all=True)
    ct = {x.lower().partition(';')[0] for x in ct}
    # Choose the decoder before entering the try block: raising the
    # "unsupported type" error inside it would be caught by the generic
    # handler below and reported as 'Invalid encoded data' instead.
    if MSGPACK_MIME in ct:
        loads = msgpack_loads
    elif 'application/json' in ct:
        loads = json_loads
    else:
        raise HTTPBadRequest('Only JSON or msgpack requests are supported')
    try:
        data = loads(raw)
        changes, loaded_book_ids = data['changes'], frozenset(map(int, data['loaded_book_ids']))
        if not isinstance(changes, dict):
            # Reject early so a malformed payload is a 400, not a failure
            # at changes.pop() further down.
            raise TypeError('changes must be a dict')
    except Exception:
        raise HTTPBadRequest('Invalid encoded data')
    dirtied = set()
    # False is the "no cover key present" sentinel; None is a valid cover
    # value and is forwarded to set_cover unchanged.
    cdata = changes.pop('cover', False)
    if cdata is not False:
        if cdata is not None:
            try:
                cdata = standard_b64decode(cdata.split(',', 1)[-1].encode('ascii'))
            except Exception:
                raise HTTPBadRequest('Cover data is not valid base64 encoded data')
            try:
                fmt = what(None, cdata)
            except Exception:
                fmt = None
            if fmt not in ('jpeg', 'png'):
                raise HTTPBadRequest('Cover data must be either JPEG or PNG')
        dirtied |= db.set_cover({book_id: cdata})

    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only
    for field, value in changes.items():
        dirtied |= db.set_field(field, {book_id: value})
    ctx.notify_changes(db.backend.library_path, metadata(dirtied))
    # Serialize each affected book by its own id (previously every entry
    # was built from book_id, so all values were the edited book's JSON).
    return {bid: book_as_json(db, bid) for bid in (dirtied & loaded_book_ids) | {book_id}}
Example #57
0
 def generate_filename(self, rid, base=None, rid_map=None):
     """Write the resource for *rid* into ``self.dest_dir`` and return its name.

     The file name is derived from *base* (or, when not given, from the
     last path component of the resource name after ``ascii_filename``
     and space-to-underscore replacement). The extension is the format
     detected from the data by ``what``, else the extension already on
     the name, else ``jpeg``. If the resulting name is already taken by
     another entry in ``self.used``, ``-1``, ``-2``, ... is appended to
     the stem until it is unique.

     Results are remembered in ``self.used`` keyed by resource name, so a
     repeated call for the same resource returns the earlier name without
     writing again. The written file is also recorded in
     ``self.all_images`` as ``'images/' + name``.

     :param rid: Key into *rid_map* identifying the resource.
     :param base: Optional preferred file name.
     :param rid_map: Optional mapping of rid -> resource name; defaults
         to ``self.rid_map``.
     :returns: The file name (relative to ``self.dest_dir``) used.
     """
     rid_map = self.rid_map if rid_map is None else rid_map
     fname = rid_map[rid]
     if fname in self.used:
         return self.used[fname]
     raw = self.docx.read(fname)
     base = base or ascii_filename(fname.rpartition('/')[-1]).replace(' ', '_') or 'image'
     stem, dot, suffix = base.rpartition('.')
     if not dot:
         # rpartition() puts the whole string in the last slot when there
         # is no separator; without this, an extension-less name would be
         # used as the extension and its stem would be lost.
         stem, suffix = base, ''
     ext = what(None, raw) or suffix or 'jpeg'
     stem = stem or 'image'
     # .values() works on both Python 2 and 3; .itervalues() is Python 2 only
     exists = frozenset(self.used.values())
     name = '%s.%s' % (stem, ext)
     c = 1
     while name in exists:
         name = '%s-%d.%s' % (stem, c, ext)
         c += 1
     self.used[fname] = name
     with open(os.path.join(self.dest_dir, name), 'wb') as f:
         f.write(raw)
     self.all_images.add('images/' + name)
     return name