예제 #1
0
파일: opf.py 프로젝트: daviebf/calibre
def get_metadata(stream):
    '''
    Parse an OPF document and return the pieces callers need from it.

    ``stream`` may be raw ``bytes`` (it is wrapped in a ``DummyFile`` first)
    or any object ``parse_opf`` accepts directly.

    Returns a 4-tuple: the book metadata object, the value produced by
    ``parse_opf_version`` for the root ``version`` attribute, the OPF's
    ``raster_cover`` and the result of ``first_spine_item()``.
    '''
    source = DummyFile(stream) if isinstance(stream, bytes) else stream
    package = parse_opf(source)
    version = parse_opf_version(package.get('version'))
    # The tree is already parsed, so no stream is handed to OPF.
    reader = OPF(None, preparsed_opf=package, read_toc=False)
    book_metadata = reader.to_book_metadata()
    return book_metadata, version, reader.raster_cover, reader.first_spine_item()
예제 #2
0
파일: lit.py 프로젝트: MarioJC/calibre
def get_metadata(stream):
    '''
    Read book metadata, and a cover when one can be found, from a LIT file.

    The OPF document stored in the LIT container is parsed for the metadata.
    Every guide entry whose type contains "cover" is looked up in the LIT
    manifest and the matching files are collected as cover candidates.

    :param stream: object handed to ``LitContainer``.
    :return: the object built by ``OPF.to_book_metadata()``. ``cover_data``
        is set to ``('jpg', data)`` only when at least one cover candidate
        could be read; previously a book without a readable cover raised
        ``IndexError`` and the parsed metadata was lost.
    '''
    from calibre.ebooks.lit.reader import LitContainer
    from calibre.utils.logging import Log
    container = LitContainer(stream, Log())
    src = container.get_metadata().encode('utf-8')
    litfile = container._litfile
    opf = OPF(cStringIO.StringIO(src), os.getcwdu())
    mi = opf.to_book_metadata()
    covers = []
    for ref in opf.iterguide():
        if 'cover' not in ref.get('type', '').lower():
            continue
        ctype = ref.get('type')
        href = ref.get('href', '')
        # The manifest path may carry the ampersand percent-encoded.
        candidates = [href, href.replace('&', '%26')]
        for entry in litfile.manifest.values():
            if entry.path in candidates:
                try:
                    covers.append((litfile.get_file('/data/'+entry.internal),
                                   ctype))
                except Exception:
                    # Best effort: an unreadable candidate is skipped.
                    pass
                break
    if covers:
        # Largest payload first; key= works on both Python 2 and 3.
        covers.sort(key=lambda c: len(c[0]), reverse=True)
        idx = 0
        # Pick the runner-up when its guide type is the largest one's type
        # with a '-standard' suffix.
        if len(covers) > 1 and covers[1][1] == covers[0][1]+'-standard':
            idx = 1
        mi.cover_data = ('jpg', covers[idx][0])
    return mi
예제 #3
0
파일: mobi6.py 프로젝트: wh0197m/calibre
 def read_embedded_metadata(self, root, elem, guide):
     '''
     Parse embedded OPF metadata and detect the cover image from the guide.

     ``elem`` is serialized, wrapped in a ``<package>`` element and parsed
     with ``OPF``; the result is stored in ``self.embedded_mi``.

     When ``guide`` is given, its first reference whose type contains
     "cover" is resolved to an element of ``root`` by id. The first ``img``
     at or after that element (in document order) provides
     ``self.embedded_mi.cover`` and is removed from the tree.

     :param root: element tree searched for the cover anchor.
     :param elem: element holding the metadata markup.
     :param guide: guide element, or ``None`` to skip cover detection.
     '''
     raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
             html.tostring(elem, encoding='utf-8') + '</package>'
     stream = cStringIO.StringIO(raw)
     opf = OPF(stream)
     self.embedded_mi = opf.to_book_metadata()
     if guide is None:
         return
     for ref in guide.xpath('descendant::reference'):
         if 'cover' not in ref.get('type', '').lower():
             continue
         href = ref.get('href', '')
         if href.startswith('#'):
             href = href[1:]
         # Pass the id as an XPath variable: interpolating it into the
         # expression breaks when the href contains a double quote.
         anchors = root.xpath('//*[@id=$anchor_id]', anchor_id=href)
         if anchors:
             cpos = anchors[0]
             reached = False
             for node in root.iter():
                 if node is cpos:
                     reached = True
                 if reached and node.tag == 'img':
                     self.embedded_mi.cover = node.get('src', None)
                     node.getparent().remove(node)
                     break
         # Only the first cover reference is considered.
         break
예제 #4
0
	def handle_zip_of_opf_files(self, stream):
		'''
		Given a zip of OPF files, either merge them or add them to the library.

		Each ``.opf`` member is parsed; when ``self.extract_id`` finds an id
		and ``self.get_casanova_metadata`` returns existing metadata, that book
		is updated, otherwise a new book is imported. Members ending in
		``jpg``/``png``/``gif`` whose base name is a key of ``self.book_map``
		are set as the cover of the mapped book.

		A failure on one OPF member is reported with a traceback and does not
		stop processing of the remaining members.

		:param stream: passed to ``ZipFile`` for reading.
		:return: dict with the ``'updated'`` and ``'added'`` counters.
		'''
		import traceback
		result = {'updated':0, 'added':0}
		with ZipFile(stream, 'r') as zf:
			self.start_applying_updates()
			for zi in zf.infolist():
				ext = zi.filename.rpartition('.')[-1].lower()
				if ext == 'opf':
					try:
						raw = zf.open(zi)
						opf = OPF(raw)
						mi = opf.to_book_metadata()
						casanova_id = self.extract_id(mi)
						if casanova_id:
							book_mi = self.get_casanova_metadata(casanova_id['id'])
							if book_mi:
								# Update an existing book's metadata!
								result['updated'] = result['updated'] + 1
								self.apply_metadata_update(casanova_id['id'], book_mi, mi)
							else:
								# Create a new book entry
								result['added'] = result['added'] + 1
								self.model.db.import_book(mi,[])
					except Exception:
						# Keep going with the other members, but leave a trace
						# instead of silently discarding the error.
						traceback.print_exc()
				elif ext in {'jpg', 'png', 'gif'}:
					# try and handle the cover
					casanova_id = zi.filename.partition('.')[0].lower()
					if casanova_id in self.book_map:
						book_id = self.book_map[casanova_id]
						raw = zf.open(zi)
						self.db.set_cover(book_id, raw)
			self.finish_applying_updates()
			return result
예제 #5
0
    def process_dir(self, dirpath, filenames, book_id):
        '''
        Collect the restorable information for one book directory.

        Reads ``metadata.opf`` from ``dirpath``, lists the ebook files among
        ``filenames`` with their upper-cased extension, size and base name,
        and appends a record to ``self.books`` when the OPF's application id
        equals ``book_id``; otherwise ``dirpath`` is appended to
        ``self.mismatched_dirs``. Author links found in the metadata are
        merged into ``self.authors_links``.
        '''
        book_id = int(book_id)
        ebook_files = [name for name in filenames if self.is_ebook_file(name)]
        extensions = [os.path.splitext(name)[1][1:].upper() for name in ebook_files]
        file_sizes = [os.path.getsize(os.path.join(dirpath, name)) for name in ebook_files]
        base_names = [os.path.splitext(name)[0] for name in ebook_files]

        opf_path = os.path.join(dirpath, 'metadata.opf')
        parsed_opf = OPF(opf_path, basedir=dirpath)
        mi = parsed_opf.to_book_metadata()
        annotations = tuple(parsed_opf.read_annotations())
        opf_mtime = os.path.getmtime(opf_path)
        # Library-relative path, always with forward slashes
        relative = os.path.relpath(dirpath, self.src_library_path)
        library_path = relative.replace(os.sep, '/')

        if int(mi.application_id) != book_id:
            self.mismatched_dirs.append(dirpath)
        else:
            record = {
                'mi': mi,
                'timestamp': opf_mtime,
                'formats': list(zip(extensions, file_sizes, base_names)),
                'id': book_id,
                'dirpath': dirpath,
                'path': library_path,
                'annotations': annotations
            }
            self.books.append(record)

        for author, link in iteritems(mi.get('author_link_map', {})):
            known_link, known_timestamp = self.authors_links.get(author, (None, None))
            if known_link is not None:
                # Replace a stored link only when it differs and was recorded
                # from metadata older than this book's.
                if known_link == link or not (known_timestamp < mi.timestamp):
                    continue
            self.authors_links[author] = (link, mi.timestamp)
예제 #6
0
파일: extz.py 프로젝트: MarioJC/calibre
def get_metadata(stream, extract_cover=True):
    '''
    Read metadata from a zip-based book that carries an OPF file.

    The OPF member selected by ``get_first_opf_name`` is parsed. When
    ``extract_cover`` is true the cover is taken from the OPF's
    ``raster_cover`` or, failing that, from the first ``meta name="cover"``
    element whose content ends in a jpeg/jpg/png extension.

    Any error stops processing and returns whatever metadata was gathered
    up to that point (initially an "Unknown" placeholder).
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    try:
        with ZipFile(stream) as archive:
            opf_bytes = archive.read(get_first_opf_name(archive))
            opf = OPF(StringIO(opf_bytes))
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if not cover_href:
                    cover_metas = opf.metadata.xpath('//*[local-name()="meta" and @name="cover"]')
                    for cover_meta in cover_metas:
                        content = cover_meta.get('content')
                        extension = content.rpartition('.')[2].lower()
                        if extension in {'jpeg', 'jpg', 'png'}:
                            cover_href = content
                            break
                if cover_href:
                    try:
                        suffix = os.path.splitext(cover_href)[1]
                        mi.cover_data = (suffix, archive.read(cover_href))
                    except Exception:
                        # A missing or unreadable cover is not fatal.
                        pass
    except Exception:
        pass
    return mi
예제 #7
0
def get_metadata(stream, extract_cover=True):
    '''
    Return the metadata found in the OPF file of a zipped book.

    ``stream`` is rewound and opened with ``ZipFile``. With
    ``extract_cover`` enabled, the cover image named by the OPF's
    ``raster_cover`` (or by a ``meta name="cover"`` element pointing at a
    jpeg/jpg/png file) is read into ``cover_data``.

    Errors are swallowed: the metadata object available when the error
    occurred is returned.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    raster_extensions = {'jpeg', 'jpg', 'png'}
    try:
        with ZipFile(stream) as zf:
            opf_member = get_first_opf_name(zf)
            opf = OPF(StringIO(zf.read(opf_member)))
            mi = opf.to_book_metadata()
            if extract_cover:
                href = opf.raster_cover
                if not href:
                    # Fall back to a <meta name="cover"> that names an image
                    for node in opf.metadata.xpath('//*[local-name()="meta" and @name="cover"]'):
                        target = node.get('content')
                        if target.rpartition('.')[2].lower() not in raster_extensions:
                            continue
                        href = target
                        break
                if href:
                    try:
                        fmt = os.path.splitext(href)[1]
                        data = zf.read(href)
                        mi.cover_data = (fmt, data)
                    except Exception:
                        pass
    except Exception:
        return mi
    return mi
예제 #8
0
 def read_embedded_metadata(self, root, elem, guide):
     '''
     Build ``self.embedded_mi`` from embedded OPF markup and locate the cover.

     The serialized ``elem`` is wrapped in ``<package>`` and parsed with
     ``OPF``. If ``guide`` is not ``None``, the first guide reference whose
     type contains "cover" is followed: its href (without a leading ``#``)
     is matched against element ids in ``root``, and the first ``img``
     found at or after the matching element becomes the cover. That ``img``
     element is removed from ``root``.

     :param root: tree in which the cover anchor and image are searched.
     :param elem: element containing the metadata to parse.
     :param guide: guide element or ``None``.
     '''
     raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
             html.tostring(elem, encoding='utf-8') + '</package>'
     stream = cStringIO.StringIO(raw)
     opf = OPF(stream)
     self.embedded_mi = opf.to_book_metadata()
     if guide is not None:
         for ref in guide.xpath('descendant::reference'):
             if 'cover' in ref.get('type', '').lower():
                 href = ref.get('href', '')
                 if href.startswith('#'):
                     href = href[1:]
                 # Use an XPath variable so that quotes inside the href
                 # cannot produce an invalid expression.
                 anchors = root.xpath('//*[@id=$target]', target=href)
                 if anchors:
                     cpos = anchors[0]
                     reached = False
                     for candidate in root.iter():
                         if candidate is cpos:
                             reached = True
                         if reached and candidate.tag == 'img':
                             cover = candidate.get('src', None)
                             self.embedded_mi.cover = cover
                             candidate.getparent().remove(candidate)
                             break
                 # Stop after the first cover reference.
                 break
예제 #9
0
def get_metadata(stream):
    '''
    Read book metadata, and a cover when one can be found, from a LIT file.

    The OPF document stored in the LIT container supplies the metadata.
    Guide entries whose type contains "cover" are matched against the LIT
    manifest by path and the matching files become cover candidates.

    :param stream: object handed to ``LitContainer``.
    :return: the object built by ``OPF.to_book_metadata()``. ``cover_data``
        is set to ``('jpg', data)`` only when a cover candidate was read;
        a book without one no longer fails with ``IndexError``.
    '''
    from calibre.ebooks.lit.reader import LitContainer
    from calibre.utils.logging import Log
    container = LitContainer(stream, Log())
    src = container.get_metadata().encode('utf-8')
    litfile = container._litfile
    opf = OPF(io.BytesIO(src), getcwd())
    mi = opf.to_book_metadata()
    covers = []
    for ref in opf.iterguide():
        if 'cover' not in ref.get('type', '').lower():
            continue
        ctype = ref.get('type')
        href = ref.get('href', '')
        # The manifest path may carry the ampersand percent-encoded.
        candidates = [href, href.replace('&', '%26')]
        for entry in litfile.manifest.values():
            if entry.path in candidates:
                try:
                    covers.append(
                        (litfile.get_file('/data/' + entry.internal), ctype))
                except Exception:
                    # Best effort: an unreadable candidate is skipped.
                    pass
                break
    if not covers:
        # Nothing usable was found; return the metadata without a cover.
        return mi
    covers.sort(key=lambda c: len(c[0]), reverse=True)
    idx = 0
    # Pick the runner-up when its guide type is the largest one's type with
    # a '-standard' suffix.
    if len(covers) > 1 and covers[1][1] == covers[0][1] + '-standard':
        idx = 1
    mi.cover_data = ('jpg', covers[idx][0])
    return mi
예제 #10
0
def get_metadata(stream):
    '''
    Extract metadata, OPF version, raster cover and first spine item.

    Raw ``bytes`` input is wrapped in a ``DummyFile`` before being given to
    ``parse_opf``; anything else is passed through unchanged. The parsed
    tree is reused by ``OPF`` (table-of-contents reading disabled).
    '''
    if isinstance(stream, bytes):
        stream = DummyFile(stream)
    tree = parse_opf(stream)
    opf_version = parse_opf_version(tree.get('version'))
    parsed = OPF(None, preparsed_opf=tree, read_toc=False)
    return (
        parsed.to_book_metadata(),
        opf_version,
        parsed.raster_cover,
        parsed.first_spine_item(),
    )
예제 #11
0
파일: extz.py 프로젝트: 089git/calibre
def get_metadata(stream, extract_cover=True):
    '''
    Return the metadata stored in the OPF file of a zipped book.

    :param stream: seekable file-like object opened with ``ZipFile``.
    :param extract_cover: when true, the file named by the OPF's
        ``raster_cover`` is read into ``cover_data``.
    :return: the parsed metadata; if anything fails, the metadata object
        available at that point (initially an "Unknown" placeholder).
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if cover_href:
                    mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
    except Exception:
        # Best effort: return what we have. Only Exception is caught so that
        # KeyboardInterrupt and SystemExit still propagate.
        pass
    return mi
예제 #12
0
파일: meta.py 프로젝트: AEliu/calibre
def opf_metadata(opfpath):
    '''
    Read metadata from an OPF file, including its cover when readable.

    :param opfpath: path to the OPF file, or an object with a ``read``
        method. For such an object its ``name`` attribute (falling back to
        the current directory) is used to resolve the cover path.
    :return: the metadata object when the OPF has an application id;
        ``None`` when it has none or when an error occurred (the traceback
        is printed).

    A file opened by this function is closed before returning; a file
    object supplied by the caller is left open.
    '''
    if hasattr(opfpath, 'read'):
        f = opfpath
        opfpath = getattr(f, 'name', os.getcwdu())
        opened_here = False
    else:
        f = open(opfpath, 'rb')
        opened_here = True
    try:
        opf = OPF(f, os.path.dirname(opfpath))
        if opf.application_id is not None:
            mi = opf.to_book_metadata()
            if hasattr(opf, 'cover') and opf.cover:
                cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
                if os.access(cpath, os.R_OK):
                    fmt = cpath.rpartition('.')[-1]
                    with open(cpath, 'rb') as cover_file:
                        data = cover_file.read()
                    mi.cover_data = (fmt, data)
            return mi
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        if opened_here:
            f.close()
    return None
예제 #13
0
def opf_metadata(opfpath):
    '''
    Read metadata from an OPF file, including its cover when readable.

    :param opfpath: path to the OPF file, or an object with a ``read``
        method. For such an object its ``name`` attribute (falling back to
        the current directory) is used to resolve the cover path.
    :return: the metadata object when the OPF has an application id;
        ``None`` when it has none or when an error occurred (the traceback
        is printed).

    Only a file opened here is closed; a caller-supplied file object is
    left untouched.
    '''
    owns_file = not hasattr(opfpath, 'read')
    if owns_file:
        f = open(opfpath, 'rb')
    else:
        f = opfpath
        opfpath = getattr(f, 'name', getcwd())
    try:
        opf = OPF(f, os.path.dirname(opfpath))
        if opf.application_id is not None:
            mi = opf.to_book_metadata()
            if hasattr(opf, 'cover') and opf.cover:
                cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
                if os.access(cpath, os.R_OK):
                    fmt = cpath.rpartition('.')[-1]
                    with open(cpath, 'rb') as cover_file:
                        data = cover_file.read()
                    mi.cover_data = (fmt, data)
            return mi
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        if owns_file:
            f.close()
    return None
예제 #14
0
파일: extz.py 프로젝트: pombreda/calibre-1
def get_metadata(stream, extract_cover=True):
    '''
    Return the metadata stored in the OPF file of a zipped book.

    :param stream: seekable file-like object opened with ``ZipFile``.
    :param extract_cover: when true, the file named by the OPF's
        ``raster_cover`` is read into ``cover_data``.
    :return: the parsed metadata; on failure, the metadata object available
        when the error occurred (initially an "Unknown" placeholder).
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if cover_href:
                    mi.cover_data = (os.path.splitext(cover_href)[1],
                                     zf.read(cover_href))
    except Exception:
        # Deliberately best effort, but do not trap KeyboardInterrupt or
        # SystemExit the way a bare ``except`` would.
        pass
    return mi
예제 #15
0
def zip_opf_metadata(opfpath, zf):
    '''
    Read metadata from an OPF file that belongs to the zip archive ``zf``.

    :param opfpath: path to the OPF file, or an object with a ``read``
        method (its ``name`` attribute, or the current directory, is then
        used as the OPF location).
    :param zf: archive object providing ``namelist()`` and ``read()``.
    :return: the metadata object; ``cover`` is reset to ``None`` and
        ``cover_data`` is filled in when the cover's base name is a member
        of the archive.

    A file opened by this function is closed once the OPF has been parsed.
    '''
    from calibre.ebooks.metadata.opf2 import OPF
    if hasattr(opfpath, 'read'):
        f = opfpath
        opfpath = getattr(f, 'name', getcwd())
        opened_here = False
    else:
        f = open(opfpath, 'rb')
        opened_here = True
    try:
        opf = OPF(f, os.path.dirname(opfpath))
        mi = opf.to_book_metadata()
    finally:
        if opened_here:
            f.close()
    # Known limitation: this only works when both the OPF file and the
    # cover file are in the root of the zip file and the cover is an actual
    # raster image.
    if getattr(mi, 'cover', None):
        covername = os.path.basename(mi.cover)
        mi.cover = None
        names = zf.namelist()
        if covername in names:
            fmt = covername.rpartition('.')[-1]
            data = zf.read(covername)
            mi.cover_data = (fmt, data)
    return mi
예제 #16
0
파일: zip.py 프로젝트: j-howell/calibre
def zip_opf_metadata(opfpath, zf):
    '''
    Read metadata from an OPF file that belongs to the zip archive ``zf``.

    :param opfpath: path to the OPF file, or an object with a ``read``
        method (its ``name`` attribute, or the current directory, is then
        used as the OPF location).
    :param zf: archive object providing ``namelist()`` and ``read()``.
    :return: the metadata object; ``cover`` is reset to ``None`` and
        ``cover_data`` is filled in when the cover's base name is a member
        of the archive.

    Only a file opened here is closed; a caller-supplied file object is
    left open.
    '''
    from calibre.ebooks.metadata.opf2 import OPF
    owns_file = not hasattr(opfpath, 'read')
    if owns_file:
        f = open(opfpath, 'rb')
    else:
        f = opfpath
        opfpath = getattr(f, 'name', getcwd())
    try:
        opf = OPF(f, os.path.dirname(opfpath))
        mi = opf.to_book_metadata()
    finally:
        if owns_file:
            f.close()
    # Known limitation: this only works when both the OPF file and the
    # cover file are in the root of the zip file and the cover is an actual
    # raster image.
    if getattr(mi, 'cover', None):
        covername = os.path.basename(mi.cover)
        mi.cover = None
        if covername in zf.namelist():
            fmt = covername.rpartition('.')[-1]
            mi.cover_data = (fmt, zf.read(covername))
    return mi
예제 #17
0
 def handle_zip_of_opf_files(self, stream):
     '''
     Given a zip of OPF files, either merge them or add them to the library.

     Each ``.opf`` member is parsed; when ``self.extract_id`` finds an id
     and ``self.get_casanova_metadata`` returns existing metadata, that book
     is updated, otherwise a new book is imported. Members ending in
     ``jpg``/``png``/``gif`` whose base name is a key of ``self.book_map``
     are set as the cover of the mapped book.

     A failure on one OPF member is reported with a traceback and does not
     stop processing of the remaining members.

     :param stream: passed to ``ZipFile`` for reading.
     :return: dict with the ``'updated'`` and ``'added'`` counters.
     '''
     import traceback
     result = {'updated': 0, 'added': 0}
     with ZipFile(stream, 'r') as zf:
         self.start_applying_updates()
         for zi in zf.infolist():
             ext = zi.filename.rpartition('.')[-1].lower()
             if ext == 'opf':
                 try:
                     raw = zf.open(zi)
                     opf = OPF(raw)
                     mi = opf.to_book_metadata()
                     casanova_id = self.extract_id(mi)
                     if casanova_id:
                         book_mi = self.get_casanova_metadata(
                             casanova_id['id'])
                         if book_mi:
                             # Update an existing book's metadata!
                             result['updated'] = result['updated'] + 1
                             self.apply_metadata_update(
                                 casanova_id['id'], book_mi, mi)
                         else:
                             # Create a new book entry
                             result['added'] = result['added'] + 1
                             self.model.db.import_book(mi, [])
                 except Exception:
                     # Keep going with the other members, but leave a trace
                     # instead of silently discarding the error.
                     traceback.print_exc()
             elif ext in {'jpg', 'png', 'gif'}:
                 # try and handle the cover
                 casanova_id = zi.filename.partition('.')[0].lower()
                 if casanova_id in self.book_map:
                     book_id = self.book_map[casanova_id]
                     raw = zf.open(zi)
                     self.db.set_cover(book_id, raw)
         self.finish_applying_updates()
         return result
예제 #18
0
파일: plumber.py 프로젝트: Eksmo/calibre
 def read_user_metadata(self):
     '''
     Read all metadata specified by the user. Command line options override
     metadata from a specified OPF file.

     The result is stored in ``self.user_metadata``. When the metadata names
     a cover, an ``http:``/``https:`` cover is first fetched with
     ``self.download_cover``; the cover file is then loaded into
     ``cover_data`` (extensions other than png/jpg/jpeg/gif are labelled
     ``jpg``) and ``cover`` is reset to ``None``.

     Both the OPF file and the cover file are closed as soon as they have
     been read.
     '''
     from calibre.ebooks.metadata import MetaInformation
     from calibre.ebooks.metadata.opf2 import OPF
     mi = MetaInformation(None, [])
     if self.opts.read_metadata_from_opf is not None:
         self.opts.read_metadata_from_opf = os.path.abspath(
                                         self.opts.read_metadata_from_opf)
         opf_path = self.opts.read_metadata_from_opf
         with open(opf_path, 'rb') as opf_file:
             opf = OPF(opf_file, os.path.dirname(opf_path))
             mi = opf.to_book_metadata()
     self.opts_to_mi(mi)
     if mi.cover:
         if mi.cover.startswith(('http:', 'https:')):
             mi.cover = self.download_cover(mi.cover)
         ext = mi.cover.rpartition('.')[-1].lower().strip()
         if ext not in ('png', 'jpg', 'jpeg', 'gif'):
             ext = 'jpg'
         with open(mi.cover, 'rb') as cover_file:
             mi.cover_data = (ext, cover_file.read())
         mi.cover = None
     self.user_metadata = mi
예제 #19
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Convert ``oeb_book`` to an HTMLZ archive written to ``output_path``.

        The book is rendered to a single HTML file inside a temporary
        directory, together with an optional external stylesheet, the images
        referenced by the HTML, a ``cover.jpg`` when cover data is available
        and a ``metadata.opf``. The directory is then added to a zip archive
        at ``output_path``.

        ``opts.htmlz_css_type`` selects the HTML generator ("inline", "tag",
        anything else means class based CSS). ``input_plugin`` is not used.
        '''
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == "inline":
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer

            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == "tag":
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer

            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory("_htmlz_output") as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = "index"
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to

                fname = shorten_components_to(100, (ascii_filename(unicode(oeb_book.metadata.title[0])),))[0]
            with open(os.path.join(tdir, fname + ".html"), "wb") as tf:
                if isinstance(html, unicode):
                    html = html.encode("utf-8")
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == "class" and opts.htmlz_class_style == "external":
                with open(os.path.join(tdir, "style.css"), "wb") as tf:
                    tf.write(htmlizer.get_css(oeb_book))

            # Images
            images = htmlizer.images
            if images:
                images_dir = os.path.join(tdir, "images")
                if not os.path.exists(images_dir):
                    os.makedirs(images_dir)
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # SVG is serialized as text; encode it explicitly
                            # instead of relying on the implicit ASCII
                            # conversion of a binary-mode write.
                            data = etree.tostring(item.data, encoding=unicode).encode("utf-8")
                        else:
                            data = item.data
                        image_path = os.path.join(images_dir, images[item.href])
                        with open(image_path, "wb") as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.magick.draw import save_cover_data_to

                    cover_path = os.path.join(tdir, "cover.jpg")
                    # Create the (empty) file before the cover is saved to it.
                    with open(cover_path, "w") as cf:
                        cf.write("")
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # A broken cover must not abort the conversion.
                import traceback

                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, "metadata.opf"), "wb") as mdataf:
                opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = "cover.jpg"
                mdataf.write(metadata_to_opf(mi))

            # Close the archive explicitly so it is complete on disk before
            # the temporary directory is removed.
            htmlz = ZipFile(output_path, "w")
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
예제 #20
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Convert ``oeb_book`` to an HTMLZ archive written to ``output_path``.

        The book is rendered to a single HTML file inside a temporary
        directory, together with an optional external stylesheet, the images
        referenced by the HTML, a ``cover.jpg`` when cover data is available
        and a ``metadata.opf``. The directory is then added to a zip archive
        at ``output_path``.

        ``opts.htmlz_css_type`` selects the HTML generator ('inline', 'tag',
        anything else means class based CSS). ``input_plugin`` is not used.
        '''
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(100, (ascii_filename(
                    unicode_type(oeb_book.metadata.title[0])), ))[0]
            with open(os.path.join(tdir, fname + u'.html'), 'wb') as tf:
                if isinstance(html, unicode_type):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    tf.write(htmlizer.get_css(oeb_book))

            # Images
            images = htmlizer.images
            if images:
                images_dir = os.path.join(tdir, u'images')
                if not os.path.exists(images_dir):
                    os.makedirs(images_dir)
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # SVG is serialized as text and the file is
                            # opened in binary mode, so it must be encoded
                            # to bytes before it is written.
                            data = etree.tostring(
                                item.data,
                                encoding=unicode_type).encode('utf-8')
                        else:
                            data = item.data
                        image_path = os.path.join(images_dir,
                                                  images[item.href])
                        with open(image_path, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    # Create the (empty) file before the cover is saved to it.
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # A broken cover must not abort the conversion.
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(
                    io.BytesIO(
                        etree.tostring(oeb_book.metadata.to_opf1(),
                                       encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            # Close the archive explicitly so it is complete on disk before
            # the temporary directory is removed.
            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
예제 #21
0
    def convert(self, oeb_book, output, input_plugin, opts, log):
        '''
        Convert ``oeb_book`` by first producing an EPUB and then handing that
        file to ``self.convert_using_previewer``.

        Before the EPUB step this gathers the book title, an ASIN from the
        book identifiers (unless the ``kfx_output_ignore_asin_metadata``
        tweak is set) and a page count: ``-1`` when ``opts.approximate_pages``
        is off, otherwise ``0`` or the value of the configured custom field
        read from the OPF named by ``opts.read_metadata_from_opf``.

        ``opts`` is modified in place: every EPUB Output option is set to
        its recommended value and a fixed set of them is then overridden.
        '''
        self.report_version(log)

        try:
            book_name = str(oeb_book.metadata.title[0])
        except Exception:
            book_name = ""

        asin = None

        if not tweaks.get("kfx_output_ignore_asin_metadata", False):
            # Identifier schemes are tried in this order of preference.
            for scheme_pattern in ["^mobi-asin$", "^amazon.*$", "^asin$"]:
                for identifier in oeb_book.metadata["identifier"]:
                    scheme = identifier.get(OPFNS("scheme"), "").lower()
                    if not (re.match(scheme_pattern, scheme) and re.match(ASIN_RE, identifier.value)):
                        continue
                    asin = identifier.value
                    log.info("Found ASIN metadata %s: %s" % (scheme, asin))
                    break

                if asin:
                    break

        if not opts.approximate_pages:
            page_count = -1
        else:
            page_count = 0
            if opts.number_of_pages_field and opts.number_of_pages_field != AUTO_PAGES and opts.read_metadata_from_opf:
                # This OPF contains custom column metadata not present in the oeb_book OPF
                opf = OPF(opts.read_metadata_from_opf, populate_spine=False, try_to_guess_cover=False, read_toc=False)
                mi = opf.to_book_metadata()
                page_count_str = mi.get(opts.number_of_pages_field, None)

                if page_count_str is None:
                    log.warning("Book has no page count field %s" % opts.number_of_pages_field)
                else:
                    try:
                        page_count = int(page_count_str)
                    except Exception:
                        # Keep the default of 0 when the value is not numeric
                        pass

                    log.info("Page count value from field %s: %d ('%s')" % (opts.number_of_pages_field, page_count, page_count_str))

        # set default values for options expected by the EPUB Output plugin
        for optrec in EPUBOutput.options:
            setattr(opts, optrec.option.name, optrec.recommended_value)

        # override currently known EPUB Output plugin options
        epub_overrides = (
            ("extract_to", None),
            ("dont_split_on_page_breaks", False),
            ("flow_size", 0),
            ("no_default_epub_cover", False),
            ("no_svg_cover", False),
            ("preserve_cover_aspect_ratio", True),
            ("epub_flatten", False),
            ("epub_inline_toc", False),
            ("epub_toc_at_end", False),
            ("toc_title", None),
        )
        for option_name, option_value in epub_overrides:
            setattr(opts, option_name, option_value)

        epub_filename = self.temporary_file(".epub").name
        # convert input format to EPUB
        self.epub_output_plugin.convert(oeb_book, epub_filename, input_plugin, opts, log)
        log.info("Successfully converted input format to EPUB")

        if PREPARED_FILE_SAVE_DIR:
            if not os.path.exists(PREPARED_FILE_SAVE_DIR):
                os.makedirs(PREPARED_FILE_SAVE_DIR)

            saved_copy = os.path.join(PREPARED_FILE_SAVE_DIR, os.path.basename(epub_filename))
            shutil.copyfile(epub_filename, saved_copy)
            log.warning("Saved conversion input file: %s" % saved_copy)

        job_log = JobLog(log)
        timeout = TIMEOUT if opts.enable_timeout else None
        self.convert_using_previewer(
                job_log, book_name, epub_filename, asin, opts.cde_type_pdoc, page_count,
                opts.show_kpr_logs, False, timeout, output)
예제 #22
0
파일: book.py 프로젝트: MarioJC/calibre
class EbookIterator(BookmarksMixin):
    ''' Context manager that extracts an ebook into a temporary directory and
    exposes its spine (as a list of SpineItem objects), metadata, table of
    contents and estimated page numbers.

    Usage is via ``__enter__``/``__exit__``; the temporary directory is
    removed on exit. '''

    # Number of characters treated as one "page" when estimating page counts
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None, copy_bookmarks_to_file=True, use_tdir_in_cache=False):
        ''' Record the path and derived extension of the book. Nothing is
        extracted until ``__enter__`` is called.

        :param pathtoebook: Path to the ebook file (surrounding whitespace is stripped).
        :param log: Logger to use, falls back to ``default_log`` when falsy.
        :param copy_bookmarks_to_file: Forwarded to ``BookmarksMixin.__init__``.
        :param use_tdir_in_cache: If True the working directory is created with
            ``tdir_in_cache`` instead of ``PersistentTemporaryDirectory``.
        '''
        BookmarksMixin.__init__(self, copy_bookmarks_to_file=copy_bookmarks_to_file)
        self.use_tdir_in_cache = use_tdir_in_cache
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Normalize htm/html/xhtm/xhtml to a single 'html' extension
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        ''' Find the nearest spine item containing ``text`` (case-insensitive).

        :param text: The text to look for.
        :param index: Spine index to search from; the item at ``index`` itself
            is never examined.
        :param backwards: Search the items before ``index`` (nearest first)
            instead of the items after it.
        :return: The index of the first matching spine item, or None if no
            item matches.
        '''
        from calibre.ebooks.oeb.polish.parsing import parse
        candidates = list(enumerate(self.spine))
        if backwards:
            candidates.reverse()
        q = text.lower()
        for i, path in candidates:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                root = parse(raw)
                fragments = []

                def serialize(elem):
                    # Emit text in document order: the element's own text,
                    # then its children, and only then its tail. Emitting the
                    # tail before the children would interleave text wrongly
                    # and break matches that span inline elements.
                    if elem.text:
                        fragments.append(elem.text.lower())
                    for child in elem.iterchildren():
                        if hasattr(getattr(child, 'tag', None), 'rpartition') and child.tag.rpartition('}')[-1] not in {'script', 'style', 'del'}:
                            serialize(child)
                        elif getattr(child, 'tail', None):
                            # Skipped node (script/style/del or a node whose
                            # tag is not a string): keep only the text
                            # following it
                            fragments.append(child.tail.lower())
                    if elem.tail:
                        fragments.append(elem.tail.lower())

                for body in root.xpath('//*[local-name() = "body"]'):
                    # Anything after </body> is not part of the searchable text
                    body.tail = None
                    serialize(body)

                if q in ''.join(fragments):
                    return i

    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True, view_kepub=False, read_links=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        ``processed``, ``only_input_plugin`` and ``view_kepub`` are forwarded
        to ``run_extract_book``. ``run_char_count``, ``read_anchor_map`` and
        ``read_links`` are forwarded to every ``SpineItem`` that is created;
        ``read_anchor_map`` additionally controls whether
        ``create_indexing_data`` is run.

        :return: self
        '''

        self.delete_on_exit = []
        if self.use_tdir_in_cache:
            self._tdir = tdir_in_cache('ev')
        else:
            self._tdir = PersistentTemporaryDirectory('_ebook_iter')
        self.base = os.path.realpath(self._tdir)
        self.book_format, self.pathtoopf, input_fmt = run_extract_book(
            self.pathtoebook, self.base, only_input_plugin=only_input_plugin, view_kepub=view_kepub, processed=processed)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.mi = self.opf.to_book_metadata()
        self.language = None
        if self.mi.languages:
            self.language = self.mi.languages[0].lower()
        # Linear spine items first, non-linear ones after them
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
                run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
        is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                # Best effort: skip spine items that cannot be loaded, but do
                # not swallow KeyboardInterrupt/SystemExit
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                        'azw', 'azw3', 'docx', 'htmlz'}:
            # Generate a title page that references the cover and put it at
            # the front of the spine
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile,
                mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        # Estimate page counts from the character count of every spine item
        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self

    def verify_links(self):
        ''' Populate ``verified_links`` on every spine item with the
        ``(target spine item, fragment)`` pairs of its links that point to
        another spine item and, when a fragment is present, to an anchor
        found in the target's ``anchor_map``. '''
        # Maps a path to the SpineItem object that is equal to that path
        spine_paths = {s:s for s in self.spine}
        for item in self.spine:
            base = os.path.dirname(item)
            for link in item.all_links:
                try:
                    p = urlparse(urlunquote(link))
                except Exception:
                    continue
                if p.scheme or p.netloc:
                    # Not a link into the book itself
                    continue
                path = os.path.abspath(os.path.join(base, p.path)) if p.path else item
                target = spine_paths.get(path)
                if target is None:
                    continue
                if not p.fragment or p.fragment in target.anchor_map:
                    item.verified_links.add((target, p.fragment))

    def __exit__(self, *args):
        ''' Remove the temporary directory and any extra generated files. '''
        remove_dir(self._tdir)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except Exception:
                # Best effort cleanup; the file may already be gone because
                # its directory was removed above
                pass
예제 #23
0
파일: opf.py 프로젝트: samlty/calibre
def get_metadata2(root, ver):
    ''' Build book metadata from an already parsed OPF tree.

    :param root: The pre-parsed OPF root, handed to ``OPF`` as ``preparsed_opf``.
    :param ver: Passed through unchanged as the second element of the result.
    :return: The tuple ``(metadata, ver, raster_cover, first_spine_item)``.
    '''
    # The table of contents is not needed here, so it is not read
    package = OPF(None, preparsed_opf=root, read_toc=False)
    metadata = package.to_book_metadata()
    return metadata, ver, package.raster_cover, package.first_spine_item()
예제 #24
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        ''' Write ``oeb_book`` to ``output_path`` as an HTMLZ archive.

        The archive is assembled in a temporary directory containing the
        generated HTML file, an optional external ``style.css``, the
        referenced images, an optional ``cover.jpg`` and ``metadata.opf``;
        that directory is then added to a zip file.

        :param oeb_book: The book to convert.
        :param output_path: Path of the zip file to create.
        :param input_plugin: Not used by this method.
        :param opts: Conversion options; ``htmlz_css_type``,
            ``htmlz_class_style`` and ``htmlz_title_filename`` are read here.
        :param log: Logger handed to the HTML generator.
        '''
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0]
            with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
                if isinstance(html, unicode_type):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    css = htmlizer.get_css(oeb_book)
                    # The file is opened in binary mode, so text must be encoded
                    if isinstance(css, unicode_type):
                        css = css.encode('utf-8')
                    tf.write(css)

            # Images
            images = htmlizer.images
            if images:
                images_dir = os.path.join(tdir, u'images')
                if not os.path.exists(images_dir):
                    os.makedirs(images_dir)
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # SVG data is a parsed tree; it is serialized to
                            # text here and encoded below before writing
                            data = etree.tostring(item.data, encoding='unicode')
                        else:
                            data = item.data
                        if isinstance(data, unicode_type):
                            data = data.encode('utf-8')
                        image_path = os.path.join(images_dir, images[item.href])
                        with open(image_path, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    # Create the (empty) file before saving the cover into it
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # A broken cover must not abort the conversion, but do not
                # swallow KeyboardInterrupt/SystemExit
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(io.BytesIO(etree.tostring(oeb_book.metadata.to_opf1(), encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            # Close the archive explicitly so it is finalized before the
            # temporary directory is removed, instead of relying on garbage
            # collection
            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
예제 #25
0
파일: opf.py 프로젝트: kylinRao/calibre
def get_metadata2(root, ver):
    ''' Read book metadata from a pre-parsed OPF root without reading the ToC.

    :param root: Parsed OPF tree, given to ``OPF`` via ``preparsed_opf``.
    :param ver: Returned as is, in the second position of the result.
    :return: ``(metadata, ver, raster_cover, first_spine_item)``
    '''
    reader = OPF(None, preparsed_opf=root, read_toc=False)
    mi = reader.to_book_metadata()
    result = (mi, ver, reader.raster_cover, reader.first_spine_item())
    return result
예제 #26
0
파일: book.py 프로젝트: won2930015/calibre
class EbookIterator(BookmarksMixin):
    ''' Context manager that extracts an ebook into a temporary directory and
    exposes its spine (as a list of SpineItem objects), metadata, table of
    contents and estimated page numbers.

    Usage is via ``__enter__``/``__exit__``; the temporary directory is
    removed on exit. '''

    # Number of characters treated as one "page" when estimating page counts
    CHARACTERS_PER_PAGE = 1000

    def __init__(self,
                 pathtoebook,
                 log=None,
                 copy_bookmarks_to_file=True,
                 use_tdir_in_cache=False):
        ''' Record the path and derived extension of the book. Nothing is
        extracted until ``__enter__`` is called.

        :param pathtoebook: Path to the ebook file (surrounding whitespace is stripped).
        :param log: Logger to use, falls back to ``default_log`` when falsy.
        :param copy_bookmarks_to_file: Forwarded to ``BookmarksMixin.__init__``.
        :param use_tdir_in_cache: If True the working directory is created with
            ``tdir_in_cache`` instead of ``PersistentTemporaryDirectory``.
        '''
        BookmarksMixin.__init__(self,
                                copy_bookmarks_to_file=copy_bookmarks_to_file)
        self.use_tdir_in_cache = use_tdir_in_cache
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Normalize htm/html/xhtm/xhtml to a single 'html' extension
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        ''' Find the nearest spine item containing ``text`` (case-insensitive).

        :param text: The text to look for.
        :param index: Spine index to search from; the item at ``index`` itself
            is never examined.
        :param backwards: Search the items before ``index`` (nearest first)
            instead of the items after it.
        :return: The index of the first matching spine item, or None if no
            item matches.
        '''
        from calibre.ebooks.oeb.polish.parsing import parse
        candidates = list(enumerate(self.spine))
        if backwards:
            candidates.reverse()
        q = text.lower()
        for i, path in candidates:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                root = parse(raw)
                fragments = []

                def serialize(elem):
                    # Emit text in document order: the element's own text,
                    # then its children, and only then its tail. Emitting the
                    # tail before the children would interleave text wrongly
                    # and break matches that span inline elements.
                    if elem.text:
                        fragments.append(elem.text.lower())
                    for child in elem.iterchildren():
                        tag = getattr(child, 'tag', None)
                        if hasattr(tag, 'rpartition') and tag.rpartition(
                                '}')[-1] not in {'script', 'style', 'del'}:
                            serialize(child)
                        elif getattr(child, 'tail', None):
                            # Skipped node (script/style/del or a node whose
                            # tag is not a string): keep only the text
                            # following it
                            fragments.append(child.tail.lower())
                    if elem.tail:
                        fragments.append(elem.tail.lower())

                for body in root.xpath('//*[local-name() = "body"]'):
                    # Anything after </body> is not part of the searchable text
                    body.tail = None
                    serialize(body)

                if q in ''.join(fragments):
                    return i

    def __enter__(self,
                  processed=False,
                  only_input_plugin=False,
                  run_char_count=True,
                  read_anchor_map=True,
                  view_kepub=False,
                  read_links=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        ``processed``, ``only_input_plugin`` and ``view_kepub`` are forwarded
        to ``run_extract_book``. ``run_char_count``, ``read_anchor_map`` and
        ``read_links`` are forwarded to every ``SpineItem`` that is created;
        ``read_anchor_map`` additionally controls whether
        ``create_indexing_data`` is run.

        :return: self
        '''

        self.delete_on_exit = []
        if self.use_tdir_in_cache:
            self._tdir = tdir_in_cache('ev')
        else:
            self._tdir = PersistentTemporaryDirectory('_ebook_iter')
        self.base = os.path.realpath(self._tdir)
        self.book_format, self.pathtoopf, input_fmt = run_extract_book(
            self.pathtoebook,
            self.base,
            only_input_plugin=only_input_plugin,
            view_kepub=view_kepub,
            processed=processed)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.mi = self.opf.to_book_metadata()
        self.language = None
        if self.mi.languages:
            self.language = self.mi.languages[0].lower()

        self.spine = []
        Spiny = partial(SpineItem,
                        read_anchor_map=read_anchor_map,
                        read_links=read_links,
                        run_char_count=run_char_count,
                        from_epub=self.book_format == 'EPUB')
        if input_fmt.lower() == 'htmlz':
            # For HTMLZ input the spine is the single index.html next to the
            # OPF, the spine declared in the OPF is not consulted
            self.spine.append(
                Spiny(os.path.join(os.path.dirname(self.pathtoopf),
                                   'index.html'),
                      mime_type='text/html'))
        else:
            # Linear spine items first, non-linear ones after them
            ordered = [i for i in self.opf.spine if i.is_linear] + \
                    [i for i in self.opf.spine if not i.is_linear]
            is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
            for i in ordered:
                spath = i.path
                mt = None
                if i.idref is not None:
                    mt = self.opf.manifest.type_for_id(i.idref)
                if mt is None:
                    mt = guess_type(spath)[0]
                try:
                    self.spine.append(Spiny(spath, mime_type=mt))
                    if is_comic:
                        self.spine[-1].is_single_page = True
                except Exception:
                    # Best effort: skip spine items that cannot be loaded, but
                    # do not swallow KeyboardInterrupt/SystemExit
                    self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {
                'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3', 'docx',
                'htmlz'
        }:
            # Generate a title page that references the cover and put it at
            # the front of the spine
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE %
                     prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        # Estimate page counts from the character count of every spine item
        sizes = [i.character_count for i in self.spine]
        self.pages = [
            math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes
        ]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self

    def verify_links(self):
        ''' Populate ``verified_links`` on every spine item with the
        ``(target spine item, fragment)`` pairs of its links that point to
        another spine item and, when a fragment is present, to an anchor
        found in the target's ``anchor_map``. '''
        # Maps a path to the SpineItem object that is equal to that path
        spine_paths = {s: s for s in self.spine}
        for item in self.spine:
            base = os.path.dirname(item)
            for link in item.all_links:
                try:
                    p = urlparse(urlunquote(link))
                except Exception:
                    continue
                if p.scheme or p.netloc:
                    # Not a link into the book itself
                    continue
                path = os.path.abspath(os.path.join(
                    base, p.path)) if p.path else item
                target = spine_paths.get(path)
                if target is None:
                    continue
                if not p.fragment or p.fragment in target.anchor_map:
                    item.verified_links.add((target, p.fragment))

    def __exit__(self, *args):
        ''' Remove the temporary directory and any extra generated files. '''
        remove_dir(self._tdir)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except Exception:
                # Best effort cleanup; the file may already be gone because
                # its directory was removed above
                pass