예제 #1
0
def get_book_language(container):
    for lang in container.opf_xpath('//dc:language'):
        raw = lang.text
        if raw:
            code = canonicalize_lang(raw.split(',')[0].strip())
            if code:
                return code
예제 #2
0
def field_from_string(field, raw, field_metadata):
    ''' Parse the string raw to return an object that is suitable for calling
    set() on a Metadata object. '''
    dt = field_metadata['datatype']
    val = object
    if dt in {'int', 'float'}:
        val = int(raw) if dt == 'int' else float(raw)
    elif dt == 'rating':
        val = float(raw) * 2
    elif dt == 'datetime':
        from ebook_converter.utils.date import parse_only_date
        val = parse_only_date(raw)
    elif dt == 'bool':
        if raw.lower() in {'true', 'yes', 'y'}:
            val = True
        elif raw.lower() in {'false', 'no', 'n'}:
            val = False
        else:
            raise ValueError('Unknown value for %s: %s'%(field, raw))
    elif dt == 'text':
        ism = field_metadata['is_multiple']
        if ism:
            val = [x.strip() for x in raw.split(ism['ui_to_list'])]
            if field == 'identifiers':
                val = {x.partition(':')[0]:x.partition(':')[-1] for x in val}
            elif field == 'languages':
                from ebook_converter.utils.localization import canonicalize_lang
                val = [canonicalize_lang(x) for x in val]
                val = [x for x in val if x]
    if val is object:
        val = raw
    return val
예제 #3
0
def get_title_sort_pat(lang=None):
    ans = _title_pats.get(lang, None)
    if ans is not None:
        return ans
    q = lang
    from ebook_converter.utils.localization import canonicalize_lang, get_lang
    if lang is None:
        q = tweaks['default_language_for_title_sort']
        if q is None:
            q = get_lang()
    q = canonicalize_lang(q) if q else q
    data = tweaks['per_language_title_sort_articles']
    try:
        ans = data.get(q, None)
    except AttributeError:
        ans = None  # invalid tweak value
    try:
        ans = frozenset(ans) if ans else frozenset(data['eng'])
    except:
        ans = frozenset((r'A\s+', r'The\s+', r'An\s+'))
    ans = '|'.join(ans)
    ans = '^(%s)' % ans
    try:
        ans = re.compile(ans, re.IGNORECASE)
    except:
        ans = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)
    _title_pats[lang] = ans
    return ans
예제 #4
0
def commit_ncx_toc(container, toc, lang=None, uid=None):
    tocname = find_existing_ncx_toc(container)
    if tocname is None:
        item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(item.get('href'),
                                         base=container.opf_name)
        ncx_id = item.get('id')
        [s.set('toc', ncx_id) for s in container.opf_xpath('//opf:spine')]
    if not lang:
        lang = get_lang()
        for _l in container.opf_xpath('//dc:language'):
            _l = canonicalize_lang(base.xml2text(_l).strip())
            if _l:
                lang = _l
                lang = lang_as_iso639_1(_l) or _l
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        uid = base.uuid_id()
        eid = container.opf.get('unique-identifier', None)
        if eid:
            m = container.opf_xpath('//*[@id="%s"]' % eid)
            if m:
                uid = base.xml2text(m[0])

    title = 'Table of Contents'
    m = container.opf_xpath('//dc:title')
    if m:
        x = base.xml2text(m[0]).strip()
        title = x or title

    to_href = functools.partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)
예제 #5
0
def read_languages(root, prefixes, refines):
    ans = []
    for lang in XPath('./opf:metadata/dc:language')(root):
        val = canonicalize_lang((lang.text or '').strip())
        if val and val not in ans and val != 'und':
            ans.append(val)
    return uniq(ans)
예제 #6
0
def read_default_style_language(raw, mi, XPath):
    root = etree.fromstring(raw)
    for lang in XPath('/w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang/'
                      '@w:val')(root):
        lang = canonicalize_lang(lang)
        if lang:
            mi.languages = [lang]
            break
예제 #7
0
def get_comic_book_info(d, mi, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
        si = d.get(series_index, None)
        if si is None:
            si = d.get('issue' if series_index == 'volume' else 'volume', None)
        if si is not None:
            try:
                mi.series_index = float(si)
            except Exception:
                mi.series_index = 1
    if d.get('language', None):
        lang = canonicalize_lang(d.get('lang'))
        if lang:
            mi.languages = [lang]
    if d.get('rating', -1) > -1:
        mi.rating = d['rating']
    for x in ('title', 'publisher'):
        y = d.get(x, '').strip()
        if y:
            setattr(mi, x, y)
    tags = d.get('tags', [])
    if tags:
        mi.tags = tags
    authors = []
    for credit in d.get('credits', []):
        if credit.get('role',
                      '') in ('Writer', 'Artist', 'Cartoonist', 'Creator'):
            x = credit.get('person', '')
            if x:
                x = ' '.join((reversed(x.split(', '))))
                authors.append(x)
    if authors:
        mi.authors = authors
    comments = d.get('comments', '')
    if comments and comments.strip():
        mi.comments = comments.strip()
    pubm, puby = d.get('publicationMonth',
                       None), d.get('publicationYear', None)
    if puby is not None:
        from ebook_converter.utils.date import parse_only_date
        from datetime import date
        try:
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            pass
예제 #8
0
def parse_lang_code(raw):
    raw = raw or ''
    parts = raw.replace('_', '-').split('-')
    lc = canonicalize_lang(parts[0])
    if lc is None:
        raise ValueError('Invalid language code: %r' % raw)
    cc = None
    if len(parts) > 1:
        ccodes, ccodemap = get_codes()[:2]
        q = parts[1].upper()
        if q in ccodes:
            cc = q
        else:
            cc = ccodemap.get(q, None)
    return DictionaryLocale(lc, cc)
예제 #9
0
def read_doc_props(raw, mi, XPath):
    root = etree.fromstring(raw)
    titles = XPath('//dc:title')(root)
    if titles:
        title = titles[0].text
        if title and title.strip():
            mi.title = title.strip()
    tags = []
    for subject in XPath('//dc:subject')(root):
        if subject.text and subject.text.strip():
            tags.append(subject.text.strip().replace(',', '_'))
    for keywords in XPath('//cp:keywords')(root):
        if keywords.text and keywords.text.strip():
            for x in keywords.text.split():
                tags.extend(y.strip() for y in x.split(',') if y.strip())
    if tags:
        mi.tags = tags
    authors = XPath('//dc:creator')(root)
    aut = []
    for author in authors:
        if author.text and author.text.strip():
            aut.extend(string_to_authors(author.text))
    if aut:
        mi.authors = aut
        mi.author_sort = authors_to_sort_string(aut)

    desc = XPath('//dc:description')(root)
    if desc:
        raw = etree.tostring(desc[0], method='text', encoding='unicode')
        # Word 2007 mangles newlines in the summary
        raw = raw.replace('_x000d_', '')
        mi.comments = raw.strip()

    langs = []
    for lang in XPath('//dc:language')(root):
        if lang.text and lang.text.strip():
            canonic_lang = canonicalize_lang(lang.text)
            if canonic_lang:
                langs.append(canonic_lang)
    if langs:
        mi.languages = langs
예제 #10
0
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        import uuid
        from ebook_converter.ebooks.conversion.plumber import create_oebbook
        from ebook_converter.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES,
            xpath, urlquote)
        from ebook_converter.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from ebook_converter.ebooks.html.input import get_filelist
        from ebook_converter.ebooks.metadata import string_to_authors
        from ebook_converter.utils.localization import canonicalize_lang
        import css_parser, logging
        css_parser.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            l = canonicalize_lang(getattr(opts, 'language', None))
            if not l:
                oeb.logger.warn('Language not specified')
                l = get_lang().replace('_', '-')
            metadata.add('language', l)
        if not metadata.creator:
            a = getattr(opts, 'authors', None)
            if a:
                a = string_to_authors(a)
            if not a:
                oeb.logger.warn('Creator not specified')
                a = [self.oeb.translate('Unknown')]
            for aut in a:
                metadata.add('creator', aut)
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate('Unknown'))
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html', href=sanitize_file_name(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            if path == htmlpath and '%' in path:
                bname = urlquote(bname)
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urllib.parse.urldefrag
        self.BINARY_MIME = BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            href = htmlfile_map[path]
            try:
                item = oeb.manifest.hrefs[href]
            except KeyError:
                item = oeb.manifest.hrefs[urlnormalize(href)]
            rewrite_links(item.data,
                          functools.partial(self.resource_adder, base=dpath))

        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                css_parser.replaceUrls(item.data,
                        functools.partial(self.resource_adder, base=dpath))

        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        for title, item in zip(use, self.oeb.spine):
            if not item.linear:
                continue
            toc.add(title, item.href)

        oeb.container = DirContainer(os.getcwd(), oeb.log, ignore_opf=True)
        return oeb
예제 #11
0
def html_lang(docx_lang):
    lang = canonicalize_lang(docx_lang)
    if lang and lang != 'und':
        lang = lang_as_iso639_1(lang)
        if lang:
            return lang
예제 #12
0
    def __init__(self, raw, codec, title):
        self.doctype = raw[:4]
        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
        raw = raw[12:]
        pos = 0
        self.mi = MetaInformation('Unknown', ['Unknown'])
        self.has_fake_cover = True
        self.start_offset = None
        left = self.num_items
        self.kf8_header = None
        self.uuid = self.cdetype = None
        self.page_progression_direction = None
        self.primary_writing_mode = None

        self.decode = lambda x: clean_ascii_chars(x.decode(codec, 'replace'))

        while left > 0:
            left -= 1
            idx, size = struct.unpack('>LL', raw[pos:pos + 8])
            content = raw[pos + 8:pos + size]
            pos += size
            if idx >= 100 and idx < 200:
                self.process_metadata(idx, content, codec)
            elif idx == 203:
                self.has_fake_cover = bool(struct.unpack('>L', content)[0])
            elif idx == 201:
                co, = struct.unpack('>L', content)
                if co < NULL_INDEX:
                    self.cover_offset = co
            elif idx == 202:
                self.thumbnail_offset, = struct.unpack('>L', content)
            elif idx == 501:
                try:
                    self.cdetype = content.decode('ascii')
                except UnicodeDecodeError:
                    self.cdetype = None
                # cdetype
                if content == b'EBSP':
                    if not self.mi.tags:
                        self.mi.tags = []
                    self.mi.tags.append('Sample Book')
            elif idx == 502:
                # last update time
                pass
            elif idx == 503:  # Long title
                # Amazon seems to regard this as the definitive book title
                # rather than the title from the PDB header. In fact when
                # sending MOBI files through Amazon's email service if the
                # title contains non ASCII chars or non filename safe chars
                # they are messed up in the PDB header
                try:
                    title = self.decode(content)
                except Exception:
                    pass
            elif idx == 524:  # Lang code
                try:
                    lang = content.decode(codec)
                    lang = canonicalize_lang(lang)
                    if lang:
                        self.mi.language = lang
                except Exception:
                    pass
            elif idx == 525:
                try:
                    pwm = content.decode(codec)
                    if pwm:
                        self.primary_writing_mode = pwm
                except Exception:
                    pass
            elif idx == 527:
                try:
                    ppd = content.decode(codec)
                    if ppd:
                        self.page_progression_direction = ppd
                except Exception:
                    pass
            # else:
            #    print 'unknown record', idx, repr(content)
        if title:
            title = clean_xml_chars(clean_ascii_chars(title))
            self.mi.title = entities.replace_entities(title)
예제 #13
0
    def run(self, path_to_output, opts, db, notification=DummyReporter()):
        from ebook_converter.library.catalogs.epub_mobi_builder import CatalogBuilder
        from ebook_converter.utils.logging import default_log as log
        from ebook_converter.utils.config import JSONConfig

        # If preset specified from the cli, insert stored options from JSON file
        if hasattr(opts, 'preset') and opts.preset:
            available_presets = JSONConfig("catalog_presets")
            if opts.preset not in available_presets:
                if available_presets:
                    print('Error: Preset "%s" not found.' % opts.preset)
                    print('Stored presets: %s' %
                          ', '.join([p for p in
                                     sorted(available_presets.keys())]))
                else:
                    print('Error: No stored presets.')
                return 1

            # Copy the relevant preset values to the opts object
            for item in available_presets[opts.preset]:
                if item not in ['exclusion_rules_tw', 'format', 'prefix_rules_tw']:
                    setattr(opts, item, available_presets[opts.preset][item])

            # Provide an unconnected device
            opts.connected_device = {
                         'is_device_connected': False,
                         'kind': None,
                         'name': None,
                         'save_template': None,
                         'serial': None,
                         'storage': None,
                        }

            # Convert prefix_rules and exclusion_rules from JSON lists to tuples
            prs = []
            for rule in opts.prefix_rules:
                prs.append(tuple(rule))
            opts.prefix_rules = tuple(prs)

            ers = []
            for rule in opts.exclusion_rules:
                ers.append(tuple(rule))
            opts.exclusion_rules = tuple(ers)

        opts.log = log
        opts.fmt = self.fmt = path_to_output.rpartition('.')[2]

        # Add local options
        opts.creator = '%s, %s %s, %s' % (date.strftime('%A'), date.strftime('%B'), date.strftime('%d').lstrip('0'), date.strftime('%Y'))
        opts.creator_sort_as = '%s %s' % ('calibre', date.strftime('%Y-%m-%d'))
        opts.connected_kindle = False

        # Finalize output_profile
        op = opts.output_profile
        if op is None:
            op = 'default'

        if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower():
            opts.connected_kindle = True
            if opts.connected_device['serial'] and \
               opts.connected_device['serial'][:4] in ['B004', 'B005']:
                op = "kindle_dx"
            else:
                op = "kindle"

        opts.description_clip = 380 if op.endswith('dx') or 'kindle' not in op else 100
        opts.author_clip = 100 if op.endswith('dx') or 'kindle' not in op else 60
        opts.output_profile = op

        opts.basename = "Catalog"
        opts.cli_environment = not hasattr(opts, 'sync')

        # Hard-wired to always sort descriptions by author, with series after non-series
        opts.sort_descriptions_by_author = True

        build_log = []

        build_log.append("%s('%s'): Generating %s %sin %s environment, locale: '%s'" %
            (self.name,
             current_library_name(),
             self.fmt,
             'for %s ' % opts.output_profile if opts.output_profile else '',
             'CLI' if opts.cli_environment else 'GUI',
             langcode_to_name(canonicalize_lang(get_lang()), localize=False))
             )

        # If exclude_genre is blank, assume user wants all tags as genres
        if opts.exclude_genre.strip() == '':
            # opts.exclude_genre = '\[^.\]'
            # build_log.append(" converting empty exclude_genre to '\[^.\]'")
            opts.exclude_genre = 'a^'
            build_log.append(" converting empty exclude_genre to 'a^'")
        if opts.connected_device['is_device_connected'] and \
           opts.connected_device['kind'] == 'device':
            if opts.connected_device['serial']:
                build_log.append(" connected_device: '%s' #%s%s " %
                    (opts.connected_device['name'],
                     opts.connected_device['serial'][0:4],
                     'x' * (len(opts.connected_device['serial']) - 4)))
                for storage in opts.connected_device['storage']:
                    if storage:
                        build_log.append("  mount point: %s" % storage)
            else:
                build_log.append(" connected_device: '%s'" % opts.connected_device['name'])
                try:
                    for storage in opts.connected_device['storage']:
                        if storage:
                            build_log.append("  mount point: %s" % storage)
                except:
                    build_log.append("  (no mount points)")
        else:
            build_log.append(" connected_device: '%s'" % opts.connected_device['name'])

        opts_dict = vars(opts)
        if opts_dict['ids']:
            build_log.append(" book count: %d" % len(opts_dict['ids']))

        sections_list = []
        if opts.generate_authors:
            sections_list.append('Authors')
        if opts.generate_titles:
            sections_list.append('Titles')
        if opts.generate_series:
            sections_list.append('Series')
        if opts.generate_genres:
            sections_list.append('Genres')
        if opts.generate_recently_added:
            sections_list.append('Recently Added')
        if opts.generate_descriptions:
            sections_list.append('Descriptions')

        if not sections_list:
            if opts.cli_environment:
                opts.log.warn('*** No Section switches specified, enabling all Sections ***')
                opts.generate_authors = True
                opts.generate_titles = True
                opts.generate_series = True
                opts.generate_genres = True
                opts.generate_recently_added = True
                opts.generate_descriptions = True
                sections_list = ['Authors', 'Titles', 'Series', 'Genres', 'Recently Added', 'Descriptions']
            else:
                opts.log.warn('\n*** No enabled Sections, terminating catalog generation ***')
                return ["No Included Sections", "No enabled Sections.\nCheck E-book options tab\n'Included sections'\n"]
        if opts.fmt == 'mobi' and sections_list == ['Descriptions']:
            warning = ("\n*** Adding 'By authors' section required for MOBI "
                       "output ***")
            opts.log.warn(warning)
            sections_list.insert(0, 'Authors')
            opts.generate_authors = True

        opts.log(" Sections: %s" % ', '.join(sections_list))
        opts.section_list = sections_list

        # Limit thumb_width to 1.0" - 2.0"
        try:
            if float(opts.thumb_width) < float(self.THUMB_SMALLEST):
                log.warning("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_SMALLEST))
                opts.thumb_width = self.THUMB_SMALLEST
            if float(opts.thumb_width) > float(self.THUMB_LARGEST):
                log.warning("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_LARGEST))
                opts.thumb_width = self.THUMB_LARGEST
            opts.thumb_width = "%.2f" % float(opts.thumb_width)
        except:
            log.error("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_SMALLEST))
            opts.thumb_width = "1.0"

        # eval prefix_rules if passed from command line
        if type(opts.prefix_rules) is not tuple:
            try:
                opts.prefix_rules = eval(opts.prefix_rules)
            except:
                log.error("malformed --prefix-rules: %s" % opts.prefix_rules)
                raise
            for rule in opts.prefix_rules:
                if len(rule) != 4:
                    log.error("incorrect number of args for --prefix-rules: %s" % repr(rule))

        # eval exclusion_rules if passed from command line
        if type(opts.exclusion_rules) is not tuple:
            try:
                opts.exclusion_rules = eval(opts.exclusion_rules)
            except:
                log.error("malformed --exclusion-rules: %s" % opts.exclusion_rules)
                raise
            for rule in opts.exclusion_rules:
                if len(rule) != 3:
                    log.error("incorrect number of args for --exclusion-rules: %s" % repr(rule))

        # Display opts
        keys = sorted(opts_dict.keys())
        build_log.append(" opts:")
        for key in keys:
            if key in ['catalog_title', 'author_clip', 'connected_kindle', 'creator',
                       'cross_reference_authors', 'description_clip', 'exclude_book_marker',
                       'exclude_genre', 'exclude_tags', 'exclusion_rules', 'fmt',
                       'genre_source_field', 'header_note_source_field', 'merge_comments_rule',
                       'output_profile', 'prefix_rules', 'preset', 'read_book_marker',
                       'search_text', 'sort_by', 'sort_descriptions_by_author', 'sync',
                       'thumb_width', 'use_existing_cover', 'wishlist_tag']:
                build_log.append("  %s: %s" % (key, repr(opts_dict[key])))
        if opts.verbose:
            log('\n'.join(line for line in build_log))

        # Capture start_time
        opts.start_time = time.time()

        self.opts = opts

        if opts.verbose:
            log.info(" Begin catalog source generation (%s)" %
                     str(datetime.timedelta(seconds=int(time.time() - opts.start_time))))

        # Launch the Catalog builder
        catalog = CatalogBuilder(db, opts, self, report_progress=notification)

        try:
            catalog.build_sources()
            if opts.verbose:
                log.info(" Completed catalog source generation (%s)\n"  %
                         str(datetime.timedelta(seconds=int(time.time() - opts.start_time))))
        except (AuthorSortMismatchException, EmptyCatalogException) as e:
            log.error(" *** Terminated catalog generation: %s ***" % e)
        except:
            log.error(" unhandled exception in catalog generator")
            raise

        else:
            recommendations = []
            recommendations.append(('remove_fake_margins', False,
                OptionRecommendation.HIGH))
            recommendations.append(('comments', '', OptionRecommendation.HIGH))

            """
            >>> Use to debug generated catalog code before pipeline conversion <<<
            """
            GENERATE_DEBUG_EPUB = False
            if GENERATE_DEBUG_EPUB:
                catalog_debug_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'Catalog debug')
                setattr(opts, 'debug_pipeline', os.path.expanduser(catalog_debug_path))

            dp = getattr(opts, 'debug_pipeline', None)
            if dp is not None:
                recommendations.append(('debug_pipeline', dp,
                    OptionRecommendation.HIGH))

            if opts.output_profile and opts.output_profile.startswith("kindle"):
                recommendations.append(('output_profile', opts.output_profile,
                    OptionRecommendation.HIGH))
                recommendations.append(('book_producer', opts.output_profile,
                    OptionRecommendation.HIGH))
                if opts.fmt == 'mobi':
                    recommendations.append(('no_inline_toc', True,
                        OptionRecommendation.HIGH))
                    recommendations.append(('verbose', 2,
                        OptionRecommendation.HIGH))

            # Use existing cover or generate new cover
            cpath = None
            existing_cover = False
            try:
                search_text = 'title:"%s" author:%s' % (
                        opts.catalog_title.replace('"', '\\"'), 'calibre')
                matches = db.search(search_text, return_matches=True, sort_results=False)
                if matches:
                    cpath = db.cover(matches[0], index_is_id=True, as_path=True)
                    if cpath and os.path.exists(cpath):
                        existing_cover = True
            except:
                pass

            if self.opts.use_existing_cover and not existing_cover:
                log.warning("no existing catalog cover found")

            if self.opts.use_existing_cover and existing_cover:
                recommendations.append(('cover', cpath, OptionRecommendation.HIGH))
                log.info("using existing catalog cover")
            else:
                # TODO(gryf): feature: generating cover with pillow.
                pass
                # from ebook_converter.ebooks.covers import calibre_cover2
                # log.info("replacing catalog cover")
                # new_cover_path = PersistentTemporaryFile(suffix='.jpg')
                # # new_cover = calibre_cover2(opts.catalog_title, 'calibre')
                # new_cover_path.write('')
                # new_cover_path.close()
                # recommendations.append(('cover', new_cover_path.name, OptionRecommendation.HIGH))

            # Run ebook-convert
            from ebook_converter.ebooks.conversion.plumber import Plumber
            plumber = Plumber(os.path.join(catalog.catalog_path, opts.basename + '.opf'),
                            path_to_output, log, report_progress=notification,
                            abort_after_input_dump=False)
            plumber.merge_ui_recommendations(recommendations)
            plumber.run()

            try:
                os.remove(cpath)
            except:
                pass

            if GENERATE_DEBUG_EPUB:
                from ebook_converter.ebooks.epub import initialize_container
                from ebook_converter.ebooks.tweak import zip_rebuilder
                from ebook_converter.utils.zipfile import ZipFile
                input_path = os.path.join(catalog_debug_path, 'input')
                epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip')
                initialize_container(epub_shell, opf_name='content.opf')
                with ZipFile(epub_shell, 'r') as zf:
                    zf.extractall(path=input_path)
                os.remove(epub_shell)
                zip_rebuilder(input_path, os.path.join(catalog_debug_path, 'input.epub'))

            if opts.verbose:
                log.info(" Catalog creation complete (%s)\n" %
                     str(datetime.timedelta(seconds=int(time.time() - opts.start_time))))

        # returns to gui2.actions.catalog:catalog_generated()
        return catalog.error
예제 #14
0
def metadata_to_xmp_packet(mi):
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]
    dc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('dc'))
    dc.set(expand('rdf:about'), '')
    rdf.append(dc)
    for prop, tag in {
            'title': 'dc:title',
            'comments': 'dc:description'
    }.items():
        val = mi.get(prop) or ''
        create_alt_property(dc, tag, val)
    for prop, (tag, ordered) in {
            'authors': ('dc:creator', True),
            'tags': ('dc:subject', False),
            'publisher': ('dc:publisher', False)
    }.items():
        val = mi.get(prop) or ()
        if isinstance(val, (str, bytes)):
            val = [val]
        create_sequence_property(dc, tag, val, ordered)
    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        create_sequence_property(dc, 'dc:date',
                                 [isoformat(mi.pubdate, as_utc=False)])
    if not mi.is_null('languages'):
        langs = list(
            filter(
                None,
                map(lambda x: lang_as_iso639_1(x) or canonicalize_lang(x),
                    mi.languages)))
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    xmp = rdf.makeelement(expand('rdf:Description'),
                          nsmap=nsmap('xmp', 'xmpidq'))
    xmp.set(expand('rdf:about'), '')
    rdf.append(xmp)
    extra_ids = {}
    for x in ('prism', 'pdfx'):
        p = extra_ids[x] = rdf.makeelement(expand('rdf:Description'),
                                           nsmap=nsmap(x))
        p.set(expand('rdf:about'), '')
        rdf.append(p)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, val in identifiers.items():
            if scheme in {'isbn', 'doi'}:
                for prefix, parent in extra_ids.items():
                    ie = parent.makeelement(expand('%s:%s' % (prefix, scheme)))
                    ie.text = val
                    parent.append(ie)

    d = xmp.makeelement(expand('xmp:MetadataDate'))
    d.text = isoformat(now(), as_utc=False)
    xmp.append(d)

    calibre = rdf.makeelement(expand('rdf:Description'),
                              nsmap=nsmap('calibre', 'calibreSI', 'calibreCC'))
    calibre.set(expand('rdf:about'), '')
    rdf.append(calibre)
    if not mi.is_null('rating'):
        try:
            r = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, 'calibre:rating', '%g' % r)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp',
                               isoformat(mi.timestamp, as_utc=False))
    for x in ('author_link_map', 'user_categories'):
        val = getattr(mi, x, None)
        if val:
            create_simple_property(calibre, 'calibre:' + x, dump_dict(val))

    for x in ('title_sort', 'author_sort'):
        if not mi.is_null(x):
            create_simple_property(calibre, 'calibre:' + x, getattr(mi, x))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
예제 #15
0
def get_metadata(stream, extract_cover=True):
    whitespace = re.compile(r'\s+')

    def normalize(s):
        return whitespace.sub(' ', s).strip()

    with ZipFile(stream) as zf:
        meta = zf.read('meta.xml')
        root = fromstring(meta)

        def find(field):
            ns, tag = fields[field]
            ans = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns})
            if ans:
                return normalize(
                    tostring(ans[0],
                             method='text',
                             encoding='unicode',
                             with_tail=False)).strip()

        mi = MetaInformation(None, [])
        title = find('title')
        if title:
            mi.title = title
        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)
        desc = find('description')
        if desc:
            mi.comments = desc
        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]
        kw = find('keyword') or find('keywords')
        if kw:
            mi.tags = [x.strip() for x in kw.split(',') if x.strip()]
        data = {}
        for tag in root.xpath('//ns0:user-defined',
                              namespaces={'ns0': fields['user-defined'][0]}):
            name = (tag.get('{%s}name' % METANS) or '').lower()
            vtype = tag.get('{%s}value-type' % METANS) or 'string'
            val = tag.text
            if name and val:
                if vtype == 'boolean':
                    val = val == 'true'
                data[name] = val
        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True
            if data.get('opf.titlesort', ''):
                mi.title_sort = data['opf.titlesort']
            if data.get('opf.authors', ''):
                mi.authors = string_to_authors(data['opf.authors'])
            if data.get('opf.authorsort', ''):
                mi.author_sort = data['opf.authorsort']
            if data.get('opf.isbn', ''):
                isbn = check_isbn(data['opf.isbn'])
                if isbn is not None:
                    mi.isbn = isbn
            if data.get('opf.publisher', ''):
                mi.publisher = data['opf.publisher']
            if data.get('opf.pubdate', ''):
                mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
            if data.get('opf.identifiers'):
                try:
                    mi.identifiers = json.loads(data['opf.identifiers'])
                except Exception:
                    pass
            if data.get('opf.rating'):
                try:
                    mi.rating = max(0, min(float(data['opf.rating']), 10))
                except Exception:
                    pass
            if data.get('opf.series', ''):
                mi.series = data['opf.series']
                if data.get('opf.seriesindex', ''):
                    try:
                        mi.series_index = float(data['opf.seriesindex'])
                    except Exception:
                        mi.series_index = 1.0
            if data.get('opf.language', ''):
                cl = canonicalize_lang(data['opf.language'])
                if cl:
                    mi.languages = [cl]
            opfnocover = data.get('opf.nocover', False)
        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi