Exemple #1
0
def commit_toc(container, toc, lang=None, uid=None):
    '''
    Serialise ``toc`` with ``create_ncx`` and store the result in ``container``.

    The file reported by ``find_existing_toc`` is replaced; when there is
    none, a new ``toc.ncx`` item is generated in the container first.

    :param container: The book container to update.
    :param toc: The table of contents handed to ``create_ncx``.
    :param lang: Language of the ToC. When falsy, the first usable
        ``dc:language`` entry of the OPF is used, with ``get_lang()`` as the
        fallback.
    :param uid: Identifier of the ToC. When falsy, the text of the OPF element
        referenced by ``unique-identifier`` is used, with ``uuid_id()`` as the
        fallback.
    '''
    tocname = find_existing_toc(container)
    if tocname is None:
        new_item = container.generate_item("toc.ncx", id_prefix="toc")
        tocname = container.href_to_name(
            new_item.get("href"), base=container.opf_name)

    if not lang:
        lang = get_lang()
        for node in container.opf_xpath("//dc:language"):
            code = canonicalize_lang(xml2text(node).strip())
            if not code:
                continue
            # Only the first language that canonicalizes is considered
            lang = lang_as_iso639_1(code) or code
            break
    lang = lang_as_iso639_1(lang) or lang

    if not uid:
        uid = uuid_id()
        ident_id = container.opf.get("unique-identifier", None)
        matches = (
            container.opf_xpath('//*[@id="%s"]' % ident_id)
            if ident_id else None)
        if matches:
            uid = xml2text(matches[0])

    # The book title doubles as the ToC title when the OPF declares one
    title = _("Table of Contents")
    title_nodes = container.opf_xpath("//dc:title")
    if title_nodes:
        title = xml2text(title_nodes[0]).strip() or title

    ncx_root = create_ncx(
        toc, partial(container.name_to_href, base=tocname), title, lang, uid)
    container.replace(tocname, ncx_root)
    container.pretty_print.add(tocname)
Exemple #2
0
def commit_ncx_toc(container, toc, lang=None, uid=None):
    '''
    Serialise ``toc`` with ``create_ncx`` and store the result in ``container``.

    The file reported by ``find_existing_ncx_toc`` is replaced; when there is
    none, a new ``toc.ncx`` item is generated and every ``opf:spine`` element
    gets its ``toc`` attribute pointed at the new item.

    :param container: The book container to update.
    :param toc: The table of contents handed to ``create_ncx``.
    :param lang: Language of the ToC. When falsy, the first usable
        ``dc:language`` entry of the OPF is used, with ``get_lang()`` as the
        fallback.
    :param uid: Identifier of the ToC. When falsy, the text of the OPF element
        referenced by ``unique-identifier`` is used, with ``uuid_id()`` as the
        fallback.
    '''
    tocname = find_existing_ncx_toc(container)
    if tocname is None:
        item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        ncx_id = item.get('id')
        # Plain loop: the calls are made for their side effect only, so there
        # is no point in building a throwaway list of their return values
        for spine in container.opf_xpath('//opf:spine'):
            spine.set('toc', ncx_id)
    if not lang:
        lang = get_lang()
        for l in container.opf_xpath('//dc:language'):
            l = canonicalize_lang(xml2text(l).strip())
            if l:
                # Only the first language that canonicalizes is considered
                lang = lang_as_iso639_1(l) or l
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        uid = uuid_id()
        eid = container.opf.get('unique-identifier', None)
        if eid:
            m = container.opf_xpath('//*[@id="%s"]'%eid)
            if m:
                uid = xml2text(m[0])

    # The book title doubles as the ToC title when the OPF declares one
    title = _('Table of Contents')
    m = container.opf_xpath('//dc:title')
    if m:
        x = xml2text(m[0]).strip()
        title = x or title

    to_href = partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)
Exemple #3
0
def commit_toc(container, toc, lang=None, uid=None):
    '''
    Serialise ``toc`` with ``create_ncx`` and store the result in ``container``.

    The file reported by ``find_existing_toc`` is replaced; when there is
    none, a new ``toc.ncx`` item is generated in the container first.

    :param container: The book container to update.
    :param toc: The table of contents handed to ``create_ncx``.
    :param lang: Language of the ToC. When falsy, the first usable
        ``dc:language`` entry of the OPF is used, with ``get_lang()`` as the
        fallback.
    :param uid: Identifier of the ToC. When falsy, the text of the OPF element
        referenced by ``unique-identifier`` is used, with ``uuid_id()`` as the
        fallback.
    '''
    tocname = find_existing_toc(container)
    if tocname is None:
        new_item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(
            new_item.get('href'), base=container.opf_name)

    if not lang:
        lang = get_lang()
        for node in container.opf_xpath('//dc:language'):
            code = canonicalize_lang(xml2text(node).strip())
            if not code:
                continue
            # Only the first language that canonicalizes is considered
            lang = lang_as_iso639_1(code) or code
            break
    lang = lang_as_iso639_1(lang) or lang

    if not uid:
        uid = uuid_id()
        ident_id = container.opf.get('unique-identifier', None)
        matches = (
            container.opf_xpath('//*[@id="%s"]' % ident_id)
            if ident_id else None)
        if matches:
            uid = xml2text(matches[0])

    # The book title doubles as the ToC title when the OPF declares one
    title = _('Table of Contents')
    title_nodes = container.opf_xpath('//dc:title')
    if title_nodes:
        title = xml2text(title_nodes[0]).strip() or title

    ncx_root = create_ncx(
        toc, partial(container.name_to_href, base=tocname), title, lang, uid)
    container.replace(tocname, ncx_root)
    container.pretty_print.add(tocname)
Exemple #4
0
 def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
     '''
     Open the DOCX input and build the empty HTML skeleton for the output.

     :param path_or_stream: Handed to ``DOCX`` together with ``log``.
     :param dest_dir: Stored as ``self.dest_dir``; ``os.getcwdu()`` is used
         when it is falsy.
     :param log: Handed to ``DOCX``; ``self.log`` is then taken from
         ``self.docx.log``.
     :param notes_text: Stored as ``self.notes_text``; the translated string
         "Notes" is used when it is falsy.
     '''
     self.docx = DOCX(path_or_stream, log=log)
     self.ms_pat = re.compile(r'\s{2,}')
     self.ws_pat = re.compile(r'[\n\r\t]')
     self.log = self.docx.log
     self.notes_text = notes_text or _('Notes')
     self.dest_dir = dest_dir or os.getcwdu()
     self.mi = self.docx.metadata
     self.body = BODY()
     self.tables = Tables()
     self.styles = Styles(self.tables)
     self.images = Images()
     self.object_map = OrderedDict()
     self.html = HTML(
         HEAD(
             META(charset='utf-8'),
             TITLE(self.mi.title or _('Unknown')),
             LINK(rel='stylesheet', type='text/css', href='docx.css'),
         ),
         self.body
     )

     # Hand-set text/tail whitespace so the skeleton is indented with tabs
     head, body = self.html[0], self.html[1]
     self.html.text = '\n\t'
     head.text, head.tail = '\n\t\t', '\n'
     for child in head:
         child.tail = '\n\t\t'
     head[-1].tail = '\n\t'
     body.text = body.tail = '\n'

     # Only a known language ('und' is skipped) with an ISO 639-1 form is
     # declared on the <html> element
     canonical = canonicalize_lang(self.mi.language)
     short_code = None
     if canonical and canonical != 'und':
         short_code = lang_as_iso639_1(canonical)
     if short_code:
         self.html.set('lang', short_code)
Exemple #5
0
def update_metadata(opf, mi, apply_null=False, update_timestamp=False):
    '''
    Copy the metadata in ``mi`` into ``opf``.

    Note that ``mi`` itself is modified: its ``guide``, ``toc``, ``manifest``
    and ``spine`` attributes are set to None and its languages are rewritten
    to their ISO 639-1 form where one exists.

    :param opf: The OPF object to update.
    :param mi: The metadata to apply.
    :param apply_null: When true, empty ``series``, ``tags``, ``isbn`` and
        ``comments`` in ``mi`` also clear the matching ``opf`` fields.
    :param update_timestamp: When true and ``mi.timestamp`` is not None, the
        timestamp is copied as well.
    '''
    for structural in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, structural, None)
    if mi.languages:
        mi.languages = [lang_as_iso639_1(code) or code for code in mi.languages]

    opf.smart_update(mi)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null:
        for name in ('series', 'tags', 'isbn', 'comments'):
            # tags are cleared to an empty list, everything else to None
            blank = [] if name == 'tags' else None
            if not getattr(mi, name, blank):
                setattr(opf, name, blank)
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
Exemple #6
0
def ensure_container_has_nav(container, lang=None, previous_nav=None):
    '''
    Make sure ``container`` has a nav document and return it.

    :param container: The book container to inspect and, if needed, extend.
    :param lang: When truthy, set as the ``lang`` and ``xml:lang``-style
        (``{XML_NS}lang``) attributes of the nav root, in ISO 639-1 form where
        one exists.
    :param previous_nav: Optional pair ``(href, root)``. If the file named by
        ``href`` still exists it is used as the nav document; if a new nav
        file has to be created, ``root`` becomes its content.
    :return: ``(tocname, root)`` - the name of the nav file and its parsed root.
    '''
    tocname = find_existing_nav_toc(container)
    have_previous = previous_nav is not None
    if have_previous:
        candidate = container.href_to_name(previous_nav[0])
        if candidate and container.exists(candidate):
            tocname = candidate
            container.apply_unique_properties(tocname, 'nav')

    if tocname is not None:
        root = container.parsed(tocname)
    else:
        # No nav document yet: create one, seeded from the previous nav if any
        item = container.generate_item('nav.xhtml', id_prefix='nav')
        item.set('properties', 'nav')
        tocname = container.href_to_name(
            item.get('href'), base=container.opf_name)
        if have_previous:
            root = previous_nav[1]
        else:
            template = P('templates/new_nav.html', data=True).decode('utf-8')
            root = container.parse_xhtml(template)
        container.replace(tocname, root)

    if lang:
        lang = lang_as_iso639_1(lang) or lang
        for attribute in ('lang', '{%s}lang' % XML_NS):
            root.set(attribute, lang)
    return tocname, root
Exemple #7
0
def get_iterator(lang):
    '''
    Return the word break iterator cached in ``_iterators`` for ``lang``,
    creating and caching it on first use.

    The iterator is created for the ISO 639-1 form of ``lang`` when one
    exists and for ``lang`` itself otherwise; the cache key is always ``lang``.
    '''
    breaker = _iterators.get(lang)
    if breaker is not None:
        return breaker
    breaker = _icu.BreakIterator(
        _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
    _iterators[lang] = breaker
    return breaker
Exemple #8
0
def default_lookup_website(lang):
    '''
    Return the default word lookup URL template for ``lang``.

    The template ends in a ``{word}`` placeholder. English uses wordnik.com,
    every other language the wiktionary.org subdomain named after its code.
    '''
    code = lang_as_iso639_1(lang) or lang
    prefix = (
        "https://www.wordnik.com/words/" if code == "en"
        else "http://%s.wiktionary.org/wiki/" % code
    )
    return prefix + "{word}"
Exemple #9
0
def default_lookup_website(lang):
    '''
    Return the default word lookup URL template for ``lang``.

    The template ends in a ``{word}`` placeholder. English uses wordnik.com,
    every other language the wiktionary.org subdomain named after its code.
    '''
    code = lang_as_iso639_1(lang) or lang
    prefix = (
        'https://www.wordnik.com/words/' if code == 'en'
        else 'http://%s.wiktionary.org/wiki/' % code
    )
    return prefix + '{word}'
Exemple #10
0
def update_metadata(opf, mi, apply_null=False, update_timestamp=False):
    '''
    Copy the metadata in ``mi`` into ``opf``.

    Note that ``mi`` itself is modified: its ``guide``, ``toc``, ``manifest``
    and ``spine`` attributes are set to None and its languages are rewritten
    to their ISO 639-1 form where one exists.

    :param opf: The OPF object to update.
    :param mi: The metadata to apply.
    :param apply_null: When true, empty ``series``, ``tags``, ``isbn`` and
        ``comments`` in ``mi`` also clear the matching ``opf`` fields.
    :param update_timestamp: When true and ``mi.timestamp`` is not None, the
        timestamp is copied as well.
    '''
    for structural in ("guide", "toc", "manifest", "spine"):
        setattr(mi, structural, None)
    if mi.languages:
        mi.languages = [lang_as_iso639_1(code) or code for code in mi.languages]

    opf.smart_update(mi)
    if getattr(mi, "uuid", None):
        opf.application_id = mi.uuid
    if apply_null:
        for name in ("series", "tags", "isbn", "comments"):
            # tags are cleared to an empty list, everything else to None
            blank = [] if name == "tags" else None
            if not getattr(mi, name, blank):
                setattr(opf, name, blank)
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
Exemple #11
0
def count_words(text, lang='en'):
    '''
    Return the number of entries ``split2()`` yields for ``text``, using the
    word break iterator for ``lang``.

    The iterator is cached in ``_iterators`` under ``lang`` and all access to
    it happens while holding ``_lock``.
    '''
    with _lock:
        breaker = _iterators.get(lang, None)
        if breaker is None:
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(text)
        words = breaker.split2()
        return len(words)
Exemple #12
0
def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from the table of contents
    returned by ``get_toc``.

    An inline ToC that already exists in the book is overwritten, otherwise a
    new file is added at the start of the spine. The guide is updated to
    point at the result.

    :param title: The title for this table of contents. Defaults to
        "Table of Contents", translated when the book has a language.
    :return: The name of the inline ToC file, or None when the ToC is empty.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    if not title:
        title = default_title
    toc = get_toc(container)
    if not len(toc):
        return None

    name = find_inline_toc(container)
    raw = serialize(toc_to_html(toc, container, name, title, lang), 'text/html')
    if name is not None:
        # Overwrite the inline ToC that is already in the book
        with container.open(name, 'wb') as f:
            f.write(raw)
    else:
        # Use the first free name of toc.xhtml, toc1.xhtml, toc2.xhtml, ...
        suffix = 0
        name = 'toc.xhtml'
        while container.has_name(name):
            suffix += 1
            name = 'toc%d.xhtml' % suffix
        container.add_file(name, raw, spine_index=0)
    set_guide_item(
        container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name
Exemple #13
0
 def norm(x):
     '''
     Return ``x`` as a ``language[-country]`` string.

     The language is the ISO 639-1 form of ``x.langcode`` where one exists.
     The country is ``x.countrycode``, or the ``cc_map`` entry for the
     language when ``x`` has none; it is omitted when neither is available.
     '''
     code = x.langcode
     country = x.countrycode or cc_map.get(code, None)
     short = lang_as_iso639_1(code) or code
     if not country:
         return short
     return short + ('-' + country)
Exemple #14
0
 def lang_name(l):
     '''
     Normalise the language code ``l`` to a lower-case, hyphen-separated name.

     Codes without an ISO 639-1 form become English, and ``en``, ``nb`` and
     ``el`` are expanded to ``en-us``, ``nb-no`` and ``el-monoton``.
     '''
     code = lang_as_iso639_1(l.lower()) or 'en'
     expansions = {'en': 'en-us', 'nb': 'nb-no', 'el': 'el-monoton'}
     name = expansions.get(code, code)
     return name.lower().replace('_', '-')
Exemple #15
0
def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from the table of contents
    returned by ``get_toc``.

    An inline ToC that already exists in the book is overwritten, otherwise a
    new file is added at the start of the spine. The guide is updated to
    point at the result.

    :param title: The title for this table of contents. Defaults to
        "Table of Contents", translated when the book has a language.
    :return: The name of the inline ToC file, or None when the ToC is empty.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    if not title:
        title = default_title
    toc = get_toc(container)
    if not len(toc):
        return None

    name = find_inline_toc(container)
    raw = serialize(toc_to_html(toc, container, name, title, lang), 'text/html')
    if name is not None:
        # Overwrite the inline ToC that is already in the book
        with container.open(name, 'wb') as f:
            f.write(raw)
    else:
        # Use the first free name of toc.xhtml, toc1.xhtml, toc2.xhtml, ...
        suffix = 0
        name = 'toc.xhtml'
        while container.has_name(name):
            suffix += 1
            name = 'toc%d.xhtml' % suffix
        container.add_file(name, raw, spine_index=0)
    set_guide_item(
        container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name
Exemple #16
0
 def norm(x):
     '''
     Return ``x`` as a ``language[-country]`` string.

     The language is the ISO 639-1 form of ``x.langcode`` where one exists.
     The country is ``x.countrycode``, or the ``cc_map`` entry for the
     language when ``x`` has none; it is omitted when neither is available.
     '''
     code = x.langcode
     country = x.countrycode or cc_map.get(code, None)
     short = lang_as_iso639_1(code) or code
     if not country:
         return short
     return short + ('-' + country)
def split_into_words_and_positions(text, lang='en'):
    '''
    Return the result of ``split2()`` for ``text``, using the word break
    iterator for ``lang``.

    The iterator is cached in ``_iterators`` under ``lang`` and all access to
    it happens while holding ``_lock``.
    '''
    with _lock:
        breaker = _iterators.get(lang, None)
        if breaker is None:
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(text)
        return breaker.split2()
def index_of(needle, haystack, lang='en'):
    '''
    Return what the word break iterator for ``lang`` reports as the index of
    ``needle`` in ``haystack``.

    The iterator is cached in ``_iterators`` under ``lang`` and all access to
    it happens while holding ``_lock``.
    '''
    with _lock:
        breaker = _iterators.get(lang, None)
        if breaker is None:
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(haystack)
        return breaker.index(needle)
Exemple #19
0
def default_lookup_website(lang):
    '''
    Return the default word lookup URL template for ``lang``.

    The template ends in a ``{word}`` placeholder. English uses wordnik.com,
    every other language the wiktionary.org subdomain named after its code.
    '''
    code = lang_as_iso639_1(lang) or lang
    prefix = (
        'https://www.wordnik.com/words/' if code == 'en'
        else 'http://%s.wiktionary.org/wiki/' % code
    )
    return prefix + '{word}'
Exemple #20
0
 def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
     '''
     Open the DOCX input and build the empty HTML skeleton for the output.

     :param path_or_stream: Handed to ``DOCX`` together with ``log``.
     :param dest_dir: Stored as ``self.dest_dir``; ``os.getcwdu()`` is used
         when it is falsy.
     :param log: Handed to ``DOCX``; ``self.log`` is then taken from
         ``self.docx.log``.
     :param notes_text: Stored as ``self.notes_text``; the translated string
         "Notes" is used when it is falsy.
     '''
     self.docx = DOCX(path_or_stream, log=log)
     self.log = self.docx.log
     self.notes_text = notes_text or _("Notes")
     self.dest_dir = dest_dir or os.getcwdu()
     self.mi = self.docx.metadata
     self.body = BODY()
     self.styles = Styles()
     self.images = Images()
     self.tables = Tables()
     self.object_map = OrderedDict()
     self.html = HTML(
         HEAD(
             META(charset="utf-8"),
             TITLE(self.mi.title or _("Unknown")),
             LINK(rel="stylesheet", type="text/css", href="docx.css"),
         ),
         self.body,
     )

     # Hand-set text/tail whitespace so the skeleton is indented with tabs
     head, body = self.html[0], self.html[1]
     self.html.text = "\n\t"
     head.text, head.tail = "\n\t\t", "\n"
     for child in head:
         child.tail = "\n\t\t"
     head[-1].tail = "\n\t"
     body.text = body.tail = "\n"

     # Only a known language ("und" is skipped) with an ISO 639-1 form is
     # declared on the <html> element
     canonical = canonicalize_lang(self.mi.language)
     short_code = None
     if canonical and canonical != "und":
         short_code = lang_as_iso639_1(canonical)
     if short_code:
         self.html.set("lang", short_code)
Exemple #21
0
 def lang_name(l):
     '''
     Normalise the language code ``l`` to a lower-case, hyphen-separated name.

     Codes without an ISO 639-1 form become English, and ``en``, ``nb`` and
     ``el`` are expanded to ``en-us``, ``nb-no`` and ``el-monoton``.
     '''
     code = lang_as_iso639_1(l.lower()) or 'en'
     expansions = {'en':'en-us', 'nb':'nb-no', 'el':'el-monoton'}
     name = expansions.get(code, code)
     return name.lower().replace('_', '-')
Exemple #22
0
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata of the new book; used to generate the OPF, the title
        and authors of the start page, and the ToC.
    :param path: Destination file; converted to an absolute path first.
    :param fmt: ``'azw3'`` writes an AZW3 file, any other value writes an
        EPUB zip archive.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the start HTML file inside the book.
    :param toc_name: Name of the NCX file inside the book.
    '''
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    # The book language is the first non-empty language element of the OPF
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Add a manifest (start page + NCX) and a one-entry spine to the OPF
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    # Fill the start page template, then normalise it by parsing and
    # re-serialising
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        # AZW3 is produced from the loose files written to a temporary directory
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            # The mimetype entry is written first and stored uncompressed
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            # 0o755 is the octal spelling accepted by both Python 2.6+ and
            # Python 3; the legacy 0755 form is a syntax error on Python 3.
            # NOTE(review): the third argument is presumably the permission
            # bits of the directory entry - confirm against this ZipFile class.
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
Exemple #23
0
def dictionary_name_for_locale(loc):
    '''
    Return the ``locale_map()`` entry that best matches the locale ``loc``.

    ``loc`` is lower-cased and ``-`` is treated as ``_``. Lookups are tried in
    this order: the full locale, its first two parts (when it has more than
    two), the ISO 639-1 language alone, a fixed default variant for ``en``,
    ``de`` and ``es``, and finally the first entry whose key starts with
    ``<language>_``.

    :return: The matching entry, or None when the language has no ISO 639-1
        form or nothing matches.
    '''
    normalized = loc.lower().replace('-', '_')
    lmap = locale_map()
    parts = normalized.split('_')
    candidates = [normalized]
    if len(parts) > 2:
        candidates.append('_'.join(parts[:2]))
    for candidate in candidates:
        if candidate in lmap:
            return lmap[candidate]

    language = lang_as_iso639_1(parts[0])
    if not language:
        return None
    if language in lmap:
        return lmap[language]

    # Languages with a fixed preferred variant
    preferred = {'en': 'en_us', 'de': 'de_de', 'es': 'es_es'}
    if language in preferred:
        return lmap[preferred[language]]

    prefix = language + '_'
    for key, value in iteritems(lmap):
        if key.startswith(prefix):
            return value
    return None
Exemple #24
0
def update_metadata(opf, mi, apply_null=False, update_timestamp=False, force_identifiers=False):
    '''
    Copy the metadata in ``mi`` into ``opf``.

    Note that ``mi`` itself is modified: its ``guide``, ``toc``, ``manifest``
    and ``spine`` attributes are set to None and its languages are rewritten
    to their ISO 639-1 form where one exists.

    :param opf: The OPF object to update.
    :param mi: The metadata to apply.
    :param apply_null: When true, empty ``series``, ``tags``, ``isbn`` and
        ``comments`` in ``mi`` also clear the matching ``opf`` fields, and the
        identifiers of ``opf`` are replaced rather than merged.
    :param update_timestamp: When true and ``mi.timestamp`` is not None, the
        timestamp is copied as well.
    :param force_identifiers: When true, the identifiers of ``opf`` are
        replaced by those of ``mi`` rather than merged with them.
    '''
    for structural in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, structural, None)
    if mi.languages:
        mi.languages = [lang_as_iso639_1(code) or code for code in mi.languages]

    opf.smart_update(mi)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null:
        for name in ('series', 'tags', 'isbn', 'comments'):
            # tags are cleared to an empty list, everything else to None
            blank = [] if name == 'tags' else None
            if not getattr(mi, name, blank):
                setattr(opf, name, blank)

    if not (apply_null or force_identifiers):
        # Merge: identifiers from mi win, empty keys/values are dropped
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        cleaned = dict(
            (key, val) for key, val in merged.iteritems() if key and val)
        opf.set_identifiers(cleaned)
    else:
        opf.set_identifiers(mi.get_identifiers())

    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
Exemple #25
0
 def __init__(self,
              path_or_stream,
              dest_dir=None,
              log=None,
              notes_text=None):
     '''
     Open the DOCX input and build the empty HTML skeleton for the output.

     :param path_or_stream: Handed to ``DOCX`` together with ``log``.
     :param dest_dir: Stored as ``self.dest_dir``; ``os.getcwdu()`` is used
         when it is falsy.
     :param log: Handed to ``DOCX``; ``self.log`` is then taken from
         ``self.docx.log``.
     :param notes_text: Stored as ``self.notes_text``; the translated string
         "Notes" is used when it is falsy.
     '''
     self.docx = DOCX(path_or_stream, log=log)
     self.log = self.docx.log
     self.notes_text = notes_text or _('Notes')
     self.dest_dir = dest_dir or os.getcwdu()
     self.mi = self.docx.metadata
     self.body = BODY()
     self.styles = Styles()
     self.images = Images()
     self.tables = Tables()
     self.object_map = OrderedDict()
     self.html = HTML(
         HEAD(
             META(charset='utf-8'),
             TITLE(self.mi.title or _('Unknown')),
             LINK(rel='stylesheet', type='text/css', href='docx.css'),
         ),
         self.body,
     )

     # Hand-set text/tail whitespace so the skeleton is indented with tabs
     head, body = self.html[0], self.html[1]
     self.html.text = '\n\t'
     head.text, head.tail = '\n\t\t', '\n'
     for child in head:
         child.tail = '\n\t\t'
     head[-1].tail = '\n\t'
     body.text = body.tail = '\n'

     # Only a known language ('und' is skipped) with an ISO 639-1 form is
     # declared on the <html> element
     canonical = canonicalize_lang(self.mi.language)
     short_code = None
     if canonical and canonical != 'und':
         short_code = lang_as_iso639_1(canonical)
     if short_code:
         self.html.set('lang', short_code)
Exemple #26
0
    def convert_calibre_md_to_comic_md(self):
        '''
        Translate the calibre metadata of this book into comictagger metadata.

        The result is stored in ``self.calibre_md_in_comic_format``. Nothing
        is done when that attribute is already set.
        '''
        from calibre.utils.html2text import html2text
        from calibre.utils.date import UNDEFINED_DATE
        from calibre.utils.localization import lang_as_iso639_1

        if self.calibre_md_in_comic_format:
            return

        self.calibre_md_in_comic_format = GenericMetadata()
        comic_md = self.calibre_md_in_comic_format
        mi = self.calibre_metadata

        # Shorthands bound to the freshly created comic metadata
        role = partial(set_role, credits=comic_md.credits)
        update_field = partial(update_comic_field, target=comic_md)

        # Standard calibre fields
        update_field("title", mi.title)
        role("Writer", mi.authors)
        standard_fields = (
            ("series", "series"),
            ("issue", "series_index"),
            ("tags", "tags"),
            ("publisher", "publisher"),
            ("criticalRating", "rating"),
        )
        for comic_key, calibre_attr in standard_fields:
            update_field(comic_key, getattr(mi, calibre_attr))

        # These can be empty and must not be converted then
        if mi.comments:
            update_field("comments", html2text(mi.comments))
        if mi.language:
            update_field("language", lang_as_iso639_1(mi.language))
        pubdate = mi.pubdate
        if pubdate != UNDEFINED_DATE:
            for part in ("year", "month", "day"):
                update_field(part, getattr(pubdate, part))

        # Custom columns, looked up through the column names in prefs
        field = partial(self.db.field_for, book_id=self.book_id)

        artist_columns = (
            ("Penciller", 'penciller_column'),
            ("Inker", 'inker_column'),
            ("Colorist", 'colorist_column'),
            ("Letterer", 'letterer_column'),
            ("CoverArtist", 'cover_artist_column'),
            ("Editor", 'editor_column'),
        )
        for credit, pref_key in artist_columns:
            role(credit, field(prefs[pref_key]))

        other_columns = (
            ("storyArc", 'storyarc_column'),
            ("characters", 'characters_column'),
            ("teams", 'teams_column'),
            ("locations", 'locations_column'),
            ("volume", 'volume_column'),
            ("genre", 'genre_column'),
            ("issueCount", 'count_column'),
            ("pageCount", 'pages_column'),
        )
        for comic_key, pref_key in other_columns:
            update_field(comic_key, field(prefs[pref_key]))
        update_field("webLink", get_link(field(prefs['comicvine_column'])))
        update_field("manga", field(prefs['manga_column']))
def index_of(needle, haystack, lang='en'):
    '''
    Return what the word break iterator for ``lang`` reports as the index of
    ``needle`` in ``haystack``.

    The iterator is cached in ``_iterators`` under ``lang`` and all access to
    it happens while holding ``_lock``.
    '''
    with _lock:
        breaker = _iterators.get(lang, None)
        if breaker is None:
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(haystack)
        return breaker.index(needle)
def count_words(text, lang='en'):
    '''
    Return the number of entries ``split2()`` yields for ``text``, using the
    word break iterator for ``lang``.

    The iterator is cached in ``_iterators`` under ``lang`` and all access to
    it happens while holding ``_lock``.
    '''
    with _lock:
        breaker = _iterators.get(lang, None)
        if breaker is None:
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(text)
        words = breaker.split2()
        return len(words)
def split_into_words_and_positions(text, lang='en'):
    '''
    Return the result of ``split2()`` for ``text``, using the word break
    iterator for ``lang``.

    The iterator is cached in ``_iterators`` under ``lang`` and all access to
    it happens while holding ``_lock``.
    '''
    with _lock:
        breaker = _iterators.get(lang, None)
        if breaker is None:
            breaker = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = breaker
        breaker.set_text(text)
        return breaker.split2()
    def convert_calibre_md_to_comic_md(self):
        '''
        Translate the calibre metadata of this book into comictagger metadata.

        The result is stored in ``self.calibre_md_in_comic_format``. Nothing
        is done when that attribute is already set.
        '''
        from calibre.utils.html2text import html2text
        from calibre.utils.date import UNDEFINED_DATE
        from calibre.utils.localization import lang_as_iso639_1

        if self.calibre_md_in_comic_format:
            return

        self.calibre_md_in_comic_format = GenericMetadata()
        comic_md = self.calibre_md_in_comic_format
        mi = self.calibre_metadata

        # Shorthands bound to the freshly created comic metadata
        role = partial(set_role, credits=comic_md.credits)
        update_field = partial(update_comic_field, target=comic_md)

        # Standard calibre fields
        update_field("title", mi.title)
        role("Writer", mi.authors)
        standard_fields = (
            ("series", "series"),
            ("issue", "series_index"),
            ("tags", "tags"),
            ("publisher", "publisher"),
            ("criticalRating", "rating"),
        )
        for comic_key, calibre_attr in standard_fields:
            update_field(comic_key, getattr(mi, calibre_attr))

        # These can be empty and must not be converted then
        if mi.comments:
            update_field("comments", html2text(mi.comments))
        if mi.language:
            update_field("language", lang_as_iso639_1(mi.language))
        pubdate = mi.pubdate
        if pubdate != UNDEFINED_DATE:
            for part in ("year", "month", "day"):
                update_field(part, getattr(pubdate, part))

        # Custom columns, looked up through the column names in prefs
        field = partial(self.db.field_for, book_id=self.book_id)

        artist_columns = (
            ("Penciller", 'penciller_column'),
            ("Inker", 'inker_column'),
            ("Colorist", 'colorist_column'),
            ("Letterer", 'letterer_column'),
            ("CoverArtist", 'cover_artist_column'),
            ("Editor", 'editor_column'),
        )
        for credit, pref_key in artist_columns:
            role(credit, field(prefs[pref_key]))

        other_columns = (
            ("storyArc", 'storyarc_column'),
            ("characters", 'characters_column'),
            ("teams", 'teams_column'),
            ("locations", 'locations_column'),
            ("volume", 'volume_column'),
            ("genre", 'genre_column'),
        )
        for comic_key, pref_key in other_columns:
            update_field(comic_key, field(prefs[pref_key]))
Exemple #31
0
 def lookup(self, word):
     '''
     Open the online dictionary page for ``word`` via ``open_url``.

     The language is taken from ``self.view.current_language``. English is
     looked up on wordnik.com, every other language on the wiktionary.org
     subdomain named after the language code.

     :param word: The word to look up; it is UTF-8 encoded and URL-quoted.
     '''
     from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
     from urllib import quote
     current = self.view.current_language
     # When the language has no ISO 639-1 form, fall back to the canonical
     # code of the *current language*. The fallback used to be computed from
     # the already-empty ISO 639-1 result, so it could never see the language.
     lang = lang_as_iso639_1(current) or canonicalize_lang(current) or 'en'
     word = quote(word.encode('utf-8'))
     if lang == 'en':
         prefix = 'https://www.wordnik.com/words/'
     else:
         prefix = 'http://%s.wiktionary.org/wiki/' % lang
     open_url(prefix + word)
Exemple #32
0
 def lookup(self, word):
     '''
     Open the online dictionary page for ``word`` via ``open_url``.

     The language is taken from ``self.view.current_language``. English is
     looked up on wordnik.com, every other language on the wiktionary.org
     subdomain named after the language code.

     :param word: The word to look up; it is UTF-8 encoded and URL-quoted.
     '''
     from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
     from urllib import quote
     current = self.view.current_language
     # When the language has no ISO 639-1 form, fall back to the canonical
     # code of the *current language*. The fallback used to be computed from
     # the already-empty ISO 639-1 result, so it could never see the language.
     lang = lang_as_iso639_1(current) or canonicalize_lang(current) or 'en'
     word = quote(word.encode('utf-8'))
     if lang == 'en':
         prefix = 'https://www.wordnik.com/words/'
     else:
         prefix = 'http://%s.wiktionary.org/wiki/' % lang
     open_url(prefix + word)
Exemple #33
0
    def metadataFromString(self, string):
        '''
        Build a ``GenericMetadata`` object from a ComicBookInfo JSON string.

        :param string: UTF-8 encoded JSON with a ``ComicBookInfo/1.0`` entry.
        :return: The populated metadata, with ``isEmpty`` set to False.
            Entries missing from the input are None, except ``credits`` and
            ``tags`` which are empty lists.
        '''
        cbi_container = json.loads(unicode(string, 'utf-8'))
        metadata = GenericMetadata()
        cbi = cbi_container['ComicBookInfo/1.0']

        def xlate(cbi_entry):
            # Entries that are not in the CBI data translate to None
            return cbi[cbi_entry] if cbi_entry in cbi else None

        # (GenericMetadata attribute, ComicBookInfo key)
        field_map = (
            ('series', 'series'),
            ('title', 'title'),
            ('issue', 'issue'),
            ('publisher', 'publisher'),
            ('month', 'publicationMonth'),
            ('year', 'publicationYear'),
            ('issueCount', 'numberOfIssues'),
            ('comments', 'comments'),
            ('credits', 'credits'),
            ('genre', 'genre'),
            ('volume', 'volume'),
            ('volumeCount', 'numberOfVolumes'),
            ('language', 'language'),
            ('country', 'country'),
            ('criticalRating', 'rating'),
            ('tags', 'tags'),
        )
        for attribute, cbi_key in field_map:
            setattr(metadata, attribute, xlate(cbi_key))

        # credits and tags must be lists, never None
        for list_attribute in ('credits', 'tags'):
            if getattr(metadata, list_attribute) is None:
                setattr(metadata, list_attribute, [])

        # The language is normalised to its ISO form with the calibre helper
        if metadata.language is not None:
            metadata.language = lang_as_iso639_1(metadata.language)

        metadata.isEmpty = False
        return metadata
    def metadataFromString(self, string):
        '''
        Build a ``GenericMetadata`` object from a ComicBookInfo JSON string.

        :param string: UTF-8 encoded JSON with a ``ComicBookInfo/1.0`` entry.
        :return: The populated metadata, with ``isEmpty`` set to False.
            Entries missing from the input are None, except ``credits`` and
            ``tags`` which are empty lists.
        '''
        cbi_container = json.loads(unicode(string, 'utf-8'))
        metadata = GenericMetadata()
        cbi = cbi_container['ComicBookInfo/1.0']

        def xlate(cbi_entry):
            # Entries that are not in the CBI data translate to None
            return cbi[cbi_entry] if cbi_entry in cbi else None

        # (GenericMetadata attribute, ComicBookInfo key)
        field_map = (
            ('series', 'series'),
            ('title', 'title'),
            ('issue', 'issue'),
            ('publisher', 'publisher'),
            ('month', 'publicationMonth'),
            ('year', 'publicationYear'),
            ('issueCount', 'numberOfIssues'),
            ('comments', 'comments'),
            ('credits', 'credits'),
            ('genre', 'genre'),
            ('volume', 'volume'),
            ('volumeCount', 'numberOfVolumes'),
            ('language', 'language'),
            ('country', 'country'),
            ('criticalRating', 'rating'),
            ('tags', 'tags'),
        )
        for attribute, cbi_key in field_map:
            setattr(metadata, attribute, xlate(cbi_key))

        # credits and tags must be lists, never None
        for list_attribute in ('credits', 'tags'):
            if getattr(metadata, list_attribute) is None:
                setattr(metadata, list_attribute, [])

        # The language is normalised to its ISO form with the calibre helper
        if metadata.language is not None:
            metadata.language = lang_as_iso639_1(metadata.language)

        metadata.isEmpty = False
        return metadata
Exemple #35
0
def update_doc_props(root, mi):
    '''
    Write the metadata in ``mi`` into the document properties tree ``root``.

    Title and creator are always written; keywords, description and language
    only when ``mi`` has tags, comments and languages respectively. Only the
    first language is used.
    '''
    def setm(name, text=None, ns='dc'):
        # Replace every existing child with this tag by one new element
        element = root.makeelement('{%s}%s' % (namespaces[ns], name))
        stale = [child for child in root if child.tag == element.tag]
        for child in stale:
            root.remove(child)
        element.text = text
        root.append(element)
        return element

    setm('title', mi.title)
    setm('creator', authors_to_string(mi.authors))
    if mi.tags:
        setm('keywords', ', '.join(mi.tags), ns='cp')
    if mi.comments:
        setm('description', mi.comments)
    if mi.languages:
        primary = canonicalize_lang(mi.languages[0])
        setm('language', lang_as_iso639_1(primary) or primary)
Exemple #36
0
def update_doc_props(root, mi, namespace):
    '''
    Write the metadata in ``mi`` into the document properties tree ``root``.

    Title and creator are always written; keywords, description and language
    only when ``mi`` has tags, comments and languages respectively. Only the
    first language is used.

    :param namespace: Object whose ``namespaces`` mapping resolves the
        ``dc`` and ``cp`` prefixes used for the element tags.
    '''
    def setm(name, text=None, ns='dc'):
        # Replace every existing child with this tag by one new element
        element = root.makeelement(
            '{%s}%s' % (namespace.namespaces[ns], name))
        stale = [child for child in root if child.tag == element.tag]
        for child in stale:
            root.remove(child)
        element.text = text
        root.append(element)
        return element

    setm('title', mi.title)
    setm('creator', authors_to_string(mi.authors))
    if mi.tags:
        setm('keywords', ', '.join(mi.tags), ns='cp')
    if mi.comments:
        setm('description', mi.comments)
    if mi.languages:
        primary = canonicalize_lang(mi.languages[0])
        setm('language', lang_as_iso639_1(primary) or primary)
def iana2mobi(icode):
    '''
    Return the packed MOBI language code for the IANA language tag ``icode``.

    The ``-`` separated subtags are consumed from the left until one of them
    names a language present in ``IANA_MOBI``. The subtags after it are then
    looked up in that language's table (as given, title-cased, upper-cased)
    and the first hit selects the code. Without a hit the default entry
    (key None) of the table is used.

    :return: Four bytes packed as ``'>HBB'``.
    '''
    langdict = IANA_MOBI[None]
    subtags = []
    if icode:
        subtags = icode.split('-')
        while subtags:
            lang = lang_as_iso639_1(subtags.pop(0).lower())
            if lang and lang in IANA_MOBI:
                langdict = IANA_MOBI[lang]
                break

    mcode = langdict[None]
    # Whatever is left after the language subtag may refine the code
    for raw in subtags:
        spellings = (raw, raw.title(), raw.title().upper())
        match = next((s for s in spellings if s in langdict), None)
        if match is not None:
            mcode = langdict[match]
            break
    return pack('>HBB', 0, mcode[1], mcode[0])
Exemple #38
0
def iana2mobi(icode):
    '''
    Return the packed MOBI language code for the IANA language tag ``icode``.

    The ``-`` separated subtags are consumed from the left until one of them
    names a language present in ``IANA_MOBI``. The subtags after it are then
    looked up in that language's table (as given, title-cased, upper-cased)
    and the first hit selects the code. Without a hit the default entry
    (key None) of the table is used.

    :return: Four bytes packed as ``'>HBB'``.
    '''
    langdict = IANA_MOBI[None]
    subtags = []
    if icode:
        subtags = icode.split('-')
        while subtags:
            lang = lang_as_iso639_1(subtags.pop(0).lower())
            if lang and lang in IANA_MOBI:
                langdict = IANA_MOBI[lang]
                break

    mcode = langdict[None]
    # Whatever is left after the language subtag may refine the code
    for raw in subtags:
        spellings = (raw, raw.title(), raw.title().upper())
        match = next((s for s in spellings if s in langdict), None)
        if match is not None:
            mcode = langdict[match]
            break
    return pack('>HBB', 0, mcode[1], mcode[0])
Exemple #39
0
def update_metadata(opf,
                    mi,
                    apply_null=False,
                    update_timestamp=False,
                    force_identifiers=False):
    """Merge the metadata in ``mi`` into the OPF object ``opf``.

    The ``guide``, ``toc``, ``manifest`` and ``spine`` attributes of ``mi``
    are reset to ``None`` and its language codes are replaced by the value
    of ``lang_as_iso639_1`` wherever that returns something truthy, before
    ``opf.smart_update(mi)`` is called. Note that ``mi`` is modified.

    :param apply_null: When true, empty series/tags/isbn/comments/publisher
        values in ``mi`` clear the corresponding ``opf`` fields, and the
        identifiers of ``mi`` replace those of ``opf``.
    :param update_timestamp: When true and ``mi.timestamp`` is not ``None``,
        copy the timestamp to ``opf``.
    :param force_identifiers: When true, replace the identifiers of ``opf``
        instead of merging into them.
    """
    for field in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, field, None)
    if mi.languages:
        mi.languages = [lang_as_iso639_1(code) or code for code in mi.languages]

    opf.smart_update(mi)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid

    if apply_null:
        # (attribute, getattr default, value written to opf when empty)
        nullable = (
            ('series', None, None),
            ('tags', [], []),
            ('isbn', None, None),
            ('comments', None, None),
            ('publisher', None, None),
        )
        for attr, missing, cleared in nullable:
            if not getattr(mi, attr, missing):
                setattr(opf, attr, cleared)

    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        # Merge: values from mi win, entries with an empty key or value
        # are dropped.
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        cleaned = {}
        for scheme, value in merged.iteritems():
            if scheme and value:
                cleaned[scheme] = value
        opf.set_identifiers(cleaned)

    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
Exemple #40
0
def create_book(mi,
                path,
                fmt='epub',
                opf_name='metadata.opf',
                html_name='start.xhtml',
                toc_name='toc.ncx'):
    """Write an empty book described by ``mi`` to ``path``.

    :param mi: Metadata object supplying title, authors and language.
    :param path: Destination file.
    :param fmt: Output format; must be one of ``valid_empty_formats``.
        ``'txt'`` and ``'docx'`` are written directly, ``'azw3'`` is built
        from temporary files via ``opf_to_azw3``, anything else is written
        as a zip archive with an EPUB mimetype entry.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the single HTML file inside the book.
    :param toc_name: Name of the NCX file inside the book.
    :raises ValueError: If ``fmt`` is not in ``valid_empty_formats``.
    """
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)

    if fmt == 'txt':
        # The text file contains only the title, or nothing when unset.
        with open(path, 'wb') as out:
            if not mi.is_null('title'):
                out.write(as_bytes(mi.title))
        return

    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        plumber = Plumber('a.docx', 'b.docx', default_log)
        plumber.setup_options()
        # Use the word default of one inch page margins
        for side in ('left', 'right', 'top', 'bottom'):
            setattr(plumber.opts, 'margin_' + side, 72)
        writer = DOCX(plumber.opts, default_log)
        writer.write(path, mi, create_empty_document=True)
        return

    path = os.path.abspath(path)
    opf = metadata_to_opf(mi, as_string=False)

    # The first language element with text wins; 'und' when there is none.
    lang = 'und'
    for node in opf.xpath('//*[local-name()="language"]'):
        if not node.text:
            continue
        lang = node.text
        break
    lang = lang_as_iso639_1(lang) or lang

    # Manifest with the HTML file and the NCX, inserted at index 1.
    opfns = OPF_NAMESPACES['opf']
    item_tag = '{%s}item' % opfns
    manifest = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, manifest)
    for href, item_id, type_source in ((html_name, 'start', 'a.xhtml'),
                                       (toc_name, 'ncx', toc_name)):
        entry = manifest.makeelement(item_tag, href=href, id=item_id)
        entry.set('media-type', guess_type(type_source))
        manifest.append(entry)

    # Spine referencing the HTML file, inserted at index 2.
    spine = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, spine)
    spine.append(spine.makeelement('{%s}itemref' % opfns, idref='start'))

    container_xml = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')

    # Fill the placeholders of the HTML template one at a time.
    template = P('templates/new_book.html', data=True).decode('utf-8')
    template = template.replace('_LANGUAGE_',
                                prepare_string_for_xml(lang, True))
    template = template.replace('_TITLE_', prepare_string_for_xml(mi.title))
    template = template.replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors)))
    tree = parse(template.encode('utf-8'))
    pretty_html_tree(None, tree)
    html_bytes = serialize(tree, 'text/html')

    # The NCX is generated before the OPF tree is pretty-printed.
    ncx_bytes = etree.tostring(create_toc(mi, opf, html_name, lang),
                               encoding='utf-8',
                               xml_declaration=True,
                               pretty_print=True)
    pretty_xml_tree(opf)
    opf_bytes = etree.tostring(opf,
                               encoding='utf-8',
                               xml_declaration=True,
                               pretty_print=True)

    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            # CurrentDir makes the relative names resolve inside tdir.
            members = ((opf_name, opf_bytes), (html_name, html_bytes),
                       (toc_name, ncx_bytes))
            for name, data in members:
                with open(name, 'wb') as out:
                    out.write(data)
            book_dir = os.path.dirname(os.path.abspath(opf_name))
            book = Container(book_dir, opf_name, DevNull())
            opf_to_azw3(opf_name, path, book)
        return

    with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
        zf.writestr('mimetype',
                    b'application/epub+zip',
                    compression=ZIP_STORED)
        zf.writestr('META-INF/', b'', 0o755)
        zf.writestr('META-INF/container.xml', container_xml)
        zf.writestr(opf_name, opf_bytes)
        zf.writestr(html_name, html_bytes)
        zf.writestr(toc_name, ncx_bytes)
Exemple #41
0
def metadata_to_xmp_packet(mi):
    """Serialize the metadata object ``mi`` as an XMP packet.

    An ``x:xmpmeta`` root holding one ``rdf:RDF`` element is built, and
    several ``rdf:Description`` children are appended to it in order: the
    Dublin Core fields, the ``xmp`` block (identifiers and the metadata
    date), one block each for ``prism`` and ``pdfx`` (isbn/doi only), and a
    ``calibre`` block (rating, series, timestamp, sort fields and user
    metadata).

    :param mi: Metadata object to read from.
    :return: The result of ``serialize_xmp_packet`` for the built tree.
    """
    xmpmeta_maker = ElementMaker(namespace=NS_MAP["x"], nsmap=nsmap("x"))
    rdf_maker = ElementMaker(namespace=NS_MAP["rdf"], nsmap=nsmap("rdf"))
    root = xmpmeta_maker.xmpmeta(rdf_maker.RDF)
    rdf = root[0]
    description_tag = expand("rdf:Description")
    about_attr = expand("rdf:about")

    # --- Dublin Core ---
    dc = rdf.makeelement(description_tag, nsmap=nsmap("dc"))
    dc.set(about_attr, "")
    rdf.append(dc)

    alt_properties = {"title": "dc:title", "comments": "dc:description"}
    for prop, tag in alt_properties.iteritems():
        create_alt_property(dc, tag, mi.get(prop) or "")

    sequence_properties = {
        "authors": ("dc:creator", True),
        "tags": ("dc:subject", False),
        "publisher": ("dc:publisher", False),
    }
    for prop, (tag, ordered) in sequence_properties.iteritems():
        values = mi.get(prop) or ()
        if isinstance(values, basestring):
            # A lone string is treated as a one-element sequence.
            values = [values]
        create_sequence_property(dc, tag, values, ordered)

    if not mi.is_null("pubdate"):
        # Adobe spec recommends local time
        pubdate = isoformat(mi.pubdate, as_utc=False)
        create_sequence_property(dc, "dc:date", [pubdate])

    if not mi.is_null("languages"):
        converted = [lang_as_iso639_1(code) or canonicalize_lang(code)
                     for code in mi.languages]
        langs = [code for code in converted if code]
        if langs:
            create_sequence_property(dc, "dc:language", langs, ordered=False)

    # --- xmp, prism and pdfx ---
    xmp = rdf.makeelement(description_tag, nsmap=nsmap("xmp", "xmpidq"))
    xmp.set(about_attr, "")
    rdf.append(xmp)

    extra_ids = {}
    for prefix in ("prism", "pdfx"):
        block = rdf.makeelement(description_tag, nsmap=nsmap(prefix))
        extra_ids[prefix] = block
        block.set(about_attr, "")
        rdf.append(block)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, val in identifiers.iteritems():
            if scheme not in {"isbn", "doi"}:
                continue
            # isbn and doi are repeated in both extra blocks.
            for prefix, parent in extra_ids.iteritems():
                ident = parent.makeelement(expand("%s:%s" % (prefix, scheme)))
                ident.text = val
                parent.append(ident)

    stamp = xmp.makeelement(expand("xmp:MetadataDate"))
    stamp.text = isoformat(now(), as_utc=False)
    xmp.append(stamp)

    # --- calibre ---
    calibre = rdf.makeelement(
        description_tag, nsmap=nsmap("calibre", "calibreSI", "calibreCC"))
    calibre.set(about_attr, "")
    rdf.append(calibre)

    if not mi.is_null("rating"):
        try:
            rating = float(mi.rating)
        except (TypeError, ValueError):
            # A rating that is not a number is left out.
            rating = None
        if rating is not None:
            create_simple_property(calibre, "calibre:rating", "%g" % rating)
    if not mi.is_null("series"):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null("timestamp"):
        create_simple_property(
            calibre, "calibre:timestamp", isoformat(mi.timestamp, as_utc=False))

    for field in ("author_link_map", "user_categories"):
        mapping = getattr(mi, field, None)
        if mapping:
            create_simple_property(calibre, "calibre:" + field, dump_dict(mapping))

    for field in ("title_sort", "author_sort"):
        if mi.is_null(field):
            continue
        create_simple_property(calibre, "calibre:" + field, getattr(mi, field))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
Exemple #42
0
def metadata_to_xmp_packet(mi):
    """Build an XMP packet from the metadata object ``mi``.

    The packet consists of an ``x:xmpmeta`` root with one ``rdf:RDF`` child
    to which ``rdf:Description`` elements are appended for the ``dc``,
    ``xmp``, ``prism``, ``pdfx`` and ``calibre`` namespaces, in that order.

    :param mi: Metadata object to read from.
    :return: Whatever ``serialize_xmp_packet`` returns for the built tree.
    """
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]

    def add_description(*prefixes):
        # Create an rdf:Description with an empty rdf:about for the given
        # namespace prefixes and append it to the RDF element.
        node = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap(*prefixes))
        node.set(expand('rdf:about'), '')
        rdf.append(node)
        return node

    dc = add_description('dc')
    for prop, tag in {'title':'dc:title', 'comments':'dc:description'}.iteritems():
        create_alt_property(dc, tag, mi.get(prop) or '')
    for prop, (tag, ordered) in {
        'authors':('dc:creator', True), 'tags':('dc:subject', False), 'publisher':('dc:publisher', False),
    }.iteritems():
        items = mi.get(prop) or ()
        # Wrap a bare string so it is emitted as a single entry.
        items = [items] if isinstance(items, basestring) else items
        create_sequence_property(dc, tag, items, ordered)
    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        create_sequence_property(dc, 'dc:date', [isoformat(mi.pubdate, as_utc=False)])
    if not mi.is_null('languages'):
        langs = []
        for code in mi.languages:
            short = lang_as_iso639_1(code) or canonicalize_lang(code)
            if short:
                langs.append(short)
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    xmp = add_description('xmp', 'xmpidq')
    extra_ids = {}
    for prefix in ('prism', 'pdfx'):
        extra_ids[prefix] = add_description(prefix)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        # Only isbn and doi are copied into the prism and pdfx blocks.
        copied = [(scheme, val) for scheme, val in identifiers.iteritems()
                  if scheme in {'isbn', 'doi'}]
        for scheme, val in copied:
            for prefix, parent in extra_ids.iteritems():
                entry = parent.makeelement(expand('%s:%s'%(prefix, scheme)))
                entry.text = val
                parent.append(entry)

    date_node = xmp.makeelement(expand('xmp:MetadataDate'))
    date_node.text = isoformat(now(), as_utc=False)
    xmp.append(date_node)

    calibre = add_description('calibre', 'calibreSI', 'calibreCC')
    if not mi.is_null('rating'):
        try:
            rating_text = '%g' % float(mi.rating)
        except (TypeError, ValueError):
            # Non-numeric ratings are not written.
            rating_text = None
        if rating_text is not None:
            create_simple_property(calibre, 'calibre:rating', rating_text)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False))
    for name in ('author_link_map', 'user_categories'):
        value = getattr(mi, name, None)
        if value:
            create_simple_property(calibre, 'calibre:'+name, dump_dict(value))

    for name in ('title_sort', 'author_sort'):
        if not mi.is_null(name):
            create_simple_property(calibre, 'calibre:'+name, getattr(mi, name))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
Exemple #43
0
def html_lang(docx_lang):
    """Return the ``lang_as_iso639_1`` form of a DOCX language code.

    :param docx_lang: Language value as found in the document.
    :return: The converted code, or ``None`` when the value cannot be
        canonicalized, canonicalizes to ``'und'``, or has no conversion.
    """
    canonical = canonicalize_lang(docx_lang)
    if not canonical or canonical == 'und':
        return None
    return lang_as_iso639_1(canonical) or None
Exemple #44
0
def lang_for_tag(tag):
    """Return the language declared on ``tag``, or ``None`` if there is none.

    The plain ``lang`` attribute is consulted before ``xml:lang``; the first
    one for which ``lang_as_iso639_1`` gives a truthy result is returned.
    """
    attribute_names = ('lang', '{http://www.w3.org/XML/1998/namespace}lang')
    # The generator is lazy, so xml:lang is only looked at when needed.
    converted = (lang_as_iso639_1(tag.get(name)) for name in attribute_names)
    return next((code for code in converted if code), None)
Exemple #45
0
    def update(self, mi, asin=None):
        """Rewrite the EXTH metadata of this MOBI file from ``mi``.

        New EXTH records are generated for the fields set in ``mi``; original
        records whose code was not regenerated are carried over unchanged.
        The EXTH block is then rebuilt through ``self.create_exth`` and, when
        cover data is available, the existing cover and thumbnail records
        are overwritten in place with rescaled images.

        Note that ``mi`` is modified: its title is normalized and user
        annotations are cut from its comments.

        :param mi: Metadata object to write.
        :param asin: Optional ASIN, written to EXTH records 113 and 504.
        :raises MobiError: If the file is not of type ``BOOKMOBI`` or has no
            existing EXTH record.
        """
        mi.title = normalize(mi.title)
        recs = []

        def update_exth_record(rec):
            # A regenerated record supersedes the original with the same
            # code, so remove that one from the carry-over set.
            recs.append(rec)
            if rec[0] in self.original_exth_records:
                self.original_exth_records.pop(rec[0])

        if self.type != b"BOOKMOBI":
            raise MobiError(
                "Setting metadata only supported for MOBI files of type 'BOOK'.\n"
                "\tThis is a %r file of type %r" %
                (self.type[0:4], self.type[4:8]))

        added_501 = False
        try:
            from calibre.ebooks.conversion.config import load_defaults
            prefs = load_defaults('mobi_output')
            pas = prefs.get('prefer_author_sort', False)
            kindle_pdoc = prefs.get('personal_doc', None)
            share_not_sync = prefs.get('share_not_sync', False)
        except Exception:
            # Preferences are optional; fall back to the defaults.
            pas = False
            kindle_pdoc = None
            share_not_sync = False

        if mi.author_sort and pas:
            # We want an EXTH field per author...
            authors = mi.author_sort.split(' & ')
            for author in authors:
                update_exth_record(
                    (100, normalize(author).encode(self.codec, 'replace')))
        elif mi.authors:
            authors = mi.authors
            for author in authors:
                update_exth_record(
                    (100, normalize(author).encode(self.codec, 'replace')))
        if mi.publisher:
            update_exth_record(
                (101, normalize(mi.publisher).encode(self.codec, 'replace')))
        if mi.comments:
            # Strip user annotations
            a_offset = mi.comments.find('<div class="user_annotations">')
            ad_offset = mi.comments.find('<hr class="annotations_divider" />')
            if a_offset >= 0:
                mi.comments = mi.comments[:a_offset]
            if ad_offset >= 0:
                mi.comments = mi.comments[:ad_offset]
            update_exth_record(
                (103, normalize(mi.comments).encode(self.codec, 'replace')))
        if mi.isbn:
            update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
        if mi.tags:
            # FIXME: Keep a single subject per EXTH field?
            subjects = '; '.join(mi.tags)
            update_exth_record(
                (105, normalize(subjects).encode(self.codec, 'replace')))

            if kindle_pdoc and kindle_pdoc in mi.tags:
                added_501 = True
                update_exth_record((501, b'PDOC'))

        # Record 106: first available of pubdate, timestamp, the file's own
        # timestamp, and finally the current time.
        if mi.pubdate:
            update_exth_record(
                (106, unicode_type(mi.pubdate).encode(self.codec, 'replace')))
        elif mi.timestamp:
            update_exth_record(
                (106, unicode_type(mi.timestamp).encode(self.codec,
                                                        'replace')))
        elif self.timestamp:
            update_exth_record((106, self.timestamp))
        else:
            update_exth_record(
                (106, nowf().isoformat().encode(self.codec, 'replace')))
        if self.cover_record is not None:
            update_exth_record((201, pack('>I', self.cover_rindex)))
            update_exth_record((203, pack('>I', 0)))
        if self.thumbnail_record is not None:
            update_exth_record((202, pack('>I', self.thumbnail_rindex)))
        # Add a 113 record if not present to allow Amazon syncing.
        # EXTH payloads are bytes, so the comparison must use a bytes
        # literal; a text literal never compares equal on Python 3.
        if (113 not in self.original_exth_records
                and self.original_exth_records.get(501, None) == b'EBOK'
                and not added_501 and not share_not_sync):
            from uuid import uuid4
            update_exth_record((113, unicode_type(uuid4()).encode(self.codec)))

        if asin is not None:
            update_exth_record((113, asin.encode(self.codec)))
            update_exth_record((504, asin.encode(self.codec)))

        # Add a 112 record with actual UUID
        if getattr(mi, 'uuid', None):
            update_exth_record(
                (112, ("calibre:%s" % mi.uuid).encode(self.codec, 'replace')))
        if 503 in self.original_exth_records:
            update_exth_record((503, mi.title.encode(self.codec, 'replace')))

        # Update book producer
        if getattr(mi, 'book_producer', False):
            update_exth_record(
                (108, mi.book_producer.encode(self.codec, 'replace')))

        # Set langcode in EXTH header
        if not mi.is_null('language'):
            lang = canonicalize_lang(mi.language)
            lang = lang_as_iso639_1(lang) or lang
            if lang:
                update_exth_record((524, lang.encode(self.codec, 'replace')))

        # Include remaining original EXTH fields
        for rec_id in sorted(self.original_exth_records):
            recs.append((rec_id, self.original_exth_records[rec_id]))
        # Stable sort: records sharing a code keep their insertion order.
        recs = sorted(recs, key=lambda rec: rec[0])

        exth = io.BytesIO()
        for code, data in recs:
            # Each record is: code, total length (8 byte header + payload),
            # payload.
            exth.write(pack('>II', code, len(data) + 8))
            exth.write(data)
        exth = exth.getvalue()
        trail = len(exth) % 4
        pad = b'\0' * (4 - trail)  # Always pad w/ at least 1 byte
        exth = [b'EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad]
        exth = b''.join(exth)

        if getattr(self, 'exth', None) is None:
            raise MobiError('No existing EXTH record. Cannot update metadata.')

        if not mi.is_null('language'):
            self.record0[92:96] = iana2mobi(mi.language)
        self.create_exth(exth=exth, new_title=mi.title)

        # Fetch updated timestamp, cover_record, thumbnail_record
        self.fetchEXTHFields()

        if mi.cover_data[1] or mi.cover:
            try:
                data = mi.cover_data[1]
                if not data:
                    with open(mi.cover, 'rb') as f:
                        data = f.read()
            except Exception:
                # Updating the cover is best-effort; an unreadable cover
                # leaves the existing images untouched.
                return
            # The replacement must fit in the existing record: it is only
            # written when not larger, and is NUL-padded to the same size.
            if is_image(self.cover_record):
                size = len(self.cover_record)
                cover = rescale_image(data, size)
                if len(cover) <= size:
                    cover += b'\0' * (size - len(cover))
                    self.cover_record[:] = cover
            if is_image(self.thumbnail_record):
                size = len(self.thumbnail_record)
                thumbnail = rescale_image(data,
                                          size,
                                          dimen=MAX_THUMB_DIMEN)
                if len(thumbnail) <= size:
                    thumbnail += b'\0' * (size - len(thumbnail))
                    self.thumbnail_record[:] = thumbnail
Exemple #46
0
 def migrate_lang_code(self, root):  # {{{
     """Replace the text of ``language`` elements with their converted code.

     Every element under ``root`` whose local name is ``language`` has its
     text passed to ``lang_as_iso639_1``; the text is overwritten only when
     that call returns a truthy value.
     """
     from calibre.utils.localization import lang_as_iso639_1
     language_nodes = root.xpath('//*[local-name() = "language"]')
     for node in language_nodes:
         short_code = lang_as_iso639_1(node.text)
         if not short_code:
             # No conversion available: keep the original text.
             continue
         node.text = short_code
Exemple #47
0
 def migrate_lang_code(self, root):  # {{{
     """Rewrite language codes found under ``root`` in place.

     :param root: Tree root that is searched, by local name, for
         ``language`` elements. An element's text is replaced by the result
         of ``lang_as_iso639_1`` whenever that result is truthy.
     """
     from calibre.utils.localization import lang_as_iso639_1
     for element in root.xpath('//*[local-name() = "language"]'):
         # Fall back to the current text when there is no conversion.
         converted = lang_as_iso639_1(element.text)
         element.text = converted if converted else element.text
Exemple #48
0
def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
    """Write the metadata in ``mi`` into the EPUB held in ``stream``.

    The OPF of the book is updated through ``smart_update`` and written back
    with ``safe_replace``. When ``mi`` carries cover data and the book has a
    raster cover that is not encrypted and is a PNG/JPEG file, the cover
    image is replaced as well.

    :param stream: Seekable file object containing the EPUB; modified in
        place.
    :param mi: Metadata to apply; it is copied into a ``MetaInformation``.
    :param apply_null: When true, empty series/tags/isbn values in ``mi``
        clear the corresponding fields of the OPF.
    :param update_timestamp: When true and ``mi.timestamp`` is not ``None``,
        copy the timestamp into the OPF.
    """
    stream.seek(0)
    # NOTE: os.getcwdu only exists on Python 2.
    reader = OCFZipReader(stream, root=os.getcwdu())
    raster_cover = reader.opf.raster_cover
    mi = MetaInformation(mi)
    replacements = {}

    # Prefer in-memory cover data and fall back to the cover file. Both
    # lookups are best-effort: without usable data the cover is left alone.
    try:
        new_cdata = mi.cover_data[1]
    except Exception:
        new_cdata = None
    if not new_cdata:
        try:
            with open(mi.cover, 'rb') as f:
                new_cdata = f.read()
        except Exception:
            pass

    # Handle on the temporary file holding the new cover, if one is made.
    cover_file = None
    try:
        if new_cdata and raster_cover:
            try:
                cpath = posixpath.join(posixpath.dirname(reader.opf_path),
                                       raster_cover)
                cover_replacable = not reader.encryption_meta.is_encrypted(cpath) and \
                        os.path.splitext(cpath)[1].lower() in ('.png', '.jpg', '.jpeg')
                if cover_replacable:
                    new_cover = _write_new_cover(new_cdata, cpath)
                    cover_file = open(new_cover.name, 'rb')
                    replacements[cpath] = cover_file
            except Exception:
                # A failed cover replacement must not prevent the metadata
                # update itself.
                import traceback
                traceback.print_exc()

        for x in ('guide', 'toc', 'manifest', 'spine'):
            setattr(mi, x, None)
        if mi.languages:
            mi.languages = [lang_as_iso639_1(lc) or lc for lc in mi.languages]

        reader.opf.smart_update(mi)
        if apply_null:
            if not getattr(mi, 'series', None):
                reader.opf.series = None
            if not getattr(mi, 'tags', []):
                reader.opf.tags = []
            if not getattr(mi, 'isbn', None):
                reader.opf.isbn = None
        if update_timestamp and mi.timestamp is not None:
            reader.opf.timestamp = mi.timestamp

        newopf = StringIO(reader.opf.render())
        safe_replace(stream,
                     reader.container[OPF.MIMETYPE],
                     newopf,
                     extra_replacements=replacements)
    finally:
        # Always release and delete the temporary cover file, even when
        # updating the book failed part-way.
        if cover_file is not None:
            try:
                cover_file.close()
                os.remove(cover_file.name)
            except EnvironmentError:
                pass
Exemple #49
0
def create_inline_toc(container, title=None):
    """Generate an HTML Table of Contents file from the book's existing ToC.

    :param container: Book container to read the ToC from and write to.
    :param title: Heading for the generated page; when empty, the
        (translated, if the book language is known) default title is used.
    :return: Name of the file holding the inline ToC, or ``None`` when the
        book's ToC has no entries.
    """
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    if not title:
        title = default_title
    toc = get_toc(container)
    if not len(toc):
        return None
    toc_name = find_inline_toc(container)

    def add_entry(parent_list, node, level=1, indent='  ', style_level=2):
        # Append an <li> for `node` to `parent_list`, recursing into a
        # nested <ul> for its children. The text/tail values only control
        # the whitespace layout of the serialized markup.
        item = parent_list.makeelement(XHTML('li'))
        item.tail = '\n' + (indent * level)
        parent_list.append(item)
        dest, frag = node.dest, node.frag
        href = '#'
        if dest:
            href = container.name_to_href(dest, toc_name)
            if frag:
                href += '#' + frag
        link = item.makeelement(XHTML('a'), href=href)
        link.text = node.title
        item.append(link)
        if len(node) == 0:
            return
        sublist = item.makeelement(XHTML('ul'))
        sublist.set('class', 'level%d' % (style_level))
        item.append(sublist)
        link.tail = '\n\n' + (indent * (level + 2))
        sublist.text = '\n' + (indent * (level + 3))
        sublist.tail = '\n\n' + (indent * (level + 1))
        for sub in node:
            add_entry(sublist, sub, level + 3, style_level=style_level + 1)
        sublist[-1].tail = '\n' + (indent * (level + 2))

    css = '''
                li { list-style-type: none; padding-left: 2em; margin-left: 0}
                a { text-decoration: none }
                a:hover { color: red }'''
    E = ElementMaker(namespace=XHTML_NS, nsmap={None:XHTML_NS})
    head = E.head(E.title(title), E.style(css, type='text/css'))
    body = E.body(E.h2(title), E.ul(), id="calibre_generated_inline_toc")
    html = E.html(head, body)

    # html[1] is the body and html[1][1] the (still empty) top-level list.
    top_list = html[1][1]
    top_list.set('class', 'level1')
    for entry in toc:
        add_entry(top_list, entry)
    if lang:
        html.set('lang', lang)
    pretty_html_tree(container, html)
    raw = serialize(html, 'text/html')

    name = toc_name
    if name is not None:
        # Overwrite the inline ToC that already exists.
        with container.open(name, 'wb') as f:
            f.write(raw)
    else:
        # Pick the first unused name: toc.xhtml, toc1.xhtml, toc2.xhtml, ...
        name, counter = 'toc.xhtml', 0
        while container.has_name(name):
            counter += 1
            name = 'toc%d.xhtml' % counter
        container.add_file(name, raw, spine_index=0)
    set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name
Exemple #50
0
def create_inline_toc(container, title=None):
    '''
    Build an inline (HTML) Table of Contents out of the Table of Contents
    the book already has, and register it as the ``toc`` guide item.

    :param title: The title for this table of contents. Defaults to
        "Table of Contents", translated when the book language is known.
    :return: The name of the HTML file that was written, or ``None`` if the
        existing Table of Contents is empty.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    title = title if title else default_title
    toc = get_toc(container)
    if len(toc) < 1:
        return None
    toc_name = find_inline_toc(container)

    def process_node(html_parent, toc_node, level=1, indent='  ', style_level=2):
        # Whitespace helper used for the text/tail layout of the markup.
        def pad(count, newlines='\n'):
            return newlines + (indent * count)

        li = html_parent.makeelement(XHTML('li'))
        li.tail = pad(level)
        html_parent.append(li)

        target, fragment = toc_node.dest, toc_node.frag
        if target:
            href = container.name_to_href(target, toc_name)
            if fragment:
                href += '#' + fragment
        else:
            # Entries without a destination link nowhere.
            href = '#'
        anchor = li.makeelement(XHTML('a'), href=href)
        anchor.text = toc_node.title
        li.append(anchor)

        if len(toc_node) > 0:
            nested = li.makeelement(XHTML('ul'))
            nested.set('class', 'level%d' % (style_level))
            li.append(nested)
            anchor.tail = pad(level + 2, '\n\n')
            nested.text = pad(level + 3)
            nested.tail = pad(level + 1, '\n\n')
            for child in toc_node:
                process_node(nested, child, level + 3,
                             style_level=style_level + 1)
            nested[-1].tail = pad(level + 2)

    E = ElementMaker(namespace=XHTML_NS, nsmap={None: XHTML_NS})
    stylesheet = '''
                li { list-style-type: none; padding-left: 2em; margin-left: 0}
                a { text-decoration: none }
                a:hover { color: red }'''
    html = E.html(
        E.head(E.title(title), E.style(stylesheet, type='text/css')),
        E.body(E.h2(title), E.ul(), id="calibre_generated_inline_toc"),
    )

    # The second child of <body> is the list that receives the entries.
    ul = html[1][1]
    ul.set('class', 'level1')
    for child in toc:
        process_node(ul, child)
    if lang:
        html.set('lang', lang)
    pretty_html_tree(container, html)
    raw = serialize(html, 'text/html')

    name = toc_name
    if name is None:
        # No inline ToC yet: add a new file under the first free name.
        suffix = 0
        name = 'toc.xhtml'
        while container.has_name(name):
            suffix += 1
            name = 'toc%d.xhtml' % suffix
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    set_guide_item(container, 'toc', title, name,
                   frag='calibre_generated_inline_toc')
    return name
Exemple #51
0
    def fb2_header(self):
        """Return the opening part of the FB2 document as text.

        The result holds the ``<FictionBook>`` start tag and a complete
        ``<description>`` element (title-info, document-info and
        publish-info) filled from ``self.oeb_book.metadata``. The
        ``<FictionBook>`` element itself is left open.
        """
        from calibre.ebooks.oeb.base import OPF
        metadata = {}
        metadata['title'] = self.oeb_book.metadata.title[0].value
        metadata['appname'] = __appname__
        metadata['version'] = __version__
        # Sample the clock once so that day, month and year always belong
        # to the same instant.
        today = datetime.now()
        metadata['date'] = '%i.%i.%i' % (today.day, today.month, today.year)
        if self.oeb_book.metadata.language:
            lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value)
            if not lc:
                lc = self.oeb_book.metadata.language[0].value
            metadata['lang'] = lc or 'en'
        else:
            metadata['lang'] = u'en'
        metadata['id'] = None
        metadata['cover'] = self.get_cover()
        metadata['genre'] = self.opts.fb2_genre

        metadata['author'] = u''
        for auth in self.oeb_book.metadata.creator:
            # Split on spaces: one word is a last name, two are first and
            # last, and anything in between becomes the middle name.
            author_first = u''
            author_middle = u''
            author_last = u''
            author_parts = auth.value.split(' ')
            if len(author_parts) == 1:
                author_last = author_parts[0]
            elif len(author_parts) == 2:
                author_first = author_parts[0]
                author_last = author_parts[1]
            else:
                author_first = author_parts[0]
                author_middle = ' '.join(author_parts[1:-1])
                author_last = author_parts[-1]
            metadata['author'] += '<author>'
            metadata['author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(author_first)
            if author_middle:
                metadata['author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(author_middle)
            metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
            metadata['author'] += '</author>'
        if not metadata['author']:
            metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'

        metadata['keywords'] = u''
        tags = list(map(unicode, self.oeb_book.metadata.subject))
        if tags:
            tags = ', '.join(prepare_string_for_xml(x) for x in tags)
            metadata['keywords'] = '<keywords>%s</keywords>'%tags

        metadata['sequence'] = u''
        if self.oeb_book.metadata.series:
            index = '1'
            if self.oeb_book.metadata.series_index:
                index = self.oeb_book.metadata.series_index[0]
            metadata['sequence'] = u'<sequence name="%s" number="%s" />' % (prepare_string_for_xml(u'%s' % self.oeb_book.metadata.series[0]), index)

        year = publisher = isbn = u''
        identifiers = self.oeb_book.metadata['identifier']
        for x in identifiers:
            # The scheme attribute may be absent; treat that as an empty
            # scheme instead of calling .lower() on None.
            scheme = (x.get(OPF('scheme'), None) or '').lower()
            if scheme == 'uuid' or unicode(x).startswith('urn:uuid:'):
                metadata['id'] = unicode(x).split(':')[-1]
                break
        if metadata['id'] is None:
            self.log.warn('No UUID identifier found')
            metadata['id'] = str(uuid.uuid4())

        try:
            date = self.oeb_book.metadata['date'][0]
        except IndexError:
            pass
        else:
            # Only the part before the first '-' is used as the year.
            year = '<year>%s</year>' % prepare_string_for_xml(date.value.partition('-')[0])

        try:
            publisher = self.oeb_book.metadata['publisher'][0]
        except IndexError:
            pass
        else:
            publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(publisher.value)

        for x in identifiers:
            if (x.get(OPF('scheme'), None) or '').lower() == 'isbn':
                isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)

        metadata['year'], metadata['isbn'], metadata['publisher'] = year, isbn, publisher
        # The keys listed below already hold markup (or were escaped while
        # being built); every other value is escaped here.
        for key, value in metadata.items():
            if key not in ('author', 'cover', 'sequence', 'keywords', 'year', 'publisher', 'isbn'):
                metadata[key] = prepare_string_for_xml(value)

        return textwrap.dedent(u'''
            <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
                <description>
                    <title-info>
                        <genre>%(genre)s</genre>
                            %(author)s
                        <book-title>%(title)s</book-title>
                        %(cover)s
                        <lang>%(lang)s</lang>
                        %(keywords)s
                        %(sequence)s
                    </title-info>
                    <document-info>
                        %(author)s
                        <program-used>%(appname)s %(version)s</program-used>
                        <date>%(date)s</date>
                        <id>%(id)s</id>
                        <version>1.0</version>
                    </document-info>
                    <publish-info>
                        %(publisher)s
                        %(year)s
                        %(isbn)s
                    </publish-info>
                </description>\n''') % metadata
Exemple #52
0
    def fb2_header(self):
        """Return the opening ``<FictionBook>`` tag plus its ``<description>``.

        The header is assembled from ``self.oeb_book.metadata`` (title,
        language, creators, subjects, series, identifiers, date, publisher and
        description), ``self.opts.fb2_genre`` and ``self.get_cover()``.  When
        no UUID identifier is present a warning is logged and a random UUID is
        generated instead.

        The author, cover, keywords, sequence, year, publisher, isbn and
        comments fragments are inserted as ready-made markup; every other
        value is XML-escaped before being substituted into the template.

        Returns:
            The header markup as text with blank lines removed.
        """
        from calibre.ebooks.oeb.base import OPF

        def scheme_of(identifier):
            # An identifier without a scheme attribute yields None from
            # .get(); fall back to '' so .lower() cannot raise AttributeError.
            return (identifier.get(OPF('scheme'), None) or '').lower()

        metadata = {}
        metadata['title'] = self.oeb_book.metadata.title[0].value
        metadata['appname'] = __appname__
        metadata['version'] = __version__
        # Sample the clock once so day, month and year always describe the
        # same instant, even if the call straddles midnight.
        today = datetime.now()
        metadata['date'] = '%i.%i.%i' % (today.day, today.month, today.year)
        if self.oeb_book.metadata.language:
            lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value)
            if not lc:
                # No two-letter equivalent: keep the code as declared.
                lc = self.oeb_book.metadata.language[0].value
            metadata['lang'] = lc or 'en'
        else:
            metadata['lang'] = u'en'
        metadata['id'] = None
        metadata['cover'] = self.get_cover()
        metadata['genre'] = self.opts.fb2_genre

        # One <author> element per creator.  The name is split on single
        # spaces: one part is the last name, two parts are first + last, and
        # anything in between becomes the middle name.
        authors = []
        for auth in self.oeb_book.metadata.creator:
            author_parts = auth.value.split(' ')
            author_first = author_middle = ''
            if len(author_parts) == 1:
                author_last = author_parts[0]
            elif len(author_parts) == 2:
                author_first, author_last = author_parts
            else:
                author_first = author_parts[0]
                author_middle = ' '.join(author_parts[1:-1])
                author_last = author_parts[-1]
            pieces = [
                '<author>',
                '<first-name>%s</first-name>' % prepare_string_for_xml(
                    author_first),
            ]
            if author_middle:
                pieces.append(
                    '<middle-name>%s</middle-name>' % prepare_string_for_xml(
                        author_middle))
            pieces.append(
                '<last-name>%s</last-name>' % prepare_string_for_xml(
                    author_last))
            pieces.append('</author>')
            authors.append(''.join(pieces))
        metadata['author'] = ''.join(authors)
        if not metadata['author']:
            # Emit an empty author element when the book has no creators.
            metadata[
                'author'] = '<author><first-name></first-name><last-name></last-name></author>'

        metadata['keywords'] = ''
        tags = list(map(unicode_type, self.oeb_book.metadata.subject))
        if tags:
            tags = ', '.join(prepare_string_for_xml(x) for x in tags)
            metadata['keywords'] = '<keywords>%s</keywords>' % tags

        metadata['sequence'] = ''
        if self.oeb_book.metadata.series:
            index = '1'
            if self.oeb_book.metadata.series_index:
                index = self.oeb_book.metadata.series_index[0]
            # Both values land inside double-quoted attribute values, so they
            # are escaped with the attribute flag set (quotes must not leak).
            metadata['sequence'] = '<sequence name="%s" number="%s"/>' % (
                prepare_string_for_xml(
                    '%s' % self.oeb_book.metadata.series[0], True),
                prepare_string_for_xml('%s' % index, True))

        year = publisher = isbn = ''
        identifiers = self.oeb_book.metadata['identifier']
        for x in identifiers:
            if scheme_of(x) == 'uuid' or unicode_type(x).startswith(
                    'urn:uuid:'):
                metadata['id'] = unicode_type(x).split(':')[-1]
                break
        if metadata['id'] is None:
            self.log.warn('No UUID identifier found')
            metadata['id'] = unicode_type(uuid.uuid4())

        try:
            date = self.oeb_book.metadata['date'][0]
        except IndexError:
            pass
        else:
            # Only the part before the first '-' is used as the year.
            year = '<year>%s</year>' % prepare_string_for_xml(
                date.value.partition('-')[0])

        try:
            publisher = self.oeb_book.metadata['publisher'][0]
        except IndexError:
            pass
        else:
            publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(
                publisher.value)

        # If several ISBN identifiers exist the last one wins.
        for x in identifiers:
            if scheme_of(x) == 'isbn':
                isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)

        metadata['year'], metadata['isbn'], metadata[
            'publisher'] = year, isbn, publisher
        # These entries already hold finished markup and must not be escaped
        # a second time; everything else is plain text.
        prebuilt = ('author', 'cover', 'sequence', 'keywords', 'year',
                    'publisher', 'isbn')
        for key, value in metadata.items():
            if key not in prebuilt:
                metadata[key] = prepare_string_for_xml(value)

        # 'comments' is added after the escaping pass because it is markup.
        try:
            comments = self.oeb_book.metadata['description'][0]
        except Exception:
            metadata['comments'] = ''
        else:
            from calibre.utils.html2text import html2text
            metadata['comments'] = '<annotation><p>{}</p></annotation>'.format(
                prepare_string_for_xml(html2text(comments.value).strip()))

        # Keep the indentation level of the description the same as the body.
        header = textwrap.dedent('''\
            <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
            <description>
                <title-info>
                    <genre>%(genre)s</genre>
                    %(author)s
                    <book-title>%(title)s</book-title>
                    %(cover)s
                    <lang>%(lang)s</lang>
                    %(keywords)s
                    %(sequence)s
                    %(comments)s
                </title-info>
                <document-info>
                    %(author)s
                    <program-used>%(appname)s %(version)s</program-used>
                    <date>%(date)s</date>
                    <id>%(id)s</id>
                    <version>1.0</version>
                </document-info>
                <publish-info>
                    %(publisher)s
                    %(year)s
                    %(isbn)s
                </publish-info>
            </description>''') % metadata

        # Remove empty lines (left behind by fragments that are '').
        return '\n'.join(filter(unicode_type.strip, header.splitlines()))
Exemple #53
0
def metadata_to_xmp_packet(mi):
    """Serialize the metadata object ``mi`` into an XMP packet.

    An ``x:xmpmeta`` root holding a single ``rdf:RDF`` element is built, and
    ``rdf:Description`` children are appended to it in this order: Dublin
    Core fields, XMP identifiers and metadata date, one element each for the
    ``prism`` and ``pdfx`` prefixes (isbn/doi identifiers), and finally the
    calibre specific fields.  The tree is handed to ``serialize_xmp_packet``
    and its result is returned.
    """
    x_maker = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    rdf_maker = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = x_maker.xmpmeta(rdf_maker.RDF)
    rdf = root[0]

    def add_description(*prefixes):
        # Append a fresh rdf:Description (with an empty rdf:about) to the RDF
        # element, declaring the given namespace prefixes on it.
        desc = rdf.makeelement(expand('rdf:Description'),
                               nsmap=nsmap(*prefixes))
        desc.set(expand('rdf:about'), '')
        rdf.append(desc)
        return desc

    # --- Dublin Core -------------------------------------------------------
    dc = add_description('dc')
    for field, tag in iteritems({
            'title': 'dc:title',
            'comments': 'dc:description'
    }):
        create_alt_property(dc, tag, mi.get(field) or '')
    for field, (tag, ordered) in iteritems({
            'authors': ('dc:creator', True),
            'tags': ('dc:subject', False),
            'publisher': ('dc:publisher', False),
    }):
        values = mi.get(field) or ()
        if isinstance(values, string_or_bytes):
            # A lone string is treated as a one-element sequence.
            values = [values]
        create_sequence_property(dc, tag, values, ordered)
    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        pubdate = isoformat(mi.pubdate, as_utc=False)
        create_sequence_property(dc, 'dc:date', [pubdate])
    if not mi.is_null('languages'):
        langs = []
        for code in mi.languages:
            normalized = lang_as_iso639_1(code) or canonicalize_lang(code)
            if normalized:
                langs.append(normalized)
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    # --- XMP identifiers and the prism/pdfx mirrors ------------------------
    xmp = add_description('xmp', 'xmpidq')
    extra_ids = {}
    for prefix in ('prism', 'pdfx'):
        extra_ids[prefix] = add_description(prefix)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, value in iteritems(identifiers):
            if scheme not in {'isbn', 'doi'}:
                continue
            # isbn and doi are additionally written as <prefix>:<scheme>
            # elements under each of the extra descriptions.
            for prefix, parent in iteritems(extra_ids):
                id_elem = parent.makeelement(
                    expand('%s:%s' % (prefix, scheme)))
                id_elem.text = value
                parent.append(id_elem)

    stamp = xmp.makeelement(expand('xmp:MetadataDate'))
    stamp.text = isoformat(now(), as_utc=False)
    xmp.append(stamp)

    # --- calibre specific fields -------------------------------------------
    calibre_desc = add_description('calibre', 'calibreSI', 'calibreCC')
    if not mi.is_null('rating'):
        try:
            rating = float(mi.rating)
        except (TypeError, ValueError):
            # A rating that cannot be read as a number is left out.
            pass
        else:
            create_simple_property(calibre_desc, 'calibre:rating',
                                   '%g' % rating)
    if not mi.is_null('series'):
        create_series(calibre_desc, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        timestamp = isoformat(mi.timestamp, as_utc=False)
        create_simple_property(calibre_desc, 'calibre:timestamp', timestamp)
    for field in ('author_link_map', 'user_categories'):
        mapping = getattr(mi, field, None)
        if mapping:
            create_simple_property(calibre_desc, 'calibre:' + field,
                                   dump_dict(mapping))

    for field in ('title_sort', 'author_sort'):
        if not mi.is_null(field):
            create_simple_property(calibre_desc, 'calibre:' + field,
                                   getattr(mi, field))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre_desc, all_user_metadata)
    return serialize_xmp_packet(root)
Exemple #54
0
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
    """Write ``toc`` into the container's navigation document.

    The target document is chosen as follows: the name returned by
    ``find_existing_nav_toc``; overridden by ``previous_nav[0]`` when that
    href resolves to a file that exists in the container; otherwise a new
    ``nav.xhtml`` item is generated, seeded from ``previous_nav[1]`` or from
    the ``templates/new_nav.html`` resource.

    The ``toc`` nav is rebuilt from ``toc``.  A ``landmarks`` nav is written
    when ``landmarks`` is not None, and a ``page-list`` nav when
    ``toc.page_list`` is truthy.  If ``lang`` is given it is set on the root
    element as both ``lang`` and ``xml:lang``.
    """
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree

    have_previous = previous_nav is not None
    tocname = find_existing_nav_toc(container)
    if have_previous:
        candidate = container.href_to_name(previous_nav[0])
        if candidate and container.exists(candidate):
            tocname = candidate
            container.apply_unique_properties(tocname, 'nav')

    if tocname is not None:
        root = container.parsed(tocname)
    else:
        # No usable nav document: register a new manifest item for one.
        item = container.generate_item('nav.xhtml', id_prefix='nav')
        item.set('properties', 'nav')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        if have_previous:
            root = previous_nav[1]
        else:
            template = P('templates/new_nav.html', data=True)
            root = container.parse_xhtml(template.decode('utf-8'))
        container.replace(tocname, root)

    if lang:
        lang = lang_as_iso639_1(lang) or lang
        for attr in ('lang', '{%s}lang' % XML_NS):
            root.set(attr, lang)

    toc_nav = ensure_single_nav_of_type(root, 'toc')
    if toc.toc_title:
        heading = toc_nav.makeelement(XHTML('h1'))
        toc_nav.append(heading)
        heading.text = toc.toc_title

    top_list = toc_nav.makeelement(XHTML('ol'))
    toc_nav.append(top_list)
    whitespace_run = re.compile(r'\s+')

    def to_href(name):
        # hrefs are written relative to the nav document itself
        return container.name_to_href(name, base=tocname)

    def add_entries(xml_parent, toc_parent):
        # Recursively mirror the ToC tree as nested <ol>/<li> elements.
        for entry in toc_parent:
            li = xml_parent.makeelement(XHTML('li'))
            xml_parent.append(li)
            label = whitespace_run.sub(' ', entry.title or '').strip()
            # Entries without a destination become a plain <span>.
            if entry.dest:
                node = li.makeelement(XHTML('a'))
            else:
                node = li.makeelement(XHTML('span'))
            node.text = label
            li.append(node)
            if entry.dest:
                target = to_href(entry.dest)
                if entry.frag:
                    target += '#'+entry.frag
                node.set('href', target)
            if len(entry):
                sub_list = li.makeelement(XHTML('ol'))
                li.append(sub_list)
                add_entries(sub_list, entry)

    add_entries(top_list, toc)
    pretty_xml_tree(toc_nav)

    def collapse_single_child_items(parent):
        # Undo pretty-printing whitespace inside <li> elements that hold
        # exactly one child.
        for li in parent.iterdescendants(XHTML('li')):
            if len(li) != 1:
                continue
            li.text = None
            li[0].tail = None

    collapse_single_child_items(toc_nav)
    toc_nav.tail = '\n'

    def append_link_item(ol, entry):
        # Append <li><a href=...></a></li> to ``ol`` and return the <a>.
        li = ol.makeelement(XHTML('li'))
        ol.append(li)
        anchor = li.makeelement(XHTML('a'))
        li.append(anchor)
        target = container.name_to_href(entry['dest'], tocname)
        if entry['frag']:
            target += '#' + entry['frag']
        anchor.set('href', target)
        return anchor

    def start_hidden_nav(nav_type):
        # Fetch the single nav of this type, mark it hidden and give it a
        # new <ol> to fill.
        section = ensure_single_nav_of_type(root, nav_type)
        section.set('hidden', '')
        listing = section.makeelement(XHTML('ol'))
        section.append(listing)
        return section, listing

    def is_document(name):
        return container.has_name(name) and container.mime_map[name] in OEB_DOCS

    if landmarks is not None:
        section, listing = start_hidden_nav('landmarks')
        for entry in landmarks:
            if entry['type'] and is_document(entry['dest']):
                anchor = append_link_item(listing, entry)
                anchor.set('{%s}type' % EPUB_NS, entry['type'])
                anchor.text = entry['title'] or None
        pretty_xml_tree(section)
        collapse_single_child_items(section)

    if toc.page_list:
        section, listing = start_hidden_nav('page-list')
        for entry in toc.page_list:
            if is_document(entry['dest']):
                anchor = append_link_item(listing, entry)
                anchor.text = str(entry['pagenum'])
        pretty_xml_tree(section)
        collapse_single_child_items(section)

    container.replace(tocname, root)
Exemple #55
0
def lang_for_tag(tag):
    """Return the language declared on ``tag``, as given by lang_as_iso639_1.

    The plain ``lang`` attribute is consulted first, then ``xml:lang``; the
    first one for which ``lang_as_iso639_1`` returns a truthy value wins.
    Returns None when neither attribute produces one.
    """
    attr_names = ('lang', '{http://www.w3.org/XML/1998/namespace}lang')
    # Generator keeps the lookup lazy: xml:lang is only read if lang fails.
    converted = (lang_as_iso639_1(tag.get(name)) for name in attr_names)
    return next((code for code in converted if code), None)
Exemple #56
0
    def update(self, mi):
        """Rebuild the EXTH block of this file from ``mi`` and refresh covers.

        EXTH records are generated for the metadata present on ``mi``.  Any
        original record whose code was not regenerated is carried over
        unchanged.  The records are then sorted by code, serialized and
        written through ``self.create_exth``.  If ``mi`` supplies cover data
        (or a cover path), the existing cover and thumbnail records are
        overwritten in place, provided the rescaled image fits in the
        existing record size.

        Side effects on ``mi``: ``mi.title`` is normalized, and user
        annotation markup is stripped from ``mi.comments``.

        Raises:
            MobiError: if the file type is not ``BOOKMOBI`` or the file has
                no existing EXTH record.
        """
        mi.title = normalize(mi.title)

        def update_exth_record(rec):
            # Queue the new record and forget the original with the same
            # code, so it is not emitted again with the leftovers below.
            recs.append(rec)
            if rec[0] in self.original_exth_records:
                self.original_exth_records.pop(rec[0])

        if self.type != "BOOKMOBI":
            raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
                            "\tThis is a %r file of type %r" % (self.type[0:4], self.type[4:8]))

        recs = []
        added_501 = False
        try:
            from calibre.ebooks.conversion.config import load_defaults
            prefs = load_defaults('mobi_output')
            pas = prefs.get('prefer_author_sort', False)
            kindle_pdoc = prefs.get('personal_doc', None)
            share_not_sync = prefs.get('share_not_sync', False)
        except Exception:
            # Preferences are optional: fall back to defaults if they cannot
            # be loaded, but let KeyboardInterrupt/SystemExit propagate.
            pas = False
            kindle_pdoc = None
            share_not_sync = False
        if mi.author_sort and pas:
            # We want an EXTH field per author...
            authors = mi.author_sort.split(' & ')
            for author in authors:
                update_exth_record((100, normalize(author).encode(self.codec, 'replace')))
        elif mi.authors:
            authors = mi.authors
            for author in authors:
                update_exth_record((100, normalize(author).encode(self.codec, 'replace')))
        if mi.publisher:
            update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace')))
        if mi.comments:
            # Strip user annotations.  Both offsets are measured on the
            # untruncated text, so applying the two cuts in sequence keeps
            # everything before whichever marker comes first.
            a_offset = mi.comments.find('<div class="user_annotations">')
            ad_offset = mi.comments.find('<hr class="annotations_divider" />')
            if a_offset >= 0:
                mi.comments = mi.comments[:a_offset]
            if ad_offset >= 0:
                mi.comments = mi.comments[:ad_offset]
            update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace')))
        if mi.isbn:
            update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
        if mi.tags:
            # FIXME: Keep a single subject per EXTH field?
            subjects = '; '.join(mi.tags)
            update_exth_record((105, normalize(subjects).encode(self.codec, 'replace')))

            if kindle_pdoc and kindle_pdoc in mi.tags:
                added_501 = True
                update_exth_record((501, b'PDOC'))

        # Record 106 (date): first available of pubdate, timestamp, the
        # file's own timestamp, and finally the current time.
        if mi.pubdate:
            update_exth_record((106, str(mi.pubdate).encode(self.codec, 'replace')))
        elif mi.timestamp:
            update_exth_record((106, str(mi.timestamp).encode(self.codec, 'replace')))
        elif self.timestamp:
            update_exth_record((106, self.timestamp))
        else:
            update_exth_record((106, nowf().isoformat().encode(self.codec, 'replace')))
        if self.cover_record is not None:
            update_exth_record((201, pack('>I', self.cover_rindex)))
            update_exth_record((203, pack('>I', 0)))
        if self.thumbnail_record is not None:
            update_exth_record((202, pack('>I', self.thumbnail_rindex)))
        # Add a 113 record if not present to allow Amazon syncing
        if (113 not in self.original_exth_records and
                self.original_exth_records.get(501, None) == 'EBOK' and
                not added_501 and not share_not_sync):
            from uuid import uuid4
            update_exth_record((113, str(uuid4())))
        # Add a 112 record with actual UUID
        if getattr(mi, 'uuid', None):
            update_exth_record((112,
                    (u"calibre:%s" % mi.uuid).encode(self.codec, 'replace')))
        if 503 in self.original_exth_records:
            update_exth_record((503, mi.title.encode(self.codec, 'replace')))

        # Update book producer
        if getattr(mi, 'book_producer', False):
            update_exth_record((108, mi.book_producer.encode(self.codec, 'replace')))

        # Set langcode in EXTH header
        if not mi.is_null('language'):
            lang = canonicalize_lang(mi.language)
            lang = lang_as_iso639_1(lang) or lang
            if lang:
                update_exth_record((524, lang.encode(self.codec, 'replace')))

        # Include remaining original EXTH fields
        for rec_id in sorted(self.original_exth_records):
            recs.append((rec_id, self.original_exth_records[rec_id]))
        # Stable sort by record code; records sharing a code keep the order
        # in which they were queued.
        recs.sort(key=lambda rec: rec[0])

        exth = StringIO()
        for code, data in recs:
            # Each record: code, total length (payload + 8 byte header), data
            exth.write(pack('>II', code, len(data) + 8))
            exth.write(data)
        exth = exth.getvalue()
        trail = len(exth) % 4
        pad = '\0' * (4 - trail)  # Always pad w/ at least 1 byte
        exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad]
        exth = ''.join(exth)

        if getattr(self, 'exth', None) is None:
            raise MobiError('No existing EXTH record. Cannot update metadata.')

        if not mi.is_null('language'):
            self.record0[92:96] = iana2mobi(mi.language)
        self.create_exth(exth=exth, new_title=mi.title)

        # Fetch updated timestamp, cover_record, thumbnail_record
        self.fetchEXTHFields()

        if mi.cover_data[1] or mi.cover:
            try:
                if mi.cover_data[1]:
                    data = mi.cover_data[1]
                else:
                    # Close the cover file deterministically instead of
                    # leaving the handle to the garbage collector.
                    with open(mi.cover, 'rb') as cover_file:
                        data = cover_file.read()
            except Exception:
                # Cover replacement is best effort: an unreadable cover file
                # must not fail the metadata update that already succeeded.
                pass
            else:
                # Images are only replaced in place: the rescaled data must
                # fit in the existing record and is zero-padded to its size.
                if is_image(self.cover_record):
                    size = len(self.cover_record)
                    cover = rescale_image(data, size)
                    if len(cover) <= size:
                        cover += b'\0' * (size - len(cover))
                        self.cover_record[:] = cover
                if is_image(self.thumbnail_record):
                    size = len(self.thumbnail_record)
                    thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN)
                    if len(thumbnail) <= size:
                        thumbnail += b'\0' * (size - len(thumbnail))
                        self.thumbnail_record[:] = thumbnail
Exemple #57
0
def html_lang(docx_lang):
    """Convert a DOCX language value to the form given by lang_as_iso639_1.

    Returns None when ``canonicalize_lang`` yields nothing or ``'und'``, or
    when ``lang_as_iso639_1`` has no truthy result for the canonical code.
    """
    canonical = canonicalize_lang(docx_lang)
    if not canonical or canonical == 'und':
        return None
    return lang_as_iso639_1(canonical) or None
Exemple #58
0
 def __init__(self, namespace, log, document_lang):
     """Store the collaborators and start with empty style registries.

     ``document_lang`` is passed through ``lang_as_iso639_1``; when that
     yields nothing the document language defaults to ``'en'``.
     """
     self.namespace = namespace
     iso_code = lang_as_iso639_1(document_lang)
     self.document_lang = iso_code if iso_code else 'en'
     self.log = log
     # Registries start empty; nothing in this method populates them.
     self.block_styles = {}
     self.text_styles = {}
     self.styles_for_html_blocks = {}
Exemple #59
0
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata object supplying title, authors and language.
    :param path: Destination file path.
    :param fmt: Output format; must be one of ``valid_empty_formats``.
    :param opf_name: Name of the OPF file inside the generated book.
    :param html_name: Name of the single (empty) HTML file inside the book.
    :param toc_name: Name of the NCX table of contents inside the book.
    :raises ValueError: if ``fmt`` is not in ``valid_empty_formats``.

    ``txt`` output is just the title (UTF-8 encoded) and ``docx`` output is
    delegated to the DOCX writer.  Every other format gets an OPF, one HTML
    file and an NCX: ``azw3`` via ``opf_to_azw3``, anything else as an EPUB
    style ZIP archive.
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                title = mi.title
                # The file is opened in binary mode, so text has to be
                # encoded explicitly rather than relying on an implicit
                # (ASCII) conversion.
                if not isinstance(title, bytes):
                    title = title.encode('utf-8')
                f.write(title)
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    # Use the first non-empty <language> from the OPF, else 'und'.
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for lang_el in opf.xpath('//*[local-name()="language"]'):
        if lang_el.text:
            lang = lang_el.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Add a manifest (HTML file + NCX) and a one-item spine to the OPF.
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        # Write the three files into a scratch directory and convert.
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            # 'mimetype' is written first and stored uncompressed.
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            # 0o755 is the octal spelling accepted by Python 2.6+ and 3;
            # the bare 0755 form is a syntax error on Python 3.
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)