Ejemplo n.º 1
0
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    from ebook_converter.ebooks.metadata.book.base import Metadata
    from ebook_converter.utils.date import parse_only_date
    from ebook_converter.db.write import get_series_values
    if 'meta' not in extensions:
        extensions.append('meta')
    md = create_markdown_object(extensions)
    html = md.convert(txt)
    mi = Metadata(title or 'Unknown')
    m = md.Meta
    for k, v in {'date':'pubdate', 'summary':'comments'}.items():
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            if not mf.get('is_multiple'):
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                try:
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
Ejemplo n.º 2
0
 def test_input_comment_multi(self):
     stream_meta = get_metadata(self.get_stream('comment_multi'))
     canon_meta = Metadata('A Comment Tag & Title Ⓒ', ['James Madison', 'James Monroe', 'John Quincy Adams'])
     canon_meta.publisher = 'Publisher C'
     canon_meta.languages = ['French', 'Japanese']
     canon_meta.pubdate = parse_date('2015-01-01')
     canon_meta.timestamp = parse_date('2014-01-01')
     canon_meta.series = 'Comment Series'
     canon_meta.series_index = float(3)
     canon_meta.rating = float(0)
     canon_meta.comments = 'comment "comments" ♥ HTML -- too &'
     canon_meta.tags = ['tag d', 'tag e', 'tag f']
     canon_meta.set_identifiers({'isbn': '3456789012', 'url': 'http://google.com/search?q=calibre'})
     self.compare_metadata(stream_meta, canon_meta)
Ejemplo n.º 3
0
 def test_input_meta_single(self):
     stream_meta = get_metadata(self.get_stream('meta_single'))
     canon_meta = Metadata('A Meta Tag & Title Ⓒ', ['George Washington'])
     canon_meta.publisher = 'Publisher A'
     canon_meta.languages = ['English']
     canon_meta.pubdate = parse_date('2019-01-01')
     canon_meta.timestamp = parse_date('2018-01-01')
     canon_meta.series = 'Meta Series'
     canon_meta.series_index = float(1)
     # canon_meta.rating = float(0)
     # canon_meta.comments = ''
     canon_meta.tags = ['tag a', 'tag b']
     canon_meta.set_identifiers({'isbn': '1234567890'})
     self.compare_metadata(stream_meta, canon_meta)
Ejemplo n.º 4
0
 def test_input_meta_multi(self):
     stream_meta = get_metadata(self.get_stream('meta_multi'))
     canon_meta = Metadata('A Meta Tag & Title Ⓒ', ['George Washington', 'John Adams', 'Thomas Jefferson'])
     canon_meta.publisher = 'Publisher A'
     canon_meta.languages = ['English', 'Spanish']
     canon_meta.pubdate = parse_date('2019-01-01')
     canon_meta.timestamp = parse_date('2018-01-01')
     canon_meta.series = 'Meta Series'
     canon_meta.series_index = float(1)
     canon_meta.rating = float(8)
     canon_meta.comments = 'meta "comments" ♥ HTML &'
     canon_meta.tags = ['tag a', 'tag b', 'tag c']
     canon_meta.set_identifiers({'isbn': '1234567890', 'url': 'http://google.com/search?q=calibre'})
     self.compare_metadata(stream_meta, canon_meta)
Ejemplo n.º 5
0
def get_metadata_(src, encoding=None):
    # Meta data definitions as in
    # https://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    if isinstance(src, bytes):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')
    src = src[:150000]  # Searching shouldn't take too long
    comment_tags, meta_tags, meta_tag_ids, title_tag = parse_metadata(src)

    def get_all(field):
        ans = comment_tags.get(field, meta_tags.get(field, None))
        if ans:
            ans = [x.strip() for x in ans if x.strip()]
        if not ans:
            ans = None
        return ans

    def get(field):
        ans = get_all(field)
        if ans:
            ans = ans[0]
        return ans

    # Title
    title = get('title') or title_tag.strip() or 'Unknown'

    # Author
    authors = authors_to_string(get_all('authors')) or 'Unknown'

    # Create MetaInformation with Title and Author
    mi = Metadata(title, string_to_authors(authors))

    # Single-value text fields
    for field in ('publisher', 'isbn'):
        val = get(field)
        if val:
            setattr(mi, field, val)

    # Multi-value text fields
    for field in ('languages',):
        val = get_all(field)
        if val:
            setattr(mi, field, val)

    # HTML fields
    for field in ('comments',):
        val = get(field)
        if val:
            setattr(mi, field, val.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&apos;'))

    # Date fields
    for field in ('pubdate', 'timestamp'):
        try:
            val = parse_date(get(field))
        except:
            pass
        else:
            if not is_date_undefined(val):
                setattr(mi, field, val)

    # SERIES
    series = get('series')
    if series:
        pat = re.compile(r'\[([.0-9]+)\]$')
        match = pat.search(series)
        series_index = None
        if match is not None:
            try:
                series_index = float(match.group(1))
            except:
                pass
            series = series.replace(match.group(), '').strip()
        mi.series = series
        if series_index is None:
            series_index = get('series_index')
            try:
                series_index = float(series_index)
            except:
                pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = get('rating')
    if rating:
        try:
            mi.rating = float(rating)
            if mi.rating < 0:
                mi.rating = 0
            if mi.rating > 10:
                mi.rating = 0
        except:
            pass

    # TAGS
    tags = get_all('tags')
    if tags:
        tags = [x.strip() for s in tags for x in s.split(',') if x.strip()]
        if tags:
            mi.tags = tags

    # IDENTIFIERS
    for (k,v) in meta_tag_ids.items():
        v = [x.strip() for x in v if x.strip()]
        if v:
            mi.set_identifier(k, v[0])

    return mi