Python examples of ebook_converter.ebooks.metadata.book.base.Metadata

Example #1

0

Show file

File: container.py Project: keshavbhatt/ebook-converter

    def metadata(self):
        """Build a ``Metadata`` object from the parts of this container.

        Up to three parts are consulted, in this order: the document
        properties part, ``word/styles.xml`` (only while the ``language``
        field is still null) and the application properties part found via
        ``self.relationships``.  A part whose read raises ``KeyError`` is
        silently skipped.
        """
        info = Metadata('Unknown')

        def fetch(part_name):
            # Return (True, data) for a readable part, or (False, None) when
            # reading it raises KeyError.
            try:
                return True, self.read(part_name)
            except KeyError:
                return False, None

        # Only the first returned name is used here; the application
        # properties name is looked up through the relationships below.
        core_name, unused_app_name = self.get_document_properties_names()
        if core_name:
            found, data = fetch(core_name)
            if found:
                read_doc_props(data, info, self.namespace.XPath)

        if info.is_null('language'):
            found, data = fetch('word/styles.xml')
            if found:
                read_default_style_language(data, info, self.namespace.XPath)

        app_name = self.relationships.get(self.namespace.names['APPPROPS'],
                                          None)
        if app_name:
            found, data = fetch(app_name)
            if found:
                read_app_props(data, info)

        return info

Example #2

0

Show file

def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Render Markdown text to HTML and collect its ``meta`` header fields.

    :param txt: Markdown source text.
    :param title: Title used for the metadata object when the document does
        not supply one; an empty value falls back to ``'Unknown'``.
    :param extensions: Iterable of Markdown extension names. The ``meta``
        extension is always enabled; the passed-in object is never modified.
    :return: A ``(mi, html)`` tuple, where ``mi`` is the populated
        ``Metadata`` object and ``html`` is ``HTML_TEMPLATE`` filled with the
        final title and the rendered document.
    """
    from ebook_converter.ebooks.metadata.book.base import Metadata
    from ebook_converter.utils.date import parse_only_date
    from ebook_converter.db.write import get_series_values
    # Work on a private copy: appending to the argument would permanently
    # alter the caller's list (or the shared module-level default), and would
    # fail outright if a tuple was passed.
    extensions = list(extensions)
    if 'meta' not in extensions:
        extensions.append('meta')
    md = create_markdown_object(extensions)
    html = md.convert(txt)
    mi = Metadata(title or 'Unknown')
    m = md.Meta
    # Accept 'date' and 'summary' as aliases, unless the canonical key is
    # already present in the document header.
    for k, v in {'date': 'pubdate', 'summary': 'comments'}.items():
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            # Single-valued fields take only the first collected value.
            if not mf.get('is_multiple'):
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                # Clamp to the 0..10 range; skip values that are not numeric.
                try:
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                # Skip dates that cannot be parsed rather than failing the
                # whole conversion.
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)

Example #3

0

Show file

 def test_rtf_metadata(self):
     # Round trip: write metadata containing non-ASCII text into a minimal
     # RTF document, read it back and compare the fields of interest.
     expected = Metadata('Test ø̄title', ['Author One', 'Author БTwo'])
     expected.publisher = 'publiSher'
     expected.comments = '<p>some ⊹comments</p>'
     expected.tags = 'tag1 見tag2'.split()

     rtf = BytesIO(br'{\rtf1\ansi\ansicpg1252}')
     set_metadata(rtf, expected)
     rtf.seek(0)
     actual = get_metadata(rtf)

     for field in 'title authors publisher comments tags'.split():
         self.assertEqual(getattr(expected, field), getattr(actual, field))

Example #4

0

Show file

File: __init__.py Project: keshavbhatt/ebook-converter

def generate_test_db(
        library_path,  # {{{
        num_of_records=20000,
        num_of_authors=6000,
        num_of_tags=10000,
        tag_length=7,
        author_length=7,
        title_length=10,
        max_authors=10,
        max_tags=10):
    """Fill a library with randomly generated book records.

    Random tag, author and title strings are generated up front; each
    record then gets between 1 and ``max_authors`` authors and between 0 and
    ``max_tags`` tags picked from those pools. Progress and timing
    information are printed to standard output.

    :param library_path: Directory of the library; created when missing.
    :param num_of_records: Number of book records (and titles) to create.
    :param num_of_authors: Size of the author-name pool.
    :param num_of_tags: Size of the tag pool.
    :param tag_length: Length of every generated tag.
    :param author_length: Length of every generated author name.
    :param title_length: Length of every generated title.
    :param max_authors: Upper bound on authors per record.
    :param max_tags: Upper bound on tags per record.
    """
    import os
    import random
    import string
    import sys
    import time
    from ebook_converter.ebooks.metadata.book.base import Metadata

    os.makedirs(library_path, exist_ok=True)

    # ``string.letters`` (and decoding it) only existed on Python 2; on
    # Python 3 the letter constants are already text.
    letters = string.ascii_letters

    def randstr(length):
        return ''.join(random.choice(letters) for _ in range(length))

    all_tags = [randstr(tag_length) for _ in range(num_of_tags)]
    print('Generated', num_of_tags, 'tags')
    all_authors = [randstr(author_length) for _ in range(num_of_authors)]
    print('Generated', num_of_authors, 'authors')
    all_titles = [randstr(title_length) for _ in range(num_of_records)]
    print('Generated', num_of_records, 'titles')

    testdb = db(library_path)

    print('Creating', num_of_records, 'records...')

    start = time.time()

    for i, title in enumerate(all_titles):
        print(i + 1, end=' ')
        sys.stdout.flush()
        author_count = random.randint(1, max_authors)
        authors = [random.choice(all_authors) for _ in range(author_count)]
        tag_count = random.randint(0, max_tags)
        tags = [random.choice(all_tags) for _ in range(tag_count)]
        mi = Metadata(title, authors)
        mi.tags = tags
        testdb.import_book(mi, [])

    t = time.time() - start
    print('\nGenerated', num_of_records, 'records in:', t, 'seconds')
    # Avoid dividing by zero when no records were requested.
    if num_of_records:
        print('Time per record:', t / num_of_records)

Example #5

0

Show file

File: chm_input.py Project: gryf/ebook-converter

    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert the CHM file behind ``stream`` and return the OEB book.

        The CHM is unpacked to HTML inside a temporary directory, its
        metadata is read (falling back to the file name on failure), an HTML
        root document is created and that is turned into an OEB book.

        :param stream: Open file object for the CHM; only its ``name`` is
            used and the stream is closed by this method.
        :param options: Conversion options. This object is modified: the
            HTML input plugin's recommended values are copied onto it,
            ``input_encoding`` is set to ``'utf-8'`` and ``debug_pipeline``
            is temporarily cleared and then restored.
        :param file_ext: Not used by this method.
        :param log: Logger providing ``debug`` and ``exception``.
        :param accelerators: Not used by this method.
        :return: The OEB book produced by ``self._create_oebbook_html``.
        """
        # NOTE(gryf): for some reason, these imports cannot be moved to the
        # top of the module.
        from ebook_converter.ebooks.chm.metadata import get_metadata_from_reader
        from ebook_converter.customize.ui import plugin_for_input_format
        self.opts = options

        log.debug('Processing CHM...')
        with TemporaryDirectory('_chm2oeb') as tdir:
            if not isinstance(tdir, str):
                tdir = tdir.decode(filesystem_encoding)
            # Apply the HTML input plugin's recommended option values to the
            # options used for this conversion.
            html_input = plugin_for_input_format('html')
            for opt in html_input.options:
                setattr(options, opt.option.name, opt.recommended_value)
            no_images = False  # options.no_images
            chm_name = stream.name
            # chm_data = stream.read()

            # closing stream so CHM can be opened by external library
            stream.close()
            log.debug('tdir=%s', tdir)
            log.debug('stream.name=%s', stream.name)
            # debug_dump is False, or an 'input' path under the debug
            # pipeline directory when one is configured.
            debug_dump = False
            odi = options.debug_pipeline
            if odi:
                debug_dump = os.path.join(odi, 'input')
            # NOTE(review): self._chm_reader, used below, is presumably set
            # up by _chmtohtml -- confirm.
            mainname = self._chmtohtml(tdir,
                                       chm_name,
                                       no_images,
                                       log,
                                       debug_dump=debug_dump)
            mainpath = os.path.join(tdir, mainname)

            try:
                metadata = get_metadata_from_reader(self._chm_reader)
            except Exception:
                # Best effort: fall back to metadata titled after the file.
                log.exception('Failed to read metadata, using filename')
                from ebook_converter.ebooks.metadata.book.base import Metadata
                metadata = Metadata(os.path.basename(chm_name))
            # Encoding preference: the reader's own, then the user-supplied
            # input encoding, then cp1252.
            encoding = (self._chm_reader.get_encoding()
                        or options.input_encoding or 'cp1252')
            self._chm_reader.CloseCHM()

            # debug_pipeline is cleared while the OEB book is created and
            # restored afterwards (only if no exception is raised in between).
            options.debug_pipeline = None
            options.input_encoding = 'utf-8'
            # Files listed in re_encoded_files are read as utf-8 instead of
            # the encoding chosen above.
            uenc = encoding
            if os.path.abspath(mainpath) in self._chm_reader.re_encoded_files:
                uenc = 'utf-8'
            htmlpath, toc = self._create_html_root(mainpath, log, uenc)
            oeb = self._create_oebbook_html(htmlpath, tdir, options, log,
                                            metadata)
            options.debug_pipeline = odi
            # When the TOC has more than one entry, build the book's TOC from
            # the first spine item and drop that item from the manifest.
            if toc.count() > 1:
                oeb.toc = self.parse_html_toc(oeb.spine[0])
                oeb.manifest.remove(oeb.spine[0])
                oeb.auto_generated_toc = False
        return oeb

Example #6

0

Show file

def MetaInformation(title, authors=('Unknown', )):
    """Compatibility wrapper that builds a ``Metadata`` object.

    @param title: title or ``'Unknown'`` or a MetaInformation object (any
                  object that has both ``title`` and ``authors`` attributes)
    @param authors: List of strings or []

    When a metadata-like object is passed as ``title``, its title and authors
    are used and the object itself is handed to ``Metadata`` as ``other``.
    """
    from ebook_converter.ebooks.metadata.book.base import Metadata

    looks_like_metadata = (hasattr(title, 'title')
                           and hasattr(title, 'authors'))
    if not looks_like_metadata:
        return Metadata(title, authors, other=None)

    source = title
    return Metadata(source.title, source.authors, other=source)

Example #7

0

Show file