Exemple #1
0
 def to_html(self):
     '''
     A HTML representation of this object.
     '''
     from ebook_converter.ebooks.metadata import authors_to_string
     from ebook_converter.utils.date import isoformat
     ans = [('Title', str(self.title))]
     ans += [('Author(s)', (authors_to_string(self.authors)
                            if self.authors else 'Unknown'))]
     ans += [('Publisher', str(self.publisher))]
     ans += [('Producer', str(self.book_producer))]
     ans += [('Comments', str(self.comments))]
     ans += [('ISBN', str(self.isbn))]
     ans += [('Tags', ', '.join([str(t) for t in self.tags]))]
     if self.series:
         ans += [('Series', str(self.series) +
                  ' #%s' % self.format_series_index())]
     ans += [('Languages', ', '.join(self.languages))]
     if self.timestamp is not None:
         ans += [('Timestamp', str(isoformat(self.timestamp, as_utc=False,
                                             sep=' ')))]
     if self.pubdate is not None:
         ans += [('Published', str(isoformat(self.pubdate, as_utc=False,
                                             sep=' ')))]
     if self.rights is not None:
         ans += [('Rights', str(self.rights))]
     for key in self.custom_field_keys():
         val = self.get(key, None)
         if val:
             (name, val) = self.format_field(key)
             ans += [(name, val)]
     for i, x in enumerate(ans):
         ans[i] = '<tr><td><b>%s</b></td><td>%s</td></tr>'%x
     return '<table>%s</table>'%'\n'.join(ans)
Exemple #2
0
    def __unicode__representation__(self):
        '''
        A string representation of this object, suitable for printing to
        console
        '''
        from ebook_converter.utils.date import isoformat
        from ebook_converter.ebooks.metadata import authors_to_string
        ans = []

        def fmt(x, y):
            ans.append('%-20s: %s'%(str(x), str(y)))

        fmt('Title', self.title)
        if self.title_sort:
            fmt('Title sort', self.title_sort)
        if self.authors:
            fmt('Author(s)',  authors_to_string(self.authors) +
               ((' [' + self.author_sort + ']')
                if self.author_sort and self.author_sort != 'Unknown' else ''))
        if self.publisher:
            fmt('Publisher', self.publisher)
        if getattr(self, 'book_producer', False):
            fmt('Book Producer', self.book_producer)
        if self.tags:
            fmt('Tags', ', '.join([str(t) for t in self.tags]))
        if self.series:
            fmt('Series', self.series + ' #%s'%self.format_series_index())
        if not self.is_null('languages'):
            fmt('Languages', ', '.join(self.languages))
        if self.rating is not None:
            fmt('Rating', ('%.2g'%(float(self.rating)/2)) if self.rating
                    else '')
        if self.timestamp is not None:
            fmt('Timestamp', isoformat(self.timestamp))
        if self.pubdate is not None:
            fmt('Published', isoformat(self.pubdate))
        if self.rights is not None:
            fmt('Rights', str(self.rights))
        if self.identifiers:
            fmt('Identifiers', ', '.join(['%s:%s'%(k, v) for k, v in
                self.identifiers.items()]))
        if self.comments:
            fmt('Comments', self.comments)

        for key in self.custom_field_keys():
            val = self.get(key, None)
            if val:
                (name, val) = self.format_field(key)
                fmt(name, str(val))
        return '\n'.join(ans)
Exemple #3
0
def set_pubdate(root, prefixes, refines, val):
    for date in XPath('./opf:metadata/dc:date')(root):
        remove_element(date, refines)
    if not is_date_undefined(val):
        val = isoformat(val)
        m = XPath('./opf:metadata')(root)[0]
        d = m.makeelement(oeb_base.tag('dc', 'date'))
        d.text = val
        m.append(d)
Exemple #4
0
 def add_user_metadata(name, val):
     if not hasattr(add_user_metadata, 'sentinel_added'):
         add_user_metadata.sentinel_added = True
         remove_user_metadata('opf.metadata')
         add_um('opf.metadata', 'true', 'boolean')
     val_type = 'string'
     if hasattr(val, 'strftime'):
         val = isoformat(val, as_utc=True).split('T')[0]
         val_type = 'date'
     add_um(name, val, val_type)
Exemple #5
0
def to_json(obj):
    if isinstance(obj, bytearray):
        return {'__class__': 'bytearray',
                '__value__': base64.standard_b64encode(bytes(obj))
                             .decode('ascii')}
    if isinstance(obj, datetime.datetime):
        return {'__class__': 'datetime.datetime',
                '__value__': isoformat(obj, as_utc=True)}
    if isinstance(obj, (set, frozenset)):
        return {'__class__': 'set', '__value__': tuple(obj)}
    if isinstance(obj, bytes):
        return obj.decode('utf-8')
    if hasattr(obj, 'toBase64'):  # QByteArray
        return {'__class__': 'bytearray',
                '__value__': bytes(obj.toBase64()).decode('ascii')}
    raise TypeError(repr(obj) + ' is not JSON serializable')
Exemple #6
0
            def tpl_replace(objtplname):

                tpl_field = re.sub(r'[\{\}]', '', objtplname.group())

                if tpl_field in TEMPLATE_ALLOWED_FIELDS:
                    if tpl_field in ['pubdate', 'timestamp']:
                        tpl_field = isoformat(
                            entry[tpl_field]).partition('T')[0]
                    elif tpl_field in ['tags', 'authors']:
                        tpl_field = entry[tpl_field][0]
                    elif tpl_field in ['id', 'series_index']:
                        tpl_field = str(entry[tpl_field])
                    else:
                        tpl_field = entry[tpl_field]
                    return ascii_text(tpl_field)
                else:
                    return ''
Exemple #7
0
        def create_bibtex_entry(entry,
                                fields,
                                mode,
                                template_citation,
                                bibtexdict,
                                db,
                                citation_bibtex=True,
                                calibre_files=True):

            # Bibtex doesn't like UTF-8 but keep unicode until writing
            # Define starting chain or if book valid strict and not book return a Fail string

            bibtex_entry = []
            if mode != "misc" and check_entry_book_valid(entry):
                bibtex_entry.append('@book{')
            elif mode != "book":
                bibtex_entry.append('@misc{')
            else:
                # case strict book
                return ''

            if citation_bibtex:
                # Citation tag
                bibtex_entry.append(
                    make_bibtex_citation(entry, template_citation, bibtexdict))
                bibtex_entry = [' '.join(bibtex_entry)]

            for field in fields:
                if field.startswith('#'):
                    item = db.get_field(entry['id'], field, index_is_id=True)
                    if isinstance(item, (bool, numbers.Number)):
                        item = repr(item)
                elif field == 'title_sort':
                    item = entry['sort']
                elif field == 'library_name':
                    item = library_name
                else:
                    item = entry[field]

                # check if the field should be included (none or empty)
                if item is None:
                    continue
                try:
                    if len(item) == 0:
                        continue
                except TypeError:
                    pass

                if field == 'authors':
                    bibtex_entry.append('author = "%s"' %
                                        bibtexdict.bibtex_author_format(item))

                elif field == 'id':
                    bibtex_entry.append('calibreid = "%s"' % int(item))

                elif field == 'rating':
                    bibtex_entry.append('rating = "%s"' % int(item))

                elif field == 'size':
                    bibtex_entry.append('%s = "%s octets"' %
                                        (field, int(item)))

                elif field == 'tags':
                    # A list to flatten
                    bibtex_entry.append(
                        'tags = "%s"' %
                        bibtexdict.utf8ToBibtex(', '.join(item)))

                elif field == 'comments':
                    # \n removal
                    item = item.replace('\r\n', ' ')
                    item = item.replace('\n', ' ')
                    # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces)
                    item = bibtexdict.stripUnmatchedSyntax(item, '{', '}')
                    # html to text
                    try:
                        item = html2text(item)
                    except:
                        log.warn("Failed to convert comments to text")
                    bibtex_entry.append('note = "%s"' %
                                        bibtexdict.utf8ToBibtex(item))

                elif field == 'isbn':
                    # Could be 9, 10 or 13 digits
                    bibtex_entry.append('isbn = "%s"' % format_isbn(item))

                elif field == 'formats':
                    # Add file path if format is selected
                    formats = [
                        format.rpartition('.')[2].lower() for format in item
                    ]
                    bibtex_entry.append('formats = "%s"' % ', '.join(formats))
                    if calibre_files:
                        files = [
                            ':%s:%s' %
                            (format, format.rpartition('.')[2].upper())
                            for format in item
                        ]
                        bibtex_entry.append('file = "%s"' % ', '.join(files))

                elif field == 'series_index':
                    bibtex_entry.append('volume = "%s"' % int(item))

                elif field == 'timestamp':
                    bibtex_entry.append('timestamp = "%s"' %
                                        isoformat(item).partition('T')[0])

                elif field == 'pubdate':
                    bibtex_entry.append('year = "%s"' % item.year)
                    bibtex_entry.append(
                        'month = "%s"' %
                        bibtexdict.utf8ToBibtex(date.strftime("%b", item)))

                elif field.startswith('#') and isinstance(item, (str, bytes)):
                    bibtex_entry.append(
                        'custom_%s = "%s"' %
                        (field[1:], bibtexdict.utf8ToBibtex(item)))

                elif isinstance(item, (str, bytes)):
                    # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                    # 'author_sort', 'series', 'title_sort'] :
                    bibtex_entry.append('%s = "%s"' %
                                        (field, bibtexdict.utf8ToBibtex(item)))

            bibtex_entry = ',\n    '.join(bibtex_entry)
            bibtex_entry += ' }\n\n'

            return bibtex_entry
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
    if not mi.is_null('title'):
        m.clear('title')
        m.add('title', mi.title)
    if mi.title_sort:
        if not m.title:
            m.add('title', mi.title_sort)
        m.clear('title_sort')
        m.add('title_sort', mi.title_sort)
    if not mi.is_null('authors'):
        m.filter('creator', lambda x: x.role.lower() in ['aut', ''])
        for a in mi.authors:
            attrib = {'role': 'aut'}
            if mi.author_sort:
                attrib[base.tag('opf', 'file-as')] = mi.author_sort
            m.add('creator', a, attrib=attrib)
    if not mi.is_null('book_producer'):
        m.filter('contributor', lambda x: x.role.lower() == 'bkp')
        m.add('contributor', mi.book_producer, role='bkp')
    elif override_input_metadata:
        m.filter('contributor', lambda x: x.role.lower() == 'bkp')
    if not mi.is_null('comments'):
        m.clear('description')
        m.add('description', mi.comments)
    elif override_input_metadata:
        m.clear('description')
    if not mi.is_null('publisher'):
        m.clear('publisher')
        m.add('publisher', mi.publisher)
    elif override_input_metadata:
        m.clear('publisher')
    if not mi.is_null('series'):
        m.clear('series')
        m.add('series', mi.series)
    elif override_input_metadata:
        m.clear('series')
    identifiers = mi.get_identifiers()
    set_isbn = False
    for typ, val in identifiers.items():
        has = False
        if typ.lower() == 'isbn':
            set_isbn = True
        for x in m.identifier:
            if x.scheme.lower() == typ.lower():
                x.content = val
                has = True
        if not has:
            m.add('identifier', val, scheme=typ.upper())
    if override_input_metadata and not set_isbn:
        m.filter('identifier', lambda x: x.scheme.lower() == 'isbn')
    if not mi.is_null('languages'):
        m.clear('language')
        for lang in mi.languages:
            if lang and lang.lower() not in ('und', ''):
                m.add('language', lang)
    if not mi.is_null('series_index'):
        m.clear('series_index')
        m.add('series_index', mi.format_series_index())
    elif override_input_metadata:
        m.clear('series_index')
    if not mi.is_null('rating'):
        m.clear('rating')
        m.add('rating', '%.2f' % mi.rating)
    elif override_input_metadata:
        m.clear('rating')
    if not mi.is_null('tags'):
        m.clear('subject')
        for t in mi.tags:
            m.add('subject', t)
    elif override_input_metadata:
        m.clear('subject')
    if not mi.is_null('pubdate'):
        m.clear('date')
        m.add('date', isoformat(mi.pubdate))
    if not mi.is_null('timestamp'):
        m.clear('timestamp')
        m.add('timestamp', isoformat(mi.timestamp))
    if not mi.is_null('rights'):
        m.clear('rights')
        m.add('rights', mi.rights)
    if not mi.is_null('publication_type'):
        m.clear('publication_type')
        m.add('publication_type', mi.publication_type)

    if not m.timestamp:
        m.add('timestamp', isoformat(now()))
    def run(self, path_to_output, opts, db, notification=DummyReporter()):
        from ebook_converter.library import current_library_name
        from ebook_converter.utils.date import isoformat
        from ebook_converter.utils.html2text import html2text
        from ebook_converter.utils.logging import default_log as log
        from lxml import etree
        from ebook_converter.ebooks.metadata import authors_to_string

        self.fmt = path_to_output.rpartition('.')[2]
        self.notification = notification
        current_library = current_library_name()
        if getattr(opts, 'library_path', None):
            current_library = os.path.basename(opts.library_path)

        if opts.verbose:
            opts_dict = vars(opts)
            log("%s('%s'): Generating %s" %
                (self.name, current_library, self.fmt.upper()))
            if opts.connected_device['is_device_connected']:
                log(" connected_device: %s" % opts.connected_device['name'])
            if opts_dict['search_text']:
                log(" --search='%s'" % opts_dict['search_text'])

            if opts_dict['ids']:
                log(" Book count: %d" % len(opts_dict['ids']))
                if opts_dict['search_text']:
                    log(" (--search ignored when a subset of the database is specified)"
                        )

            if opts_dict['fields']:
                if opts_dict['fields'] == 'all':
                    log(" Fields: %s" % ', '.join(FIELDS[1:]))
                else:
                    log(" Fields: %s" % opts_dict['fields'])

        # If a list of ids are provided, don't use search_text
        if opts.ids:
            opts.search_text = None

        data = self.search_sort_db(db, opts)

        if not len(data):
            log.error(
                "\nNo matching database entries for search criteria '%s'" %
                opts.search_text)
            # raise SystemExit(1)

        # Get the requested output fields as a list
        fields = self.get_output_fields(db, opts)

        # If connected device, add 'On Device' values to data
        if opts.connected_device[
                'is_device_connected'] and 'ondevice' in fields:
            for entry in data:
                entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[
                    entry['id']]['ondevice']

        fm = {x: db.field_metadata.get(x, {}) for x in fields}

        if self.fmt == 'csv':
            outfile = codecs.open(path_to_output, 'w', 'utf8')

            # Write a UTF-8 BOM
            outfile.write('\ufeff')

            # Output the field headers
            outfile.write('%s\n' % ','.join(fields))

            # Output the entry fields
            for entry in data:
                outstr = []
                for field in fields:
                    if field.startswith('#'):
                        item = db.get_field(entry['id'],
                                            field,
                                            index_is_id=True)
                        if isinstance(item, (list, tuple)):
                            if fm.get(field,
                                      {}).get('display',
                                              {}).get('is_names', False):
                                item = ' & '.join(item)
                            else:
                                item = ', '.join(item)
                    elif field == 'library_name':
                        item = current_library
                    elif field == 'title_sort':
                        item = entry['sort']
                    else:
                        item = entry[field]

                    if item is None:
                        outstr.append('""')
                        continue
                    elif field == 'formats':
                        fmt_list = []
                        for format in item:
                            fmt_list.append(format.rpartition('.')[2].lower())
                        item = ', '.join(fmt_list)
                    elif field == 'authors':
                        item = authors_to_string(item)
                    elif field == 'tags':
                        item = ', '.join(item)
                    elif field == 'isbn':
                        # Could be 9, 10 or 13 digits, with hyphens, possibly ending in 'X'
                        item = '%s' % re.sub(r'[^\dX-]', '', item)
                    elif fm.get(field, {}).get('datatype') == 'datetime':
                        item = isoformat(item, as_utc=False)
                    elif field == 'comments':
                        item = item.replace('\r\n', ' ')
                        item = item.replace('\n', ' ')
                    elif fm.get(field, {}).get('datatype',
                                               None) == 'rating' and item:
                        item = '%.2g' % (item / 2)

                    # Convert HTML to markdown text
                    if isinstance(item, str):
                        opening_tag = re.search(r'<(\w+)( |>)', item)
                        if opening_tag:
                            closing_tag = re.search(
                                r'<\/%s>$' % opening_tag.group(1), item)
                            if closing_tag:
                                item = html2text(item)

                    outstr.append('"%s"' % str(item).replace('"', '""'))

                outfile.write(','.join(outstr) + '\n')
            outfile.close()

        elif self.fmt == 'xml':
            from lxml.builder import E

            root = E.calibredb()
            for r in data:
                record = E.record()
                root.append(record)

                for field in fields:
                    if field.startswith('#'):
                        val = db.get_field(r['id'], field, index_is_id=True)
                        if not isinstance(val, str):
                            val = str(val)
                        item = getattr(E, field.replace('#', '_'))(val)
                        record.append(item)

                for field in ('id', 'uuid', 'publisher', 'rating', 'size',
                              'isbn', 'ondevice', 'identifiers'):
                    if field in fields:
                        val = r[field]
                        if not val:
                            continue
                        if not isinstance(val, (bytes, str)):
                            if (fm.get(field, {}).get('datatype', None)
                                    == 'rating' and val):
                                val = '%.2g' % (val / 2)
                            val = str(val)
                        item = getattr(E, field)(val)
                        record.append(item)

                if 'title' in fields:
                    title = E.title(r['title'], sort=r['sort'])
                    record.append(title)

                if 'authors' in fields:
                    aus = E.authors(sort=r['author_sort'])
                    for au in r['authors']:
                        aus.append(E.author(au))
                    record.append(aus)

                for field in ('timestamp', 'pubdate'):
                    if field in fields:
                        record.append(
                            getattr(E, field)(isoformat(r[field],
                                                        as_utc=False)))

                if 'tags' in fields and r['tags']:
                    tags = E.tags()
                    for tag in r['tags']:
                        tags.append(E.tag(tag))
                    record.append(tags)

                if 'comments' in fields and r['comments']:
                    record.append(E.comments(r['comments']))

                if 'series' in fields and r['series']:
                    record.append(
                        E.series(r['series'], index=str(r['series_index'])))

                if 'cover' in fields and r['cover']:
                    record.append(E.cover(r['cover'].replace(os.sep, '/')))

                if 'formats' in fields and r['formats']:
                    fmt = E.formats()
                    for f in r['formats']:
                        fmt.append(E.format(f.replace(os.sep, '/')))
                    record.append(fmt)

                if 'library_name' in fields:
                    record.append(E.library_name(current_library))

            with open(path_to_output, 'wb') as f:
                f.write(
                    etree.tostring(root,
                                   encoding='utf-8',
                                   xml_declaration=True,
                                   pretty_print=True))
Exemple #10
0
def metadata_to_xmp_packet(mi):
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]
    dc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('dc'))
    dc.set(expand('rdf:about'), '')
    rdf.append(dc)
    for prop, tag in {
            'title': 'dc:title',
            'comments': 'dc:description'
    }.items():
        val = mi.get(prop) or ''
        create_alt_property(dc, tag, val)
    for prop, (tag, ordered) in {
            'authors': ('dc:creator', True),
            'tags': ('dc:subject', False),
            'publisher': ('dc:publisher', False)
    }.items():
        val = mi.get(prop) or ()
        if isinstance(val, (str, bytes)):
            val = [val]
        create_sequence_property(dc, tag, val, ordered)
    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        create_sequence_property(dc, 'dc:date',
                                 [isoformat(mi.pubdate, as_utc=False)])
    if not mi.is_null('languages'):
        langs = list(
            filter(
                None,
                map(lambda x: lang_as_iso639_1(x) or canonicalize_lang(x),
                    mi.languages)))
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    xmp = rdf.makeelement(expand('rdf:Description'),
                          nsmap=nsmap('xmp', 'xmpidq'))
    xmp.set(expand('rdf:about'), '')
    rdf.append(xmp)
    extra_ids = {}
    for x in ('prism', 'pdfx'):
        p = extra_ids[x] = rdf.makeelement(expand('rdf:Description'),
                                           nsmap=nsmap(x))
        p.set(expand('rdf:about'), '')
        rdf.append(p)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, val in identifiers.items():
            if scheme in {'isbn', 'doi'}:
                for prefix, parent in extra_ids.items():
                    ie = parent.makeelement(expand('%s:%s' % (prefix, scheme)))
                    ie.text = val
                    parent.append(ie)

    d = xmp.makeelement(expand('xmp:MetadataDate'))
    d.text = isoformat(now(), as_utc=False)
    xmp.append(d)

    calibre = rdf.makeelement(expand('rdf:Description'),
                              nsmap=nsmap('calibre', 'calibreSI', 'calibreCC'))
    calibre.set(expand('rdf:about'), '')
    rdf.append(calibre)
    if not mi.is_null('rating'):
        try:
            r = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, 'calibre:rating', '%g' % r)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp',
                               isoformat(mi.timestamp, as_utc=False))
    for x in ('author_link_map', 'user_categories'):
        val = getattr(mi, x, None)
        if val:
            create_simple_property(calibre, 'calibre:' + x, dump_dict(val))

    for x in ('title_sort', 'author_sort'):
        if not mi.is_null(x):
            create_simple_property(calibre, 'calibre:' + x, getattr(mi, x))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)