def to_html(self):
    '''
    Build an HTML ``<table>`` describing this object.

    Each metadata field becomes one ``<tr>`` row holding a bold label cell
    and a value cell. Values are interpolated as-is; no HTML escaping is
    applied in this method.

    :return: The table markup as a single string, rows separated by
             newlines.
    '''
    from ebook_converter.ebooks.metadata import authors_to_string
    from ebook_converter.utils.date import isoformat

    rows = []

    def add(label, value):
        rows.append((label, value))

    # Fields that are always listed, even when empty/None.
    add('Title', str(self.title))
    if self.authors:
        add('Author(s)', authors_to_string(self.authors))
    else:
        add('Author(s)', 'Unknown')
    add('Publisher', str(self.publisher))
    add('Producer', str(self.book_producer))
    add('Comments', str(self.comments))
    add('ISBN', str(self.isbn))
    add('Tags', ', '.join([str(tag) for tag in self.tags]))
    if self.series:
        add('Series',
            str(self.series) + ' #%s' % self.format_series_index())
    add('Languages', ', '.join(self.languages))

    # Optional fields: only listed when they carry a value.
    if self.timestamp is not None:
        add('Timestamp',
            str(isoformat(self.timestamp, as_utc=False, sep=' ')))
    if self.pubdate is not None:
        add('Published',
            str(isoformat(self.pubdate, as_utc=False, sep=' ')))
    if self.rights is not None:
        add('Rights', str(self.rights))

    # Custom fields are listed only when their raw value is truthy; the
    # displayed label/value pair comes from format_field().
    for key in self.custom_field_keys():
        if self.get(key, None):
            (label, shown) = self.format_field(key)
            add(label, shown)

    markup = ['<tr><td><b>%s</b></td><td>%s</td></tr>' % row
              for row in rows]
    return '<table>%s</table>' % '\n'.join(markup)
def __unicode__representation__(self):
    '''
    Produce a multi-line text summary of this object for console output.

    Every line has the form ``label: value`` with the label left-aligned
    and padded to 20 characters. Apart from the title, a field is only
    listed when it carries a value.

    :return: The lines joined with newlines.
    '''
    from ebook_converter.utils.date import isoformat
    from ebook_converter.ebooks.metadata import authors_to_string

    lines = []

    def emit(label, value):
        lines.append('%-20s: %s' % (str(label), str(value)))

    emit('Title', self.title)
    if self.title_sort:
        emit('Title sort', self.title_sort)
    if self.authors:
        names = authors_to_string(self.authors)
        # Show the sort string in brackets unless it is unset or the
        # 'Unknown' placeholder.
        if self.author_sort and self.author_sort != 'Unknown':
            sort_suffix = ' [' + self.author_sort + ']'
        else:
            sort_suffix = ''
        emit('Author(s)', names + sort_suffix)
    if self.publisher:
        emit('Publisher', self.publisher)
    if getattr(self, 'book_producer', False):
        emit('Book Producer', self.book_producer)
    if self.tags:
        emit('Tags', ', '.join([str(tag) for tag in self.tags]))
    if self.series:
        emit('Series', self.series + ' #%s' % self.format_series_index())
    if not self.is_null('languages'):
        emit('Languages', ', '.join(self.languages))
    if self.rating is not None:
        # The stored rating is halved for display; a falsy rating (0) is
        # shown as an empty string.
        if self.rating:
            shown_rating = '%.2g' % (float(self.rating) / 2)
        else:
            shown_rating = ''
        emit('Rating', shown_rating)
    if self.timestamp is not None:
        emit('Timestamp', isoformat(self.timestamp))
    if self.pubdate is not None:
        emit('Published', isoformat(self.pubdate))
    if self.rights is not None:
        emit('Rights', str(self.rights))
    if self.identifiers:
        pairs = ['%s:%s' % (scheme, ident)
                 for scheme, ident in self.identifiers.items()]
        emit('Identifiers', ', '.join(pairs))
    if self.comments:
        emit('Comments', self.comments)

    # Custom fields: listed only when the raw value is truthy; the label
    # and displayed value come from format_field().
    for key in self.custom_field_keys():
        if self.get(key, None):
            (label, shown) = self.format_field(key)
            emit(label, str(shown))
    return '\n'.join(lines)
def set_pubdate(root, prefixes, refines, val):
    '''
    Replace the ``dc:date`` elements under ``opf:metadata`` with ``val``.

    All existing ``dc:date`` elements are removed first. A new one is only
    written when ``is_date_undefined(val)`` is false; its text is the
    ``isoformat`` rendering of ``val``.

    :param root: Root element of the parsed OPF document.
    :param prefixes: Not used by this function.
    :param refines: Passed through to ``remove_element`` for each removed
                    date element.
    :param val: The publication date to store.
    '''
    find_dates = XPath('./opf:metadata/dc:date')
    for stale in find_dates(root):
        remove_element(stale, refines)

    if is_date_undefined(val):
        return

    text = isoformat(val)
    metadata = XPath('./opf:metadata')(root)[0]
    date_el = metadata.makeelement(oeb_base.tag('dc', 'date'))
    date_el.text = text
    metadata.append(date_el)
def add_user_metadata(name, val):
    '''
    Record one user metadata entry through ``add_um``.

    On the first call (tracked with the ``sentinel_added`` attribute set on
    this function object) any existing ``opf.metadata`` entry is removed
    and a boolean ``opf.metadata`` marker is written.

    :param name: Name of the entry to write.
    :param val: Value to write. Anything exposing ``strftime`` is stored
                as type ``date`` using the part of its UTC ``isoformat``
                before the ``T``; everything else is stored unchanged as
                type ``string``.
    '''
    first_call = not hasattr(add_user_metadata, 'sentinel_added')
    if first_call:
        add_user_metadata.sentinel_added = True
        remove_user_metadata('opf.metadata')
        add_um('opf.metadata', 'true', 'boolean')

    if hasattr(val, 'strftime'):
        day = isoformat(val, as_utc=True).split('T')[0]
        add_um(name, day, 'date')
    else:
        add_um(name, val, 'string')
def to_json(obj):
    '''
    Convert ``obj`` into something the JSON encoder can serialise.

    Supported inputs:

    * ``bytearray`` -> tagged dict holding the base64 text of the bytes
    * ``datetime.datetime`` -> tagged dict holding the UTC ``isoformat``
    * ``set`` / ``frozenset`` -> tagged dict (class ``set``) holding a tuple
    * ``bytes`` -> the value decoded as UTF-8
    * any object with a ``toBase64`` method (e.g. QByteArray) -> tagged
      dict of class ``bytearray``

    :raises TypeError: For every other kind of object.
    '''

    def tagged(class_name, value):
        return {'__class__': class_name, '__value__': value}

    if isinstance(obj, bytearray):
        encoded = base64.standard_b64encode(bytes(obj))
        return tagged('bytearray', encoded.decode('ascii'))

    if isinstance(obj, datetime.datetime):
        return tagged('datetime.datetime', isoformat(obj, as_utc=True))

    if isinstance(obj, (set, frozenset)):
        return tagged('set', tuple(obj))

    if isinstance(obj, bytes):
        return obj.decode('utf-8')

    if hasattr(obj, 'toBase64'):  # QByteArray
        return tagged('bytearray', bytes(obj.toBase64()).decode('ascii'))

    raise TypeError(repr(obj) + ' is not JSON serializable')
def tpl_replace(objtplname):
    '''
    Substitution callback that expands one ``{field}`` template token.

    The braces are stripped from the matched text to obtain a field name.
    Names outside ``TEMPLATE_ALLOWED_FIELDS`` expand to an empty string;
    otherwise the value is read from ``entry`` (a name taken from the
    surrounding scope) and passed through ``ascii_text``.

    :param objtplname: Match object for the token being replaced.
    :return: The replacement text.
    '''
    field_name = re.sub(r'[\{\}]', '', objtplname.group())
    if field_name not in TEMPLATE_ALLOWED_FIELDS:
        return ''

    if field_name in ('pubdate', 'timestamp'):
        # Dates: keep only the part before the 'T' separator.
        expanded = isoformat(entry[field_name]).partition('T')[0]
    elif field_name in ('tags', 'authors'):
        # Multi-valued fields: only the first item is used.
        expanded = entry[field_name][0]
    elif field_name in ('id', 'series_index'):
        expanded = str(entry[field_name])
    else:
        expanded = entry[field_name]
    return ascii_text(expanded)
def create_bibtex_entry(entry, fields, mode, template_citation,
                        bibtexdict, db, citation_bibtex=True,
                        calibre_files=True):
    '''
    Render one catalog entry as a BibTeX record.

    :param entry: Mapping of field name to value for the book.
    :param fields: Field names to emit, in output order. Names starting
                   with ``#`` are read via ``db.get_field``.
    :param mode: ``"misc"`` always produces ``@misc``; ``"book"`` produces
                 ``@book`` or nothing; any other value produces ``@book``
                 when ``check_entry_book_valid(entry)`` is true and
                 ``@misc`` otherwise.
    :param template_citation: Passed to ``make_bibtex_citation``.
    :param bibtexdict: Object providing ``utf8ToBibtex``,
                       ``bibtex_author_format`` and
                       ``stripUnmatchedSyntax``.
    :param db: Database object used to look up ``#`` fields.
    :param citation_bibtex: When true, a citation tag is appended to the
                            record header.
    :param calibre_files: When true, the ``formats`` field additionally
                          emits a ``file`` line.
    :return: The record text, or ``''`` when ``mode`` is ``"book"`` and the
             entry is not a valid book.
    '''
    # Bibtex doesn't like UTF-8 but keep unicode until writing.
    if mode != "misc" and check_entry_book_valid(entry):
        header = ['@book{']
    elif mode != "book":
        header = ['@misc{']
    else:
        # Strict book mode and the entry did not validate as a book.
        return ''

    if citation_bibtex:
        # Citation tag
        header.append(
            make_bibtex_citation(entry, template_citation, bibtexdict))
    bibtex_entry = [' '.join(header)]

    for field in fields:
        if field.startswith('#'):
            item = db.get_field(entry['id'], field, index_is_id=True)
            if isinstance(item, (bool, numbers.Number)):
                item = repr(item)
        elif field == 'title_sort':
            item = entry['sort']
        elif field == 'library_name':
            # NOTE(review): library_name is not defined in this block; it
            # must be supplied by the surrounding scope.
            item = library_name
        else:
            item = entry[field]

        # Skip fields that are missing or empty. Values without a length
        # (numbers, dates) are never considered empty.
        if item is None:
            continue
        try:
            if len(item) == 0:
                continue
        except TypeError:
            pass

        if field == 'authors':
            bibtex_entry.append(
                'author = "%s"' % bibtexdict.bibtex_author_format(item))

        elif field == 'id':
            bibtex_entry.append('calibreid = "%s"' % int(item))

        elif field == 'rating':
            bibtex_entry.append('rating = "%s"' % int(item))

        elif field == 'size':
            bibtex_entry.append('%s = "%s octets"' % (field, int(item)))

        elif field == 'tags':
            # A list to flatten
            bibtex_entry.append(
                'tags = "%s"' % bibtexdict.utf8ToBibtex(', '.join(item)))

        elif field == 'comments':
            # \n removal
            item = item.replace('\r\n', ' ')
            item = item.replace('\n', ' ')
            # unmatched brace removal (users should use \leftbrace or
            # \rightbrace for single braces)
            item = bibtexdict.stripUnmatchedSyntax(item, '{', '}')
            # html to text; best effort, the unconverted text is kept on
            # failure. Only Exception is caught so that KeyboardInterrupt
            # and SystemExit still propagate.
            try:
                item = html2text(item)
            except Exception:
                log.warn("Failed to convert comments to text")
            bibtex_entry.append(
                'note = "%s"' % bibtexdict.utf8ToBibtex(item))

        elif field == 'isbn':
            # Could be 9, 10 or 13 digits
            bibtex_entry.append('isbn = "%s"' % format_isbn(item))

        elif field == 'formats':
            # Add file path if format is selected
            extensions = [path.rpartition('.')[2].lower() for path in item]
            bibtex_entry.append('formats = "%s"' % ', '.join(extensions))
            if calibre_files:
                files = [
                    ':%s:%s' % (path, path.rpartition('.')[2].upper())
                    for path in item
                ]
                bibtex_entry.append('file = "%s"' % ', '.join(files))

        elif field == 'series_index':
            bibtex_entry.append('volume = "%s"' % int(item))

        elif field == 'timestamp':
            bibtex_entry.append(
                'timestamp = "%s"' % isoformat(item).partition('T')[0])

        elif field == 'pubdate':
            bibtex_entry.append('year = "%s"' % item.year)
            bibtex_entry.append(
                'month = "%s"'
                % bibtexdict.utf8ToBibtex(date.strftime("%b", item)))

        elif field.startswith('#') and isinstance(item, (str, bytes)):
            bibtex_entry.append(
                'custom_%s = "%s"'
                % (field[1:], bibtexdict.utf8ToBibtex(item)))

        elif isinstance(item, (str, bytes)):
            # elif field in ['title', 'publisher', 'cover', 'uuid',
            # 'ondevice', 'author_sort', 'series', 'title_sort'] :
            bibtex_entry.append(
                '%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

    bibtex_entry = ',\n '.join(bibtex_entry)
    bibtex_entry += ' }\n\n'

    return bibtex_entry
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
    '''
    Copy the fields of the metadata object ``mi`` into the OEB metadata
    container ``m``.

    For most fields a non-null value in ``mi`` replaces whatever ``m``
    already holds. For book producer, comments, publisher, series, ISBN
    identifier, series index, rating and tags, a null value in ``mi``
    additionally removes the existing entries from ``m`` when
    ``override_input_metadata`` is true. A timestamp of "now" is added at
    the end when ``m`` has none.

    :param mi: Source metadata object.
    :param m: Destination metadata container, modified in place.
    :param log: Not used by this function.
    :param override_input_metadata: See above.
    '''

    def is_author(item):
        return item.role.lower() in ['aut', '']

    def is_producer(item):
        return item.role.lower() == 'bkp'

    def is_isbn(item):
        return item.scheme.lower() == 'isbn'

    if not mi.is_null('title'):
        m.clear('title')
        m.add('title', mi.title)

    if mi.title_sort:
        # Fall back to the sort title when no title has been set at all.
        if not m.title:
            m.add('title', mi.title_sort)
        m.clear('title_sort')
        m.add('title_sort', mi.title_sort)

    if not mi.is_null('authors'):
        m.filter('creator', is_author)
        for author in mi.authors:
            attrib = {'role': 'aut'}
            if mi.author_sort:
                attrib[base.tag('opf', 'file-as')] = mi.author_sort
            m.add('creator', author, attrib=attrib)

    have_producer = not mi.is_null('book_producer')
    if have_producer or override_input_metadata:
        m.filter('contributor', is_producer)
    if have_producer:
        m.add('contributor', mi.book_producer, role='bkp')

    # Plain single-valued fields: (attribute on mi, term in m).
    for attr, term in (('comments', 'description'),
                       ('publisher', 'publisher'),
                       ('series', 'series')):
        present = not mi.is_null(attr)
        if present or override_input_metadata:
            m.clear(term)
        if present:
            m.add(term, getattr(mi, attr))

    # Identifiers: update every existing entry with the same scheme, or
    # add a new entry when none matches.
    saw_isbn = False
    for typ, val in mi.get_identifiers().items():
        scheme = typ.lower()
        if scheme == 'isbn':
            saw_isbn = True
        updated = False
        for existing in m.identifier:
            if existing.scheme.lower() == scheme:
                existing.content = val
                updated = True
        if not updated:
            m.add('identifier', val, scheme=typ.upper())
    if override_input_metadata and not saw_isbn:
        m.filter('identifier', is_isbn)

    if not mi.is_null('languages'):
        m.clear('language')
        for lang in mi.languages:
            # Empty and 'und' (undetermined) codes are not written.
            if lang and lang.lower() not in ('und', ''):
                m.add('language', lang)

    have_index = not mi.is_null('series_index')
    if have_index or override_input_metadata:
        m.clear('series_index')
    if have_index:
        m.add('series_index', mi.format_series_index())

    have_rating = not mi.is_null('rating')
    if have_rating or override_input_metadata:
        m.clear('rating')
    if have_rating:
        m.add('rating', '%.2f' % mi.rating)

    have_tags = not mi.is_null('tags')
    if have_tags or override_input_metadata:
        m.clear('subject')
    if have_tags:
        for tag in mi.tags:
            m.add('subject', tag)

    # Date fields are stored in isoformat and are never cleared on
    # override.
    for attr, term in (('pubdate', 'date'), ('timestamp', 'timestamp')):
        if not mi.is_null(attr):
            m.clear(term)
            m.add(term, isoformat(getattr(mi, attr)))

    for attr in ('rights', 'publication_type'):
        if not mi.is_null(attr):
            m.clear(attr)
            m.add(attr, getattr(mi, attr))

    if not m.timestamp:
        m.add('timestamp', isoformat(now()))
def run(self, path_to_output, opts, db, notification=DummyReporter()):
    '''
    Write a catalog of the selected books to ``path_to_output``.

    The output format is taken from the extension of ``path_to_output``:
    ``csv`` writes a UTF-8 file with a BOM, a header row and one quoted row
    per book; ``xml`` writes a ``calibredb`` document with one ``record``
    element per book. Any other extension writes nothing.

    :param path_to_output: Destination file path.
    :param opts: Options object. Attributes read here: ``verbose``,
                 ``ids``, ``search_text``, ``fields``,
                 ``connected_device`` and, optionally, ``library_path``.
                 ``search_text`` is reset to ``None`` when ``ids`` is set.
    :param db: Database object used for searching and for custom-field
               lookups.
    :param notification: Stored on ``self.notification``.
    '''
    from ebook_converter.library import current_library_name
    from ebook_converter.utils.date import isoformat
    from ebook_converter.utils.html2text import html2text
    from ebook_converter.utils.logging import default_log as log
    from lxml import etree
    from ebook_converter.ebooks.metadata import authors_to_string

    self.fmt = path_to_output.rpartition('.')[2]
    self.notification = notification
    current_library = current_library_name()
    if getattr(opts, 'library_path', None):
        current_library = os.path.basename(opts.library_path)

    if opts.verbose:
        opts_dict = vars(opts)
        log("%s('%s'): Generating %s" % (self.name, current_library,
                                         self.fmt.upper()))
        if opts.connected_device['is_device_connected']:
            log(" connected_device: %s" % opts.connected_device['name'])
        if opts_dict['search_text']:
            log(" --search='%s'" % opts_dict['search_text'])

        if opts_dict['ids']:
            log(" Book count: %d" % len(opts_dict['ids']))
            if opts_dict['search_text']:
                log(" (--search ignored when a subset of the database is specified)"
                    )

        if opts_dict['fields']:
            if opts_dict['fields'] == 'all':
                log(" Fields: %s" % ', '.join(FIELDS[1:]))
            else:
                log(" Fields: %s" % opts_dict['fields'])

    # If a list of ids are provided, don't use search_text
    if opts.ids:
        opts.search_text = None

    data = self.search_sort_db(db, opts)

    if not len(data):
        # Only reported; an empty catalog is still written below.
        log.error(
            "\nNo matching database entries for search criteria '%s'" %
            opts.search_text)
        # raise SystemExit(1)

    # Get the requested output fields as a list
    fields = self.get_output_fields(db, opts)

    # If connected device, add 'On Device' values to data
    if opts.connected_device['is_device_connected'] and \
            'ondevice' in fields:
        for entry in data:
            entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[
                entry['id']]['ondevice']

    fm = {x: db.field_metadata.get(x, {}) for x in fields}

    if self.fmt == 'csv':
        # The context manager closes the file even when formatting a row
        # raises part-way through.
        with codecs.open(path_to_output, 'w', 'utf8') as outfile:
            # Write a UTF-8 BOM
            outfile.write('\ufeff')

            # Output the field headers
            outfile.write('%s\n' % ','.join(fields))

            # Output the entry fields
            for entry in data:
                outstr = []
                for field in fields:
                    if field.startswith('#'):
                        item = db.get_field(entry['id'], field,
                                            index_is_id=True)
                        if isinstance(item, (list, tuple)):
                            if fm.get(field, {}).get('display', {}).get(
                                    'is_names', False):
                                item = ' & '.join(item)
                            else:
                                item = ', '.join(item)
                    elif field == 'library_name':
                        item = current_library
                    elif field == 'title_sort':
                        item = entry['sort']
                    else:
                        item = entry[field]

                    if item is None:
                        outstr.append('""')
                        continue
                    elif field == 'formats':
                        item = ', '.join(
                            [path.rpartition('.')[2].lower()
                             for path in item])
                    elif field == 'authors':
                        item = authors_to_string(item)
                    elif field == 'tags':
                        item = ', '.join(item)
                    elif field == 'isbn':
                        # Could be 9, 10 or 13 digits, with hyphens,
                        # possibly ending in 'X'
                        item = '%s' % re.sub(r'[^\dX-]', '', item)
                    elif fm.get(field, {}).get('datatype') == 'datetime':
                        item = isoformat(item, as_utc=False)
                    elif field == 'comments':
                        item = item.replace('\r\n', ' ')
                        item = item.replace('\n', ' ')
                    elif fm.get(field, {}).get(
                            'datatype', None) == 'rating' and item:
                        # The stored rating is halved for output.
                        item = '%.2g' % (item / 2)

                    # Convert HTML to markdown text. Only attempted when
                    # the text contains an opening tag and ends with the
                    # matching closing tag.
                    if isinstance(item, str):
                        opening_tag = re.search(r'<(\w+)( |>)', item)
                        if opening_tag:
                            closing_tag = re.search(
                                r'<\/%s>$' % opening_tag.group(1), item)
                            if closing_tag:
                                item = html2text(item)

                    # Quote the cell, doubling any embedded quotes.
                    outstr.append('"%s"' % str(item).replace('"', '""'))

                outfile.write(','.join(outstr) + '\n')

    elif self.fmt == 'xml':
        from lxml.builder import E

        root = E.calibredb()
        for r in data:
            record = E.record()
            root.append(record)

            # Custom columns: '#name' becomes element '_name'.
            for field in fields:
                if field.startswith('#'):
                    val = db.get_field(r['id'], field, index_is_id=True)
                    if not isinstance(val, str):
                        val = str(val)
                    item = getattr(E, field.replace('#', '_'))(val)
                    record.append(item)

            for field in ('id', 'uuid', 'publisher', 'rating', 'size',
                          'isbn', 'ondevice', 'identifiers'):
                if field in fields:
                    val = r[field]
                    if not val:
                        continue
                    if not isinstance(val, (bytes, str)):
                        if (fm.get(field, {}).get('datatype', None) ==
                                'rating' and val):
                            val = '%.2g' % (val / 2)
                        val = str(val)
                    item = getattr(E, field)(val)
                    record.append(item)

            if 'title' in fields:
                title = E.title(r['title'], sort=r['sort'])
                record.append(title)

            if 'authors' in fields:
                aus = E.authors(sort=r['author_sort'])
                for au in r['authors']:
                    aus.append(E.author(au))
                record.append(aus)

            for field in ('timestamp', 'pubdate'):
                if field in fields:
                    record.append(
                        getattr(E, field)(isoformat(r[field],
                                                    as_utc=False)))

            if 'tags' in fields and r['tags']:
                tags = E.tags()
                for tag in r['tags']:
                    tags.append(E.tag(tag))
                record.append(tags)

            if 'comments' in fields and r['comments']:
                record.append(E.comments(r['comments']))

            if 'series' in fields and r['series']:
                record.append(
                    E.series(r['series'], index=str(r['series_index'])))

            if 'cover' in fields and r['cover']:
                record.append(E.cover(r['cover'].replace(os.sep, '/')))

            if 'formats' in fields and r['formats']:
                formats_el = E.formats()
                for path in r['formats']:
                    formats_el.append(E.format(path.replace(os.sep, '/')))
                record.append(formats_el)

            if 'library_name' in fields:
                record.append(E.library_name(current_library))

        with open(path_to_output, 'wb') as xml_file:
            xml_file.write(
                etree.tostring(root,
                               encoding='utf-8',
                               xml_declaration=True,
                               pretty_print=True))
def metadata_to_xmp_packet(mi):
    '''
    Serialise the metadata object ``mi`` as an XMP packet.

    An ``x:xmpmeta`` / ``rdf:RDF`` tree is built with separate
    ``rdf:Description`` elements for the Dublin Core, XMP, prism, pdfx and
    calibre properties, then handed to ``serialize_xmp_packet``.

    :param mi: Source metadata object.
    :return: Whatever ``serialize_xmp_packet`` returns for the tree.
    '''
    x_maker = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    rdf_maker = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = x_maker.xmpmeta(rdf_maker.RDF)
    rdf = root[0]

    def new_description(*prefixes):
        # Append an empty-about rdf:Description declaring the given
        # namespace prefixes and return it.
        desc = rdf.makeelement(expand('rdf:Description'),
                               nsmap=nsmap(*prefixes))
        desc.set(expand('rdf:about'), '')
        rdf.append(desc)
        return desc

    # --- Dublin Core -----------------------------------------------------
    dc = new_description('dc')
    for prop, tag in (('title', 'dc:title'),
                      ('comments', 'dc:description')):
        create_alt_property(dc, tag, mi.get(prop) or '')

    for prop, tag, ordered in (('authors', 'dc:creator', True),
                               ('tags', 'dc:subject', False),
                               ('publisher', 'dc:publisher', False)):
        values = mi.get(prop) or ()
        if isinstance(values, (str, bytes)):
            values = [values]
        create_sequence_property(dc, tag, values, ordered)

    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        create_sequence_property(dc, 'dc:date',
                                 [isoformat(mi.pubdate, as_utc=False)])

    if not mi.is_null('languages'):
        candidates = (lang_as_iso639_1(code) or canonicalize_lang(code)
                      for code in mi.languages)
        langs = [code for code in candidates if code]
        if langs:
            create_sequence_property(dc, 'dc:language', langs,
                                     ordered=False)

    # --- XMP, prism and pdfx ---------------------------------------------
    xmp = new_description('xmp', 'xmpidq')
    extra_ids = {}
    for prefix in ('prism', 'pdfx'):
        extra_ids[prefix] = new_description(prefix)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        # ISBN and DOI are additionally written to the prism and pdfx
        # descriptions.
        for scheme, val in identifiers.items():
            if scheme not in {'isbn', 'doi'}:
                continue
            for prefix, parent in extra_ids.items():
                ident_el = parent.makeelement(
                    expand('%s:%s' % (prefix, scheme)))
                ident_el.text = val
                parent.append(ident_el)

    stamp = xmp.makeelement(expand('xmp:MetadataDate'))
    stamp.text = isoformat(now(), as_utc=False)
    xmp.append(stamp)

    # --- calibre-specific properties -------------------------------------
    calibre = new_description('calibre', 'calibreSI', 'calibreCC')

    if not mi.is_null('rating'):
        # A rating that cannot be converted to float is left out.
        try:
            rating = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, 'calibre:rating',
                                   '%g' % rating)

    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)

    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp',
                               isoformat(mi.timestamp, as_utc=False))

    for attr in ('author_link_map', 'user_categories'):
        mapping = getattr(mi, attr, None)
        if mapping:
            create_simple_property(calibre, 'calibre:' + attr,
                                   dump_dict(mapping))

    for attr in ('title_sort', 'author_sort'):
        if not mi.is_null(attr):
            create_simple_property(calibre, 'calibre:' + attr,
                                   getattr(mi, attr))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)

    return serialize_xmp_packet(root)