def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    '''
    Combine the metadata of several format files into one object.

    ``formats`` is a list of file paths. It is sorted in place using
    ``METADATA_PRIORITIES`` keyed on the extension of each path. If one of
    the files is an ``opf`` whose metadata has a title, that metadata is
    returned directly. Otherwise every file is read with ``get_metadata``
    and merged into the result with ``smart_update``; as soon as the merged
    result carries an ``application_id`` it is returned.

    ``force_read_metadata`` and ``pattern`` are forwarded unchanged to
    ``get_metadata``. A missing title or author list is replaced by the
    translated string ``Unknown``.
    '''
    mi = MetaInformation(None, None)
    formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
    extensions = [path_to_ext(path) for path in formats]

    # An OPF file that yields a title is used as-is.
    if 'opf' in extensions:
        opf_mi = opf_metadata(formats[extensions.index('opf')])
        if opf_mi is not None and opf_mi.title:
            return opf_mi

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream, stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except Exception:
                # Best effort: a format that cannot be read is skipped.
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
def get_metadata_from_reader(rdr):
    '''
    Build a ``MetaInformation`` from a reader object.

    The reader's home file (``rdr.GetFile(rdr.home)``) is converted to
    unicode and parsed with BeautifulSoup; authors, publisher, ISBN,
    comments and cover are extracted from it by the ``_get_*`` helpers.
    The title comes from ``rdr.title`` and is decoded with the encoding
    reported by the reader, falling back to ``cp1252`` when that encoding
    cannot be obtained or is not a known codec.
    '''
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
                                        resolve_entities=True)[0])

    try:
        enc = rdr.GetEncoding()
        # Raises if the reported name is not a codec Python knows about.
        codecs.lookup(enc)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit propagate.
        enc = 'cp1252'
    title = force_unicode(rdr.title, enc)

    mi = MetaInformation(title, _get_authors(home))

    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Read title and authors from the start of a text stream.

    Up to four lines are read and decoded as UTF-8; only the first 100
    characters are examined. If they consist of a title line, three
    consecutive line breaks, an author line and a final line break, the
    title and the comma separated authors are taken from them. Otherwise
    the title defaults to the base name of ``stream.name`` without its
    extension (or ``Unknown``) and the authors to ``Unknown``.

    ``extract_cover`` is accepted but not used here.
    '''
    fallback_title = getattr(stream, 'name', '').rpartition('.')[0]
    if fallback_title:
        fallback_title = os.path.basename(fallback_title)
    mi = MetaInformation(fallback_title or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Collect at most four lines, stopping early at end of stream.
    head = []
    for _lineno in range(4):
        text = stream.readline().decode('utf-8', 'replace')
        if text == '':
            break
        head.append(text)
    mdata = u''.join(head)[:100]

    found = re.search(
        '(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$',
        mdata)
    if found is None:
        return mi

    mi.title = found.group('title')
    mi.authors = found.group('author').split(',')
    return mi
def do_one_isbn_add(self):
    '''
    Add the next queued ISBN as a book, then reschedule itself.

    One entry is taken from the front of ``self.isbn_books`` per call and
    imported with only its ISBN (plus ``self.isbn_add_tags`` if set) and
    its optional file path. When the queue is empty the view is told how
    many books were added, the progress dialog is accepted and a metadata
    download is started for all added ids. On any error the dialog is
    accepted and the error is re-raised.
    '''
    try:
        db = self.gui.library_view.model().db

        try:
            entry = self.isbn_books.pop(0)
        except IndexError:
            # Queue drained: finish up and fetch metadata for the new books.
            self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
            self.isbn_add_dialog.accept()
            self.gui.iactions['Edit Metadata'].download_metadata(
                ids=self.add_by_isbn_ids,
                ensure_fields=frozenset(['title', 'authors']))
            return

        mi = MetaInformation(None)
        mi.isbn = entry['isbn']
        if self.isbn_add_tags:
            mi.tags = list(self.isbn_add_tags)

        book_path = entry['path']
        fmts = [book_path] if book_path is not None else []
        self.add_by_isbn_ids.add(db.import_book(mi, fmts))
        self.isbn_add_dialog.value += 1

        # Process the next entry from the event loop after 10 ms.
        QTimer.singleShot(10, self.do_one_isbn_add)
    except:
        # Close the dialog whatever went wrong, then let the error through.
        self.isbn_add_dialog.accept()
        raise
def _get_metadata(self, id, args, kwargs):
    '''
    Read metadata with ``get_metadata(*args, **kwargs)`` and emit the
    result on ``self.metadata`` together with ``id``.

    If reading fails, a placeholder with an empty title and an ``Unknown``
    author is emitted instead, so something is emitted on every call.
    '''
    from calibre.ebooks.metadata.meta import get_metadata
    try:
        mi = get_metadata(*args, **kwargs)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are not converted into a placeholder result.
        mi = MetaInformation('', [_('Unknown')])
    self.metadata.emit(id, mi)
def get_metadata(stream, extract_cover=True):
    '''
    Read metadata from the first OPF file inside a zip archive.

    The OPF is located with ``get_first_opf_name`` and converted with
    ``to_book_metadata``. When ``extract_cover`` is true the cover is
    taken from ``opf.raster_cover`` or, failing that, from the first
    ``meta`` element named ``cover`` whose content ends in jpeg, jpg or
    png. Any error leaves whatever metadata was gathered so far; if
    nothing could be read that is the ``Unknown`` placeholder.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    try:
        with ZipFile(stream) as zf:
            opf = OPF(StringIO(zf.read(get_first_opf_name(zf))))
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if not cover_href:
                    cover_metas = opf.metadata.xpath(
                        '//*[local-name()="meta" and @name="cover"]')
                    for meta in cover_metas:
                        candidate = meta.get('content')
                        suffix = candidate.rpartition('.')[2].lower()
                        if suffix in {'jpeg', 'jpg', 'png'}:
                            cover_href = candidate
                            break
                if cover_href:
                    try:
                        mi.cover_data = (os.path.splitext(cover_href)[1],
                                         zf.read(cover_href))
                    except Exception:
                        # A missing or unreadable cover is not fatal.
                        pass
    except Exception:
        # Fall through and return what was collected before the failure.
        pass
    return mi
def add_empty(self, *args):
    '''
    Ask the user for details and add empty book records to the library.

    No format files are imported. The first author and the series of the
    currently selected row, if there is one, are handed to the dialog.
    After the dialog is accepted the requested number of books titled
    ``Unknown`` is created, the views are refreshed and the new rows are
    selected.
    '''
    author = series = None
    index = self.gui.library_view.currentIndex()
    if index.isValid():
        row = index.row()
        raw = index.model().db.authors(row)
        if raw:
            # '|' stands in for a comma inside a single author name.
            names = [a.strip().replace('|', ',') for a in raw.split(',')]
            if names:
                author = names[0]
        series = index.model().db.series(row)

    dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                             author, series)
    if dlg.exec_() != dlg.Accepted:
        return

    num = dlg.qty_to_add
    series = dlg.selected_series
    db = self.gui.library_view.model().db
    ids = []
    for _count in xrange(num):
        mi = MetaInformation(_('Unknown'), dlg.selected_authors)
        if series:
            mi.series = series
            mi.series_index = db.get_next_series_num_for(series)
        ids.append(db.import_book(mi, []))

    self.gui.library_view.model().books_added(num)
    if hasattr(self.gui, 'db_images'):
        self.gui.db_images.reset()
    self.gui.tags_view.recount()
    if ids:
        ids.reverse()
        self.gui.library_view.select_rows(ids)
def _from_formats(self, id, args, kwargs):
    '''
    Read metadata with ``metadata_from_formats(*args, **kwargs)`` and emit
    the result on ``self.metadataf`` together with ``id``.

    If reading fails, a placeholder with an empty title and an ``Unknown``
    author is emitted instead, so something is emitted on every call.
    '''
    from calibre.ebooks.metadata.meta import metadata_from_formats
    try:
        mi = metadata_from_formats(*args, **kwargs)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are not converted into a placeholder result.
        mi = MetaInformation('', [_('Unknown')])
    self.metadataf.emit(id, mi)
def get_metadata(stream, extract_cover=True):
    """
    Read metadata from an SNB file.

    ``stream`` may be a file-like object or, when it has no ``write``
    attribute, raw data that is wrapped in ``io.BytesIO``. Title, author,
    language and publisher are read from ``snbf/book.snbf``; when
    ``extract_cover`` is true the cover image named there is read from
    ``snbc/images/``. Errors are printed as a traceback and whatever was
    collected so far is returned.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if hasattr(stream, 'write'):
            stream.seek(0)
            snbFile.Parse(stream, True)
        else:
            snbFile.Parse(io.BytesIO(stream), True)

        book_info = snbFile.GetFileStream('snbf/book.snbf')
        if book_info is not None:
            tree = etree.fromstring(book_info)
            mi.title = tree.find('.//head/name').text
            mi.authors = [tree.find('.//head/author').text]
            language = tree.find('.//head/language').text
            mi.language = language.lower().replace('_', '-')
            mi.publisher = tree.find('.//head/publisher').text

            if extract_cover:
                cover = tree.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    ext = os.path.splitext(cover.text)[1]
                    if ext == '.jpeg':
                        ext = '.jpg'
                    # The last three characters of the extension are used
                    # as the image type.
                    mi.cover_data = (
                        ext[-3:],
                        snbFile.GetFileStream('snbc/images/' + cover.text))
    except Exception:
        import traceback
        traceback.print_exc()

    return mi
def get_comic_metadata(stream, stream_type):
    '''
    Read ComicBookInfo metadata from the archive comment of a comic file.

    ``stream_type`` selects the archive reader: ``'cbz'`` uses ``ZipFile``
    and ``'cbr'`` uses ``RARFile``; for any other value no comment is read
    and an empty ``MetaInformation`` is returned. The comment is parsed as
    JSON and the first top level key starting with ``ComicBookInfo`` is
    passed to ``get_comic_book_info``.
    '''
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation

    comment = None
    mi = MetaInformation(None, None)

    if stream_type == 'cbz':
        from calibre.utils.zipfile import ZipFile
        comment = ZipFile(stream).comment
    elif stream_type == 'cbr':
        from calibre.utils.unrar import RARFile
        comment = RARFile(stream, get_comment=True).comment

    if comment:
        import json
        m = json.loads(comment)
        # isinstance() replaces hasattr(m, 'iterkeys'): dicts have no
        # iterkeys() on Python 3, so the old check silently skipped all
        # metadata there. Iterating the dict directly works on 2 and 3.
        if isinstance(m, dict):
            for cat in m:
                if cat.startswith('ComicBookInfo'):
                    get_comic_book_info(m[cat], mi)
                    break
    return mi
def add_empty(self, *args):
    '''
    Ask the user for details and add empty (or duplicated) books.

    The first author, series and title of the currently selected row, if
    there is one, are handed to the dialog. After the dialog is accepted
    the requested number of books is created. Each book is either a copy
    of the current book's metadata (optionally with copies of its format
    files) or a new record which, when the ``create_empty_format_file``
    preference is set, gets a generated empty file of that format.

    Temporary copies of formats and generated empty files are removed when
    the function finishes, including when importing fails part way.
    '''
    author = series = title = None
    index = self.gui.library_view.currentIndex()
    if index.isValid():
        raw = index.model().db.authors(index.row())
        if raw:
            # '|' stands in for a comma inside a single author name.
            authors = [a.strip().replace('|', ',') for a in raw.split(',')]
            if authors:
                author = authors[0]
        series = index.model().db.series(index.row())
        title = index.model().db.title(index.row())

    dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                             author, series, dup_title=title)
    if dlg.exec_() != dlg.Accepted:
        return

    temp_files = []
    orig_fmts = []
    num = dlg.qty_to_add
    series = dlg.selected_series
    title = dlg.selected_title or _('Unknown')
    db = self.gui.library_view.model().db
    ids = []
    try:
        if dlg.duplicate_current_book:
            origmi = db.get_metadata(index.row(), get_cover=True,
                                     cover_as_data=True)
            if dlg.copy_formats.isChecked():
                book_id = db.id(index.row())
                # Appended one by one so already exported copies are
                # still cleaned up if a later export raises.
                for fmt in db.new_api.formats(book_id):
                    orig_fmts.append(
                        db.new_api.format(book_id, fmt, as_path=True))

        empty_format = gprefs.get('create_empty_format_file', '')
        for x in xrange(num):
            if dlg.duplicate_current_book:
                mi = origmi
            else:
                mi = MetaInformation(title, dlg.selected_authors)
                if series:
                    mi.series = series
                    mi.series_index = db.get_next_series_num_for(series)

            fmts = []
            if dlg.duplicate_current_book and dlg.copy_formats.isChecked():
                fmts = orig_fmts
            elif empty_format:
                from calibre.ebooks.oeb.polish.create import create_book
                pt = PersistentTemporaryFile(suffix='.' + empty_format)
                pt.close()
                temp_files.append(pt.name)
                create_book(mi, pt.name, fmt=empty_format)
                fmts = [pt.name]
            ids.append(db.import_book(mi, fmts))

        self.gui.library_view.model().books_added(num)
        self.gui.refresh_cover_browser()
        self.gui.tags_view.recount()
        if ids:
            ids.reverse()
            self.gui.library_view.select_rows(ids)
    finally:
        # Previously these files leaked whenever anything above raised.
        for path in orig_fmts:
            os.remove(path)
        for path in temp_files:
            os.remove(path)
def _start_splitmerge(self,book_list, tdir=None, db=None):
    '''
    Merge the split output of the good books into one new EPUB and add it
    to the library.

    Only entries of ``book_list`` whose ``'good'`` value is true are used.
    Their ``'splittmp'`` files are merged by the EpubMerge plugin into a
    temporary file created in ``tdir``. A single good book lends its title
    (prefixed with "New ") and authors to the result; otherwise anthology
    defaults are used. The new book is then added through ``db``, word
    counted, auto converted, selected and registered with the FanFicFare
    update lists. Finally ``tdir`` is removed.

    ``db`` is used unconditionally, so it must be supplied despite the
    ``None`` default.
    '''
    # logger.debug(book_list)
    merger = self.get_epubmerge_plugin()
    # The split plugin is looked up as before; its result is not used here.
    self.get_epubsplit_plugin()

    good_books = [book for book in book_list if book['good']]

    tmp = PersistentTemporaryFile(prefix='merge-', suffix='.epub', dir=tdir)

    if len(good_books) == 1:
        only = good_books[0]
        deftitle = "New "+only['title']
        defauthors = only['authors']
    else:
        deftitle = "New Chapters Anthology"
        defauthors = ["Various Authors"]

    mi = MetaInformation(deftitle,defauthors)
    mi.tags = [tag for book in good_books for tag in book['tags']]
    credits = ["%s by %s"%(book['title'],", ".join(book['authors']))
               for book in good_books]
    mi.comments = "<p>New Chapters from:</p>"
    mi.comments += '<br/>'.join(credits)

    merger.do_merge(tmp,
                    [book['splittmp'] for book in good_books],
                    authoropts=mi.authors,
                    titleopt=mi.title,
                    descopt=mi.comments,
                    tags=mi.tags,
                    keepmetadatafiles=False,
                    )

    book_id = db.create_book_entry(mi, add_duplicates=True)
    db.add_format_with_hooks(book_id, 'EPUB', tmp, index_is_id=True)

    model = self.gui.library_view.model()
    model.books_added(1)
    self.gui.library_view.model().refresh_ids([book_id])
    # self.gui.iactions['Edit Metadata'].edit_metadata(False)
    self.gui.tags_view.recount()

    ## run word counts
    self.gui.iactions['Count Pages'].count_statistics([book_id],['WordCount'])

    ## run auto convert
    self.gui.iactions['Convert Books'].auto_convert_auto_add([book_id])

    ## add to FFF update lists
    self.gui.library_view.select_rows([book_id])
    self.gui.iactions['FanFicFare'].update_lists()

    remove_dir(tdir)
def add_annotation_to_library(self, db, db_id, annotation):
    '''
    Store a Kindle annotation in the library.

    ``annotation.type`` selects the behaviour:

    * ``'kindle_bookmark'``: the HTML produced by
      ``self.generate_annotation_html`` is appended to the comments of
      book ``db_id`` (replacing an earlier annotation section) and the
      bookmark file is attached to the book as a format.
    * ``'kindle_clippings'``: the clippings file is attached as TXT to the
      book titled "My Clippings", which is created if no such book is
      found, and its comments are set to a "Last modified" stamp.

    Any other type is ignored.
    '''
    from calibre.ebooks.BeautifulSoup import Tag
    from calibre.ebooks.metadata import MetaInformation

    bm = annotation
    # Books carrying one of these tags never receive annotation comments.
    ignore_tags = set(['Catalog', 'Clippings'])

    if bm.type == 'kindle_bookmark':
        mi = db.get_metadata(db_id, index_is_id=True)
        user_notes_soup = self.generate_annotation_html(bm.value)
        if mi.comments:
            # Both offsets are computed before anything is cut, then the
            # comments are truncated at each marker that was found, which
            # drops a previously added annotation section.
            a_offset = mi.comments.find('<div class="user_annotations">')
            ad_offset = mi.comments.find(
                '<hr class="annotations_divider" />')

            if a_offset >= 0:
                mi.comments = mi.comments[:a_offset]
            if ad_offset >= 0:
                mi.comments = mi.comments[:ad_offset]
            if set(mi.tags).intersection(ignore_tags):
                return
            if mi.comments:
                # Separate the remaining comments from the new notes.
                hrTag = Tag(user_notes_soup, 'hr')
                hrTag['class'] = 'annotations_divider'
                user_notes_soup.insert(0, hrTag)

            mi.comments += unicode(user_notes_soup.prettify())
        else:
            mi.comments = unicode(user_notes_soup.prettify())
        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
                                 bm.value.path, index_is_id=True)
    elif bm.type == 'kindle_clippings':
        # Find 'My Clippings' author=Kindle in database, or add
        # NOTE(review): the search below matches on title only; the author
        # is only set when the book is created -- confirm this is intended.
        last_update = 'Last modified %s' % strftime(
            u'%x %X', bm.value['timestamp'].timetuple())
        mc_id = list(
            db.data.search_getting_ids('title:"My Clippings"', '',
                                       sort_results=False))
        if mc_id:
            # Reuse the first matching book.
            db.add_format_with_hooks(mc_id[0], 'TXT', bm.value['path'],
                                     index_is_id=True)
            mi = db.get_metadata(mc_id[0], index_is_id=True)
            mi.comments = last_update
            db.set_metadata(mc_id[0], mi)
        else:
            mi = MetaInformation('My Clippings', authors=['Kindle'])
            mi.tags = ['Clippings']
            mi.comments = last_update
            db.add_books([bm.value['path']], ['txt'], [mi])
def get_metadata(stream):
    '''
    Return fb2 metadata as a L{MetaInformation} object.

    Title and authors are parsed first. If the document has no title, the
    base name of ``stream.name`` without its extension (or ``Unknown``) is
    used. Cover, comments, tags, series, ISBN, publisher, publication date
    and language are then parsed on a best effort basis: a failure in one
    of them leaves that field unset and does not affect the others.
    '''
    root = _get_fbroot(stream)
    book_title = _parse_book_title(root)
    authors = _parse_authors(root)

    # fallback for book_title
    if book_title:
        book_title = unicode(book_title)
    else:
        stream_name = getattr(stream, 'name', _('Unknown'))
        book_title = force_unicode(
            os.path.splitext(os.path.basename(stream_name))[0])
    mi = MetaInformation(book_title, authors)

    # _parse_timestamp and _parse_uuid remain disabled, as before.
    optional_parsers = (
        _parse_cover,
        _parse_comments,
        _parse_tags,
        _parse_series,
        _parse_isbn,
        _parse_publisher,
        _parse_pubdate,
        _parse_language,
    )
    for parse in optional_parsers:
        try:
            parse(root, mi)
        except Exception:
            # Optional field; narrowed from a bare ``except:`` so
            # KeyboardInterrupt and SystemExit are not swallowed.
            pass

    return mi
def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_identifiers=False):
    '''
    Write the metadata ``mi`` into the archive in ``stream``.

    The OPF inside the archive is updated through ``update_metadata`` and
    written back with ``safe_replace``. If new cover data is available
    (``mi.cover_data[1]`` or, failing that, the file at ``mi.cover``) and
    the book already has an unencrypted png/jpg/jpeg raster cover, that
    cover file is replaced as well.

    ``apply_null``, ``update_timestamp`` and ``force_identifiers`` are
    forwarded unchanged to ``update_metadata``.
    '''
    stream.seek(0)
    reader = get_zip_reader(stream, root=os.getcwdu())
    raster_cover = reader.opf.raster_cover
    mi = MetaInformation(mi)
    replacements = {}

    # Prefer in-memory cover data, fall back to the cover file on disk.
    try:
        new_cdata = mi.cover_data[1]
    except Exception:
        new_cdata = None
    if not new_cdata:
        try:
            # ``with`` closes the handle the old one-liner leaked.
            with open(mi.cover, 'rb') as cover_file:
                new_cdata = cover_file.read()
        except Exception:
            pass

    cpath = None
    if new_cdata and raster_cover:
        try:
            cpath = posixpath.join(posixpath.dirname(reader.opf_path),
                                   raster_cover)
            cover_replacable = (
                not reader.encryption_meta.is_encrypted(cpath) and
                os.path.splitext(cpath)[1].lower() in ('.png', '.jpg', '.jpeg'))
            if cover_replacable:
                new_cover = _write_new_cover(new_cdata, cpath)
                replacements[cpath] = open(new_cover.name, 'rb')
        except Exception:
            # Cover replacement is optional; report and carry on.
            import traceback
            traceback.print_exc()

    try:
        update_metadata(reader.opf, mi, apply_null=apply_null,
                        update_timestamp=update_timestamp,
                        force_identifiers=force_identifiers)

        newopf = StringIO(reader.opf.render())
        if isinstance(reader.archive, LocalZipFile):
            reader.archive.safe_replace(reader.container[OPF.MIMETYPE],
                                        newopf,
                                        extra_replacements=replacements)
        else:
            safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
                         extra_replacements=replacements)
    finally:
        # Runs even when writing fails, so the temporary cover file and
        # its open handle are no longer left behind.
        opened_cover = replacements.get(cpath)
        if opened_cover is not None:
            try:
                opened_cover.close()
                os.remove(opened_cover.name)
            except Exception:
                pass
def get_metadata(stream):
    """
    Read metadata from the document info block of an RTF stream.

    Streams that do not start with the RTF signature, or that have no
    info block, give a ``MetaInformation`` titled ``Unknown``. Otherwise
    title, author, comment, tags and publisher are searched for with the
    module level patterns and decoded using the detected code page.
    Authors and tags are split on commas; empty tags are dropped.
    """
    stream.seek(0)
    if stream.read(5) != br'{\rtf':
        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(_('Unknown'))

    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)

    def field_text(found):
        # Decode the first capture group of a successful match.
        return decode(found.group(1).strip(), cpg)

    found = title_pat.search(block)
    title = _('Unknown') if found is None else field_text(found)

    found = author_pat.search(block)
    author = None if found is None else field_text(found)

    mi = MetaInformation(title)
    if author:
        mi.authors = [name.strip() for name in author.split(',')]

    found = comment_pat.search(block)
    if found is not None:
        mi.comments = field_text(found)

    found = tags_pat.search(block)
    if found is not None:
        pieces = (piece.strip() for piece in field_text(found).split(','))
        mi.tags = [piece for piece in pieces if piece]

    found = publisher_pat.search(block)
    if found is not None:
        mi.publisher = field_text(found)

    return mi
def get_metadata(stream, cover=True):
    '''
    Read metadata, and optionally the cover, from a PDF stream.

    The stream is copied to ``src.pdf`` in a temporary directory and the
    ``read_info`` job of ``calibre.ebooks.metadata.pdf`` is run on that
    directory in a worker process. Anything the worker printed is echoed.
    Title, authors, producer (``Creator``), tags (``Keywords`` and
    ``Subject``) and an ISBN found among the keywords are taken from the
    returned info dictionary.

    Raises ``RuntimeError`` if the worker fails and ``ValueError`` if it
    returns no info.
    '''
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as log:
            worker_output = log.read().strip()
        if worker_output:
            prints(worker_output)
        if not info:
            raise ValueError('Could not read info dict from PDF')

        cdata = None
        covpath = os.path.join(pdfpath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as image:
                cdata = image.read()

    title = info.get('Title', None)
    au = info.get('Author', None)
    au = [_('Unknown')] if au is None else string_to_authors(au)
    mi = MetaInformation(title, au)
    # if isbn is not None:
    #     mi.isbn = isbn

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [word.strip() for word in keywords.split(',')]
        # ``found`` is either a list (empty when no keyword is an ISBN) or,
        # after the next step, the first ISBN itself.
        found = [check_isbn(tag) for tag in mi.tags if check_isbn(tag)]
        if found:
            found = found[0]
            mi.isbn = found
        # Removes keywords equal to the chosen ISBN; with no ISBN nothing
        # compares equal to the empty list, so every tag is kept.
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != found]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)

    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    The first section (searching from section 1) whose header type is
    ``DATATYPE_METADATA`` is parsed. After its 8 byte section header it
    holds a 2 byte record count followed by sub-records, each made of a
    2 byte type, a 2 byte length counted in 2-byte words, and the data.
    Character set (type 1), author (4), title (5) and publication date (6)
    are read. If no metadata section exists the ``Unknown`` placeholder is
    returned.

    ``extract_cover`` is accepted but not used here.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        if SectionHeader(raw_data).type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    title = None
    author = None
    pubdate = 0

    # Offset of the current sub-record header; the first one directly
    # follows the 2 byte record count.
    pos = 2
    for i in range(record_count):
        try:
            rec_type, length = struct.unpack_from('>HH', section_data, pos)
            data_start = pos + 4
            data_end = data_start + 2 * length

            # CharSet
            if rec_type == 1:
                val, = struct.unpack_from('>H', section_data, data_start)
                default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
            # Author
            elif rec_type == 4:
                # Was a single index, which yields one byte, not the field.
                author = section_data[data_start:data_end]
            # Title
            elif rec_type == 5:
                title = section_data[data_start:data_end]
            # Publication Date
            elif rec_type == 6:
                pubdate, = struct.unpack_from('>I', section_data, data_start)
        except struct.error:
            # Truncated section: keep what has been read so far.
            break

        # Step over the 4 byte header as well as the data. The old code
        # advanced by the data length only, so the next header was read
        # from inside the current record's data.
        pos = data_end

    # Decoding is deferred until here so a CharSet record that follows the
    # text records still applies to them.
    if title:
        mi.title = title.replace(b'\0', b'').decode(default_encoding, 'replace')
    if author:
        author = author.replace(b'\0', b'').decode(default_encoding, 'replace')
        names = [name.strip() for name in author.split(',') if name.strip()]
        if names:
            # Was assigned to ``mi.author``, which is not the field the
            # rest of this function uses for authors.
            mi.authors = names
    # NOTE(review): with no date record this stores the epoch (pubdate
    # stays 0), as before -- confirm that is wanted.
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
def __init__(self, raw, codec, title):
    '''
    Parse a raw header block made of a 4 byte type, two big-endian 32 bit
    integers (length and item count) and then that many records.

    Each record starts with a 32 bit id and a 32 bit size; the size
    includes those 8 bytes. Ids 100-199 go to ``self.process_metadata``;
    ids 201, 202, 203, 501 and 503 set cover offset, thumbnail offset,
    fake cover flag, cdetype and long title. ``title`` is the fallback
    title used when no record 503 decodes successfully.
    '''
    self.doctype = raw[:4]
    self.length, self.num_items = struct.unpack('>LL', raw[4:12])
    records = raw[12:]
    self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
    self.has_fake_cover = True
    self.start_offset = None
    self.kf8_header = None
    self.uuid = self.cdetype = None

    cursor = 0
    remaining = self.num_items
    while remaining > 0:
        remaining -= 1
        rec_id, rec_size = struct.unpack('>LL', records[cursor:cursor + 8])
        content = records[cursor + 8:cursor + rec_size]
        cursor += rec_size

        if 100 <= rec_id < 200:
            self.process_metadata(rec_id, content, codec)
        elif rec_id == 203:
            self.has_fake_cover = bool(struct.unpack('>L', content)[0])
        elif rec_id == 201:
            cover_offset, = struct.unpack('>L', content)
            # Values at or above NULL_INDEX are not stored.
            if cover_offset < NULL_INDEX:
                self.cover_offset = cover_offset
        elif rec_id == 202:
            self.thumbnail_offset, = struct.unpack('>L', content)
        elif rec_id == 501:
            try:
                self.cdetype = content.decode('ascii')
            except UnicodeDecodeError:
                self.cdetype = None
            # cdetype
            if content == b'EBSP':
                if not self.mi.tags:
                    self.mi.tags = []
                self.mi.tags.append(_('Sample Book'))
        elif rec_id == 502:
            # last update time
            pass
        elif rec_id == 503:  # Long title
            # Amazon seems to regard this as the definitive book title
            # rather than the title from the PDB header. In fact when
            # sending MOBI files through Amazon's email service if the
            # title contains non ASCII chars or non filename safe chars
            # they are messed up in the PDB header
            try:
                title = content.decode(codec)
            except:
                pass
        # Records with any other id are ignored.

    if title:
        self.mi.title = replace_entities(title)
def parse_comic_comment(comment, series_index='volume'):
    '''
    Parse a ComicBookInfo archive comment into a ``MetaInformation``.

    ``comment`` is parsed as JSON. If it is an object, the first key that
    starts with ``ComicBookInfo`` is handed to ``get_comic_book_info``
    together with ``series_index``. Otherwise the returned metadata is
    empty.
    '''
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation
    import json

    mi = MetaInformation(None, None)
    data = json.loads(comment)
    if not isinstance(data, dict):
        return mi

    section = next(
        (key for key in data if key.startswith('ComicBookInfo')), None)
    if section is not None:
        get_comic_book_info(data[section], mi, series_index=series_index)
    return mi
def do_set_metadata(opts, mi, stream, stream_type):
    '''
    Apply the metadata options in ``opts`` to a copy of ``mi`` and write
    the result into ``stream``.

    Steps, in order: clear guide/toc/manifest/spine on the copy; merge in
    the OPF named by ``opts.from_opf``; copy every simple preference that
    is set; then handle the options that need conversion (authors, sort
    strings, tags, series, series index, publication date, identifiers,
    cover). Identifiers are given as ``key:value`` strings, are merged
    into the existing ones, and entries with an empty key or value are
    dropped. Finally ``set_metadata`` is called inside the
    ``force_identifiers`` context.
    '''
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # ``with`` closes the file, which used to be left open.
        with open(from_opf, 'rb') as opf_file:
            opf_mi = OPF(opf_file).to_book_metadata()
        mi.smart_update(opf_mi)

    # These options need conversion and are handled individually below.
    handled_separately = ('to_opf', 'from_opf', 'authors', 'title_sort',
                          'author_sort', 'get_cover', 'cover', 'tags',
                          'lrf_bookid', 'identifiers')
    for pref in config().option_set.preferences:
        if pref.name in handled_separately:
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)

    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        # No explicit sort string: derive one from the new title.
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)
    if getattr(opts, 'identifiers', None):
        val = {
            k.strip(): v.strip()
            for k, v in (x.partition(':')[0::2] for x in opts.identifiers)
        }
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            # items() replaces iteritems(), which only exists on Python 2.
            val = {k: v for k, v in orig.items() if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
def get_metadata(stream):
    '''
    Return fb2 metadata as a L{MetaInformation} object.

    Title and authors are parsed first; authors default to ``Unknown``.
    If the document has no title, the base name of ``stream.name`` without
    its extension (or ``Unknown``) is used. Cover, comments, tags, series,
    ISBN, publisher, publication date and language are then parsed on a
    best effort basis: a failure in one of them leaves that field unset
    and does not affect the others.
    '''
    root = _get_fbroot(get_fb2_data(stream)[0])
    ctx = Context(root)
    book_title = _parse_book_title(root, ctx)
    authors = _parse_authors(root, ctx) or [_('Unknown')]

    # fallback for book_title
    if book_title:
        book_title = unicode_type(book_title)
    else:
        stream_name = getattr(stream, 'name', _('Unknown'))
        book_title = force_unicode(
            os.path.splitext(os.path.basename(stream_name))[0])
    mi = MetaInformation(book_title, authors)

    optional_parsers = (
        _parse_cover,
        _parse_comments,
        _parse_tags,
        _parse_series,
        _parse_isbn,
        _parse_publisher,
        _parse_pubdate,
        _parse_language,
    )
    for parse in optional_parsers:
        try:
            parse(root, mi, ctx)
        except Exception:
            # Optional field; narrowed from a bare ``except:`` so
            # KeyboardInterrupt and SystemExit are not swallowed.
            pass

    return mi
def _get_metadata(stream, stream_type, use_libprs_metadata,
                  force_read_metadata=False, pattern=None):
    '''
    Read metadata for ``stream`` from up to three sources and merge them.

    Sources, later ones overriding earlier ones through ``smart_update``:
    the file name (parsed with ``pattern``), the file contents (read only
    if ``force_read_metadata`` or the ``read_file_metadata`` preference is
    set) and an ``.opf`` file with the same base name next to the stream.

    If ``use_libprs_metadata`` is true and that OPF has an
    ``application_id``, the OPF metadata is returned on its own.
    ``stream_type`` is lower-cased and the html, mobi and odt families of
    extensions are mapped to ``html``, ``mobi`` and ``odt``.
    '''
    if stream_type:
        stream_type = stream_type.lower()
    # 'htm' replaces a second, redundant 'html' entry, which looks like a
    # typo: '.htm' was the one html-family extension not normalised.
    if stream_type in ('html', 'htm', 'xhtml', 'xhtm', 'xml'):
        stream_type = 'html'
    if stream_type in ('mobi', 'prc', 'azw'):
        stream_type = 'mobi'
    if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'):
        stream_type = 'odt'

    opf = None
    if hasattr(stream, 'name'):
        c = os.path.splitext(stream.name)[0] + '.opf'
        if os.access(c, os.R_OK):
            opf = opf_metadata(os.path.abspath(c))

    if use_libprs_metadata and getattr(opf, 'application_id', None) is not None:
        return opf

    mi = MetaInformation(None, None)
    name = os.path.basename(getattr(stream, 'name', ''))
    base = metadata_from_filename(name, pat=pattern)
    if force_read_metadata or prefs['read_file_metadata']:
        mi = get_file_type_metadata(stream, stream_type)
    if base.title == os.path.splitext(name)[0] and \
            base.is_null('authors') and base.is_null('isbn'):
        # Assume that there was no metadata in the file and the user set pattern
        # to match meta info from the file name did not match.
        # The regex is meant to match the standard format filenames are written
        # in the library title - author.extension
        base.smart_update(
            metadata_from_filename(
                name,
                re.compile(r'^(?P<title>.+)[ _]-[ _](?P<author>[^-]+)$')))
        if base.title:
            base.title = base.title.replace('_', ' ')
        if base.authors:
            base.authors = [a.replace('_', ' ').strip() for a in base.authors]
    if not base.authors:
        base.authors = [_('Unknown')]
    if not base.title:
        base.title = _('Unknown')
    base.smart_update(mi)
    if opf is not None:
        base.smart_update(opf)

    return base
def get_clippings_cid(self, title):
    '''
    Find or create cid for title

    The library is searched for a book with the given title that is tagged
    ``Clippings``; the first hit is returned. If the search fails or finds
    nothing, a new book entry with author ``Various`` and the tag
    ``Clippings`` is created and its id is returned.
    '''
    db = self.parent.opts.gui.current_db
    try:
        cid = list(db.data.parse('title:"%s" and tag:Clippings' % title))[0]
    except Exception:
        # No hit (IndexError) or a failed search: create the book.
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit do not trigger book creation.
        mi = MetaInformation(title, authors=['Various'])
        mi.tags = ['Clippings']
        cid = db.create_book_entry(mi, cover=None, add_duplicates=False,
                                   force_id=None)
    return cid
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object

    The header is read with ``PdbHeaderReader`` and its ``ident`` selects
    a reader from ``MREADER``. When a reader is registered it is called
    with ``stream`` and ``extract_cover``. Otherwise only the header title
    is used, with ``Unknown`` as the author.
    """
    pheader = PdbHeaderReader(stream)
    MetadataReader = MREADER.get(pheader.ident, None)

    if MetadataReader is None:
        title = pheader.title
        # A bytes title is decoded so the result always carries text.
        if isinstance(title, bytes):
            title = title.decode('utf-8', 'replace')
        return MetaInformation(title, [_('Unknown')])

    return MetadataReader(stream, extract_cover)
def add_empty(self, *args):
    '''
    Ask the user for details and add empty book records to the library.

    The first author and the series of the currently selected row, if
    there is one, are handed to the dialog. After the dialog is accepted
    the requested number of books is created. When the
    ``create_empty_format_file`` preference is set, each book also gets a
    generated empty file of that format.

    The generated temporary files are removed when the function finishes,
    including when importing fails part way.
    '''
    author = series = None
    index = self.gui.library_view.currentIndex()
    if index.isValid():
        raw = index.model().db.authors(index.row())
        if raw:
            # '|' stands in for a comma inside a single author name.
            authors = [a.strip().replace('|', ',') for a in raw.split(',')]
            if authors:
                author = authors[0]
        series = index.model().db.series(index.row())

    dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                             author, series)
    if dlg.exec_() != dlg.Accepted:
        return

    temp_files = []
    num = dlg.qty_to_add
    series = dlg.selected_series
    title = dlg.selected_title or _('Unknown')
    db = self.gui.library_view.model().db
    ids = []
    try:
        empty_format = gprefs.get('create_empty_format_file', '')
        for x in xrange(num):
            mi = MetaInformation(title, dlg.selected_authors)
            if series:
                mi.series = series
                mi.series_index = db.get_next_series_num_for(series)
            fmts = []
            if empty_format:
                from calibre.ebooks.oeb.polish.create import create_book
                pt = PersistentTemporaryFile(suffix='.' + empty_format)
                pt.close()
                temp_files.append(pt.name)
                create_book(mi, pt.name, fmt=empty_format)
                fmts = [pt.name]
            ids.append(db.import_book(mi, fmts))

        self.gui.library_view.model().books_added(num)
        if hasattr(self.gui, 'db_images'):
            # Written as two statements; it used to be a tuple expression.
            self.gui.db_images.beginResetModel()
            self.gui.db_images.endResetModel()
        self.gui.tags_view.recount()
        if ids:
            ids.reverse()
            self.gui.library_view.select_rows(ids)
    finally:
        # Previously these files leaked whenever anything above raised.
        for path in temp_files:
            os.remove(path)
def get_metadata(f):
    '''
    Read metadata from an LRX file.

    The file must carry ``ftypLRX2`` at bytes 4-12. Blocks are walked
    until one tagged ``bbeb`` is found; the compressed metadata section
    behind it is decompressed and parsed as XML. Title, authors, their
    ``reading`` sort strings, publisher, categories (as tags) and language
    are taken from it.

    Raises ``ValueError`` if the file is not a supported or valid LRX
    file, or if the metadata section does not have the declared size.
    '''
    # presumably reads ``amount`` bytes at absolute offset ``at``, leaving
    # the file position just behind them -- verify against _read
    read = lambda at, amount: _read(f, at, amount)
    f.seek(0)
    buf = f.read(12)
    if buf[4:] == b'ftypLRX2':
        offset = 0
        # Hop from block to block: the first 4 bytes of each 8 byte
        # header are used as the distance to the next one.
        while True:
            offset += word_be(buf[:4])
            try:
                buf = read(offset, 8)
            except:
                raise ValueError('Not a valid LRX file')
            if buf[4:] == b'bbeb':
                break
        offset += 8
        buf = read(offset, 16)
        if buf[:8].decode('utf-16-le') != 'LRF\x00':
            raise ValueError('Not a valid LRX file')
        lrf_version = word_le(buf[8:12])
        offset += 0x4c
        compressed_size = short_le(read(offset, 2))
        offset += 2
        if lrf_version >= 800:
            offset += 6
        compressed_size -= 4
        uncompressed_size = word_le(read(offset, 4))
        # Relies on the file position left by the read() just above.
        info = decompress(f.read(compressed_size))
        if len(info) != uncompressed_size:
            raise ValueError('LRX file has malformed metadata section')
        root = safe_xml_fromstring(info)
        bi = root.find('BookInfo')
        title = bi.find('Title')
        title_sort = title.get('reading', None)
        title = title.text
        author = bi.find('Author')
        author_sort = author.get('reading', None)
        mi = MetaInformation(title, string_to_authors(author.text))
        mi.title_sort, mi.author_sort = title_sort, author_sort
        # NOTE(review): this value is not used again before returning.
        author = author.text
        publisher = bi.find('Publisher')
        # getattr copes with a missing Publisher element (find gives None).
        mi.publisher = getattr(publisher, 'text', None)
        mi.tags = [x.text for x in bi.findall('Category')]
        mi.language = root.find('DocInfo').find('Language').text
        return mi

    # NOTE(review): when 12 bytes were read this 4 byte slice cannot equal
    # a 3 byte literal, so this branch looks unreachable for normal files
    # -- confirm the intended magic value.
    elif buf[4:8] == b'LRX':
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
def get_cbz_metadata(stream):
    '''
    Read ComicBookInfo metadata from the comment of a zip archive.

    The comment is parsed as JSON. Every top level key that starts with
    ``ComicBookInfo`` is passed to ``get_comic_book_info``. Without a
    comment, or when the JSON value has no keys, the returned metadata is
    empty.
    '''
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata import MetaInformation
    import json

    archive = ZipFile(stream)
    mi = MetaInformation(None, None)
    if not archive.comment:
        return mi

    info = json.loads(archive.comment)
    if not hasattr(info, 'keys'):
        return mi

    for section in info.keys():
        if section.startswith('ComicBookInfo'):
            get_comic_book_info(info[section], mi)
    return mi
def get_metadata(stream, extract_cover=True):
    """
    Read metadata from a PDB stream.

    The header's ``ident`` selects a reader from ``MREADER``, which is
    called with ``stream`` and ``extract_cover``. If no reader is
    registered, only the header title is used (decoded as UTF-8 when it
    is bytes), with ``Unknown`` as the author.
    """
    pheader = PdbHeaderReader(stream)
    reader = MREADER.get(pheader.ident, None)
    if reader is not None:
        return reader(stream, extract_cover)

    header_title = pheader.title
    if isinstance(header_title, bytes):
        header_title = header_title.decode('utf-8', 'replace')
    return MetaInformation(header_title, [_('Unknown')])
def do_add_empty(db, title, authors, isbn, tags, series, series_index):
    '''
    Add one book without any format files to ``db``.

    ``title`` is applied when it is not ``None``. ``authors``, ``isbn``
    and ``tags`` are applied when they are non-empty. ``series_index`` is
    applied only together with a non-empty ``series``. Afterwards
    ``write_dirtied(db)`` and ``send_message()`` are called.
    '''
    from calibre.ebooks.metadata import MetaInformation

    mi = MetaInformation(None)
    if title is not None:
        mi.title = title

    optional_fields = (('authors', authors), ('isbn', isbn), ('tags', tags))
    for field, value in optional_fields:
        if value:
            setattr(mi, field, value)

    if series:
        mi.series = series
        mi.series_index = series_index

    db.import_book(mi, [])
    write_dirtied(db)
    send_message()