def do_one_isbn_add(self):
    # Import one queued ISBN-only book, then re-arm a QTimer to process
    # the next entry so the GUI event loop stays responsive.
    try:
        db = self.gui.library_view.model().db
        try:
            x = self.isbn_books.pop(0)
        except IndexError:
            # Queue exhausted: refresh the model, close the progress
            # dialog and start metadata download for the new book ids.
            self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
            self.isbn_add_dialog.accept()
            self.gui.iactions['Edit Metadata'].download_metadata(
                ids=self.add_by_isbn_ids,
                ensure_fields=frozenset(['title', 'authors']))
            return
        mi = MetaInformation(None)
        mi.isbn = x['isbn']
        if self.isbn_add_tags:
            mi.tags = list(self.isbn_add_tags)
        # Attach the source file as a format only when one was given.
        fmts = [] if x['path'] is None else [x['path']]
        self.add_by_isbn_ids.add(db.import_book(mi, fmts))
        self.isbn_add_dialog.value += 1
        QTimer.singleShot(10, self.do_one_isbn_add)
    except:
        # Make sure the progress dialog is dismissed before propagating.
        self.isbn_add_dialog.accept()
        raise
def get_metadata_from_reader(rdr):
    # Build a MetaInformation object from an open reader by parsing its
    # "home" HTML page for author/publisher/ISBN/comments/cover.
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
        resolve_entities=True)[0])

    title = rdr.title
    try:
        # Trust the declared encoding only if Python recognises it.
        x = rdr.GetEncoding()
        codecs.lookup(x)
        enc = x
    except:
        enc = 'cp1252'
    title = force_unicode(title, enc)

    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi
def do_one_isbn_add(self):
    # Import one queued ISBN-only book, then re-arm a QTimer to process
    # the next entry so the GUI event loop stays responsive.
    try:
        db = self.gui.library_view.model().db
        try:
            x = self.isbn_books.pop(0)
        except IndexError:
            # Queue exhausted: refresh the model, close the progress
            # dialog and start metadata download for the new book ids.
            self.gui.library_view.model().books_added(
                self.isbn_add_dialog.value)
            self.isbn_add_dialog.accept()
            self.gui.iactions['Edit Metadata'].download_metadata(
                ids=self.add_by_isbn_ids,
                ensure_fields=frozenset(['title', 'authors']))
            return
        mi = MetaInformation(None)
        mi.isbn = x['isbn']
        if self.isbn_add_tags:
            mi.tags = list(self.isbn_add_tags)
        # Attach the source file as a format only when one was given.
        fmts = [] if x['path'] is None else [x['path']]
        self.add_by_isbn_ids.add(db.import_book(mi, fmts))
        self.isbn_add_dialog.value += 1
        QTimer.singleShot(10, self.do_one_isbn_add)
    except:
        # Make sure the progress dialog is dismissed before propagating.
        self.isbn_add_dialog.accept()
        raise
def get_metadata(stream, cpath=None):
    # Read PDF metadata (and optionally extract the cover to cpath) with
    # the podofo library, running the work in a separate worker process.
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    pt.write(stream.read())
    pt.close()
    server = Server(pool_size=1)
    job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
        lambda x, y: x, args=[pt.name, cpath])
    server.add_job(job)
    # Poll until the out-of-process job completes.
    while not job.is_finished:
        time.sleep(0.1)
        job.update()
    job.update()
    server.close()
    if job.result is None:
        raise ValueError('Failed to read metadata: ' + job.details)
    title, authors, creator, tags, ok = job.result
    if not ok:
        # Cover extraction failed; the textual metadata may still be fine.
        print 'Failed to extract cover:'
        print job.details
    if title == '_':
        # Placeholder title: fall back to the stream's file name.
        title = getattr(stream, 'name', _('Unknown'))
        title = os.path.splitext(title)[0]

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    if os.path.exists(pt.name):
        os.remove(pt.name)
    if ok:
        mi.cover = cpath
    return mi
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    '''
    Merge metadata from a list of format files, trying higher-priority
    formats first. An OPF file that yields a title short-circuits the rest.
    '''
    result = MetaInformation(None, None)
    # Stable key-sort is equivalent to the old cmp-based priority sort.
    formats.sort(key=lambda fpath: METADATA_PRIORITIES[path_to_ext(fpath)])
    extensions = list(map(path_to_ext, formats))
    if 'opf' in extensions:
        opf_path = formats[extensions.index('opf')]
        opf_mi = opf_metadata(opf_path)
        if opf_mi is not None and opf_mi.title:
            return opf_mi

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream, stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                result.smart_update(newmi)
            except:
                continue
            # A format that carries an application_id is authoritative.
            if getattr(result, 'application_id', None) is not None:
                return result

    if not result.title:
        result.title = _('Unknown')
    if not result.authors:
        result.authors = [_('Unknown')]

    return result
def get_djvu_metadata(stream, cover=True):
    # Extract DJVU metadata (and optionally the cover) by copying the
    # stream into a temp dir and running djvused in a forked worker.
    with TemporaryDirectory('_djvu_metadata_read') as djvupath:
        stream.seek(0)
        with open(os.path.join(djvupath, 'src.djvu'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre_plugins.djvu_metadata.djvu',
                    'get_djvu_metadata_worker', (djvupath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run djvused')
        info = res['result']
        # Echo any worker output for debugging purposes.
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if info is None:
            raise ValueError('Could not read metadata from djvu')
        covpath = os.path.join(djvupath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
        au = [_('Unknown')]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)
    if cdata:
        mi.cover_data = ('jpg', cdata)
    return mi
def get_metadata(stream, extract_cover=True):
    ''' Return metadata as a L{MetaInfo} object '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if not cover_href:
                    # Fall back to the legacy <meta name="cover"> pointer,
                    # accepting only raster image extensions.
                    for meta in opf.metadata.xpath('//*[local-name()="meta" and @name="cover"]'):
                        val = meta.get('content')
                        if val.rpartition('.')[2].lower() in {'jpeg', 'jpg', 'png'}:
                            cover_href = val
                            break
                if cover_href:
                    try:
                        mi.cover_data = (os.path.splitext(cover_href)[1],
                                         zf.read(cover_href))
                    except Exception:
                        # Missing/corrupt cover entry: keep text metadata.
                        pass
    except Exception:
        # Unreadable zip or OPF: return whatever we collected so far.
        return mi
    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    # Default the title to the stream's base file name, when available.
    base = getattr(stream, 'name', '').rpartition('.')[0]
    if base:
        base = os.path.basename(base)
    mi = MetaInformation(base or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Collect at most the first four lines, capped at 1KB, and look for a
    # "title <three blank lines> author" header.
    mdata = ''
    for _i in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
        if not line:
            break
        mdata += line

    mdata = mdata[:1024]

    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        mi.title = mo.group('title')
        mi.authors = mo.group('author').split(',')

    return mi
def add_empty(self, *args):
    '''
    Add an empty book item to the library. This does not import any formats
    from a book file.
    '''
    author = series = None
    index = self.gui.library_view.currentIndex()
    if index.isValid():
        # Seed the dialog with the currently selected book's first author
        # and series.
        raw = index.model().db.authors(index.row())
        if raw:
            authors = [a.strip().replace('|', ',') for a in raw.split(',')]
            if authors:
                author = authors[0]
        series = index.model().db.series(index.row())
    dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                             author, series)
    if dlg.exec_() == dlg.Accepted:
        num = dlg.qty_to_add
        series = dlg.selected_series
        db = self.gui.library_view.model().db
        ids = []
        for x in xrange(num):
            mi = MetaInformation(_('Unknown'), dlg.selected_authors)
            if series:
                mi.series = series
                mi.series_index = db.get_next_series_num_for(series)
            ids.append(db.import_book(mi, []))
        self.gui.library_view.model().books_added(num)
        if hasattr(self.gui, 'db_images'):
            self.gui.db_images.reset()
        self.gui.tags_view.recount()
        if ids:
            # Select the newly added rows, newest first.
            ids.reverse()
            self.gui.library_view.select_rows(ids)
def get_metadata(stream, extract_cover=True):
    ''' Return metadata as a L{MetaInfo} object '''
    # Default the title to the stream's base file name, when available.
    name = getattr(stream, 'name', '').rpartition('.')[0]
    if name:
        name = os.path.basename(name)
    mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Collect at most the first four lines for header sniffing.
    mdata = u''
    for x in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
        if line == '':
            break
        else:
            mdata += line

    # Only the first 100 characters are inspected.
    mdata = mdata[:100]

    # Look for a "title <three blank lines> author" header.
    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        mi.title = mo.group('title')
        mi.authors = mo.group('author').split(',')

    return mi
def add_empty(self, *args):
    '''
    Add an empty book item to the library. This does not import any formats
    from a book file.
    '''
    author = series = title = None
    index = self.gui.library_view.currentIndex()
    if index.isValid():
        # Seed the dialog with the selected book's author/series/title so
        # it can offer duplication.
        raw = index.model().db.authors(index.row())
        if raw:
            authors = [a.strip().replace('|', ',') for a in raw.split(',')]
            if authors:
                author = authors[0]
        series = index.model().db.series(index.row())
        title = index.model().db.title(index.row())
    dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                             author, series, dup_title=title)
    if dlg.exec_() == dlg.Accepted:
        temp_files = []
        num = dlg.qty_to_add
        series = dlg.selected_series
        title = dlg.selected_title or _('Unknown')
        db = self.gui.library_view.model().db
        ids, orig_fmts = [], []
        if dlg.duplicate_current_book:
            # Reuse the full metadata (including cover) of the current book.
            origmi = db.get_metadata(index.row(), get_cover=True,
                                     cover_as_data=True)
            if dlg.copy_formats.isChecked():
                book_id = db.id(index.row())
                orig_fmts = tuple(db.new_api.format(book_id, fmt, as_path=True)
                                  for fmt in db.new_api.formats(book_id))

        for x in xrange(num):
            if dlg.duplicate_current_book:
                mi = origmi
            else:
                mi = MetaInformation(title, dlg.selected_authors)
                if series:
                    mi.series = series
                    mi.series_index = db.get_next_series_num_for(series)
            fmts = []
            empty_format = gprefs.get('create_empty_format_file', '')
            if dlg.duplicate_current_book and dlg.copy_formats.isChecked():
                fmts = orig_fmts
            elif empty_format:
                # Optionally create a skeleton book file in the configured
                # format.
                from calibre.ebooks.oeb.polish.create import create_book
                pt = PersistentTemporaryFile(suffix='.' + empty_format)
                pt.close()
                temp_files.append(pt.name)
                create_book(mi, pt.name, fmt=empty_format)
                fmts = [pt.name]
            ids.append(db.import_book(mi, fmts))
        # Remove the temporary copies of the duplicated formats.
        tuple(map(os.remove, orig_fmts))
        self.gui.library_view.model().books_added(num)
        self.gui.refresh_cover_browser()
        self.gui.tags_view.recount()
        if ids:
            # Select the newly added rows, newest first.
            ids.reverse()
            self.gui.library_view.select_rows(ids)
        for path in temp_files:
            os.remove(path)
def get_metadata(stream):
    # Read metadata from a MOBI/Topaz stream, preferring EXTH metadata and
    # falling back to metadata embedded in the book content when safe.
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.magick.draw import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except:
        raw = ''
    stream.seek(0)
    # Topaz files are delegated to the dedicated Topaz reader.
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # No EXTH header: only attempt the expensive full content parse
        # for reasonably small files (< 4MB).
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi

    # Cover: use the EXTH cover offset when present, otherwise try the
    # first image record.
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except:
            data = ''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data, 'cover.jpg',
                return_data=True))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
def add_annotation_to_library(self, db, db_id, annotation):
    # Merge a Kindle bookmark or clippings annotation into the library.
    from calibre.ebooks.BeautifulSoup import Tag
    from calibre.ebooks.metadata import MetaInformation

    bm = annotation
    # Books carrying these tags never receive annotations.
    ignore_tags = set(['Catalog', 'Clippings'])

    if bm.type == 'kindle_bookmark':
        mi = db.get_metadata(db_id, index_is_id=True)
        user_notes_soup = self.generate_annotation_html(bm.value)
        if mi.comments:
            # Strip any previously inserted annotation block before
            # appending the fresh one.
            a_offset = mi.comments.find('<div class="user_annotations">')
            ad_offset = mi.comments.find(
                '<hr class="annotations_divider" />')

            if a_offset >= 0:
                mi.comments = mi.comments[:a_offset]
            if ad_offset >= 0:
                mi.comments = mi.comments[:ad_offset]
        if set(mi.tags).intersection(ignore_tags):
            return
        if mi.comments:
            hrTag = Tag(user_notes_soup, 'hr')
            hrTag['class'] = 'annotations_divider'
            user_notes_soup.insert(0, hrTag)
            mi.comments += unicode(user_notes_soup.prettify())
        else:
            mi.comments = unicode(user_notes_soup.prettify())
        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
            bm.value.path, index_is_id=True)
    elif bm.type == 'kindle_clippings':
        # Find 'My Clippings' author=Kindle in database, or add
        last_update = 'Last modified %s' % strftime(
            u'%x %X', bm.value['timestamp'].timetuple())
        mc_id = list(
            db.data.search_getting_ids('title:"My Clippings"', '',
                sort_results=False))
        if mc_id:
            db.add_format_with_hooks(mc_id[0], 'TXT', bm.value['path'],
                index_is_id=True)
            mi = db.get_metadata(mc_id[0], index_is_id=True)
            mi.comments = last_update
            db.set_metadata(mc_id[0], mi)
        else:
            mi = MetaInformation('My Clippings', authors=['Kindle'])
            mi.tags = ['Clippings']
            mi.comments = last_update
            db.add_books([bm.value['path']], ['txt'], [mi])
def _start_splitmerge(self, book_list, tdir=None, db=None):
    # Merge the successfully split chapter EPUBs into one new anthology
    # book, add it to the library, then run word counts, auto-convert and
    # FanFicFare list bookkeeping.
    # logger.debug(book_list)
    em = self.get_epubmerge_plugin()
    es = self.get_epubsplit_plugin()
    good_list = [b for b in book_list if b['good']]
    tmp = PersistentTemporaryFile(prefix='merge-', suffix='.epub', dir=tdir)
    # Single source book keeps its title; multiple get a generic one.
    if len(good_list) == 1:
        deftitle = "New " + good_list[0]['title']
        defauthors = good_list[0]['authors']
    else:
        deftitle = "New Chapters Anthology"
        defauthors = ["Various Authors"]
    mi = MetaInformation(deftitle, defauthors)
    # Union of all source books' tags (flattened).
    tagslists = [x['tags'] for x in good_list]
    mi.tags = [item for sublist in tagslists for item in sublist]
    mi.comments = "<p>New Chapters from:</p>"
    mi.comments += '<br/>'.join(
        ["%s by %s" % (x['title'], ", ".join(x['authors']))
         for x in good_list])

    em.do_merge(tmp,
                [b['splittmp'] for b in good_list],
                authoropts=mi.authors,
                titleopt=mi.title,
                descopt=mi.comments,
                tags=mi.tags,
                keepmetadatafiles=False,
                )

    book_id = db.create_book_entry(mi, add_duplicates=True)
    db.add_format_with_hooks(book_id, 'EPUB', tmp, index_is_id=True)
    self.gui.library_view.model().books_added(1)
    self.gui.library_view.model().refresh_ids([book_id])
    # self.gui.iactions['Edit Metadata'].edit_metadata(False)
    self.gui.tags_view.recount()

    ## run word counts
    cp_plugin = self.gui.iactions['Count Pages']
    cp_plugin.count_statistics([book_id], ['WordCount'])

    ## run auto convert
    self.gui.iactions['Convert Books'].auto_convert_auto_add([book_id])

    ## add to FFF update lists
    self.gui.library_view.select_rows([book_id])
    fff_plugin = self.gui.iactions['FanFicFare']
    fff_plugin.update_lists()

    remove_dir(tdir)
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    Scans the PDB sections for a metadata section and parses its
    (type, length, value) records: charset, author, title, pubdate.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            # Skip the 8-byte section header.
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in range(record_count):
        try:
            field_type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if field_type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif field_type == 4:
            # BUG FIX: take the whole field as a slice; the old single-index
            # expression section_data[6+adv+(2*length)] yielded one byte.
            author = section_data[6+adv:6+adv+(2*length)]
        # Title
        elif field_type == 5:
            title = section_data[6+adv:6+adv+(2*length)]
        # Publication Date
        elif field_type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        # BUG FIX: MetaInformation stores authors in the list attribute
        # `authors`; assigning to `author` silently dropped the value.
        mi.authors = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    Scans the PDB sections for a metadata section and parses its
    (type, length, value) records: charset, author, title, pubdate.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            # Skip the 8-byte section header.
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in xrange(record_count):
        try:
            field_type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if field_type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif field_type == 4:
            # BUG FIX: take the whole field as a slice; the old single-index
            # expression section_data[6+adv+(2*length)] yielded one byte.
            author = section_data[6+adv:6+adv+(2*length)]
        # Title
        elif field_type == 5:
            title = section_data[6+adv:6+adv+(2*length)]
        # Publication Date
        elif field_type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        # BUG FIX: MetaInformation stores authors in the list attribute
        # `authors`; assigning to `author` silently dropped the value.
        mi.authors = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
def __init__(self, raw, codec, title):
    # Parse a MOBI EXTH header: a sequence of (idx, size, content) records
    # carrying metadata, cover pointers and Amazon-specific extensions.
    self.doctype = raw[:4]
    self.length, self.num_items = struct.unpack('>LL', raw[4:12])
    raw = raw[12:]
    pos = 0
    self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
    self.has_fake_cover = True
    self.start_offset = None
    left = self.num_items
    self.kf8_header = None
    self.uuid = self.cdetype = None

    while left > 0:
        left -= 1
        idx, size = struct.unpack('>LL', raw[pos:pos + 8])
        content = raw[pos + 8:pos + size]
        pos += size
        if idx >= 100 and idx < 200:
            # Standard metadata records (author, publisher, isbn, ...).
            self.process_metadata(idx, content, codec)
        elif idx == 203:
            self.has_fake_cover = bool(struct.unpack('>L', content)[0])
        elif idx == 201:
            co, = struct.unpack('>L', content)
            if co < NULL_INDEX:
                self.cover_offset = co
        elif idx == 202:
            self.thumbnail_offset, = struct.unpack('>L', content)
        elif idx == 501:
            try:
                self.cdetype = content.decode('ascii')
            except UnicodeDecodeError:
                self.cdetype = None
            # cdetype
            if content == b'EBSP':
                if not self.mi.tags:
                    self.mi.tags = []
                self.mi.tags.append(_('Sample Book'))
        elif idx == 502:
            # last update time
            pass
        elif idx == 503:  # Long title
            # Amazon seems to regard this as the definitive book title
            # rather than the title from the PDB header. In fact when
            # sending MOBI files through Amazon's email service if the
            # title contains non ASCII chars or non filename safe chars
            # they are messed up in the PDB header
            try:
                title = content.decode(codec)
            except:
                pass
        #else:
        #    print 'unknown record', idx, repr(content)
    if title:
        self.mi.title = replace_entities(title)
def get_clippings_cid(self, title):
    '''
    Find or create cid for title

    Searches the current library for a book with the given title tagged
    Clippings; if none exists, creates an empty entry authored 'Various'.
    '''
    cid = None
    try:
        cid = list(self.parent.opts.gui.current_db.data.parse(
            'title:"%s" and tag:Clippings' % title))[0]
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are
        # not swallowed; any lookup failure still falls through to create.
        mi = MetaInformation(title, authors=['Various'])
        mi.tags = ['Clippings']
        cid = self.parent.opts.gui.current_db.create_book_entry(
            mi, cover=None, add_duplicates=False, force_id=None)
    return cid
def get_metadata(stream, cover=True):
    # Read PDF metadata (and optionally a first-page cover) by copying the
    # stream into a temp dir and running pdfinfo in a forked worker.
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                    (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        # Echo any worker output for debugging purposes.
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
        au = [_('Unknown')]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)
    #if isbn is not None:
    #    mi.isbn = isbn

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(',')]

    subject = info.get('Subject', None)
    if subject:
        # Subject becomes the first tag.
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)

    return mi
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object
    """
    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            try:
                # Metadata section is NUL-separated cp1252 fields:
                # title, author, (unused), publisher, isbn.
                mdata = pheader.section_data(hr.metadata_offset)
                mdata = mdata.decode('cp1252', 'replace').split('\x00')
                # Strip characters outside the allowed filename-safe set.
                mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0])
                mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])]
                mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3])
                mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4])
            except Exception:
                # Malformed metadata section: fall back to header title.
                pass

        if extract_cover:
            mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
def add_empty(self, *args):
    '''
    Add an empty book item to the library. This does not import any formats
    from a book file.
    '''
    author = series = None
    index = self.gui.library_view.currentIndex()
    if index.isValid():
        # Seed the dialog with the selected book's first author and series.
        raw = index.model().db.authors(index.row())
        if raw:
            authors = [a.strip().replace('|', ',') for a in raw.split(',')]
            if authors:
                author = authors[0]
        series = index.model().db.series(index.row())
    dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                             author, series)
    if dlg.exec_() == dlg.Accepted:
        temp_files = []
        num = dlg.qty_to_add
        series = dlg.selected_series
        title = dlg.selected_title or _('Unknown')
        db = self.gui.library_view.model().db
        ids = []
        for x in xrange(num):
            mi = MetaInformation(title, dlg.selected_authors)
            if series:
                mi.series = series
                mi.series_index = db.get_next_series_num_for(series)
            fmts = []
            empty_format = gprefs.get('create_empty_format_file', '')
            if empty_format:
                # Optionally create a skeleton book file in the configured
                # format.
                from calibre.ebooks.oeb.polish.create import create_book
                pt = PersistentTemporaryFile(suffix='.' + empty_format)
                pt.close()
                temp_files.append(pt.name)
                create_book(mi, pt.name, fmt=empty_format)
                fmts = [pt.name]
            ids.append(db.import_book(mi, fmts))
        self.gui.library_view.model().books_added(num)
        if hasattr(self.gui, 'db_images'):
            self.gui.db_images.beginResetModel(), self.gui.db_images.endResetModel()
        self.gui.tags_view.recount()
        if ids:
            # Select the newly added rows, newest first.
            ids.reverse()
            self.gui.library_view.select_rows(ids)
        for path in temp_files:
            os.remove(path)
def add_annotation_to_library(self, db, db_id, annotation):
    # Merge a Kindle bookmark or clippings annotation into the library.
    from calibre.ebooks.BeautifulSoup import Tag
    from calibre.ebooks.metadata import MetaInformation

    bm = annotation
    # Books carrying these tags never receive annotations.
    ignore_tags = set(['Catalog', 'Clippings'])

    if bm.type == 'kindle_bookmark':
        mi = db.get_metadata(db_id, index_is_id=True)
        user_notes_soup = self.generate_annotation_html(bm.value)
        if mi.comments:
            # Strip any previously inserted annotation block before
            # appending the fresh one.
            a_offset = mi.comments.find('<div class="user_annotations">')
            ad_offset = mi.comments.find('<hr class="annotations_divider" />')

            if a_offset >= 0:
                mi.comments = mi.comments[:a_offset]
            if ad_offset >= 0:
                mi.comments = mi.comments[:ad_offset]
        if set(mi.tags).intersection(ignore_tags):
            return
        if mi.comments:
            hrTag = Tag(user_notes_soup,'hr')
            hrTag['class'] = 'annotations_divider'
            user_notes_soup.insert(0, hrTag)
            mi.comments += unicode(user_notes_soup.prettify())
        else:
            mi.comments = unicode(user_notes_soup.prettify())
        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
            bm.value.path, index_is_id=True)
    elif bm.type == 'kindle_clippings':
        # Find 'My Clippings' author=Kindle in database, or add
        last_update = 'Last modified %s' % strftime(u'%x %X',
            bm.value['timestamp'].timetuple())
        mc_id = list(db.data.search_getting_ids('title:"My Clippings"', '',
            sort_results=False))
        if mc_id:
            db.add_format_with_hooks(mc_id[0], 'TXT', bm.value['path'],
                index_is_id=True)
            mi = db.get_metadata(mc_id[0], index_is_id=True)
            mi.comments = last_update
            db.set_metadata(mc_id[0], mi)
        else:
            mi = MetaInformation('My Clippings', authors=['Kindle'])
            mi.tags = ['Clippings']
            mi.comments = last_update
            db.add_books([bm.value['path']], ['txt'], [mi])
def get_metadata(f):
    # Parse Sony LRX container metadata: locate the 'bbeb' payload, then
    # decompress and parse the embedded XML BookInfo section.
    read = lambda at, amount: _read(f, at, amount)
    f.seek(0)
    buf = f.read(12)
    if buf[4:] == 'ftypLRX2':
        # MP4-style box structure: walk boxes until the 'bbeb' payload.
        offset = 0
        while True:
            offset += word_be(buf[:4])
            try:
                buf = read(offset, 8)
            except:
                raise ValueError('Not a valid LRX file')
            if buf[4:] == 'bbeb':
                break
        offset += 8
        buf = read(offset, 16)
        if buf[:8].decode('utf-16-le') != 'LRF\x00':
            raise ValueError('Not a valid LRX file')
        lrf_version = word_le(buf[8:12])
        offset += 0x4c
        compressed_size = short_le(read(offset, 2))
        offset += 2
        if lrf_version >= 800:
            # Newer LRF versions carry 6 extra header bytes and include the
            # 4-byte uncompressed-size field in compressed_size.
            offset += 6
            compressed_size -= 4
        uncompressed_size = word_le(read(offset, 4))
        info = decompress(f.read(compressed_size))
        if len(info) != uncompressed_size:
            raise ValueError('LRX file has malformed metadata section')
        root = etree.fromstring(info)
        bi = root.find('BookInfo')
        title = bi.find('Title')
        title_sort = title.get('reading', None)
        title = title.text
        author = bi.find('Author')
        author_sort = author.get('reading', None)
        mi = MetaInformation(title, string_to_authors(author.text))
        mi.title_sort, mi.author_sort = title_sort, author_sort
        author = author.text
        publisher = bi.find('Publisher')
        mi.publisher = getattr(publisher, 'text', None)
        mi.tags = [x.text for x in bi.findall('Category')]
        mi.language = root.find('DocInfo').find('Language').text
        return mi

    elif buf[4:8] == 'LRX':
        # NOTE(review): buf[4:8] is a 4-byte slice compared against the
        # 3-byte string 'LRX', so this branch can never be taken — confirm
        # the intended magic (perhaps buf[4:7]) before relying on this
        # error path.
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
def _get_metadata(self, id, args, kwargs):
    # Read metadata and hand it back through the old-style SIGNAL; a blank
    # record with an unknown author is emitted if extraction fails.
    from calibre.ebooks.metadata.meta import get_metadata
    try:
        result = get_metadata(*args, **kwargs)
    except:
        result = MetaInformation('', [_('Unknown')])
    self.emit(SIGNAL('metadata(PyQt_PyObject, PyQt_PyObject)'), id, result)
def _from_formats(self, id, args, kwargs):
    # Compute merged metadata from a set of format files and emit it; a
    # blank record with an unknown author is emitted if extraction fails.
    from calibre.ebooks.metadata.meta import metadata_from_formats
    try:
        result = metadata_from_formats(*args, **kwargs)
    except:
        result = MetaInformation('', [_('Unknown')])
    self.metadataf.emit(id, result)
def _get_metadata(self, id, args, kwargs):
    # Read metadata and hand it back through the metadata signal; a blank
    # record with an unknown author is emitted if extraction fails.
    from calibre.ebooks.metadata.meta import get_metadata
    try:
        result = get_metadata(*args, **kwargs)
    except:
        result = MetaInformation('', [_('Unknown')])
    self.metadata.emit(id, result)
def get_comic_metadata(stream, stream_type, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    # Reads ComicBookInfo metadata from the archive comment of a CBZ/CBR.
    from calibre.ebooks.metadata import MetaInformation

    mi = MetaInformation(None, None)

    # The metadata lives in the container's comment field.
    comment = None
    if stream_type == 'cbz':
        from calibre.utils.zipfile import ZipFile
        comment = ZipFile(stream).comment
    elif stream_type == 'cbr':
        from calibre.utils.unrar import comment as get_comment
        comment = get_comment(stream)

    if comment:
        import json
        m = json.loads(comment)
        if hasattr(m, 'iterkeys'):
            for cat in m.iterkeys():
                if cat.startswith('ComicBookInfo'):
                    # First ComicBookInfo* key wins.
                    get_comic_book_info(m[cat], mi, series_index=series_index)
                    break
    return mi
def add_annotation_to_library(self, db, db_id, annotation):
    # Merge a Kindle bookmark or clippings annotation into the library.
    from calibre.ebooks.BeautifulSoup import Tag
    from calibre.ebooks.metadata import MetaInformation

    bm = annotation
    # Books carrying these tags never receive annotations.
    ignore_tags = set(["Catalog", "Clippings"])

    if bm.type == "kindle_bookmark":
        mi = db.get_metadata(db_id, index_is_id=True)
        user_notes_soup = self.generate_annotation_html(bm.value)
        if mi.comments:
            # Strip any previously inserted annotation block before
            # appending the fresh one.
            a_offset = mi.comments.find('<div class="user_annotations">')
            ad_offset = mi.comments.find('<hr class="annotations_divider" />')

            if a_offset >= 0:
                mi.comments = mi.comments[:a_offset]
            if ad_offset >= 0:
                mi.comments = mi.comments[:ad_offset]
        if set(mi.tags).intersection(ignore_tags):
            return
        if mi.comments:
            hrTag = Tag(user_notes_soup, "hr")
            hrTag["class"] = "annotations_divider"
            user_notes_soup.insert(0, hrTag)
            mi.comments += unicode(user_notes_soup.prettify())
        else:
            mi.comments = unicode(user_notes_soup.prettify())
        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
            bm.value.path, index_is_id=True)
    elif bm.type == "kindle_clippings":
        # Find 'My Clippings' author=Kindle in database, or add
        last_update = "Last modified %s" % strftime(u"%x %X",
            bm.value["timestamp"].timetuple())
        mc_id = list(db.data.search_getting_ids('title:"My Clippings"', ""))
        if mc_id:
            db.add_format_with_hooks(mc_id[0], "TXT", bm.value["path"],
                index_is_id=True)
            mi = db.get_metadata(mc_id[0], index_is_id=True)
            mi.comments = last_update
            db.set_metadata(mc_id[0], mi)
        else:
            mi = MetaInformation("My Clippings", authors=["Kindle"])
            mi.tags = ["Clippings"]
            mi.comments = last_update
            db.add_books([bm.value["path"]], ["txt"], [mi])
def add_empty(self, *args):
    '''
    Add an empty book item to the library. This does not import any formats
    from a book file.
    '''
    author = series = None
    index = self.gui.library_view.currentIndex()
    if index.isValid():
        # Seed the dialog with the selected book's first author and series.
        raw = index.model().db.authors(index.row())
        if raw:
            authors = [a.strip().replace('|', ',') for a in raw.split(',')]
            if authors:
                author = authors[0]
        series = index.model().db.series(index.row())
    dlg = AddEmptyBookDialog(self.gui, self.gui.library_view.model().db,
                             author, series)
    if dlg.exec_() == dlg.Accepted:
        temp_files = []
        num = dlg.qty_to_add
        series = dlg.selected_series
        title = dlg.selected_title or _('Unknown')
        db = self.gui.library_view.model().db
        ids = []
        for x in xrange(num):
            mi = MetaInformation(title, dlg.selected_authors)
            if series:
                mi.series = series
                mi.series_index = db.get_next_series_num_for(series)
            fmts = []
            empty_format = gprefs.get('create_empty_format_file', '')
            if empty_format:
                # Optionally create a skeleton book file in the configured
                # format.
                from calibre.ebooks.oeb.polish.create import create_book
                pt = PersistentTemporaryFile(suffix='.' + empty_format)
                pt.close()
                temp_files.append(pt.name)
                create_book(mi, pt.name, fmt=empty_format)
                fmts = [pt.name]
            ids.append(db.import_book(mi, fmts))
        self.gui.library_view.model().books_added(num)
        if hasattr(self.gui, 'db_images'):
            self.gui.db_images.beginResetModel(), self.gui.db_images.endResetModel()
        self.gui.tags_view.recount()
        if ids:
            # Select the newly added rows, newest first.
            ids.reverse()
            self.gui.library_view.select_rows(ids)
        for path in temp_files:
            os.remove(path)
def set_metadata_opf2(root, cover_prefix, mi, opf_version,
                      cover_data=None, apply_null=False,
                      update_timestamp=False, force_identifiers=False,
                      add_missing_cover=True):
    # Write the metadata in `mi` into an already parsed OPF 2 tree and
    # return (rendered XML, raster cover href or None).
    mi = MetaInformation(mi)
    # These structural attributes must not be smart-updated into the OPF.
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)
    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        mi.languages = normalize_languages(list(opf.raw_languages) or [],
            mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        # Merge new identifiers over the existing ones, dropping empties.
        orig = opf.get_identifiers()
        orig.update(mi.get_identifiers())
        opf.set_identifiers({k:v for k, v in orig.iteritems() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        # No raster cover declared: reuse the guide's cover item or create
        # a fresh manifest entry for cover.jpg.
        guide_raster_cover = opf.guide_raster_cover
        i = None
        if guide_raster_cover is not None:
            i = guide_raster_cover
            raster_cover = i.get('href')
        else:
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            i = create_manifest_item(opf.root, name, 'cover')
            if i is not None:
                raster_cover = name
        if i is not None:
            if opf_version.major < 3:
                # OPF 2: point the legacy <meta name="cover"> at the item.
                [x.getparent().remove(x) for x in opf.root.xpath(
                    '//*[local-name()="meta" and @name="cover"]')]
                m = opf.create_metadata_element('meta', is_dc=False)
                m.set('name', 'cover'), m.set('content', i.get('id'))
            else:
                # OPF 3: move the cover-image property onto this item.
                for x in opf.root.xpath('//*[local-name()="item" and contains(@properties, "cover-image")]'):
                    x.set('properties', x.get('properties').replace('cover-image', '').strip())
                i.set('properties', 'cover-image')

    with pretty_print:
        return opf.render(), raster_cover
def set_metadata_opf2(root, cover_prefix, mi, opf_version,
                      cover_data=None, apply_null=False,
                      update_timestamp=False, force_identifiers=False,
                      add_missing_cover=True):
    # Write the metadata in `mi` into an already parsed OPF 2 tree and
    # return (rendered XML, raster cover href or None).
    mi = MetaInformation(mi)
    # These structural attributes must not be smart-updated into the OPF.
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)
    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        mi.languages = normalize_languages(list(opf.raw_languages) or [],
            mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        # Merge new identifiers over the existing ones, dropping empties.
        orig = opf.get_identifiers()
        orig.update(mi.get_identifiers())
        opf.set_identifiers({k:v for k, v in orig.items() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        # No raster cover declared: reuse the guide's cover item or create
        # a fresh manifest entry for cover.jpg.
        guide_raster_cover = opf.guide_raster_cover
        i = None
        if guide_raster_cover is not None:
            i = guide_raster_cover
            raster_cover = i.get('href')
        else:
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            i = create_manifest_item(opf.root, name, 'cover')
            if i is not None:
                raster_cover = name
        if i is not None:
            if opf_version.major < 3:
                # OPF 2: point the legacy <meta name="cover"> at the item.
                [x.getparent().remove(x) for x in opf.root.xpath(
                    '//*[local-name()="meta" and @name="cover"]')]
                m = opf.create_metadata_element('meta', is_dc=False)
                m.set('name', 'cover'), m.set('content', i.get('id'))
            else:
                # OPF 3: move the cover-image property onto this item.
                for x in opf.root.xpath('//*[local-name()="item" and contains(@properties, "cover-image")]'):
                    x.set('properties', x.get('properties').replace('cover-image', '').strip())
                i.set('properties', 'cover-image')

    with pretty_print:
        return opf.render(), raster_cover
def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''
    root = _get_fbroot(stream)
    book_title = _parse_book_title(root)
    authors = _parse_authors(root)

    # fallback for book_title: derive it from the stream's file name
    if book_title:
        book_title = unicode(book_title)
    else:
        book_title = force_unicode(
            os.path.splitext(
                os.path.basename(getattr(stream, 'name', _('Unknown'))))[0])
    mi = MetaInformation(book_title, authors)

    # Every optional field is parsed best-effort: a failure in any one
    # parser simply leaves mi unchanged for that field.
    for parse in (_parse_cover, _parse_comments, _parse_tags, _parse_series,
                  _parse_isbn, _parse_publisher, _parse_pubdate,
                  _parse_language):
        try:
            parse(root, mi)
        except:
            pass
    return mi
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object.

    Reads the Dropbook/eReader PDB metadata record, sanitizing each field,
    and optionally extracts the cover image.
    """
    # Compile the sanitizing pattern once instead of rebuilding the same
    # regex for every field (title, authors, publisher, isbn).
    sanitize_pat = re.compile(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]')

    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)
    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            try:
                mdata = pheader.section_data(hr.metadata_offset)
                # Metadata fields are NUL-separated:
                # 0=title, 1=author, 3=publisher, 4=isbn
                mdata = mdata.split('\x00')
                mi.title = sanitize_pat.sub('', mdata[0])
                mi.authors = [sanitize_pat.sub('', mdata[1])]
                mi.publisher = sanitize_pat.sub('', mdata[3])
                mi.isbn = sanitize_pat.sub('', mdata[4])
            except:
                # Best-effort: a malformed metadata record leaves mi as-is.
                pass

        if extract_cover:
            mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snb = SNBFile()

    try:
        if hasattr(stream, 'write'):
            # A real file-like object
            stream.seek(0)
            snb.Parse(stream, True)
        else:
            # A raw bytestring was passed in; wrap it
            snb.Parse(io.BytesIO(stream), True)
        meta = snb.GetFileStream('snbf/book.snbf')
        if meta is not None:
            tree = etree.fromstring(meta)
            mi.title = tree.find('.//head/name').text
            mi.authors = [tree.find('.//head/author').text]
            mi.language = tree.find('.//head/language').text.lower().replace('_', '-')
            mi.publisher = tree.find('.//head/publisher').text
            if extract_cover:
                cover = tree.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    _base, ext = os.path.splitext(cover.text)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    mi.cover_data = (ext[-3:],
                            snb.GetFileStream('snbc/images/' + cover.text))
    except Exception:
        import traceback
        traceback.print_exc()

    return mi
def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_identifiers=False):
    # Write the metadata in mi into an EPUB (opened as stream) in place,
    # replacing the OPF and, when possible, the raster cover image.
    stream.seek(0)
    reader = get_zip_reader(stream, root=os.getcwdu())
    raster_cover = reader.opf.raster_cover
    mi = MetaInformation(mi)
    new_cdata = None
    replacements = {}
    # Obtain new cover data: prefer mi.cover_data, fall back to reading
    # the file at mi.cover; failures leave new_cdata as None.
    try:
        new_cdata = mi.cover_data[1]
        if not new_cdata:
            raise Exception('no cover')
    except:
        try:
            new_cdata = open(mi.cover, 'rb').read()
        except:
            pass
    new_cover = cpath = None
    if new_cdata and raster_cover:
        try:
            cpath = posixpath.join(posixpath.dirname(reader.opf_path), raster_cover)
            # The existing cover can only be replaced if it is not DRM
            # encrypted and has a plain raster image extension.
            cover_replacable = not reader.encryption_meta.is_encrypted(cpath) and \
                os.path.splitext(cpath)[1].lower() in ('.png', '.jpg', '.jpeg')
            if cover_replacable:
                new_cover = _write_new_cover(new_cdata, cpath)
                replacements[cpath] = open(new_cover.name, 'rb')
        except:
            import traceback
            traceback.print_exc()
    update_metadata(reader.opf, mi, apply_null=apply_null,
                    update_timestamp=update_timestamp,
                    force_identifiers=force_identifiers)
    newopf = StringIO(reader.opf.render())
    # Replace the OPF (and any queued cover file) inside the zip archive.
    if isinstance(reader.archive, LocalZipFile):
        reader.archive.safe_replace(reader.container[OPF.MIMETYPE], newopf,
            extra_replacements=replacements)
    else:
        safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
            extra_replacements=replacements)
    # Best-effort cleanup of the temporary cover file.
    try:
        if cpath is not None:
            replacements[cpath].close()
            os.remove(replacements[cpath].name)
    except:
        pass
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    stream.seek(0)
    if stream.read(5) != r'{\rtf':
        return MetaInformation(_('Unknown'))
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(_('Unknown'))

    stream.seek(0)
    cpg = detect_codepage(stream)
    stream.seek(0)

    def field(pat):
        # Decoded, stripped match of pat against the info block, or None.
        m = pat.search(block)
        return None if m is None else decode(m.group(1).strip(), cpg)

    title = field(title_pat)
    if title is None:
        title = _('Unknown')
    author = field(author_pat)

    mi = MetaInformation(title)
    if author:
        mi.authors = string_to_authors(author)

    comment = field(comment_pat)
    if comment is not None:
        mi.comments = comment
    tags = field(tags_pat)
    if tags is not None:
        mi.tags = list(filter(None, (x.strip() for x in tags.split(','))))
    publisher = field(publisher_pat)
    if publisher is not None:
        mi.publisher = publisher
    return mi
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if not stream.read(14) == MAGIC:
            # BUG FIX: was the Python-2-only "print >> sys.stderr" statement,
            # a syntax error on Python 3; use the print function as the
            # other copy of this function in this file does.
            print('Couldn\'t read RB header from file', file=sys.stderr)
            return mi
        stream.read(10)

        read_i32 = lambda: struct.unpack('<I', stream.read(4))[0]

        stream.seek(read_i32())
        toc_count = read_i32()

        # Scan the TOC for the INFO section (flag == 2).
        for i in range(toc_count):
            stream.read(32)
            length, offset, flag = read_i32(), read_i32(), read_i32()
            if flag == 2:
                break
        else:
            print('Couldn\'t find INFO from RB file', file=sys.stderr)
            return mi

        stream.seek(offset)
        info = stream.read(length).splitlines()
        for line in info:
            if '=' not in line:
                continue
            # Split only on the first '=' so values containing '=' survive.
            key, value = line.split('=', 1)
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
                mi.author = value
                mi.authors = string_to_authors(value)
    except Exception as err:
        # BUG FIX: unicode() does not exist on Python 3; interpolate the
        # exception directly and print without encoding to bytes.
        msg = 'Couldn\'t read metadata from rb: %s with error %s' % (mi.title, err)
        print(msg, file=sys.stderr)
        raise
    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf = OPF(StringIO(zf.read(opf_name)))
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if cover_href:
                    mi.cover_data = (os.path.splitext(cover_href)[1],
                                     zf.read(cover_href))
    except:
        # Best-effort: return whatever metadata was gathered so far.
        return mi
    return mi
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if not stream.read(14) == MAGIC:
            print(u'Couldn\'t read RB header from file', file=sys.stderr)
            return mi
        stream.read(10)

        read_i32 = lambda: struct.unpack('<I', stream.read(4))[0]

        stream.seek(read_i32())
        toc_count = read_i32()

        # Scan the TOC for the INFO section (flag == 2).
        for i in range(toc_count):
            stream.read(32)
            length, offset, flag = read_i32(), read_i32(), read_i32()
            if flag == 2:
                break
        else:
            print(u'Couldn\'t find INFO from RB file', file=sys.stderr)
            return mi

        stream.seek(offset)
        # NOTE(review): stream.read() presumably yields bytes here; the
        # '=' membership test below assumes text lines — confirm callers
        # pass a text-mode stream or that this runs under Python 2.
        info = stream.read(length).splitlines()
        for line in info:
            if '=' not in line:
                continue
            # Split only on the first '=' so values containing '=' survive.
            key, value = line.split('=', 1)
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
                mi.author = value
                mi.authors = string_to_authors(value)
    except Exception as err:
        # BUG FIX: unicode() is a NameError on Python 3 (this version
        # already uses the py3 print function), and printing
        # msg.encode('utf8') would emit a bytes repr; print the text.
        msg = u'Couldn\'t read metadata from rb: %s with error %s' % (mi.title, err)
        print(msg, file=sys.stderr)
        raise
    return mi
def get_metadata_quick(raw):
    # Parse PDF info-dict metadata from raw bytes via podofo, without
    # extracting a cover.
    doc = podofo.PDFDoc()
    doc.load(raw)
    title = doc.title or '_'
    authors = string_to_authors(doc.author) if doc.author else [_('Unknown')]
    creator = doc.creator
    try:
        tags = [t for t in (k.strip() for k in doc.keywords.split(u',')) if t]
    except:
        tags = []

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    return mi
def parse_comic_comment(comment, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation
    import json

    mi = MetaInformation(None, None)
    decoded = json.loads(comment)
    if isinstance(decoded, dict):
        # Use the first ComicBookInfo* section found, if any.
        for cat in decoded:
            if cat.startswith('ComicBookInfo'):
                get_comic_book_info(decoded[cat], mi, series_index=series_index)
                break
    return mi
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    # Merge metadata from a list of format files, trying them in priority
    # order. An OPF file with a title short-circuits everything else.
    mi = MetaInformation(None, None)
    formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
    extensions = [path_to_ext(x) for x in formats]
    if 'opf' in extensions:
        opf = formats[extensions.index('opf')]
        mi2 = opf_metadata(opf)
        if mi2 is not None and mi2.title:
            return mi2

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream, stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except:
                # Unreadable format: move on to the next one.
                continue
            # An application_id means we found authoritative metadata.
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
def get_metadata_from_reader(rdr):
    # Build a MetaInformation from the reader's home page markup.
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
        resolve_entities=True)[0])

    title = rdr.title
    # Use the declared encoding only if Python recognises it.
    try:
        enc = rdr.GetEncoding()
        codecs.lookup(enc)
    except:
        enc = 'cp1252'
    title = force_unicode(title, enc)

    mi = MetaInformation(title, _get_authors(home))
    for attr, value in (('publisher', _get_publisher(home)),
                        ('isbn', _get_isbn(home)),
                        ('comments', _get_comments(home))):
        if value:
            setattr(mi, attr, value)
    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi
def get_metadata(stream):
    """ Return metadata as a L{MetaInfo} object """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if stream.read(10) not in MAGIC:
            # BUG FIX: was the Python-2-only "print >>sys.stderr" statement,
            # a syntax error on Python 3; use the print function.
            print('Couldn\'t read IMP header from file', file=sys.stderr)
            return mi

        def cString(skip=0):
            # Read a NUL-terminated string, optionally skipping `skip`
            # terminators (restarting accumulation after each).
            result = ''
            while 1:
                data = stream.read(1)
                if data == '\x00':
                    if not skip:
                        return result
                    skip -= 1
                    result, data = '', ''
                result += data

        stream.read(38)  # skip past some uninteresting headers
        _, category, title, author = cString(), cString(), cString(1), cString(2)

        if title:
            mi.title = title
        if author:
            mi.authors = string_to_authors(author)
            mi.author = author
        if category:
            mi.category = category
    except Exception as err:
        # BUG FIX: unicode() does not exist on Python 3; interpolate the
        # exception directly and print the text, not encoded bytes.
        msg = 'Couldn\'t read metadata from imp: %s with error %s' % (mi.title, err)
        print(msg, file=sys.stderr)
    return mi
def get_metadata(stream, cover=True):
    # Extract PDF metadata (and optionally the cover) by running pdfinfo
    # in a worker process against a temporary copy of the stream.
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                    (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

        title = info.get('Title', None)
        au = info.get('Author', None)
        au = [_('Unknown')] if au is None else string_to_authors(au)
        mi = MetaInformation(title, au)

        creator = info.get('Creator', None)
        if creator:
            mi.book_producer = creator

        keywords = info.get('Keywords', None)
        mi.tags = [x.strip() for x in keywords.split(',')] if keywords else []
        # Promote the first keyword that looks like an ISBN, removing any
        # keywords that normalize to the same ISBN.
        isbns = [i for i in (check_isbn(x) for x in mi.tags) if i]
        if isbns:
            mi.isbn = isbn = isbns[0]
            mi.tags = [x for x in mi.tags if check_isbn(x) != isbn]

        subject = info.get('Subject', None)
        if subject:
            mi.tags.insert(0, subject)

        if cdata:
            mi.cover_data = ('jpeg', cdata)

        return mi
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snb = SNBFile()

    try:
        if hasattr(stream, 'write'):
            # A real file-like object
            stream.seek(0)
            snb.Parse(stream, True)
        else:
            # A raw string was passed in; wrap it
            snb.Parse(StringIO(stream), True)
        meta = snb.GetFileStream('snbf/book.snbf')
        if meta is not None:
            tree = etree.fromstring(meta)
            mi.title = tree.find('.//head/name').text
            mi.authors = [tree.find('.//head/author').text]
            mi.language = tree.find('.//head/language').text.lower().replace('_', '-')
            mi.publisher = tree.find('.//head/publisher').text
            if extract_cover:
                cover = tree.find('.//head/cover')
                if cover is not None and cover.text is not None:
                    _base, ext = os.path.splitext(cover.text)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    mi.cover_data = (ext[-3:],
                            snb.GetFileStream('snbc/images/' + cover.text))
    except Exception:
        import traceback
        traceback.print_exc()

    return mi
def get_metadata(stream):
    # Read metadata from a MOBI/AZW stream. Delegates Topaz files to the
    # topaz reader; otherwise reads the EXTH header, falling back to a
    # full (small-file-only) content extraction for embedded metadata.
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        # Topaz format: handled by a dedicated reader.
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        # Default the title to the file name, when the stream has one.
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # No EXTH header: for small files, extract the full content and
        # look for embedded metadata.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    # Cover: prefer the EXTH cover_offset, else the first image record.
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''
    root = _get_fbroot(get_fb2_data(stream)[0])
    ctx = Context(root)
    book_title = _parse_book_title(root, ctx)
    authors = _parse_authors(root, ctx) or [_('Unknown')]

    # fallback for book_title: derive it from the stream's file name
    if book_title:
        book_title = unicode_type(book_title)
    else:
        book_title = force_unicode(
            os.path.splitext(
                os.path.basename(getattr(stream, 'name', _('Unknown'))))[0])
    mi = MetaInformation(book_title, authors)

    # Every optional field is parsed best-effort: a failure in any one
    # parser simply leaves mi unchanged for that field.
    for parse in (_parse_cover, _parse_comments, _parse_tags, _parse_series,
                  _parse_isbn, _parse_publisher, _parse_pubdate,
                  _parse_language):
        try:
            parse(root, mi, ctx)
        except:
            pass
    return mi
def do_add_empty(
    dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers,
    languages
):
    # Build a MetaInformation from the command-line values and add an
    # empty (format-less) book record.
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    # Order preserved from the original: the identifier map is applied
    # before the explicit isbn.
    if identifiers:
        mi.set_identifiers(identifiers)
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series = series
        mi.series_index = series_index
    if cover:
        mi.cover = cover
    if languages:
        mi.languages = languages
    ids, duplicates = dbctx.run('add', 'empty', read_cover(mi))
    prints(_('Added book ids: %s') % ','.join(map(str, ids)))
def get_social_metadata(title, authors, publisher, isbn, username=None,
        password=None):
    # Scrape social metadata (title/authors/series/rating) for an ISBN
    # from librarything.com. Network or login failures return mi as-is.
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if isbn:
        br = get_browser()
        try:
            login(br, username, password)

            raw = br.open_novisit('http://www.librarything.com/isbn/'
                        +isbn).read()
        except:
            return mi
        if '/wiki/index.php/HelpThing:Verify' in raw:
            raise Exception('LibraryThing is blocking calibre.')
        if not raw:
            return mi
        raw = raw.decode('utf-8', 'replace')
        raw = strip_encoding_declarations(raw)
        root = html.fromstring(raw)
        # Page title/author only fill in values missing from mi.
        h1 = root.xpath('//div[@class="headsummary"]/h1')
        if h1 and not mi.title:
            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
        if h2 and not mi.authors:
            mi.authors = [html.tostring(x, method='text', encoding=unicode)
                    for x in h2]
        # Series appears as "Name (index)" in an h3 link.
        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
        if h3:
            match = None
            for h in h3:
                series = html.tostring(h, method='text', encoding=unicode)
                match = re.search(r'(.+) \((.+)\)', series)
                if match is not None:
                    break
            if match is not None:
                mi.series = match.group(1).strip()
                match = re.search(r'[0-9.]+', match.group(2))
                si = 1.0
                if match is not None:
                    si = float(match.group())
                mi.series_index = si
        #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
        #if tags:
        #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
        #        in tags]
        # Average rating lives in the 4th column of the wsltable.
        span = root.xpath(
                '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
        if span:
            raw = html.tostring(span[0], method='text', encoding=unicode)
            match = re.search(r'([0-9.]+)', raw)
            if match is not None:
                rating = float(match.group())
                if rating > 0 and rating <= 5:
                    mi.rating = rating
    return mi
def read_user_metadata(self):
    '''
    Read all metadata specified by the user. Command line options
    override metadata from a specified OPF file.
    '''
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPF
    mi = MetaInformation(None, [])
    if self.opts.read_metadata_from_opf is not None:
        self.opts.read_metadata_from_opf = os.path.abspath(
                self.opts.read_metadata_from_opf)
        opf = OPF(open(self.opts.read_metadata_from_opf, 'rb'),
                os.path.dirname(self.opts.read_metadata_from_opf))
        mi = opf.to_book_metadata()
    # Command-line options take precedence over the OPF values.
    self.opts_to_mi(mi)
    if mi.cover:
        # A URL cover is downloaded to a local file first.
        if mi.cover.startswith('http:') or mi.cover.startswith('https:'):
            mi.cover = self.download_cover(mi.cover)
        ext = mi.cover.rpartition('.')[-1].lower().strip()
        if ext not in ('png', 'jpg', 'jpeg', 'gif'):
            ext = 'jpg'
        # Inline the cover bytes and drop the path reference.
        mi.cover_data = (ext, open(mi.cover, 'rb').read())
        mi.cover = None
    self.user_metadata = mi
def get_metadata(stream, cover=True):
    # Extract PDF metadata (and optionally the cover) by running pdfinfo
    # in a worker process against a temporary copy of the stream.
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                    (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

        title = info.get('Title', None)
        author = info.get('Author', None)
        if author is None:
            authors = [_('Unknown')]
        else:
            authors = string_to_authors(author)
        mi = MetaInformation(title, authors)

        creator = info.get('Creator', None)
        if creator:
            mi.book_producer = creator

        keywords = info.get('Keywords', None)
        mi.tags = []
        if keywords:
            mi.tags = [x.strip() for x in keywords.split(',')]
            # Promote the first keyword that looks like an ISBN, removing
            # any keywords that normalize to the same ISBN.
            isbns = [i for i in (check_isbn(x) for x in mi.tags) if i]
            if isbns:
                mi.isbn = isbn = isbns[0]
                mi.tags = [x for x in mi.tags if check_isbn(x) != isbn]

        subject = info.get('Subject', None)
        if subject:
            mi.tags.insert(0, subject)

        if cdata:
            mi.cover_data = ('jpeg', cdata)

        return mi