def get_metadata(stream, extract_cover=True): zin = zipfile.ZipFile(stream, 'r') odfs = odfmetaparser() parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_namespaces, 1) parser.setContentHandler(odfs) content = zin.read('meta.xml') parser.parse(StringIO(content)) data = odfs.seenfields mi = MetaInformation(None, []) if data.has_key('title'): mi.title = data['title'] if data.get('initial-creator', '').strip(): mi.authors = string_to_authors(data['initial-creator']) elif data.has_key('creator'): mi.authors = string_to_authors(data['creator']) if data.has_key('description'): mi.comments = data['description'] if data.has_key('language'): mi.language = data['language'] if data.get('keywords', ''): mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()] opfmeta = False # we need this later for the cover opfnocover = False if data.get('opf.metadata','') == 'true': # custom metadata contains OPF information opfmeta = True if data.get('opf.titlesort', ''): mi.title_sort = data['opf.titlesort'] if data.get('opf.authors', ''): mi.authors = string_to_authors(data['opf.authors']) if data.get('opf.authorsort', ''): mi.author_sort = data['opf.authorsort'] if data.get('opf.isbn', ''): isbn = check_isbn(data['opf.isbn']) if isbn is not None: mi.isbn = isbn if data.get('opf.publisher', ''): mi.publisher = data['opf.publisher'] if data.get('opf.pubdate', ''): mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True) if data.get('opf.series', ''): mi.series = data['opf.series'] if data.get('opf.seriesindex', ''): try: mi.series_index = float(data['opf.seriesindex']) except ValueError: mi.series_index = 1.0 if data.get('opf.language', ''): cl = canonicalize_lang(data['opf.language']) if cl: mi.languages = [cl] opfnocover = data.get('opf.nocover', 'false') == 'true' if not opfnocover: try: read_cover(stream, zin, mi, opfmeta, extract_cover) except: pass # Do not let an error reading the cover prevent reading other data return mi
def get_metadata(stream, extract_cover=True):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
        if data.get('opf.seriesindex', ''):
            try:
                mi.series_index = float(data['opf.seriesindex'])
            except ValueError:
                mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except:
            pass  # Do not let an error reading the cover prevent reading other data
    return mi
def get_metadata(stream, extract_cover=True):
    ''' Return metadata as a L{MetaInfo} object '''
    name = getattr(stream, 'name', '').rpartition('.')[0]
    if name:
        name = os.path.basename(name)
    mi = MetaInformation(name or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    mdata = u''
    for x in range(0, 4):
        line = stream.readline().decode('utf-8', 'replace')
        if line == '':
            break
        else:
            mdata += line

    mdata = mdata[:100]

    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        mi.title = mo.group('title')
        mi.authors = mo.group('author').split(',')

    return mi
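# The TXT reader above only recognises one very specific header layout inside
# the first four lines (at most 100 characters): a title line, two blank
# lines, then a comma-separated author line. A minimal illustration with
# made-up content (the sample text is an assumption, not from the source):
import re

sample = u'A Sample Title\n\n\nJane Doe, John Roe\n'
mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', sample)
print(mo.group('title'))   # -> A Sample Title
print(mo.group('author'))  # -> Jane Doe, John Roe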
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) snbFile = SNBFile() try: if not hasattr(stream, 'write'): snbFile.Parse(io.BytesIO(stream), True) else: stream.seek(0) snbFile.Parse(stream, True) meta = snbFile.GetFileStream('snbf/book.snbf') if meta is not None: meta = etree.fromstring(meta) mi.title = meta.find('.//head/name').text mi.authors = [meta.find('.//head/author').text] mi.language = meta.find('.//head/language').text.lower().replace('_', '-') mi.publisher = meta.find('.//head/publisher').text if extract_cover: cover = meta.find('.//head/cover') if cover is not None and cover.text is not None: root, ext = os.path.splitext(cover.text) if ext == '.jpeg': ext = '.jpg' mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text)) except Exception: import traceback traceback.print_exc() return mi
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    mi = MetaInformation(None, None)
    formats.sort(cmp=lambda x, y: cmp(METADATA_PRIORITIES[path_to_ext(x)],
                                      METADATA_PRIORITIES[path_to_ext(y)]))
    extensions = list(map(path_to_ext, formats))
    if 'opf' in extensions:
        opf = formats[extensions.index('opf')]
        mi2 = opf_metadata(opf)
        if mi2 is not None and mi2.title:
            return mi2

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream, stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except:
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
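# The priority sort in _metadata_from_formats() relies on a module-level
# METADATA_PRIORITIES mapping (extension -> integer rank) and a path_to_ext()
# helper that are not shown here. A minimal sketch, assuming a made-up format
# order; the real table and helper may differ.
import os

def path_to_ext(path):
    # lowercase extension without the leading dot
    return os.path.splitext(path)[1][1:].lower()

# Hypothetical ranking: formats later in the tuple get a higher rank and are
# read last, so they can refine values gathered from earlier formats via
# mi.smart_update().
METADATA_PRIORITIES = {ext: i for i, ext in enumerate(
    ('html', 'txt', 'pdf', 'rtf', 'fb2', 'mobi', 'epub', 'opf'))}

# Equivalent key-based form of the cmp-based sort used above:
formats = ['book.opf', 'book.pdf', 'book.epub']
formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
print(formats)  # -> ['book.pdf', 'book.epub', 'book.opf']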
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) snbFile = SNBFile() try: if not hasattr(stream, 'write'): snbFile.Parse(StringIO(stream), True) else: stream.seek(0) snbFile.Parse(stream, True) meta = snbFile.GetFileStream('snbf/book.snbf') if meta is not None: meta = etree.fromstring(meta) mi.title = meta.find('.//head/name').text mi.authors = [meta.find('.//head/author').text] mi.language = meta.find('.//head/language').text.lower().replace('_', '-') mi.publisher = meta.find('.//head/publisher').text if extract_cover: cover = meta.find('.//head/cover') if cover is not None and cover.text is not None: root, ext = os.path.splitext(cover.text) if ext == '.jpeg': ext = '.jpg' mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text)) except Exception: import traceback traceback.print_exc() return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(None, [_('Unknown')]) stream.seek(0) pheader = PdbHeaderReader(stream) # Only Dropbook produced 132 byte record0 files are supported if len(pheader.section_data(0)) == 132: hr = HeaderRecord(pheader.section_data(0)) if hr.compression in (2, 10) and hr.has_metadata == 1: try: mdata = pheader.section_data(hr.metadata_offset) mdata = mdata.split('\x00') mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0]) mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])] mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3]) mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4]) except: pass if extract_cover: mi.cover_data = get_cover(pheader, hr) if not mi.title: mi.title = pheader.title if pheader.title else _('Unknown') return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(None, [_('Unknown')]) stream.seek(0) pheader = PdbHeaderReader(stream) # Only Dropbook produced 132 byte record0 files are supported if len(pheader.section_data(0)) == 132: hr = HeaderRecord(pheader.section_data(0)) if hr.compression in (2, 10) and hr.has_metadata == 1: try: mdata = pheader.section_data(hr.metadata_offset) mdata = mdata.decode('cp1252', 'replace').split('\x00') mi.title = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[0]) mi.authors = [re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[1])] mi.publisher = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[3]) mi.isbn = re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', mdata[4]) except Exception: pass if extract_cover: mi.cover_data = get_cover(pheader, hr) if not mi.title: mi.title = pheader.title if pheader.title else _('Unknown') return mi
def get_metadata(stream): """ Return metadata as a L{MetaInfo} object """ title = 'Unknown' mi = MetaInformation(title, ['Unknown']) stream.seek(0) try: if stream.read(10) not in MAGIC: print >>sys.stderr, u'Couldn\'t read IMP header from file' return mi def cString(skip=0): result = '' while 1: data = stream.read(1) if data == '\x00': if not skip: return result skip -= 1 result, data = '', '' result += data stream.read(38) # skip past some uninteresting headers _, category, title, author = cString(), cString(), cString(1), cString(2) if title: mi.title = title if author: mi.authors = string_to_authors(author) mi.author = author if category: mi.category = category except Exception as err: msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err)) print >>sys.stderr, msg.encode('utf8') return mi
def get_social_metadata(title, authors, publisher, isbn, username=None,
        password=None):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if isbn:
        br = get_browser()
        try:
            login(br, username, password)
            raw = br.open_novisit('http://www.librarything.com/isbn/'
                                  + isbn).read()
        except:
            return mi
        if '/wiki/index.php/HelpThing:Verify' in raw:
            raise Exception('LibraryThing is blocking calibre.')
        if not raw:
            return mi
        raw = raw.decode('utf-8', 'replace')
        raw = strip_encoding_declarations(raw)
        root = html.fromstring(raw)
        h1 = root.xpath('//div[@class="headsummary"]/h1')
        if h1 and not mi.title:
            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
        if h2 and not mi.authors:
            mi.authors = [html.tostring(x, method='text', encoding=unicode)
                          for x in h2]
        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
        if h3:
            match = None
            for h in h3:
                series = html.tostring(h, method='text', encoding=unicode)
                match = re.search(r'(.+) \((.+)\)', series)
                if match is not None:
                    break
            if match is not None:
                mi.series = match.group(1).strip()
                match = re.search(r'[0-9.]+', match.group(2))
                si = 1.0
                if match is not None:
                    si = float(match.group())
                mi.series_index = si
        #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
        #if tags:
        #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
        #            in tags]
        span = root.xpath(
            '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
        if span:
            raw = html.tostring(span[0], method='text', encoding=unicode)
            match = re.search(r'([0-9.]+)', raw)
            if match is not None:
                rating = float(match.group())
                if rating > 0 and rating <= 5:
                    mi.rating = rating
    return mi
def do_set_metadata(opts, mi, stream, stream_type):
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        opf_mi = OPF(open(from_opf, 'rb')).to_book_metadata()
        mi.smart_update(opf_mi)

    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
                         'author_sort', 'get_cover', 'cover', 'tags',
                         'lrf_bookid', 'identifiers'):
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)

    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'pubdate', None) is not None:
        mi.pubdate = parse_date(opts.pubdate, assume_utc=False, as_utc=False)

    if getattr(opts, 'identifiers', None):
        val = {k.strip(): v.strip() for k, v in
               (x.partition(':')[0::2] for x in opts.identifiers)}
        if val:
            orig = mi.get_identifiers()
            orig.update(val)
            val = {k: v for k, v in iteritems(orig) if k and v}
            mi.set_identifiers(val)

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        mi.cover_data = (ext, open(opts.cover, 'rb').read())

    with force_identifiers:
        set_metadata(stream, mi, stream_type)
def get_metadata(stream):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if data.has_key('title'):
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif data.has_key('creator'):
        mi.authors = string_to_authors(data['creator'])
    if data.has_key('description'):
        mi.comments = data['description']
    if data.has_key('language'):
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = data['keywords'].split(',')
    return mi
def do_add_empty(db, title, authors, isbn, tags, series, series_index):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series, mi.series_index = series, series_index
    db.import_book(mi, [])
    write_dirtied(db)
    send_message()
def add_document(self, document):
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation('', [_('Unknown')])
    mi.title = document['title']
    mi.authors = document['authors']
    mi.tags = ["Mendeley"]
    mendeley_id = {}
    mendeley_id['mendeley'] = document['mendeley_id']
    mi.identifiers = mendeley_id
    mi.series_index = 1  # needed?
    self.db.add_books([document['path']], ['pdf'], [mi], False, True)
    os.remove(document['path'])
def get_metadata(stream): """ Return basic meta-data about the LRF file in C{stream} as a L{MetaInformation} object. @param stream: A file like object or an instance of L{LRFMetaFile} """ lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream) authors = string_to_authors(lrf.author) mi = MetaInformation(lrf.title.strip(), authors) mi.author = lrf.author.strip() mi.comments = lrf.free_text.strip() mi.category = lrf.category.strip()+', '+lrf.classification.strip() tags = [x.strip() for x in mi.category.split(',') if x.strip()] if tags: mi.tags = tags if mi.category.strip() == ',': mi.category = None mi.publisher = lrf.publisher.strip() mi.cover_data = lrf.get_cover() try: mi.title_sort = lrf.title_reading.strip() if not mi.title_sort: mi.title_sort = None except: pass try: mi.author_sort = lrf.author_reading.strip() if not mi.author_sort: mi.author_sort = None except: pass if not mi.title or 'unknown' in mi.title.lower(): mi.title = None if not mi.authors: mi.authors = None if not mi.author or 'unknown' in mi.author.lower(): mi.author = None if not mi.category or 'unknown' in mi.category.lower(): mi.category = None if not mi.publisher or 'unknown' in mi.publisher.lower() or \ 'some publisher' in mi.publisher.lower(): mi.publisher = None return mi
def get_metadata(stream): """ Return basic meta-data about the LRF file in C{stream} as a L{MetaInformation} object. @param stream: A file like object or an instance of L{LRFMetaFile} """ lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream) authors = string_to_authors(lrf.author) mi = MetaInformation(lrf.title.strip(), authors) mi.author = lrf.author.strip() mi.comments = lrf.free_text.strip() mi.category = lrf.category.strip() + ', ' + lrf.classification.strip() tags = [x.strip() for x in mi.category.split(',') if x.strip()] if tags: mi.tags = tags if mi.category.strip() == ',': mi.category = None mi.publisher = lrf.publisher.strip() mi.cover_data = lrf.get_cover() try: mi.title_sort = lrf.title_reading.strip() if not mi.title_sort: mi.title_sort = None except: pass try: mi.author_sort = lrf.author_reading.strip() if not mi.author_sort: mi.author_sort = None except: pass if not mi.title or 'unknown' in mi.title.lower(): mi.title = None if not mi.authors: mi.authors = None if not mi.author or 'unknown' in mi.author.lower(): mi.author = None if not mi.category or 'unknown' in mi.category.lower(): mi.category = None if not mi.publisher or 'unknown' in mi.publisher.lower() or \ 'some publisher' in mi.publisher.lower(): mi.publisher = None return mi
def get_metadata(stream): """ Return metadata as a L{MetaInfo} object """ stream.seek(0) if stream.read(5) != r'{\rtf': return MetaInformation(_('Unknown')) block = get_document_info(stream)[0] if not block: return MetaInformation(_('Unknown')) stream.seek(0) cpg = detect_codepage(stream) stream.seek(0) title_match = title_pat.search(block) if title_match is not None: title = decode(title_match.group(1).strip(), cpg) else: title = _('Unknown') author_match = author_pat.search(block) if author_match is not None: author = decode(author_match.group(1).strip(), cpg) else: author = None mi = MetaInformation(title) if author: mi.authors = string_to_authors(author) comment_match = comment_pat.search(block) if comment_match is not None: comment = decode(comment_match.group(1).strip(), cpg) mi.comments = comment tags_match = tags_pat.search(block) if tags_match is not None: tags = decode(tags_match.group(1).strip(), cpg) mi.tags = list(filter(None, (x.strip() for x in tags.split(',')))) publisher_match = publisher_pat.search(block) if publisher_match is not None: publisher = decode(publisher_match.group(1).strip(), cpg) mi.publisher = publisher return mi
def get_metadata(stream): """ Return metadata as a L{MetaInfo} object """ title = 'Unknown' mi = MetaInformation(title, ['Unknown']) stream.seek(0) try: if not stream.read(14) == MAGIC: print >> sys.stderr, u'Couldn\'t read RB header from file' return mi stream.read(10) read_i32 = lambda: struct.unpack('<I', stream.read(4))[0] stream.seek(read_i32()) toc_count = read_i32() for i in range(toc_count): stream.read(32) length, offset, flag = read_i32(), read_i32(), read_i32() if flag == 2: break else: print >> sys.stderr, u'Couldn\'t find INFO from RB file' return mi stream.seek(offset) info = stream.read(length).splitlines() for line in info: if '=' not in line: continue key, value = line.split('=') if key.strip() == 'TITLE': mi.title = value.strip() elif key.strip() == 'AUTHOR': mi.author = value mi.authors = string_to_authors(value) except Exception as err: msg = u'Couldn\'t read metadata from rb: %s with error %s' % ( mi.title, unicode(err)) print >> sys.stderr, msg.encode('utf8') raise return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) pml = '' if stream.name.endswith('.pmlz'): with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) zf.extractall(tdir) pmls = glob.glob(os.path.join(tdir, '*.pml')) for p in pmls: with open(p, 'r+b') as p_stream: pml += p_stream.read() if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True) else: pml = stream.read() if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name))) for comment in re.findall(r'(?mus)\\v.*?\\v', pml): m = re.search(r'TITLE="(.*?)"', comment) if m: mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(r'AUTHOR="(.*?)"', comment) if m: if mi.authors == [_('Unknown')]: mi.authors = [] mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))) m = re.search(r'PUBLISHER="(.*?)"', comment) if m: mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(r'COPYRIGHT="(.*?)"', comment) if m: mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(r'ISBN="(.*?)"', comment) if m: mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) return mi
def get_metadata(stream, extract_cover=True): """ Return metadata as a L{MetaInfo} object """ mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) pml = b'' if stream.name.endswith('.pmlz'): with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) zf.extractall(tdir) pmls = glob.glob(os.path.join(tdir, '*.pml')) for p in pmls: with open(p, 'r+b') as p_stream: pml += p_stream.read() if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True) else: pml = stream.read() if extract_cover: mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name))) for comment in re.findall(br'(?ms)\\v.*?\\v', pml): m = re.search(br'TITLE="(.*?)"', comment) if m: mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(br'AUTHOR="(.*?)"', comment) if m: if mi.authors == [_('Unknown')]: mi.authors = [] mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))) m = re.search(br'PUBLISHER="(.*?)"', comment) if m: mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(br'COPYRIGHT="(.*?)"', comment) if m: mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) m = re.search(br'ISBN="(.*?)"', comment) if m: mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))) return mi
def get_metadata(stream): """ Return metadata as a L{MetaInfo} object """ title = 'Unknown' mi = MetaInformation(title, ['Unknown']) stream.seek(0) try: if not stream.read(14) == MAGIC: print(u'Couldn\'t read RB header from file', file=sys.stderr) return mi stream.read(10) read_i32 = lambda: struct.unpack('<I', stream.read(4))[0] stream.seek(read_i32()) toc_count = read_i32() for i in range(toc_count): stream.read(32) length, offset, flag = read_i32(), read_i32(), read_i32() if flag == 2: break else: print(u'Couldn\'t find INFO from RB file', file=sys.stderr) return mi stream.seek(offset) info = stream.read(length).splitlines() for line in info: if '=' not in line: continue key, value = line.split('=') if key.strip() == 'TITLE': mi.title = value.strip() elif key.strip() == 'AUTHOR': mi.author = value mi.authors = string_to_authors(value) except Exception as err: msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err)) print(msg.encode('utf8'), file=sys.stderr) raise return mi
def do_add_empty(dbctx, title, authors, isbn, tags, series, series_index,
                 cover, identifiers, languages):
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if identifiers:
        mi.set_identifiers(identifiers)
    if isbn:
        mi.isbn = isbn
    if tags:
        mi.tags = tags
    if series:
        mi.series, mi.series_index = series, series_index
    if cover:
        mi.cover = cover
    if languages:
        mi.languages = languages
    ids, duplicates = dbctx.run('add', 'empty', read_cover(mi))
    prints(_('Added book ids: %s') % ','.join(map(str, ids)))
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except:
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass
    if mi.is_null('title'):
        mi.title = name
    return mi
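# metadata_from_filename() only uses the named groups that happen to exist in
# the compiled pattern; missing groups raise IndexError and are skipped. A
# small, self-contained illustration with a made-up pattern and file name (the
# pattern string is an example, not the default 'filename_pattern' preference):
import re

# Example pattern for "Author - Title (3)" style names. Only the named groups
# 'author', 'title' and 'series_index' are supplied; the other groups the
# function looks for (series, isbn, publisher, ...) are absent and skipped.
pat = re.compile(r'(?P<author>.+?) - (?P<title>.+?)(?: \((?P<series_index>\d+)\))?$')

m = pat.search('Jane Doe - A Sample Book (3)')
print(m.group('title'))         # -> A Sample Book
print(m.group('author'))        # -> Jane Doe
print(m.group('series_index'))  # -> 3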
def _start_merge(self,book_list): db=self.gui.current_db self.previous = self.gui.library_view.currentIndex() # if any bad, bail. bad_list = filter(lambda x : not x['good'], book_list) if len(bad_list) > 0: d = error_dialog(self.gui, _('Cannot Merge Epubs'), _('%s books failed.')%len(bad_list), det_msg='\n'.join(map(lambda x : x['error'] , bad_list))) d.exec_() else: d = OrderEPUBsDialog(self.gui, _('Order EPUBs to Merge'), prefs, self.qaction.icon(), book_list, ) d.exec_() if d.result() != d.Accepted: return book_list = d.get_books() print("2:%s"%(time.time()-self.t)) self.t = time.time() deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword']) mi = MetaInformation(deftitle,["Temp Author"]) # if all same series, use series for name. But only if all. serieslist = map(lambda x : x['series'], filter(lambda x : x['series'] != None, book_list)) if len(serieslist) == len(book_list): mi.title = serieslist[0] for sr in serieslist: if mi.title != sr: mi.title = deftitle; break # print("======================= mi.title:\n%s\n========================="%mi.title) mi.authors = list() authorslists = map(lambda x : x['authors'], book_list) for l in authorslists: for a in l: if a not in mi.authors: mi.authors.append(a) #mi.authors = [item for sublist in authorslists for item in sublist] # print("======================= mi.authors:\n%s\n========================="%mi.authors) #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list)) # print("======================= mi.author_sort:\n%s\n========================="%mi.author_sort) # set publisher if all from same publisher. publishers = set(map(lambda x : x['publisher'], book_list)) if len(publishers) == 1: mi.publisher = publishers.pop() # print("======================= mi.publisher:\n%s\n========================="%mi.publisher) tagslists = map(lambda x : x['tags'], book_list) mi.tags = [item for sublist in tagslists for item in sublist] mi.tags.extend(prefs['mergetags'].split(',')) # print("======================= mergetags:\n%s\n========================="%prefs['mergetags']) # print("======================= m.tags:\n%s\n========================="%mi.tags) languageslists = map(lambda x : x['languages'], book_list) mi.languages = [item for sublist in languageslists for item in sublist] mi.series = '' # ======================= make book comments ========================= if len(mi.authors) > 1: booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors'])) else: booktitle = lambda x : x['title'] mi.comments = (_("%s containing:")+"\n\n") % prefs['mergeword'] if prefs['includecomments']: def bookcomments(x): if x['comments']: return '<b>%s</b>\n\n%s'%(booktitle(x),x['comments']) else: return '<b>%s</b>\n'%booktitle(x) mi.comments += ('<div class="mergedbook">' + '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) + '</div>') else: mi.comments += '\n'.join( [ booktitle(x) for x in book_list ] ) # ======================= make book entry ========================= book_id = db.create_book_entry(mi, add_duplicates=True) # set default cover to same as first book coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True) if coverdata: db.set_cover(book_id, coverdata) # ======================= custom columns =================== print("3:%s"%(time.time()-self.t)) self.t = time.time() # have to get custom from db for each book. 
idslist = map(lambda x : x['calibre_id'], book_list) custom_columns = self.gui.library_view.model().custom_columns for col, action in prefs['custom_cols'].iteritems(): #print("col: %s action: %s"%(col,action)) if col not in custom_columns: print("%s not an existing column, skipping."%col) continue coldef = custom_columns[col] #print("coldef:%s"%coldef) if action not in permitted_values[coldef['datatype']]: print("%s not a valid column type for %s, skipping."%(col,action)) continue label = coldef['label'] found = False value = None idx = None if action == 'first': idx = 0 if action == 'last': idx = -1 if action in ['first','last']: value = db.get_custom(idslist[idx], label=label, index_is_id=True) if coldef['datatype'] == 'series' and value != None: # get the number-in-series, too. value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True)) found = True if action in ('add','average','averageall'): value = 0.0 count = 0 for bid in idslist: try: value += db.get_custom(bid, label=label, index_is_id=True) found = True # only count ones with values unless averageall count += 1 except: # if not set, it's None and fails. # only count ones with values unless averageall if action == 'averageall': count += 1 if found and action in ('average','averageall'): value = value / count if coldef['datatype'] == 'int': value += 0.5 # so int rounds instead of truncs. if action == 'and': value = True for bid in idslist: try: value = value and db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'or': value = False for bid in idslist: try: value = value or db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'newest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue > value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'oldest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue < value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'union': if not coldef['is_multiple']: action = 'concat' else: value = set() for bid in idslist: try: value = value.union(db.get_custom(bid, label=label, index_is_id=True)) found = True except: # if not set, it's None and fails. pass if action == 'concat': value = "" for bid in idslist: try: value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass value = value.strip() if found and value != None: db.set_custom(book_id,value,label=label,commit=False) db.commit() print("4:%s"%(time.time()-self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) print("5:%s"%(time.time()-self.t)) self.t = time.time() confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in. 
However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''), 'epubmerge_created_now_edit_again', self.gui) self.gui.iactions['Edit Metadata'].edit_metadata(False) print("5:%s"%(time.time()-self.t)) self.t = time.time() self.gui.tags_view.recount() totalsize = sum(map(lambda x : x['epub_size'], book_list)) print("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize))) if len(book_list) > 100 or totalsize > 5*1024*1024: confirm('\n'+_('''You're merging %s EPUBs totaling %s. Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)), 'epubmerge_edited_now_merge_again', self.gui) self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000) mi = db.get_metadata(book_id,index_is_id=True) mergedepub = PersistentTemporaryFile(suffix='.epub') epubstomerge = map(lambda x : x['epub'] , book_list) coverjpgpath = None if mi.has_cover: # grab the path to the real image. coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') self.do_merge( mergedepub, epubstomerge, authoropts=mi.authors, titleopt=mi.title, descopt=mi.comments, tags=mi.tags, languages=mi.languages, titlenavpoints=prefs['titlenavpoints'], flattentoc=prefs['flattentoc'], printtimes=True, coverjpgpath=coverjpgpath, keepmetadatafiles=prefs['keepmeta'] ) print("6:%s"%(time.time()-self.t)) print(_("Merge finished, output in:\n%s")%mergedepub.name) self.t = time.time() db.add_format_with_hooks(book_id, 'EPUB', mergedepub, index_is_id=True) print("7:%s"%(time.time()-self.t)) self.t = time.time() self.gui.status_bar.show_message(_('Finished merging %s EPUBs.')%len(book_list), 3000) self.gui.library_view.model().refresh_ids([book_id]) self.gui.tags_view.recount() current = self.gui.library_view.currentIndex() self.gui.library_view.model().current_changed(current, self.previous)
def _start_merge(self, book_list): db = self.gui.current_db self.previous = self.gui.library_view.currentIndex() # if any bad, bail. bad_list = filter(lambda x: not x['good'], book_list) if len(bad_list) > 0: d = error_dialog(self.gui, _('Cannot Merge Epubs'), _('%s books failed.') % len(bad_list), det_msg='\n'.join( map(lambda x: x['error'], bad_list))) d.exec_() else: d = OrderEPUBsDialog( self.gui, _('Order EPUBs to Merge'), prefs, self.qaction.icon(), book_list, ) d.exec_() if d.result() != d.Accepted: return book_list = d.get_books() logger.debug("2:%s" % (time.time() - self.t)) self.t = time.time() deftitle = "%s %s" % (book_list[0]['title'], prefs['mergeword']) mi = MetaInformation(deftitle, ["Temp Author"]) # if all same series, use series for name. But only if all. serieslist = map(lambda x: x['series'], filter(lambda x: x['series'] != None, book_list)) if len(serieslist) == len(book_list): mi.title = serieslist[0] for sr in serieslist: if mi.title != sr: mi.title = deftitle break # logger.debug("======================= mi.title:\n%s\n========================="%mi.title) mi.authors = list() authorslists = map(lambda x: x['authors'], book_list) for l in authorslists: for a in l: if a not in mi.authors: mi.authors.append(a) #mi.authors = [item for sublist in authorslists for item in sublist] # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors) #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list)) # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort) # set publisher if all from same publisher. publishers = set(map(lambda x: x['publisher'], book_list)) if len(publishers) == 1: mi.publisher = publishers.pop() # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher) tagslists = map(lambda x: x['tags'], book_list) mi.tags = [item for sublist in tagslists for item in sublist] mi.tags.extend(prefs['mergetags'].split(',')) # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags']) # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags) languageslists = map(lambda x: x['languages'], book_list) mi.languages = [ item for sublist in languageslists for item in sublist ] mi.series = '' # ======================= make book comments ========================= if len(mi.authors) > 1: booktitle = lambda x: _("%s by %s") % (x['title'], ' & '.join( x['authors'])) else: booktitle = lambda x: x['title'] mi.comments = (_("%s containing:") + "\n\n") % prefs['mergeword'] if prefs['includecomments']: def bookcomments(x): if x['comments']: return '<b>%s</b>\n\n%s' % (booktitle(x), x['comments']) else: return '<b>%s</b>\n' % booktitle(x) mi.comments += ('<div class="mergedbook">' + '<hr></div><div class="mergedbook">'.join( [bookcomments(x) for x in book_list]) + '</div>') else: mi.comments += '\n'.join([booktitle(x) for x in book_list]) # ======================= make book entry ========================= book_id = db.create_book_entry(mi, add_duplicates=True) # set default cover to same as first book coverdata = db.cover(book_list[0]['calibre_id'], index_is_id=True) if coverdata: db.set_cover(book_id, coverdata) # ======================= custom columns =================== logger.debug("3:%s" % (time.time() - self.t)) self.t = time.time() # have to get custom from db for each book. 
idslist = map(lambda x: x['calibre_id'], book_list) custom_columns = self.gui.library_view.model().custom_columns for col, action in prefs['custom_cols'].iteritems(): #logger.debug("col: %s action: %s"%(col,action)) if col not in custom_columns: logger.debug("%s not an existing column, skipping." % col) continue coldef = custom_columns[col] #logger.debug("coldef:%s"%coldef) if action not in permitted_values[coldef['datatype']]: logger.debug( "%s not a valid column type for %s, skipping." % (col, action)) continue label = coldef['label'] found = False value = None idx = None if action == 'first': idx = 0 if action == 'last': idx = -1 if action in ['first', 'last']: value = db.get_custom(idslist[idx], label=label, index_is_id=True) if coldef['datatype'] == 'series' and value != None: # get the number-in-series, too. value = "%s [%s]" % ( value, db.get_custom_extra( idslist[idx], label=label, index_is_id=True)) found = True if action in ('add', 'average', 'averageall'): value = 0.0 count = 0 for bid in idslist: try: value += db.get_custom(bid, label=label, index_is_id=True) found = True # only count ones with values unless averageall count += 1 except: # if not set, it's None and fails. # only count ones with values unless averageall if action == 'averageall': count += 1 if found and action in ('average', 'averageall'): value = value / count if coldef['datatype'] == 'int': value += 0.5 # so int rounds instead of truncs. if action == 'and': value = True for bid in idslist: try: value = value and db.get_custom( bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'or': value = False for bid in idslist: try: value = value or db.get_custom( bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'newest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue > value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'oldest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue < value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'union': if not coldef['is_multiple']: action = 'concat' else: value = set() for bid in idslist: try: value = value.union( db.get_custom(bid, label=label, index_is_id=True)) found = True except: # if not set, it's None and fails. pass if action == 'concat': value = "" for bid in idslist: try: value = value + ' ' + db.get_custom( bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass value = value.strip() if found and value != None: db.set_custom(book_id, value, label=label, commit=False) db.commit() logger.debug("4:%s" % (time.time() - self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) logger.debug("5:%s" % (time.time() - self.t)) self.t = time.time() confirm( '\n' + _('''The book for the new Merged EPUB has been created and default metadata filled in. 
However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.''' ), 'epubmerge_created_now_edit_again', self.gui) self.gui.iactions['Edit Metadata'].edit_metadata(False) logger.debug("5:%s" % (time.time() - self.t)) self.t = time.time() self.gui.tags_view.recount() totalsize = sum(map(lambda x: x['epub_size'], book_list)) logger.debug("merging %s EPUBs totaling %s" % (len(book_list), gethumanreadable(totalsize))) if len(book_list) > 100 or totalsize > 5 * 1024 * 1024: confirm( '\n' + _('''You're merging %s EPUBs totaling %s. Calibre will be locked until the merge is finished.''' ) % (len(book_list), gethumanreadable(totalsize)), 'epubmerge_edited_now_merge_again', self.gui) self.gui.status_bar.show_message( _('Merging %s EPUBs...') % len(book_list), 60000) mi = db.get_metadata(book_id, index_is_id=True) mergedepub = PersistentTemporaryFile(suffix='.epub') epubstomerge = map(lambda x: x['epub'], book_list) coverjpgpath = None if mi.has_cover: # grab the path to the real image. coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') self.do_merge(mergedepub, epubstomerge, authoropts=mi.authors, titleopt=mi.title, descopt=mi.comments, tags=mi.tags, languages=mi.languages, titlenavpoints=prefs['titlenavpoints'], flattentoc=prefs['flattentoc'], printtimes=True, coverjpgpath=coverjpgpath, keepmetadatafiles=prefs['keepmeta']) logger.debug("6:%s" % (time.time() - self.t)) logger.debug(_("Merge finished, output in:\n%s") % mergedepub.name) self.t = time.time() db.add_format_with_hooks(book_id, 'EPUB', mergedepub, index_is_id=True) logger.debug("7:%s" % (time.time() - self.t)) self.t = time.time() self.gui.status_bar.show_message( _('Finished merging %s EPUBs.') % len(book_list), 3000) self.gui.library_view.model().refresh_ids([book_id]) self.gui.tags_view.recount() current = self.gui.library_view.currentIndex() self.gui.library_view.model().current_changed( current, self.previous)
def get_metadata(stream, extract_cover=True):
    whitespace = re.compile(r'\s+')

    def normalize(s):
        return whitespace.sub(' ', s).strip()

    with ZipFile(stream) as zf:
        meta = zf.read('meta.xml')
        root = fromstring(meta)

        def find(field):
            ns, tag = fields[field]
            ans = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns})
            if ans:
                return normalize(
                    tostring(ans[0], method='text', encoding='unicode',
                             with_tail=False)).strip()

        mi = MetaInformation(None, [])
        title = find('title')
        if title:
            mi.title = title
        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)
        desc = find('description')
        if desc:
            mi.comments = desc
        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]
        kw = find('keyword') or find('keywords')
        if kw:
            mi.tags = [x.strip() for x in kw.split(',') if x.strip()]

        data = {}
        for tag in root.xpath('//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]}):
            name = (tag.get('{%s}name' % METANS) or '').lower()
            vtype = tag.get('{%s}value-type' % METANS) or 'string'
            val = tag.text
            if name and val:
                if vtype == 'boolean':
                    val = val == 'true'
                data[name] = val

        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True
            if data.get('opf.titlesort', ''):
                mi.title_sort = data['opf.titlesort']
            if data.get('opf.authors', ''):
                mi.authors = string_to_authors(data['opf.authors'])
            if data.get('opf.authorsort', ''):
                mi.author_sort = data['opf.authorsort']
            if data.get('opf.isbn', ''):
                isbn = check_isbn(data['opf.isbn'])
                if isbn is not None:
                    mi.isbn = isbn
            if data.get('opf.publisher', ''):
                mi.publisher = data['opf.publisher']
            if data.get('opf.pubdate', ''):
                mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
            if data.get('opf.identifiers'):
                try:
                    mi.identifiers = json.loads(data['opf.identifiers'])
                except Exception:
                    pass
            if data.get('opf.rating'):
                try:
                    mi.rating = max(0, min(float(data['opf.rating']), 10))
                except Exception:
                    pass
            if data.get('opf.series', ''):
                mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except Exception:
                    mi.series_index = 1.0
            if data.get('opf.language', ''):
                cl = canonicalize_lang(data['opf.language'])
                if cl:
                    mi.languages = [cl]
            opfnocover = data.get('opf.nocover', False)
        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
def _start_merge(self,book_list,tdir=None): db=self.gui.current_db self.previous = self.gui.library_view.currentIndex() # if any bad, bail. bad_list = [ x for x in book_list if not x['good'] ] if len(bad_list) > 0: d = error_dialog(self.gui, _('Cannot Merge Epubs'), _('%s books failed.')%len(bad_list), det_msg='\n'.join( [ x['error'] for x in bad_list ])) d.exec_() else: d = OrderEPUBsDialog(self.gui, _('Order EPUBs to Merge'), prefs, self.qaction.icon(), book_list, ) d.exec_() if d.result() != d.Accepted: return book_list = d.get_books() logger.debug("2:%s"%(time.time()-self.t)) self.t = time.time() deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword']) mi = MetaInformation(deftitle,["Temp Author"]) # if all same series, use series for name. But only if all. serieslist = [ x['series'] for x in book_list if x['series'] != None ] if len(serieslist) == len(book_list): mi.title = serieslist[0] for sr in serieslist: if mi.title != sr: mi.title = deftitle; break # logger.debug("======================= mi.title:\n%s\n========================="%mi.title) mi.authors = list() authorslists = [ x['authors'] for x in book_list ] for l in authorslists: for a in l: if a not in mi.authors: mi.authors.append(a) #mi.authors = [item for sublist in authorslists for item in sublist] # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors) #mi.author_sort = ' & '.join([ x['author_sort'] for x in book_list ]) # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort) # set publisher if all from same publisher. publishers = set([ x['publisher'] for x in book_list ]) if len(publishers) == 1: mi.publisher = publishers.pop() # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher) tagslists = [ x['tags'] for x in book_list ] mi.tags = [item for sublist in tagslists for item in sublist] mi.tags.extend(prefs['mergetags'].split(',')) # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags']) # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags) languageslists = [ x['languages'] for x in book_list ] mi.languages = [item for sublist in languageslists for item in sublist] mi.series = '' if prefs['firstseries'] and book_list[0]['series']: mi.series = book_list[0]['series'] mi.series_index = book_list[0]['series_index'] # ======================= make book comments ========================= if len(mi.authors) > 1: booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors'])) else: booktitle = lambda x : x['title'] mi.comments = ("<p>"+_("%s containing:")+"</p>") % prefs['mergeword'] if prefs['includecomments']: def bookcomments(x): if x['comments']: return '<p><b>%s</b></p>%s'%(booktitle(x),x['comments']) else: return '<b>%s</b><br/>'%booktitle(x) mi.comments += ('<div class="mergedbook">' + '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) + '</div>') else: mi.comments += '<br/>'.join( [ booktitle(x) for x in book_list ] ) # ======================= make book entry ========================= book_id = db.create_book_entry(mi, add_duplicates=True) # set default cover to same as first book coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True) if coverdata: db.set_cover(book_id, coverdata) # ======================= custom columns =================== logger.debug("3:%s"%(time.time()-self.t)) self.t = time.time() # have to get custom from db for each book. 
idslist = [ x['calibre_id'] for x in book_list ] custom_columns = self.gui.library_view.model().custom_columns for col, action in six.iteritems(prefs['custom_cols']): #logger.debug("col: %s action: %s"%(col,action)) if col not in custom_columns: logger.debug("%s not an existing column, skipping."%col) continue coldef = custom_columns[col] #logger.debug("coldef:%s"%coldef) if action not in permitted_values[coldef['datatype']]: logger.debug("%s not a valid column type for %s, skipping."%(col,action)) continue label = coldef['label'] found = False value = None idx = None if action == 'first': idx = 0 if action == 'last': idx = -1 if action in ['first','last']: value = db.get_custom(idslist[idx], label=label, index_is_id=True) if coldef['datatype'] == 'series' and value != None: # get the number-in-series, too. value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True)) found = True if action in ('add','average','averageall'): value = 0.0 count = 0 for bid in idslist: try: value += db.get_custom(bid, label=label, index_is_id=True) found = True # only count ones with values unless averageall count += 1 except: # if not set, it's None and fails. # only count ones with values unless averageall if action == 'averageall': count += 1 if found and action in ('average','averageall'): value = value / count if coldef['datatype'] == 'int': value += 0.5 # so int rounds instead of truncs. if action == 'and': value = True for bid in idslist: try: value = value and db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'or': value = False for bid in idslist: try: value = value or db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass if action == 'newest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue > value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'oldest': value = None for bid in idslist: try: ivalue = db.get_custom(bid, label=label, index_is_id=True) if not value or ivalue < value: value = ivalue found = True except: # if not set, it's None and fails. pass if action == 'union': if not coldef['is_multiple']: action = 'concat' else: value = set() for bid in idslist: try: value = value.union(db.get_custom(bid, label=label, index_is_id=True)) found = True except: # if not set, it's None and fails. pass if action == 'concat': value = "" for bid in idslist: try: value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True) found = True except: # if not set, it's None and fails. pass value = value.strip() if action == 'now': value = datetime.now() found = True logger.debug("now: %s"%value) if found and value != None: logger.debug("value: %s"%value) db.set_custom(book_id,value,label=label,commit=False) db.commit() logger.debug("4:%s"%(time.time()-self.t)) self.t = time.time() self.gui.library_view.model().books_added(1) self.gui.library_view.select_rows([book_id]) logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in. 
However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''), 'epubmerge_created_now_edit_again', self.gui, title=_("EpubMerge"), show_cancel_button=False) self.gui.iactions['Edit Metadata'].edit_metadata(False) logger.debug("5:%s"%(time.time()-self.t)) self.t = time.time() self.gui.tags_view.recount() totalsize = sum([ x['epub_size'] for x in book_list ]) logger.debug("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize))) confirm('\n'+_('''EpubMerge will be done in a Background job. The merged EPUB will not appear in the Library until finished. You are merging %s EPUBs totaling %s.''')%(len(book_list),gethumanreadable(totalsize)), 'epubmerge_background_merge_again', self.gui, title=_("EpubMerge"), show_cancel_button=False) # if len(book_list) > 100 or totalsize > 5*1024*1024: # confirm('\n'+_('''You're merging %s EPUBs totaling %s. Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)), # 'epubmerge_edited_now_merge_again', # self.gui) self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000) mi = db.get_metadata(book_id,index_is_id=True) mergedepub = PersistentTemporaryFile(prefix="output_", suffix='.epub', dir=tdir) epubstomerge = [ x['epub'] for x in book_list ] epubtitles = {} for x in book_list: # save titles indexed by epub for reporting from BG epubtitles[x['epub']]=_("%s by %s") % (x['title'],' & '.join(x['authors'])) coverjpgpath = None if mi.has_cover: # grab the path to the real image. coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg') func = 'arbitrary_n' cpus = self.gui.job_manager.server.pool_size args = ['calibre_plugins.epubmerge.jobs', 'do_merge_bg', ({'book_id':book_id, 'book_count':len(book_list), 'tdir':tdir, 'outputepubfn':mergedepub.name, 'inputepubfns':epubstomerge, # already .name'ed 'epubtitles':epubtitles, # for reporting 'authoropts':mi.authors, 'titleopt':mi.title, 'descopt':mi.comments, 'tags':mi.tags, 'languages':mi.languages, 'titlenavpoints':prefs['titlenavpoints'], 'originalnavpoints':prefs['originalnavpoints'], 'flattentoc':prefs['flattentoc'], 'printtimes':True, 'coverjpgpath':coverjpgpath, 'keepmetadatafiles':prefs['keepmeta'] }, cpus)] desc = _('EpubMerge: %s')%mi.title job = self.gui.job_manager.run_job( self.Dispatcher(self.merge_done), func, args=args, description=desc) self.gui.jobs_pointer.start() self.gui.status_bar.show_message(_('Starting EpubMerge'),3000)