def get_metadata(self, md, select):
    """Look up a book on Douban and convert the record into a ``Metadata``.

    The ISBN lookup is tried first when ``md.isbn`` is set; if it yields
    nothing, a title search is made with ``md.title``, ``md.author_sort``
    and ``select``.

    Returns ``None`` when no book is found. Otherwise returns a ``Metadata``
    object filled from the book record, with the large cover image
    downloaded into ``cover_data`` as ``(extension, StringIO)``.
    """
    record = self.get_book_by_isbn(md.isbn) if md.isbn else None
    if not record:
        record = self.get_book_by_title(md.title, md.author_sort, select)
    if not record:
        return None

    mi = Metadata(record['title'])

    # The first author doubles as the sort key once every REMOVES pattern
    # has been stripped from it.
    mi.authors = record['author']
    mi.author_sort = mi.authors[0] if mi.authors else None
    if mi.author_sort:
        for pattern in REMOVES:
            mi.author_sort = pattern.sub("", mi.author_sort)
        mi.authors[0] = mi.author_sort

    mi.publisher = record['publisher']
    mi.comments = record['summary']
    mi.isbn = record.get('isbn13', None)

    tag_names = [tag['name'] for tag in record['tags']]
    mi.tags = tag_names[:8]  # keep at most eight tags

    average = float(record['rating']['average'])
    mi.rating = int(average)

    mi.pubdate = self.str2date(record['pubdate'])
    mi.timestamp = datetime.datetime.now()

    # Douban-specific extras.
    mi.douban_id = record['id']
    mi.douban_author_intro = record['author_intro']
    mi.douban_subtitle = record.get('subtitle', None)

    # The image format is taken from the URL's file extension.
    cover_url = record['images']['large']
    cover_ext = cover_url.rsplit(".", 1)[-1]
    cover_bytes = urlopen(cover_url).read()
    mi.cover_data = (cover_ext, StringIO(cover_bytes))
    return mi
def _metadata(self, baike):
    """Convert a Baidu Baike entry into a calibre ``Metadata`` object.

    ``baike`` must provide ``get_info()``, ``get_tags()``, ``get_image()``,
    ``get_summary()`` and ``http.url``.

    The publisher is taken from the serialization-platform field, falling
    back to the first-publication site/status fields and finally to a
    generic default. ``pubdate`` defaults to the current time and is
    replaced by the date found in the serialization-status field when that
    field marks the work as completed. The cover is downloaded only when
    ``self.copy_image`` is set and a cover URL is available.
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from cStringIO import StringIO

    info = baike.get_info()
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("\n".join("%s:\t%s" % v for v in info.items()))

    mi = Metadata(info['title'])

    # The default must be a unicode literal: calling .replace() with a
    # unicode argument on a non-ASCII byte string raises UnicodeDecodeError
    # on Python 2.
    plat = u"网络小说平台"
    plat = info.get(u'首发状态', plat)
    plat = info.get(u'首发网站', plat)
    plat = plat.replace(u'首发', '')
    mi.publisher = info.get(u'连载平台', plat)

    mi.authors = [info.get(u'作者', u'佚名')]
    mi.author_sort = mi.authors[0]
    mi.isbn = BAIKE_ISBN
    mi.tags = baike.get_tags()
    mi.pubdate = datetime.datetime.now()
    mi.timestamp = datetime.datetime.now()
    mi.cover_url = baike.get_image()
    # Drop a trailing citation marker such as "[3]" from the summary.
    mi.comments = re.sub(r'\[\d+\]$', "", baike.get_summary())
    mi.website = baike.http.url
    mi.source = u'百度百科'

    # Skip the download when the entry has no image; urlopen() cannot take
    # an empty URL.
    if self.copy_image and mi.cover_url:
        img = StringIO(urlopen(mi.cover_url).read())
        img_fmt = mi.cover_url.split(".")[-1]
        mi.cover_data = (img_fmt, img)

    status = info.get(u'连载状态', "")
    if u'完结' in status:
        day = re.findall(r'\d*-\d*-\d*', status)
        try:
            mi.pubdate = datetime.datetime.strptime(day[0], '%Y-%m-%d')
        except (IndexError, ValueError):
            # No date, or one that is not YYYY-MM-DD: keep the default.
            pass
    return mi
def _metadata(self, book):
    """Build a calibre ``Metadata`` object from a Douban book record.

    Every pattern in ``REMOVES`` is stripped from each author name; when the
    record lists no authors a single placeholder author is used instead.
    The cover image is downloaded into ``cover_data`` only when
    ``self.copy_image`` is set. The resulting metadata is logged at debug
    level and returned.
    """
    cleaned_authors = []
    for name in book['author'] or ():
        for pattern in REMOVES:
            name = pattern.sub("", name)
        cleaned_authors.append(name)
    if not cleaned_authors:
        cleaned_authors = [u'佚名']  # placeholder meaning "anonymous"

    from calibre.ebooks.metadata.book.base import Metadata
    from cStringIO import StringIO

    mi = Metadata(book['title'])
    mi.authors = cleaned_authors
    mi.author_sort = mi.authors[0]
    mi.publisher = book['publisher']
    mi.comments = book['summary']
    mi.isbn = book.get('isbn13', None)

    tag_names = [tag['name'] for tag in book['tags']]
    mi.tags = tag_names[:8]  # keep at most eight tags

    average = float(book['rating']['average'])
    mi.rating = int(average)

    mi.pubdate = self.str2date(book['pubdate'])
    mi.timestamp = datetime.datetime.now()

    # Douban-specific extras.
    mi.douban_id = book['id']
    mi.douban_author_intro = book['author_intro']
    mi.douban_subtitle = book.get('subtitle', None)
    mi.website = "https://book.douban.com/isbn/%s" % mi.isbn
    mi.source = u'豆瓣'

    mi.cover_url = book['images']['large']
    if self.copy_image:
        # The image format is taken from the URL's file extension.
        cover_bytes = urlopen(mi.cover_url).read()
        cover_ext = mi.cover_url.rsplit(".", 1)[-1]
        mi.cover_data = (cover_ext, StringIO(cover_bytes))

    logging.debug("=================\ndouban metadata:\n%s" % mi)
    return mi
def get_metadata(self, md):
    """Look up a book on Douban and convert the record into a ``Metadata``.

    The ISBN lookup is tried first when ``md.isbn`` is set; if it yields
    nothing, a title search is made with ``md.title``.

    Returns ``None`` when neither lookup finds a book. Otherwise returns a
    ``Metadata`` object filled from the book record, with the large cover
    image downloaded into ``cover_data`` as ``(extension, StringIO)``.
    """
    book = None
    if md.isbn:
        book = self.get_book_by_isbn(md.isbn)
    if not book:
        book = self.get_book_by_title(md.title)
    if not book:
        # Nothing found: bail out instead of failing on book['title'].
        return None

    mi = Metadata(book['title'])

    # The first author doubles as the sort key once every REMOVES pattern
    # has been stripped from it.
    mi.authors = book['author']
    mi.author_sort = mi.authors[0] if mi.authors else None
    if mi.author_sort:
        for r in REMOVES:
            mi.author_sort = r.sub("", mi.author_sort)
        mi.authors[0] = mi.author_sort

    mi.publisher = book['publisher']
    mi.comments = book['summary']
    mi.isbn = book.get('isbn13', None)
    mi.tags = [t['name'] for t in book['tags']][:8]  # at most eight tags
    mi.rating = int(float(book['rating']['average']))
    mi.pubdate = self.str2date(book['pubdate'])
    mi.timestamp = datetime.datetime.now()

    # Douban-specific extras.
    mi.douban_id = book['id']
    mi.douban_author_intro = book['author_intro']
    mi.douban_subtitle = book.get('subtitle', None)

    # The image format is taken from the URL's file extension.
    img_url = book['images']['large']
    img_fmt = img_url.split(".")[-1]
    img = StringIO(urlopen(img_url).read())
    mi.cover_data = (img_fmt, img)

    # A routine dump of the result is diagnostic output, not an error.
    logging.debug("=================\ndouban metadata:\n%s" % mi)
    return mi
def read_metadata_kfx(stream, read_cover=True):
    """Read the metadata.kfx file that is found in the sdr book folder for KFX files.

    ``stream`` is read in full and decoded through ``Container`` and
    ``extract_metadata``. Returns a ``Metadata`` object carrying title,
    authors and, when present, author sort, a ``mobi-asin`` identifier,
    languages, publication date, publisher and (if ``read_cover`` is true)
    the cover image.
    """
    container = Container(stream.read())
    fields = extract_metadata(container.decode())

    def has(key):
        # Truthy only when the field has a non-empty first value.
        values = fields[key]
        return values and values[0]

    def get(key, single=True):
        # Return the first cleaned value ('' if none), or all cleaned values.
        values = fields[key]
        if not single:
            return [clean_xml_chars(value) for value in values]
        return clean_xml_chars(values[0]) if values else ''

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    author_pattern = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(name):
        # Swap "Last, First" into "First Last" unless the tweak says to
        # copy the author string verbatim.
        if tweaks['author_sort_copy_method'] == 'copy':
            return name
        match = author_pattern.match(name.strip())
        if match is None:
            return name
        return match.group(2) + ' ' + match.group(1)

    mi = Metadata(title, [fix_author(name) for name in authors])

    if has('author'):
        mi.author_sort = get('author')

    # Prefer the ASIN; fall back to the content id.
    for id_key in ('ASIN', 'content_id'):
        if has(id_key):
            mi.set_identifier('mobi-asin', get(id_key))
            break

    if has('languages'):
        langs = [
            lang for lang in map(canonicalize_lang, get('languages', False))
            if lang
        ]
        if langs:
            mi.languages = langs

    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass  # best effort: an unparsable date is ignored

    if has('publisher'):
        publisher = get('publisher')
        if publisher != 'Unknown':
            mi.publisher = publisher

    if read_cover and fields[COVER_KEY]:
        try:
            data = base64.standard_b64decode(fields[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
def get_metadata(stream: IO, f_type: str) -> Metadata:
    """Read title, author, publisher, publication date and cover from a UMD book.

    ``f_type`` must be ``"umd"``. The cover, when the book has one, is
    stored as ``('jpeg', data)``.
    """
    assert f_type == "umd"

    umd = UMDFile.from_stream(stream)
    mi = Metadata(title=umd.title, authors=[umd.author])
    mi.publisher = umd.publisher
    mi.pubdate = umd.publish_date

    cover = umd.cover
    if cover:
        mi.cover_data = ('jpeg', cover)
    return mi
def read_metadata_kfx(stream, read_cover=True):
    """Read the metadata.kfx file that is found in the sdr book folder for KFX files.

    ``stream`` is read in full and decoded through ``Container`` and
    ``extract_metadata``. Returns a ``Metadata`` object carrying title,
    authors and, when present, author sort, a ``mobi-asin`` identifier,
    languages, publication date, publisher and (if ``read_cover`` is true)
    the cover image.
    """
    container = Container(stream.read())
    fields = extract_metadata(container.decode())

    def has(key):
        # Truthy only when the field has a non-empty first value.
        values = fields[key]
        return values and values[0]

    def get(key, single=True):
        # Return the first cleaned value ('' if none), or all cleaned values.
        values = fields[key]
        if not single:
            return [clean_xml_chars(value) for value in values]
        return clean_xml_chars(values[0]) if values else ''

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    author_pattern = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(name):
        # Swap "Last, First" into "First Last" unless the tweak says to
        # copy the author string verbatim.
        if tweaks['author_sort_copy_method'] == 'copy':
            return name
        match = author_pattern.match(name.strip())
        if match is None:
            return name
        return match.group(2) + ' ' + match.group(1)

    mi = Metadata(title, [fix_author(name) for name in authors])

    if has('author'):
        mi.author_sort = get('author')

    # Prefer the ASIN; fall back to the content id.
    for id_key in ('ASIN', 'content_id'):
        if has(id_key):
            mi.set_identifier('mobi-asin', get(id_key))
            break

    if has('languages'):
        langs = [
            lang for lang in map(canonicalize_lang, get('languages', False))
            if lang
        ]
        if langs:
            mi.languages = langs

    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass  # best effort: an unparsable date is ignored

    if has('publisher'):
        publisher = get('publisher')
        if publisher != 'Unknown':
            mi.publisher = publisher

    if read_cover and fields[COVER_KEY]:
        try:
            data = base64.standard_b64decode(fields[COVER_KEY])
            w, h, fmt = identify_data(data)
        except Exception:
            w, h, fmt = 0, 0, None
        # Only accept the cover when a format and non-zero size were found.
        if fmt and w and h:
            mi.cover_data = (fmt, data)

    return mi
def read_metadata_kfx(stream, read_cover=True):
    """Read the metadata.kfx file that is found in the sdr book folder for KFX files.

    ``stream`` is read in full and decoded through ``Container`` and
    ``extract_metadata``. Returns a ``Metadata`` object carrying title,
    authors and, when present, author sort, a ``mobi-asin`` identifier,
    languages, publication date, publisher and (if ``read_cover`` is true)
    the cover image.
    """
    container = Container(stream.read())
    fields = extract_metadata(container.decode())

    def has(key):
        # Truthy only when the field has a non-empty first value.
        values = fields[key]
        return values and values[0]

    def get(key, single=True):
        # Return the first cleaned value ("" if none), or all cleaned values.
        values = fields[key]
        if not single:
            return [clean_xml_chars(value) for value in values]
        return clean_xml_chars(values[0]) if values else ""

    title = get("title") or _("Unknown")
    authors = get("authors", False) or [_("Unknown")]
    author_pattern = re.compile(r"([^,]+?)\s*,\s+([^,]+)$")

    def fix_author(name):
        # Swap "Last, First" into "First Last" unless the tweak says to
        # copy the author string verbatim.
        if tweaks["author_sort_copy_method"] == "copy":
            return name
        match = author_pattern.match(name.strip())
        if match is None:
            return name
        return match.group(2) + " " + match.group(1)

    mi = Metadata(title, [fix_author(name) for name in authors])

    if has("author"):
        mi.author_sort = get("author")

    # Prefer the ASIN; fall back to the content id.
    for id_key in ("ASIN", "content_id"):
        if has(id_key):
            mi.set_identifier("mobi-asin", get(id_key))
            break

    if has("languages"):
        langs = [
            lang for lang in map(canonicalize_lang, get("languages", False))
            if lang
        ]
        if langs:
            mi.languages = langs

    if has("issue_date"):
        try:
            mi.pubdate = parse_only_date(get("issue_date"))
        except Exception:
            pass  # best effort: an unparsable date is ignored

    if has("publisher"):
        publisher = get("publisher")
        if publisher != "Unknown":
            mi.publisher = publisher

    if read_cover and fields[COVER_KEY]:
        try:
            data = base64.standard_b64decode(fields[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
def get_metadata(book_id):
    """Return ``(oldmi, newmi)`` for ``book_id``.

    ``oldmi`` is the metadata currently stored in ``db`` (cover included as
    data). ``newmi`` is read from the OPF file recorded in ``id_map`` for
    this book, or built from the old title and authors when there is no OPF
    file. When ``id_map`` also records a cover file, its bytes become
    ``newmi.cover_data``.
    """
    oldmi = db.get_metadata(
        book_id, index_is_id=True, get_cover=True, cover_as_data=True)
    opf_path, cover_path = id_map[book_id]

    if opf_path is not None:
        with open(opf_path, 'rb') as opf_file:
            newmi = OPF(
                opf_file,
                basedir=os.path.dirname(opf_path),
                populate_spine=False,
            ).to_book_metadata()
            newmi.cover, newmi.cover_data = None, (None, None)
        for field in ('title', 'authors'):
            if newmi.is_null(field):
                # Title and author are set to null if they are
                # the same as the originals as an optimization,
                # we undo that, as it is confusing.
                newmi.set(field, copy.copy(oldmi.get(field)))
    else:
        newmi = Metadata(oldmi.title, authors=tuple(oldmi.authors))

    if cover_path:
        with open(cover_path, 'rb') as cover_file:
            newmi.cover_data = ('jpg', cover_file.read())
    return oldmi, newmi
def _metadata(self, baike):
    """Convert a Baidu Baike entry into a calibre ``Metadata`` object.

    ``baike`` must provide ``get_info()``, ``get_tags()``, ``get_image()``,
    ``get_summary()``, ``get_id()`` and ``http.url``.

    The publisher is taken from the serialization-platform field, falling
    back to the first-publication site/status fields and finally to a
    generic default. ``pubdate`` defaults to the current time and is
    replaced by the date found in the serialization-status field when that
    field marks the work as completed. The cover is downloaded only when
    ``self.copy_image`` is set and a cover URL is available.
    """
    from calibre.ebooks.metadata.book.base import Metadata

    info = baike.get_info()
    logging.debug("\n".join("%s:\t%s" % v for v in info.items()))

    mi = Metadata(info['title'])

    # Unicode literal like every other literal here; on Python 2 a byte
    # string default would fail in the .replace() call below.
    plat = u"网络小说平台"
    plat = info.get(u'首发状态', plat)
    plat = info.get(u'首发网站', plat)
    plat = plat.replace(u'首发', '')
    mi.publisher = info.get(u'连载平台', plat)

    mi.authors = [info.get(u'作者', u'佚名')]
    mi.author_sort = mi.authors[0]
    mi.isbn = BAIKE_ISBN
    mi.tags = baike.get_tags()
    mi.pubdate = datetime.datetime.now()
    mi.timestamp = datetime.datetime.now()
    mi.cover_url = baike.get_image()
    # Drop a trailing citation marker such as "[3]" from the summary.
    mi.comments = re.sub(r'\[\d+\]$', "", baike.get_summary())
    mi.website = baike.http.url
    mi.source = u'百度百科'
    mi.provider_key = KEY
    mi.provider_value = baike.get_id()

    if self.copy_image and mi.cover_url:
        logging.debug("fetching cover: %s", mi.cover_url)
        img = io.BytesIO(urlopen(mi.cover_url).read())
        img_fmt = mi.cover_url.split(".")[-1]
        mi.cover_data = (img_fmt, img)

    status = info.get(u'连载状态', "")
    if u'完结' in status:
        # Raw string: "\d" is not a valid str escape sequence.
        day = re.findall(r'\d*-\d*-\d*', status)
        try:
            mi.pubdate = datetime.datetime.strptime(day[0], '%Y-%m-%d')
        except (IndexError, ValueError):
            # No date, or one that is not YYYY-MM-DD: keep the default.
            pass
    return mi
def get_metadata(stream):
    """Extract metadata and a likely cover image from a zip-based document.

    ``docProps/core.xml`` and ``docProps/app.xml`` (names compared
    case-insensitively) are passed to ``_read_doc_props`` and
    ``_read_app_props``. The first jpeg/jpg/png/gif member whose
    height/width ratio lies in [0.8, 1.8] and whose ``height * width`` is
    at least 12000 becomes ``cover_data`` as ``(fmt, raw_bytes)``.

    Returns the populated ``Metadata`` object.
    """
    with ZipFile(stream, 'r') as zf:
        mi = Metadata(_('Unknown'))
        cdata = None
        for zi in zf.infolist():
            name = zi.filename.lower()
            ext = zi.filename.rpartition('.')[-1].lower()
            if name == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif name == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    # Unreadable image: try the next member.
                    continue
                if width <= 0 or height <= 0:
                    # Degenerate size: skip it rather than divide by zero.
                    continue
                # float() keeps the ratio exact where "/" is integer
                # division (Python 2 without the division future import).
                ratio = float(height) / width
                if 0.8 <= ratio <= 1.8 and height * width >= 12000:
                    cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata
    return mi
def _metadata(self, baike):
    """Convert a Baidu Baike entry into a calibre ``Metadata`` object.

    ``baike`` must provide ``get_info()``, ``get_tags()``, ``get_image()``,
    ``get_summary()`` and ``http.url``. Field names are looked up in their
    Traditional Chinese spelling.

    The publisher is taken from the serialization-platform field, falling
    back to the first-publication site/status fields and finally to a
    generic default. ``pubdate`` defaults to the current time and is
    replaced by the date found in the serialization-status field when that
    field marks the work as completed. The cover is downloaded only when
    ``self.copy_image`` is set and a cover URL is available.
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from cStringIO import StringIO

    info = baike.get_info()
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("\n".join("%s:\t%s" % v for v in info.items()))

    mi = Metadata(info['title'])

    # The default must be a unicode literal: calling .replace() with a
    # unicode argument on a non-ASCII byte string raises UnicodeDecodeError
    # on Python 2.
    plat = u"網絡小說平台"
    plat = info.get(u'首發狀態', plat)
    plat = info.get(u'首發網站', plat)
    plat = plat.replace(u'首發', '')
    mi.publisher = info.get(u'連載平台', plat)

    mi.authors = [info.get(u'作者', u'佚名')]
    mi.author_sort = mi.authors[0]
    mi.isbn = BAIKE_ISBN
    mi.tags = baike.get_tags()
    mi.pubdate = datetime.datetime.now()
    mi.timestamp = datetime.datetime.now()
    mi.cover_url = baike.get_image()
    # Drop a trailing citation marker such as "[3]" from the summary.
    mi.comments = re.sub(r'\[\d+\]$', "", baike.get_summary())
    mi.website = baike.http.url
    mi.source = u'百度百科'

    # Skip the download when the entry has no image; urlopen() cannot take
    # an empty URL.
    if self.copy_image and mi.cover_url:
        img = StringIO(urlopen(mi.cover_url).read())
        img_fmt = mi.cover_url.split(".")[-1]
        mi.cover_data = (img_fmt, img)

    status = info.get(u'連載狀態', "")
    if u'完結' in status:
        day = re.findall(r'\d*-\d*-\d*', status)
        try:
            mi.pubdate = datetime.datetime.strptime(day[0], '%Y-%m-%d')
        except (IndexError, ValueError):
            # No date, or one that is not YYYY-MM-DD: keep the default.
            pass
    return mi
def get_metadata(stream):
    """Extract metadata and a likely cover image from a zip-based document.

    ``docProps/core.xml`` and ``docProps/app.xml`` (names compared
    case-insensitively) are passed to ``_read_doc_props`` and
    ``_read_app_props``. The first jpeg/jpg/png/gif member whose
    height/width ratio lies in [0.8, 1.8] and whose ``height * width`` is
    at least 12000 becomes ``cover_data`` as ``(fmt, raw_bytes)``.

    Returns the populated ``Metadata`` object.
    """
    with ZipFile(stream, 'r') as zf:
        mi = Metadata(_('Unknown'))
        cdata = None
        for zi in zf.infolist():
            name = zi.filename.lower()
            ext = zi.filename.rpartition('.')[-1].lower()
            if name == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif name == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    # Unreadable image: try the next member.
                    continue
                if width <= 0 or height <= 0:
                    # Degenerate size: skip it rather than divide by zero.
                    continue
                # float() keeps the ratio exact where "/" is integer
                # division (Python 2 without the division future import).
                ratio = float(height) / width
                if 0.8 <= ratio <= 1.8 and height * width >= 12000:
                    cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata
    return mi