def add_word(self):
    """Prompt for a new word and language, add it to the current user
    dictionary, then refresh the list and scroll to the entry."""
    d = QDialog(self)
    d.l = l = QFormLayout(d)
    d.setWindowTitle(_('Add a word'))
    d.w = w = QLineEdit(d)
    w.setPlaceholderText(_('Word to add'))
    l.addRow(_('&Word:'), w)
    d.loc = loc = LanguagesEdit(parent=d)
    l.addRow(_('&Language:'), d.loc)
    # Default the language field to the current UI language
    loc.lang_codes = [canonicalize_lang(get_lang())]
    d.bb = bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel)
    bb.accepted.connect(d.accept), bb.rejected.connect(d.reject)
    l.addRow(bb)
    if d.exec_() != d.Accepted:
        return
    word = unicode(w.text())
    # Fall back to the UI language if the user cleared the language field
    lang = (loc.lang_codes or [canonicalize_lang(get_lang())])[0]
    if not word:
        return
    if (word, lang) not in self.current_dictionary.words:
        dictionaries.add_to_user_dictionary(self.current_dictionary.name, word, DictionaryLocale(lang, None))
        dictionaries.clear_caches()
        self.show_current_dictionary()
        self.dictionaries_changed = True
    # Scroll to the word, whether newly added or already present
    idx = self.find_word(word, lang)
    if idx > -1:
        self.words.scrollToItem(self.words.item(idx))
def __init__(self, mi, source_plugin, title, authors, identifiers):
    """Build a comparison key for a downloaded metadata result.
    NOTE(review): each component appears to use 1 for 'good' and 2 for
    'bad' so that tuple comparison ranks better results first — confirm
    against the comparator that consumes self.base/self.extra."""
    # 1 when any queried identifier matches one in the result
    same_identifier = 2
    idents = mi.get_identifiers()
    for k, v in identifiers.iteritems():
        if idents.get(k) == v:
            same_identifier = 1
            break
    # 1 when the source plugin reports no missing fields
    all_fields = 1 if source_plugin.test_fields(mi) is None else 2
    # 1 when the result title matches the queried title after cleanup
    exact_title = 1 if title and \
        cleanup_title(title) == cleanup_title(mi.title) else 2
    # 1 unless the result language is known and differs from the UI language
    language = 1
    if mi.language:
        mil = canonicalize_lang(mi.language)
        if mil != 'und' and mil != canonicalize_lang(get_lang()):
            language = 2
    # 1 when a reliably cached cover URL exists
    has_cover = 2 if (not source_plugin.cached_cover_url_is_reliable or
        source_plugin.get_cached_cover_url(mi.identifiers) is None) else 1
    self.base = (same_identifier, has_cover, all_fields, language, exact_title)
    self.comments_len = len(mi.comments.strip() if mi.comments else '')
    self.extra = (getattr(mi, 'source_relevance', 0), )
def lookup(self, word):
    """Open the lookup website for *word* in the current book language
    (falling back to English), URL-quoting the word first."""
    from urllib import quote
    word = quote(word.encode('utf-8'))
    try:
        url = lookup_website(canonicalize_lang(self.view.current_language) or 'en').format(word=word)
    except Exception:
        traceback.print_exc()
        # Custom website template failed: fall back to the default site
        url = default_lookup_website(canonicalize_lang(self.view.current_language) or 'en').format(word=word)
    open_url(url)
def evaluate(self, formatter, kwargs, mi, locals, lang_strings):
    """Template function: canonicalize a comma-separated list of language
    strings, dropping unrecognized entries, and return them re-joined
    with ', '.

    Fixes: the original appended ``canonicalize_lang(cv)`` where *cv* was
    already the canonical code — a redundant second call; also narrows the
    bare ``except:`` to ``except Exception``.
    """
    retval = []
    for c in [c.strip() for c in lang_strings.split(",") if c.strip()]:
        try:
            cv = canonicalize_lang(c)
            if cv:
                # cv is already canonical; no need to canonicalize again
                retval.append(cv)
        except Exception:
            # Unrecognized language strings are silently skipped by design
            pass
    return ", ".join(retval)
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
    # Set up the DOCX→HTML conversion state: source document, style/image/
    # table collections, and the skeleton HTML tree the body is built into.
    self.docx = DOCX(path_or_stream, log=log)
    self.ms_pat = re.compile(r'\s{2,}')    # runs of 2+ whitespace chars
    self.ws_pat = re.compile(r'[\n\r\t]')  # line/tab whitespace
    self.log = self.docx.log
    self.notes_text = notes_text or _('Notes')
    self.dest_dir = dest_dir or os.getcwdu()
    self.mi = self.docx.metadata
    self.body = BODY()
    self.tables = Tables()
    self.styles = Styles(self.tables)
    self.images = Images()
    self.object_map = OrderedDict()
    self.html = HTML(
        HEAD(
            META(charset='utf-8'),
            TITLE(self.mi.title or _('Unknown')),
            LINK(rel='stylesheet', type='text/css', href='docx.css'),
        ),
        self.body
    )
    # Pretty-print whitespace for the skeleton markup
    self.html.text='\n\t'
    self.html[0].text='\n\t\t'
    self.html[0].tail='\n'
    for child in self.html[0]:
        child.tail = '\n\t\t'
    self.html[0][-1].tail = '\n\t'
    self.html[1].text = self.html[1].tail = '\n'
    # Set the root lang attribute when the language maps to ISO 639-1
    lang = canonicalize_lang(self.mi.language)
    if lang and lang != 'und':
        lang = lang_as_iso639_1(lang)
        if lang:
            self.html.set('lang', lang)
def get_book_language(container):
    """Return the canonical code of the first usable <dc:language> entry in
    the OPF, or None when no recognizable language is declared."""
    for node in container.opf_xpath('//dc:language'):
        text = node.text
        if not text:
            continue
        primary = text.split(',')[0].strip()
        code = canonicalize_lang(primary)
        if code:
            return code
def add_quick_start_guide(library_view, db_images):
    """Add the bundled Quick Start Guide EPUB (localized when available) to
    the library, generating a fresh cover for it."""
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks import calibre_cover
    from calibre.utils.zipfile import safe_replace
    from calibre.utils.localization import get_lang, canonicalize_lang
    from calibre.ptempfile import PersistentTemporaryFile
    l = canonicalize_lang(get_lang()) or "eng"
    gprefs["quick_start_guide_added"] = True
    imgbuf = BytesIO(calibre_cover(_("Quick Start Guide"), "", author_size=8))
    try:
        with open(P("quick_start/%s.epub" % l), "rb") as src:
            buf = BytesIO(src.read())
    except EnvironmentError as err:
        if err.errno != errno.ENOENT:
            raise
        # No translated guide for this language: use the English one
        with open(P("quick_start/eng.epub"), "rb") as src:
            buf = BytesIO(src.read())
    safe_replace(buf, "images/cover.jpg", imgbuf)
    buf.seek(0)
    mi = get_metadata(buf, "epub")
    with PersistentTemporaryFile(".epub") as tmp:
        tmp.write(buf.getvalue())
    library_view.model().add_books([tmp.name], ["epub"], [mi])
    os.remove(tmp.name)
    library_view.model().books_added(1)
    if hasattr(db_images, "reset"):
        db_images.reset()
    if library_view.model().rowCount(None) < 3:
        library_view.resizeColumnsToContents()
def add_quick_start_guide(library_view, refresh_cover_browser=None):
    """Add the bundled Quick Start Guide EPUB (localized when available) to
    the library, generating a fresh cover; optionally refresh the cover
    browser afterwards via the supplied callback."""
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks import calibre_cover
    from calibre.utils.zipfile import safe_replace
    from calibre.utils.localization import get_lang, canonicalize_lang
    from calibre.ptempfile import PersistentTemporaryFile
    l = canonicalize_lang(get_lang()) or 'eng'
    gprefs['quick_start_guide_added'] = True
    imgbuf = BytesIO(calibre_cover(_('Quick Start Guide'), '', author_size=8))
    try:
        with open(P('quick_start/%s.epub' % l), 'rb') as src:
            buf = BytesIO(src.read())
    except EnvironmentError as err:
        if err.errno != errno.ENOENT:
            raise
        # No translated guide for this language: use the English one
        with open(P('quick_start/eng.epub'), 'rb') as src:
            buf = BytesIO(src.read())
    safe_replace(buf, 'images/cover.jpg', imgbuf)
    buf.seek(0)
    mi = get_metadata(buf, 'epub')
    with PersistentTemporaryFile('.epub') as tmp:
        tmp.write(buf.getvalue())
    library_view.model().add_books([tmp.name], ['epub'], [mi])
    os.remove(tmp.name)
    library_view.model().books_added(1)
    if refresh_cover_browser is not None:
        refresh_cover_browser()
    if library_view.model().rowCount(None) < 3:
        library_view.resizeColumnsToContents()
def get_title_sort_pat(lang=None):
    """Return a compiled, cached regex that matches leading articles
    (e.g. 'The ') for *lang*, used to compute title sort strings."""
    ans = _title_pats.get(lang, None)
    if ans is not None:
        return ans
    q = lang
    from calibre.utils.localization import canonicalize_lang, get_lang
    if lang is None:
        # No explicit language: consult the tweak, then the UI language
        q = tweaks['default_language_for_title_sort']
        if q is None:
            q = get_lang()
    q = canonicalize_lang(q) if q else q
    data = tweaks['per_language_title_sort_articles']
    try:
        ans = data.get(q, None)
    except AttributeError:
        ans = None  # invalid tweak value
    try:
        # No per-language entry: fall back to the English article list
        ans = frozenset(ans) if ans else frozenset(data['eng'])
    except:
        # Tweak data unusable: fall back to built-in English articles
        ans = frozenset((r'A\s+', r'The\s+', r'An\s+'))
    ans = '|'.join(ans)
    ans = '^(%s)'%ans
    try:
        ans = re.compile(ans, re.IGNORECASE)
    except:
        # Malformed article patterns: fall back to a known-good regex
        ans = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)
    # Cache under the original (possibly None) lang argument
    _title_pats[lang] = ans
    return ans
def read_default_style_language(raw, mi):
    """Read the document-default w:lang value from styles.xml content *raw*
    and, when it canonicalizes to a known code, set mi.languages to it."""
    root = fromstring(raw)
    values = XPath('/w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang/@w:val')(root)
    for value in values:
        code = canonicalize_lang(value)
        if code:
            mi.languages = [code]
            break
def parse_localized_key(key):
    """Split a desktop-entry style key such as ``Name[fr_FR]`` into the bare
    name and a canonical language code; keys without a locale suffix yield
    ``(key, None)``."""
    name, _bracket, suffix = key.partition('[')
    if not suffix:
        return name, None
    locale = suffix[:-1]  # drop the trailing ']'
    base_lang = re.split(r'[_.@]', locale)[0]
    return name, canonicalize_lang(base_lang)
def import_words(self):
    """Show a dialog to bulk-import words (one per line) into the current
    user dictionary, all tagged with a single chosen language."""
    d = QDialog(self)
    d.l = l = QFormLayout(d)
    d.setWindowTitle(_('Import list of words'))
    d.w = w = QPlainTextEdit(d)
    l.addRow(QLabel(_('Enter a list of words, one per line')))
    l.addRow(w)
    d.b = b = QPushButton(_('Paste from clipboard'))
    l.addRow(b)
    b.clicked.connect(w.paste)
    d.la = la = QLabel(_('Words in the user dictionary must have an associated language. Choose the language below:'))
    la.setWordWrap(True)
    l.addRow(la)
    d.le = le = LanguagesEdit(d)
    lc = canonicalize_lang(get_lang())
    if lc:
        # Preselect the current UI language
        le.lang_codes = [lc]
    l.addRow(_('&Language:'), le)
    d.bb = bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel)
    l.addRow(bb)
    bb.accepted.connect(d.accept), bb.rejected.connect(d.reject)
    if d.exec_() != d.Accepted:
        return
    lc = le.lang_codes
    if not lc:
        return error_dialog(self, _('Must specify language'), _(
            'You must specify a language to import words'), show=True)
    # Split, strip and de-duplicate the entered words
    words = set(filter(None, [x.strip() for x in unicode(w.toPlainText()).splitlines()]))
    lang = lc[0]
    # Skip (word, lang) pairs that are already in the dictionary
    words = {(w, lang) for w in words} - self.current_dictionary.words
    if dictionaries.add_to_user_dictionary(self.current_dictionary.name, words, None):
        dictionaries.clear_caches()
        self.show_current_dictionary()
        self.dictionaries_changed = True
def commit_ncx_toc(container, toc, lang=None, uid=None):
    """Serialize *toc* as an NCX document into the container, creating the
    NCX item (and registering it on the spine) when none exists yet."""
    tocname = find_existing_ncx_toc(container)
    if tocname is None:
        item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        ncx_id = item.get('id')
        # Point every spine element at the newly created NCX
        [s.set('toc', ncx_id) for s in container.opf_xpath('//opf:spine')]
    if not lang:
        # No language supplied: use the first usable <dc:language>, else UI lang
        lang = get_lang()
        for l in container.opf_xpath('//dc:language'):
            l = canonicalize_lang(xml2text(l).strip())
            if l:
                lang = l
                lang = lang_as_iso639_1(l) or l
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        # Prefer the OPF's unique-identifier value over a fresh UUID
        uid = uuid_id()
        eid = container.opf.get('unique-identifier', None)
        if eid:
            m = container.opf_xpath('//*[@id="%s"]'%eid)
            if m:
                uid = xml2text(m[0])
    title = _('Table of Contents')
    m = container.opf_xpath('//dc:title')
    if m:
        x = xml2text(m[0]).strip()
        title = x or title
    to_href = partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)
def parse_new_details(self, root, mi, non_hero):
    """Populate *mi* (language, publisher, pubdate, ISBN) from the key/value
    rows of the product-details table in the new-style page layout."""
    table = non_hero.xpath('descendant::table')[0]
    for tr in table.xpath('descendant::tr'):
        cells = tr.xpath('descendant::td')
        if len(cells) == 2:
            name = self.totext(cells[0])
            val = self.totext(cells[1])
            if not val:
                continue
            if name in self.language_names:
                # Map the site's display name first, else canonicalize raw text
                ans = self.lang_map.get(val, None)
                if not ans:
                    ans = canonicalize_lang(val)
                if ans:
                    mi.language = ans
            elif name in self.publisher_names:
                # Value looks like 'Publisher; edition (date)'
                pub = val.partition(';')[0].partition('(')[0].strip()
                if pub:
                    mi.publisher = pub
                date = val.rpartition('(')[-1].replace(')', '').strip()
                try:
                    from calibre.utils.date import parse_only_date
                    date = self.delocalize_datestr(date)
                    mi.pubdate = parse_only_date(date, assume_utc=True)
                except:
                    self.log.exception('Failed to parse pubdate: %s' % val)
            elif name in {'ISBN', 'ISBN-10', 'ISBN-13'}:
                ans = check_isbn(val)
                if ans:
                    self.isbn = mi.isbn = ans
def data(self, index, role=Qt.DisplayRole):
    """Qt model data: columns are word / language name (with country) /
    usage count; SORT_ROLE yields precomputed sort keys and UserRole the
    raw entry object."""
    if role == SORT_ROLE:
        try:
            return self.sort_keys[index.row()][index.column()]
        except IndexError:
            pass
    elif role == Qt.DisplayRole:
        col = index.column()
        try:
            entry = self.files[index.row()]
        except IndexError:
            return None
        if col == 0:
            return entry.word
        if col == 1:
            # Human-readable language name, with country code if present
            ans = calibre_langcode_to_name(canonicalize_lang(entry.locale.langcode)) or ''
            if entry.locale.countrycode:
                ans += ' (%s)' % entry.locale.countrycode
            return ans
        if col == 2:
            # type('') is str/unicode depending on Python version
            return type('')(len(entry.usage))
    elif role == Qt.UserRole:
        try:
            return self.files[index.row()]
        except IndexError:
            pass
def commit_toc(container, toc, lang=None, uid=None):
    """Serialize *toc* as an NCX document into the container, creating a new
    toc.ncx item when none exists yet."""
    tocname = find_existing_toc(container)
    if tocname is None:
        item = container.generate_item("toc.ncx", id_prefix="toc")
        tocname = container.href_to_name(item.get("href"), base=container.opf_name)
    if not lang:
        # No language supplied: use the first usable <dc:language>, else UI lang
        lang = get_lang()
        for l in container.opf_xpath("//dc:language"):
            l = canonicalize_lang(xml2text(l).strip())
            if l:
                lang = l
                lang = lang_as_iso639_1(l) or l
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        # Prefer the OPF's unique-identifier value over a fresh UUID
        uid = uuid_id()
        eid = container.opf.get("unique-identifier", None)
        if eid:
            m = container.opf_xpath('//*[@id="%s"]' % eid)
            if m:
                uid = xml2text(m[0])
    title = _("Table of Contents")
    m = container.opf_xpath("//dc:title")
    if m:
        x = xml2text(m[0]).strip()
        title = x or title
    to_href = partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
    # Set up the DOCX→HTML conversion state: source document, style/image/
    # table collections, and the skeleton HTML tree the body is built into.
    self.docx = DOCX(path_or_stream, log=log)
    self.log = self.docx.log
    self.notes_text = notes_text or _("Notes")
    self.dest_dir = dest_dir or os.getcwdu()
    self.mi = self.docx.metadata
    self.body = BODY()
    self.styles = Styles()
    self.images = Images()
    self.tables = Tables()
    self.object_map = OrderedDict()
    self.html = HTML(
        HEAD(
            META(charset="utf-8"),
            TITLE(self.mi.title or _("Unknown")),
            LINK(rel="stylesheet", type="text/css", href="docx.css"),
        ),
        self.body,
    )
    # Pretty-print whitespace for the skeleton markup
    self.html.text = "\n\t"
    self.html[0].text = "\n\t\t"
    self.html[0].tail = "\n"
    for child in self.html[0]:
        child.tail = "\n\t\t"
    self.html[0][-1].tail = "\n\t"
    self.html[1].text = self.html[1].tail = "\n"
    # Set the root lang attribute when the language maps to ISO 639-1
    lang = canonicalize_lang(self.mi.language)
    if lang and lang != "und":
        lang = lang_as_iso639_1(lang)
        if lang:
            self.html.set("lang", lang)
def get_metadata(stream, extract_cover=True):
    """Read metadata from an ODF document's meta.xml, honoring calibre's
    custom 'opf.*' user-defined fields when present; optionally extracts
    the cover image."""
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata','') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
        if data.get('opf.seriesindex', ''):
            try:
                mi.series_index = float(data['opf.seriesindex'])
            except ValueError:
                mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except:
            pass  # Do not let an error reading the cover prevent reading other data
    return mi
def read_languages(root, prefixes, refines):
    """Collect canonical, de-duplicated language codes from the OPF
    metadata, skipping the 'und' (undetermined) placeholder."""
    found = []
    for node in XPath('./opf:metadata/dc:language')(root):
        code = canonicalize_lang((node.text or '').strip())
        if code and code != 'und' and code not in found:
            found.append(code)
    return uniq(found)
def field_from_string(field, raw, field_metadata):
    '''
    Parse the string raw to return an object that is suitable for calling
    set() on a Metadata object.
    '''
    dt = field_metadata['datatype']
    val = object  # sentinel meaning 'no conversion applied'
    if dt == 'int':
        val = int(raw)
    elif dt == 'float':
        val = float(raw)
    elif dt == 'rating':
        # Ratings are stored internally on a 0-10 scale
        val = 2 * float(raw)
    elif dt == 'datetime':
        from calibre.utils.date import parse_only_date
        val = parse_only_date(raw)
    elif dt == 'bool':
        lowered = raw.lower()
        if lowered in {'true', 'yes', 'y'}:
            val = True
        elif lowered in {'false', 'no', 'n'}:
            val = False
        else:
            raise ValueError('Unknown value for %s: %s'%(field, raw))
    elif dt == 'text':
        ism = field_metadata['is_multiple']
        if ism:
            val = [piece.strip() for piece in raw.split(ism['ui_to_list'])]
            if field == 'identifiers':
                val = {piece.partition(':')[0]: piece.partition(':')[-1] for piece in val}
            elif field == 'languages':
                from calibre.utils.localization import canonicalize_lang
                val = [code for code in (canonicalize_lang(piece) for piece in val) if code]
    # Unhandled datatypes fall through to the raw string
    return raw if val is object else val
def adapt_languages(to_tuple, x):
    """Normalize a raw language collection into a tuple of unique canonical
    codes, dropping unrecognized entries and the placeholder codes
    'und', 'zxx', 'mis' and 'mul'."""
    seen = []
    for raw in to_tuple(x):
        code = canonicalize_lang(raw)
        if code and code not in seen and code not in ('und', 'zxx', 'mis', 'mul'):
            seen.append(code)
    return tuple(seen)
def read_metadata_kfx(stream, read_cover=True):
    ' Read the metadata.kfx file that is found in the sdr book folder for KFX files '
    c = Container(stream.read())
    m = extract_metadata(c.decode())
    # dump_metadata(m)

    def has(x):
        # True when the field exists and its first value is non-empty
        return m[x] and m[x][0]

    def get(x, single=True):
        # Return the (XML-cleaned) first value, or the whole list
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ''
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(x):
        # Convert 'Last, First' back to 'First Last' unless sort is a copy
        if tweaks['author_sort_copy_method'] != 'copy':
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + ' ' + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has('author'):
        mi.author_sort = get('author')
    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))
    if has('languages'):
        langs = list(filter(None, (canonicalize_lang(x) for x in get('languages', False))))
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            w, h, fmt = identify_data(data)
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w and h:
            mi.cover_data = (fmt, data)
    return mi
def read_metadata_kfx(stream, read_cover=True):
    " Read the metadata.kfx file that is found in the sdr book folder for KFX files "
    c = Container(stream.read())
    m = extract_metadata(c.decode())
    # dump_metadata(m)

    def has(x):
        # True when the field exists and its first value is non-empty
        return m[x] and m[x][0]

    def get(x, single=True):
        # Return the (XML-cleaned) first value, or the whole list
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ""
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get("title") or _("Unknown")
    authors = get("authors", False) or [_("Unknown")]
    auth_pat = re.compile(r"([^,]+?)\s*,\s+([^,]+)$")

    def fix_author(x):
        # Convert 'Last, First' back to 'First Last' unless sort is a copy
        if tweaks["author_sort_copy_method"] != "copy":
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + " " + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has("author"):
        mi.author_sort = get("author")
    if has("ASIN"):
        mi.set_identifier("mobi-asin", get("ASIN"))
    elif has("content_id"):
        mi.set_identifier("mobi-asin", get("content_id"))
    if has("languages"):
        langs = list(filter(None, (canonicalize_lang(x) for x in get("languages", False))))
        if langs:
            mi.languages = langs
    if has("issue_date"):
        try:
            mi.pubdate = parse_only_date(get("issue_date"))
        except Exception:
            pass
    if has("publisher") and get("publisher") != "Unknown":
        mi.publisher = get("publisher")
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)
    return mi
def _parse_language(self, root): lang_node = root.xpath('//div[@id="metacol"]/div[@id="details"]/div[@class="buttons"]/div[@id="bookDataBox"]/div/div[@itemprop="inLanguage"]') if lang_node: raw = tostring(lang_node[0], method='text', encoding=unicode).strip() ans = self.lang_map.get(raw, None) if ans: return ans ans = canonicalize_lang(ans) if ans: return ans
def parse_language(self, pd):
    """Scan product-detail nodes (last match first) for a language value:
    map the display string via self.lang_map, else canonicalize it.

    Fixes: the original called ``canonicalize_lang(ans)`` where *ans* was
    always None at that point (the truthy case had already returned), so
    the fallback could never succeed; it must canonicalize *raw*.
    """
    for x in reversed(pd.xpath(self.language_xpath)):
        if x.tail:
            raw = x.tail.strip().partition(',')[0].strip()
            ans = self.lang_map.get(raw, None)
            if ans:
                return ans
            # Fall back to canonicalizing the raw display name
            ans = canonicalize_lang(raw)
            if ans:
                return ans
def lookup(self, word):
    """Look up *word* on Wordnik for English, or on the language's
    Wiktionary site otherwise."""
    from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
    from urllib import quote
    lang = lang_as_iso639_1(self.view.current_language)
    if not lang:
        # NOTE(review): lang is falsy here, so canonicalize_lang(lang) can
        # only yield a falsy value — this effectively always falls back to
        # 'en'. Possibly meant canonicalize_lang(self.view.current_language);
        # confirm before changing.
        lang = canonicalize_lang(lang) or 'en'
    word = quote(word.encode('utf-8'))
    if lang == 'en':
        prefix = 'https://www.wordnik.com/words/'
    else:
        prefix = 'http://%s.wiktionary.org/wiki/' % lang
    open_url(prefix + word)
def update_from_mi(self, mi, update_sorts=True, merge_tags=True, merge_comments=False):
    """Merge the non-null fields of *mi* into the edit-metadata widgets,
    optionally regenerating sorts and merging tags/comments; restores the
    previously focused widget afterwards."""
    fw = self.focusWidget()
    if not mi.is_null('title'):
        self.title.set_value(mi.title)
        if update_sorts:
            self.title_sort.auto_generate()
    if not mi.is_null('authors'):
        self.authors.set_value(mi.authors)
    if not mi.is_null('author_sort'):
        self.author_sort.set_value(mi.author_sort)
    elif update_sorts and not mi.is_null('authors'):
        self.author_sort.auto_generate()
    if not mi.is_null('rating'):
        try:
            self.rating.set_value(mi.rating)
        except:
            pass
    if not mi.is_null('publisher'):
        self.publisher.set_value(mi.publisher)
    if not mi.is_null('tags'):
        old_tags = self.tags.current_val
        tags = mi.tags if mi.tags else []
        if old_tags and merge_tags:
            # Keep old tags; add only genuinely new ones (case-insensitive)
            ltags, lotags = {t.lower() for t in tags}, {t.lower() for t in old_tags}
            tags = [t for t in tags if t.lower() in ltags-lotags] + old_tags
        self.tags.set_value(tags)
    if not mi.is_null('identifiers'):
        current = self.identifiers.current_val
        current.update(mi.identifiers)
        self.identifiers.set_value(current)
    if not mi.is_null('pubdate'):
        self.pubdate.set_value(mi.pubdate)
    if not mi.is_null('series') and mi.series.strip():
        self.series.set_value(mi.series)
        if mi.series_index is not None:
            self.series_index.reset_original()
            self.series_index.set_value(float(mi.series_index))
    if not mi.is_null('languages'):
        langs = [canonicalize_lang(x) for x in mi.languages]
        langs = [x for x in langs if x is not None]
        if langs:
            self.languages.set_value(langs)
    if mi.comments and mi.comments.strip():
        val = mi.comments
        if val and merge_comments:
            cval = self.comments.current_val
            if cval:
                val = merge_two_comments(cval, val)
        self.comments.set_value(val)
    if fw is not None:
        fw.setFocus(Qt.OtherFocusReason)
def localize_string(data):
    """Pick the translation from *data* (a mapping of locale keys to
    strings) whose base language matches the current UI language; fall
    back to the None key, then the empty string."""
    target = canonicalize_lang(get_lang())

    def matches(key):
        if key is None:
            return False
        primary = re.split(r'[_.@]', key)[0]
        return canonicalize_lang(primary) == target

    for key in data:
        if matches(key):
            return data[key]
    return data.get(None) or ''
def _parse_language(self, root): raw = "Korean" lang_node = root.xpath('//div[@class="book_info_basic2"]') if lang_node: match = re.search("%s\s?:\s?([^\s]*)" % u'언어',lang_node[0].text_content(),re.I) if match: raw = match.group(1) ans = self.lang_map.get(raw, None) if ans: return ans ans = canonicalize_lang(ans) if ans: return ans
def _parse_language(self, root): lang_node = root.xpath( '//div[@id="metacol"]/div[@id="details"]/div[@class="buttons"]/div[@id="bookDataBox"]/div/div[@itemprop="inLanguage"]' ) if lang_node: raw = tostring(lang_node[0], method='text', encoding=unicode).strip() ans = self.lang_map.get(raw, None) if ans: return ans ans = canonicalize_lang(ans) if ans: return ans
def _parse_language(self, root): raw = "Korean" lang_node = root.xpath('//div[@class="book_info_basic2"]') if lang_node: match = re.search("%s\s?:\s?([^\s]*)" % '언어', lang_node[0].text_content(), re.I) if match: raw = match.group(1) ans = self.lang_map.get(raw, None) if ans: return ans ans = canonicalize_lang(ans) if ans: return ans
def lookup(self, word):
    """Open a dictionary website for *word*; if the custom per-language
    website fails, report the error once per language and fall back to
    the default site."""
    from urllib import quote
    word = quote(word.encode('utf-8'))
    lang = canonicalize_lang(self.view.current_language) or get_lang() or 'en'
    try:
        url = lookup_website(lang).format(word=word)
    except Exception:
        if not self.lookup_error_reported.get(lang):
            # Only nag the user once per language per session
            self.lookup_error_reported[lang] = True
            error_dialog(self, _('Failed to use dictionary'), _(
                'Failed to use the custom dictionary for language: %s Falling back to default dictionary.') % lang,
                det_msg=traceback.format_exc(), show=True)
        url = default_lookup_website(lang).format(word=word)
    open_url(url)
def update_from_mi(self, mi, update_sorts=True, merge_tags=True, merge_comments=False):
    """Merge the non-null fields of *mi* into the edit-metadata widgets,
    optionally regenerating sorts and merging tags/comments; restores the
    previously focused widget afterwards. Note the rating here is scaled
    from a 0-5 value to the internal 0-10 scale."""
    fw = self.focusWidget()
    if not mi.is_null('title'):
        self.title.set_value(mi.title)
        if update_sorts:
            self.title_sort.auto_generate()
    if not mi.is_null('authors'):
        self.authors.set_value(mi.authors)
    if not mi.is_null('author_sort'):
        self.author_sort.set_value(mi.author_sort)
    elif update_sorts and not mi.is_null('authors'):
        self.author_sort.auto_generate()
    if not mi.is_null('rating'):
        self.rating.set_value(mi.rating * 2)
    if not mi.is_null('publisher'):
        self.publisher.set_value(mi.publisher)
    if not mi.is_null('tags'):
        old_tags = self.tags.current_val
        tags = mi.tags if mi.tags else []
        if old_tags and merge_tags:
            # Keep old tags; add only genuinely new ones (case-insensitive)
            ltags, lotags = {t.lower() for t in tags}, {t.lower() for t in old_tags}
            tags = [t for t in tags if t.lower() in ltags-lotags] + old_tags
        self.tags.set_value(tags)
    if not mi.is_null('identifiers'):
        current = self.identifiers.current_val
        current.update(mi.identifiers)
        self.identifiers.set_value(current)
    if not mi.is_null('pubdate'):
        self.pubdate.set_value(mi.pubdate)
    if not mi.is_null('series') and mi.series.strip():
        self.series.set_value(mi.series)
        if mi.series_index is not None:
            self.series_index.reset_original()
            self.series_index.set_value(float(mi.series_index))
    if not mi.is_null('languages'):
        langs = [canonicalize_lang(x) for x in mi.languages]
        langs = [x for x in langs if x is not None]
        if langs:
            self.languages.set_value(langs)
    if mi.comments and mi.comments.strip():
        val = mi.comments
        if val and merge_comments:
            cval = self.comments.current_val
            if cval:
                val = merge_two_comments(cval, val)
        self.comments.set_value(val)
    if fw is not None:
        fw.setFocus(Qt.OtherFocusReason)
def update_from_mi(self, mi, update_sorts=True, merge_tags=True, merge_comments=False):
    """Merge the non-null fields of *mi* into the metadata widgets via
    their current_val properties, optionally regenerating sorts and
    merging tags/comments."""
    if not mi.is_null('title'):
        self.title.current_val = mi.title
        if update_sorts:
            self.title_sort.auto_generate()
    if not mi.is_null('authors'):
        self.authors.current_val = mi.authors
    if not mi.is_null('author_sort'):
        self.author_sort.current_val = mi.author_sort
    elif update_sorts:
        self.author_sort.auto_generate()
    if not mi.is_null('rating'):
        try:
            self.rating.current_val = mi.rating
        except:
            pass
    if not mi.is_null('publisher'):
        self.publisher.current_val = mi.publisher
    if not mi.is_null('tags'):
        old_tags = self.tags.current_val
        tags = mi.tags if mi.tags else []
        if old_tags and merge_tags:
            # Keep old tags; add only genuinely new ones (case-insensitive)
            ltags, lotags = {t.lower() for t in tags}, {t.lower() for t in old_tags}
            tags = [t for t in tags if t.lower() in ltags-lotags] + old_tags
        self.tags.current_val = tags
    if not mi.is_null('identifiers'):
        current = self.identifiers.current_val
        current.update(mi.identifiers)
        self.identifiers.current_val = current
    if not mi.is_null('pubdate'):
        self.pubdate.current_val = mi.pubdate
    if not mi.is_null('series') and mi.series.strip():
        self.series.current_val = mi.series
        if mi.series_index is not None:
            self.series_index.reset_original()
            self.series_index.current_val = float(mi.series_index)
    if not mi.is_null('languages'):
        langs = [canonicalize_lang(x) for x in mi.languages]
        langs = [x for x in langs if x is not None]
        if langs:
            self.languages.current_val = langs
    if mi.comments and mi.comments.strip():
        val = mi.comments
        if val and merge_comments:
            cval = self.comments.current_val
            if cval:
                val = merge_two_comments(cval, val)
        self.comments.current_val = val
def main(opts, args, dbctx):
    """CLI entry point for adding books: parse author/tag/language/identifier
    options, then either create an empty record or add the given files."""
    aut = string_to_authors(opts.authors) if opts.authors else []
    tags = [x.strip() for x in opts.tags.split(',')] if opts.tags else []
    # Canonicalize language codes, dropping anything unrecognized
    lcodes = [canonicalize_lang(x) for x in (opts.languages or '').split(',')]
    lcodes = [x for x in lcodes if x]
    # Each identifier is given as 'scheme:value'
    identifiers = (x.partition(':')[::2] for x in opts.identifier)
    identifiers = dict((k.strip(), v.strip()) for k, v in identifiers
                       if k.strip() and v.strip())
    if opts.empty:
        do_add_empty(
            dbctx, opts.title, aut, opts.isbn, tags, opts.series,
            opts.series_index, opts.cover, identifiers, lcodes)
        return 0
    if len(args) < 1:
        raise SystemExit(_('You must specify at least one file to add'))
    do_add(
        dbctx, args, opts.one_book_per_directory, opts.recurse, opts.duplicates,
        opts.title, aut, opts.isbn, tags, opts.series, opts.series_index,
        opts.cover, identifiers, lcodes, opts.filters)
    return 0
def createJSONDictionary(self, metadata):
    """Build the ComicBookInfo/1.0 container dict (ready for JSON
    serialization) from a comic *metadata* object."""
    # Create the dictionary that we will convert to JSON text
    cbi = dict()
    cbi_container = {
        'appID': 'ComicTagger/',
        'lastModified': str(datetime.now()),
        'ComicBookInfo/1.0': cbi
    }

    # helper func: only store entries whose value is not None
    def assign(cbi_entry, md_entry):
        if md_entry is not None:
            cbi[cbi_entry] = md_entry

    # helper func: best-effort int conversion, None on failure
    def toInt(s):
        i = None
        if type(s) in [str, unicode, int]:
            try:
                i = int(s)
            except ValueError:
                pass
        return i

    assign('series', metadata.series)
    assign('title', metadata.title)
    assign('issue', metadata.issue)
    assign('publisher', metadata.publisher)
    assign('publicationMonth', toInt(metadata.month))
    assign('publicationYear', toInt(metadata.year))
    assign('numberOfIssues', toInt(metadata.issueCount))
    assign('comments', metadata.comments)
    assign('genre', metadata.genre)
    assign('volume', toInt(metadata.volume))
    assign('numberOfVolumes', toInt(metadata.volumeCount))
    # Store the human-readable language name, not the code
    assign('language', calibre_langcode_to_name(canonicalize_lang(metadata.language)))
    assign('country', metadata.country)
    assign('rating', metadata.criticalRating)
    assign('credits', metadata.credits)
    assign('tags', metadata.tags)
    return cbi_container
def __init__(self, parent=None):
    # Weekday-checkbox + time-of-day scheduling widget for news downloads.
    Base.__init__(self, parent)
    self.days = [QCheckBox(force_unicode(calendar.day_abbr[d]),
        self) for d in range(7)]
    for i, cb in enumerate(self.days):
        # Lay the seven checkboxes out in a 2-row grid
        row = i % 2
        col = i // 2
        self.l.addWidget(cb, row, col, 1, 1)
    self.time = QTimeEdit(self)
    self.time.setDisplayFormat('hh:mm AP')
    if canonicalize_lang(get_lang()) in {'deu', 'nds'}:
        # German locales conventionally use 24-hour time
        self.time.setDisplayFormat('HH:mm')
    self.hl = QHBoxLayout()
    self.l1 = QLabel(_('&Download after:'))
    self.l1.setBuddy(self.time)
    self.hl.addWidget(self.l1)
    self.hl.addWidget(self.time)
    self.l.addLayout(self.hl, 1, 3, 1, 1)
    self.initialize()
def read_doc_props(raw, mi, XPath):
    """Populate *mi* from a DOCX core-properties part (docProps/core.xml):
    title, tags/keywords, authors, description and languages."""
    root = fromstring(raw)
    titles = XPath('//dc:title')(root)
    if titles:
        title = titles[0].text
        if title and title.strip():
            mi.title = title.strip()
    tags = []
    for subject in XPath('//dc:subject')(root):
        if subject.text and subject.text.strip():
            # Commas would be treated as tag separators; replace them
            tags.append(subject.text.strip().replace(',', '_'))
    for keywords in XPath('//cp:keywords')(root):
        if keywords.text and keywords.text.strip():
            for x in keywords.text.split():
                tags.extend(y.strip() for y in x.split(',') if y.strip())
    if tags:
        mi.tags = tags
    authors = XPath('//dc:creator')(root)
    aut = []
    for author in authors:
        if author.text and author.text.strip():
            aut.extend(string_to_authors(author.text))
    if aut:
        mi.authors = aut
        mi.author_sort = authors_to_sort_string(aut)
    desc = XPath('//dc:description')(root)
    if desc:
        raw = etree.tostring(desc[0], method='text', encoding=unicode_type)
        raw = raw.replace('_x000d_', '')  # Word 2007 mangles newlines in the summary
        mi.comments = raw.strip()
    langs = []
    for lang in XPath('//dc:language')(root):
        if lang.text and lang.text.strip():
            l = canonicalize_lang(lang.text)
            if l:
                langs.append(l)
    if langs:
        mi.languages = langs
def field_from_string(field, raw, field_metadata):
    '''
    Parse the string raw to return an object that is suitable for calling
    set() on a Metadata object.
    '''
    dt = field_metadata['datatype']
    val = object  # sentinel meaning 'no conversion applied'
    if dt == 'int':
        val = int(raw)
    elif dt == 'float':
        val = float(raw)
    elif dt == 'rating':
        # Ratings are stored internally on a 0-10 scale
        val = 2 * float(raw)
    elif dt == 'datetime':
        from calibre.utils.iso8601 import parse_iso8601
        try:
            val = parse_iso8601(raw, require_aware=True)
        except Exception:
            # Not a full ISO timestamp; accept a plain date instead
            from calibre.utils.date import parse_only_date
            val = parse_only_date(raw)
    elif dt == 'bool':
        lowered = raw.lower()
        if lowered in {'true', 'yes', 'y'}:
            val = True
        elif lowered in {'false', 'no', 'n'}:
            val = False
        else:
            raise ValueError('Unknown value for %s: %s' % (field, raw))
    elif dt == 'text':
        ism = field_metadata['is_multiple']
        if ism:
            val = [piece.strip() for piece in raw.split(ism['ui_to_list'])]
            if field == 'identifiers':
                val = {piece.partition(':')[0]: piece.partition(':')[-1] for piece in val}
            elif field == 'languages':
                from calibre.utils.localization import canonicalize_lang
                val = [code for code in (canonicalize_lang(piece) for piece in val) if code]
    # Unhandled datatypes fall through to the raw string
    return raw if val is object else val
def get_metadata(stream, extract_cover=True):
    """Read metadata from an ODF document's ``meta.xml``.

    :param stream: a file-like object containing the ODF (zip) document
    :param extract_cover: when True, also try to extract cover image data
    :return: a MetaInformation instance populated from the document metadata
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, True)
    # disable external general entities to avoid XXE-style entity resolution
    parser.setFeature(xml.sax.handler.feature_external_ges, False)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    # prefer the original author over a later editor
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
        if data.get('opf.seriesindex', ''):
            try:
                mi.series_index = float(data['opf.seriesindex'])
            except ValueError:
                mi.series_index = 1.0  # default index for unparseable values
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other
            # data. Deliberately best-effort, but narrowed from a bare
            # except: so KeyboardInterrupt/SystemExit still propagate.
            pass
    return mi
def metadata_to_xmp_packet(mi):
    """Serialize the Metadata object *mi* into an XMP packet (bytes).

    Builds an x:xmpmeta/rdf:RDF tree containing several rdf:Description
    blocks: Dublin Core fields, XMP identifiers/dates, prism/pdfx identifier
    mirrors and calibre-specific properties, then serializes the whole tree.
    """
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]
    # Dublin Core description block
    dc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('dc'))
    dc.set(expand('rdf:about'), '')
    rdf.append(dc)
    # language-alternative properties (rdf:Alt)
    for prop, tag in iteritems({'title': 'dc:title', 'comments': 'dc:description'}):
        val = mi.get(prop) or ''
        create_alt_property(dc, tag, val)
    # sequence properties; authors are ordered (rdf:Seq), the rest unordered
    for prop, (tag, ordered) in iteritems({
            'authors': ('dc:creator', True),
            'tags': ('dc:subject', False),
            'publisher': ('dc:publisher', False),
    }):
        val = mi.get(prop) or ()
        if isinstance(val, string_or_bytes):
            # normalize a single string value to a one-element sequence
            val = [val]
        create_sequence_property(dc, tag, val, ordered)
    if not mi.is_null('pubdate'):
        create_sequence_property(dc, 'dc:date', [isoformat(mi.pubdate, as_utc=False)])  # Adobe spec recommends local time
    if not mi.is_null('languages'):
        # prefer 2-letter ISO 639-1 codes, fall back to canonicalized codes
        langs = list(filter(None, map(lambda x: lang_as_iso639_1(x) or canonicalize_lang(x), mi.languages)))
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)
    # XMP core description block (identifiers, metadata date)
    xmp = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('xmp', 'xmpidq'))
    xmp.set(expand('rdf:about'), '')
    rdf.append(xmp)
    # extra per-vocabulary description blocks used to mirror isbn/doi
    extra_ids = {}
    for x in ('prism', 'pdfx'):
        p = extra_ids[x] = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap(x))
        p.set(expand('rdf:about'), '')
        rdf.append(p)
    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, val in iteritems(identifiers):
            if scheme in {'isbn', 'doi'}:
                # duplicate isbn/doi into both prism: and pdfx: namespaces
                for prefix, parent in iteritems(extra_ids):
                    ie = parent.makeelement(expand('%s:%s' % (prefix, scheme)))
                    ie.text = val
                    parent.append(ie)
    # record when this metadata packet was generated
    d = xmp.makeelement(expand('xmp:MetadataDate'))
    d.text = isoformat(now(), as_utc=False)
    xmp.append(d)
    # calibre-specific description block
    calibre = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('calibre', 'calibreSI', 'calibreCC'))
    calibre.set(expand('rdf:about'), '')
    rdf.append(calibre)
    if not mi.is_null('rating'):
        try:
            r = float(mi.rating)
        except (TypeError, ValueError):
            pass  # silently skip an unparseable rating
        else:
            create_simple_property(calibre, 'calibre:rating', '%g' % r)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False))
    # dict-valued extras are serialized as JSON-ish dumps
    for x in ('author_link_map', 'user_categories'):
        val = getattr(mi, x, None)
        if val:
            create_simple_property(calibre, 'calibre:' + x, dump_dict(val))
    for x in ('title_sort', 'author_sort'):
        if not mi.is_null(x):
            create_simple_property(calibre, 'calibre:' + x, getattr(mi, x))
    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
def key_matches(key):
    # A key such as 'en_US', 'en.utf8' or 'en@latin' matches when the part
    # before the first '_', '.' or '@' canonicalizes to the target language.
    return key is not None and canonicalize_lang(re.split(r'[_.@]', key)[0]) == lang
def get_matches(self, location, query, candidates=None, allow_recursion=True):
    """Return the set of book ids in *candidates* matching *query* at *location*.

    *location* is a search term (field name, grouped search term, '@user
    category' or 'all'); *query* is the raw query text. *allow_recursion*
    guards against recursive grouped-search-term definitions.
    """
    # If candidates is not None, it must not be modified. Changing its
    # value will break query optimization in the search parser
    matches = set()
    if candidates is None:
        candidates = self.all_book_ids
    # empty candidate set or blank query can never match anything
    if not candidates or not query or not query.strip():
        return matches
    if location not in self.all_search_locations:
        return matches

    # strip the leading '@' from grouped search terms so they resolve below
    if (len(location) > 2 and location.startswith('@') and
            location[1:] in self.grouped_search_terms):
        location = location[1:]

    # get metadata key associated with the search term. Eliminates
    # dealing with plurals and other aliases
    original_location = location
    location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
    # grouped search terms
    if isinstance(location, list):
        if allow_recursion:
            # 'false' on a group means "matches none of the grouped fields":
            # search for 'true' and invert the result at the end
            if query.lower() == 'false':
                invert = True
                query = 'true'
            else:
                invert = False
            for loc in location:
                # shrink the candidate pool as matches accumulate so later
                # fields only search books not yet matched
                c = candidates.copy()
                m = self.get_matches(loc, query, candidates=c, allow_recursion=False)
                matches |= m
                c -= m
                if len(c) == 0:
                    break
            if invert:
                matches = self.all_book_ids - matches
            return matches
        raise ParseException(_('Recursive query group detected: {0}').format(query))

    # If the user has asked to restrict searching over all field, apply
    # that restriction
    if (location == 'all' and self.limit_search_columns and
            self.limit_search_columns_to):
        terms = set()
        for l in self.limit_search_columns_to:
            l = icu_lower(l.strip())
            if l and l != 'all' and l in self.all_search_locations:
                terms.add(l)
        if terms:
            c = candidates.copy()
            for l in terms:
                try:
                    m = self.get_matches(l, query, candidates=c, allow_recursion=allow_recursion)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                except:
                    # best-effort: a failing column must not abort the search
                    pass
            return matches

    upf = prefs['use_primary_find_in_search']

    if location in self.field_metadata:
        fm = self.field_metadata[location]
        dt = fm['datatype']

        # take care of dates special case
        if (dt == 'datetime' or (dt == 'composite' and fm['display'].get('composite_sort', '') == 'date')):
            if location == 'date':
                # the public 'date' search term maps to the timestamp field
                location = 'timestamp'
            return self.date_search(icu_lower(query), partial(self.field_iter, location, candidates))

        # take care of numbers special case
        if (dt in ('rating', 'int', 'float') or (dt == 'composite' and fm['display'].get('composite_sort', '') == 'number')):
            if location == 'id':
                # 'id' has no backing field object; synthesize an iterator
                # yielding each candidate id paired with its singleton set
                is_many = False

                def fi(default_value=None):
                    for qid in candidates:
                        yield qid, {qid}
            else:
                field = self.dbcache.fields[location]
                fi, is_many = partial(self.field_iter, location, candidates), field.is_many
            return self.num_search(icu_lower(query), fi, location, dt, candidates, is_many=is_many)

        # take care of the 'count' operator for is_multiples
        if (fm['is_multiple'] and len(query) > 1 and query[0] == '#' and query[1] in '=<>!'):
            return self.num_search(icu_lower(query[1:]), partial(self.dbcache.fields[location].iter_counts, candidates), location, dt, candidates)

        # take care of boolean special case
        if dt == 'bool':
            return self.bool_search(icu_lower(query), partial(self.field_iter, location, candidates), self.dbcache._pref('bools_are_tristate'))

        # special case: colon-separated fields such as identifiers. isbn
        # is a special case within the case
        if fm.get('is_csp', False):
            field_iter = partial(self.field_iter, location, candidates)
            if location == 'identifiers' and original_location == 'isbn':
                # bare 'isbn:' searches become an exact identifier match
                return self.keypair_search('=isbn:' + query, field_iter, candidates, upf)
            return self.keypair_search(query, field_iter, candidates, upf)

    # check for user categories
    if len(location) >= 2 and location.startswith('@'):
        return self.get_user_category_matches(location[1:], icu_lower(query), candidates)

    # Everything else (and 'all' matches)
    case_sensitive = prefs['case_sensitive']
    matchkind, query = _matchkind(query, case_sensitive=case_sensitive)
    all_locs = set()
    text_fields = set()
    field_metadata = {}

    # collect the concrete searchable fields and note which are text-like
    for x, fm in self.field_metadata.iteritems():
        if x.startswith('@'):
            continue
        if fm['search_terms'] and x not in {'series_sort', 'id'}:
            if x not in self.virtual_fields and x != 'uuid':
                # We dont search virtual fields because if we do, search
                # caching will not be used
                all_locs.add(x)
            field_metadata[x] = fm
            if fm['datatype'] in {'composite', 'text', 'comments', 'series', 'enumeration'}:
                text_fields.add(x)

    locations = all_locs if location == 'all' else {location}

    current_candidates = set(candidates)

    # pre-parse the query as each numeric flavor once, outside the loop;
    # ratings are stored doubled (0-10) relative to the UI's 0-5 stars
    try:
        rating_query = int(float(query)) * 2
    except:
        rating_query = None

    try:
        int_query = int(float(query))
    except:
        int_query = None

    try:
        float_query = float(query)
    except:
        float_query = None

    for location in locations:
        # never re-examine books already matched by an earlier field
        current_candidates -= matches
        q = query
        if location == 'languages':
            # allow searching by language name as well as by code
            q = canonicalize_lang(query)
            if q is None:
                lm = lang_map()
                rm = {v.lower(): k for k, v in lm.iteritems()}
                q = rm.get(query, query)

        # 'true'/'false' queries test field presence/absence
        if matchkind == CONTAINS_MATCH and q.lower() in {'true', 'false'}:
            found = set()
            for val, book_ids in self.field_iter(location, current_candidates):
                if val and (not hasattr(val, 'strip') or val.strip()):
                    found |= book_ids
            matches |= (found if q.lower() == 'true' else (current_candidates - found))
            continue

        dt = field_metadata.get(location, {}).get('datatype', None)
        if dt == 'rating':
            if rating_query is not None:
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val == rating_query:
                        matches |= book_ids
            continue

        if dt == 'float':
            if float_query is not None:
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val == float_query:
                        matches |= book_ids
            continue

        if dt == 'int':
            if int_query is not None:
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val == int_query:
                        matches |= book_ids
            continue

        if location in text_fields:
            for val, book_ids in self.field_iter(location, current_candidates):
                if val is not None:
                    if isinstance(val, basestring):
                        # _match expects a sequence of values
                        val = (val, )
                    if _match(q, val, matchkind, use_primary_find_in_search=upf, case_sensitive=case_sensitive):
                        matches |= book_ids

        if location == 'series_sort':
            # series_sort values are computed lazily, language-dependently,
            # from the series field rather than stored directly
            book_lang_map = self.dbcache.fields['languages'].book_value_map
            for val, book_ids in self.dbcache.fields['series'].iter_searchable_values_for_sort(current_candidates, book_lang_map):
                if val is not None:
                    if _match(q, (val, ), matchkind, use_primary_find_in_search=upf, case_sensitive=case_sensitive):
                        matches |= book_ids

    return matches
def localize_string(data):
    # Look up the entry for the current UI language; fall back to the
    # None-keyed default entry, and finally to the empty string.
    key = canonicalize_lang(get_lang())
    value = data.get(key, data.get(None))
    return value or ''
def locale_sort_key(loc):
    # Memoized sort key for a (language-code, variant) pair: sort by the
    # display name of the language, then by the variant string.
    key = lsk_cache.get(loc)
    if key is None:
        key = (psk(calibre_langcode_to_name(canonicalize_lang(loc[0]))), psk(loc[1] or ''))
        lsk_cache[loc] = key
    return key